xref: /openbmc/qemu/tcg/tcg.c (revision 53ba2eee)
1 /*
2  * Tiny Code Generator for QEMU
3  *
4  * Copyright (c) 2008 Fabrice Bellard
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24 
25 /* define it to use liveness analysis (better code) */
26 #define USE_TCG_OPTIMIZATIONS
27 
28 #include "qemu/osdep.h"
29 
/* Define to dump the ELF file used to communicate with GDB.  */
31 #undef DEBUG_JIT
32 
33 #include "qemu/error-report.h"
34 #include "qemu/cutils.h"
35 #include "qemu/host-utils.h"
36 #include "qemu/qemu-print.h"
37 #include "qemu/timer.h"
38 
39 /* Note: the long term plan is to reduce the dependencies on the QEMU
40    CPU definitions. Currently they are used for qemu_ld/st
41    instructions */
42 #define NO_CPU_IO_DEFS
43 #include "cpu.h"
44 
45 #include "exec/exec-all.h"
46 
47 #if !defined(CONFIG_USER_ONLY)
48 #include "hw/boards.h"
49 #endif
50 
51 #include "tcg/tcg-op.h"
52 
53 #if UINTPTR_MAX == UINT32_MAX
54 # define ELF_CLASS  ELFCLASS32
55 #else
56 # define ELF_CLASS  ELFCLASS64
57 #endif
58 #ifdef HOST_WORDS_BIGENDIAN
59 # define ELF_DATA   ELFDATA2MSB
60 #else
61 # define ELF_DATA   ELFDATA2LSB
62 #endif
63 
64 #include "elf.h"
65 #include "exec/log.h"
66 #include "sysemu/sysemu.h"
67 
68 /* Forward declarations for functions declared in tcg-target.c.inc and
69    used here. */
70 static void tcg_target_init(TCGContext *s);
71 static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode);
72 static void tcg_target_qemu_prologue(TCGContext *s);
73 static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
74                         intptr_t value, intptr_t addend);
75 
76 /* The CIE and FDE header definitions will be common to all hosts.  */
/*
 * Leading fields of a debug-frame CIE record.
 * NOTE(review): field names match the DWARF Common Information Entry
 * layout; confirm exact semantics against the DWARF specification.
 */
typedef struct {
    /* Record length; the attribute aligns the struct to pointer size. */
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t id;
    uint8_t version;
    char augmentation[1];
    uint8_t code_align;
    uint8_t data_align;
    uint8_t return_column;
} DebugFrameCIE;
86 
/*
 * Leading fields of a debug-frame FDE record (packed).
 * NOTE(review): presumably func_start/func_len describe the address range
 * of the generated code covered by this entry; confirm against the users
 * of this struct.
 */
typedef struct QEMU_PACKED {
    /* Record length; the attribute aligns the struct to pointer size. */
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t cie_offset;
    uintptr_t func_start;
    uintptr_t func_len;
} DebugFrameFDEHeader;
93 
/* A CIE immediately followed by an FDE header, packed into one record. */
typedef struct QEMU_PACKED {
    DebugFrameCIE cie;
    DebugFrameFDEHeader fde;
} DebugFrameHeader;
98 
99 static void tcg_register_jit_int(void *buf, size_t size,
100                                  const void *debug_frame,
101                                  size_t debug_frame_size)
102     __attribute__((unused));
103 
104 /* Forward declarations for functions declared and used in tcg-target.c.inc. */
105 static const char *target_parse_constraint(TCGArgConstraint *ct,
106                                            const char *ct_str, TCGType type);
107 static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
108                        intptr_t arg2);
109 static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
110 static void tcg_out_movi(TCGContext *s, TCGType type,
111                          TCGReg ret, tcg_target_long arg);
112 static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
113                        const int *const_args);
/*
 * Vector code-emission hooks.
 * With TCG_TARGET_MAYBE_vec set they are only declared here (the
 * definitions are expected to come from tcg-target.c.inc).  Otherwise
 * each hook is a stub that asserts it is never reached.
 */
#if TCG_TARGET_MAYBE_vec
static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                            TCGReg dst, TCGReg src);
static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg dst, TCGReg base, intptr_t offset);
static void tcg_out_dupi_vec(TCGContext *s, TCGType type,
                             TCGReg dst, tcg_target_long arg);
static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, unsigned vecl,
                           unsigned vece, const TCGArg *args,
                           const int *const_args);
#else
/* Stub: must never be called when the backend has no vector support. */
static inline bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                                   TCGReg dst, TCGReg src)
{
    g_assert_not_reached();
}
/* Stub: must never be called when the backend has no vector support. */
static inline bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, TCGReg base, intptr_t offset)
{
    g_assert_not_reached();
}
/* Stub: must never be called when the backend has no vector support. */
static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type,
                                    TCGReg dst, tcg_target_long arg)
{
    g_assert_not_reached();
}
/* Stub: must never be called when the backend has no vector support. */
static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, unsigned vecl,
                                  unsigned vece, const TCGArg *args,
                                  const int *const_args)
{
    g_assert_not_reached();
}
#endif
147 static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
148                        intptr_t arg2);
149 static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
150                         TCGReg base, intptr_t ofs);
151 static void tcg_out_call(TCGContext *s, tcg_insn_unit *target);
152 static int tcg_target_const_match(tcg_target_long val, TCGType type,
153                                   const TCGArgConstraint *arg_ct);
154 #ifdef TCG_TARGET_NEED_LDST_LABELS
155 static int tcg_out_ldst_finalize(TCGContext *s);
156 #endif
157 
/*
 * Number of bytes kept between a region's high-water mark and its end
 * (see tcg_region_assign(), which sets code_gen_highwater to
 * end - TCG_HIGHWATER).
 */
#define TCG_HIGHWATER 1024

/* Array of registered TCG contexts and the number of entries in use. */
static TCGContext **tcg_ctxs;
static unsigned int n_tcg_ctxs;
/* Global "env" temporary; assigned at the end of tcg_context_init(). */
TCGv_env cpu_env = 0;
163 
/*
 * Per-region tree of translation blocks.  tcg_tb_insert() stores each TB
 * keyed by its tb_tc; @lock is taken around every access to @tree.
 */
struct tcg_region_tree {
    QemuMutex lock;
    GTree *tree;
    /* padding to avoid false sharing is computed at run-time */
};
169 
/*
 * We divide code_gen_buffer into equally-sized "regions" that TCG threads
 * dynamically allocate from as demand dictates. Given appropriate region
 * sizing, this minimizes flushes even when some TCG threads generate a lot
 * more code than others.
 */
struct tcg_region_state {
    QemuMutex lock;

    /* fields set at init time */
    void *start;          /* start of code_gen_buffer (may be unaligned) */
    void *start_aligned;  /* start rounded up to the host page size */
    void *end;            /* page-aligned end, minus the last guard page */
    size_t n;             /* number of regions */
    size_t size; /* size of one region */
    size_t stride; /* .size + guard size */

    /* fields protected by the lock */
    size_t current; /* current region index */
    size_t agg_size_full; /* aggregate size of full regions */
};
191 
/* The single, file-wide region bookkeeping state. */
static struct tcg_region_state region;
/*
 * This is an array of struct tcg_region_tree's, with padding.
 * We use void * to simplify the computation of region_trees[i]; each
 * struct is found every tree_size bytes.
 */
static void *region_trees;
/* Byte distance between consecutive entries of region_trees. */
static size_t tree_size;
/* Register sets; NOTE(review): presumably filled in by tcg_target_init(). */
static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT];
static TCGRegSet tcg_target_call_clobber_regs;
202 
#if TCG_TARGET_INSN_UNIT_SIZE == 1
/* Store the byte @v at the current output position and advance by one unit. */
static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v)
{
    tcg_insn_unit *dst = s->code_ptr;

    *dst = v;
    s->code_ptr = dst + 1;
}

/* Overwrite the already-emitted unit at @p with the byte @v. */
static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p,
                                                      uint8_t v)
{
    p[0] = v;
}
#endif
215 
216 #if TCG_TARGET_INSN_UNIT_SIZE <= 2
217 static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v)
218 {
219     if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
220         *s->code_ptr++ = v;
221     } else {
222         tcg_insn_unit *p = s->code_ptr;
223         memcpy(p, &v, sizeof(v));
224         s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
225     }
226 }
227 
228 static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p,
229                                                        uint16_t v)
230 {
231     if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
232         *p = v;
233     } else {
234         memcpy(p, &v, sizeof(v));
235     }
236 }
237 #endif
238 
239 #if TCG_TARGET_INSN_UNIT_SIZE <= 4
240 static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v)
241 {
242     if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
243         *s->code_ptr++ = v;
244     } else {
245         tcg_insn_unit *p = s->code_ptr;
246         memcpy(p, &v, sizeof(v));
247         s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
248     }
249 }
250 
251 static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p,
252                                                        uint32_t v)
253 {
254     if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
255         *p = v;
256     } else {
257         memcpy(p, &v, sizeof(v));
258     }
259 }
260 #endif
261 
262 #if TCG_TARGET_INSN_UNIT_SIZE <= 8
263 static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v)
264 {
265     if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
266         *s->code_ptr++ = v;
267     } else {
268         tcg_insn_unit *p = s->code_ptr;
269         memcpy(p, &v, sizeof(v));
270         s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
271     }
272 }
273 
274 static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p,
275                                                        uint64_t v)
276 {
277     if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
278         *p = v;
279     } else {
280         memcpy(p, &v, sizeof(v));
281     }
282 }
283 #endif
284 
285 /* label relocation processing */
286 
287 static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
288                           TCGLabel *l, intptr_t addend)
289 {
290     TCGRelocation *r = tcg_malloc(sizeof(TCGRelocation));
291 
292     r->type = type;
293     r->ptr = code_ptr;
294     r->addend = addend;
295     QSIMPLEQ_INSERT_TAIL(&l->relocs, r, next);
296 }
297 
298 static void tcg_out_label(TCGContext *s, TCGLabel *l, tcg_insn_unit *ptr)
299 {
300     tcg_debug_assert(!l->has_value);
301     l->has_value = 1;
302     l->u.value_ptr = ptr;
303 }
304 
305 TCGLabel *gen_new_label(void)
306 {
307     TCGContext *s = tcg_ctx;
308     TCGLabel *l = tcg_malloc(sizeof(TCGLabel));
309 
310     memset(l, 0, sizeof(TCGLabel));
311     l->id = s->nb_labels++;
312     QSIMPLEQ_INIT(&l->relocs);
313 
314     QSIMPLEQ_INSERT_TAIL(&s->labels, l, next);
315 
316     return l;
317 }
318 
319 static bool tcg_resolve_relocs(TCGContext *s)
320 {
321     TCGLabel *l;
322 
323     QSIMPLEQ_FOREACH(l, &s->labels, next) {
324         TCGRelocation *r;
325         uintptr_t value = l->u.value;
326 
327         QSIMPLEQ_FOREACH(r, &l->relocs, next) {
328             if (!patch_reloc(r->ptr, r->type, value, r->addend)) {
329                 return false;
330             }
331         }
332     }
333     return true;
334 }
335 
336 static void set_jmp_reset_offset(TCGContext *s, int which)
337 {
338     size_t off = tcg_current_code_size(s);
339     s->tb_jmp_reset_offset[which] = off;
340     /* Make sure that we didn't overflow the stored offset.  */
341     assert(s->tb_jmp_reset_offset[which] == off);
342 }
343 
344 #include "tcg-target.c.inc"
345 
346 /* compare a pointer @ptr and a tb_tc @s */
347 static int ptr_cmp_tb_tc(const void *ptr, const struct tb_tc *s)
348 {
349     if (ptr >= s->ptr + s->size) {
350         return 1;
351     } else if (ptr < s->ptr) {
352         return -1;
353     }
354     return 0;
355 }
356 
357 static gint tb_tc_cmp(gconstpointer ap, gconstpointer bp)
358 {
359     const struct tb_tc *a = ap;
360     const struct tb_tc *b = bp;
361 
362     /*
363      * When both sizes are set, we know this isn't a lookup.
364      * This is the most likely case: every TB must be inserted; lookups
365      * are a lot less frequent.
366      */
367     if (likely(a->size && b->size)) {
368         if (a->ptr > b->ptr) {
369             return 1;
370         } else if (a->ptr < b->ptr) {
371             return -1;
372         }
373         /* a->ptr == b->ptr should happen only on deletions */
374         g_assert(a->size == b->size);
375         return 0;
376     }
377     /*
378      * All lookups have either .size field set to 0.
379      * From the glib sources we see that @ap is always the lookup key. However
380      * the docs provide no guarantee, so we just mark this case as likely.
381      */
382     if (likely(a->size == 0)) {
383         return ptr_cmp_tb_tc(a->ptr, b);
384     }
385     return ptr_cmp_tb_tc(b->ptr, a);
386 }
387 
388 static void tcg_region_trees_init(void)
389 {
390     size_t i;
391 
392     tree_size = ROUND_UP(sizeof(struct tcg_region_tree), qemu_dcache_linesize);
393     region_trees = qemu_memalign(qemu_dcache_linesize, region.n * tree_size);
394     for (i = 0; i < region.n; i++) {
395         struct tcg_region_tree *rt = region_trees + i * tree_size;
396 
397         qemu_mutex_init(&rt->lock);
398         rt->tree = g_tree_new(tb_tc_cmp);
399     }
400 }
401 
402 static struct tcg_region_tree *tc_ptr_to_region_tree(void *p)
403 {
404     size_t region_idx;
405 
406     if (p < region.start_aligned) {
407         region_idx = 0;
408     } else {
409         ptrdiff_t offset = p - region.start_aligned;
410 
411         if (offset > region.stride * (region.n - 1)) {
412             region_idx = region.n - 1;
413         } else {
414             region_idx = offset / region.stride;
415         }
416     }
417     return region_trees + region_idx * tree_size;
418 }
419 
420 void tcg_tb_insert(TranslationBlock *tb)
421 {
422     struct tcg_region_tree *rt = tc_ptr_to_region_tree(tb->tc.ptr);
423 
424     qemu_mutex_lock(&rt->lock);
425     g_tree_insert(rt->tree, &tb->tc, tb);
426     qemu_mutex_unlock(&rt->lock);
427 }
428 
429 void tcg_tb_remove(TranslationBlock *tb)
430 {
431     struct tcg_region_tree *rt = tc_ptr_to_region_tree(tb->tc.ptr);
432 
433     qemu_mutex_lock(&rt->lock);
434     g_tree_remove(rt->tree, &tb->tc);
435     qemu_mutex_unlock(&rt->lock);
436 }
437 
438 /*
439  * Find the TB 'tb' such that
440  * tb->tc.ptr <= tc_ptr < tb->tc.ptr + tb->tc.size
441  * Return NULL if not found.
442  */
443 TranslationBlock *tcg_tb_lookup(uintptr_t tc_ptr)
444 {
445     struct tcg_region_tree *rt = tc_ptr_to_region_tree((void *)tc_ptr);
446     TranslationBlock *tb;
447     struct tb_tc s = { .ptr = (void *)tc_ptr };
448 
449     qemu_mutex_lock(&rt->lock);
450     tb = g_tree_lookup(rt->tree, &s);
451     qemu_mutex_unlock(&rt->lock);
452     return tb;
453 }
454 
455 static void tcg_region_tree_lock_all(void)
456 {
457     size_t i;
458 
459     for (i = 0; i < region.n; i++) {
460         struct tcg_region_tree *rt = region_trees + i * tree_size;
461 
462         qemu_mutex_lock(&rt->lock);
463     }
464 }
465 
466 static void tcg_region_tree_unlock_all(void)
467 {
468     size_t i;
469 
470     for (i = 0; i < region.n; i++) {
471         struct tcg_region_tree *rt = region_trees + i * tree_size;
472 
473         qemu_mutex_unlock(&rt->lock);
474     }
475 }
476 
477 void tcg_tb_foreach(GTraverseFunc func, gpointer user_data)
478 {
479     size_t i;
480 
481     tcg_region_tree_lock_all();
482     for (i = 0; i < region.n; i++) {
483         struct tcg_region_tree *rt = region_trees + i * tree_size;
484 
485         g_tree_foreach(rt->tree, func, user_data);
486     }
487     tcg_region_tree_unlock_all();
488 }
489 
490 size_t tcg_nb_tbs(void)
491 {
492     size_t nb_tbs = 0;
493     size_t i;
494 
495     tcg_region_tree_lock_all();
496     for (i = 0; i < region.n; i++) {
497         struct tcg_region_tree *rt = region_trees + i * tree_size;
498 
499         nb_tbs += g_tree_nnodes(rt->tree);
500     }
501     tcg_region_tree_unlock_all();
502     return nb_tbs;
503 }
504 
505 static gboolean tcg_region_tree_traverse(gpointer k, gpointer v, gpointer data)
506 {
507     TranslationBlock *tb = v;
508 
509     tb_destroy(tb);
510     return FALSE;
511 }
512 
513 static void tcg_region_tree_reset_all(void)
514 {
515     size_t i;
516 
517     tcg_region_tree_lock_all();
518     for (i = 0; i < region.n; i++) {
519         struct tcg_region_tree *rt = region_trees + i * tree_size;
520 
521         g_tree_foreach(rt->tree, tcg_region_tree_traverse, NULL);
522         /* Increment the refcount first so that destroy acts as a reset */
523         g_tree_ref(rt->tree);
524         g_tree_destroy(rt->tree);
525     }
526     tcg_region_tree_unlock_all();
527 }
528 
529 static void tcg_region_bounds(size_t curr_region, void **pstart, void **pend)
530 {
531     void *start, *end;
532 
533     start = region.start_aligned + curr_region * region.stride;
534     end = start + region.size;
535 
536     if (curr_region == 0) {
537         start = region.start;
538     }
539     if (curr_region == region.n - 1) {
540         end = region.end;
541     }
542 
543     *pstart = start;
544     *pend = end;
545 }
546 
547 static void tcg_region_assign(TCGContext *s, size_t curr_region)
548 {
549     void *start, *end;
550 
551     tcg_region_bounds(curr_region, &start, &end);
552 
553     s->code_gen_buffer = start;
554     s->code_gen_ptr = start;
555     s->code_gen_buffer_size = end - start;
556     s->code_gen_highwater = end - TCG_HIGHWATER;
557 }
558 
559 static bool tcg_region_alloc__locked(TCGContext *s)
560 {
561     if (region.current == region.n) {
562         return true;
563     }
564     tcg_region_assign(s, region.current);
565     region.current++;
566     return false;
567 }
568 
569 /*
570  * Request a new region once the one in use has filled up.
571  * Returns true on error.
572  */
573 static bool tcg_region_alloc(TCGContext *s)
574 {
575     bool err;
576     /* read the region size now; alloc__locked will overwrite it on success */
577     size_t size_full = s->code_gen_buffer_size;
578 
579     qemu_mutex_lock(&region.lock);
580     err = tcg_region_alloc__locked(s);
581     if (!err) {
582         region.agg_size_full += size_full - TCG_HIGHWATER;
583     }
584     qemu_mutex_unlock(&region.lock);
585     return err;
586 }
587 
588 /*
589  * Perform a context's first region allocation.
590  * This function does _not_ increment region.agg_size_full.
591  */
592 static inline bool tcg_region_initial_alloc__locked(TCGContext *s)
593 {
594     return tcg_region_alloc__locked(s);
595 }
596 
597 /* Call from a safe-work context */
598 void tcg_region_reset_all(void)
599 {
600     unsigned int n_ctxs = qatomic_read(&n_tcg_ctxs);
601     unsigned int i;
602 
603     qemu_mutex_lock(&region.lock);
604     region.current = 0;
605     region.agg_size_full = 0;
606 
607     for (i = 0; i < n_ctxs; i++) {
608         TCGContext *s = qatomic_read(&tcg_ctxs[i]);
609         bool err = tcg_region_initial_alloc__locked(s);
610 
611         g_assert(!err);
612     }
613     qemu_mutex_unlock(&region.lock);
614 
615     tcg_region_tree_reset_all();
616 }
617 
618 #ifdef CONFIG_USER_ONLY
619 static size_t tcg_n_regions(void)
620 {
621     return 1;
622 }
623 #else
624 /*
625  * It is likely that some vCPUs will translate more code than others, so we
626  * first try to set more regions than max_cpus, with those regions being of
627  * reasonable size. If that's not possible we make do by evenly dividing
628  * the code_gen_buffer among the vCPUs.
629  */
630 static size_t tcg_n_regions(void)
631 {
632     size_t i;
633 
634     /* Use a single region if all we have is one vCPU thread */
635 #if !defined(CONFIG_USER_ONLY)
636     MachineState *ms = MACHINE(qdev_get_machine());
637     unsigned int max_cpus = ms->smp.max_cpus;
638 #endif
639     if (max_cpus == 1 || !qemu_tcg_mttcg_enabled()) {
640         return 1;
641     }
642 
643     /* Try to have more regions than max_cpus, with each region being >= 2 MB */
644     for (i = 8; i > 0; i--) {
645         size_t regions_per_thread = i;
646         size_t region_size;
647 
648         region_size = tcg_init_ctx.code_gen_buffer_size;
649         region_size /= max_cpus * regions_per_thread;
650 
651         if (region_size >= 2 * 1024u * 1024) {
652             return max_cpus * regions_per_thread;
653         }
654     }
655     /* If we can't, then just allocate one region per vCPU thread */
656     return max_cpus;
657 }
658 #endif
659 
660 /*
661  * Initializes region partitioning.
662  *
663  * Called at init time from the parent thread (i.e. the one calling
664  * tcg_context_init), after the target's TCG globals have been set.
665  *
666  * Region partitioning works by splitting code_gen_buffer into separate regions,
667  * and then assigning regions to TCG threads so that the threads can translate
668  * code in parallel without synchronization.
669  *
670  * In softmmu the number of TCG threads is bounded by max_cpus, so we use at
671  * least max_cpus regions in MTTCG. In !MTTCG we use a single region.
672  * Note that the TCG options from the command-line (i.e. -accel accel=tcg,[...])
673  * must have been parsed before calling this function, since it calls
674  * qemu_tcg_mttcg_enabled().
675  *
676  * In user-mode we use a single region.  Having multiple regions in user-mode
677  * is not supported, because the number of vCPU threads (recall that each thread
678  * spawned by the guest corresponds to a vCPU thread) is only bounded by the
679  * OS, and usually this number is huge (tens of thousands is not uncommon).
680  * Thus, given this large bound on the number of vCPU threads and the fact
 * that code_gen_buffer is allocated at compile-time, we cannot guarantee
 * the availability of at least one region per vCPU thread.
683  *
684  * However, this user-mode limitation is unlikely to be a significant problem
685  * in practice. Multi-threaded guests share most if not all of their translated
686  * code, which makes parallel code generation less appealing than in softmmu.
687  */
/*
 * Split tcg_init_ctx's code buffer into tcg_n_regions() regions, fill in
 * the global 'region' state, protect one guard page after each region and
 * create the per-region TB trees.
 */
void tcg_region_init(void)
{
    void *buf = tcg_init_ctx.code_gen_buffer;
    void *aligned;
    size_t size = tcg_init_ctx.code_gen_buffer_size;
    size_t page_size = qemu_real_host_page_size;
    size_t region_size;
    size_t n_regions;
    size_t i;

    n_regions = tcg_n_regions();

    /* The first region will be 'aligned - buf' bytes larger than the others */
    aligned = QEMU_ALIGN_PTR_UP(buf, page_size);
    g_assert(aligned < tcg_init_ctx.code_gen_buffer + size);
    /*
     * Make region_size a multiple of page_size, using aligned as the start.
     * As a result of this we might end up with a few extra pages at the end of
     * the buffer; we will assign those to the last region.
     */
    region_size = (size - (aligned - buf)) / n_regions;
    region_size = QEMU_ALIGN_DOWN(region_size, page_size);

    /* A region must have at least 2 pages; one code, one guard */
    g_assert(region_size >= 2 * page_size);

    /* init the region struct */
    qemu_mutex_init(&region.lock);
    region.n = n_regions;
    /* .size is the usable part; .stride also covers the guard page */
    region.size = region_size - page_size;
    region.stride = region_size;
    region.start = buf;
    region.start_aligned = aligned;
    /* page-align the end, since its last page will be a guard page */
    region.end = QEMU_ALIGN_PTR_DOWN(buf + size, page_size);
    /* account for that last guard page */
    region.end -= page_size;

    /* set guard pages */
    for (i = 0; i < region.n; i++) {
        void *start, *end;
        int rc;

        tcg_region_bounds(i, &start, &end);
        /* The guard page is the page that starts at the region's end. */
        rc = qemu_mprotect_none(end, page_size);
        g_assert(!rc);
    }

    tcg_region_trees_init();

    /* In user-mode we support only one ctx, so do the initial allocation now */
#ifdef CONFIG_USER_ONLY
    {
        /*
         * NOTE(review): called without holding region.lock; presumably
         * fine because this runs at init time -- confirm.
         */
        bool err = tcg_region_initial_alloc__locked(tcg_ctx);

        g_assert(!err);
    }
#endif
}
747 
748 static void alloc_tcg_plugin_context(TCGContext *s)
749 {
750 #ifdef CONFIG_PLUGIN
751     s->plugin_tb = g_new0(struct qemu_plugin_tb, 1);
752     s->plugin_tb->insns =
753         g_ptr_array_new_with_free_func(qemu_plugin_insn_cleanup_fn);
754 #endif
755 }
756 
757 /*
758  * All TCG threads except the parent (i.e. the one that called tcg_context_init
759  * and registered the target's TCG globals) must register with this function
760  * before initiating translation.
761  *
762  * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation
763  * of tcg_region_init() for the reasoning behind this.
764  *
765  * In softmmu each caller registers its context in tcg_ctxs[]. Note that in
 * softmmu tcg_ctxs[] does not track tcg_init_ctx, since the initial context
767  * is not used anymore for translation once this function is called.
768  *
769  * Not tracking tcg_init_ctx in tcg_ctxs[] in softmmu keeps code that iterates
 * over the array (e.g. tcg_code_size()) the same for both softmmu and user-mode.
771  */
#ifdef CONFIG_USER_ONLY
/* User-mode: every thread shares the single initial context. */
void tcg_register_thread(void)
{
    tcg_ctx = &tcg_init_ctx;
}
#else
/*
 * Softmmu: create a private copy of tcg_init_ctx for the calling thread,
 * register it in tcg_ctxs[] and give it its first code region.
 */
void tcg_register_thread(void)
{
    MachineState *ms = MACHINE(qdev_get_machine());
    TCGContext *s = g_malloc(sizeof(*s));
    unsigned int i, n;
    bool err;

    *s = tcg_init_ctx;

    /* Relink mem_base.  */
    /*
     * The struct copy left mem_base pointing into tcg_init_ctx.temps;
     * redirect each one to the same index in the new context's temps.
     */
    for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) {
        if (tcg_init_ctx.temps[i].mem_base) {
            ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps;
            tcg_debug_assert(b >= 0 && b < n);
            s->temps[i].mem_base = &s->temps[b];
        }
    }

    /* Claim an entry in tcg_ctxs */
    n = qatomic_fetch_inc(&n_tcg_ctxs);
    /* tcg_ctxs[] was sized for max_cpus entries in tcg_context_init(). */
    g_assert(n < ms->smp.max_cpus);
    qatomic_set(&tcg_ctxs[n], s);

    /*
     * NOTE(review): the first registered context presumably keeps the
     * plugin_tb pointer copied from tcg_init_ctx; later ones get their
     * own -- confirm.
     */
    if (n > 0) {
        alloc_tcg_plugin_context(s);
    }

    tcg_ctx = s;
    qemu_mutex_lock(&region.lock);
    err = tcg_region_initial_alloc__locked(tcg_ctx);
    g_assert(!err);
    qemu_mutex_unlock(&region.lock);
}
#endif /* !CONFIG_USER_ONLY */
812 
813 /*
814  * Returns the size (in bytes) of all translated code (i.e. from all regions)
815  * currently in the cache.
816  * See also: tcg_code_capacity()
817  * Do not confuse with tcg_current_code_size(); that one applies to a single
818  * TCG context.
819  */
820 size_t tcg_code_size(void)
821 {
822     unsigned int n_ctxs = qatomic_read(&n_tcg_ctxs);
823     unsigned int i;
824     size_t total;
825 
826     qemu_mutex_lock(&region.lock);
827     total = region.agg_size_full;
828     for (i = 0; i < n_ctxs; i++) {
829         const TCGContext *s = qatomic_read(&tcg_ctxs[i]);
830         size_t size;
831 
832         size = qatomic_read(&s->code_gen_ptr) - s->code_gen_buffer;
833         g_assert(size <= s->code_gen_buffer_size);
834         total += size;
835     }
836     qemu_mutex_unlock(&region.lock);
837     return total;
838 }
839 
840 /*
841  * Returns the code capacity (in bytes) of the entire cache, i.e. including all
842  * regions.
843  * See also: tcg_code_size()
844  */
845 size_t tcg_code_capacity(void)
846 {
847     size_t guard_size, capacity;
848 
849     /* no need for synchronization; these variables are set at init time */
850     guard_size = region.stride - region.size;
851     capacity = region.end + guard_size - region.start;
852     capacity -= region.n * (guard_size + TCG_HIGHWATER);
853     return capacity;
854 }
855 
856 size_t tcg_tb_phys_invalidate_count(void)
857 {
858     unsigned int n_ctxs = qatomic_read(&n_tcg_ctxs);
859     unsigned int i;
860     size_t total = 0;
861 
862     for (i = 0; i < n_ctxs; i++) {
863         const TCGContext *s = qatomic_read(&tcg_ctxs[i]);
864 
865         total += qatomic_read(&s->tb_phys_invalidate_count);
866     }
867     return total;
868 }
869 
870 /* pool based memory allocation */
871 void *tcg_malloc_internal(TCGContext *s, int size)
872 {
873     TCGPool *p;
874     int pool_size;
875 
876     if (size > TCG_POOL_CHUNK_SIZE) {
877         /* big malloc: insert a new pool (XXX: could optimize) */
878         p = g_malloc(sizeof(TCGPool) + size);
879         p->size = size;
880         p->next = s->pool_first_large;
881         s->pool_first_large = p;
882         return p->data;
883     } else {
884         p = s->pool_current;
885         if (!p) {
886             p = s->pool_first;
887             if (!p)
888                 goto new_pool;
889         } else {
890             if (!p->next) {
891             new_pool:
892                 pool_size = TCG_POOL_CHUNK_SIZE;
893                 p = g_malloc(sizeof(TCGPool) + pool_size);
894                 p->size = pool_size;
895                 p->next = NULL;
896                 if (s->pool_current)
897                     s->pool_current->next = p;
898                 else
899                     s->pool_first = p;
900             } else {
901                 p = p->next;
902             }
903         }
904     }
905     s->pool_current = p;
906     s->pool_cur = p->data + size;
907     s->pool_end = p->data + p->size;
908     return p->data;
909 }
910 
911 void tcg_pool_reset(TCGContext *s)
912 {
913     TCGPool *p, *t;
914     for (p = s->pool_first_large; p; p = t) {
915         t = p->next;
916         g_free(p);
917     }
918     s->pool_first_large = NULL;
919     s->pool_cur = s->pool_end = NULL;
920     s->pool_current = NULL;
921 }
922 
/*
 * Description of one helper function.  @func is the key under which the
 * entry is stored in helper_table (see tcg_context_init()).
 * NOTE(review): the meanings of @flags and @sizemask are not visible in
 * this part of the file -- confirm against their users.
 */
typedef struct TCGHelperInfo {
    void *func;
    const char *name;
    unsigned flags;
    unsigned sizemask;
} TCGHelperInfo;
929 
930 #include "exec/helper-proto.h"
931 
/* Table of all helpers; its entries come from exec/helper-tcg.h. */
static const TCGHelperInfo all_helpers[] = {
#include "exec/helper-tcg.h"
};
/* Maps a helper's function pointer to its all_helpers[] entry. */
static GHashTable *helper_table;

/*
 * Copy of tcg_target_reg_alloc_order with its leading run of non
 * call-clobbered registers reversed; filled in by tcg_context_init().
 */
static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)];
938 static void process_op_defs(TCGContext *s);
939 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
940                                             TCGReg reg, const char *name);
941 
/*
 * One-time initialisation of the initial TCG context @s: zero it, set up
 * the per-opcode constraint arrays, register helpers, initialise the
 * backend, build indirect_reg_alloc_order, set up tcg_ctxs[] and create
 * the "env" global bound to TCG_AREG0.
 */
void tcg_context_init(TCGContext *s)
{
    int op, total_args, n, i;
    TCGOpDef *def;
    TCGArgConstraint *args_ct;
    TCGTemp *ts;

    memset(s, 0, sizeof(*s));
    s->nb_globals = 0;

    /* Count total number of arguments and allocate the corresponding
       space */
    total_args = 0;
    for(op = 0; op < NB_OPS; op++) {
        def = &tcg_op_defs[op];
        n = def->nb_iargs + def->nb_oargs;
        total_args += n;
    }

    args_ct = g_new0(TCGArgConstraint, total_args);

    /* Give each opcode its slice of the single args_ct allocation. */
    for(op = 0; op < NB_OPS; op++) {
        def = &tcg_op_defs[op];
        def->args_ct = args_ct;
        n = def->nb_iargs + def->nb_oargs;
        args_ct += n;
    }

    /* Register helpers.  */
    /* Use g_direct_hash/equal for direct pointer comparisons on func.  */
    helper_table = g_hash_table_new(NULL, NULL);

    for (i = 0; i < ARRAY_SIZE(all_helpers); ++i) {
        g_hash_table_insert(helper_table, (gpointer)all_helpers[i].func,
                            (gpointer)&all_helpers[i]);
    }

    tcg_target_init(s);
    process_op_defs(s);

    /* Reverse the order of the saved registers, assuming they're all at
       the start of tcg_target_reg_alloc_order.  */
    /* Find n, the index of the first call-clobbered register. */
    for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) {
        int r = tcg_target_reg_alloc_order[n];
        if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) {
            break;
        }
    }
    /* First n entries reversed ... */
    for (i = 0; i < n; ++i) {
        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i];
    }
    /* ... the remainder copied unchanged. */
    for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) {
        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i];
    }

    alloc_tcg_plugin_context(s);

    tcg_ctx = s;
    /*
     * In user-mode we simply share the init context among threads, since we
     * use a single region. See the documentation tcg_region_init() for the
     * reasoning behind this.
     * In softmmu we will have at most max_cpus TCG threads.
     */
#ifdef CONFIG_USER_ONLY
    tcg_ctxs = &tcg_ctx;
    n_tcg_ctxs = 1;
#else
    MachineState *ms = MACHINE(qdev_get_machine());
    unsigned int max_cpus = ms->smp.max_cpus;
    /* Entries are filled in later by tcg_register_thread(). */
    tcg_ctxs = g_new(TCGContext *, max_cpus);
#endif

    /* TCG_AREG0 must not be reserved, since "env" is bound to it below. */
    tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0));
    ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env");
    cpu_env = temp_tcgv_ptr(ts);
}
1019 
1020 /*
1021  * Allocate TBs right before their corresponding translated code, making
1022  * sure that TBs and code are on different cache lines.
1023  */
1024 TranslationBlock *tcg_tb_alloc(TCGContext *s)
1025 {
1026     uintptr_t align = qemu_icache_linesize;
1027     TranslationBlock *tb;
1028     void *next;
1029 
1030  retry:
1031     tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align);
1032     next = (void *)ROUND_UP((uintptr_t)(tb + 1), align);
1033 
1034     if (unlikely(next > s->code_gen_highwater)) {
1035         if (tcg_region_alloc(s)) {
1036             return NULL;
1037         }
1038         goto retry;
1039     }
1040     qatomic_set(&s->code_gen_ptr, next);
1041     s->data_gen_ptr = NULL;
1042     return tb;
1043 }
1044 
1045 void tcg_prologue_init(TCGContext *s)
1046 {
1047     size_t prologue_size, total_size;
1048     void *buf0, *buf1;
1049 
1050     /* Put the prologue at the beginning of code_gen_buffer.  */
1051     buf0 = s->code_gen_buffer;
1052     total_size = s->code_gen_buffer_size;
1053     s->code_ptr = buf0;
1054     s->code_buf = buf0;
1055     s->data_gen_ptr = NULL;
1056     s->code_gen_prologue = buf0;
1057 
1058     /* Compute a high-water mark, at which we voluntarily flush the buffer
1059        and start over.  The size here is arbitrary, significantly larger
1060        than we expect the code generation for any one opcode to require.  */
1061     s->code_gen_highwater = s->code_gen_buffer + (total_size - TCG_HIGHWATER);
1062 
1063 #ifdef TCG_TARGET_NEED_POOL_LABELS
1064     s->pool_labels = NULL;
1065 #endif
1066 
1067     /* Generate the prologue.  */
1068     tcg_target_qemu_prologue(s);
1069 
1070 #ifdef TCG_TARGET_NEED_POOL_LABELS
1071     /* Allow the prologue to put e.g. guest_base into a pool entry.  */
1072     {
1073         int result = tcg_out_pool_finalize(s);
1074         tcg_debug_assert(result == 0);
1075     }
1076 #endif
1077 
1078     buf1 = s->code_ptr;
1079     flush_icache_range((uintptr_t)buf0, (uintptr_t)buf1);
1080 
1081     /* Deduct the prologue from the buffer.  */
1082     prologue_size = tcg_current_code_size(s);
1083     s->code_gen_ptr = buf1;
1084     s->code_gen_buffer = buf1;
1085     s->code_buf = buf1;
1086     total_size -= prologue_size;
1087     s->code_gen_buffer_size = total_size;
1088 
1089     tcg_register_jit(s->code_gen_buffer, total_size);
1090 
1091 #ifdef DEBUG_DISAS
1092     if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
1093         FILE *logfile = qemu_log_lock();
1094         qemu_log("PROLOGUE: [size=%zu]\n", prologue_size);
1095         if (s->data_gen_ptr) {
1096             size_t code_size = s->data_gen_ptr - buf0;
1097             size_t data_size = prologue_size - code_size;
1098             size_t i;
1099 
1100             log_disas(buf0, code_size);
1101 
1102             for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) {
1103                 if (sizeof(tcg_target_ulong) == 8) {
1104                     qemu_log("0x%08" PRIxPTR ":  .quad  0x%016" PRIx64 "\n",
1105                              (uintptr_t)s->data_gen_ptr + i,
1106                              *(uint64_t *)(s->data_gen_ptr + i));
1107                 } else {
1108                     qemu_log("0x%08" PRIxPTR ":  .long  0x%08x\n",
1109                              (uintptr_t)s->data_gen_ptr + i,
1110                              *(uint32_t *)(s->data_gen_ptr + i));
1111                 }
1112             }
1113         } else {
1114             log_disas(buf0, prologue_size);
1115         }
1116         qemu_log("\n");
1117         qemu_log_flush();
1118         qemu_log_unlock(logfile);
1119     }
1120 #endif
1121 
1122     /* Assert that goto_ptr is implemented completely.  */
1123     if (TCG_TARGET_HAS_goto_ptr) {
1124         tcg_debug_assert(s->code_gen_epilogue != NULL);
1125     }
1126 }
1127 
1128 void tcg_func_start(TCGContext *s)
1129 {
1130     tcg_pool_reset(s);
1131     s->nb_temps = s->nb_globals;
1132 
1133     /* No temps have been previously allocated for size or locality.  */
1134     memset(s->free_temps, 0, sizeof(s->free_temps));
1135 
1136     s->nb_ops = 0;
1137     s->nb_labels = 0;
1138     s->current_frame_offset = s->frame_start;
1139 
1140 #ifdef CONFIG_DEBUG_TCG
1141     s->goto_tb_issue_mask = 0;
1142 #endif
1143 
1144     QTAILQ_INIT(&s->ops);
1145     QTAILQ_INIT(&s->free_ops);
1146     QSIMPLEQ_INIT(&s->labels);
1147 }
1148 
1149 static inline TCGTemp *tcg_temp_alloc(TCGContext *s)
1150 {
1151     int n = s->nb_temps++;
1152     tcg_debug_assert(n < TCG_MAX_TEMPS);
1153     return memset(&s->temps[n], 0, sizeof(TCGTemp));
1154 }
1155 
1156 static inline TCGTemp *tcg_global_alloc(TCGContext *s)
1157 {
1158     TCGTemp *ts;
1159 
1160     tcg_debug_assert(s->nb_globals == s->nb_temps);
1161     s->nb_globals++;
1162     ts = tcg_temp_alloc(s);
1163     ts->temp_global = 1;
1164 
1165     return ts;
1166 }
1167 
1168 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
1169                                             TCGReg reg, const char *name)
1170 {
1171     TCGTemp *ts;
1172 
1173     if (TCG_TARGET_REG_BITS == 32 && type != TCG_TYPE_I32) {
1174         tcg_abort();
1175     }
1176 
1177     ts = tcg_global_alloc(s);
1178     ts->base_type = type;
1179     ts->type = type;
1180     ts->fixed_reg = 1;
1181     ts->reg = reg;
1182     ts->name = name;
1183     tcg_regset_set_reg(s->reserved_regs, reg);
1184 
1185     return ts;
1186 }
1187 
1188 void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size)
1189 {
1190     s->frame_start = start;
1191     s->frame_end = start + size;
1192     s->frame_temp
1193         = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame");
1194 }
1195 
1196 TCGTemp *tcg_global_mem_new_internal(TCGType type, TCGv_ptr base,
1197                                      intptr_t offset, const char *name)
1198 {
1199     TCGContext *s = tcg_ctx;
1200     TCGTemp *base_ts = tcgv_ptr_temp(base);
1201     TCGTemp *ts = tcg_global_alloc(s);
1202     int indirect_reg = 0, bigendian = 0;
1203 #ifdef HOST_WORDS_BIGENDIAN
1204     bigendian = 1;
1205 #endif
1206 
1207     if (!base_ts->fixed_reg) {
1208         /* We do not support double-indirect registers.  */
1209         tcg_debug_assert(!base_ts->indirect_reg);
1210         base_ts->indirect_base = 1;
1211         s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64
1212                             ? 2 : 1);
1213         indirect_reg = 1;
1214     }
1215 
1216     if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
1217         TCGTemp *ts2 = tcg_global_alloc(s);
1218         char buf[64];
1219 
1220         ts->base_type = TCG_TYPE_I64;
1221         ts->type = TCG_TYPE_I32;
1222         ts->indirect_reg = indirect_reg;
1223         ts->mem_allocated = 1;
1224         ts->mem_base = base_ts;
1225         ts->mem_offset = offset + bigendian * 4;
1226         pstrcpy(buf, sizeof(buf), name);
1227         pstrcat(buf, sizeof(buf), "_0");
1228         ts->name = strdup(buf);
1229 
1230         tcg_debug_assert(ts2 == ts + 1);
1231         ts2->base_type = TCG_TYPE_I64;
1232         ts2->type = TCG_TYPE_I32;
1233         ts2->indirect_reg = indirect_reg;
1234         ts2->mem_allocated = 1;
1235         ts2->mem_base = base_ts;
1236         ts2->mem_offset = offset + (1 - bigendian) * 4;
1237         pstrcpy(buf, sizeof(buf), name);
1238         pstrcat(buf, sizeof(buf), "_1");
1239         ts2->name = strdup(buf);
1240     } else {
1241         ts->base_type = type;
1242         ts->type = type;
1243         ts->indirect_reg = indirect_reg;
1244         ts->mem_allocated = 1;
1245         ts->mem_base = base_ts;
1246         ts->mem_offset = offset;
1247         ts->name = name;
1248     }
1249     return ts;
1250 }
1251 
1252 TCGTemp *tcg_temp_new_internal(TCGType type, bool temp_local)
1253 {
1254     TCGContext *s = tcg_ctx;
1255     TCGTemp *ts;
1256     int idx, k;
1257 
1258     k = type + (temp_local ? TCG_TYPE_COUNT : 0);
1259     idx = find_first_bit(s->free_temps[k].l, TCG_MAX_TEMPS);
1260     if (idx < TCG_MAX_TEMPS) {
1261         /* There is already an available temp with the right type.  */
1262         clear_bit(idx, s->free_temps[k].l);
1263 
1264         ts = &s->temps[idx];
1265         ts->temp_allocated = 1;
1266         tcg_debug_assert(ts->base_type == type);
1267         tcg_debug_assert(ts->temp_local == temp_local);
1268     } else {
1269         ts = tcg_temp_alloc(s);
1270         if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
1271             TCGTemp *ts2 = tcg_temp_alloc(s);
1272 
1273             ts->base_type = type;
1274             ts->type = TCG_TYPE_I32;
1275             ts->temp_allocated = 1;
1276             ts->temp_local = temp_local;
1277 
1278             tcg_debug_assert(ts2 == ts + 1);
1279             ts2->base_type = TCG_TYPE_I64;
1280             ts2->type = TCG_TYPE_I32;
1281             ts2->temp_allocated = 1;
1282             ts2->temp_local = temp_local;
1283         } else {
1284             ts->base_type = type;
1285             ts->type = type;
1286             ts->temp_allocated = 1;
1287             ts->temp_local = temp_local;
1288         }
1289     }
1290 
1291 #if defined(CONFIG_DEBUG_TCG)
1292     s->temps_in_use++;
1293 #endif
1294     return ts;
1295 }
1296 
1297 TCGv_vec tcg_temp_new_vec(TCGType type)
1298 {
1299     TCGTemp *t;
1300 
1301 #ifdef CONFIG_DEBUG_TCG
1302     switch (type) {
1303     case TCG_TYPE_V64:
1304         assert(TCG_TARGET_HAS_v64);
1305         break;
1306     case TCG_TYPE_V128:
1307         assert(TCG_TARGET_HAS_v128);
1308         break;
1309     case TCG_TYPE_V256:
1310         assert(TCG_TARGET_HAS_v256);
1311         break;
1312     default:
1313         g_assert_not_reached();
1314     }
1315 #endif
1316 
1317     t = tcg_temp_new_internal(type, 0);
1318     return temp_tcgv_vec(t);
1319 }
1320 
1321 /* Create a new temp of the same type as an existing temp.  */
1322 TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match)
1323 {
1324     TCGTemp *t = tcgv_vec_temp(match);
1325 
1326     tcg_debug_assert(t->temp_allocated != 0);
1327 
1328     t = tcg_temp_new_internal(t->base_type, 0);
1329     return temp_tcgv_vec(t);
1330 }
1331 
1332 void tcg_temp_free_internal(TCGTemp *ts)
1333 {
1334     TCGContext *s = tcg_ctx;
1335     int k, idx;
1336 
1337 #if defined(CONFIG_DEBUG_TCG)
1338     s->temps_in_use--;
1339     if (s->temps_in_use < 0) {
1340         fprintf(stderr, "More temporaries freed than allocated!\n");
1341     }
1342 #endif
1343 
1344     tcg_debug_assert(ts->temp_global == 0);
1345     tcg_debug_assert(ts->temp_allocated != 0);
1346     ts->temp_allocated = 0;
1347 
1348     idx = temp_idx(ts);
1349     k = ts->base_type + (ts->temp_local ? TCG_TYPE_COUNT : 0);
1350     set_bit(idx, s->free_temps[k].l);
1351 }
1352 
1353 TCGv_i32 tcg_const_i32(int32_t val)
1354 {
1355     TCGv_i32 t0;
1356     t0 = tcg_temp_new_i32();
1357     tcg_gen_movi_i32(t0, val);
1358     return t0;
1359 }
1360 
1361 TCGv_i64 tcg_const_i64(int64_t val)
1362 {
1363     TCGv_i64 t0;
1364     t0 = tcg_temp_new_i64();
1365     tcg_gen_movi_i64(t0, val);
1366     return t0;
1367 }
1368 
1369 TCGv_i32 tcg_const_local_i32(int32_t val)
1370 {
1371     TCGv_i32 t0;
1372     t0 = tcg_temp_local_new_i32();
1373     tcg_gen_movi_i32(t0, val);
1374     return t0;
1375 }
1376 
1377 TCGv_i64 tcg_const_local_i64(int64_t val)
1378 {
1379     TCGv_i64 t0;
1380     t0 = tcg_temp_local_new_i64();
1381     tcg_gen_movi_i64(t0, val);
1382     return t0;
1383 }
1384 
1385 #if defined(CONFIG_DEBUG_TCG)
1386 void tcg_clear_temp_count(void)
1387 {
1388     TCGContext *s = tcg_ctx;
1389     s->temps_in_use = 0;
1390 }
1391 
1392 int tcg_check_temp_count(void)
1393 {
1394     TCGContext *s = tcg_ctx;
1395     if (s->temps_in_use) {
1396         /* Clear the count so that we don't give another
1397          * warning immediately next time around.
1398          */
1399         s->temps_in_use = 0;
1400         return 1;
1401     }
1402     return 0;
1403 }
1404 #endif
1405 
/* Return true if OP may appear in the opcode stream.
   Test the runtime variable that controls each opcode.
   Opcodes not listed explicitly are expected to be target-specific
   (asserted in debug builds) and are reported as supported.  */
bool tcg_op_supported(TCGOpcode op)
{
    /* True if any vector width is available on this host. */
    const bool have_vec
        = TCG_TARGET_HAS_v64 | TCG_TARGET_HAS_v128 | TCG_TARGET_HAS_v256;

    switch (op) {
    /* Control flow, calls and guest memory access: always supported. */
    case INDEX_op_discard:
    case INDEX_op_set_label:
    case INDEX_op_call:
    case INDEX_op_br:
    case INDEX_op_mb:
    case INDEX_op_insn_start:
    case INDEX_op_exit_tb:
    case INDEX_op_goto_tb:
    case INDEX_op_qemu_ld_i32:
    case INDEX_op_qemu_st_i32:
    case INDEX_op_qemu_ld_i64:
    case INDEX_op_qemu_st_i64:
        return true;

    case INDEX_op_goto_ptr:
        return TCG_TARGET_HAS_goto_ptr;

    /* Basic 32-bit operations: always supported. */
    case INDEX_op_mov_i32:
    case INDEX_op_movi_i32:
    case INDEX_op_setcond_i32:
    case INDEX_op_brcond_i32:
    case INDEX_op_ld8u_i32:
    case INDEX_op_ld8s_i32:
    case INDEX_op_ld16u_i32:
    case INDEX_op_ld16s_i32:
    case INDEX_op_ld_i32:
    case INDEX_op_st8_i32:
    case INDEX_op_st16_i32:
    case INDEX_op_st_i32:
    case INDEX_op_add_i32:
    case INDEX_op_sub_i32:
    case INDEX_op_mul_i32:
    case INDEX_op_and_i32:
    case INDEX_op_or_i32:
    case INDEX_op_xor_i32:
    case INDEX_op_shl_i32:
    case INDEX_op_shr_i32:
    case INDEX_op_sar_i32:
        return true;

    /* Optional 32-bit operations: each gated by its TCG_TARGET_HAS_* flag. */
    case INDEX_op_movcond_i32:
        return TCG_TARGET_HAS_movcond_i32;
    case INDEX_op_div_i32:
    case INDEX_op_divu_i32:
        return TCG_TARGET_HAS_div_i32;
    case INDEX_op_rem_i32:
    case INDEX_op_remu_i32:
        return TCG_TARGET_HAS_rem_i32;
    case INDEX_op_div2_i32:
    case INDEX_op_divu2_i32:
        return TCG_TARGET_HAS_div2_i32;
    case INDEX_op_rotl_i32:
    case INDEX_op_rotr_i32:
        return TCG_TARGET_HAS_rot_i32;
    case INDEX_op_deposit_i32:
        return TCG_TARGET_HAS_deposit_i32;
    case INDEX_op_extract_i32:
        return TCG_TARGET_HAS_extract_i32;
    case INDEX_op_sextract_i32:
        return TCG_TARGET_HAS_sextract_i32;
    case INDEX_op_extract2_i32:
        return TCG_TARGET_HAS_extract2_i32;
    case INDEX_op_add2_i32:
        return TCG_TARGET_HAS_add2_i32;
    case INDEX_op_sub2_i32:
        return TCG_TARGET_HAS_sub2_i32;
    case INDEX_op_mulu2_i32:
        return TCG_TARGET_HAS_mulu2_i32;
    case INDEX_op_muls2_i32:
        return TCG_TARGET_HAS_muls2_i32;
    case INDEX_op_muluh_i32:
        return TCG_TARGET_HAS_muluh_i32;
    case INDEX_op_mulsh_i32:
        return TCG_TARGET_HAS_mulsh_i32;
    case INDEX_op_ext8s_i32:
        return TCG_TARGET_HAS_ext8s_i32;
    case INDEX_op_ext16s_i32:
        return TCG_TARGET_HAS_ext16s_i32;
    case INDEX_op_ext8u_i32:
        return TCG_TARGET_HAS_ext8u_i32;
    case INDEX_op_ext16u_i32:
        return TCG_TARGET_HAS_ext16u_i32;
    case INDEX_op_bswap16_i32:
        return TCG_TARGET_HAS_bswap16_i32;
    case INDEX_op_bswap32_i32:
        return TCG_TARGET_HAS_bswap32_i32;
    case INDEX_op_not_i32:
        return TCG_TARGET_HAS_not_i32;
    case INDEX_op_neg_i32:
        return TCG_TARGET_HAS_neg_i32;
    case INDEX_op_andc_i32:
        return TCG_TARGET_HAS_andc_i32;
    case INDEX_op_orc_i32:
        return TCG_TARGET_HAS_orc_i32;
    case INDEX_op_eqv_i32:
        return TCG_TARGET_HAS_eqv_i32;
    case INDEX_op_nand_i32:
        return TCG_TARGET_HAS_nand_i32;
    case INDEX_op_nor_i32:
        return TCG_TARGET_HAS_nor_i32;
    case INDEX_op_clz_i32:
        return TCG_TARGET_HAS_clz_i32;
    case INDEX_op_ctz_i32:
        return TCG_TARGET_HAS_ctz_i32;
    case INDEX_op_ctpop_i32:
        return TCG_TARGET_HAS_ctpop_i32;

    /* Double-word comparisons: only on 32-bit hosts. */
    case INDEX_op_brcond2_i32:
    case INDEX_op_setcond2_i32:
        return TCG_TARGET_REG_BITS == 32;

    /* Basic 64-bit operations: only on 64-bit hosts. */
    case INDEX_op_mov_i64:
    case INDEX_op_movi_i64:
    case INDEX_op_setcond_i64:
    case INDEX_op_brcond_i64:
    case INDEX_op_ld8u_i64:
    case INDEX_op_ld8s_i64:
    case INDEX_op_ld16u_i64:
    case INDEX_op_ld16s_i64:
    case INDEX_op_ld32u_i64:
    case INDEX_op_ld32s_i64:
    case INDEX_op_ld_i64:
    case INDEX_op_st8_i64:
    case INDEX_op_st16_i64:
    case INDEX_op_st32_i64:
    case INDEX_op_st_i64:
    case INDEX_op_add_i64:
    case INDEX_op_sub_i64:
    case INDEX_op_mul_i64:
    case INDEX_op_and_i64:
    case INDEX_op_or_i64:
    case INDEX_op_xor_i64:
    case INDEX_op_shl_i64:
    case INDEX_op_shr_i64:
    case INDEX_op_sar_i64:
    case INDEX_op_ext_i32_i64:
    case INDEX_op_extu_i32_i64:
        return TCG_TARGET_REG_BITS == 64;

    /* Optional 64-bit operations: each gated by its TCG_TARGET_HAS_* flag. */
    case INDEX_op_movcond_i64:
        return TCG_TARGET_HAS_movcond_i64;
    case INDEX_op_div_i64:
    case INDEX_op_divu_i64:
        return TCG_TARGET_HAS_div_i64;
    case INDEX_op_rem_i64:
    case INDEX_op_remu_i64:
        return TCG_TARGET_HAS_rem_i64;
    case INDEX_op_div2_i64:
    case INDEX_op_divu2_i64:
        return TCG_TARGET_HAS_div2_i64;
    case INDEX_op_rotl_i64:
    case INDEX_op_rotr_i64:
        return TCG_TARGET_HAS_rot_i64;
    case INDEX_op_deposit_i64:
        return TCG_TARGET_HAS_deposit_i64;
    case INDEX_op_extract_i64:
        return TCG_TARGET_HAS_extract_i64;
    case INDEX_op_sextract_i64:
        return TCG_TARGET_HAS_sextract_i64;
    case INDEX_op_extract2_i64:
        return TCG_TARGET_HAS_extract2_i64;
    case INDEX_op_extrl_i64_i32:
        return TCG_TARGET_HAS_extrl_i64_i32;
    case INDEX_op_extrh_i64_i32:
        return TCG_TARGET_HAS_extrh_i64_i32;
    case INDEX_op_ext8s_i64:
        return TCG_TARGET_HAS_ext8s_i64;
    case INDEX_op_ext16s_i64:
        return TCG_TARGET_HAS_ext16s_i64;
    case INDEX_op_ext32s_i64:
        return TCG_TARGET_HAS_ext32s_i64;
    case INDEX_op_ext8u_i64:
        return TCG_TARGET_HAS_ext8u_i64;
    case INDEX_op_ext16u_i64:
        return TCG_TARGET_HAS_ext16u_i64;
    case INDEX_op_ext32u_i64:
        return TCG_TARGET_HAS_ext32u_i64;
    case INDEX_op_bswap16_i64:
        return TCG_TARGET_HAS_bswap16_i64;
    case INDEX_op_bswap32_i64:
        return TCG_TARGET_HAS_bswap32_i64;
    case INDEX_op_bswap64_i64:
        return TCG_TARGET_HAS_bswap64_i64;
    case INDEX_op_not_i64:
        return TCG_TARGET_HAS_not_i64;
    case INDEX_op_neg_i64:
        return TCG_TARGET_HAS_neg_i64;
    case INDEX_op_andc_i64:
        return TCG_TARGET_HAS_andc_i64;
    case INDEX_op_orc_i64:
        return TCG_TARGET_HAS_orc_i64;
    case INDEX_op_eqv_i64:
        return TCG_TARGET_HAS_eqv_i64;
    case INDEX_op_nand_i64:
        return TCG_TARGET_HAS_nand_i64;
    case INDEX_op_nor_i64:
        return TCG_TARGET_HAS_nor_i64;
    case INDEX_op_clz_i64:
        return TCG_TARGET_HAS_clz_i64;
    case INDEX_op_ctz_i64:
        return TCG_TARGET_HAS_ctz_i64;
    case INDEX_op_ctpop_i64:
        return TCG_TARGET_HAS_ctpop_i64;
    case INDEX_op_add2_i64:
        return TCG_TARGET_HAS_add2_i64;
    case INDEX_op_sub2_i64:
        return TCG_TARGET_HAS_sub2_i64;
    case INDEX_op_mulu2_i64:
        return TCG_TARGET_HAS_mulu2_i64;
    case INDEX_op_muls2_i64:
        return TCG_TARGET_HAS_muls2_i64;
    case INDEX_op_muluh_i64:
        return TCG_TARGET_HAS_muluh_i64;
    case INDEX_op_mulsh_i64:
        return TCG_TARGET_HAS_mulsh_i64;

    /* Basic vector operations: supported whenever any vector width is. */
    case INDEX_op_mov_vec:
    case INDEX_op_dup_vec:
    case INDEX_op_dupi_vec:
    case INDEX_op_dupm_vec:
    case INDEX_op_ld_vec:
    case INDEX_op_st_vec:
    case INDEX_op_add_vec:
    case INDEX_op_sub_vec:
    case INDEX_op_and_vec:
    case INDEX_op_or_vec:
    case INDEX_op_xor_vec:
    case INDEX_op_cmp_vec:
        return have_vec;
    case INDEX_op_dup2_vec:
        return have_vec && TCG_TARGET_REG_BITS == 32;
    /* Optional vector operations: need have_vec plus their own flag. */
    case INDEX_op_not_vec:
        return have_vec && TCG_TARGET_HAS_not_vec;
    case INDEX_op_neg_vec:
        return have_vec && TCG_TARGET_HAS_neg_vec;
    case INDEX_op_abs_vec:
        return have_vec && TCG_TARGET_HAS_abs_vec;
    case INDEX_op_andc_vec:
        return have_vec && TCG_TARGET_HAS_andc_vec;
    case INDEX_op_orc_vec:
        return have_vec && TCG_TARGET_HAS_orc_vec;
    case INDEX_op_mul_vec:
        return have_vec && TCG_TARGET_HAS_mul_vec;
    case INDEX_op_shli_vec:
    case INDEX_op_shri_vec:
    case INDEX_op_sari_vec:
        return have_vec && TCG_TARGET_HAS_shi_vec;
    case INDEX_op_shls_vec:
    case INDEX_op_shrs_vec:
    case INDEX_op_sars_vec:
        return have_vec && TCG_TARGET_HAS_shs_vec;
    case INDEX_op_shlv_vec:
    case INDEX_op_shrv_vec:
    case INDEX_op_sarv_vec:
        return have_vec && TCG_TARGET_HAS_shv_vec;
    case INDEX_op_rotli_vec:
        return have_vec && TCG_TARGET_HAS_roti_vec;
    case INDEX_op_rotls_vec:
        return have_vec && TCG_TARGET_HAS_rots_vec;
    case INDEX_op_rotlv_vec:
    case INDEX_op_rotrv_vec:
        return have_vec && TCG_TARGET_HAS_rotv_vec;
    case INDEX_op_ssadd_vec:
    case INDEX_op_usadd_vec:
    case INDEX_op_sssub_vec:
    case INDEX_op_ussub_vec:
        return have_vec && TCG_TARGET_HAS_sat_vec;
    case INDEX_op_smin_vec:
    case INDEX_op_umin_vec:
    case INDEX_op_smax_vec:
    case INDEX_op_umax_vec:
        return have_vec && TCG_TARGET_HAS_minmax_vec;
    case INDEX_op_bitsel_vec:
        return have_vec && TCG_TARGET_HAS_bitsel_vec;
    case INDEX_op_cmpsel_vec:
        return have_vec && TCG_TARGET_HAS_cmpsel_vec;

    default:
        /* Anything left must lie past the generic opcodes. */
        tcg_debug_assert(op > INDEX_op_last_generic && op < NB_OPS);
        return true;
    }
}
1696 
/* Emit an INDEX_op_call op that calls helper FUNC with the NARGS temps in
   ARGS.  RET is the temp receiving the result, or NULL if there is none.
   FUNC is looked up in helper_table to obtain its flags and sizemask; the
   lookup result is used without a NULL check, so FUNC must be registered.

   Layout of op->args: return temp(s), argument temps (possibly with
   dummy padding), then the function address and the call flags.

   Note: we convert the 64 bit args to 32 bit and do some alignment
   and endian swap. Maybe it would be better to do the alignment
   and endian swap in tcg_reg_alloc_call(). */
void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args)
{
    int i, real_args, nb_rets, pi;
    unsigned sizemask, flags;
    TCGHelperInfo *info;
    TCGOp *op;

    info = g_hash_table_lookup(helper_table, (gpointer)func);
    flags = info->flags;
    sizemask = info->sizemask;

#ifdef CONFIG_PLUGIN
    /* detect non-plugin helpers */
    if (tcg_ctx->plugin_insn && unlikely(strncmp(info->name, "plugin_", 7))) {
        tcg_ctx->plugin_insn->calls_helpers = true;
    }
#endif

#if defined(__sparc__) && !defined(__arch64__) \
    && !defined(CONFIG_TCG_INTERPRETER)
    /* We have 64-bit values in one register, but need to pass as two
       separate parameters.  Split them.  */
    int orig_sizemask = sizemask;
    int orig_nargs = nargs;
    TCGv_i64 retl, reth;
    TCGTemp *split_args[MAX_OPC_PARAM];

    retl = NULL;
    reth = NULL;
    if (sizemask != 0) {
        for (i = real_args = 0; i < nargs; ++i) {
            int is_64bit = sizemask & (1 << (i+1)*2);
            if (is_64bit) {
                TCGv_i64 orig = temp_tcgv_i64(args[i]);
                TCGv_i32 h = tcg_temp_new_i32();
                TCGv_i32 l = tcg_temp_new_i32();
                tcg_gen_extr_i64_i32(l, h, orig);
                split_args[real_args++] = tcgv_i32_temp(h);
                split_args[real_args++] = tcgv_i32_temp(l);
            } else {
                split_args[real_args++] = args[i];
            }
        }
        /* From here on work on the split list; every arg is now 32-bit. */
        nargs = real_args;
        args = split_args;
        sizemask = 0;
    }
#elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
    /* Sign- or zero-extend each 32-bit argument into a new 64-bit temp.
       This overwrites the caller's args[] entries; the temps are freed
       again at the end of this function.  */
    for (i = 0; i < nargs; ++i) {
        int is_64bit = sizemask & (1 << (i+1)*2);
        int is_signed = sizemask & (2 << (i+1)*2);
        if (!is_64bit) {
            TCGv_i64 temp = tcg_temp_new_i64();
            TCGv_i64 orig = temp_tcgv_i64(args[i]);
            if (is_signed) {
                tcg_gen_ext32s_i64(temp, orig);
            } else {
                tcg_gen_ext32u_i64(temp, orig);
            }
            args[i] = tcgv_i64_temp(temp);
        }
    }
#endif /* TCG_TARGET_EXTEND_ARGS */

    op = tcg_emit_op(INDEX_op_call);

    /* pi is the next free slot in op->args.  Return temps come first.  */
    pi = 0;
    if (ret != NULL) {
#if defined(__sparc__) && !defined(__arch64__) \
    && !defined(CONFIG_TCG_INTERPRETER)
        if (orig_sizemask & 1) {
            /* The 32-bit ABI is going to return the 64-bit value in
               the %o0/%o1 register pair.  Prepare for this by using
               two return temporaries, and reassemble below.  */
            retl = tcg_temp_new_i64();
            reth = tcg_temp_new_i64();
            op->args[pi++] = tcgv_i64_arg(reth);
            op->args[pi++] = tcgv_i64_arg(retl);
            nb_rets = 2;
        } else {
            op->args[pi++] = temp_arg(ret);
            nb_rets = 1;
        }
#else
        /* A 64-bit result on a 32-bit host occupies two adjacent temps,
           listed in host-endian order.  */
        if (TCG_TARGET_REG_BITS < 64 && (sizemask & 1)) {
#ifdef HOST_WORDS_BIGENDIAN
            op->args[pi++] = temp_arg(ret + 1);
            op->args[pi++] = temp_arg(ret);
#else
            op->args[pi++] = temp_arg(ret);
            op->args[pi++] = temp_arg(ret + 1);
#endif
            nb_rets = 2;
        } else {
            op->args[pi++] = temp_arg(ret);
            nb_rets = 1;
        }
#endif
    } else {
        nb_rets = 0;
    }
    TCGOP_CALLO(op) = nb_rets;

    /* real_args counts argument slots, including any dummy padding.  */
    real_args = 0;
    for (i = 0; i < nargs; i++) {
        int is_64bit = sizemask & (1 << (i+1)*2);
        if (TCG_TARGET_REG_BITS < 64 && is_64bit) {
#ifdef TCG_TARGET_CALL_ALIGN_ARGS
            /* some targets want aligned 64 bit args */
            if (real_args & 1) {
                op->args[pi++] = TCG_CALL_DUMMY_ARG;
                real_args++;
            }
#endif
           /* If stack grows up, then we will be placing successive
              arguments at lower addresses, which means we need to
              reverse the order compared to how we would normally
              treat either big or little-endian.  For those arguments
              that will wind up in registers, this still works for
              HPPA (the only current STACK_GROWSUP target) since the
              argument registers are *also* allocated in decreasing
              order.  If another such target is added, this logic may
              have to get more complicated to differentiate between
              stack arguments and register arguments.  */
#if defined(HOST_WORDS_BIGENDIAN) != defined(TCG_TARGET_STACK_GROWSUP)
            op->args[pi++] = temp_arg(args[i] + 1);
            op->args[pi++] = temp_arg(args[i]);
#else
            op->args[pi++] = temp_arg(args[i]);
            op->args[pi++] = temp_arg(args[i] + 1);
#endif
            real_args += 2;
            continue;
        }

        op->args[pi++] = temp_arg(args[i]);
        real_args++;
    }
    /* The function address and call flags follow the arguments.  */
    op->args[pi++] = (uintptr_t)func;
    op->args[pi++] = flags;
    TCGOP_CALLI(op) = real_args;

    /* Make sure the fields didn't overflow.  */
    tcg_debug_assert(TCGOP_CALLI(op) == real_args);
    tcg_debug_assert(pi <= ARRAY_SIZE(op->args));

#if defined(__sparc__) && !defined(__arch64__) \
    && !defined(CONFIG_TCG_INTERPRETER)
    /* Free all of the parts we allocated above.  */
    for (i = real_args = 0; i < orig_nargs; ++i) {
        int is_64bit = orig_sizemask & (1 << (i+1)*2);
        if (is_64bit) {
            tcg_temp_free_internal(args[real_args++]);
            tcg_temp_free_internal(args[real_args++]);
        } else {
            real_args++;
        }
    }
    if (orig_sizemask & 1) {
        /* The 32-bit ABI returned two 32-bit pieces.  Re-assemble them.
           Note that describing these as TCGv_i64 eliminates an unnecessary
           zero-extension that tcg_gen_concat_i32_i64 would create.  */
        tcg_gen_concat32_i64(temp_tcgv_i64(ret), retl, reth);
        tcg_temp_free_i64(retl);
        tcg_temp_free_i64(reth);
    }
#elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
    /* Free the extension temps created before the call was emitted.  */
    for (i = 0; i < nargs; ++i) {
        int is_64bit = sizemask & (1 << (i+1)*2);
        if (!is_64bit) {
            tcg_temp_free_internal(args[i]);
        }
    }
#endif /* TCG_TARGET_EXTEND_ARGS */
}
1875 
1876 static void tcg_reg_alloc_start(TCGContext *s)
1877 {
1878     int i, n;
1879     TCGTemp *ts;
1880 
1881     for (i = 0, n = s->nb_globals; i < n; i++) {
1882         ts = &s->temps[i];
1883         ts->val_type = (ts->fixed_reg ? TEMP_VAL_REG : TEMP_VAL_MEM);
1884     }
1885     for (n = s->nb_temps; i < n; i++) {
1886         ts = &s->temps[i];
1887         ts->val_type = (ts->temp_local ? TEMP_VAL_MEM : TEMP_VAL_DEAD);
1888         ts->mem_allocated = 0;
1889         ts->fixed_reg = 0;
1890     }
1891 
1892     memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp));
1893 }
1894 
1895 static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size,
1896                                  TCGTemp *ts)
1897 {
1898     int idx = temp_idx(ts);
1899 
1900     if (ts->temp_global) {
1901         pstrcpy(buf, buf_size, ts->name);
1902     } else if (ts->temp_local) {
1903         snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
1904     } else {
1905         snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
1906     }
1907     return buf;
1908 }
1909 
1910 static char *tcg_get_arg_str(TCGContext *s, char *buf,
1911                              int buf_size, TCGArg arg)
1912 {
1913     return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg));
1914 }
1915 
1916 /* Find helper name.  */
1917 static inline const char *tcg_find_helper(TCGContext *s, uintptr_t val)
1918 {
1919     const char *ret = NULL;
1920     if (helper_table) {
1921         TCGHelperInfo *info = g_hash_table_lookup(helper_table, (gpointer)val);
1922         if (info) {
1923             ret = info->name;
1924         }
1925     }
1926     return ret;
1927 }
1928 
/*
 * Printable names for comparison conditions, indexed by TCG_COND_* value.
 * Slots without a designated entry are NULL; tcg_dump_ops() checks both the
 * array bound and for NULL before using an entry.
 */
static const char * const cond_name[] =
{
    [TCG_COND_NEVER] = "never",
    [TCG_COND_ALWAYS] = "always",
    [TCG_COND_EQ] = "eq",
    [TCG_COND_NE] = "ne",
    [TCG_COND_LT] = "lt",
    [TCG_COND_GE] = "ge",
    [TCG_COND_LE] = "le",
    [TCG_COND_GT] = "gt",
    [TCG_COND_LTU] = "ltu",
    [TCG_COND_GEU] = "geu",
    [TCG_COND_LEU] = "leu",
    [TCG_COND_GTU] = "gtu"
};
1944 
1945 static const char * const ldst_name[] =
1946 {
1947     [MO_UB]   = "ub",
1948     [MO_SB]   = "sb",
1949     [MO_LEUW] = "leuw",
1950     [MO_LESW] = "lesw",
1951     [MO_LEUL] = "leul",
1952     [MO_LESL] = "lesl",
1953     [MO_LEQ]  = "leq",
1954     [MO_BEUW] = "beuw",
1955     [MO_BESW] = "besw",
1956     [MO_BEUL] = "beul",
1957     [MO_BESL] = "besl",
1958     [MO_BEQ]  = "beq",
1959 };
1960 
/*
 * Printable prefixes for the alignment part of a memory operation, indexed
 * by (memop & MO_AMASK) >> MO_ASHIFT.  Whichever of MO_UNALN / MO_ALIGN is
 * given the empty string depends on TARGET_ALIGNED_ONLY, so only the other
 * one shows up in dumps.
 */
static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
#ifdef TARGET_ALIGNED_ONLY
    [MO_UNALN >> MO_ASHIFT]    = "un+",
    [MO_ALIGN >> MO_ASHIFT]    = "",
#else
    [MO_UNALN >> MO_ASHIFT]    = "",
    [MO_ALIGN >> MO_ASHIFT]    = "al+",
#endif
    [MO_ALIGN_2 >> MO_ASHIFT]  = "al2+",
    [MO_ALIGN_4 >> MO_ASHIFT]  = "al4+",
    [MO_ALIGN_8 >> MO_ASHIFT]  = "al8+",
    [MO_ALIGN_16 >> MO_ASHIFT] = "al16+",
    [MO_ALIGN_32 >> MO_ASHIFT] = "al32+",
    [MO_ALIGN_64 >> MO_ASHIFT] = "al64+",
};
1976 
1977 static inline bool tcg_regset_single(TCGRegSet d)
1978 {
1979     return (d & (d - 1)) == 0;
1980 }
1981 
1982 static inline TCGReg tcg_regset_first(TCGRegSet d)
1983 {
1984     if (TCG_TARGET_NB_REGS <= 32) {
1985         return ctz32(d);
1986     } else {
1987         return ctz64(d);
1988     }
1989 }
1990 
1991 static void tcg_dump_ops(TCGContext *s, bool have_prefs)
1992 {
1993     char buf[128];
1994     TCGOp *op;
1995 
1996     QTAILQ_FOREACH(op, &s->ops, link) {
1997         int i, k, nb_oargs, nb_iargs, nb_cargs;
1998         const TCGOpDef *def;
1999         TCGOpcode c;
2000         int col = 0;
2001 
2002         c = op->opc;
2003         def = &tcg_op_defs[c];
2004 
2005         if (c == INDEX_op_insn_start) {
2006             nb_oargs = 0;
2007             col += qemu_log("\n ----");
2008 
2009             for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
2010                 target_ulong a;
2011 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
2012                 a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
2013 #else
2014                 a = op->args[i];
2015 #endif
2016                 col += qemu_log(" " TARGET_FMT_lx, a);
2017             }
2018         } else if (c == INDEX_op_call) {
2019             /* variable number of arguments */
2020             nb_oargs = TCGOP_CALLO(op);
2021             nb_iargs = TCGOP_CALLI(op);
2022             nb_cargs = def->nb_cargs;
2023 
2024             /* function name, flags, out args */
2025             col += qemu_log(" %s %s,$0x%" TCG_PRIlx ",$%d", def->name,
2026                             tcg_find_helper(s, op->args[nb_oargs + nb_iargs]),
2027                             op->args[nb_oargs + nb_iargs + 1], nb_oargs);
2028             for (i = 0; i < nb_oargs; i++) {
2029                 col += qemu_log(",%s", tcg_get_arg_str(s, buf, sizeof(buf),
2030                                                        op->args[i]));
2031             }
2032             for (i = 0; i < nb_iargs; i++) {
2033                 TCGArg arg = op->args[nb_oargs + i];
2034                 const char *t = "<dummy>";
2035                 if (arg != TCG_CALL_DUMMY_ARG) {
2036                     t = tcg_get_arg_str(s, buf, sizeof(buf), arg);
2037                 }
2038                 col += qemu_log(",%s", t);
2039             }
2040         } else {
2041             col += qemu_log(" %s ", def->name);
2042 
2043             nb_oargs = def->nb_oargs;
2044             nb_iargs = def->nb_iargs;
2045             nb_cargs = def->nb_cargs;
2046 
2047             if (def->flags & TCG_OPF_VECTOR) {
2048                 col += qemu_log("v%d,e%d,", 64 << TCGOP_VECL(op),
2049                                 8 << TCGOP_VECE(op));
2050             }
2051 
2052             k = 0;
2053             for (i = 0; i < nb_oargs; i++) {
2054                 if (k != 0) {
2055                     col += qemu_log(",");
2056                 }
2057                 col += qemu_log("%s", tcg_get_arg_str(s, buf, sizeof(buf),
2058                                                       op->args[k++]));
2059             }
2060             for (i = 0; i < nb_iargs; i++) {
2061                 if (k != 0) {
2062                     col += qemu_log(",");
2063                 }
2064                 col += qemu_log("%s", tcg_get_arg_str(s, buf, sizeof(buf),
2065                                                       op->args[k++]));
2066             }
2067             switch (c) {
2068             case INDEX_op_brcond_i32:
2069             case INDEX_op_setcond_i32:
2070             case INDEX_op_movcond_i32:
2071             case INDEX_op_brcond2_i32:
2072             case INDEX_op_setcond2_i32:
2073             case INDEX_op_brcond_i64:
2074             case INDEX_op_setcond_i64:
2075             case INDEX_op_movcond_i64:
2076             case INDEX_op_cmp_vec:
2077             case INDEX_op_cmpsel_vec:
2078                 if (op->args[k] < ARRAY_SIZE(cond_name)
2079                     && cond_name[op->args[k]]) {
2080                     col += qemu_log(",%s", cond_name[op->args[k++]]);
2081                 } else {
2082                     col += qemu_log(",$0x%" TCG_PRIlx, op->args[k++]);
2083                 }
2084                 i = 1;
2085                 break;
2086             case INDEX_op_qemu_ld_i32:
2087             case INDEX_op_qemu_st_i32:
2088             case INDEX_op_qemu_ld_i64:
2089             case INDEX_op_qemu_st_i64:
2090                 {
2091                     TCGMemOpIdx oi = op->args[k++];
2092                     MemOp op = get_memop(oi);
2093                     unsigned ix = get_mmuidx(oi);
2094 
2095                     if (op & ~(MO_AMASK | MO_BSWAP | MO_SSIZE)) {
2096                         col += qemu_log(",$0x%x,%u", op, ix);
2097                     } else {
2098                         const char *s_al, *s_op;
2099                         s_al = alignment_name[(op & MO_AMASK) >> MO_ASHIFT];
2100                         s_op = ldst_name[op & (MO_BSWAP | MO_SSIZE)];
2101                         col += qemu_log(",%s%s,%u", s_al, s_op, ix);
2102                     }
2103                     i = 1;
2104                 }
2105                 break;
2106             default:
2107                 i = 0;
2108                 break;
2109             }
2110             switch (c) {
2111             case INDEX_op_set_label:
2112             case INDEX_op_br:
2113             case INDEX_op_brcond_i32:
2114             case INDEX_op_brcond_i64:
2115             case INDEX_op_brcond2_i32:
2116                 col += qemu_log("%s$L%d", k ? "," : "",
2117                                 arg_label(op->args[k])->id);
2118                 i++, k++;
2119                 break;
2120             default:
2121                 break;
2122             }
2123             for (; i < nb_cargs; i++, k++) {
2124                 col += qemu_log("%s$0x%" TCG_PRIlx, k ? "," : "", op->args[k]);
2125             }
2126         }
2127 
2128         if (have_prefs || op->life) {
2129 
2130             QemuLogFile *logfile;
2131 
2132             rcu_read_lock();
2133             logfile = qatomic_rcu_read(&qemu_logfile);
2134             if (logfile) {
2135                 for (; col < 40; ++col) {
2136                     putc(' ', logfile->fd);
2137                 }
2138             }
2139             rcu_read_unlock();
2140         }
2141 
2142         if (op->life) {
2143             unsigned life = op->life;
2144 
2145             if (life & (SYNC_ARG * 3)) {
2146                 qemu_log("  sync:");
2147                 for (i = 0; i < 2; ++i) {
2148                     if (life & (SYNC_ARG << i)) {
2149                         qemu_log(" %d", i);
2150                     }
2151                 }
2152             }
2153             life /= DEAD_ARG;
2154             if (life) {
2155                 qemu_log("  dead:");
2156                 for (i = 0; life; ++i, life >>= 1) {
2157                     if (life & 1) {
2158                         qemu_log(" %d", i);
2159                     }
2160                 }
2161             }
2162         }
2163 
2164         if (have_prefs) {
2165             for (i = 0; i < nb_oargs; ++i) {
2166                 TCGRegSet set = op->output_pref[i];
2167 
2168                 if (i == 0) {
2169                     qemu_log("  pref=");
2170                 } else {
2171                     qemu_log(",");
2172                 }
2173                 if (set == 0) {
2174                     qemu_log("none");
2175                 } else if (set == MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS)) {
2176                     qemu_log("all");
2177 #ifdef CONFIG_DEBUG_TCG
2178                 } else if (tcg_regset_single(set)) {
2179                     TCGReg reg = tcg_regset_first(set);
2180                     qemu_log("%s", tcg_target_reg_names[reg]);
2181 #endif
2182                 } else if (TCG_TARGET_NB_REGS <= 32) {
2183                     qemu_log("%#x", (uint32_t)set);
2184                 } else {
2185                     qemu_log("%#" PRIx64, (uint64_t)set);
2186                 }
2187             }
2188         }
2189 
2190         qemu_log("\n");
2191     }
2192 }
2193 
2194 /* we give more priority to constraints with less registers */
2195 static int get_constraint_priority(const TCGOpDef *def, int k)
2196 {
2197     const TCGArgConstraint *arg_ct = &def->args_ct[k];
2198     int n;
2199 
2200     if (arg_ct->oalias) {
2201         /* an alias is equivalent to a single register */
2202         n = 1;
2203     } else {
2204         n = ctpop64(arg_ct->regs);
2205     }
2206     return TCG_TARGET_NB_REGS - n + 1;
2207 }
2208 
2209 /* sort from highest priority to lowest */
2210 static void sort_constraints(TCGOpDef *def, int start, int n)
2211 {
2212     int i, j;
2213     TCGArgConstraint *a = def->args_ct;
2214 
2215     for (i = 0; i < n; i++) {
2216         a[start + i].sort_index = start + i;
2217     }
2218     if (n <= 1) {
2219         return;
2220     }
2221     for (i = 0; i < n - 1; i++) {
2222         for (j = i + 1; j < n; j++) {
2223             int p1 = get_constraint_priority(def, a[start + i].sort_index);
2224             int p2 = get_constraint_priority(def, a[start + j].sort_index);
2225             if (p1 < p2) {
2226                 int tmp = a[start + i].sort_index;
2227                 a[start + i].sort_index = a[start + j].sort_index;
2228                 a[start + j].sort_index = tmp;
2229             }
2230         }
2231     }
2232 }
2233 
2234 static void process_op_defs(TCGContext *s)
2235 {
2236     TCGOpcode op;
2237 
2238     for (op = 0; op < NB_OPS; op++) {
2239         TCGOpDef *def = &tcg_op_defs[op];
2240         const TCGTargetOpDef *tdefs;
2241         TCGType type;
2242         int i, nb_args;
2243 
2244         if (def->flags & TCG_OPF_NOT_PRESENT) {
2245             continue;
2246         }
2247 
2248         nb_args = def->nb_iargs + def->nb_oargs;
2249         if (nb_args == 0) {
2250             continue;
2251         }
2252 
2253         tdefs = tcg_target_op_def(op);
2254         /* Missing TCGTargetOpDef entry. */
2255         tcg_debug_assert(tdefs != NULL);
2256 
2257         type = (def->flags & TCG_OPF_64BIT ? TCG_TYPE_I64 : TCG_TYPE_I32);
2258         for (i = 0; i < nb_args; i++) {
2259             const char *ct_str = tdefs->args_ct_str[i];
2260             /* Incomplete TCGTargetOpDef entry. */
2261             tcg_debug_assert(ct_str != NULL);
2262 
2263             while (*ct_str != '\0') {
2264                 switch(*ct_str) {
2265                 case '0' ... '9':
2266                     {
2267                         int oarg = *ct_str - '0';
2268                         tcg_debug_assert(ct_str == tdefs->args_ct_str[i]);
2269                         tcg_debug_assert(oarg < def->nb_oargs);
2270                         tcg_debug_assert(def->args_ct[oarg].regs != 0);
2271                         def->args_ct[i] = def->args_ct[oarg];
2272                         /* The output sets oalias.  */
2273                         def->args_ct[oarg].oalias = true;
2274                         def->args_ct[oarg].alias_index = i;
2275                         /* The input sets ialias. */
2276                         def->args_ct[i].ialias = true;
2277                         def->args_ct[i].alias_index = oarg;
2278                     }
2279                     ct_str++;
2280                     break;
2281                 case '&':
2282                     def->args_ct[i].newreg = true;
2283                     ct_str++;
2284                     break;
2285                 case 'i':
2286                     def->args_ct[i].ct |= TCG_CT_CONST;
2287                     ct_str++;
2288                     break;
2289                 default:
2290                     ct_str = target_parse_constraint(&def->args_ct[i],
2291                                                      ct_str, type);
2292                     /* Typo in TCGTargetOpDef constraint. */
2293                     tcg_debug_assert(ct_str != NULL);
2294                 }
2295             }
2296         }
2297 
2298         /* TCGTargetOpDef entry with too much information? */
2299         tcg_debug_assert(i == TCG_MAX_OP_ARGS || tdefs->args_ct_str[i] == NULL);
2300 
2301         /* sort the constraints (XXX: this is just an heuristic) */
2302         sort_constraints(def, 0, def->nb_oargs);
2303         sort_constraints(def, def->nb_oargs, def->nb_iargs);
2304     }
2305 }
2306 
2307 void tcg_op_remove(TCGContext *s, TCGOp *op)
2308 {
2309     TCGLabel *label;
2310 
2311     switch (op->opc) {
2312     case INDEX_op_br:
2313         label = arg_label(op->args[0]);
2314         label->refs--;
2315         break;
2316     case INDEX_op_brcond_i32:
2317     case INDEX_op_brcond_i64:
2318         label = arg_label(op->args[3]);
2319         label->refs--;
2320         break;
2321     case INDEX_op_brcond2_i32:
2322         label = arg_label(op->args[5]);
2323         label->refs--;
2324         break;
2325     default:
2326         break;
2327     }
2328 
2329     QTAILQ_REMOVE(&s->ops, op, link);
2330     QTAILQ_INSERT_TAIL(&s->free_ops, op, link);
2331     s->nb_ops--;
2332 
2333 #ifdef CONFIG_PROFILER
2334     qatomic_set(&s->prof.del_op_count, s->prof.del_op_count + 1);
2335 #endif
2336 }
2337 
2338 static TCGOp *tcg_op_alloc(TCGOpcode opc)
2339 {
2340     TCGContext *s = tcg_ctx;
2341     TCGOp *op;
2342 
2343     if (likely(QTAILQ_EMPTY(&s->free_ops))) {
2344         op = tcg_malloc(sizeof(TCGOp));
2345     } else {
2346         op = QTAILQ_FIRST(&s->free_ops);
2347         QTAILQ_REMOVE(&s->free_ops, op, link);
2348     }
2349     memset(op, 0, offsetof(TCGOp, link));
2350     op->opc = opc;
2351     s->nb_ops++;
2352 
2353     return op;
2354 }
2355 
2356 TCGOp *tcg_emit_op(TCGOpcode opc)
2357 {
2358     TCGOp *op = tcg_op_alloc(opc);
2359     QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
2360     return op;
2361 }
2362 
2363 TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op, TCGOpcode opc)
2364 {
2365     TCGOp *new_op = tcg_op_alloc(opc);
2366     QTAILQ_INSERT_BEFORE(old_op, new_op, link);
2367     return new_op;
2368 }
2369 
2370 TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op, TCGOpcode opc)
2371 {
2372     TCGOp *new_op = tcg_op_alloc(opc);
2373     QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link);
2374     return new_op;
2375 }
2376 
2377 /* Reachable analysis : remove unreachable code.  */
2378 static void reachable_code_pass(TCGContext *s)
2379 {
2380     TCGOp *op, *op_next;
2381     bool dead = false;
2382 
2383     QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
2384         bool remove = dead;
2385         TCGLabel *label;
2386         int call_flags;
2387 
2388         switch (op->opc) {
2389         case INDEX_op_set_label:
2390             label = arg_label(op->args[0]);
2391             if (label->refs == 0) {
2392                 /*
2393                  * While there is an occasional backward branch, virtually
2394                  * all branches generated by the translators are forward.
2395                  * Which means that generally we will have already removed
2396                  * all references to the label that will be, and there is
2397                  * little to be gained by iterating.
2398                  */
2399                 remove = true;
2400             } else {
2401                 /* Once we see a label, insns become live again.  */
2402                 dead = false;
2403                 remove = false;
2404 
2405                 /*
2406                  * Optimization can fold conditional branches to unconditional.
2407                  * If we find a label with one reference which is preceded by
2408                  * an unconditional branch to it, remove both.  This needed to
2409                  * wait until the dead code in between them was removed.
2410                  */
2411                 if (label->refs == 1) {
2412                     TCGOp *op_prev = QTAILQ_PREV(op, link);
2413                     if (op_prev->opc == INDEX_op_br &&
2414                         label == arg_label(op_prev->args[0])) {
2415                         tcg_op_remove(s, op_prev);
2416                         remove = true;
2417                     }
2418                 }
2419             }
2420             break;
2421 
2422         case INDEX_op_br:
2423         case INDEX_op_exit_tb:
2424         case INDEX_op_goto_ptr:
2425             /* Unconditional branches; everything following is dead.  */
2426             dead = true;
2427             break;
2428 
2429         case INDEX_op_call:
2430             /* Notice noreturn helper calls, raising exceptions.  */
2431             call_flags = op->args[TCGOP_CALLO(op) + TCGOP_CALLI(op) + 1];
2432             if (call_flags & TCG_CALL_NO_RETURN) {
2433                 dead = true;
2434             }
2435             break;
2436 
2437         case INDEX_op_insn_start:
2438             /* Never remove -- we need to keep these for unwind.  */
2439             remove = false;
2440             break;
2441 
2442         default:
2443             break;
2444         }
2445 
2446         if (remove) {
2447             tcg_op_remove(s, op);
2448         }
2449     }
2450 }
2451 
/*
 * Bits kept in TCGTemp.state during liveness analysis: TS_DEAD marks a temp
 * as dead, TS_MEM marks a temp whose value should be in memory.
 */
#define TS_DEAD  1
#define TS_MEM   2

/*
 * Test the dead / sync bit of argument @n.  Both macros read a variable
 * named arg_life, which must be in scope where they are used.
 */
#define IS_DEAD_ARG(n)   (arg_life & (DEAD_ARG << (n)))
#define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n)))
2457 
2458 /* For liveness_pass_1, the register preferences for a given temp.  */
2459 static inline TCGRegSet *la_temp_pref(TCGTemp *ts)
2460 {
2461     return ts->state_ptr;
2462 }
2463 
2464 /* For liveness_pass_1, reset the preferences for a given temp to the
2465  * maximal regset for its type.
2466  */
2467 static inline void la_reset_pref(TCGTemp *ts)
2468 {
2469     *la_temp_pref(ts)
2470         = (ts->state == TS_DEAD ? 0 : tcg_target_available_regs[ts->type]);
2471 }
2472 
2473 /* liveness analysis: end of function: all temps are dead, and globals
2474    should be in memory. */
2475 static void la_func_end(TCGContext *s, int ng, int nt)
2476 {
2477     int i;
2478 
2479     for (i = 0; i < ng; ++i) {
2480         s->temps[i].state = TS_DEAD | TS_MEM;
2481         la_reset_pref(&s->temps[i]);
2482     }
2483     for (i = ng; i < nt; ++i) {
2484         s->temps[i].state = TS_DEAD;
2485         la_reset_pref(&s->temps[i]);
2486     }
2487 }
2488 
2489 /* liveness analysis: end of basic block: all temps are dead, globals
2490    and local temps should be in memory. */
2491 static void la_bb_end(TCGContext *s, int ng, int nt)
2492 {
2493     int i;
2494 
2495     for (i = 0; i < ng; ++i) {
2496         s->temps[i].state = TS_DEAD | TS_MEM;
2497         la_reset_pref(&s->temps[i]);
2498     }
2499     for (i = ng; i < nt; ++i) {
2500         s->temps[i].state = (s->temps[i].temp_local
2501                              ? TS_DEAD | TS_MEM
2502                              : TS_DEAD);
2503         la_reset_pref(&s->temps[i]);
2504     }
2505 }
2506 
2507 /* liveness analysis: sync globals back to memory.  */
2508 static void la_global_sync(TCGContext *s, int ng)
2509 {
2510     int i;
2511 
2512     for (i = 0; i < ng; ++i) {
2513         int state = s->temps[i].state;
2514         s->temps[i].state = state | TS_MEM;
2515         if (state == TS_DEAD) {
2516             /* If the global was previously dead, reset prefs.  */
2517             la_reset_pref(&s->temps[i]);
2518         }
2519     }
2520 }
2521 
2522 /*
2523  * liveness analysis: conditional branch: all temps are dead,
2524  * globals and local temps should be synced.
2525  */
2526 static void la_bb_sync(TCGContext *s, int ng, int nt)
2527 {
2528     la_global_sync(s, ng);
2529 
2530     for (int i = ng; i < nt; ++i) {
2531         if (s->temps[i].temp_local) {
2532             int state = s->temps[i].state;
2533             s->temps[i].state = state | TS_MEM;
2534             if (state != TS_DEAD) {
2535                 continue;
2536             }
2537         } else {
2538             s->temps[i].state = TS_DEAD;
2539         }
2540         la_reset_pref(&s->temps[i]);
2541     }
2542 }
2543 
2544 /* liveness analysis: sync globals back to memory and kill.  */
2545 static void la_global_kill(TCGContext *s, int ng)
2546 {
2547     int i;
2548 
2549     for (i = 0; i < ng; i++) {
2550         s->temps[i].state = TS_DEAD | TS_MEM;
2551         la_reset_pref(&s->temps[i]);
2552     }
2553 }
2554 
2555 /* liveness analysis: note live globals crossing calls.  */
2556 static void la_cross_call(TCGContext *s, int nt)
2557 {
2558     TCGRegSet mask = ~tcg_target_call_clobber_regs;
2559     int i;
2560 
2561     for (i = 0; i < nt; i++) {
2562         TCGTemp *ts = &s->temps[i];
2563         if (!(ts->state & TS_DEAD)) {
2564             TCGRegSet *pset = la_temp_pref(ts);
2565             TCGRegSet set = *pset;
2566 
2567             set &= mask;
2568             /* If the combination is not possible, restart.  */
2569             if (set == 0) {
2570                 set = tcg_target_available_regs[ts->type] & mask;
2571             }
2572             *pset = set;
2573         }
2574     }
2575 }
2576 
/* Liveness analysis: update the opc_arg_life array to tell whether a
   given input argument is dead.  Instructions that only update dead
   temporaries are removed. */
2580 static void liveness_pass_1(TCGContext *s)
2581 {
2582     int nb_globals = s->nb_globals;
2583     int nb_temps = s->nb_temps;
2584     TCGOp *op, *op_prev;
2585     TCGRegSet *prefs;
2586     int i;
2587 
2588     prefs = tcg_malloc(sizeof(TCGRegSet) * nb_temps);
2589     for (i = 0; i < nb_temps; ++i) {
2590         s->temps[i].state_ptr = prefs + i;
2591     }
2592 
2593     /* ??? Should be redundant with the exit_tb that ends the TB.  */
2594     la_func_end(s, nb_globals, nb_temps);
2595 
2596     QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, link, op_prev) {
2597         int nb_iargs, nb_oargs;
2598         TCGOpcode opc_new, opc_new2;
2599         bool have_opc_new2;
2600         TCGLifeData arg_life = 0;
2601         TCGTemp *ts;
2602         TCGOpcode opc = op->opc;
2603         const TCGOpDef *def = &tcg_op_defs[opc];
2604 
2605         switch (opc) {
2606         case INDEX_op_call:
2607             {
2608                 int call_flags;
2609                 int nb_call_regs;
2610 
2611                 nb_oargs = TCGOP_CALLO(op);
2612                 nb_iargs = TCGOP_CALLI(op);
2613                 call_flags = op->args[nb_oargs + nb_iargs + 1];
2614 
2615                 /* pure functions can be removed if their result is unused */
2616                 if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
2617                     for (i = 0; i < nb_oargs; i++) {
2618                         ts = arg_temp(op->args[i]);
2619                         if (ts->state != TS_DEAD) {
2620                             goto do_not_remove_call;
2621                         }
2622                     }
2623                     goto do_remove;
2624                 }
2625             do_not_remove_call:
2626 
2627                 /* Output args are dead.  */
2628                 for (i = 0; i < nb_oargs; i++) {
2629                     ts = arg_temp(op->args[i]);
2630                     if (ts->state & TS_DEAD) {
2631                         arg_life |= DEAD_ARG << i;
2632                     }
2633                     if (ts->state & TS_MEM) {
2634                         arg_life |= SYNC_ARG << i;
2635                     }
2636                     ts->state = TS_DEAD;
2637                     la_reset_pref(ts);
2638 
2639                     /* Not used -- it will be tcg_target_call_oarg_regs[i].  */
2640                     op->output_pref[i] = 0;
2641                 }
2642 
2643                 if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
2644                                     TCG_CALL_NO_READ_GLOBALS))) {
2645                     la_global_kill(s, nb_globals);
2646                 } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
2647                     la_global_sync(s, nb_globals);
2648                 }
2649 
2650                 /* Record arguments that die in this helper.  */
2651                 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
2652                     ts = arg_temp(op->args[i]);
2653                     if (ts && ts->state & TS_DEAD) {
2654                         arg_life |= DEAD_ARG << i;
2655                     }
2656                 }
2657 
2658                 /* For all live registers, remove call-clobbered prefs.  */
2659                 la_cross_call(s, nb_temps);
2660 
2661                 nb_call_regs = ARRAY_SIZE(tcg_target_call_iarg_regs);
2662 
2663                 /* Input arguments are live for preceding opcodes.  */
2664                 for (i = 0; i < nb_iargs; i++) {
2665                     ts = arg_temp(op->args[i + nb_oargs]);
2666                     if (ts && ts->state & TS_DEAD) {
2667                         /* For those arguments that die, and will be allocated
2668                          * in registers, clear the register set for that arg,
2669                          * to be filled in below.  For args that will be on
2670                          * the stack, reset to any available reg.
2671                          */
2672                         *la_temp_pref(ts)
2673                             = (i < nb_call_regs ? 0 :
2674                                tcg_target_available_regs[ts->type]);
2675                         ts->state &= ~TS_DEAD;
2676                     }
2677                 }
2678 
2679                 /* For each input argument, add its input register to prefs.
2680                    If a temp is used once, this produces a single set bit.  */
2681                 for (i = 0; i < MIN(nb_call_regs, nb_iargs); i++) {
2682                     ts = arg_temp(op->args[i + nb_oargs]);
2683                     if (ts) {
2684                         tcg_regset_set_reg(*la_temp_pref(ts),
2685                                            tcg_target_call_iarg_regs[i]);
2686                     }
2687                 }
2688             }
2689             break;
2690         case INDEX_op_insn_start:
2691             break;
2692         case INDEX_op_discard:
2693             /* mark the temporary as dead */
2694             ts = arg_temp(op->args[0]);
2695             ts->state = TS_DEAD;
2696             la_reset_pref(ts);
2697             break;
2698 
2699         case INDEX_op_add2_i32:
2700             opc_new = INDEX_op_add_i32;
2701             goto do_addsub2;
2702         case INDEX_op_sub2_i32:
2703             opc_new = INDEX_op_sub_i32;
2704             goto do_addsub2;
2705         case INDEX_op_add2_i64:
2706             opc_new = INDEX_op_add_i64;
2707             goto do_addsub2;
2708         case INDEX_op_sub2_i64:
2709             opc_new = INDEX_op_sub_i64;
2710         do_addsub2:
2711             nb_iargs = 4;
2712             nb_oargs = 2;
2713             /* Test if the high part of the operation is dead, but not
2714                the low part.  The result can be optimized to a simple
2715                add or sub.  This happens often for x86_64 guest when the
2716                cpu mode is set to 32 bit.  */
2717             if (arg_temp(op->args[1])->state == TS_DEAD) {
2718                 if (arg_temp(op->args[0])->state == TS_DEAD) {
2719                     goto do_remove;
2720                 }
2721                 /* Replace the opcode and adjust the args in place,
2722                    leaving 3 unused args at the end.  */
2723                 op->opc = opc = opc_new;
2724                 op->args[1] = op->args[2];
2725                 op->args[2] = op->args[4];
2726                 /* Fall through and mark the single-word operation live.  */
2727                 nb_iargs = 2;
2728                 nb_oargs = 1;
2729             }
2730             goto do_not_remove;
2731 
2732         case INDEX_op_mulu2_i32:
2733             opc_new = INDEX_op_mul_i32;
2734             opc_new2 = INDEX_op_muluh_i32;
2735             have_opc_new2 = TCG_TARGET_HAS_muluh_i32;
2736             goto do_mul2;
2737         case INDEX_op_muls2_i32:
2738             opc_new = INDEX_op_mul_i32;
2739             opc_new2 = INDEX_op_mulsh_i32;
2740             have_opc_new2 = TCG_TARGET_HAS_mulsh_i32;
2741             goto do_mul2;
2742         case INDEX_op_mulu2_i64:
2743             opc_new = INDEX_op_mul_i64;
2744             opc_new2 = INDEX_op_muluh_i64;
2745             have_opc_new2 = TCG_TARGET_HAS_muluh_i64;
2746             goto do_mul2;
2747         case INDEX_op_muls2_i64:
2748             opc_new = INDEX_op_mul_i64;
2749             opc_new2 = INDEX_op_mulsh_i64;
2750             have_opc_new2 = TCG_TARGET_HAS_mulsh_i64;
2751             goto do_mul2;
2752         do_mul2:
2753             nb_iargs = 2;
2754             nb_oargs = 2;
2755             if (arg_temp(op->args[1])->state == TS_DEAD) {
2756                 if (arg_temp(op->args[0])->state == TS_DEAD) {
2757                     /* Both parts of the operation are dead.  */
2758                     goto do_remove;
2759                 }
2760                 /* The high part of the operation is dead; generate the low. */
2761                 op->opc = opc = opc_new;
2762                 op->args[1] = op->args[2];
2763                 op->args[2] = op->args[3];
2764             } else if (arg_temp(op->args[0])->state == TS_DEAD && have_opc_new2) {
2765                 /* The low part of the operation is dead; generate the high. */
2766                 op->opc = opc = opc_new2;
2767                 op->args[0] = op->args[1];
2768                 op->args[1] = op->args[2];
2769                 op->args[2] = op->args[3];
2770             } else {
2771                 goto do_not_remove;
2772             }
2773             /* Mark the single-word operation live.  */
2774             nb_oargs = 1;
2775             goto do_not_remove;
2776 
2777         default:
2778             /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
2779             nb_iargs = def->nb_iargs;
2780             nb_oargs = def->nb_oargs;
2781 
2782             /* Test if the operation can be removed because all
2783                its outputs are dead. We assume that nb_oargs == 0
2784                implies side effects */
2785             if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) {
2786                 for (i = 0; i < nb_oargs; i++) {
2787                     if (arg_temp(op->args[i])->state != TS_DEAD) {
2788                         goto do_not_remove;
2789                     }
2790                 }
2791                 goto do_remove;
2792             }
2793             goto do_not_remove;
2794 
2795         do_remove:
2796             tcg_op_remove(s, op);
2797             break;
2798 
2799         do_not_remove:
2800             for (i = 0; i < nb_oargs; i++) {
2801                 ts = arg_temp(op->args[i]);
2802 
2803                 /* Remember the preference of the uses that followed.  */
2804                 op->output_pref[i] = *la_temp_pref(ts);
2805 
2806                 /* Output args are dead.  */
2807                 if (ts->state & TS_DEAD) {
2808                     arg_life |= DEAD_ARG << i;
2809                 }
2810                 if (ts->state & TS_MEM) {
2811                     arg_life |= SYNC_ARG << i;
2812                 }
2813                 ts->state = TS_DEAD;
2814                 la_reset_pref(ts);
2815             }
2816 
2817             /* If end of basic block, update.  */
2818             if (def->flags & TCG_OPF_BB_EXIT) {
2819                 la_func_end(s, nb_globals, nb_temps);
2820             } else if (def->flags & TCG_OPF_COND_BRANCH) {
2821                 la_bb_sync(s, nb_globals, nb_temps);
2822             } else if (def->flags & TCG_OPF_BB_END) {
2823                 la_bb_end(s, nb_globals, nb_temps);
2824             } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
2825                 la_global_sync(s, nb_globals);
2826                 if (def->flags & TCG_OPF_CALL_CLOBBER) {
2827                     la_cross_call(s, nb_temps);
2828                 }
2829             }
2830 
2831             /* Record arguments that die in this opcode.  */
2832             for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
2833                 ts = arg_temp(op->args[i]);
2834                 if (ts->state & TS_DEAD) {
2835                     arg_life |= DEAD_ARG << i;
2836                 }
2837             }
2838 
2839             /* Input arguments are live for preceding opcodes.  */
2840             for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
2841                 ts = arg_temp(op->args[i]);
2842                 if (ts->state & TS_DEAD) {
2843                     /* For operands that were dead, initially allow
2844                        all regs for the type.  */
2845                     *la_temp_pref(ts) = tcg_target_available_regs[ts->type];
2846                     ts->state &= ~TS_DEAD;
2847                 }
2848             }
2849 
2850             /* Incorporate constraints for this operand.  */
2851             switch (opc) {
2852             case INDEX_op_mov_i32:
2853             case INDEX_op_mov_i64:
2854                 /* Note that these are TCG_OPF_NOT_PRESENT and do not
2855                    have proper constraints.  That said, special case
2856                    moves to propagate preferences backward.  */
2857                 if (IS_DEAD_ARG(1)) {
2858                     *la_temp_pref(arg_temp(op->args[0]))
2859                         = *la_temp_pref(arg_temp(op->args[1]));
2860                 }
2861                 break;
2862 
2863             default:
2864                 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
2865                     const TCGArgConstraint *ct = &def->args_ct[i];
2866                     TCGRegSet set, *pset;
2867 
2868                     ts = arg_temp(op->args[i]);
2869                     pset = la_temp_pref(ts);
2870                     set = *pset;
2871 
2872                     set &= ct->regs;
2873                     if (ct->ialias) {
2874                         set &= op->output_pref[ct->alias_index];
2875                     }
2876                     /* If the combination is not possible, restart.  */
2877                     if (set == 0) {
2878                         set = ct->regs;
2879                     }
2880                     *pset = set;
2881                 }
2882                 break;
2883             }
2884             break;
2885         }
2886         op->life = arg_life;
2887     }
2888 }
2889 
2890 /* Liveness analysis: Convert indirect regs to direct temporaries.  */
2891 static bool liveness_pass_2(TCGContext *s)
2892 {
2893     int nb_globals = s->nb_globals;
2894     int nb_temps, i;
2895     bool changes = false;
2896     TCGOp *op, *op_next;
2897 
2898     /* Create a temporary for each indirect global.  */
2899     for (i = 0; i < nb_globals; ++i) {
2900         TCGTemp *its = &s->temps[i];
2901         if (its->indirect_reg) {
2902             TCGTemp *dts = tcg_temp_alloc(s);
2903             dts->type = its->type;
2904             dts->base_type = its->base_type;
2905             its->state_ptr = dts;
2906         } else {
2907             its->state_ptr = NULL;
2908         }
2909         /* All globals begin dead.  */
2910         its->state = TS_DEAD;
2911     }
2912     for (nb_temps = s->nb_temps; i < nb_temps; ++i) {
2913         TCGTemp *its = &s->temps[i];
2914         its->state_ptr = NULL;
2915         its->state = TS_DEAD;
2916     }
2917 
2918     QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
2919         TCGOpcode opc = op->opc;
2920         const TCGOpDef *def = &tcg_op_defs[opc];
2921         TCGLifeData arg_life = op->life;
2922         int nb_iargs, nb_oargs, call_flags;
2923         TCGTemp *arg_ts, *dir_ts;
2924 
2925         if (opc == INDEX_op_call) {
2926             nb_oargs = TCGOP_CALLO(op);
2927             nb_iargs = TCGOP_CALLI(op);
2928             call_flags = op->args[nb_oargs + nb_iargs + 1];
2929         } else {
2930             nb_iargs = def->nb_iargs;
2931             nb_oargs = def->nb_oargs;
2932 
2933             /* Set flags similar to how calls require.  */
2934             if (def->flags & TCG_OPF_COND_BRANCH) {
2935                 /* Like reading globals: sync_globals */
2936                 call_flags = TCG_CALL_NO_WRITE_GLOBALS;
2937             } else if (def->flags & TCG_OPF_BB_END) {
2938                 /* Like writing globals: save_globals */
2939                 call_flags = 0;
2940             } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
2941                 /* Like reading globals: sync_globals */
2942                 call_flags = TCG_CALL_NO_WRITE_GLOBALS;
2943             } else {
2944                 /* No effect on globals.  */
2945                 call_flags = (TCG_CALL_NO_READ_GLOBALS |
2946                               TCG_CALL_NO_WRITE_GLOBALS);
2947             }
2948         }
2949 
2950         /* Make sure that input arguments are available.  */
2951         for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
2952             arg_ts = arg_temp(op->args[i]);
2953             if (arg_ts) {
2954                 dir_ts = arg_ts->state_ptr;
2955                 if (dir_ts && arg_ts->state == TS_DEAD) {
2956                     TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32
2957                                       ? INDEX_op_ld_i32
2958                                       : INDEX_op_ld_i64);
2959                     TCGOp *lop = tcg_op_insert_before(s, op, lopc);
2960 
2961                     lop->args[0] = temp_arg(dir_ts);
2962                     lop->args[1] = temp_arg(arg_ts->mem_base);
2963                     lop->args[2] = arg_ts->mem_offset;
2964 
2965                     /* Loaded, but synced with memory.  */
2966                     arg_ts->state = TS_MEM;
2967                 }
2968             }
2969         }
2970 
2971         /* Perform input replacement, and mark inputs that became dead.
2972            No action is required except keeping temp_state up to date
2973            so that we reload when needed.  */
2974         for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
2975             arg_ts = arg_temp(op->args[i]);
2976             if (arg_ts) {
2977                 dir_ts = arg_ts->state_ptr;
2978                 if (dir_ts) {
2979                     op->args[i] = temp_arg(dir_ts);
2980                     changes = true;
2981                     if (IS_DEAD_ARG(i)) {
2982                         arg_ts->state = TS_DEAD;
2983                     }
2984                 }
2985             }
2986         }
2987 
2988         /* Liveness analysis should ensure that the following are
2989            all correct, for call sites and basic block end points.  */
2990         if (call_flags & TCG_CALL_NO_READ_GLOBALS) {
2991             /* Nothing to do */
2992         } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) {
2993             for (i = 0; i < nb_globals; ++i) {
2994                 /* Liveness should see that globals are synced back,
2995                    that is, either TS_DEAD or TS_MEM.  */
2996                 arg_ts = &s->temps[i];
2997                 tcg_debug_assert(arg_ts->state_ptr == 0
2998                                  || arg_ts->state != 0);
2999             }
3000         } else {
3001             for (i = 0; i < nb_globals; ++i) {
3002                 /* Liveness should see that globals are saved back,
3003                    that is, TS_DEAD, waiting to be reloaded.  */
3004                 arg_ts = &s->temps[i];
3005                 tcg_debug_assert(arg_ts->state_ptr == 0
3006                                  || arg_ts->state == TS_DEAD);
3007             }
3008         }
3009 
3010         /* Outputs become available.  */
3011         if (opc == INDEX_op_mov_i32 || opc == INDEX_op_mov_i64) {
3012             arg_ts = arg_temp(op->args[0]);
3013             dir_ts = arg_ts->state_ptr;
3014             if (dir_ts) {
3015                 op->args[0] = temp_arg(dir_ts);
3016                 changes = true;
3017 
3018                 /* The output is now live and modified.  */
3019                 arg_ts->state = 0;
3020 
3021                 if (NEED_SYNC_ARG(0)) {
3022                     TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
3023                                       ? INDEX_op_st_i32
3024                                       : INDEX_op_st_i64);
3025                     TCGOp *sop = tcg_op_insert_after(s, op, sopc);
3026                     TCGTemp *out_ts = dir_ts;
3027 
3028                     if (IS_DEAD_ARG(0)) {
3029                         out_ts = arg_temp(op->args[1]);
3030                         arg_ts->state = TS_DEAD;
3031                         tcg_op_remove(s, op);
3032                     } else {
3033                         arg_ts->state = TS_MEM;
3034                     }
3035 
3036                     sop->args[0] = temp_arg(out_ts);
3037                     sop->args[1] = temp_arg(arg_ts->mem_base);
3038                     sop->args[2] = arg_ts->mem_offset;
3039                 } else {
3040                     tcg_debug_assert(!IS_DEAD_ARG(0));
3041                 }
3042             }
3043         } else {
3044             for (i = 0; i < nb_oargs; i++) {
3045                 arg_ts = arg_temp(op->args[i]);
3046                 dir_ts = arg_ts->state_ptr;
3047                 if (!dir_ts) {
3048                     continue;
3049                 }
3050                 op->args[i] = temp_arg(dir_ts);
3051                 changes = true;
3052 
3053                 /* The output is now live and modified.  */
3054                 arg_ts->state = 0;
3055 
3056                 /* Sync outputs upon their last write.  */
3057                 if (NEED_SYNC_ARG(i)) {
3058                     TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
3059                                       ? INDEX_op_st_i32
3060                                       : INDEX_op_st_i64);
3061                     TCGOp *sop = tcg_op_insert_after(s, op, sopc);
3062 
3063                     sop->args[0] = temp_arg(dir_ts);
3064                     sop->args[1] = temp_arg(arg_ts->mem_base);
3065                     sop->args[2] = arg_ts->mem_offset;
3066 
3067                     arg_ts->state = TS_MEM;
3068                 }
3069                 /* Drop outputs that are dead.  */
3070                 if (IS_DEAD_ARG(i)) {
3071                     arg_ts->state = TS_DEAD;
3072                 }
3073             }
3074         }
3075     }
3076 
3077     return changes;
3078 }
3079 
3080 #ifdef CONFIG_DEBUG_TCG
3081 static void dump_regs(TCGContext *s)
3082 {
3083     TCGTemp *ts;
3084     int i;
3085     char buf[64];
3086 
3087     for(i = 0; i < s->nb_temps; i++) {
3088         ts = &s->temps[i];
3089         printf("  %10s: ", tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts));
3090         switch(ts->val_type) {
3091         case TEMP_VAL_REG:
3092             printf("%s", tcg_target_reg_names[ts->reg]);
3093             break;
3094         case TEMP_VAL_MEM:
3095             printf("%d(%s)", (int)ts->mem_offset,
3096                    tcg_target_reg_names[ts->mem_base->reg]);
3097             break;
3098         case TEMP_VAL_CONST:
3099             printf("$0x%" TCG_PRIlx, ts->val);
3100             break;
3101         case TEMP_VAL_DEAD:
3102             printf("D");
3103             break;
3104         default:
3105             printf("???");
3106             break;
3107         }
3108         printf("\n");
3109     }
3110 
3111     for(i = 0; i < TCG_TARGET_NB_REGS; i++) {
3112         if (s->reg_to_temp[i] != NULL) {
3113             printf("%s: %s\n",
3114                    tcg_target_reg_names[i],
3115                    tcg_get_arg_str_ptr(s, buf, sizeof(buf), s->reg_to_temp[i]));
3116         }
3117     }
3118 }
3119 
3120 static void check_regs(TCGContext *s)
3121 {
3122     int reg;
3123     int k;
3124     TCGTemp *ts;
3125     char buf[64];
3126 
3127     for (reg = 0; reg < TCG_TARGET_NB_REGS; reg++) {
3128         ts = s->reg_to_temp[reg];
3129         if (ts != NULL) {
3130             if (ts->val_type != TEMP_VAL_REG || ts->reg != reg) {
3131                 printf("Inconsistency for register %s:\n",
3132                        tcg_target_reg_names[reg]);
3133                 goto fail;
3134             }
3135         }
3136     }
3137     for (k = 0; k < s->nb_temps; k++) {
3138         ts = &s->temps[k];
3139         if (ts->val_type == TEMP_VAL_REG && !ts->fixed_reg
3140             && s->reg_to_temp[ts->reg] != ts) {
3141             printf("Inconsistency for temp %s:\n",
3142                    tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts));
3143         fail:
3144             printf("reg state:\n");
3145             dump_regs(s);
3146             tcg_abort();
3147         }
3148     }
3149 }
3150 #endif
3151 
3152 static void temp_allocate_frame(TCGContext *s, TCGTemp *ts)
3153 {
3154 #if !(defined(__sparc__) && TCG_TARGET_REG_BITS == 64)
3155     /* Sparc64 stack is accessed with offset of 2047 */
3156     s->current_frame_offset = (s->current_frame_offset +
3157                                (tcg_target_long)sizeof(tcg_target_long) - 1) &
3158         ~(sizeof(tcg_target_long) - 1);
3159 #endif
3160     if (s->current_frame_offset + (tcg_target_long)sizeof(tcg_target_long) >
3161         s->frame_end) {
3162         tcg_abort();
3163     }
3164     ts->mem_offset = s->current_frame_offset;
3165     ts->mem_base = s->frame_temp;
3166     ts->mem_allocated = 1;
3167     s->current_frame_offset += sizeof(tcg_target_long);
3168 }
3169 
3170 static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet);
3171 
3172 /* Mark a temporary as free or dead.  If 'free_or_dead' is negative,
3173    mark it free; otherwise mark it dead.  */
3174 static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead)
3175 {
3176     if (ts->fixed_reg) {
3177         return;
3178     }
3179     if (ts->val_type == TEMP_VAL_REG) {
3180         s->reg_to_temp[ts->reg] = NULL;
3181     }
3182     ts->val_type = (free_or_dead < 0
3183                     || ts->temp_local
3184                     || ts->temp_global
3185                     ? TEMP_VAL_MEM : TEMP_VAL_DEAD);
3186 }
3187 
3188 /* Mark a temporary as dead.  */
3189 static inline void temp_dead(TCGContext *s, TCGTemp *ts)
3190 {
3191     temp_free_or_dead(s, ts, 1);
3192 }
3193 
3194 /* Sync a temporary to memory. 'allocated_regs' is used in case a temporary
3195    registers needs to be allocated to store a constant.  If 'free_or_dead'
3196    is non-zero, subsequently release the temporary; if it is positive, the
3197    temp is dead; if it is negative, the temp is free.  */
3198 static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs,
3199                       TCGRegSet preferred_regs, int free_or_dead)
3200 {
3201     if (ts->fixed_reg) {
3202         return;
3203     }
3204     if (!ts->mem_coherent) {
3205         if (!ts->mem_allocated) {
3206             temp_allocate_frame(s, ts);
3207         }
3208         switch (ts->val_type) {
3209         case TEMP_VAL_CONST:
3210             /* If we're going to free the temp immediately, then we won't
3211                require it later in a register, so attempt to store the
3212                constant to memory directly.  */
3213             if (free_or_dead
3214                 && tcg_out_sti(s, ts->type, ts->val,
3215                                ts->mem_base->reg, ts->mem_offset)) {
3216                 break;
3217             }
3218             temp_load(s, ts, tcg_target_available_regs[ts->type],
3219                       allocated_regs, preferred_regs);
3220             /* fallthrough */
3221 
3222         case TEMP_VAL_REG:
3223             tcg_out_st(s, ts->type, ts->reg,
3224                        ts->mem_base->reg, ts->mem_offset);
3225             break;
3226 
3227         case TEMP_VAL_MEM:
3228             break;
3229 
3230         case TEMP_VAL_DEAD:
3231         default:
3232             tcg_abort();
3233         }
3234         ts->mem_coherent = 1;
3235     }
3236     if (free_or_dead) {
3237         temp_free_or_dead(s, ts, free_or_dead);
3238     }
3239 }
3240 
3241 /* free register 'reg' by spilling the corresponding temporary if necessary */
3242 static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs)
3243 {
3244     TCGTemp *ts = s->reg_to_temp[reg];
3245     if (ts != NULL) {
3246         temp_sync(s, ts, allocated_regs, 0, -1);
3247     }
3248 }
3249 
3250 /**
3251  * tcg_reg_alloc:
3252  * @required_regs: Set of registers in which we must allocate.
3253  * @allocated_regs: Set of registers which must be avoided.
3254  * @preferred_regs: Set of registers we should prefer.
3255  * @rev: True if we search the registers in "indirect" order.
3256  *
3257  * The allocated register must be in @required_regs & ~@allocated_regs,
3258  * but if we can put it in @preferred_regs we may save a move later.
3259  */
3260 static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs,
3261                             TCGRegSet allocated_regs,
3262                             TCGRegSet preferred_regs, bool rev)
3263 {
3264     int i, j, f, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
3265     TCGRegSet reg_ct[2];
3266     const int *order;
3267 
3268     reg_ct[1] = required_regs & ~allocated_regs;
3269     tcg_debug_assert(reg_ct[1] != 0);
3270     reg_ct[0] = reg_ct[1] & preferred_regs;
3271 
3272     /* Skip the preferred_regs option if it cannot be satisfied,
3273        or if the preference made no difference.  */
3274     f = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
3275 
3276     order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
3277 
3278     /* Try free registers, preferences first.  */
3279     for (j = f; j < 2; j++) {
3280         TCGRegSet set = reg_ct[j];
3281 
3282         if (tcg_regset_single(set)) {
3283             /* One register in the set.  */
3284             TCGReg reg = tcg_regset_first(set);
3285             if (s->reg_to_temp[reg] == NULL) {
3286                 return reg;
3287             }
3288         } else {
3289             for (i = 0; i < n; i++) {
3290                 TCGReg reg = order[i];
3291                 if (s->reg_to_temp[reg] == NULL &&
3292                     tcg_regset_test_reg(set, reg)) {
3293                     return reg;
3294                 }
3295             }
3296         }
3297     }
3298 
3299     /* We must spill something.  */
3300     for (j = f; j < 2; j++) {
3301         TCGRegSet set = reg_ct[j];
3302 
3303         if (tcg_regset_single(set)) {
3304             /* One register in the set.  */
3305             TCGReg reg = tcg_regset_first(set);
3306             tcg_reg_free(s, reg, allocated_regs);
3307             return reg;
3308         } else {
3309             for (i = 0; i < n; i++) {
3310                 TCGReg reg = order[i];
3311                 if (tcg_regset_test_reg(set, reg)) {
3312                     tcg_reg_free(s, reg, allocated_regs);
3313                     return reg;
3314                 }
3315             }
3316         }
3317     }
3318 
3319     tcg_abort();
3320 }
3321 
3322 /* Make sure the temporary is in a register.  If needed, allocate the register
3323    from DESIRED while avoiding ALLOCATED.  */
3324 static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
3325                       TCGRegSet allocated_regs, TCGRegSet preferred_regs)
3326 {
3327     TCGReg reg;
3328 
3329     switch (ts->val_type) {
3330     case TEMP_VAL_REG:
3331         return;
3332     case TEMP_VAL_CONST:
3333         reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
3334                             preferred_regs, ts->indirect_base);
3335         tcg_out_movi(s, ts->type, reg, ts->val);
3336         ts->mem_coherent = 0;
3337         break;
3338     case TEMP_VAL_MEM:
3339         reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
3340                             preferred_regs, ts->indirect_base);
3341         tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
3342         ts->mem_coherent = 1;
3343         break;
3344     case TEMP_VAL_DEAD:
3345     default:
3346         tcg_abort();
3347     }
3348     ts->reg = reg;
3349     ts->val_type = TEMP_VAL_REG;
3350     s->reg_to_temp[reg] = ts;
3351 }
3352 
3353 /* Save a temporary to memory. 'allocated_regs' is used in case a
3354    temporary registers needs to be allocated to store a constant.  */
3355 static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
3356 {
3357     /* The liveness analysis already ensures that globals are back
3358        in memory. Keep an tcg_debug_assert for safety. */
3359     tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || ts->fixed_reg);
3360 }
3361 
3362 /* save globals to their canonical location and assume they can be
3363    modified be the following code. 'allocated_regs' is used in case a
3364    temporary registers needs to be allocated to store a constant. */
3365 static void save_globals(TCGContext *s, TCGRegSet allocated_regs)
3366 {
3367     int i, n;
3368 
3369     for (i = 0, n = s->nb_globals; i < n; i++) {
3370         temp_save(s, &s->temps[i], allocated_regs);
3371     }
3372 }
3373 
3374 /* sync globals to their canonical location and assume they can be
3375    read by the following code. 'allocated_regs' is used in case a
3376    temporary registers needs to be allocated to store a constant. */
3377 static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
3378 {
3379     int i, n;
3380 
3381     for (i = 0, n = s->nb_globals; i < n; i++) {
3382         TCGTemp *ts = &s->temps[i];
3383         tcg_debug_assert(ts->val_type != TEMP_VAL_REG
3384                          || ts->fixed_reg
3385                          || ts->mem_coherent);
3386     }
3387 }
3388 
3389 /* at the end of a basic block, we assume all temporaries are dead and
3390    all globals are stored at their canonical location. */
3391 static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
3392 {
3393     int i;
3394 
3395     for (i = s->nb_globals; i < s->nb_temps; i++) {
3396         TCGTemp *ts = &s->temps[i];
3397         if (ts->temp_local) {
3398             temp_save(s, ts, allocated_regs);
3399         } else {
3400             /* The liveness analysis already ensures that temps are dead.
3401                Keep an tcg_debug_assert for safety. */
3402             tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
3403         }
3404     }
3405 
3406     save_globals(s, allocated_regs);
3407 }
3408 
3409 /*
3410  * At a conditional branch, we assume all temporaries are dead and
3411  * all globals and local temps are synced to their location.
3412  */
3413 static void tcg_reg_alloc_cbranch(TCGContext *s, TCGRegSet allocated_regs)
3414 {
3415     sync_globals(s, allocated_regs);
3416 
3417     for (int i = s->nb_globals; i < s->nb_temps; i++) {
3418         TCGTemp *ts = &s->temps[i];
3419         /*
3420          * The liveness analysis already ensures that temps are dead.
3421          * Keep tcg_debug_asserts for safety.
3422          */
3423         if (ts->temp_local) {
3424             tcg_debug_assert(ts->val_type != TEMP_VAL_REG || ts->mem_coherent);
3425         } else {
3426             tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
3427         }
3428     }
3429 }
3430 
3431 /*
3432  * Specialized code generation for INDEX_op_movi_*.
3433  */
3434 static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots,
3435                                   tcg_target_ulong val, TCGLifeData arg_life,
3436                                   TCGRegSet preferred_regs)
3437 {
3438     /* ENV should not be modified.  */
3439     tcg_debug_assert(!ots->fixed_reg);
3440 
3441     /* The movi is not explicitly generated here.  */
3442     if (ots->val_type == TEMP_VAL_REG) {
3443         s->reg_to_temp[ots->reg] = NULL;
3444     }
3445     ots->val_type = TEMP_VAL_CONST;
3446     ots->val = val;
3447     ots->mem_coherent = 0;
3448     if (NEED_SYNC_ARG(0)) {
3449         temp_sync(s, ots, s->reserved_regs, preferred_regs, IS_DEAD_ARG(0));
3450     } else if (IS_DEAD_ARG(0)) {
3451         temp_dead(s, ots);
3452     }
3453 }
3454 
3455 static void tcg_reg_alloc_movi(TCGContext *s, const TCGOp *op)
3456 {
3457     TCGTemp *ots = arg_temp(op->args[0]);
3458     tcg_target_ulong val = op->args[1];
3459 
3460     tcg_reg_alloc_do_movi(s, ots, val, op->life, op->output_pref[0]);
3461 }
3462 
3463 /*
3464  * Specialized code generation for INDEX_op_mov_*.
3465  */
3466 static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op)
3467 {
3468     const TCGLifeData arg_life = op->life;
3469     TCGRegSet allocated_regs, preferred_regs;
3470     TCGTemp *ts, *ots;
3471     TCGType otype, itype;
3472 
3473     allocated_regs = s->reserved_regs;
3474     preferred_regs = op->output_pref[0];
3475     ots = arg_temp(op->args[0]);
3476     ts = arg_temp(op->args[1]);
3477 
3478     /* ENV should not be modified.  */
3479     tcg_debug_assert(!ots->fixed_reg);
3480 
3481     /* Note that otype != itype for no-op truncation.  */
3482     otype = ots->type;
3483     itype = ts->type;
3484 
3485     if (ts->val_type == TEMP_VAL_CONST) {
3486         /* propagate constant or generate sti */
3487         tcg_target_ulong val = ts->val;
3488         if (IS_DEAD_ARG(1)) {
3489             temp_dead(s, ts);
3490         }
3491         tcg_reg_alloc_do_movi(s, ots, val, arg_life, preferred_regs);
3492         return;
3493     }
3494 
3495     /* If the source value is in memory we're going to be forced
3496        to have it in a register in order to perform the copy.  Copy
3497        the SOURCE value into its own register first, that way we
3498        don't have to reload SOURCE the next time it is used. */
3499     if (ts->val_type == TEMP_VAL_MEM) {
3500         temp_load(s, ts, tcg_target_available_regs[itype],
3501                   allocated_regs, preferred_regs);
3502     }
3503 
3504     tcg_debug_assert(ts->val_type == TEMP_VAL_REG);
3505     if (IS_DEAD_ARG(0)) {
3506         /* mov to a non-saved dead register makes no sense (even with
3507            liveness analysis disabled). */
3508         tcg_debug_assert(NEED_SYNC_ARG(0));
3509         if (!ots->mem_allocated) {
3510             temp_allocate_frame(s, ots);
3511         }
3512         tcg_out_st(s, otype, ts->reg, ots->mem_base->reg, ots->mem_offset);
3513         if (IS_DEAD_ARG(1)) {
3514             temp_dead(s, ts);
3515         }
3516         temp_dead(s, ots);
3517     } else {
3518         if (IS_DEAD_ARG(1) && !ts->fixed_reg) {
3519             /* the mov can be suppressed */
3520             if (ots->val_type == TEMP_VAL_REG) {
3521                 s->reg_to_temp[ots->reg] = NULL;
3522             }
3523             ots->reg = ts->reg;
3524             temp_dead(s, ts);
3525         } else {
3526             if (ots->val_type != TEMP_VAL_REG) {
3527                 /* When allocating a new register, make sure to not spill the
3528                    input one. */
3529                 tcg_regset_set_reg(allocated_regs, ts->reg);
3530                 ots->reg = tcg_reg_alloc(s, tcg_target_available_regs[otype],
3531                                          allocated_regs, preferred_regs,
3532                                          ots->indirect_base);
3533             }
3534             if (!tcg_out_mov(s, otype, ots->reg, ts->reg)) {
3535                 /*
3536                  * Cross register class move not supported.
3537                  * Store the source register into the destination slot
3538                  * and leave the destination temp as TEMP_VAL_MEM.
3539                  */
3540                 assert(!ots->fixed_reg);
3541                 if (!ts->mem_allocated) {
3542                     temp_allocate_frame(s, ots);
3543                 }
3544                 tcg_out_st(s, ts->type, ts->reg,
3545                            ots->mem_base->reg, ots->mem_offset);
3546                 ots->mem_coherent = 1;
3547                 temp_free_or_dead(s, ots, -1);
3548                 return;
3549             }
3550         }
3551         ots->val_type = TEMP_VAL_REG;
3552         ots->mem_coherent = 0;
3553         s->reg_to_temp[ots->reg] = ots;
3554         if (NEED_SYNC_ARG(0)) {
3555             temp_sync(s, ots, allocated_regs, 0, 0);
3556         }
3557     }
3558 }
3559 
3560 /*
3561  * Specialized code generation for INDEX_op_dup_vec.
3562  */
3563 static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op)
3564 {
3565     const TCGLifeData arg_life = op->life;
3566     TCGRegSet dup_out_regs, dup_in_regs;
3567     TCGTemp *its, *ots;
3568     TCGType itype, vtype;
3569     intptr_t endian_fixup;
3570     unsigned vece;
3571     bool ok;
3572 
3573     ots = arg_temp(op->args[0]);
3574     its = arg_temp(op->args[1]);
3575 
3576     /* ENV should not be modified.  */
3577     tcg_debug_assert(!ots->fixed_reg);
3578 
3579     itype = its->type;
3580     vece = TCGOP_VECE(op);
3581     vtype = TCGOP_VECL(op) + TCG_TYPE_V64;
3582 
3583     if (its->val_type == TEMP_VAL_CONST) {
3584         /* Propagate constant via movi -> dupi.  */
3585         tcg_target_ulong val = its->val;
3586         if (IS_DEAD_ARG(1)) {
3587             temp_dead(s, its);
3588         }
3589         tcg_reg_alloc_do_movi(s, ots, val, arg_life, op->output_pref[0]);
3590         return;
3591     }
3592 
3593     dup_out_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs;
3594     dup_in_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[1].regs;
3595 
3596     /* Allocate the output register now.  */
3597     if (ots->val_type != TEMP_VAL_REG) {
3598         TCGRegSet allocated_regs = s->reserved_regs;
3599 
3600         if (!IS_DEAD_ARG(1) && its->val_type == TEMP_VAL_REG) {
3601             /* Make sure to not spill the input register. */
3602             tcg_regset_set_reg(allocated_regs, its->reg);
3603         }
3604         ots->reg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
3605                                  op->output_pref[0], ots->indirect_base);
3606         ots->val_type = TEMP_VAL_REG;
3607         ots->mem_coherent = 0;
3608         s->reg_to_temp[ots->reg] = ots;
3609     }
3610 
3611     switch (its->val_type) {
3612     case TEMP_VAL_REG:
3613         /*
3614          * The dup constriaints must be broad, covering all possible VECE.
3615          * However, tcg_op_dup_vec() gets to see the VECE and we allow it
3616          * to fail, indicating that extra moves are required for that case.
3617          */
3618         if (tcg_regset_test_reg(dup_in_regs, its->reg)) {
3619             if (tcg_out_dup_vec(s, vtype, vece, ots->reg, its->reg)) {
3620                 goto done;
3621             }
3622             /* Try again from memory or a vector input register.  */
3623         }
3624         if (!its->mem_coherent) {
3625             /*
3626              * The input register is not synced, and so an extra store
3627              * would be required to use memory.  Attempt an integer-vector
3628              * register move first.  We do not have a TCGRegSet for this.
3629              */
3630             if (tcg_out_mov(s, itype, ots->reg, its->reg)) {
3631                 break;
3632             }
3633             /* Sync the temp back to its slot and load from there.  */
3634             temp_sync(s, its, s->reserved_regs, 0, 0);
3635         }
3636         /* fall through */
3637 
3638     case TEMP_VAL_MEM:
3639 #ifdef HOST_WORDS_BIGENDIAN
3640         endian_fixup = itype == TCG_TYPE_I32 ? 4 : 8;
3641         endian_fixup -= 1 << vece;
3642 #else
3643         endian_fixup = 0;
3644 #endif
3645         if (tcg_out_dupm_vec(s, vtype, vece, ots->reg, its->mem_base->reg,
3646                              its->mem_offset + endian_fixup)) {
3647             goto done;
3648         }
3649         tcg_out_ld(s, itype, ots->reg, its->mem_base->reg, its->mem_offset);
3650         break;
3651 
3652     default:
3653         g_assert_not_reached();
3654     }
3655 
3656     /* We now have a vector input register, so dup must succeed. */
3657     ok = tcg_out_dup_vec(s, vtype, vece, ots->reg, ots->reg);
3658     tcg_debug_assert(ok);
3659 
3660  done:
3661     if (IS_DEAD_ARG(1)) {
3662         temp_dead(s, its);
3663     }
3664     if (NEED_SYNC_ARG(0)) {
3665         temp_sync(s, ots, s->reserved_regs, 0, 0);
3666     }
3667     if (IS_DEAD_ARG(0)) {
3668         temp_dead(s, ots);
3669     }
3670 }
3671 
3672 static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
3673 {
3674     const TCGLifeData arg_life = op->life;
3675     const TCGOpDef * const def = &tcg_op_defs[op->opc];
3676     TCGRegSet i_allocated_regs;
3677     TCGRegSet o_allocated_regs;
3678     int i, k, nb_iargs, nb_oargs;
3679     TCGReg reg;
3680     TCGArg arg;
3681     const TCGArgConstraint *arg_ct;
3682     TCGTemp *ts;
3683     TCGArg new_args[TCG_MAX_OP_ARGS];
3684     int const_args[TCG_MAX_OP_ARGS];
3685 
3686     nb_oargs = def->nb_oargs;
3687     nb_iargs = def->nb_iargs;
3688 
3689     /* copy constants */
3690     memcpy(new_args + nb_oargs + nb_iargs,
3691            op->args + nb_oargs + nb_iargs,
3692            sizeof(TCGArg) * def->nb_cargs);
3693 
3694     i_allocated_regs = s->reserved_regs;
3695     o_allocated_regs = s->reserved_regs;
3696 
3697     /* satisfy input constraints */
3698     for (k = 0; k < nb_iargs; k++) {
3699         TCGRegSet i_preferred_regs, o_preferred_regs;
3700 
3701         i = def->args_ct[nb_oargs + k].sort_index;
3702         arg = op->args[i];
3703         arg_ct = &def->args_ct[i];
3704         ts = arg_temp(arg);
3705 
3706         if (ts->val_type == TEMP_VAL_CONST
3707             && tcg_target_const_match(ts->val, ts->type, arg_ct)) {
3708             /* constant is OK for instruction */
3709             const_args[i] = 1;
3710             new_args[i] = ts->val;
3711             continue;
3712         }
3713 
3714         i_preferred_regs = o_preferred_regs = 0;
3715         if (arg_ct->ialias) {
3716             o_preferred_regs = op->output_pref[arg_ct->alias_index];
3717             if (ts->fixed_reg) {
3718                 /* if fixed register, we must allocate a new register
3719                    if the alias is not the same register */
3720                 if (arg != op->args[arg_ct->alias_index]) {
3721                     goto allocate_in_reg;
3722                 }
3723             } else {
3724                 /* if the input is aliased to an output and if it is
3725                    not dead after the instruction, we must allocate
3726                    a new register and move it */
3727                 if (!IS_DEAD_ARG(i)) {
3728                     goto allocate_in_reg;
3729                 }
3730 
3731                 /* check if the current register has already been allocated
3732                    for another input aliased to an output */
3733                 if (ts->val_type == TEMP_VAL_REG) {
3734                     int k2, i2;
3735                     reg = ts->reg;
3736                     for (k2 = 0 ; k2 < k ; k2++) {
3737                         i2 = def->args_ct[nb_oargs + k2].sort_index;
3738                         if (def->args_ct[i2].ialias && reg == new_args[i2]) {
3739                             goto allocate_in_reg;
3740                         }
3741                     }
3742                 }
3743                 i_preferred_regs = o_preferred_regs;
3744             }
3745         }
3746 
3747         temp_load(s, ts, arg_ct->regs, i_allocated_regs, i_preferred_regs);
3748         reg = ts->reg;
3749 
3750         if (tcg_regset_test_reg(arg_ct->regs, reg)) {
3751             /* nothing to do : the constraint is satisfied */
3752         } else {
3753         allocate_in_reg:
3754             /* allocate a new register matching the constraint
3755                and move the temporary register into it */
3756             temp_load(s, ts, tcg_target_available_regs[ts->type],
3757                       i_allocated_regs, 0);
3758             reg = tcg_reg_alloc(s, arg_ct->regs, i_allocated_regs,
3759                                 o_preferred_regs, ts->indirect_base);
3760             if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
3761                 /*
3762                  * Cross register class move not supported.  Sync the
3763                  * temp back to its slot and load from there.
3764                  */
3765                 temp_sync(s, ts, i_allocated_regs, 0, 0);
3766                 tcg_out_ld(s, ts->type, reg,
3767                            ts->mem_base->reg, ts->mem_offset);
3768             }
3769         }
3770         new_args[i] = reg;
3771         const_args[i] = 0;
3772         tcg_regset_set_reg(i_allocated_regs, reg);
3773     }
3774 
3775     /* mark dead temporaries and free the associated registers */
3776     for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
3777         if (IS_DEAD_ARG(i)) {
3778             temp_dead(s, arg_temp(op->args[i]));
3779         }
3780     }
3781 
3782     if (def->flags & TCG_OPF_COND_BRANCH) {
3783         tcg_reg_alloc_cbranch(s, i_allocated_regs);
3784     } else if (def->flags & TCG_OPF_BB_END) {
3785         tcg_reg_alloc_bb_end(s, i_allocated_regs);
3786     } else {
3787         if (def->flags & TCG_OPF_CALL_CLOBBER) {
3788             /* XXX: permit generic clobber register list ? */
3789             for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
3790                 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
3791                     tcg_reg_free(s, i, i_allocated_regs);
3792                 }
3793             }
3794         }
3795         if (def->flags & TCG_OPF_SIDE_EFFECTS) {
3796             /* sync globals if the op has side effects and might trigger
3797                an exception. */
3798             sync_globals(s, i_allocated_regs);
3799         }
3800 
3801         /* satisfy the output constraints */
3802         for(k = 0; k < nb_oargs; k++) {
3803             i = def->args_ct[k].sort_index;
3804             arg = op->args[i];
3805             arg_ct = &def->args_ct[i];
3806             ts = arg_temp(arg);
3807 
3808             /* ENV should not be modified.  */
3809             tcg_debug_assert(!ts->fixed_reg);
3810 
3811             if (arg_ct->oalias && !const_args[arg_ct->alias_index]) {
3812                 reg = new_args[arg_ct->alias_index];
3813             } else if (arg_ct->newreg) {
3814                 reg = tcg_reg_alloc(s, arg_ct->regs,
3815                                     i_allocated_regs | o_allocated_regs,
3816                                     op->output_pref[k], ts->indirect_base);
3817             } else {
3818                 reg = tcg_reg_alloc(s, arg_ct->regs, o_allocated_regs,
3819                                     op->output_pref[k], ts->indirect_base);
3820             }
3821             tcg_regset_set_reg(o_allocated_regs, reg);
3822             if (ts->val_type == TEMP_VAL_REG) {
3823                 s->reg_to_temp[ts->reg] = NULL;
3824             }
3825             ts->val_type = TEMP_VAL_REG;
3826             ts->reg = reg;
3827             /*
3828              * Temp value is modified, so the value kept in memory is
3829              * potentially not the same.
3830              */
3831             ts->mem_coherent = 0;
3832             s->reg_to_temp[reg] = ts;
3833             new_args[i] = reg;
3834         }
3835     }
3836 
3837     /* emit instruction */
3838     if (def->flags & TCG_OPF_VECTOR) {
3839         tcg_out_vec_op(s, op->opc, TCGOP_VECL(op), TCGOP_VECE(op),
3840                        new_args, const_args);
3841     } else {
3842         tcg_out_op(s, op->opc, new_args, const_args);
3843     }
3844 
3845     /* move the outputs in the correct register if needed */
3846     for(i = 0; i < nb_oargs; i++) {
3847         ts = arg_temp(op->args[i]);
3848 
3849         /* ENV should not be modified.  */
3850         tcg_debug_assert(!ts->fixed_reg);
3851 
3852         if (NEED_SYNC_ARG(i)) {
3853             temp_sync(s, ts, o_allocated_regs, 0, IS_DEAD_ARG(i));
3854         } else if (IS_DEAD_ARG(i)) {
3855             temp_dead(s, ts);
3856         }
3857     }
3858 }
3859 
/*
 * STACK_DIR(x): x unchanged by default, negated when the target defines
 * TCG_TARGET_STACK_GROWSUP.
 */
#ifndef TCG_TARGET_STACK_GROWSUP
#define STACK_DIR(x) (x)
#else
#define STACK_DIR(x) (-(x))
#endif
3865 
3866 static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
3867 {
3868     const int nb_oargs = TCGOP_CALLO(op);
3869     const int nb_iargs = TCGOP_CALLI(op);
3870     const TCGLifeData arg_life = op->life;
3871     int flags, nb_regs, i;
3872     TCGReg reg;
3873     TCGArg arg;
3874     TCGTemp *ts;
3875     intptr_t stack_offset;
3876     size_t call_stack_size;
3877     tcg_insn_unit *func_addr;
3878     int allocate_args;
3879     TCGRegSet allocated_regs;
3880 
3881     func_addr = (tcg_insn_unit *)(intptr_t)op->args[nb_oargs + nb_iargs];
3882     flags = op->args[nb_oargs + nb_iargs + 1];
3883 
3884     nb_regs = ARRAY_SIZE(tcg_target_call_iarg_regs);
3885     if (nb_regs > nb_iargs) {
3886         nb_regs = nb_iargs;
3887     }
3888 
3889     /* assign stack slots first */
3890     call_stack_size = (nb_iargs - nb_regs) * sizeof(tcg_target_long);
3891     call_stack_size = (call_stack_size + TCG_TARGET_STACK_ALIGN - 1) &
3892         ~(TCG_TARGET_STACK_ALIGN - 1);
3893     allocate_args = (call_stack_size > TCG_STATIC_CALL_ARGS_SIZE);
3894     if (allocate_args) {
3895         /* XXX: if more than TCG_STATIC_CALL_ARGS_SIZE is needed,
3896            preallocate call stack */
3897         tcg_abort();
3898     }
3899 
3900     stack_offset = TCG_TARGET_CALL_STACK_OFFSET;
3901     for (i = nb_regs; i < nb_iargs; i++) {
3902         arg = op->args[nb_oargs + i];
3903 #ifdef TCG_TARGET_STACK_GROWSUP
3904         stack_offset -= sizeof(tcg_target_long);
3905 #endif
3906         if (arg != TCG_CALL_DUMMY_ARG) {
3907             ts = arg_temp(arg);
3908             temp_load(s, ts, tcg_target_available_regs[ts->type],
3909                       s->reserved_regs, 0);
3910             tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK, stack_offset);
3911         }
3912 #ifndef TCG_TARGET_STACK_GROWSUP
3913         stack_offset += sizeof(tcg_target_long);
3914 #endif
3915     }
3916 
3917     /* assign input registers */
3918     allocated_regs = s->reserved_regs;
3919     for (i = 0; i < nb_regs; i++) {
3920         arg = op->args[nb_oargs + i];
3921         if (arg != TCG_CALL_DUMMY_ARG) {
3922             ts = arg_temp(arg);
3923             reg = tcg_target_call_iarg_regs[i];
3924 
3925             if (ts->val_type == TEMP_VAL_REG) {
3926                 if (ts->reg != reg) {
3927                     tcg_reg_free(s, reg, allocated_regs);
3928                     if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
3929                         /*
3930                          * Cross register class move not supported.  Sync the
3931                          * temp back to its slot and load from there.
3932                          */
3933                         temp_sync(s, ts, allocated_regs, 0, 0);
3934                         tcg_out_ld(s, ts->type, reg,
3935                                    ts->mem_base->reg, ts->mem_offset);
3936                     }
3937                 }
3938             } else {
3939                 TCGRegSet arg_set = 0;
3940 
3941                 tcg_reg_free(s, reg, allocated_regs);
3942                 tcg_regset_set_reg(arg_set, reg);
3943                 temp_load(s, ts, arg_set, allocated_regs, 0);
3944             }
3945 
3946             tcg_regset_set_reg(allocated_regs, reg);
3947         }
3948     }
3949 
3950     /* mark dead temporaries and free the associated registers */
3951     for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
3952         if (IS_DEAD_ARG(i)) {
3953             temp_dead(s, arg_temp(op->args[i]));
3954         }
3955     }
3956 
3957     /* clobber call registers */
3958     for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
3959         if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
3960             tcg_reg_free(s, i, allocated_regs);
3961         }
3962     }
3963 
3964     /* Save globals if they might be written by the helper, sync them if
3965        they might be read. */
3966     if (flags & TCG_CALL_NO_READ_GLOBALS) {
3967         /* Nothing to do */
3968     } else if (flags & TCG_CALL_NO_WRITE_GLOBALS) {
3969         sync_globals(s, allocated_regs);
3970     } else {
3971         save_globals(s, allocated_regs);
3972     }
3973 
3974     tcg_out_call(s, func_addr);
3975 
3976     /* assign output registers and emit moves if needed */
3977     for(i = 0; i < nb_oargs; i++) {
3978         arg = op->args[i];
3979         ts = arg_temp(arg);
3980 
3981         /* ENV should not be modified.  */
3982         tcg_debug_assert(!ts->fixed_reg);
3983 
3984         reg = tcg_target_call_oarg_regs[i];
3985         tcg_debug_assert(s->reg_to_temp[reg] == NULL);
3986         if (ts->val_type == TEMP_VAL_REG) {
3987             s->reg_to_temp[ts->reg] = NULL;
3988         }
3989         ts->val_type = TEMP_VAL_REG;
3990         ts->reg = reg;
3991         ts->mem_coherent = 0;
3992         s->reg_to_temp[reg] = ts;
3993         if (NEED_SYNC_ARG(i)) {
3994             temp_sync(s, ts, allocated_regs, 0, IS_DEAD_ARG(i));
3995         } else if (IS_DEAD_ARG(i)) {
3996             temp_dead(s, ts);
3997         }
3998     }
3999 }
4000 
4001 #ifdef CONFIG_PROFILER
4002 
4003 /* avoid copy/paste errors */
4004 #define PROF_ADD(to, from, field)                       \
4005     do {                                                \
4006         (to)->field += qatomic_read(&((from)->field));  \
4007     } while (0)
4008 
4009 #define PROF_MAX(to, from, field)                                       \
4010     do {                                                                \
4011         typeof((from)->field) val__ = qatomic_read(&((from)->field));   \
4012         if (val__ > (to)->field) {                                      \
4013             (to)->field = val__;                                        \
4014         }                                                               \
4015     } while (0)
4016 
4017 /* Pass in a zero'ed @prof */
4018 static inline
4019 void tcg_profile_snapshot(TCGProfile *prof, bool counters, bool table)
4020 {
4021     unsigned int n_ctxs = qatomic_read(&n_tcg_ctxs);
4022     unsigned int i;
4023 
4024     for (i = 0; i < n_ctxs; i++) {
4025         TCGContext *s = qatomic_read(&tcg_ctxs[i]);
4026         const TCGProfile *orig = &s->prof;
4027 
4028         if (counters) {
4029             PROF_ADD(prof, orig, cpu_exec_time);
4030             PROF_ADD(prof, orig, tb_count1);
4031             PROF_ADD(prof, orig, tb_count);
4032             PROF_ADD(prof, orig, op_count);
4033             PROF_MAX(prof, orig, op_count_max);
4034             PROF_ADD(prof, orig, temp_count);
4035             PROF_MAX(prof, orig, temp_count_max);
4036             PROF_ADD(prof, orig, del_op_count);
4037             PROF_ADD(prof, orig, code_in_len);
4038             PROF_ADD(prof, orig, code_out_len);
4039             PROF_ADD(prof, orig, search_out_len);
4040             PROF_ADD(prof, orig, interm_time);
4041             PROF_ADD(prof, orig, code_time);
4042             PROF_ADD(prof, orig, la_time);
4043             PROF_ADD(prof, orig, opt_time);
4044             PROF_ADD(prof, orig, restore_count);
4045             PROF_ADD(prof, orig, restore_time);
4046         }
4047         if (table) {
4048             int i;
4049 
4050             for (i = 0; i < NB_OPS; i++) {
4051                 PROF_ADD(prof, orig, table_op_count[i]);
4052             }
4053         }
4054     }
4055 }
4056 
4057 #undef PROF_ADD
4058 #undef PROF_MAX
4059 
4060 static void tcg_profile_snapshot_counters(TCGProfile *prof)
4061 {
4062     tcg_profile_snapshot(prof, true, false);
4063 }
4064 
4065 static void tcg_profile_snapshot_table(TCGProfile *prof)
4066 {
4067     tcg_profile_snapshot(prof, false, true);
4068 }
4069 
4070 void tcg_dump_op_count(void)
4071 {
4072     TCGProfile prof = {};
4073     int i;
4074 
4075     tcg_profile_snapshot_table(&prof);
4076     for (i = 0; i < NB_OPS; i++) {
4077         qemu_printf("%s %" PRId64 "\n", tcg_op_defs[i].name,
4078                     prof.table_op_count[i]);
4079     }
4080 }
4081 
4082 int64_t tcg_cpu_exec_time(void)
4083 {
4084     unsigned int n_ctxs = qatomic_read(&n_tcg_ctxs);
4085     unsigned int i;
4086     int64_t ret = 0;
4087 
4088     for (i = 0; i < n_ctxs; i++) {
4089         const TCGContext *s = qatomic_read(&tcg_ctxs[i]);
4090         const TCGProfile *prof = &s->prof;
4091 
4092         ret += qatomic_read(&prof->cpu_exec_time);
4093     }
4094     return ret;
4095 }
4096 #else
/* Build without CONFIG_PROFILER: there are no counts, print a notice. */
void tcg_dump_op_count(void)
{
    static const char notice[] = "[TCG profiler not compiled]\n";

    qemu_printf("%s", notice);
}
4101 
/*
 * Build without CONFIG_PROFILER: no execution time is recorded, so
 * report an error and terminate the process.  Never returns.
 */
int64_t tcg_cpu_exec_time(void)
{
    const char *caller = __func__;

    error_report("%s: TCG profiler not compiled", caller);
    exit(EXIT_FAILURE);
}
4107 #endif
4108 
4109 
4110 int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
4111 {
4112 #ifdef CONFIG_PROFILER
4113     TCGProfile *prof = &s->prof;
4114 #endif
4115     int i, num_insns;
4116     TCGOp *op;
4117 
4118 #ifdef CONFIG_PROFILER
4119     {
4120         int n = 0;
4121 
4122         QTAILQ_FOREACH(op, &s->ops, link) {
4123             n++;
4124         }
4125         qatomic_set(&prof->op_count, prof->op_count + n);
4126         if (n > prof->op_count_max) {
4127             qatomic_set(&prof->op_count_max, n);
4128         }
4129 
4130         n = s->nb_temps;
4131         qatomic_set(&prof->temp_count, prof->temp_count + n);
4132         if (n > prof->temp_count_max) {
4133             qatomic_set(&prof->temp_count_max, n);
4134         }
4135     }
4136 #endif
4137 
4138 #ifdef DEBUG_DISAS
4139     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)
4140                  && qemu_log_in_addr_range(tb->pc))) {
4141         FILE *logfile = qemu_log_lock();
4142         qemu_log("OP:\n");
4143         tcg_dump_ops(s, false);
4144         qemu_log("\n");
4145         qemu_log_unlock(logfile);
4146     }
4147 #endif
4148 
4149 #ifdef CONFIG_DEBUG_TCG
4150     /* Ensure all labels referenced have been emitted.  */
4151     {
4152         TCGLabel *l;
4153         bool error = false;
4154 
4155         QSIMPLEQ_FOREACH(l, &s->labels, next) {
4156             if (unlikely(!l->present) && l->refs) {
4157                 qemu_log_mask(CPU_LOG_TB_OP,
4158                               "$L%d referenced but not present.\n", l->id);
4159                 error = true;
4160             }
4161         }
4162         assert(!error);
4163     }
4164 #endif
4165 
4166 #ifdef CONFIG_PROFILER
4167     qatomic_set(&prof->opt_time, prof->opt_time - profile_getclock());
4168 #endif
4169 
4170 #ifdef USE_TCG_OPTIMIZATIONS
4171     tcg_optimize(s);
4172 #endif
4173 
4174 #ifdef CONFIG_PROFILER
4175     qatomic_set(&prof->opt_time, prof->opt_time + profile_getclock());
4176     qatomic_set(&prof->la_time, prof->la_time - profile_getclock());
4177 #endif
4178 
4179     reachable_code_pass(s);
4180     liveness_pass_1(s);
4181 
4182     if (s->nb_indirects > 0) {
4183 #ifdef DEBUG_DISAS
4184         if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
4185                      && qemu_log_in_addr_range(tb->pc))) {
4186             FILE *logfile = qemu_log_lock();
4187             qemu_log("OP before indirect lowering:\n");
4188             tcg_dump_ops(s, false);
4189             qemu_log("\n");
4190             qemu_log_unlock(logfile);
4191         }
4192 #endif
4193         /* Replace indirect temps with direct temps.  */
4194         if (liveness_pass_2(s)) {
4195             /* If changes were made, re-run liveness.  */
4196             liveness_pass_1(s);
4197         }
4198     }
4199 
4200 #ifdef CONFIG_PROFILER
4201     qatomic_set(&prof->la_time, prof->la_time + profile_getclock());
4202 #endif
4203 
4204 #ifdef DEBUG_DISAS
4205     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT)
4206                  && qemu_log_in_addr_range(tb->pc))) {
4207         FILE *logfile = qemu_log_lock();
4208         qemu_log("OP after optimization and liveness analysis:\n");
4209         tcg_dump_ops(s, true);
4210         qemu_log("\n");
4211         qemu_log_unlock(logfile);
4212     }
4213 #endif
4214 
4215     tcg_reg_alloc_start(s);
4216 
4217     s->code_buf = tb->tc.ptr;
4218     s->code_ptr = tb->tc.ptr;
4219 
4220 #ifdef TCG_TARGET_NEED_LDST_LABELS
4221     QSIMPLEQ_INIT(&s->ldst_labels);
4222 #endif
4223 #ifdef TCG_TARGET_NEED_POOL_LABELS
4224     s->pool_labels = NULL;
4225 #endif
4226 
4227     num_insns = -1;
4228     QTAILQ_FOREACH(op, &s->ops, link) {
4229         TCGOpcode opc = op->opc;
4230 
4231 #ifdef CONFIG_PROFILER
4232         qatomic_set(&prof->table_op_count[opc], prof->table_op_count[opc] + 1);
4233 #endif
4234 
4235         switch (opc) {
4236         case INDEX_op_mov_i32:
4237         case INDEX_op_mov_i64:
4238         case INDEX_op_mov_vec:
4239             tcg_reg_alloc_mov(s, op);
4240             break;
4241         case INDEX_op_movi_i32:
4242         case INDEX_op_movi_i64:
4243         case INDEX_op_dupi_vec:
4244             tcg_reg_alloc_movi(s, op);
4245             break;
4246         case INDEX_op_dup_vec:
4247             tcg_reg_alloc_dup(s, op);
4248             break;
4249         case INDEX_op_insn_start:
4250             if (num_insns >= 0) {
4251                 size_t off = tcg_current_code_size(s);
4252                 s->gen_insn_end_off[num_insns] = off;
4253                 /* Assert that we do not overflow our stored offset.  */
4254                 assert(s->gen_insn_end_off[num_insns] == off);
4255             }
4256             num_insns++;
4257             for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
4258                 target_ulong a;
4259 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
4260                 a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
4261 #else
4262                 a = op->args[i];
4263 #endif
4264                 s->gen_insn_data[num_insns][i] = a;
4265             }
4266             break;
4267         case INDEX_op_discard:
4268             temp_dead(s, arg_temp(op->args[0]));
4269             break;
4270         case INDEX_op_set_label:
4271             tcg_reg_alloc_bb_end(s, s->reserved_regs);
4272             tcg_out_label(s, arg_label(op->args[0]), s->code_ptr);
4273             break;
4274         case INDEX_op_call:
4275             tcg_reg_alloc_call(s, op);
4276             break;
4277         default:
4278             /* Sanity check that we've not introduced any unhandled opcodes. */
4279             tcg_debug_assert(tcg_op_supported(opc));
4280             /* Note: in order to speed up the code, it would be much
4281                faster to have specialized register allocator functions for
4282                some common argument patterns */
4283             tcg_reg_alloc_op(s, op);
4284             break;
4285         }
4286 #ifdef CONFIG_DEBUG_TCG
4287         check_regs(s);
4288 #endif
4289         /* Test for (pending) buffer overflow.  The assumption is that any
4290            one operation beginning below the high water mark cannot overrun
4291            the buffer completely.  Thus we can test for overflow after
4292            generating code without having to check during generation.  */
4293         if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
4294             return -1;
4295         }
4296         /* Test for TB overflow, as seen by gen_insn_end_off.  */
4297         if (unlikely(tcg_current_code_size(s) > UINT16_MAX)) {
4298             return -2;
4299         }
4300     }
4301     tcg_debug_assert(num_insns >= 0);
4302     s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);
4303 
4304     /* Generate TB finalization at the end of block */
4305 #ifdef TCG_TARGET_NEED_LDST_LABELS
4306     i = tcg_out_ldst_finalize(s);
4307     if (i < 0) {
4308         return i;
4309     }
4310 #endif
4311 #ifdef TCG_TARGET_NEED_POOL_LABELS
4312     i = tcg_out_pool_finalize(s);
4313     if (i < 0) {
4314         return i;
4315     }
4316 #endif
4317     if (!tcg_resolve_relocs(s)) {
4318         return -2;
4319     }
4320 
4321     /* flush instruction cache */
4322     flush_icache_range((uintptr_t)s->code_buf, (uintptr_t)s->code_ptr);
4323 
4324     return tcg_current_code_size(s);
4325 }
4326 
4327 #ifdef CONFIG_PROFILER
4328 void tcg_dump_info(void)
4329 {
4330     TCGProfile prof = {};
4331     const TCGProfile *s;
4332     int64_t tb_count;
4333     int64_t tb_div_count;
4334     int64_t tot;
4335 
4336     tcg_profile_snapshot_counters(&prof);
4337     s = &prof;
4338     tb_count = s->tb_count;
4339     tb_div_count = tb_count ? tb_count : 1;
4340     tot = s->interm_time + s->code_time;
4341 
4342     qemu_printf("JIT cycles          %" PRId64 " (%0.3f s at 2.4 GHz)\n",
4343                 tot, tot / 2.4e9);
4344     qemu_printf("translated TBs      %" PRId64 " (aborted=%" PRId64
4345                 " %0.1f%%)\n",
4346                 tb_count, s->tb_count1 - tb_count,
4347                 (double)(s->tb_count1 - s->tb_count)
4348                 / (s->tb_count1 ? s->tb_count1 : 1) * 100.0);
4349     qemu_printf("avg ops/TB          %0.1f max=%d\n",
4350                 (double)s->op_count / tb_div_count, s->op_count_max);
4351     qemu_printf("deleted ops/TB      %0.2f\n",
4352                 (double)s->del_op_count / tb_div_count);
4353     qemu_printf("avg temps/TB        %0.2f max=%d\n",
4354                 (double)s->temp_count / tb_div_count, s->temp_count_max);
4355     qemu_printf("avg host code/TB    %0.1f\n",
4356                 (double)s->code_out_len / tb_div_count);
4357     qemu_printf("avg search data/TB  %0.1f\n",
4358                 (double)s->search_out_len / tb_div_count);
4359 
4360     qemu_printf("cycles/op           %0.1f\n",
4361                 s->op_count ? (double)tot / s->op_count : 0);
4362     qemu_printf("cycles/in byte      %0.1f\n",
4363                 s->code_in_len ? (double)tot / s->code_in_len : 0);
4364     qemu_printf("cycles/out byte     %0.1f\n",
4365                 s->code_out_len ? (double)tot / s->code_out_len : 0);
4366     qemu_printf("cycles/search byte     %0.1f\n",
4367                 s->search_out_len ? (double)tot / s->search_out_len : 0);
4368     if (tot == 0) {
4369         tot = 1;
4370     }
4371     qemu_printf("  gen_interm time   %0.1f%%\n",
4372                 (double)s->interm_time / tot * 100.0);
4373     qemu_printf("  gen_code time     %0.1f%%\n",
4374                 (double)s->code_time / tot * 100.0);
4375     qemu_printf("optim./code time    %0.1f%%\n",
4376                 (double)s->opt_time / (s->code_time ? s->code_time : 1)
4377                 * 100.0);
4378     qemu_printf("liveness/code time  %0.1f%%\n",
4379                 (double)s->la_time / (s->code_time ? s->code_time : 1) * 100.0);
4380     qemu_printf("cpu_restore count   %" PRId64 "\n",
4381                 s->restore_count);
4382     qemu_printf("  avg cycles        %0.1f\n",
4383                 s->restore_count ? (double)s->restore_time / s->restore_count : 0);
4384 }
4385 #else
/* Build without CONFIG_PROFILER: nothing was collected, print a notice. */
void tcg_dump_info(void)
{
    static const char notice[] = "[TCG profiler not compiled]\n";

    qemu_printf("%s", notice);
}
4390 #endif
4391 
4392 #ifdef ELF_HOST_MACHINE
4393 /* In order to use this feature, the backend needs to do three things:
4394 
4395    (1) Define ELF_HOST_MACHINE to indicate both what value to
4396        put into the ELF image and to indicate support for the feature.
4397 
4398    (2) Define tcg_register_jit.  This should create a buffer containing
4399        the contents of a .debug_frame section that describes the post-
4400        prologue unwind info for the tcg machine.
4401 
4402    (3) Call tcg_register_jit_int, with the constructed .debug_frame.
4403 */
4404 
4405 /* Begin GDB interface.  THE FOLLOWING MUST MATCH GDB DOCS.  */
/* Action codes of the GDB JIT interface; the values are spelled out. */
typedef enum {
    JIT_NOACTION      = 0,
    JIT_REGISTER_FN   = 1,
    JIT_UNREGISTER_FN = 2
} jit_actions_t;
4411 
/*
 * One entry of the GDB JIT interface: two links to neighbouring entries
 * followed by the address and size of the symbol file.  The member
 * order and types are part of the interface and must not change.
 */
struct jit_code_entry {
    struct jit_code_entry *next_entry, *prev_entry;
    const void *symfile_addr;
    uint64_t symfile_size;
};
4418 
/*
 * Descriptor of the GDB JIT interface: a version number, an action flag,
 * and two jit_code_entry pointers.  The member order and types are part
 * of the interface and must not change.
 */
struct jit_descriptor {
    uint32_t version, action_flag;
    struct jit_code_entry *relevant_entry, *first_entry;
};
4425 
/*
 * Part of the GDB interface.  The body is an empty asm statement and the
 * function is marked noinline, so it stays a real, callable symbol.
 */
__attribute__((noinline)) void __jit_debug_register_code(void);
void __jit_debug_register_code(void)
{
    __asm__("");
}
4431 
4432 /* Must statically initialize the version, because GDB may check
4433    the version before we can set it.  */
4434 struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };
4435 
4436 /* End GDB interface.  */
4437 
/*
 * Return the byte offset within @strtab of the entry equal to @str.
 *
 * @strtab is a sequence of NUL-terminated strings; scanning starts at
 * offset 1, skipping the first byte.  The search has no end condition
 * other than a match, so the caller must guarantee that @str is present.
 */
static int find_string(const char *strtab, const char *str)
{
    const char *entry;

    for (entry = strtab + 1;
         strcmp(entry, str) != 0;
         entry += strlen(entry) + 1) {
        /* step over the non-matching entry and its terminator */
    }
    return entry - strtab;
}
4449 
4450 static void tcg_register_jit_int(void *buf_ptr, size_t buf_size,
4451                                  const void *debug_frame,
4452                                  size_t debug_frame_size)
4453 {
4454     struct __attribute__((packed)) DebugInfo {
4455         uint32_t  len;
4456         uint16_t  version;
4457         uint32_t  abbrev;
4458         uint8_t   ptr_size;
4459         uint8_t   cu_die;
4460         uint16_t  cu_lang;
4461         uintptr_t cu_low_pc;
4462         uintptr_t cu_high_pc;
4463         uint8_t   fn_die;
4464         char      fn_name[16];
4465         uintptr_t fn_low_pc;
4466         uintptr_t fn_high_pc;
4467         uint8_t   cu_eoc;
4468     };
4469 
4470     struct ElfImage {
4471         ElfW(Ehdr) ehdr;
4472         ElfW(Phdr) phdr;
4473         ElfW(Shdr) shdr[7];
4474         ElfW(Sym)  sym[2];
4475         struct DebugInfo di;
4476         uint8_t    da[24];
4477         char       str[80];
4478     };
4479 
4480     struct ElfImage *img;
4481 
4482     static const struct ElfImage img_template = {
4483         .ehdr = {
4484             .e_ident[EI_MAG0] = ELFMAG0,
4485             .e_ident[EI_MAG1] = ELFMAG1,
4486             .e_ident[EI_MAG2] = ELFMAG2,
4487             .e_ident[EI_MAG3] = ELFMAG3,
4488             .e_ident[EI_CLASS] = ELF_CLASS,
4489             .e_ident[EI_DATA] = ELF_DATA,
4490             .e_ident[EI_VERSION] = EV_CURRENT,
4491             .e_type = ET_EXEC,
4492             .e_machine = ELF_HOST_MACHINE,
4493             .e_version = EV_CURRENT,
4494             .e_phoff = offsetof(struct ElfImage, phdr),
4495             .e_shoff = offsetof(struct ElfImage, shdr),
4496             .e_ehsize = sizeof(ElfW(Shdr)),
4497             .e_phentsize = sizeof(ElfW(Phdr)),
4498             .e_phnum = 1,
4499             .e_shentsize = sizeof(ElfW(Shdr)),
4500             .e_shnum = ARRAY_SIZE(img->shdr),
4501             .e_shstrndx = ARRAY_SIZE(img->shdr) - 1,
4502 #ifdef ELF_HOST_FLAGS
4503             .e_flags = ELF_HOST_FLAGS,
4504 #endif
4505 #ifdef ELF_OSABI
4506             .e_ident[EI_OSABI] = ELF_OSABI,
4507 #endif
4508         },
4509         .phdr = {
4510             .p_type = PT_LOAD,
4511             .p_flags = PF_X,
4512         },
4513         .shdr = {
4514             [0] = { .sh_type = SHT_NULL },
4515             /* Trick: The contents of code_gen_buffer are not present in
4516                this fake ELF file; that got allocated elsewhere.  Therefore
4517                we mark .text as SHT_NOBITS (similar to .bss) so that readers
4518                will not look for contents.  We can record any address.  */
4519             [1] = { /* .text */
4520                 .sh_type = SHT_NOBITS,
4521                 .sh_flags = SHF_EXECINSTR | SHF_ALLOC,
4522             },
4523             [2] = { /* .debug_info */
4524                 .sh_type = SHT_PROGBITS,
4525                 .sh_offset = offsetof(struct ElfImage, di),
4526                 .sh_size = sizeof(struct DebugInfo),
4527             },
4528             [3] = { /* .debug_abbrev */
4529                 .sh_type = SHT_PROGBITS,
4530                 .sh_offset = offsetof(struct ElfImage, da),
4531                 .sh_size = sizeof(img->da),
4532             },
4533             [4] = { /* .debug_frame */
4534                 .sh_type = SHT_PROGBITS,
4535                 .sh_offset = sizeof(struct ElfImage),
4536             },
4537             [5] = { /* .symtab */
4538                 .sh_type = SHT_SYMTAB,
4539                 .sh_offset = offsetof(struct ElfImage, sym),
4540                 .sh_size = sizeof(img->sym),
4541                 .sh_info = 1,
4542                 .sh_link = ARRAY_SIZE(img->shdr) - 1,
4543                 .sh_entsize = sizeof(ElfW(Sym)),
4544             },
4545             [6] = { /* .strtab */
4546                 .sh_type = SHT_STRTAB,
4547                 .sh_offset = offsetof(struct ElfImage, str),
4548                 .sh_size = sizeof(img->str),
4549             }
4550         },
4551         .sym = {
4552             [1] = { /* code_gen_buffer */
4553                 .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC),
4554                 .st_shndx = 1,
4555             }
4556         },
4557         .di = {
4558             .len = sizeof(struct DebugInfo) - 4,
4559             .version = 2,
4560             .ptr_size = sizeof(void *),
4561             .cu_die = 1,
4562             .cu_lang = 0x8001,  /* DW_LANG_Mips_Assembler */
4563             .fn_die = 2,
4564             .fn_name = "code_gen_buffer"
4565         },
4566         .da = {
4567             1,          /* abbrev number (the cu) */
4568             0x11, 1,    /* DW_TAG_compile_unit, has children */
4569             0x13, 0x5,  /* DW_AT_language, DW_FORM_data2 */
4570             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
4571             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
4572             0, 0,       /* end of abbrev */
4573             2,          /* abbrev number (the fn) */
4574             0x2e, 0,    /* DW_TAG_subprogram, no children */
4575             0x3, 0x8,   /* DW_AT_name, DW_FORM_string */
4576             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
4577             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
4578             0, 0,       /* end of abbrev */
4579             0           /* no more abbrev */
4580         },
4581         .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0"
4582                ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer",
4583     };
4584 
4585     /* We only need a single jit entry; statically allocate it.  */
4586     static struct jit_code_entry one_entry;
4587 
4588     uintptr_t buf = (uintptr_t)buf_ptr;
4589     size_t img_size = sizeof(struct ElfImage) + debug_frame_size;
4590     DebugFrameHeader *dfh;
4591 
4592     img = g_malloc(img_size);
4593     *img = img_template;
4594 
4595     img->phdr.p_vaddr = buf;
4596     img->phdr.p_paddr = buf;
4597     img->phdr.p_memsz = buf_size;
4598 
4599     img->shdr[1].sh_name = find_string(img->str, ".text");
4600     img->shdr[1].sh_addr = buf;
4601     img->shdr[1].sh_size = buf_size;
4602 
4603     img->shdr[2].sh_name = find_string(img->str, ".debug_info");
4604     img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev");
4605 
4606     img->shdr[4].sh_name = find_string(img->str, ".debug_frame");
4607     img->shdr[4].sh_size = debug_frame_size;
4608 
4609     img->shdr[5].sh_name = find_string(img->str, ".symtab");
4610     img->shdr[6].sh_name = find_string(img->str, ".strtab");
4611 
4612     img->sym[1].st_name = find_string(img->str, "code_gen_buffer");
4613     img->sym[1].st_value = buf;
4614     img->sym[1].st_size = buf_size;
4615 
4616     img->di.cu_low_pc = buf;
4617     img->di.cu_high_pc = buf + buf_size;
4618     img->di.fn_low_pc = buf;
4619     img->di.fn_high_pc = buf + buf_size;
4620 
4621     dfh = (DebugFrameHeader *)(img + 1);
4622     memcpy(dfh, debug_frame, debug_frame_size);
4623     dfh->fde.func_start = buf;
4624     dfh->fde.func_len = buf_size;
4625 
4626 #ifdef DEBUG_JIT
4627     /* Enable this block to be able to debug the ELF image file creation.
4628        One can use readelf, objdump, or other inspection utilities.  */
4629     {
4630         FILE *f = fopen("/tmp/qemu.jit", "w+b");
4631         if (f) {
4632             if (fwrite(img, img_size, 1, f) != img_size) {
4633                 /* Avoid stupid unused return value warning for fwrite.  */
4634             }
4635             fclose(f);
4636         }
4637     }
4638 #endif
4639 
4640     one_entry.symfile_addr = img;
4641     one_entry.symfile_size = img_size;
4642 
4643     __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
4644     __jit_debug_descriptor.relevant_entry = &one_entry;
4645     __jit_debug_descriptor.first_entry = &one_entry;
4646     __jit_debug_register_code();
4647 }
4648 #else
4649 /* No support for the feature.  Provide the entry point expected by exec.c,
4650    and implement the internal function we declared earlier.  */
4651 
/*
 * Variant used when ELF_HOST_MACHINE is not defined: there is no image to
 * build, so all arguments are ignored.
 */
static void tcg_register_jit_int(void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
{
    /* Intentionally empty.  */
}
4657 
/*
 * Public entry point for hosts without ELF_HOST_MACHINE: nothing is
 * registered and both arguments are ignored.
 */
void tcg_register_jit(void *buf, size_t buf_size)
{
    /* Intentionally empty.  */
}
4661 #endif /* ELF_HOST_MACHINE */
4662 
4663 #if !TCG_TARGET_MAYBE_vec
4664 void tcg_expand_vec_op(TCGOpcode o, TCGType t, unsigned e, TCGArg a0, ...)
4665 {
4666     g_assert_not_reached();
4667 }
4668 #endif
4669