xref: /openbmc/qemu/tcg/tcg.c (revision 4c4465ff)
1 /*
2  * Tiny Code Generator for QEMU
3  *
4  * Copyright (c) 2008 Fabrice Bellard
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24 
25 /* Define this to use liveness analysis (better code). */
26 #define USE_TCG_OPTIMIZATIONS
27 
28 #include "qemu/osdep.h"
29 
30 /* Define to dump the ELF file used to communicate with GDB.  */
31 #undef DEBUG_JIT
32 
33 #include "qemu/error-report.h"
34 #include "qemu/cutils.h"
35 #include "qemu/host-utils.h"
36 #include "qemu/qemu-print.h"
37 #include "qemu/timer.h"
38 #include "qemu/cacheflush.h"
39 
40 /* Note: the long-term plan is to reduce the dependencies on the QEMU
41    CPU definitions. Currently they are used for qemu_ld/st
42    instructions */
43 #define NO_CPU_IO_DEFS
44 #include "cpu.h"
45 
46 #include "exec/exec-all.h"
47 
48 #if !defined(CONFIG_USER_ONLY)
49 #include "hw/boards.h"
50 #endif
51 
52 #include "tcg/tcg-op.h"
53 
54 #if UINTPTR_MAX == UINT32_MAX
55 # define ELF_CLASS  ELFCLASS32
56 #else
57 # define ELF_CLASS  ELFCLASS64
58 #endif
59 #ifdef HOST_WORDS_BIGENDIAN
60 # define ELF_DATA   ELFDATA2MSB
61 #else
62 # define ELF_DATA   ELFDATA2LSB
63 #endif
64 
65 #include "elf.h"
66 #include "exec/log.h"
67 #include "sysemu/sysemu.h"
68 
69 /* Forward declarations for functions declared in tcg-target.c.inc and
70    used here. */
71 static void tcg_target_init(TCGContext *s);
72 static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode);
73 static void tcg_target_qemu_prologue(TCGContext *s);
74 static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
75                         intptr_t value, intptr_t addend);
76 
77 /* The CIE and FDE header definitions will be common to all hosts.  */
78 typedef struct {
79     uint32_t len __attribute__((aligned((sizeof(void *)))));
80     uint32_t id;
81     uint8_t version;
82     char augmentation[1];
83     uint8_t code_align;
84     uint8_t data_align;
85     uint8_t return_column;
86 } DebugFrameCIE;
87 
88 typedef struct QEMU_PACKED {
89     uint32_t len __attribute__((aligned((sizeof(void *)))));
90     uint32_t cie_offset;
91     uintptr_t func_start;
92     uintptr_t func_len;
93 } DebugFrameFDEHeader;
94 
95 typedef struct QEMU_PACKED {
96     DebugFrameCIE cie;
97     DebugFrameFDEHeader fde;
98 } DebugFrameHeader;
99 
100 static void tcg_register_jit_int(const void *buf, size_t size,
101                                  const void *debug_frame,
102                                  size_t debug_frame_size)
103     __attribute__((unused));
104 
105 /* Forward declarations for functions declared and used in tcg-target.c.inc. */
106 static const char *target_parse_constraint(TCGArgConstraint *ct,
107                                            const char *ct_str, TCGType type);
108 static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
109                        intptr_t arg2);
110 static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
111 static void tcg_out_movi(TCGContext *s, TCGType type,
112                          TCGReg ret, tcg_target_long arg);
113 static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
114                        const int *const_args);
115 #if TCG_TARGET_MAYBE_vec
116 static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
117                             TCGReg dst, TCGReg src);
118 static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
119                              TCGReg dst, TCGReg base, intptr_t offset);
120 static void tcg_out_dupi_vec(TCGContext *s, TCGType type,
121                              TCGReg dst, tcg_target_long arg);
122 static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, unsigned vecl,
123                            unsigned vece, const TCGArg *args,
124                            const int *const_args);
125 #else
126 static inline bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
127                                    TCGReg dst, TCGReg src)
128 {
129     g_assert_not_reached();
130 }
131 static inline bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
132                                     TCGReg dst, TCGReg base, intptr_t offset)
133 {
134     g_assert_not_reached();
135 }
136 static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type,
137                                     TCGReg dst, tcg_target_long arg)
138 {
139     g_assert_not_reached();
140 }
141 static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, unsigned vecl,
142                                   unsigned vece, const TCGArg *args,
143                                   const int *const_args)
144 {
145     g_assert_not_reached();
146 }
147 #endif
148 static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
149                        intptr_t arg2);
150 static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
151                         TCGReg base, intptr_t ofs);
152 static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target);
153 static int tcg_target_const_match(tcg_target_long val, TCGType type,
154                                   const TCGArgConstraint *arg_ct);
155 #ifdef TCG_TARGET_NEED_LDST_LABELS
156 static int tcg_out_ldst_finalize(TCGContext *s);
157 #endif
158 
159 #define TCG_HIGHWATER 1024
160 
161 static TCGContext **tcg_ctxs;
162 static unsigned int n_tcg_ctxs;
163 TCGv_env cpu_env = 0;
164 const void *tcg_code_gen_epilogue;
165 uintptr_t tcg_splitwx_diff;
166 
167 #ifndef CONFIG_TCG_INTERPRETER
168 tcg_prologue_fn *tcg_qemu_tb_exec;
169 #endif
170 
171 struct tcg_region_tree {
172     QemuMutex lock;
173     GTree *tree;
174     /* padding to avoid false sharing is computed at run-time */
175 };
176 
177 /*
178  * We divide code_gen_buffer into equally-sized "regions" that TCG threads
179  * dynamically allocate from as demand dictates. Given appropriate region
180  * sizing, this minimizes flushes even when some TCG threads generate a lot
181  * more code than others.
182  */
183 struct tcg_region_state {
184     QemuMutex lock;
185 
186     /* fields set at init time */
187     void *start;
188     void *start_aligned;
189     void *end;
190     size_t n;
191     size_t size; /* size of one region */
192     size_t stride; /* .size + guard size */
193 
194     /* fields protected by the lock */
195     size_t current; /* current region index */
196     size_t agg_size_full; /* aggregate size of full regions */
197 };
198 
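/*
 * Illustrative layout, assuming region.n == 3 (a sketch, not to scale):
 *
 *   start                                                          end
 *   |<---- region 0 ---->|G|<--- region 1 --->|G|<--- region 2 --->|G|
 *                          |<------- stride ----->|
 *
 * G is a guard page.  Region 0 also absorbs the unaligned head of the
 * buffer, so it can be slightly larger than the others; see
 * tcg_region_init() below.
 */
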
199 static struct tcg_region_state region;
200 /*
201  * This is an array of struct tcg_region_tree's, with padding.
202  * We use void * to simplify the computation of region_trees[i]; each
203  * struct is found every tree_size bytes.
204  */
205 static void *region_trees;
206 static size_t tree_size;
207 static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT];
208 static TCGRegSet tcg_target_call_clobber_regs;
209 
210 #if TCG_TARGET_INSN_UNIT_SIZE == 1
211 static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v)
212 {
213     *s->code_ptr++ = v;
214 }
215 
216 static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p,
217                                                       uint8_t v)
218 {
219     *p = v;
220 }
221 #endif
222 
223 #if TCG_TARGET_INSN_UNIT_SIZE <= 2
224 static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v)
225 {
226     if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
227         *s->code_ptr++ = v;
228     } else {
229         tcg_insn_unit *p = s->code_ptr;
230         memcpy(p, &v, sizeof(v));
231         s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
232     }
233 }
234 
235 static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p,
236                                                        uint16_t v)
237 {
238     if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
239         *p = v;
240     } else {
241         memcpy(p, &v, sizeof(v));
242     }
243 }
244 #endif
245 
246 #if TCG_TARGET_INSN_UNIT_SIZE <= 4
247 static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v)
248 {
249     if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
250         *s->code_ptr++ = v;
251     } else {
252         tcg_insn_unit *p = s->code_ptr;
253         memcpy(p, &v, sizeof(v));
254         s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
255     }
256 }
257 
258 static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p,
259                                                        uint32_t v)
260 {
261     if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
262         *p = v;
263     } else {
264         memcpy(p, &v, sizeof(v));
265     }
266 }
267 #endif
268 
269 #if TCG_TARGET_INSN_UNIT_SIZE <= 8
270 static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v)
271 {
272     if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
273         *s->code_ptr++ = v;
274     } else {
275         tcg_insn_unit *p = s->code_ptr;
276         memcpy(p, &v, sizeof(v));
277         s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
278     }
279 }
280 
281 static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p,
282                                                        uint64_t v)
283 {
284     if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
285         *p = v;
286     } else {
287         memcpy(p, &v, sizeof(v));
288     }
289 }
290 #endif
291 
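/*
 * Sketch: how a backend (tcg-target.c.inc) typically uses the emitters
 * above.  The instruction encoding below is hypothetical, as is the
 * function itself; real encodings are target-specific.
 */
#if TCG_TARGET_INSN_UNIT_SIZE <= 4
static void __attribute__((unused))
example_emit_add(TCGContext *s, TCGReg rd, TCGReg rn, TCGReg rm)
{
    /* Pack a made-up 32-bit "add rd, rn, rm" encoding and emit it. */
    tcg_out32(s, (1u << 24) | (rd << 16) | (rn << 8) | rm);
}
#endif
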
292 /* label relocation processing */
293 
294 static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
295                           TCGLabel *l, intptr_t addend)
296 {
297     TCGRelocation *r = tcg_malloc(sizeof(TCGRelocation));
298 
299     r->type = type;
300     r->ptr = code_ptr;
301     r->addend = addend;
302     QSIMPLEQ_INSERT_TAIL(&l->relocs, r, next);
303 }
304 
305 static void tcg_out_label(TCGContext *s, TCGLabel *l)
306 {
307     tcg_debug_assert(!l->has_value);
308     l->has_value = 1;
309     l->u.value_ptr = tcg_splitwx_to_rx(s->code_ptr);
310 }
311 
312 TCGLabel *gen_new_label(void)
313 {
314     TCGContext *s = tcg_ctx;
315     TCGLabel *l = tcg_malloc(sizeof(TCGLabel));
316 
317     memset(l, 0, sizeof(TCGLabel));
318     l->id = s->nb_labels++;
319     QSIMPLEQ_INIT(&l->relocs);
320 
321     QSIMPLEQ_INSERT_TAIL(&s->labels, l, next);
322 
323     return l;
324 }
325 
326 static bool tcg_resolve_relocs(TCGContext *s)
327 {
328     TCGLabel *l;
329 
330     QSIMPLEQ_FOREACH(l, &s->labels, next) {
331         TCGRelocation *r;
332         uintptr_t value = l->u.value;
333 
334         QSIMPLEQ_FOREACH(r, &l->relocs, next) {
335             if (!patch_reloc(r->ptr, r->type, value, r->addend)) {
336                 return false;
337             }
338         }
339     }
340     return true;
341 }
342 
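/*
 * Sketch: how a backend ties tcg_out_reloc(), tcg_out_label() and
 * tcg_resolve_relocs() together for a branch.  The function, the reloc
 * type and the zero encodings are hypothetical stand-ins.
 */
#if TCG_TARGET_INSN_UNIT_SIZE <= 4
static void __attribute__((unused))
example_branch_to(TCGContext *s, TCGLabel *l)
{
    if (l->has_value) {
        /* Backward branch: the target is known, encode it directly. */
        tcg_out32(s, 0 /* branch insn with computed displacement */);
    } else {
        /* Forward branch: queue a relocation; patch_reloc() fills in
           the displacement after tcg_out_label() resolves the label. */
        tcg_out_reloc(s, s->code_ptr, 0 /* reloc type */, l, 0);
        tcg_out32(s, 0 /* branch insn, displacement patched later */);
    }
}
#endif
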
343 static void set_jmp_reset_offset(TCGContext *s, int which)
344 {
345     /*
346      * We will check for overflow at the end of the opcode loop in
347      * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
348      */
349     s->tb_jmp_reset_offset[which] = tcg_current_code_size(s);
350 }
351 
352 #include "tcg-target.c.inc"
353 
354 /* compare a pointer @ptr and a tb_tc @s */
355 static int ptr_cmp_tb_tc(const void *ptr, const struct tb_tc *s)
356 {
357     if (ptr >= s->ptr + s->size) {
358         return 1;
359     } else if (ptr < s->ptr) {
360         return -1;
361     }
362     return 0;
363 }
364 
365 static gint tb_tc_cmp(gconstpointer ap, gconstpointer bp)
366 {
367     const struct tb_tc *a = ap;
368     const struct tb_tc *b = bp;
369 
370     /*
371      * When both sizes are set, we know this isn't a lookup.
372      * This is the most likely case: every TB must be inserted; lookups
373      * are a lot less frequent.
374      */
375     if (likely(a->size && b->size)) {
376         if (a->ptr > b->ptr) {
377             return 1;
378         } else if (a->ptr < b->ptr) {
379             return -1;
380         }
381         /* a->ptr == b->ptr should happen only on deletions */
382         g_assert(a->size == b->size);
383         return 0;
384     }
385     /*
386      * In a lookup, one of the two .size fields is 0.
387      * From the glib sources we see that @ap is always the lookup key. However
388      * the docs provide no guarantee, so we just mark this case as likely.
389      */
390     if (likely(a->size == 0)) {
391         return ptr_cmp_tb_tc(a->ptr, b);
392     }
393     return ptr_cmp_tb_tc(b->ptr, a);
394 }
395 
396 static void tcg_region_trees_init(void)
397 {
398     size_t i;
399 
400     tree_size = ROUND_UP(sizeof(struct tcg_region_tree), qemu_dcache_linesize);
401     region_trees = qemu_memalign(qemu_dcache_linesize, region.n * tree_size);
402     for (i = 0; i < region.n; i++) {
403         struct tcg_region_tree *rt = region_trees + i * tree_size;
404 
405         qemu_mutex_init(&rt->lock);
406         rt->tree = g_tree_new(tb_tc_cmp);
407     }
408 }
409 
410 static struct tcg_region_tree *tc_ptr_to_region_tree(const void *cp)
411 {
412     void *p = tcg_splitwx_to_rw(cp);
413     size_t region_idx;
414 
415     if (p < region.start_aligned) {
416         region_idx = 0;
417     } else {
418         ptrdiff_t offset = p - region.start_aligned;
419 
420         if (offset > region.stride * (region.n - 1)) {
421             region_idx = region.n - 1;
422         } else {
423             region_idx = offset / region.stride;
424         }
425     }
426     return region_trees + region_idx * tree_size;
427 }
428 
429 void tcg_tb_insert(TranslationBlock *tb)
430 {
431     struct tcg_region_tree *rt = tc_ptr_to_region_tree(tb->tc.ptr);
432 
433     qemu_mutex_lock(&rt->lock);
434     g_tree_insert(rt->tree, &tb->tc, tb);
435     qemu_mutex_unlock(&rt->lock);
436 }
437 
438 void tcg_tb_remove(TranslationBlock *tb)
439 {
440     struct tcg_region_tree *rt = tc_ptr_to_region_tree(tb->tc.ptr);
441 
442     qemu_mutex_lock(&rt->lock);
443     g_tree_remove(rt->tree, &tb->tc);
444     qemu_mutex_unlock(&rt->lock);
445 }
446 
447 /*
448  * Find the TB 'tb' such that
449  * tb->tc.ptr <= tc_ptr < tb->tc.ptr + tb->tc.size
450  * Return NULL if not found.
451  */
452 TranslationBlock *tcg_tb_lookup(uintptr_t tc_ptr)
453 {
454     struct tcg_region_tree *rt = tc_ptr_to_region_tree((void *)tc_ptr);
455     TranslationBlock *tb;
456     struct tb_tc s = { .ptr = (void *)tc_ptr };
457 
458     qemu_mutex_lock(&rt->lock);
459     tb = g_tree_lookup(rt->tree, &s);
460     qemu_mutex_unlock(&rt->lock);
461     return tb;
462 }
463 
464 static void tcg_region_tree_lock_all(void)
465 {
466     size_t i;
467 
468     for (i = 0; i < region.n; i++) {
469         struct tcg_region_tree *rt = region_trees + i * tree_size;
470 
471         qemu_mutex_lock(&rt->lock);
472     }
473 }
474 
475 static void tcg_region_tree_unlock_all(void)
476 {
477     size_t i;
478 
479     for (i = 0; i < region.n; i++) {
480         struct tcg_region_tree *rt = region_trees + i * tree_size;
481 
482         qemu_mutex_unlock(&rt->lock);
483     }
484 }
485 
486 void tcg_tb_foreach(GTraverseFunc func, gpointer user_data)
487 {
488     size_t i;
489 
490     tcg_region_tree_lock_all();
491     for (i = 0; i < region.n; i++) {
492         struct tcg_region_tree *rt = region_trees + i * tree_size;
493 
494         g_tree_foreach(rt->tree, func, user_data);
495     }
496     tcg_region_tree_unlock_all();
497 }
498 
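/*
 * Sketch: a GTraverseFunc for use with tcg_tb_foreach(), tallying the
 * bytes of translated code.  Illustrative only; invoke it as e.g.
 *     size_t total = 0;
 *     tcg_tb_foreach(example_sum_tb_bytes, &total);
 */
static gboolean __attribute__((unused))
example_sum_tb_bytes(gpointer k, gpointer v, gpointer data)
{
    const TranslationBlock *tb = v;
    size_t *total = data;

    *total += tb->tc.size;
    return FALSE; /* FALSE keeps the traversal going */
}
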
499 size_t tcg_nb_tbs(void)
500 {
501     size_t nb_tbs = 0;
502     size_t i;
503 
504     tcg_region_tree_lock_all();
505     for (i = 0; i < region.n; i++) {
506         struct tcg_region_tree *rt = region_trees + i * tree_size;
507 
508         nb_tbs += g_tree_nnodes(rt->tree);
509     }
510     tcg_region_tree_unlock_all();
511     return nb_tbs;
512 }
513 
514 static gboolean tcg_region_tree_traverse(gpointer k, gpointer v, gpointer data)
515 {
516     TranslationBlock *tb = v;
517 
518     tb_destroy(tb);
519     return FALSE;
520 }
521 
522 static void tcg_region_tree_reset_all(void)
523 {
524     size_t i;
525 
526     tcg_region_tree_lock_all();
527     for (i = 0; i < region.n; i++) {
528         struct tcg_region_tree *rt = region_trees + i * tree_size;
529 
530         g_tree_foreach(rt->tree, tcg_region_tree_traverse, NULL);
531         /* Increment the refcount first so that destroy acts as a reset */
532         g_tree_ref(rt->tree);
533         g_tree_destroy(rt->tree);
534     }
535     tcg_region_tree_unlock_all();
536 }
537 
538 static void tcg_region_bounds(size_t curr_region, void **pstart, void **pend)
539 {
540     void *start, *end;
541 
542     start = region.start_aligned + curr_region * region.stride;
543     end = start + region.size;
544 
545     if (curr_region == 0) {
546         start = region.start;
547     }
548     if (curr_region == region.n - 1) {
549         end = region.end;
550     }
551 
552     *pstart = start;
553     *pend = end;
554 }
555 
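/*
 * Worked example with illustrative numbers: take a 4 KiB page size,
 * region.size = 16 KiB and region.stride = 20 KiB (code plus one guard
 * page).  Region 2 then spans
 *     start = start_aligned + 2 * 20 KiB
 *     end   = start + 16 KiB
 * and its guard page occupies [end, end + 4 KiB).
 */
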
556 static void tcg_region_assign(TCGContext *s, size_t curr_region)
557 {
558     void *start, *end;
559 
560     tcg_region_bounds(curr_region, &start, &end);
561 
562     s->code_gen_buffer = start;
563     s->code_gen_ptr = start;
564     s->code_gen_buffer_size = end - start;
565     s->code_gen_highwater = end - TCG_HIGHWATER;
566 }
567 
568 static bool tcg_region_alloc__locked(TCGContext *s)
569 {
570     if (region.current == region.n) {
571         return true;
572     }
573     tcg_region_assign(s, region.current);
574     region.current++;
575     return false;
576 }
577 
578 /*
579  * Request a new region once the one in use has filled up.
580  * Returns true on error.
581  */
582 static bool tcg_region_alloc(TCGContext *s)
583 {
584     bool err;
585     /* read the region size now; alloc__locked will overwrite it on success */
586     size_t size_full = s->code_gen_buffer_size;
587 
588     qemu_mutex_lock(&region.lock);
589     err = tcg_region_alloc__locked(s);
590     if (!err) {
591         region.agg_size_full += size_full - TCG_HIGHWATER;
592     }
593     qemu_mutex_unlock(&region.lock);
594     return err;
595 }
596 
597 /*
598  * Perform a context's first region allocation.
599  * This function does _not_ increment region.agg_size_full.
600  */
601 static inline bool tcg_region_initial_alloc__locked(TCGContext *s)
602 {
603     return tcg_region_alloc__locked(s);
604 }
605 
606 /* Call from a safe-work context */
607 void tcg_region_reset_all(void)
608 {
609     unsigned int n_ctxs = qatomic_read(&n_tcg_ctxs);
610     unsigned int i;
611 
612     qemu_mutex_lock(&region.lock);
613     region.current = 0;
614     region.agg_size_full = 0;
615 
616     for (i = 0; i < n_ctxs; i++) {
617         TCGContext *s = qatomic_read(&tcg_ctxs[i]);
618         bool err = tcg_region_initial_alloc__locked(s);
619 
620         g_assert(!err);
621     }
622     qemu_mutex_unlock(&region.lock);
623 
624     tcg_region_tree_reset_all();
625 }
626 
627 #ifdef CONFIG_USER_ONLY
628 static size_t tcg_n_regions(void)
629 {
630     return 1;
631 }
632 #else
633 /*
634  * It is likely that some vCPUs will translate more code than others, so we
635  * first try to set more regions than max_cpus, with those regions being of
636  * reasonable size. If that's not possible we make do by evenly dividing
637  * the code_gen_buffer among the vCPUs.
638  */
639 static size_t tcg_n_regions(void)
640 {
641     size_t i;
642 
643     /* Use a single region if all we have is one vCPU thread */
644 #if !defined(CONFIG_USER_ONLY)
645     MachineState *ms = MACHINE(qdev_get_machine());
646     unsigned int max_cpus = ms->smp.max_cpus;
647 #endif
648     if (max_cpus == 1 || !qemu_tcg_mttcg_enabled()) {
649         return 1;
650     }
651 
652     /* Try to have more regions than max_cpus, with each region being >= 2 MB */
653     for (i = 8; i > 0; i--) {
654         size_t regions_per_thread = i;
655         size_t region_size;
656 
657         region_size = tcg_init_ctx.code_gen_buffer_size;
658         region_size /= max_cpus * regions_per_thread;
659 
660         if (region_size >= 2 * 1024u * 1024) {
661             return max_cpus * regions_per_thread;
662         }
663     }
664     /* If we can't, then just allocate one region per vCPU thread */
665     return max_cpus;
666 }
667 #endif
668 
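/*
 * Worked example for tcg_n_regions() above: with a 1 GiB
 * code_gen_buffer and max_cpus == 8, the first iteration (i == 8)
 * yields region_size = 1 GiB / 64 = 16 MiB >= 2 MiB, so 64 regions
 * are used.
 */
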
669 /*
670  * Initializes region partitioning.
671  *
672  * Called at init time from the parent thread (i.e. the one calling
673  * tcg_context_init), after the target's TCG globals have been set.
674  *
675  * Region partitioning works by splitting code_gen_buffer into separate regions,
676  * and then assigning regions to TCG threads so that the threads can translate
677  * code in parallel without synchronization.
678  *
679  * In softmmu the number of TCG threads is bounded by max_cpus, so we use at
680  * least max_cpus regions in MTTCG. In !MTTCG we use a single region.
681  * Note that the TCG options from the command-line (i.e. -accel accel=tcg,[...])
682  * must have been parsed before calling this function, since it calls
683  * qemu_tcg_mttcg_enabled().
684  *
685  * In user-mode we use a single region.  Having multiple regions in user-mode
686  * is not supported, because the number of vCPU threads (recall that each thread
687  * spawned by the guest corresponds to a vCPU thread) is only bounded by the
688  * OS, and usually this number is huge (tens of thousands is not uncommon).
689  * Thus, given this large bound on the number of vCPU threads and the fact
690  * that code_gen_buffer is allocated at compile-time, we cannot guarantee
691  * the availability of at least one region per vCPU thread.
692  *
693  * However, this user-mode limitation is unlikely to be a significant problem
694  * in practice. Multi-threaded guests share most if not all of their translated
695  * code, which makes parallel code generation less appealing than in softmmu.
696  */
697 void tcg_region_init(void)
698 {
699     void *buf = tcg_init_ctx.code_gen_buffer;
700     void *aligned;
701     size_t size = tcg_init_ctx.code_gen_buffer_size;
702     size_t page_size = qemu_real_host_page_size;
703     size_t region_size;
704     size_t n_regions;
705     size_t i;
706     uintptr_t splitwx_diff;
707 
708     n_regions = tcg_n_regions();
709 
710     /* The first region will be 'aligned - buf' bytes larger than the others */
711     aligned = QEMU_ALIGN_PTR_UP(buf, page_size);
712     g_assert(aligned < tcg_init_ctx.code_gen_buffer + size);
713     /*
714      * Make region_size a multiple of page_size, using aligned as the start.
715      * As a result of this we might end up with a few extra pages at the end of
716      * the buffer; we will assign those to the last region.
717      */
718     region_size = (size - (aligned - buf)) / n_regions;
719     region_size = QEMU_ALIGN_DOWN(region_size, page_size);
720 
721     /* A region must have at least 2 pages; one code, one guard */
722     g_assert(region_size >= 2 * page_size);
723 
724     /* init the region struct */
725     qemu_mutex_init(&region.lock);
726     region.n = n_regions;
727     region.size = region_size - page_size;
728     region.stride = region_size;
729     region.start = buf;
730     region.start_aligned = aligned;
731     /* page-align the end, since its last page will be a guard page */
732     region.end = QEMU_ALIGN_PTR_DOWN(buf + size, page_size);
733     /* account for that last guard page */
734     region.end -= page_size;
735 
736     /* set guard pages */
737     splitwx_diff = tcg_splitwx_diff;
738     for (i = 0; i < region.n; i++) {
739         void *start, *end;
740         int rc;
741 
742         tcg_region_bounds(i, &start, &end);
743         rc = qemu_mprotect_none(end, page_size);
744         g_assert(!rc);
745         if (splitwx_diff) {
746             rc = qemu_mprotect_none(end + splitwx_diff, page_size);
747             g_assert(!rc);
748         }
749     }
750 
751     tcg_region_trees_init();
752 
753     /* In user-mode we support only one ctx, so do the initial allocation now */
754 #ifdef CONFIG_USER_ONLY
755     {
756         bool err = tcg_region_initial_alloc__locked(tcg_ctx);
757 
758         g_assert(!err);
759     }
760 #endif
761 }
762 
763 #ifdef CONFIG_DEBUG_TCG
764 const void *tcg_splitwx_to_rx(void *rw)
765 {
766     /* Pass NULL pointers unchanged. */
767     if (rw) {
768         g_assert(in_code_gen_buffer(rw));
769         rw += tcg_splitwx_diff;
770     }
771     return rw;
772 }
773 
774 void *tcg_splitwx_to_rw(const void *rx)
775 {
776     /* Pass NULL pointers unchanged. */
777     if (rx) {
778         rx -= tcg_splitwx_diff;
779         /* Assert that we end with a pointer in the rw region. */
780         g_assert(in_code_gen_buffer(rx));
781     }
782     return (void *)rx;
783 }
784 #endif /* CONFIG_DEBUG_TCG */
785 
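/*
 * Sketch: the invariant behind the split-w^x aliases.  Code is written
 * through the rw view and executed from the rx view, tcg_splitwx_diff
 * bytes away; the two conversions invert each other.  The function is
 * illustrative only.
 */
static void __attribute__((unused)) example_splitwx_round_trip(void *rw)
{
    const void *rx = tcg_splitwx_to_rx(rw);

    g_assert(tcg_splitwx_to_rw(rx) == rw);
}
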
786 static void alloc_tcg_plugin_context(TCGContext *s)
787 {
788 #ifdef CONFIG_PLUGIN
789     s->plugin_tb = g_new0(struct qemu_plugin_tb, 1);
790     s->plugin_tb->insns =
791         g_ptr_array_new_with_free_func(qemu_plugin_insn_cleanup_fn);
792 #endif
793 }
794 
795 /*
796  * All TCG threads except the parent (i.e. the one that called tcg_context_init
797  * and registered the target's TCG globals) must register with this function
798  * before initiating translation.
799  *
800  * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation
801  * of tcg_region_init() for the reasoning behind this.
802  *
803  * In softmmu each caller registers its context in tcg_ctxs[]. Note that in
804  * softmmu tcg_ctxs[] does not track tcg_init_ctx, since the initial context
805  * is not used anymore for translation once this function is called.
806  *
807  * Not tracking tcg_init_ctx in tcg_ctxs[] in softmmu keeps code that iterates
808  * over the array (e.g. tcg_code_size()) the same for both softmmu and user-mode.
809  */
810 #ifdef CONFIG_USER_ONLY
811 void tcg_register_thread(void)
812 {
813     tcg_ctx = &tcg_init_ctx;
814 }
815 #else
816 void tcg_register_thread(void)
817 {
818     MachineState *ms = MACHINE(qdev_get_machine());
819     TCGContext *s = g_malloc(sizeof(*s));
820     unsigned int i, n;
821     bool err;
822 
823     *s = tcg_init_ctx;
824 
825     /* Relink mem_base.  */
826     for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) {
827         if (tcg_init_ctx.temps[i].mem_base) {
828             ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps;
829             tcg_debug_assert(b >= 0 && b < n);
830             s->temps[i].mem_base = &s->temps[b];
831         }
832     }
833 
834     /* Claim an entry in tcg_ctxs */
835     n = qatomic_fetch_inc(&n_tcg_ctxs);
836     g_assert(n < ms->smp.max_cpus);
837     qatomic_set(&tcg_ctxs[n], s);
838 
839     if (n > 0) {
840         alloc_tcg_plugin_context(s);
841     }
842 
843     tcg_ctx = s;
844     qemu_mutex_lock(&region.lock);
845     err = tcg_region_initial_alloc__locked(tcg_ctx);
846     g_assert(!err);
847     qemu_mutex_unlock(&region.lock);
848 }
849 #endif /* !CONFIG_USER_ONLY */
850 
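/*
 * Sketch: where tcg_register_thread() fits in a softmmu vCPU thread.
 * Illustrative only; the real call sites are in the accel/tcg vCPU
 * loops.
 */
static void __attribute__((unused)) example_vcpu_thread_start(void)
{
    tcg_register_thread();  /* claim a TCGContext and its first region */
    /* ... per-vCPU setup, then enter the execution loop ... */
}
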
851 /*
852  * Returns the size (in bytes) of all translated code (i.e. from all regions)
853  * currently in the cache.
854  * See also: tcg_code_capacity()
855  * Do not confuse with tcg_current_code_size(); that one applies to a single
856  * TCG context.
857  */
858 size_t tcg_code_size(void)
859 {
860     unsigned int n_ctxs = qatomic_read(&n_tcg_ctxs);
861     unsigned int i;
862     size_t total;
863 
864     qemu_mutex_lock(&region.lock);
865     total = region.agg_size_full;
866     for (i = 0; i < n_ctxs; i++) {
867         const TCGContext *s = qatomic_read(&tcg_ctxs[i]);
868         size_t size;
869 
870         size = qatomic_read(&s->code_gen_ptr) - s->code_gen_buffer;
871         g_assert(size <= s->code_gen_buffer_size);
872         total += size;
873     }
874     qemu_mutex_unlock(&region.lock);
875     return total;
876 }
877 
878 /*
879  * Returns the code capacity (in bytes) of the entire cache, i.e. including all
880  * regions.
881  * See also: tcg_code_size()
882  */
883 size_t tcg_code_capacity(void)
884 {
885     size_t guard_size, capacity;
886 
887     /* no need for synchronization; these variables are set at init time */
888     guard_size = region.stride - region.size;
889     capacity = region.end + guard_size - region.start;
890     capacity -= region.n * (guard_size + TCG_HIGHWATER);
891     return capacity;
892 }
893 
894 size_t tcg_tb_phys_invalidate_count(void)
895 {
896     unsigned int n_ctxs = qatomic_read(&n_tcg_ctxs);
897     unsigned int i;
898     size_t total = 0;
899 
900     for (i = 0; i < n_ctxs; i++) {
901         const TCGContext *s = qatomic_read(&tcg_ctxs[i]);
902 
903         total += qatomic_read(&s->tb_phys_invalidate_count);
904     }
905     return total;
906 }
907 
908 /* pool-based memory allocation */
909 void *tcg_malloc_internal(TCGContext *s, int size)
910 {
911     TCGPool *p;
912     int pool_size;
913 
914     if (size > TCG_POOL_CHUNK_SIZE) {
915         /* big malloc: insert a new pool (XXX: could optimize) */
916         p = g_malloc(sizeof(TCGPool) + size);
917         p->size = size;
918         p->next = s->pool_first_large;
919         s->pool_first_large = p;
920         return p->data;
921     } else {
922         p = s->pool_current;
923         if (!p) {
924             p = s->pool_first;
925             if (!p)
926                 goto new_pool;
927         } else {
928             if (!p->next) {
929             new_pool:
930                 pool_size = TCG_POOL_CHUNK_SIZE;
931                 p = g_malloc(sizeof(TCGPool) + pool_size);
932                 p->size = pool_size;
933                 p->next = NULL;
934                 if (s->pool_current)
935                     s->pool_current->next = p;
936                 else
937                     s->pool_first = p;
938             } else {
939                 p = p->next;
940             }
941         }
942     }
943     s->pool_current = p;
944     s->pool_cur = p->data + size;
945     s->pool_end = p->data + p->size;
946     return p->data;
947 }
948 
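/*
 * Sketch: typical pool usage.  tcg_malloc() (a fast-path inline in
 * tcg.h) carves small allocations out of the current chunk and falls
 * back to tcg_malloc_internal() above; there is no per-object free.
 * Illustrative only.
 */
static void __attribute__((unused)) example_pool_usage(TCGContext *s)
{
    TCGRelocation *r = tcg_malloc(sizeof(*r));

    (void)r;            /* used for the duration of one translation */
    tcg_pool_reset(s);  /* then everything is released at once */
}
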
949 void tcg_pool_reset(TCGContext *s)
950 {
951     TCGPool *p, *t;
952     for (p = s->pool_first_large; p; p = t) {
953         t = p->next;
954         g_free(p);
955     }
956     s->pool_first_large = NULL;
957     s->pool_cur = s->pool_end = NULL;
958     s->pool_current = NULL;
959 }
960 
961 typedef struct TCGHelperInfo {
962     void *func;
963     const char *name;
964     unsigned flags;
965     unsigned sizemask;
966 } TCGHelperInfo;
967 
968 #include "exec/helper-proto.h"
969 
970 static const TCGHelperInfo all_helpers[] = {
971 #include "exec/helper-tcg.h"
972 };
973 static GHashTable *helper_table;
974 
975 static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)];
976 static void process_op_defs(TCGContext *s);
977 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
978                                             TCGReg reg, const char *name);
979 
980 void tcg_context_init(TCGContext *s)
981 {
982     int op, total_args, n, i;
983     TCGOpDef *def;
984     TCGArgConstraint *args_ct;
985     TCGTemp *ts;
986 
987     memset(s, 0, sizeof(*s));
988     s->nb_globals = 0;
989 
990     /* Count total number of arguments and allocate the corresponding
991        space */
992     total_args = 0;
993     for(op = 0; op < NB_OPS; op++) {
994         def = &tcg_op_defs[op];
995         n = def->nb_iargs + def->nb_oargs;
996         total_args += n;
997     }
998 
999     args_ct = g_new0(TCGArgConstraint, total_args);
1000 
1001     for(op = 0; op < NB_OPS; op++) {
1002         def = &tcg_op_defs[op];
1003         def->args_ct = args_ct;
1004         n = def->nb_iargs + def->nb_oargs;
1005         args_ct += n;
1006     }
1007 
1008     /* Register helpers.  */
1009     /* Use g_direct_hash/equal for direct pointer comparisons on func.  */
1010     helper_table = g_hash_table_new(NULL, NULL);
1011 
1012     for (i = 0; i < ARRAY_SIZE(all_helpers); ++i) {
1013         g_hash_table_insert(helper_table, (gpointer)all_helpers[i].func,
1014                             (gpointer)&all_helpers[i]);
1015     }
1016 
1017     tcg_target_init(s);
1018     process_op_defs(s);
1019 
1020     /* Reverse the order of the saved registers, assuming they're all at
1021        the start of tcg_target_reg_alloc_order.  */
1022     for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) {
1023         int r = tcg_target_reg_alloc_order[n];
1024         if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) {
1025             break;
1026         }
1027     }
1028     for (i = 0; i < n; ++i) {
1029         indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i];
1030     }
1031     for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) {
1032         indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i];
1033     }
1034 
1035     alloc_tcg_plugin_context(s);
1036 
1037     tcg_ctx = s;
1038     /*
1039      * In user-mode we simply share the init context among threads, since we
1040      * use a single region. See the documentation of tcg_region_init() for the
1041      * reasoning behind this.
1042      * In softmmu we will have at most max_cpus TCG threads.
1043      */
1044 #ifdef CONFIG_USER_ONLY
1045     tcg_ctxs = &tcg_ctx;
1046     n_tcg_ctxs = 1;
1047 #else
1048     MachineState *ms = MACHINE(qdev_get_machine());
1049     unsigned int max_cpus = ms->smp.max_cpus;
1050     tcg_ctxs = g_new(TCGContext *, max_cpus);
1051 #endif
1052 
1053     tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0));
1054     ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env");
1055     cpu_env = temp_tcgv_ptr(ts);
1056 }
1057 
1058 /*
1059  * Allocate TBs right before their corresponding translated code, making
1060  * sure that TBs and code are on different cache lines.
1061  */
1062 TranslationBlock *tcg_tb_alloc(TCGContext *s)
1063 {
1064     uintptr_t align = qemu_icache_linesize;
1065     TranslationBlock *tb;
1066     void *next;
1067 
1068  retry:
1069     tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align);
1070     next = (void *)ROUND_UP((uintptr_t)(tb + 1), align);
1071 
1072     if (unlikely(next > s->code_gen_highwater)) {
1073         if (tcg_region_alloc(s)) {
1074             return NULL;
1075         }
1076         goto retry;
1077     }
1078     qatomic_set(&s->code_gen_ptr, next);
1079     s->data_gen_ptr = NULL;
1080     return tb;
1081 }
1082 
1083 void tcg_prologue_init(TCGContext *s)
1084 {
1085     size_t prologue_size, total_size;
1086     void *buf0, *buf1;
1087 
1088     /* Put the prologue at the beginning of code_gen_buffer.  */
1089     buf0 = s->code_gen_buffer;
1090     total_size = s->code_gen_buffer_size;
1091     s->code_ptr = buf0;
1092     s->code_buf = buf0;
1093     s->data_gen_ptr = NULL;
1094 
1095     /*
1096      * The region trees are not yet configured, but tcg_splitwx_to_rx
1097      * needs the bounds for an assert.
1098      */
1099     region.start = buf0;
1100     region.end = buf0 + total_size;
1101 
1102 #ifndef CONFIG_TCG_INTERPRETER
1103     tcg_qemu_tb_exec = (tcg_prologue_fn *)tcg_splitwx_to_rx(buf0);
1104 #endif
1105 
1106     /* Compute a high-water mark, at which we voluntarily flush the buffer
1107        and start over.  The size here is arbitrary, significantly larger
1108        than we expect the code generation for any one opcode to require.  */
1109     s->code_gen_highwater = s->code_gen_buffer + (total_size - TCG_HIGHWATER);
1110 
1111 #ifdef TCG_TARGET_NEED_POOL_LABELS
1112     s->pool_labels = NULL;
1113 #endif
1114 
1115     /* Generate the prologue.  */
1116     tcg_target_qemu_prologue(s);
1117 
1118 #ifdef TCG_TARGET_NEED_POOL_LABELS
1119     /* Allow the prologue to put e.g. guest_base into a pool entry.  */
1120     {
1121         int result = tcg_out_pool_finalize(s);
1122         tcg_debug_assert(result == 0);
1123     }
1124 #endif
1125 
1126     buf1 = s->code_ptr;
1127 #ifndef CONFIG_TCG_INTERPRETER
1128     flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(buf0), (uintptr_t)buf0,
1129                         tcg_ptr_byte_diff(buf1, buf0));
1130 #endif
1131 
1132     /* Deduct the prologue from the buffer.  */
1133     prologue_size = tcg_current_code_size(s);
1134     s->code_gen_ptr = buf1;
1135     s->code_gen_buffer = buf1;
1136     s->code_buf = buf1;
1137     total_size -= prologue_size;
1138     s->code_gen_buffer_size = total_size;
1139 
1140     tcg_register_jit(tcg_splitwx_to_rx(s->code_gen_buffer), total_size);
1141 
1142 #ifdef DEBUG_DISAS
1143     if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
1144         FILE *logfile = qemu_log_lock();
1145         qemu_log("PROLOGUE: [size=%zu]\n", prologue_size);
1146         if (s->data_gen_ptr) {
1147             size_t code_size = s->data_gen_ptr - buf0;
1148             size_t data_size = prologue_size - code_size;
1149             size_t i;
1150 
1151             log_disas(buf0, code_size);
1152 
1153             for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) {
1154                 if (sizeof(tcg_target_ulong) == 8) {
1155                     qemu_log("0x%08" PRIxPTR ":  .quad  0x%016" PRIx64 "\n",
1156                              (uintptr_t)s->data_gen_ptr + i,
1157                              *(uint64_t *)(s->data_gen_ptr + i));
1158                 } else {
1159                     qemu_log("0x%08" PRIxPTR ":  .long  0x%08x\n",
1160                              (uintptr_t)s->data_gen_ptr + i,
1161                              *(uint32_t *)(s->data_gen_ptr + i));
1162                 }
1163             }
1164         } else {
1165             log_disas(buf0, prologue_size);
1166         }
1167         qemu_log("\n");
1168         qemu_log_flush();
1169         qemu_log_unlock(logfile);
1170     }
1171 #endif
1172 
1173     /* Assert that goto_ptr is implemented completely.  */
1174     if (TCG_TARGET_HAS_goto_ptr) {
1175         tcg_debug_assert(tcg_code_gen_epilogue != NULL);
1176     }
1177 }
1178 
1179 void tcg_func_start(TCGContext *s)
1180 {
1181     tcg_pool_reset(s);
1182     s->nb_temps = s->nb_globals;
1183 
1184     /* No temps have been previously allocated for size or locality.  */
1185     memset(s->free_temps, 0, sizeof(s->free_temps));
1186 
1187     s->nb_ops = 0;
1188     s->nb_labels = 0;
1189     s->current_frame_offset = s->frame_start;
1190 
1191 #ifdef CONFIG_DEBUG_TCG
1192     s->goto_tb_issue_mask = 0;
1193 #endif
1194 
1195     QTAILQ_INIT(&s->ops);
1196     QTAILQ_INIT(&s->free_ops);
1197     QSIMPLEQ_INIT(&s->labels);
1198 }
1199 
1200 static inline TCGTemp *tcg_temp_alloc(TCGContext *s)
1201 {
1202     int n = s->nb_temps++;
1203     tcg_debug_assert(n < TCG_MAX_TEMPS);
1204     return memset(&s->temps[n], 0, sizeof(TCGTemp));
1205 }
1206 
1207 static inline TCGTemp *tcg_global_alloc(TCGContext *s)
1208 {
1209     TCGTemp *ts;
1210 
1211     tcg_debug_assert(s->nb_globals == s->nb_temps);
1212     s->nb_globals++;
1213     ts = tcg_temp_alloc(s);
1214     ts->temp_global = 1;
1215 
1216     return ts;
1217 }
1218 
1219 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
1220                                             TCGReg reg, const char *name)
1221 {
1222     TCGTemp *ts;
1223 
1224     if (TCG_TARGET_REG_BITS == 32 && type != TCG_TYPE_I32) {
1225         tcg_abort();
1226     }
1227 
1228     ts = tcg_global_alloc(s);
1229     ts->base_type = type;
1230     ts->type = type;
1231     ts->fixed_reg = 1;
1232     ts->reg = reg;
1233     ts->name = name;
1234     tcg_regset_set_reg(s->reserved_regs, reg);
1235 
1236     return ts;
1237 }
1238 
1239 void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size)
1240 {
1241     s->frame_start = start;
1242     s->frame_end = start + size;
1243     s->frame_temp
1244         = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame");
1245 }
1246 
1247 TCGTemp *tcg_global_mem_new_internal(TCGType type, TCGv_ptr base,
1248                                      intptr_t offset, const char *name)
1249 {
1250     TCGContext *s = tcg_ctx;
1251     TCGTemp *base_ts = tcgv_ptr_temp(base);
1252     TCGTemp *ts = tcg_global_alloc(s);
1253     int indirect_reg = 0, bigendian = 0;
1254 #ifdef HOST_WORDS_BIGENDIAN
1255     bigendian = 1;
1256 #endif
1257 
1258     if (!base_ts->fixed_reg) {
1259         /* We do not support double-indirect registers.  */
1260         tcg_debug_assert(!base_ts->indirect_reg);
1261         base_ts->indirect_base = 1;
1262         s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64
1263                             ? 2 : 1);
1264         indirect_reg = 1;
1265     }
1266 
1267     if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
1268         TCGTemp *ts2 = tcg_global_alloc(s);
1269         char buf[64];
1270 
1271         ts->base_type = TCG_TYPE_I64;
1272         ts->type = TCG_TYPE_I32;
1273         ts->indirect_reg = indirect_reg;
1274         ts->mem_allocated = 1;
1275         ts->mem_base = base_ts;
1276         ts->mem_offset = offset + bigendian * 4;
1277         pstrcpy(buf, sizeof(buf), name);
1278         pstrcat(buf, sizeof(buf), "_0");
1279         ts->name = strdup(buf);
1280 
1281         tcg_debug_assert(ts2 == ts + 1);
1282         ts2->base_type = TCG_TYPE_I64;
1283         ts2->type = TCG_TYPE_I32;
1284         ts2->indirect_reg = indirect_reg;
1285         ts2->mem_allocated = 1;
1286         ts2->mem_base = base_ts;
1287         ts2->mem_offset = offset + (1 - bigendian) * 4;
1288         pstrcpy(buf, sizeof(buf), name);
1289         pstrcat(buf, sizeof(buf), "_1");
1290         ts2->name = strdup(buf);
1291     } else {
1292         ts->base_type = type;
1293         ts->type = type;
1294         ts->indirect_reg = indirect_reg;
1295         ts->mem_allocated = 1;
1296         ts->mem_base = base_ts;
1297         ts->mem_offset = offset;
1298         ts->name = name;
1299     }
1300     return ts;
1301 }
1302 
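/*
 * Example: on a 32-bit host, a 64-bit global "foo" at offset 0x10 is
 * split below into two 32-bit halves.  foo_0 holds the low 32 bits and
 * lands at 0x10 on a little-endian host but 0x14 on a big-endian one;
 * foo_1 (the high half) takes the other word.
 */
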
1303 TCGTemp *tcg_temp_new_internal(TCGType type, bool temp_local)
1304 {
1305     TCGContext *s = tcg_ctx;
1306     TCGTemp *ts;
1307     int idx, k;
1308 
1309     k = type + (temp_local ? TCG_TYPE_COUNT : 0);
1310     idx = find_first_bit(s->free_temps[k].l, TCG_MAX_TEMPS);
1311     if (idx < TCG_MAX_TEMPS) {
1312         /* There is already an available temp with the right type.  */
1313         clear_bit(idx, s->free_temps[k].l);
1314 
1315         ts = &s->temps[idx];
1316         ts->temp_allocated = 1;
1317         tcg_debug_assert(ts->base_type == type);
1318         tcg_debug_assert(ts->temp_local == temp_local);
1319     } else {
1320         ts = tcg_temp_alloc(s);
1321         if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
1322             TCGTemp *ts2 = tcg_temp_alloc(s);
1323 
1324             ts->base_type = type;
1325             ts->type = TCG_TYPE_I32;
1326             ts->temp_allocated = 1;
1327             ts->temp_local = temp_local;
1328 
1329             tcg_debug_assert(ts2 == ts + 1);
1330             ts2->base_type = TCG_TYPE_I64;
1331             ts2->type = TCG_TYPE_I32;
1332             ts2->temp_allocated = 1;
1333             ts2->temp_local = temp_local;
1334         } else {
1335             ts->base_type = type;
1336             ts->type = type;
1337             ts->temp_allocated = 1;
1338             ts->temp_local = temp_local;
1339         }
1340     }
1341 
1342 #if defined(CONFIG_DEBUG_TCG)
1343     s->temps_in_use++;
1344 #endif
1345     return ts;
1346 }
1347 
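/*
 * Sketch: the reuse cycle enabled by the free_temps bitmaps.  The typed
 * wrappers used here (tcg_temp_new_i32(), tcg_temp_free_i32(), from
 * tcg.h) funnel into tcg_temp_new_internal()/tcg_temp_free_internal().
 */
static void __attribute__((unused)) example_temp_reuse(void)
{
    TCGv_i32 t = tcg_temp_new_i32();

    tcg_temp_free_i32(t);   /* marks the slot free in free_temps */
    t = tcg_temp_new_i32(); /* find_first_bit() hands the slot back */
    tcg_temp_free_i32(t);
}
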
1348 TCGv_vec tcg_temp_new_vec(TCGType type)
1349 {
1350     TCGTemp *t;
1351 
1352 #ifdef CONFIG_DEBUG_TCG
1353     switch (type) {
1354     case TCG_TYPE_V64:
1355         assert(TCG_TARGET_HAS_v64);
1356         break;
1357     case TCG_TYPE_V128:
1358         assert(TCG_TARGET_HAS_v128);
1359         break;
1360     case TCG_TYPE_V256:
1361         assert(TCG_TARGET_HAS_v256);
1362         break;
1363     default:
1364         g_assert_not_reached();
1365     }
1366 #endif
1367 
1368     t = tcg_temp_new_internal(type, 0);
1369     return temp_tcgv_vec(t);
1370 }
1371 
1372 /* Create a new temp of the same type as an existing temp.  */
1373 TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match)
1374 {
1375     TCGTemp *t = tcgv_vec_temp(match);
1376 
1377     tcg_debug_assert(t->temp_allocated != 0);
1378 
1379     t = tcg_temp_new_internal(t->base_type, 0);
1380     return temp_tcgv_vec(t);
1381 }
1382 
1383 void tcg_temp_free_internal(TCGTemp *ts)
1384 {
1385     TCGContext *s = tcg_ctx;
1386     int k, idx;
1387 
1388 #if defined(CONFIG_DEBUG_TCG)
1389     s->temps_in_use--;
1390     if (s->temps_in_use < 0) {
1391         fprintf(stderr, "More temporaries freed than allocated!\n");
1392     }
1393 #endif
1394 
1395     tcg_debug_assert(ts->temp_global == 0);
1396     tcg_debug_assert(ts->temp_allocated != 0);
1397     ts->temp_allocated = 0;
1398 
1399     idx = temp_idx(ts);
1400     k = ts->base_type + (ts->temp_local ? TCG_TYPE_COUNT : 0);
1401     set_bit(idx, s->free_temps[k].l);
1402 }
1403 
1404 TCGv_i32 tcg_const_i32(int32_t val)
1405 {
1406     TCGv_i32 t0;
1407     t0 = tcg_temp_new_i32();
1408     tcg_gen_movi_i32(t0, val);
1409     return t0;
1410 }
1411 
1412 TCGv_i64 tcg_const_i64(int64_t val)
1413 {
1414     TCGv_i64 t0;
1415     t0 = tcg_temp_new_i64();
1416     tcg_gen_movi_i64(t0, val);
1417     return t0;
1418 }
1419 
1420 TCGv_i32 tcg_const_local_i32(int32_t val)
1421 {
1422     TCGv_i32 t0;
1423     t0 = tcg_temp_local_new_i32();
1424     tcg_gen_movi_i32(t0, val);
1425     return t0;
1426 }
1427 
1428 TCGv_i64 tcg_const_local_i64(int64_t val)
1429 {
1430     TCGv_i64 t0;
1431     t0 = tcg_temp_local_new_i64();
1432     tcg_gen_movi_i64(t0, val);
1433     return t0;
1434 }
1435 
1436 #if defined(CONFIG_DEBUG_TCG)
1437 void tcg_clear_temp_count(void)
1438 {
1439     TCGContext *s = tcg_ctx;
1440     s->temps_in_use = 0;
1441 }
1442 
1443 int tcg_check_temp_count(void)
1444 {
1445     TCGContext *s = tcg_ctx;
1446     if (s->temps_in_use) {
1447         /* Clear the count so that we don't give another
1448          * warning immediately next time around.
1449          */
1450         s->temps_in_use = 0;
1451         return 1;
1452     }
1453     return 0;
1454 }
1455 #endif
1456 
1457 /* Return true if OP may appear in the opcode stream.
1458    Test the runtime variable that controls each opcode.  */
1459 bool tcg_op_supported(TCGOpcode op)
1460 {
1461     const bool have_vec
1462         = TCG_TARGET_HAS_v64 | TCG_TARGET_HAS_v128 | TCG_TARGET_HAS_v256;
1463 
1464     switch (op) {
1465     case INDEX_op_discard:
1466     case INDEX_op_set_label:
1467     case INDEX_op_call:
1468     case INDEX_op_br:
1469     case INDEX_op_mb:
1470     case INDEX_op_insn_start:
1471     case INDEX_op_exit_tb:
1472     case INDEX_op_goto_tb:
1473     case INDEX_op_qemu_ld_i32:
1474     case INDEX_op_qemu_st_i32:
1475     case INDEX_op_qemu_ld_i64:
1476     case INDEX_op_qemu_st_i64:
1477         return true;
1478 
1479     case INDEX_op_qemu_st8_i32:
1480         return TCG_TARGET_HAS_qemu_st8_i32;
1481 
1482     case INDEX_op_goto_ptr:
1483         return TCG_TARGET_HAS_goto_ptr;
1484 
1485     case INDEX_op_mov_i32:
1486     case INDEX_op_movi_i32:
1487     case INDEX_op_setcond_i32:
1488     case INDEX_op_brcond_i32:
1489     case INDEX_op_ld8u_i32:
1490     case INDEX_op_ld8s_i32:
1491     case INDEX_op_ld16u_i32:
1492     case INDEX_op_ld16s_i32:
1493     case INDEX_op_ld_i32:
1494     case INDEX_op_st8_i32:
1495     case INDEX_op_st16_i32:
1496     case INDEX_op_st_i32:
1497     case INDEX_op_add_i32:
1498     case INDEX_op_sub_i32:
1499     case INDEX_op_mul_i32:
1500     case INDEX_op_and_i32:
1501     case INDEX_op_or_i32:
1502     case INDEX_op_xor_i32:
1503     case INDEX_op_shl_i32:
1504     case INDEX_op_shr_i32:
1505     case INDEX_op_sar_i32:
1506         return true;
1507 
1508     case INDEX_op_movcond_i32:
1509         return TCG_TARGET_HAS_movcond_i32;
1510     case INDEX_op_div_i32:
1511     case INDEX_op_divu_i32:
1512         return TCG_TARGET_HAS_div_i32;
1513     case INDEX_op_rem_i32:
1514     case INDEX_op_remu_i32:
1515         return TCG_TARGET_HAS_rem_i32;
1516     case INDEX_op_div2_i32:
1517     case INDEX_op_divu2_i32:
1518         return TCG_TARGET_HAS_div2_i32;
1519     case INDEX_op_rotl_i32:
1520     case INDEX_op_rotr_i32:
1521         return TCG_TARGET_HAS_rot_i32;
1522     case INDEX_op_deposit_i32:
1523         return TCG_TARGET_HAS_deposit_i32;
1524     case INDEX_op_extract_i32:
1525         return TCG_TARGET_HAS_extract_i32;
1526     case INDEX_op_sextract_i32:
1527         return TCG_TARGET_HAS_sextract_i32;
1528     case INDEX_op_extract2_i32:
1529         return TCG_TARGET_HAS_extract2_i32;
1530     case INDEX_op_add2_i32:
1531         return TCG_TARGET_HAS_add2_i32;
1532     case INDEX_op_sub2_i32:
1533         return TCG_TARGET_HAS_sub2_i32;
1534     case INDEX_op_mulu2_i32:
1535         return TCG_TARGET_HAS_mulu2_i32;
1536     case INDEX_op_muls2_i32:
1537         return TCG_TARGET_HAS_muls2_i32;
1538     case INDEX_op_muluh_i32:
1539         return TCG_TARGET_HAS_muluh_i32;
1540     case INDEX_op_mulsh_i32:
1541         return TCG_TARGET_HAS_mulsh_i32;
1542     case INDEX_op_ext8s_i32:
1543         return TCG_TARGET_HAS_ext8s_i32;
1544     case INDEX_op_ext16s_i32:
1545         return TCG_TARGET_HAS_ext16s_i32;
1546     case INDEX_op_ext8u_i32:
1547         return TCG_TARGET_HAS_ext8u_i32;
1548     case INDEX_op_ext16u_i32:
1549         return TCG_TARGET_HAS_ext16u_i32;
1550     case INDEX_op_bswap16_i32:
1551         return TCG_TARGET_HAS_bswap16_i32;
1552     case INDEX_op_bswap32_i32:
1553         return TCG_TARGET_HAS_bswap32_i32;
1554     case INDEX_op_not_i32:
1555         return TCG_TARGET_HAS_not_i32;
1556     case INDEX_op_neg_i32:
1557         return TCG_TARGET_HAS_neg_i32;
1558     case INDEX_op_andc_i32:
1559         return TCG_TARGET_HAS_andc_i32;
1560     case INDEX_op_orc_i32:
1561         return TCG_TARGET_HAS_orc_i32;
1562     case INDEX_op_eqv_i32:
1563         return TCG_TARGET_HAS_eqv_i32;
1564     case INDEX_op_nand_i32:
1565         return TCG_TARGET_HAS_nand_i32;
1566     case INDEX_op_nor_i32:
1567         return TCG_TARGET_HAS_nor_i32;
1568     case INDEX_op_clz_i32:
1569         return TCG_TARGET_HAS_clz_i32;
1570     case INDEX_op_ctz_i32:
1571         return TCG_TARGET_HAS_ctz_i32;
1572     case INDEX_op_ctpop_i32:
1573         return TCG_TARGET_HAS_ctpop_i32;
1574 
1575     case INDEX_op_brcond2_i32:
1576     case INDEX_op_setcond2_i32:
1577         return TCG_TARGET_REG_BITS == 32;
1578 
1579     case INDEX_op_mov_i64:
1580     case INDEX_op_movi_i64:
1581     case INDEX_op_setcond_i64:
1582     case INDEX_op_brcond_i64:
1583     case INDEX_op_ld8u_i64:
1584     case INDEX_op_ld8s_i64:
1585     case INDEX_op_ld16u_i64:
1586     case INDEX_op_ld16s_i64:
1587     case INDEX_op_ld32u_i64:
1588     case INDEX_op_ld32s_i64:
1589     case INDEX_op_ld_i64:
1590     case INDEX_op_st8_i64:
1591     case INDEX_op_st16_i64:
1592     case INDEX_op_st32_i64:
1593     case INDEX_op_st_i64:
1594     case INDEX_op_add_i64:
1595     case INDEX_op_sub_i64:
1596     case INDEX_op_mul_i64:
1597     case INDEX_op_and_i64:
1598     case INDEX_op_or_i64:
1599     case INDEX_op_xor_i64:
1600     case INDEX_op_shl_i64:
1601     case INDEX_op_shr_i64:
1602     case INDEX_op_sar_i64:
1603     case INDEX_op_ext_i32_i64:
1604     case INDEX_op_extu_i32_i64:
1605         return TCG_TARGET_REG_BITS == 64;
1606 
1607     case INDEX_op_movcond_i64:
1608         return TCG_TARGET_HAS_movcond_i64;
1609     case INDEX_op_div_i64:
1610     case INDEX_op_divu_i64:
1611         return TCG_TARGET_HAS_div_i64;
1612     case INDEX_op_rem_i64:
1613     case INDEX_op_remu_i64:
1614         return TCG_TARGET_HAS_rem_i64;
1615     case INDEX_op_div2_i64:
1616     case INDEX_op_divu2_i64:
1617         return TCG_TARGET_HAS_div2_i64;
1618     case INDEX_op_rotl_i64:
1619     case INDEX_op_rotr_i64:
1620         return TCG_TARGET_HAS_rot_i64;
1621     case INDEX_op_deposit_i64:
1622         return TCG_TARGET_HAS_deposit_i64;
1623     case INDEX_op_extract_i64:
1624         return TCG_TARGET_HAS_extract_i64;
1625     case INDEX_op_sextract_i64:
1626         return TCG_TARGET_HAS_sextract_i64;
1627     case INDEX_op_extract2_i64:
1628         return TCG_TARGET_HAS_extract2_i64;
1629     case INDEX_op_extrl_i64_i32:
1630         return TCG_TARGET_HAS_extrl_i64_i32;
1631     case INDEX_op_extrh_i64_i32:
1632         return TCG_TARGET_HAS_extrh_i64_i32;
1633     case INDEX_op_ext8s_i64:
1634         return TCG_TARGET_HAS_ext8s_i64;
1635     case INDEX_op_ext16s_i64:
1636         return TCG_TARGET_HAS_ext16s_i64;
1637     case INDEX_op_ext32s_i64:
1638         return TCG_TARGET_HAS_ext32s_i64;
1639     case INDEX_op_ext8u_i64:
1640         return TCG_TARGET_HAS_ext8u_i64;
1641     case INDEX_op_ext16u_i64:
1642         return TCG_TARGET_HAS_ext16u_i64;
1643     case INDEX_op_ext32u_i64:
1644         return TCG_TARGET_HAS_ext32u_i64;
1645     case INDEX_op_bswap16_i64:
1646         return TCG_TARGET_HAS_bswap16_i64;
1647     case INDEX_op_bswap32_i64:
1648         return TCG_TARGET_HAS_bswap32_i64;
1649     case INDEX_op_bswap64_i64:
1650         return TCG_TARGET_HAS_bswap64_i64;
1651     case INDEX_op_not_i64:
1652         return TCG_TARGET_HAS_not_i64;
1653     case INDEX_op_neg_i64:
1654         return TCG_TARGET_HAS_neg_i64;
1655     case INDEX_op_andc_i64:
1656         return TCG_TARGET_HAS_andc_i64;
1657     case INDEX_op_orc_i64:
1658         return TCG_TARGET_HAS_orc_i64;
1659     case INDEX_op_eqv_i64:
1660         return TCG_TARGET_HAS_eqv_i64;
1661     case INDEX_op_nand_i64:
1662         return TCG_TARGET_HAS_nand_i64;
1663     case INDEX_op_nor_i64:
1664         return TCG_TARGET_HAS_nor_i64;
1665     case INDEX_op_clz_i64:
1666         return TCG_TARGET_HAS_clz_i64;
1667     case INDEX_op_ctz_i64:
1668         return TCG_TARGET_HAS_ctz_i64;
1669     case INDEX_op_ctpop_i64:
1670         return TCG_TARGET_HAS_ctpop_i64;
1671     case INDEX_op_add2_i64:
1672         return TCG_TARGET_HAS_add2_i64;
1673     case INDEX_op_sub2_i64:
1674         return TCG_TARGET_HAS_sub2_i64;
1675     case INDEX_op_mulu2_i64:
1676         return TCG_TARGET_HAS_mulu2_i64;
1677     case INDEX_op_muls2_i64:
1678         return TCG_TARGET_HAS_muls2_i64;
1679     case INDEX_op_muluh_i64:
1680         return TCG_TARGET_HAS_muluh_i64;
1681     case INDEX_op_mulsh_i64:
1682         return TCG_TARGET_HAS_mulsh_i64;
1683 
1684     case INDEX_op_mov_vec:
1685     case INDEX_op_dup_vec:
1686     case INDEX_op_dupi_vec:
1687     case INDEX_op_dupm_vec:
1688     case INDEX_op_ld_vec:
1689     case INDEX_op_st_vec:
1690     case INDEX_op_add_vec:
1691     case INDEX_op_sub_vec:
1692     case INDEX_op_and_vec:
1693     case INDEX_op_or_vec:
1694     case INDEX_op_xor_vec:
1695     case INDEX_op_cmp_vec:
1696         return have_vec;
1697     case INDEX_op_dup2_vec:
1698         return have_vec && TCG_TARGET_REG_BITS == 32;
1699     case INDEX_op_not_vec:
1700         return have_vec && TCG_TARGET_HAS_not_vec;
1701     case INDEX_op_neg_vec:
1702         return have_vec && TCG_TARGET_HAS_neg_vec;
1703     case INDEX_op_abs_vec:
1704         return have_vec && TCG_TARGET_HAS_abs_vec;
1705     case INDEX_op_andc_vec:
1706         return have_vec && TCG_TARGET_HAS_andc_vec;
1707     case INDEX_op_orc_vec:
1708         return have_vec && TCG_TARGET_HAS_orc_vec;
1709     case INDEX_op_mul_vec:
1710         return have_vec && TCG_TARGET_HAS_mul_vec;
1711     case INDEX_op_shli_vec:
1712     case INDEX_op_shri_vec:
1713     case INDEX_op_sari_vec:
1714         return have_vec && TCG_TARGET_HAS_shi_vec;
1715     case INDEX_op_shls_vec:
1716     case INDEX_op_shrs_vec:
1717     case INDEX_op_sars_vec:
1718         return have_vec && TCG_TARGET_HAS_shs_vec;
1719     case INDEX_op_shlv_vec:
1720     case INDEX_op_shrv_vec:
1721     case INDEX_op_sarv_vec:
1722         return have_vec && TCG_TARGET_HAS_shv_vec;
1723     case INDEX_op_rotli_vec:
1724         return have_vec && TCG_TARGET_HAS_roti_vec;
1725     case INDEX_op_rotls_vec:
1726         return have_vec && TCG_TARGET_HAS_rots_vec;
1727     case INDEX_op_rotlv_vec:
1728     case INDEX_op_rotrv_vec:
1729         return have_vec && TCG_TARGET_HAS_rotv_vec;
1730     case INDEX_op_ssadd_vec:
1731     case INDEX_op_usadd_vec:
1732     case INDEX_op_sssub_vec:
1733     case INDEX_op_ussub_vec:
1734         return have_vec && TCG_TARGET_HAS_sat_vec;
1735     case INDEX_op_smin_vec:
1736     case INDEX_op_umin_vec:
1737     case INDEX_op_smax_vec:
1738     case INDEX_op_umax_vec:
1739         return have_vec && TCG_TARGET_HAS_minmax_vec;
1740     case INDEX_op_bitsel_vec:
1741         return have_vec && TCG_TARGET_HAS_bitsel_vec;
1742     case INDEX_op_cmpsel_vec:
1743         return have_vec && TCG_TARGET_HAS_cmpsel_vec;
1744 
1745     default:
1746         tcg_debug_assert(op > INDEX_op_last_generic && op < NB_OPS);
1747         return true;
1748     }
1749 }
1750 
1751 /* Note: we convert the 64-bit args to 32-bit and do some alignment
1752    and endian swap. Maybe it would be better to do the alignment
1753    and endian swap in tcg_reg_alloc_call(). */
1754 void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args)
1755 {
1756     int i, real_args, nb_rets, pi;
1757     unsigned sizemask, flags;
1758     TCGHelperInfo *info;
1759     TCGOp *op;
1760 
1761     info = g_hash_table_lookup(helper_table, (gpointer)func);
1762     flags = info->flags;
1763     sizemask = info->sizemask;
1764 
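    /*
     * Layout of sizemask, as inferred from the tests below: bit 0 is set
     * when the return value is 64-bit, and for argument i, bit (i+1)*2 is
     * set when that argument is 64-bit and bit (i+1)*2 + 1 when it is
     * signed.  E.g. a helper returning i64 and taking one signed i32
     * argument would have sizemask == 0b1001.
     */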
1765 #ifdef CONFIG_PLUGIN
1766     /* detect non-plugin helpers */
1767     if (tcg_ctx->plugin_insn && unlikely(strncmp(info->name, "plugin_", 7))) {
1768         tcg_ctx->plugin_insn->calls_helpers = true;
1769     }
1770 #endif
1771 
1772 #if defined(__sparc__) && !defined(__arch64__) \
1773     && !defined(CONFIG_TCG_INTERPRETER)
1774     /* We have 64-bit values in one register, but need to pass as two
1775        separate parameters.  Split them.  */
1776     int orig_sizemask = sizemask;
1777     int orig_nargs = nargs;
1778     TCGv_i64 retl, reth;
1779     TCGTemp *split_args[MAX_OPC_PARAM];
1780 
1781     retl = NULL;
1782     reth = NULL;
1783     if (sizemask != 0) {
1784         for (i = real_args = 0; i < nargs; ++i) {
1785             int is_64bit = sizemask & (1 << (i+1)*2);
1786             if (is_64bit) {
1787                 TCGv_i64 orig = temp_tcgv_i64(args[i]);
1788                 TCGv_i32 h = tcg_temp_new_i32();
1789                 TCGv_i32 l = tcg_temp_new_i32();
1790                 tcg_gen_extr_i64_i32(l, h, orig);
1791                 split_args[real_args++] = tcgv_i32_temp(h);
1792                 split_args[real_args++] = tcgv_i32_temp(l);
1793             } else {
1794                 split_args[real_args++] = args[i];
1795             }
1796         }
1797         nargs = real_args;
1798         args = split_args;
1799         sizemask = 0;
1800     }
1801 #elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
1802     for (i = 0; i < nargs; ++i) {
1803         int is_64bit = sizemask & (1 << (i+1)*2);
1804         int is_signed = sizemask & (2 << (i+1)*2);
1805         if (!is_64bit) {
1806             TCGv_i64 temp = tcg_temp_new_i64();
1807             TCGv_i64 orig = temp_tcgv_i64(args[i]);
1808             if (is_signed) {
1809                 tcg_gen_ext32s_i64(temp, orig);
1810             } else {
1811                 tcg_gen_ext32u_i64(temp, orig);
1812             }
1813             args[i] = tcgv_i64_temp(temp);
1814         }
1815     }
1816 #endif /* TCG_TARGET_EXTEND_ARGS */
1817 
1818     op = tcg_emit_op(INDEX_op_call);
1819 
1820     pi = 0;
1821     if (ret != NULL) {
1822 #if defined(__sparc__) && !defined(__arch64__) \
1823     && !defined(CONFIG_TCG_INTERPRETER)
1824         if (orig_sizemask & 1) {
1825             /* The 32-bit ABI is going to return the 64-bit value in
1826                the %o0/%o1 register pair.  Prepare for this by using
1827                two return temporaries, and reassemble below.  */
1828             retl = tcg_temp_new_i64();
1829             reth = tcg_temp_new_i64();
1830             op->args[pi++] = tcgv_i64_arg(reth);
1831             op->args[pi++] = tcgv_i64_arg(retl);
1832             nb_rets = 2;
1833         } else {
1834             op->args[pi++] = temp_arg(ret);
1835             nb_rets = 1;
1836         }
1837 #else
1838         if (TCG_TARGET_REG_BITS < 64 && (sizemask & 1)) {
1839 #ifdef HOST_WORDS_BIGENDIAN
1840             op->args[pi++] = temp_arg(ret + 1);
1841             op->args[pi++] = temp_arg(ret);
1842 #else
1843             op->args[pi++] = temp_arg(ret);
1844             op->args[pi++] = temp_arg(ret + 1);
1845 #endif
1846             nb_rets = 2;
1847         } else {
1848             op->args[pi++] = temp_arg(ret);
1849             nb_rets = 1;
1850         }
1851 #endif
1852     } else {
1853         nb_rets = 0;
1854     }
1855     TCGOP_CALLO(op) = nb_rets;
1856 
1857     real_args = 0;
1858     for (i = 0; i < nargs; i++) {
1859         int is_64bit = sizemask & (1 << (i+1)*2);
1860         if (TCG_TARGET_REG_BITS < 64 && is_64bit) {
1861 #ifdef TCG_TARGET_CALL_ALIGN_ARGS
1862             /* some targets want aligned 64 bit args */
1863             if (real_args & 1) {
1864                 op->args[pi++] = TCG_CALL_DUMMY_ARG;
1865                 real_args++;
1866             }
1867 #endif
1868            /* If the stack grows up, then we will be placing successive
1869               arguments at lower addresses, which means we need to
1870               reverse the order compared to how we would normally
1871               treat either big or little-endian.  For those arguments
1872               that will wind up in registers, this still works for
1873               HPPA (the only current STACK_GROWSUP target) since the
1874               argument registers are *also* allocated in decreasing
1875               order.  If another such target is added, this logic may
1876               have to get more complicated to differentiate between
1877               stack arguments and register arguments.  */
1878 #if defined(HOST_WORDS_BIGENDIAN) != defined(TCG_TARGET_STACK_GROWSUP)
1879             op->args[pi++] = temp_arg(args[i] + 1);
1880             op->args[pi++] = temp_arg(args[i]);
1881 #else
1882             op->args[pi++] = temp_arg(args[i]);
1883             op->args[pi++] = temp_arg(args[i] + 1);
1884 #endif
1885             real_args += 2;
1886             continue;
1887         }
1888 
1889         op->args[pi++] = temp_arg(args[i]);
1890         real_args++;
1891     }
1892     op->args[pi++] = (uintptr_t)func;
1893     op->args[pi++] = flags;
1894     TCGOP_CALLI(op) = real_args;
1895 
1896     /* Make sure the fields didn't overflow.  */
1897     tcg_debug_assert(TCGOP_CALLI(op) == real_args);
1898     tcg_debug_assert(pi <= ARRAY_SIZE(op->args));
1899 
1900 #if defined(__sparc__) && !defined(__arch64__) \
1901     && !defined(CONFIG_TCG_INTERPRETER)
1902     /* Free all of the parts we allocated above.  */
1903     for (i = real_args = 0; i < orig_nargs; ++i) {
1904         int is_64bit = orig_sizemask & (1 << (i+1)*2);
1905         if (is_64bit) {
1906             tcg_temp_free_internal(args[real_args++]);
1907             tcg_temp_free_internal(args[real_args++]);
1908         } else {
1909             real_args++;
1910         }
1911     }
1912     if (orig_sizemask & 1) {
1913         /* The 32-bit ABI returned two 32-bit pieces.  Re-assemble them.
1914            Note that describing these as TCGv_i64 eliminates an unnecessary
1915            zero-extension that tcg_gen_concat_i32_i64 would create.  */
1916         tcg_gen_concat32_i64(temp_tcgv_i64(ret), retl, reth);
1917         tcg_temp_free_i64(retl);
1918         tcg_temp_free_i64(reth);
1919     }
1920 #elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
1921     for (i = 0; i < nargs; ++i) {
1922         int is_64bit = sizemask & (1 << (i+1)*2);
1923         if (!is_64bit) {
1924             tcg_temp_free_internal(args[i]);
1925         }
1926     }
1927 #endif /* TCG_TARGET_EXTEND_ARGS */
1928 }
1929 
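/*
 * For orientation, a sketch of how this function is reached: the
 * per-helper wrappers generated from exec/helper-gen.h marshal TCGv
 * arguments into a TCGTemp array and call tcg_gen_callN.  The helper
 * name and argument types below are purely illustrative:
 *
 *     static inline void gen_helper_foo(TCGv_i32 ret, TCGv_i32 a, TCGv_i32 b)
 *     {
 *         TCGTemp *args[2] = { tcgv_i32_temp(a), tcgv_i32_temp(b) };
 *         tcg_gen_callN(helper_foo, tcgv_i32_temp(ret), 2, args);
 *     }
 */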
1930 static void tcg_reg_alloc_start(TCGContext *s)
1931 {
1932     int i, n;
1933     TCGTemp *ts;
1934 
1935     for (i = 0, n = s->nb_globals; i < n; i++) {
1936         ts = &s->temps[i];
1937         ts->val_type = (ts->fixed_reg ? TEMP_VAL_REG : TEMP_VAL_MEM);
1938     }
1939     for (n = s->nb_temps; i < n; i++) {
1940         ts = &s->temps[i];
1941         ts->val_type = (ts->temp_local ? TEMP_VAL_MEM : TEMP_VAL_DEAD);
1942         ts->mem_allocated = 0;
1943         ts->fixed_reg = 0;
1944     }
1945 
1946     memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp));
1947 }
1948 
1949 static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size,
1950                                  TCGTemp *ts)
1951 {
1952     int idx = temp_idx(ts);
1953 
1954     if (ts->temp_global) {
1955         pstrcpy(buf, buf_size, ts->name);
1956     } else if (ts->temp_local) {
1957         snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
1958     } else {
1959         snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
1960     }
1961     return buf;
1962 }
1963 
1964 static char *tcg_get_arg_str(TCGContext *s, char *buf,
1965                              int buf_size, TCGArg arg)
1966 {
1967     return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg));
1968 }
1969 
1970 /* Find helper name.  */
1971 static inline const char *tcg_find_helper(TCGContext *s, uintptr_t val)
1972 {
1973     const char *ret = NULL;
1974     if (helper_table) {
1975         TCGHelperInfo *info = g_hash_table_lookup(helper_table, (gpointer)val);
1976         if (info) {
1977             ret = info->name;
1978         }
1979     }
1980     return ret;
1981 }
1982 
1983 static const char * const cond_name[] =
1984 {
1985     [TCG_COND_NEVER] = "never",
1986     [TCG_COND_ALWAYS] = "always",
1987     [TCG_COND_EQ] = "eq",
1988     [TCG_COND_NE] = "ne",
1989     [TCG_COND_LT] = "lt",
1990     [TCG_COND_GE] = "ge",
1991     [TCG_COND_LE] = "le",
1992     [TCG_COND_GT] = "gt",
1993     [TCG_COND_LTU] = "ltu",
1994     [TCG_COND_GEU] = "geu",
1995     [TCG_COND_LEU] = "leu",
1996     [TCG_COND_GTU] = "gtu"
1997 };
1998 
1999 static const char * const ldst_name[] =
2000 {
2001     [MO_UB]   = "ub",
2002     [MO_SB]   = "sb",
2003     [MO_LEUW] = "leuw",
2004     [MO_LESW] = "lesw",
2005     [MO_LEUL] = "leul",
2006     [MO_LESL] = "lesl",
2007     [MO_LEQ]  = "leq",
2008     [MO_BEUW] = "beuw",
2009     [MO_BESW] = "besw",
2010     [MO_BEUL] = "beul",
2011     [MO_BESL] = "besl",
2012     [MO_BEQ]  = "beq",
2013 };
2014 
2015 static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
2016 #ifdef TARGET_ALIGNED_ONLY
2017     [MO_UNALN >> MO_ASHIFT]    = "un+",
2018     [MO_ALIGN >> MO_ASHIFT]    = "",
2019 #else
2020     [MO_UNALN >> MO_ASHIFT]    = "",
2021     [MO_ALIGN >> MO_ASHIFT]    = "al+",
2022 #endif
2023     [MO_ALIGN_2 >> MO_ASHIFT]  = "al2+",
2024     [MO_ALIGN_4 >> MO_ASHIFT]  = "al4+",
2025     [MO_ALIGN_8 >> MO_ASHIFT]  = "al8+",
2026     [MO_ALIGN_16 >> MO_ASHIFT] = "al16+",
2027     [MO_ALIGN_32 >> MO_ASHIFT] = "al32+",
2028     [MO_ALIGN_64 >> MO_ASHIFT] = "al64+",
2029 };
2030 
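/*
 * d & (d - 1) clears the lowest set bit, so the result is zero iff at
 * most one register is in the set: e.g. 0b0100 & 0b0011 == 0, while
 * 0b0110 & 0b0101 == 0b0100.  Note that the empty set also passes.
 */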
2031 static inline bool tcg_regset_single(TCGRegSet d)
2032 {
2033     return (d & (d - 1)) == 0;
2034 }
2035 
2036 static inline TCGReg tcg_regset_first(TCGRegSet d)
2037 {
2038     if (TCG_TARGET_NB_REGS <= 32) {
2039         return ctz32(d);
2040     } else {
2041         return ctz64(d);
2042     }
2043 }
2044 
2045 static void tcg_dump_ops(TCGContext *s, bool have_prefs)
2046 {
2047     char buf[128];
2048     TCGOp *op;
2049 
2050     QTAILQ_FOREACH(op, &s->ops, link) {
2051         int i, k, nb_oargs, nb_iargs, nb_cargs;
2052         const TCGOpDef *def;
2053         TCGOpcode c;
2054         int col = 0;
2055 
2056         c = op->opc;
2057         def = &tcg_op_defs[c];
2058 
2059         if (c == INDEX_op_insn_start) {
2060             nb_oargs = 0;
2061             col += qemu_log("\n ----");
2062 
2063             for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
2064                 target_ulong a;
2065 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
2066                 a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
2067 #else
2068                 a = op->args[i];
2069 #endif
2070                 col += qemu_log(" " TARGET_FMT_lx, a);
2071             }
2072         } else if (c == INDEX_op_call) {
2073             /* variable number of arguments */
2074             nb_oargs = TCGOP_CALLO(op);
2075             nb_iargs = TCGOP_CALLI(op);
2076             nb_cargs = def->nb_cargs;
2077 
2078             /* function name, flags, out args */
2079             col += qemu_log(" %s %s,$0x%" TCG_PRIlx ",$%d", def->name,
2080                             tcg_find_helper(s, op->args[nb_oargs + nb_iargs]),
2081                             op->args[nb_oargs + nb_iargs + 1], nb_oargs);
2082             for (i = 0; i < nb_oargs; i++) {
2083                 col += qemu_log(",%s", tcg_get_arg_str(s, buf, sizeof(buf),
2084                                                        op->args[i]));
2085             }
2086             for (i = 0; i < nb_iargs; i++) {
2087                 TCGArg arg = op->args[nb_oargs + i];
2088                 const char *t = "<dummy>";
2089                 if (arg != TCG_CALL_DUMMY_ARG) {
2090                     t = tcg_get_arg_str(s, buf, sizeof(buf), arg);
2091                 }
2092                 col += qemu_log(",%s", t);
2093             }
2094         } else {
2095             col += qemu_log(" %s ", def->name);
2096 
2097             nb_oargs = def->nb_oargs;
2098             nb_iargs = def->nb_iargs;
2099             nb_cargs = def->nb_cargs;
2100 
2101             if (def->flags & TCG_OPF_VECTOR) {
2102                 col += qemu_log("v%d,e%d,", 64 << TCGOP_VECL(op),
2103                                 8 << TCGOP_VECE(op));
2104             }
2105 
2106             k = 0;
2107             for (i = 0; i < nb_oargs; i++) {
2108                 if (k != 0) {
2109                     col += qemu_log(",");
2110                 }
2111                 col += qemu_log("%s", tcg_get_arg_str(s, buf, sizeof(buf),
2112                                                       op->args[k++]));
2113             }
2114             for (i = 0; i < nb_iargs; i++) {
2115                 if (k != 0) {
2116                     col += qemu_log(",");
2117                 }
2118                 col += qemu_log("%s", tcg_get_arg_str(s, buf, sizeof(buf),
2119                                                       op->args[k++]));
2120             }
2121             switch (c) {
2122             case INDEX_op_brcond_i32:
2123             case INDEX_op_setcond_i32:
2124             case INDEX_op_movcond_i32:
2125             case INDEX_op_brcond2_i32:
2126             case INDEX_op_setcond2_i32:
2127             case INDEX_op_brcond_i64:
2128             case INDEX_op_setcond_i64:
2129             case INDEX_op_movcond_i64:
2130             case INDEX_op_cmp_vec:
2131             case INDEX_op_cmpsel_vec:
2132                 if (op->args[k] < ARRAY_SIZE(cond_name)
2133                     && cond_name[op->args[k]]) {
2134                     col += qemu_log(",%s", cond_name[op->args[k++]]);
2135                 } else {
2136                     col += qemu_log(",$0x%" TCG_PRIlx, op->args[k++]);
2137                 }
2138                 i = 1;
2139                 break;
2140             case INDEX_op_qemu_ld_i32:
2141             case INDEX_op_qemu_st_i32:
2142             case INDEX_op_qemu_st8_i32:
2143             case INDEX_op_qemu_ld_i64:
2144             case INDEX_op_qemu_st_i64:
2145                 {
2146                     TCGMemOpIdx oi = op->args[k++];
2147                     MemOp mop = get_memop(oi);
2148                     unsigned ix = get_mmuidx(oi);
2149 
2150                     if (mop & ~(MO_AMASK | MO_BSWAP | MO_SSIZE)) {
2151                         col += qemu_log(",$0x%x,%u", mop, ix);
2152                     } else {
2153                         const char *s_al, *s_op;
2154                         s_al = alignment_name[(mop & MO_AMASK) >> MO_ASHIFT];
2155                         s_op = ldst_name[mop & (MO_BSWAP | MO_SSIZE)];
2156                         col += qemu_log(",%s%s,%u", s_al, s_op, ix);
2157                     }
2157                     }
2158                     i = 1;
2159                 }
2160                 break;
2161             default:
2162                 i = 0;
2163                 break;
2164             }
2165             switch (c) {
2166             case INDEX_op_set_label:
2167             case INDEX_op_br:
2168             case INDEX_op_brcond_i32:
2169             case INDEX_op_brcond_i64:
2170             case INDEX_op_brcond2_i32:
2171                 col += qemu_log("%s$L%d", k ? "," : "",
2172                                 arg_label(op->args[k])->id);
2173                 i++, k++;
2174                 break;
2175             default:
2176                 break;
2177             }
2178             for (; i < nb_cargs; i++, k++) {
2179                 col += qemu_log("%s$0x%" TCG_PRIlx, k ? "," : "", op->args[k]);
2180             }
2181         }
2182 
2183         if (have_prefs || op->life) {
2184 
2185             QemuLogFile *logfile;
2186 
2187             rcu_read_lock();
2188             logfile = qatomic_rcu_read(&qemu_logfile);
2189             if (logfile) {
2190                 for (; col < 40; ++col) {
2191                     putc(' ', logfile->fd);
2192                 }
2193             }
2194             rcu_read_unlock();
2195         }
2196 
2197         if (op->life) {
2198             unsigned life = op->life;
2199 
2200             if (life & (SYNC_ARG * 3)) {
2201                 qemu_log("  sync:");
2202                 for (i = 0; i < 2; ++i) {
2203                     if (life & (SYNC_ARG << i)) {
2204                         qemu_log(" %d", i);
2205                     }
2206                 }
2207             }
2208             life /= DEAD_ARG;
2209             if (life) {
2210                 qemu_log("  dead:");
2211                 for (i = 0; life; ++i, life >>= 1) {
2212                     if (life & 1) {
2213                         qemu_log(" %d", i);
2214                     }
2215                 }
2216             }
2217         }
2218 
2219         if (have_prefs) {
2220             for (i = 0; i < nb_oargs; ++i) {
2221                 TCGRegSet set = op->output_pref[i];
2222 
2223                 if (i == 0) {
2224                     qemu_log("  pref=");
2225                 } else {
2226                     qemu_log(",");
2227                 }
2228                 if (set == 0) {
2229                     qemu_log("none");
2230                 } else if (set == MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS)) {
2231                     qemu_log("all");
2232 #ifdef CONFIG_DEBUG_TCG
2233                 } else if (tcg_regset_single(set)) {
2234                     TCGReg reg = tcg_regset_first(set);
2235                     qemu_log("%s", tcg_target_reg_names[reg]);
2236 #endif
2237                 } else if (TCG_TARGET_NB_REGS <= 32) {
2238                     qemu_log("%#x", (uint32_t)set);
2239                 } else {
2240                     qemu_log("%#" PRIx64, (uint64_t)set);
2241                 }
2242             }
2243         }
2244 
2245         qemu_log("\n");
2246     }
2247 }
2248 
2249 /* we give more priority to constraints with fewer registers */
2250 static int get_constraint_priority(const TCGOpDef *def, int k)
2251 {
2252     const TCGArgConstraint *arg_ct = &def->args_ct[k];
2253     int n;
2254 
2255     if (arg_ct->oalias) {
2256         /* an alias is equivalent to a single register */
2257         n = 1;
2258     } else {
2259         n = ctpop64(arg_ct->regs);
2260     }
2261     return TCG_TARGET_NB_REGS - n + 1;
2262 }
2263 
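/*
 * E.g. with 16 target registers, an operand constrained to a single
 * register yields priority 16 while one accepting any register yields 1,
 * so the most constrained operands are considered first.
 */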
2264 /* sort from highest priority to lowest */
2265 static void sort_constraints(TCGOpDef *def, int start, int n)
2266 {
2267     int i, j;
2268     TCGArgConstraint *a = def->args_ct;
2269 
2270     for (i = 0; i < n; i++) {
2271         a[start + i].sort_index = start + i;
2272     }
2273     if (n <= 1) {
2274         return;
2275     }
2276     for (i = 0; i < n - 1; i++) {
2277         for (j = i + 1; j < n; j++) {
2278             int p1 = get_constraint_priority(def, a[start + i].sort_index);
2279             int p2 = get_constraint_priority(def, a[start + j].sort_index);
2280             if (p1 < p2) {
2281                 int tmp = a[start + i].sort_index;
2282                 a[start + i].sort_index = a[start + j].sort_index;
2283                 a[start + j].sort_index = tmp;
2284             }
2285         }
2286     }
2287 }
2288 
2289 static void process_op_defs(TCGContext *s)
2290 {
2291     TCGOpcode op;
2292 
2293     for (op = 0; op < NB_OPS; op++) {
2294         TCGOpDef *def = &tcg_op_defs[op];
2295         const TCGTargetOpDef *tdefs;
2296         TCGType type;
2297         int i, nb_args;
2298 
2299         if (def->flags & TCG_OPF_NOT_PRESENT) {
2300             continue;
2301         }
2302 
2303         nb_args = def->nb_iargs + def->nb_oargs;
2304         if (nb_args == 0) {
2305             continue;
2306         }
2307 
2308         tdefs = tcg_target_op_def(op);
2309         /* Missing TCGTargetOpDef entry. */
2310         tcg_debug_assert(tdefs != NULL);
2311 
2312         type = (def->flags & TCG_OPF_64BIT ? TCG_TYPE_I64 : TCG_TYPE_I32);
2313         for (i = 0; i < nb_args; i++) {
2314             const char *ct_str = tdefs->args_ct_str[i];
2315             /* Incomplete TCGTargetOpDef entry. */
2316             tcg_debug_assert(ct_str != NULL);
2317 
2318             while (*ct_str != '\0') {
2319                 switch (*ct_str) {
2320                 case '0' ... '9':
2321                     {
2322                         int oarg = *ct_str - '0';
2323                         tcg_debug_assert(ct_str == tdefs->args_ct_str[i]);
2324                         tcg_debug_assert(oarg < def->nb_oargs);
2325                         tcg_debug_assert(def->args_ct[oarg].regs != 0);
2326                         def->args_ct[i] = def->args_ct[oarg];
2327                         /* The output sets oalias.  */
2328                         def->args_ct[oarg].oalias = true;
2329                         def->args_ct[oarg].alias_index = i;
2330                         /* The input sets ialias. */
2331                         def->args_ct[i].ialias = true;
2332                         def->args_ct[i].alias_index = oarg;
2333                     }
2334                     ct_str++;
2335                     break;
2336                 case '&':
2337                     def->args_ct[i].newreg = true;
2338                     ct_str++;
2339                     break;
2340                 case 'i':
2341                     def->args_ct[i].ct |= TCG_CT_CONST;
2342                     ct_str++;
2343                     break;
2344                 default:
2345                     ct_str = target_parse_constraint(&def->args_ct[i],
2346                                                      ct_str, type);
2347                     /* Typo in TCGTargetOpDef constraint. */
2348                     tcg_debug_assert(ct_str != NULL);
2349                 }
2350             }
2351         }
2352 
2353         /* TCGTargetOpDef entry with too much information? */
2354         tcg_debug_assert(i == TCG_MAX_OP_ARGS || tdefs->args_ct_str[i] == NULL);
2355 
2356         /* sort the constraints (XXX: this is just a heuristic) */
2357         sort_constraints(def, 0, def->nb_oargs);
2358         sort_constraints(def, def->nb_oargs, def->nb_iargs);
2359     }
2360 }
2361 
2362 void tcg_op_remove(TCGContext *s, TCGOp *op)
2363 {
2364     TCGLabel *label;
2365 
2366     switch (op->opc) {
2367     case INDEX_op_br:
2368         label = arg_label(op->args[0]);
2369         label->refs--;
2370         break;
2371     case INDEX_op_brcond_i32:
2372     case INDEX_op_brcond_i64:
2373         label = arg_label(op->args[3]);
2374         label->refs--;
2375         break;
2376     case INDEX_op_brcond2_i32:
2377         label = arg_label(op->args[5]);
2378         label->refs--;
2379         break;
2380     default:
2381         break;
2382     }
2383 
2384     QTAILQ_REMOVE(&s->ops, op, link);
2385     QTAILQ_INSERT_TAIL(&s->free_ops, op, link);
2386     s->nb_ops--;
2387 
2388 #ifdef CONFIG_PROFILER
2389     qatomic_set(&s->prof.del_op_count, s->prof.del_op_count + 1);
2390 #endif
2391 }
2392 
2393 static TCGOp *tcg_op_alloc(TCGOpcode opc)
2394 {
2395     TCGContext *s = tcg_ctx;
2396     TCGOp *op;
2397 
2398     if (likely(QTAILQ_EMPTY(&s->free_ops))) {
2399         op = tcg_malloc(sizeof(TCGOp));
2400     } else {
2401         op = QTAILQ_FIRST(&s->free_ops);
2402         QTAILQ_REMOVE(&s->free_ops, op, link);
2403     }
2404     memset(op, 0, offsetof(TCGOp, link));
2405     op->opc = opc;
2406     s->nb_ops++;
2407 
2408     return op;
2409 }
2410 
2411 TCGOp *tcg_emit_op(TCGOpcode opc)
2412 {
2413     TCGOp *op = tcg_op_alloc(opc);
2414     QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
2415     return op;
2416 }
2417 
2418 TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op, TCGOpcode opc)
2419 {
2420     TCGOp *new_op = tcg_op_alloc(opc);
2421     QTAILQ_INSERT_BEFORE(old_op, new_op, link);
2422     return new_op;
2423 }
2424 
2425 TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op, TCGOpcode opc)
2426 {
2427     TCGOp *new_op = tcg_op_alloc(opc);
2428     QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link);
2429     return new_op;
2430 }
2431 
2432 /* Reachability analysis: remove unreachable code.  */
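/*
 * E.g. once the optimizer folds a conditional branch, an opcode stream
 * such as
 *     br $L1
 *     <dead ops>
 *     set_label $L1
 * collapses entirely: everything after the br is dead up to the label,
 * and a label left with a single reference, preceded by an unconditional
 * branch to it, is removed together with that branch.
 */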
2433 static void reachable_code_pass(TCGContext *s)
2434 {
2435     TCGOp *op, *op_next;
2436     bool dead = false;
2437 
2438     QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
2439         bool remove = dead;
2440         TCGLabel *label;
2441         int call_flags;
2442 
2443         switch (op->opc) {
2444         case INDEX_op_set_label:
2445             label = arg_label(op->args[0]);
2446             if (label->refs == 0) {
2447                 /*
2448                  * While there is an occasional backward branch, virtually
2449                  * all branches generated by the translators are forward.
2450                  * Which means that generally we will have already removed
2451                  * all the references the label will ever receive, and there is
2452                  * little to be gained by iterating.
2453                  */
2454                 remove = true;
2455             } else {
2456                 /* Once we see a label, insns become live again.  */
2457                 dead = false;
2458                 remove = false;
2459 
2460                 /*
2461                  * Optimization can fold conditional branches to unconditional.
2462                  * If we find a label with one reference which is preceded by
2463                  * an unconditional branch to it, remove both.  This needed to
2464                  * wait until the dead code in between them was removed.
2465                  */
2466                 if (label->refs == 1) {
2467                     TCGOp *op_prev = QTAILQ_PREV(op, link);
2468                     if (op_prev->opc == INDEX_op_br &&
2469                         label == arg_label(op_prev->args[0])) {
2470                         tcg_op_remove(s, op_prev);
2471                         remove = true;
2472                     }
2473                 }
2474             }
2475             break;
2476 
2477         case INDEX_op_br:
2478         case INDEX_op_exit_tb:
2479         case INDEX_op_goto_ptr:
2480             /* Unconditional branches; everything following is dead.  */
2481             dead = true;
2482             break;
2483 
2484         case INDEX_op_call:
2485             /* Notice noreturn helper calls, e.g. those raising exceptions.  */
2486             call_flags = op->args[TCGOP_CALLO(op) + TCGOP_CALLI(op) + 1];
2487             if (call_flags & TCG_CALL_NO_RETURN) {
2488                 dead = true;
2489             }
2490             break;
2491 
2492         case INDEX_op_insn_start:
2493             /* Never remove -- we need to keep these for unwind.  */
2494             remove = false;
2495             break;
2496 
2497         default:
2498             break;
2499         }
2500 
2501         if (remove) {
2502             tcg_op_remove(s, op);
2503         }
2504     }
2505 }
2506 
2507 #define TS_DEAD  1
2508 #define TS_MEM   2
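/*
 * Temp state encoding for the liveness passes: 0 means the value is live
 * and will be used; TS_DEAD means no later op needs the value; TS_MEM
 * means the value is synced to its memory slot.  TS_DEAD | TS_MEM, the
 * state of globals at function end, marks a temp that is both unused and
 * saved back to memory.
 */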
2509 
2510 #define IS_DEAD_ARG(n)   (arg_life & (DEAD_ARG << (n)))
2511 #define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n)))
2512 
2513 /* For liveness_pass_1, the register preferences for a given temp.  */
2514 static inline TCGRegSet *la_temp_pref(TCGTemp *ts)
2515 {
2516     return ts->state_ptr;
2517 }
2518 
2519 /* For liveness_pass_1, reset the preferences for a given temp to the
2520  * maximal regset for its type.
2521  */
2522 static inline void la_reset_pref(TCGTemp *ts)
2523 {
2524     *la_temp_pref(ts)
2525         = (ts->state == TS_DEAD ? 0 : tcg_target_available_regs[ts->type]);
2526 }
2527 
2528 /* liveness analysis: end of function: all temps are dead, and globals
2529    should be in memory. */
2530 static void la_func_end(TCGContext *s, int ng, int nt)
2531 {
2532     int i;
2533 
2534     for (i = 0; i < ng; ++i) {
2535         s->temps[i].state = TS_DEAD | TS_MEM;
2536         la_reset_pref(&s->temps[i]);
2537     }
2538     for (i = ng; i < nt; ++i) {
2539         s->temps[i].state = TS_DEAD;
2540         la_reset_pref(&s->temps[i]);
2541     }
2542 }
2543 
2544 /* liveness analysis: end of basic block: all temps are dead, globals
2545    and local temps should be in memory. */
2546 static void la_bb_end(TCGContext *s, int ng, int nt)
2547 {
2548     int i;
2549 
2550     for (i = 0; i < ng; ++i) {
2551         s->temps[i].state = TS_DEAD | TS_MEM;
2552         la_reset_pref(&s->temps[i]);
2553     }
2554     for (i = ng; i < nt; ++i) {
2555         s->temps[i].state = (s->temps[i].temp_local
2556                              ? TS_DEAD | TS_MEM
2557                              : TS_DEAD);
2558         la_reset_pref(&s->temps[i]);
2559     }
2560 }
2561 
2562 /* liveness analysis: sync globals back to memory.  */
2563 static void la_global_sync(TCGContext *s, int ng)
2564 {
2565     int i;
2566 
2567     for (i = 0; i < ng; ++i) {
2568         int state = s->temps[i].state;
2569         s->temps[i].state = state | TS_MEM;
2570         if (state == TS_DEAD) {
2571             /* If the global was previously dead, reset prefs.  */
2572             la_reset_pref(&s->temps[i]);
2573         }
2574     }
2575 }
2576 
2577 /*
2578  * liveness analysis: conditional branch: all temps are dead,
2579  * globals and local temps should be synced.
2580  */
2581 static void la_bb_sync(TCGContext *s, int ng, int nt)
2582 {
2583     la_global_sync(s, ng);
2584 
2585     for (int i = ng; i < nt; ++i) {
2586         if (s->temps[i].temp_local) {
2587             int state = s->temps[i].state;
2588             s->temps[i].state = state | TS_MEM;
2589             if (state != TS_DEAD) {
2590                 continue;
2591             }
2592         } else {
2593             s->temps[i].state = TS_DEAD;
2594         }
2595         la_reset_pref(&s->temps[i]);
2596     }
2597 }
2598 
2599 /* liveness analysis: sync globals back to memory and kill.  */
2600 static void la_global_kill(TCGContext *s, int ng)
2601 {
2602     int i;
2603 
2604     for (i = 0; i < ng; i++) {
2605         s->temps[i].state = TS_DEAD | TS_MEM;
2606         la_reset_pref(&s->temps[i]);
2607     }
2608 }
2609 
2610 /* liveness analysis: note live globals crossing calls.  */
2611 static void la_cross_call(TCGContext *s, int nt)
2612 {
2613     TCGRegSet mask = ~tcg_target_call_clobber_regs;
2614     int i;
2615 
2616     for (i = 0; i < nt; i++) {
2617         TCGTemp *ts = &s->temps[i];
2618         if (!(ts->state & TS_DEAD)) {
2619             TCGRegSet *pset = la_temp_pref(ts);
2620             TCGRegSet set = *pset;
2621 
2622             set &= mask;
2623             /* If the combination is not possible, restart.  */
2624             if (set == 0) {
2625                 set = tcg_target_available_regs[ts->type] & mask;
2626             }
2627             *pset = set;
2628         }
2629     }
2630 }
2631 
2632 /* Liveness analysis: update each op's life field to tell whether a
2633    given input argument is dead. Instructions updating dead
2634    temporaries are removed. */
2635 static void liveness_pass_1(TCGContext *s)
2636 {
2637     int nb_globals = s->nb_globals;
2638     int nb_temps = s->nb_temps;
2639     TCGOp *op, *op_prev;
2640     TCGRegSet *prefs;
2641     int i;
2642 
2643     prefs = tcg_malloc(sizeof(TCGRegSet) * nb_temps);
2644     for (i = 0; i < nb_temps; ++i) {
2645         s->temps[i].state_ptr = prefs + i;
2646     }
2647 
2648     /* ??? Should be redundant with the exit_tb that ends the TB.  */
2649     la_func_end(s, nb_globals, nb_temps);
2650 
2651     QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, link, op_prev) {
2652         int nb_iargs, nb_oargs;
2653         TCGOpcode opc_new, opc_new2;
2654         bool have_opc_new2;
2655         TCGLifeData arg_life = 0;
2656         TCGTemp *ts;
2657         TCGOpcode opc = op->opc;
2658         const TCGOpDef *def = &tcg_op_defs[opc];
2659 
2660         switch (opc) {
2661         case INDEX_op_call:
2662             {
2663                 int call_flags;
2664                 int nb_call_regs;
2665 
2666                 nb_oargs = TCGOP_CALLO(op);
2667                 nb_iargs = TCGOP_CALLI(op);
2668                 call_flags = op->args[nb_oargs + nb_iargs + 1];
2669 
2670                 /* pure functions can be removed if their result is unused */
2671                 if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
2672                     for (i = 0; i < nb_oargs; i++) {
2673                         ts = arg_temp(op->args[i]);
2674                         if (ts->state != TS_DEAD) {
2675                             goto do_not_remove_call;
2676                         }
2677                     }
2678                     goto do_remove;
2679                 }
2680             do_not_remove_call:
2681 
2682                 /* Output args are dead.  */
2683                 for (i = 0; i < nb_oargs; i++) {
2684                     ts = arg_temp(op->args[i]);
2685                     if (ts->state & TS_DEAD) {
2686                         arg_life |= DEAD_ARG << i;
2687                     }
2688                     if (ts->state & TS_MEM) {
2689                         arg_life |= SYNC_ARG << i;
2690                     }
2691                     ts->state = TS_DEAD;
2692                     la_reset_pref(ts);
2693 
2694                     /* Not used -- it will be tcg_target_call_oarg_regs[i].  */
2695                     op->output_pref[i] = 0;
2696                 }
2697 
2698                 if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
2699                                     TCG_CALL_NO_READ_GLOBALS))) {
2700                     la_global_kill(s, nb_globals);
2701                 } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
2702                     la_global_sync(s, nb_globals);
2703                 }
2704 
2705                 /* Record arguments that die in this helper.  */
2706                 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
2707                     ts = arg_temp(op->args[i]);
2708                     if (ts && ts->state & TS_DEAD) {
2709                         arg_life |= DEAD_ARG << i;
2710                     }
2711                 }
2712 
2713                 /* For all live registers, remove call-clobbered prefs.  */
2714                 la_cross_call(s, nb_temps);
2715 
2716                 nb_call_regs = ARRAY_SIZE(tcg_target_call_iarg_regs);
2717 
2718                 /* Input arguments are live for preceding opcodes.  */
2719                 for (i = 0; i < nb_iargs; i++) {
2720                     ts = arg_temp(op->args[i + nb_oargs]);
2721                     if (ts && ts->state & TS_DEAD) {
2722                         /* For those arguments that die, and will be allocated
2723                          * in registers, clear the register set for that arg,
2724                          * to be filled in below.  For args that will be on
2725                          * the stack, reset to any available reg.
2726                          */
2727                         *la_temp_pref(ts)
2728                             = (i < nb_call_regs ? 0 :
2729                                tcg_target_available_regs[ts->type]);
2730                         ts->state &= ~TS_DEAD;
2731                     }
2732                 }
2733 
2734                 /* For each input argument, add its input register to prefs.
2735                    If a temp is used once, this produces a single set bit.  */
2736                 for (i = 0; i < MIN(nb_call_regs, nb_iargs); i++) {
2737                     ts = arg_temp(op->args[i + nb_oargs]);
2738                     if (ts) {
2739                         tcg_regset_set_reg(*la_temp_pref(ts),
2740                                            tcg_target_call_iarg_regs[i]);
2741                     }
2742                 }
2743             }
2744             break;
2745         case INDEX_op_insn_start:
2746             break;
2747         case INDEX_op_discard:
2748             /* mark the temporary as dead */
2749             ts = arg_temp(op->args[0]);
2750             ts->state = TS_DEAD;
2751             la_reset_pref(ts);
2752             break;
2753 
2754         case INDEX_op_add2_i32:
2755             opc_new = INDEX_op_add_i32;
2756             goto do_addsub2;
2757         case INDEX_op_sub2_i32:
2758             opc_new = INDEX_op_sub_i32;
2759             goto do_addsub2;
2760         case INDEX_op_add2_i64:
2761             opc_new = INDEX_op_add_i64;
2762             goto do_addsub2;
2763         case INDEX_op_sub2_i64:
2764             opc_new = INDEX_op_sub_i64;
2765         do_addsub2:
2766             nb_iargs = 4;
2767             nb_oargs = 2;
2768             /* Test if the high part of the operation is dead, but not
2769                the low part.  The result can be optimized to a simple
2770                add or sub.  This happens often for x86_64 guests when
2771                the cpu mode is set to 32 bit.  */
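            /*
             * E.g. "add2 rl,rh,al,ah,bl,bh" with rh dead becomes
             * "add rl,al,bl" via the argument shuffle below.
             */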
2772             if (arg_temp(op->args[1])->state == TS_DEAD) {
2773                 if (arg_temp(op->args[0])->state == TS_DEAD) {
2774                     goto do_remove;
2775                 }
2776                 /* Replace the opcode and adjust the args in place,
2777                    leaving 3 unused args at the end.  */
2778                 op->opc = opc = opc_new;
2779                 op->args[1] = op->args[2];
2780                 op->args[2] = op->args[4];
2781                 /* Fall through and mark the single-word operation live.  */
2782                 nb_iargs = 2;
2783                 nb_oargs = 1;
2784             }
2785             goto do_not_remove;
2786 
2787         case INDEX_op_mulu2_i32:
2788             opc_new = INDEX_op_mul_i32;
2789             opc_new2 = INDEX_op_muluh_i32;
2790             have_opc_new2 = TCG_TARGET_HAS_muluh_i32;
2791             goto do_mul2;
2792         case INDEX_op_muls2_i32:
2793             opc_new = INDEX_op_mul_i32;
2794             opc_new2 = INDEX_op_mulsh_i32;
2795             have_opc_new2 = TCG_TARGET_HAS_mulsh_i32;
2796             goto do_mul2;
2797         case INDEX_op_mulu2_i64:
2798             opc_new = INDEX_op_mul_i64;
2799             opc_new2 = INDEX_op_muluh_i64;
2800             have_opc_new2 = TCG_TARGET_HAS_muluh_i64;
2801             goto do_mul2;
2802         case INDEX_op_muls2_i64:
2803             opc_new = INDEX_op_mul_i64;
2804             opc_new2 = INDEX_op_mulsh_i64;
2805             have_opc_new2 = TCG_TARGET_HAS_mulsh_i64;
2806             goto do_mul2;
2807         do_mul2:
2808             nb_iargs = 2;
2809             nb_oargs = 2;
2810             if (arg_temp(op->args[1])->state == TS_DEAD) {
2811                 if (arg_temp(op->args[0])->state == TS_DEAD) {
2812                     /* Both parts of the operation are dead.  */
2813                     goto do_remove;
2814                 }
2815                 /* The high part of the operation is dead; generate the low. */
2816                 op->opc = opc = opc_new;
2817                 op->args[1] = op->args[2];
2818                 op->args[2] = op->args[3];
2819             } else if (arg_temp(op->args[0])->state == TS_DEAD && have_opc_new2) {
2820                 /* The low part of the operation is dead; generate the high. */
2821                 op->opc = opc = opc_new2;
2822                 op->args[0] = op->args[1];
2823                 op->args[1] = op->args[2];
2824                 op->args[2] = op->args[3];
2825             } else {
2826                 goto do_not_remove;
2827             }
2828             /* Mark the single-word operation live.  */
2829             nb_oargs = 1;
2830             goto do_not_remove;
2831 
2832         default:
2833             /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
2834             nb_iargs = def->nb_iargs;
2835             nb_oargs = def->nb_oargs;
2836 
2837             /* Test if the operation can be removed because all
2838                its outputs are dead. We assume that nb_oargs == 0
2839                implies side effects */
2840             if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) {
2841                 for (i = 0; i < nb_oargs; i++) {
2842                     if (arg_temp(op->args[i])->state != TS_DEAD) {
2843                         goto do_not_remove;
2844                     }
2845                 }
2846                 goto do_remove;
2847             }
2848             goto do_not_remove;
2849 
2850         do_remove:
2851             tcg_op_remove(s, op);
2852             break;
2853 
2854         do_not_remove:
2855             for (i = 0; i < nb_oargs; i++) {
2856                 ts = arg_temp(op->args[i]);
2857 
2858                 /* Remember the preference of the uses that followed.  */
2859                 op->output_pref[i] = *la_temp_pref(ts);
2860 
2861                 /* Output args are dead.  */
2862                 if (ts->state & TS_DEAD) {
2863                     arg_life |= DEAD_ARG << i;
2864                 }
2865                 if (ts->state & TS_MEM) {
2866                     arg_life |= SYNC_ARG << i;
2867                 }
2868                 ts->state = TS_DEAD;
2869                 la_reset_pref(ts);
2870             }
2871 
2872             /* If end of basic block, update.  */
2873             if (def->flags & TCG_OPF_BB_EXIT) {
2874                 la_func_end(s, nb_globals, nb_temps);
2875             } else if (def->flags & TCG_OPF_COND_BRANCH) {
2876                 la_bb_sync(s, nb_globals, nb_temps);
2877             } else if (def->flags & TCG_OPF_BB_END) {
2878                 la_bb_end(s, nb_globals, nb_temps);
2879             } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
2880                 la_global_sync(s, nb_globals);
2881                 if (def->flags & TCG_OPF_CALL_CLOBBER) {
2882                     la_cross_call(s, nb_temps);
2883                 }
2884             }
2885 
2886             /* Record arguments that die in this opcode.  */
2887             for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
2888                 ts = arg_temp(op->args[i]);
2889                 if (ts->state & TS_DEAD) {
2890                     arg_life |= DEAD_ARG << i;
2891                 }
2892             }
2893 
2894             /* Input arguments are live for preceding opcodes.  */
2895             for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
2896                 ts = arg_temp(op->args[i]);
2897                 if (ts->state & TS_DEAD) {
2898                     /* For operands that were dead, initially allow
2899                        all regs for the type.  */
2900                     *la_temp_pref(ts) = tcg_target_available_regs[ts->type];
2901                     ts->state &= ~TS_DEAD;
2902                 }
2903             }
2904 
2905             /* Incorporate constraints for this operand.  */
2906             switch (opc) {
2907             case INDEX_op_mov_i32:
2908             case INDEX_op_mov_i64:
2909                 /* Note that these are TCG_OPF_NOT_PRESENT and do not
2910                    have proper constraints.  That said, special case
2911                    moves to propagate preferences backward.  */
2912                 if (IS_DEAD_ARG(1)) {
2913                     *la_temp_pref(arg_temp(op->args[0]))
2914                         = *la_temp_pref(arg_temp(op->args[1]));
2915                 }
2916                 break;
2917 
2918             default:
2919                 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
2920                     const TCGArgConstraint *ct = &def->args_ct[i];
2921                     TCGRegSet set, *pset;
2922 
2923                     ts = arg_temp(op->args[i]);
2924                     pset = la_temp_pref(ts);
2925                     set = *pset;
2926 
2927                     set &= ct->regs;
2928                     if (ct->ialias) {
2929                         set &= op->output_pref[ct->alias_index];
2930                     }
2931                     /* If the combination is not possible, restart.  */
2932                     if (set == 0) {
2933                         set = ct->regs;
2934                     }
2935                     *pset = set;
2936                 }
2937                 break;
2938             }
2939             break;
2940         }
2941         op->life = arg_life;
2942     }
2943 }
2944 
2945 /* Liveness analysis: Convert indirect regs to direct temporaries.  */
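/*
 * E.g. a global with indirect_reg set gets a direct shadow temp; a use
 * while the shadow is TS_DEAD causes
 *     ld_i32/ld_i64 shadow, mem_base, mem_offset
 * to be inserted before the op, and a write that must be synced appends
 * the matching st_i32/st_i64 after it.
 */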
2946 static bool liveness_pass_2(TCGContext *s)
2947 {
2948     int nb_globals = s->nb_globals;
2949     int nb_temps, i;
2950     bool changes = false;
2951     TCGOp *op, *op_next;
2952 
2953     /* Create a temporary for each indirect global.  */
2954     for (i = 0; i < nb_globals; ++i) {
2955         TCGTemp *its = &s->temps[i];
2956         if (its->indirect_reg) {
2957             TCGTemp *dts = tcg_temp_alloc(s);
2958             dts->type = its->type;
2959             dts->base_type = its->base_type;
2960             its->state_ptr = dts;
2961         } else {
2962             its->state_ptr = NULL;
2963         }
2964         /* All globals begin dead.  */
2965         its->state = TS_DEAD;
2966     }
2967     for (nb_temps = s->nb_temps; i < nb_temps; ++i) {
2968         TCGTemp *its = &s->temps[i];
2969         its->state_ptr = NULL;
2970         its->state = TS_DEAD;
2971     }
2972 
2973     QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
2974         TCGOpcode opc = op->opc;
2975         const TCGOpDef *def = &tcg_op_defs[opc];
2976         TCGLifeData arg_life = op->life;
2977         int nb_iargs, nb_oargs, call_flags;
2978         TCGTemp *arg_ts, *dir_ts;
2979 
2980         if (opc == INDEX_op_call) {
2981             nb_oargs = TCGOP_CALLO(op);
2982             nb_iargs = TCGOP_CALLI(op);
2983             call_flags = op->args[nb_oargs + nb_iargs + 1];
2984         } else {
2985             nb_iargs = def->nb_iargs;
2986             nb_oargs = def->nb_oargs;
2987 
2988             /* Set flags similar to those that calls require.  */
2989             if (def->flags & TCG_OPF_COND_BRANCH) {
2990                 /* Like reading globals: sync_globals */
2991                 call_flags = TCG_CALL_NO_WRITE_GLOBALS;
2992             } else if (def->flags & TCG_OPF_BB_END) {
2993                 /* Like writing globals: save_globals */
2994                 call_flags = 0;
2995             } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
2996                 /* Like reading globals: sync_globals */
2997                 call_flags = TCG_CALL_NO_WRITE_GLOBALS;
2998             } else {
2999                 /* No effect on globals.  */
3000                 call_flags = (TCG_CALL_NO_READ_GLOBALS |
3001                               TCG_CALL_NO_WRITE_GLOBALS);
3002             }
3003         }
3004 
3005         /* Make sure that input arguments are available.  */
3006         for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
3007             arg_ts = arg_temp(op->args[i]);
3008             if (arg_ts) {
3009                 dir_ts = arg_ts->state_ptr;
3010                 if (dir_ts && arg_ts->state == TS_DEAD) {
3011                     TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32
3012                                       ? INDEX_op_ld_i32
3013                                       : INDEX_op_ld_i64);
3014                     TCGOp *lop = tcg_op_insert_before(s, op, lopc);
3015 
3016                     lop->args[0] = temp_arg(dir_ts);
3017                     lop->args[1] = temp_arg(arg_ts->mem_base);
3018                     lop->args[2] = arg_ts->mem_offset;
3019 
3020                     /* Loaded, but synced with memory.  */
3021                     arg_ts->state = TS_MEM;
3022                 }
3023             }
3024         }
3025 
3026         /* Perform input replacement, and mark inputs that became dead.
3027            No action is required except keeping the temp state up to
3028            date so that we reload when needed.  */
3029         for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
3030             arg_ts = arg_temp(op->args[i]);
3031             if (arg_ts) {
3032                 dir_ts = arg_ts->state_ptr;
3033                 if (dir_ts) {
3034                     op->args[i] = temp_arg(dir_ts);
3035                     changes = true;
3036                     if (IS_DEAD_ARG(i)) {
3037                         arg_ts->state = TS_DEAD;
3038                     }
3039                 }
3040             }
3041         }
3042 
3043         /* Liveness analysis should ensure that the following are
3044            all correct, for call sites and basic block end points.  */
3045         if (call_flags & TCG_CALL_NO_READ_GLOBALS) {
3046             /* Nothing to do */
3047         } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) {
3048             for (i = 0; i < nb_globals; ++i) {
3049                 /* Liveness should see that globals are synced back,
3050                    that is, either TS_DEAD or TS_MEM.  */
3051                 arg_ts = &s->temps[i];
3052                 tcg_debug_assert(arg_ts->state_ptr == 0
3053                                  || arg_ts->state != 0);
3054             }
3055         } else {
3056             for (i = 0; i < nb_globals; ++i) {
3057                 /* Liveness should see that globals are saved back,
3058                    that is, TS_DEAD, waiting to be reloaded.  */
3059                 arg_ts = &s->temps[i];
3060                 tcg_debug_assert(arg_ts->state_ptr == 0
3061                                  || arg_ts->state == TS_DEAD);
3062             }
3063         }
3064 
3065         /* Outputs become available.  */
3066         if (opc == INDEX_op_mov_i32 || opc == INDEX_op_mov_i64) {
3067             arg_ts = arg_temp(op->args[0]);
3068             dir_ts = arg_ts->state_ptr;
3069             if (dir_ts) {
3070                 op->args[0] = temp_arg(dir_ts);
3071                 changes = true;
3072 
3073                 /* The output is now live and modified.  */
3074                 arg_ts->state = 0;
3075 
3076                 if (NEED_SYNC_ARG(0)) {
3077                     TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
3078                                       ? INDEX_op_st_i32
3079                                       : INDEX_op_st_i64);
3080                     TCGOp *sop = tcg_op_insert_after(s, op, sopc);
3081                     TCGTemp *out_ts = dir_ts;
3082 
3083                     if (IS_DEAD_ARG(0)) {
3084                         out_ts = arg_temp(op->args[1]);
3085                         arg_ts->state = TS_DEAD;
3086                         tcg_op_remove(s, op);
3087                     } else {
3088                         arg_ts->state = TS_MEM;
3089                     }
3090 
3091                     sop->args[0] = temp_arg(out_ts);
3092                     sop->args[1] = temp_arg(arg_ts->mem_base);
3093                     sop->args[2] = arg_ts->mem_offset;
3094                 } else {
3095                     tcg_debug_assert(!IS_DEAD_ARG(0));
3096                 }
3097             }
3098         } else {
3099             for (i = 0; i < nb_oargs; i++) {
3100                 arg_ts = arg_temp(op->args[i]);
3101                 dir_ts = arg_ts->state_ptr;
3102                 if (!dir_ts) {
3103                     continue;
3104                 }
3105                 op->args[i] = temp_arg(dir_ts);
3106                 changes = true;
3107 
3108                 /* The output is now live and modified.  */
3109                 arg_ts->state = 0;
3110 
3111                 /* Sync outputs upon their last write.  */
3112                 if (NEED_SYNC_ARG(i)) {
3113                     TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
3114                                       ? INDEX_op_st_i32
3115                                       : INDEX_op_st_i64);
3116                     TCGOp *sop = tcg_op_insert_after(s, op, sopc);
3117 
3118                     sop->args[0] = temp_arg(dir_ts);
3119                     sop->args[1] = temp_arg(arg_ts->mem_base);
3120                     sop->args[2] = arg_ts->mem_offset;
3121 
3122                     arg_ts->state = TS_MEM;
3123                 }
3124                 /* Drop outputs that are dead.  */
3125                 if (IS_DEAD_ARG(i)) {
3126                     arg_ts->state = TS_DEAD;
3127                 }
3128             }
3129         }
3130     }
3131 
3132     return changes;
3133 }
3134 
3135 #ifdef CONFIG_DEBUG_TCG
3136 static void dump_regs(TCGContext *s)
3137 {
3138     TCGTemp *ts;
3139     int i;
3140     char buf[64];
3141 
3142     for (i = 0; i < s->nb_temps; i++) {
3143         ts = &s->temps[i];
3144         printf("  %10s: ", tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts));
3145         switch (ts->val_type) {
3146         case TEMP_VAL_REG:
3147             printf("%s", tcg_target_reg_names[ts->reg]);
3148             break;
3149         case TEMP_VAL_MEM:
3150             printf("%d(%s)", (int)ts->mem_offset,
3151                    tcg_target_reg_names[ts->mem_base->reg]);
3152             break;
3153         case TEMP_VAL_CONST:
3154             printf("$0x%" TCG_PRIlx, ts->val);
3155             break;
3156         case TEMP_VAL_DEAD:
3157             printf("D");
3158             break;
3159         default:
3160             printf("???");
3161             break;
3162         }
3163         printf("\n");
3164     }
3165 
3166     for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
3167         if (s->reg_to_temp[i] != NULL) {
3168             printf("%s: %s\n",
3169                    tcg_target_reg_names[i],
3170                    tcg_get_arg_str_ptr(s, buf, sizeof(buf), s->reg_to_temp[i]));
3171         }
3172     }
3173 }
3174 
3175 static void check_regs(TCGContext *s)
3176 {
3177     int reg;
3178     int k;
3179     TCGTemp *ts;
3180     char buf[64];
3181 
3182     for (reg = 0; reg < TCG_TARGET_NB_REGS; reg++) {
3183         ts = s->reg_to_temp[reg];
3184         if (ts != NULL) {
3185             if (ts->val_type != TEMP_VAL_REG || ts->reg != reg) {
3186                 printf("Inconsistency for register %s:\n",
3187                        tcg_target_reg_names[reg]);
3188                 goto fail;
3189             }
3190         }
3191     }
3192     for (k = 0; k < s->nb_temps; k++) {
3193         ts = &s->temps[k];
3194         if (ts->val_type == TEMP_VAL_REG && !ts->fixed_reg
3195             && s->reg_to_temp[ts->reg] != ts) {
3196             printf("Inconsistency for temp %s:\n",
3197                    tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts));
3198         fail:
3199             printf("reg state:\n");
3200             dump_regs(s);
3201             tcg_abort();
3202         }
3203     }
3204 }
3205 #endif
3206 
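/*
 * The frame offset is rounded up to the slot alignment with the usual
 * (x + a - 1) & ~(a - 1) idiom: with 8-byte slots, an offset of 12
 * becomes (12 + 7) & ~7 == 16.
 */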
3207 static void temp_allocate_frame(TCGContext *s, TCGTemp *ts)
3208 {
3209 #if !(defined(__sparc__) && TCG_TARGET_REG_BITS == 64)
3210     /* The sparc64 stack is accessed with an offset of 2047 */
3211     s->current_frame_offset = (s->current_frame_offset +
3212                                (tcg_target_long)sizeof(tcg_target_long) - 1) &
3213         ~(sizeof(tcg_target_long) - 1);
3214 #endif
3215     if (s->current_frame_offset + (tcg_target_long)sizeof(tcg_target_long) >
3216         s->frame_end) {
3217         tcg_abort();
3218     }
3219     ts->mem_offset = s->current_frame_offset;
3220     ts->mem_base = s->frame_temp;
3221     ts->mem_allocated = 1;
3222     s->current_frame_offset += sizeof(tcg_target_long);
3223 }
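
/* For example, with 8-byte slots the round-up above maps a frame offset
   of 12 to 16 and leaves 16 unchanged, so every temp receives a
   naturally aligned tcg_target_long slot.  */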
3224 
3225 static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet);
3226 
3227 /* Mark a temporary as free or dead.  If 'free_or_dead' is negative,
3228    mark it free; otherwise mark it dead.  */
3229 static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead)
3230 {
3231     if (ts->fixed_reg) {
3232         return;
3233     }
3234     if (ts->val_type == TEMP_VAL_REG) {
3235         s->reg_to_temp[ts->reg] = NULL;
3236     }
3237     ts->val_type = (free_or_dead < 0
3238                     || ts->temp_local
3239                     || ts->temp_global
3240                     ? TEMP_VAL_MEM : TEMP_VAL_DEAD);
3241 }
3242 
3243 /* Mark a temporary as dead.  */
3244 static inline void temp_dead(TCGContext *s, TCGTemp *ts)
3245 {
3246     temp_free_or_dead(s, ts, 1);
3247 }
3248 
3249 /* Sync a temporary to memory. 'allocated_regs' is used in case a temporary
3250    register needs to be allocated to store a constant.  If 'free_or_dead'
3251    is non-zero, subsequently release the temporary; if it is positive, the
3252    temp is dead; if it is negative, the temp is free.  */
3253 static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs,
3254                       TCGRegSet preferred_regs, int free_or_dead)
3255 {
3256     if (ts->fixed_reg) {
3257         return;
3258     }
3259     if (!ts->mem_coherent) {
3260         if (!ts->mem_allocated) {
3261             temp_allocate_frame(s, ts);
3262         }
3263         switch (ts->val_type) {
3264         case TEMP_VAL_CONST:
3265             /* If we're going to free the temp immediately, then we won't
3266                require it later in a register, so attempt to store the
3267                constant to memory directly.  */
3268             if (free_or_dead
3269                 && tcg_out_sti(s, ts->type, ts->val,
3270                                ts->mem_base->reg, ts->mem_offset)) {
3271                 break;
3272             }
3273             temp_load(s, ts, tcg_target_available_regs[ts->type],
3274                       allocated_regs, preferred_regs);
3275             /* fallthrough */
3276 
3277         case TEMP_VAL_REG:
3278             tcg_out_st(s, ts->type, ts->reg,
3279                        ts->mem_base->reg, ts->mem_offset);
3280             break;
3281 
3282         case TEMP_VAL_MEM:
3283             break;
3284 
3285         case TEMP_VAL_DEAD:
3286         default:
3287             tcg_abort();
3288         }
3289         ts->mem_coherent = 1;
3290     }
3291     if (free_or_dead) {
3292         temp_free_or_dead(s, ts, free_or_dead);
3293     }
3294 }
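
/* Thus temp_sync(s, ts, regs, 0, -1) writes TS back to its memory slot
   and then frees it, which is what tcg_reg_free below relies on, while
   a final argument of +1 syncs and then kills the temp (globals and
   locals fall back to TEMP_VAL_MEM rather than TEMP_VAL_DEAD).  */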
3295 
3296 /* free register 'reg' by spilling the corresponding temporary if necessary */
3297 static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs)
3298 {
3299     TCGTemp *ts = s->reg_to_temp[reg];
3300     if (ts != NULL) {
3301         temp_sync(s, ts, allocated_regs, 0, -1);
3302     }
3303 }
3304 
3305 /**
3306  * tcg_reg_alloc:
3307  * @required_regs: Set of registers in which we must allocate.
3308  * @allocated_regs: Set of registers which must be avoided.
3309  * @preferred_regs: Set of registers we should prefer.
3310  * @rev: True if we search the registers in "indirect" order.
3311  *
3312  * The allocated register must be in @required_regs & ~@allocated_regs,
3313  * but if we can put it in @preferred_regs we may save a move later.
3314  */
3315 static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs,
3316                             TCGRegSet allocated_regs,
3317                             TCGRegSet preferred_regs, bool rev)
3318 {
3319     int i, j, f, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
3320     TCGRegSet reg_ct[2];
3321     const int *order;
3322 
3323     reg_ct[1] = required_regs & ~allocated_regs;
3324     tcg_debug_assert(reg_ct[1] != 0);
3325     reg_ct[0] = reg_ct[1] & preferred_regs;
3326 
3327     /* Skip the preferred_regs option if it cannot be satisfied,
3328        or if the preference made no difference.  */
3329     f = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
3330 
3331     order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
3332 
3333     /* Try free registers, preferences first.  */
3334     for (j = f; j < 2; j++) {
3335         TCGRegSet set = reg_ct[j];
3336 
3337         if (tcg_regset_single(set)) {
3338             /* One register in the set.  */
3339             TCGReg reg = tcg_regset_first(set);
3340             if (s->reg_to_temp[reg] == NULL) {
3341                 return reg;
3342             }
3343         } else {
3344             for (i = 0; i < n; i++) {
3345                 TCGReg reg = order[i];
3346                 if (s->reg_to_temp[reg] == NULL &&
3347                     tcg_regset_test_reg(set, reg)) {
3348                     return reg;
3349                 }
3350             }
3351         }
3352     }
3353 
3354     /* We must spill something.  */
3355     for (j = f; j < 2; j++) {
3356         TCGRegSet set = reg_ct[j];
3357 
3358         if (tcg_regset_single(set)) {
3359             /* One register in the set.  */
3360             TCGReg reg = tcg_regset_first(set);
3361             tcg_reg_free(s, reg, allocated_regs);
3362             return reg;
3363         } else {
3364             for (i = 0; i < n; i++) {
3365                 TCGReg reg = order[i];
3366                 if (tcg_regset_test_reg(set, reg)) {
3367                     tcg_reg_free(s, reg, allocated_regs);
3368                     return reg;
3369                 }
3370             }
3371         }
3372     }
3373 
3374     tcg_abort();
3375 }
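
/* For instance, if required_regs is { R0, R1 }, R0 holds a live temp
   and R1 is free, the first loop returns R1 without spilling; only
   when every candidate register is occupied does the second loop evict
   one, in allocation order, via tcg_reg_free.  */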
3376 
3377 /* Make sure the temporary is in a register.  If needed, allocate the register
3378    from DESIRED while avoiding ALLOCATED.  */
3379 static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
3380                       TCGRegSet allocated_regs, TCGRegSet preferred_regs)
3381 {
3382     TCGReg reg;
3383 
3384     switch (ts->val_type) {
3385     case TEMP_VAL_REG:
3386         return;
3387     case TEMP_VAL_CONST:
3388         reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
3389                             preferred_regs, ts->indirect_base);
3390         tcg_out_movi(s, ts->type, reg, ts->val);
3391         ts->mem_coherent = 0;
3392         break;
3393     case TEMP_VAL_MEM:
3394         reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
3395                             preferred_regs, ts->indirect_base);
3396         tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
3397         ts->mem_coherent = 1;
3398         break;
3399     case TEMP_VAL_DEAD:
3400     default:
3401         tcg_abort();
3402     }
3403     ts->reg = reg;
3404     ts->val_type = TEMP_VAL_REG;
3405     s->reg_to_temp[reg] = ts;
3406 }
3407 
3408 /* Save a temporary to memory. 'allocated_regs' is used in case a
3409    temporary register needs to be allocated to store a constant.  */
3410 static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
3411 {
3412     /* The liveness analysis already ensures that globals are back
3413        in memory. Keep a tcg_debug_assert for safety. */
3414     tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || ts->fixed_reg);
3415 }
3416 
3417 /* save globals to their canonical location and assume they can be
3418    modified by the following code. 'allocated_regs' is used in case a
3419    temporary register needs to be allocated to store a constant. */
3420 static void save_globals(TCGContext *s, TCGRegSet allocated_regs)
3421 {
3422     int i, n;
3423 
3424     for (i = 0, n = s->nb_globals; i < n; i++) {
3425         temp_save(s, &s->temps[i], allocated_regs);
3426     }
3427 }
3428 
3429 /* sync globals to their canonical location and assume they can be
3430    read by the following code. 'allocated_regs' is used in case a
3431    temporary register needs to be allocated to store a constant. */
3432 static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
3433 {
3434     int i, n;
3435 
3436     for (i = 0, n = s->nb_globals; i < n; i++) {
3437         TCGTemp *ts = &s->temps[i];
3438         tcg_debug_assert(ts->val_type != TEMP_VAL_REG
3439                          || ts->fixed_reg
3440                          || ts->mem_coherent);
3441     }
3442 }
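
/* Note the asymmetry with save_globals above: saving assumes the
   following code may write a global, so liveness must already have
   pushed it back to memory; syncing only assumes the global may be
   read, so keeping a register copy is fine as long as the memory slot
   is coherent.  */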
3443 
3444 /* at the end of a basic block, we assume all temporaries are dead and
3445    all globals are stored at their canonical location. */
3446 static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
3447 {
3448     int i;
3449 
3450     for (i = s->nb_globals; i < s->nb_temps; i++) {
3451         TCGTemp *ts = &s->temps[i];
3452         if (ts->temp_local) {
3453             temp_save(s, ts, allocated_regs);
3454         } else {
3455             /* The liveness analysis already ensures that temps are dead.
3456                Keep a tcg_debug_assert for safety. */
3457             tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
3458         }
3459     }
3460 
3461     save_globals(s, allocated_regs);
3462 }
3463 
3464 /*
3465  * At a conditional branch, we assume all temporaries are dead and
3466  * all globals and local temps are synced to their location.
3467  */
3468 static void tcg_reg_alloc_cbranch(TCGContext *s, TCGRegSet allocated_regs)
3469 {
3470     sync_globals(s, allocated_regs);
3471 
3472     for (int i = s->nb_globals; i < s->nb_temps; i++) {
3473         TCGTemp *ts = &s->temps[i];
3474         /*
3475          * The liveness analysis already ensures that temps are dead.
3476          * Keep tcg_debug_asserts for safety.
3477          */
3478         if (ts->temp_local) {
3479             tcg_debug_assert(ts->val_type != TEMP_VAL_REG || ts->mem_coherent);
3480         } else {
3481             tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
3482         }
3483     }
3484 }
3485 
3486 /*
3487  * Specialized code generation for INDEX_op_movi_*.
3488  */
3489 static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots,
3490                                   tcg_target_ulong val, TCGLifeData arg_life,
3491                                   TCGRegSet preferred_regs)
3492 {
3493     /* ENV should not be modified.  */
3494     tcg_debug_assert(!ots->fixed_reg);
3495 
3496     /* The movi is not explicitly generated here.  */
3497     if (ots->val_type == TEMP_VAL_REG) {
3498         s->reg_to_temp[ots->reg] = NULL;
3499     }
3500     ots->val_type = TEMP_VAL_CONST;
3501     ots->val = val;
3502     ots->mem_coherent = 0;
3503     if (NEED_SYNC_ARG(0)) {
3504         temp_sync(s, ots, s->reserved_regs, preferred_regs, IS_DEAD_ARG(0));
3505     } else if (IS_DEAD_ARG(0)) {
3506         temp_dead(s, ots);
3507     }
3508 }
3509 
3510 static void tcg_reg_alloc_movi(TCGContext *s, const TCGOp *op)
3511 {
3512     TCGTemp *ots = arg_temp(op->args[0]);
3513     tcg_target_ulong val = op->args[1];
3514 
3515     tcg_reg_alloc_do_movi(s, ots, val, op->life, op->output_pref[0]);
3516 }
3517 
3518 /*
3519  * Specialized code generation for INDEX_op_mov_*.
3520  */
3521 static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op)
3522 {
3523     const TCGLifeData arg_life = op->life;
3524     TCGRegSet allocated_regs, preferred_regs;
3525     TCGTemp *ts, *ots;
3526     TCGType otype, itype;
3527 
3528     allocated_regs = s->reserved_regs;
3529     preferred_regs = op->output_pref[0];
3530     ots = arg_temp(op->args[0]);
3531     ts = arg_temp(op->args[1]);
3532 
3533     /* ENV should not be modified.  */
3534     tcg_debug_assert(!ots->fixed_reg);
3535 
3536     /* Note that otype != itype for no-op truncation.  */
3537     otype = ots->type;
3538     itype = ts->type;
3539 
3540     if (ts->val_type == TEMP_VAL_CONST) {
3541         /* propagate constant or generate sti */
3542         tcg_target_ulong val = ts->val;
3543         if (IS_DEAD_ARG(1)) {
3544             temp_dead(s, ts);
3545         }
3546         tcg_reg_alloc_do_movi(s, ots, val, arg_life, preferred_regs);
3547         return;
3548     }
3549 
3550     /* If the source value is in memory, we're going to be forced
3551        to have it in a register in order to perform the copy.  Copy
3552        the SOURCE value into its own register first, so that we
3553        don't have to reload SOURCE the next time it is used. */
3554     if (ts->val_type == TEMP_VAL_MEM) {
3555         temp_load(s, ts, tcg_target_available_regs[itype],
3556                   allocated_regs, preferred_regs);
3557     }
3558 
3559     tcg_debug_assert(ts->val_type == TEMP_VAL_REG);
3560     if (IS_DEAD_ARG(0)) {
3561         /* mov to a non-saved dead register makes no sense (even with
3562            liveness analysis disabled). */
3563         tcg_debug_assert(NEED_SYNC_ARG(0));
3564         if (!ots->mem_allocated) {
3565             temp_allocate_frame(s, ots);
3566         }
3567         tcg_out_st(s, otype, ts->reg, ots->mem_base->reg, ots->mem_offset);
3568         if (IS_DEAD_ARG(1)) {
3569             temp_dead(s, ts);
3570         }
3571         temp_dead(s, ots);
3572     } else {
3573         if (IS_DEAD_ARG(1) && !ts->fixed_reg) {
3574             /* the mov can be suppressed */
3575             if (ots->val_type == TEMP_VAL_REG) {
3576                 s->reg_to_temp[ots->reg] = NULL;
3577             }
3578             ots->reg = ts->reg;
3579             temp_dead(s, ts);
3580         } else {
3581             if (ots->val_type != TEMP_VAL_REG) {
3582                 /* When allocating a new register, make sure to not spill the
3583                    input one. */
3584                 tcg_regset_set_reg(allocated_regs, ts->reg);
3585                 ots->reg = tcg_reg_alloc(s, tcg_target_available_regs[otype],
3586                                          allocated_regs, preferred_regs,
3587                                          ots->indirect_base);
3588             }
3589             if (!tcg_out_mov(s, otype, ots->reg, ts->reg)) {
3590                 /*
3591                  * Cross register class move not supported.
3592                  * Store the source register into the destination slot
3593                  * and leave the destination temp as TEMP_VAL_MEM.
3594                  */
3595                 assert(!ots->fixed_reg);
3596                 if (!ots->mem_allocated) {
3597                     temp_allocate_frame(s, ots);
3598                 }
3599                 tcg_out_st(s, ts->type, ts->reg,
3600                            ots->mem_base->reg, ots->mem_offset);
3601                 ots->mem_coherent = 1;
3602                 temp_free_or_dead(s, ots, -1);
3603                 return;
3604             }
3605         }
3606         ots->val_type = TEMP_VAL_REG;
3607         ots->mem_coherent = 0;
3608         s->reg_to_temp[ots->reg] = ots;
3609         if (NEED_SYNC_ARG(0)) {
3610             temp_sync(s, ots, allocated_regs, 0, 0);
3611         }
3612     }
3613 }
3614 
3615 /*
3616  * Specialized code generation for INDEX_op_dup_vec.
3617  */
3618 static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op)
3619 {
3620     const TCGLifeData arg_life = op->life;
3621     TCGRegSet dup_out_regs, dup_in_regs;
3622     TCGTemp *its, *ots;
3623     TCGType itype, vtype;
3624     intptr_t endian_fixup;
3625     unsigned vece;
3626     bool ok;
3627 
3628     ots = arg_temp(op->args[0]);
3629     its = arg_temp(op->args[1]);
3630 
3631     /* ENV should not be modified.  */
3632     tcg_debug_assert(!ots->fixed_reg);
3633 
3634     itype = its->type;
3635     vece = TCGOP_VECE(op);
3636     vtype = TCGOP_VECL(op) + TCG_TYPE_V64;
3637 
3638     if (its->val_type == TEMP_VAL_CONST) {
3639         /* Propagate constant via movi -> dupi.  */
3640         tcg_target_ulong val = its->val;
3641         if (IS_DEAD_ARG(1)) {
3642             temp_dead(s, its);
3643         }
3644         tcg_reg_alloc_do_movi(s, ots, val, arg_life, op->output_pref[0]);
3645         return;
3646     }
3647 
3648     dup_out_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs;
3649     dup_in_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[1].regs;
3650 
3651     /* Allocate the output register now.  */
3652     if (ots->val_type != TEMP_VAL_REG) {
3653         TCGRegSet allocated_regs = s->reserved_regs;
3654 
3655         if (!IS_DEAD_ARG(1) && its->val_type == TEMP_VAL_REG) {
3656             /* Make sure to not spill the input register. */
3657             tcg_regset_set_reg(allocated_regs, its->reg);
3658         }
3659         ots->reg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
3660                                  op->output_pref[0], ots->indirect_base);
3661         ots->val_type = TEMP_VAL_REG;
3662         ots->mem_coherent = 0;
3663         s->reg_to_temp[ots->reg] = ots;
3664     }
3665 
3666     switch (its->val_type) {
3667     case TEMP_VAL_REG:
3668         /*
3669          * The dup constraints must be broad, covering all possible VECE.
3670          * However, tcg_op_dup_vec() gets to see the VECE and we allow it
3671          * to fail, indicating that extra moves are required for that case.
3672          */
3673         if (tcg_regset_test_reg(dup_in_regs, its->reg)) {
3674             if (tcg_out_dup_vec(s, vtype, vece, ots->reg, its->reg)) {
3675                 goto done;
3676             }
3677             /* Try again from memory or a vector input register.  */
3678         }
3679         if (!its->mem_coherent) {
3680             /*
3681              * The input register is not synced, and so an extra store
3682              * would be required to use memory.  Attempt an integer-vector
3683              * register move first.  We do not have a TCGRegSet for this.
3684              */
3685             if (tcg_out_mov(s, itype, ots->reg, its->reg)) {
3686                 break;
3687             }
3688             /* Sync the temp back to its slot and load from there.  */
3689             temp_sync(s, its, s->reserved_regs, 0, 0);
3690         }
3691         /* fall through */
3692 
3693     case TEMP_VAL_MEM:
3694 #ifdef HOST_WORDS_BIGENDIAN
3695         endian_fixup = itype == TCG_TYPE_I32 ? 4 : 8;
3696         endian_fixup -= 1 << vece;
3697 #else
3698         endian_fixup = 0;
3699 #endif
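        /* E.g. a big-endian host loading the low MO_8 element of an
           I64 in memory needs a fixup of 8 - 1 = 7, since the least
           significant byte lives at the highest address there.  */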
3700         if (tcg_out_dupm_vec(s, vtype, vece, ots->reg, its->mem_base->reg,
3701                              its->mem_offset + endian_fixup)) {
3702             goto done;
3703         }
3704         tcg_out_ld(s, itype, ots->reg, its->mem_base->reg, its->mem_offset);
3705         break;
3706 
3707     default:
3708         g_assert_not_reached();
3709     }
3710 
3711     /* We now have a vector input register, so dup must succeed. */
3712     ok = tcg_out_dup_vec(s, vtype, vece, ots->reg, ots->reg);
3713     tcg_debug_assert(ok);
3714 
3715  done:
3716     if (IS_DEAD_ARG(1)) {
3717         temp_dead(s, its);
3718     }
3719     if (NEED_SYNC_ARG(0)) {
3720         temp_sync(s, ots, s->reserved_regs, 0, 0);
3721     }
3722     if (IS_DEAD_ARG(0)) {
3723         temp_dead(s, ots);
3724     }
3725 }
3726 
3727 static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
3728 {
3729     const TCGLifeData arg_life = op->life;
3730     const TCGOpDef * const def = &tcg_op_defs[op->opc];
3731     TCGRegSet i_allocated_regs;
3732     TCGRegSet o_allocated_regs;
3733     int i, k, nb_iargs, nb_oargs;
3734     TCGReg reg;
3735     TCGArg arg;
3736     const TCGArgConstraint *arg_ct;
3737     TCGTemp *ts;
3738     TCGArg new_args[TCG_MAX_OP_ARGS];
3739     int const_args[TCG_MAX_OP_ARGS];
3740 
3741     nb_oargs = def->nb_oargs;
3742     nb_iargs = def->nb_iargs;
3743 
3744     /* copy constants */
3745     memcpy(new_args + nb_oargs + nb_iargs,
3746            op->args + nb_oargs + nb_iargs,
3747            sizeof(TCGArg) * def->nb_cargs);
3748 
3749     i_allocated_regs = s->reserved_regs;
3750     o_allocated_regs = s->reserved_regs;
3751 
3752     /* satisfy input constraints */
3753     for (k = 0; k < nb_iargs; k++) {
3754         TCGRegSet i_preferred_regs, o_preferred_regs;
3755 
3756         i = def->args_ct[nb_oargs + k].sort_index;
3757         arg = op->args[i];
3758         arg_ct = &def->args_ct[i];
3759         ts = arg_temp(arg);
3760 
3761         if (ts->val_type == TEMP_VAL_CONST
3762             && tcg_target_const_match(ts->val, ts->type, arg_ct)) {
3763             /* constant is OK for instruction */
3764             const_args[i] = 1;
3765             new_args[i] = ts->val;
3766             continue;
3767         }
3768 
3769         i_preferred_regs = o_preferred_regs = 0;
3770         if (arg_ct->ialias) {
3771             o_preferred_regs = op->output_pref[arg_ct->alias_index];
3772             if (ts->fixed_reg) {
3773                 /* if fixed register, we must allocate a new register
3774                    if the alias is not the same register */
3775                 if (arg != op->args[arg_ct->alias_index]) {
3776                     goto allocate_in_reg;
3777                 }
3778             } else {
3779                 /* if the input is aliased to an output and if it is
3780                    not dead after the instruction, we must allocate
3781                    a new register and move it */
3782                 if (!IS_DEAD_ARG(i)) {
3783                     goto allocate_in_reg;
3784                 }
3785 
3786                 /* check if the current register has already been allocated
3787                    for another input aliased to an output */
3788                 if (ts->val_type == TEMP_VAL_REG) {
3789                     int k2, i2;
3790                     reg = ts->reg;
3791                     for (k2 = 0; k2 < k; k2++) {
3792                         i2 = def->args_ct[nb_oargs + k2].sort_index;
3793                         if (def->args_ct[i2].ialias && reg == new_args[i2]) {
3794                             goto allocate_in_reg;
3795                         }
3796                     }
3797                 }
3798                 i_preferred_regs = o_preferred_regs;
3799             }
3800         }
3801 
3802         temp_load(s, ts, arg_ct->regs, i_allocated_regs, i_preferred_regs);
3803         reg = ts->reg;
3804 
3805         if (tcg_regset_test_reg(arg_ct->regs, reg)) {
3806             /* nothing to do: the constraint is satisfied */
3807         } else {
3808         allocate_in_reg:
3809             /* allocate a new register matching the constraint
3810                and move the temporary register into it */
3811             temp_load(s, ts, tcg_target_available_regs[ts->type],
3812                       i_allocated_regs, 0);
3813             reg = tcg_reg_alloc(s, arg_ct->regs, i_allocated_regs,
3814                                 o_preferred_regs, ts->indirect_base);
3815             if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
3816                 /*
3817                  * Cross register class move not supported.  Sync the
3818                  * temp back to its slot and load from there.
3819                  */
3820                 temp_sync(s, ts, i_allocated_regs, 0, 0);
3821                 tcg_out_ld(s, ts->type, reg,
3822                            ts->mem_base->reg, ts->mem_offset);
3823             }
3824         }
3825         new_args[i] = reg;
3826         const_args[i] = 0;
3827         tcg_regset_set_reg(i_allocated_regs, reg);
3828     }
3829 
3830     /* mark dead temporaries and free the associated registers */
3831     for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
3832         if (IS_DEAD_ARG(i)) {
3833             temp_dead(s, arg_temp(op->args[i]));
3834         }
3835     }
3836 
3837     if (def->flags & TCG_OPF_COND_BRANCH) {
3838         tcg_reg_alloc_cbranch(s, i_allocated_regs);
3839     } else if (def->flags & TCG_OPF_BB_END) {
3840         tcg_reg_alloc_bb_end(s, i_allocated_regs);
3841     } else {
3842         if (def->flags & TCG_OPF_CALL_CLOBBER) {
3843             /* XXX: permit generic clobber register list? */
3844             for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
3845                 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
3846                     tcg_reg_free(s, i, i_allocated_regs);
3847                 }
3848             }
3849         }
3850         if (def->flags & TCG_OPF_SIDE_EFFECTS) {
3851             /* sync globals if the op has side effects and might trigger
3852                an exception. */
3853             sync_globals(s, i_allocated_regs);
3854         }
3855 
3856         /* satisfy the output constraints */
3857         for (k = 0; k < nb_oargs; k++) {
3858             i = def->args_ct[k].sort_index;
3859             arg = op->args[i];
3860             arg_ct = &def->args_ct[i];
3861             ts = arg_temp(arg);
3862 
3863             /* ENV should not be modified.  */
3864             tcg_debug_assert(!ts->fixed_reg);
3865 
3866             if (arg_ct->oalias && !const_args[arg_ct->alias_index]) {
3867                 reg = new_args[arg_ct->alias_index];
3868             } else if (arg_ct->newreg) {
3869                 reg = tcg_reg_alloc(s, arg_ct->regs,
3870                                     i_allocated_regs | o_allocated_regs,
3871                                     op->output_pref[k], ts->indirect_base);
3872             } else {
3873                 reg = tcg_reg_alloc(s, arg_ct->regs, o_allocated_regs,
3874                                     op->output_pref[k], ts->indirect_base);
3875             }
3876             tcg_regset_set_reg(o_allocated_regs, reg);
3877             if (ts->val_type == TEMP_VAL_REG) {
3878                 s->reg_to_temp[ts->reg] = NULL;
3879             }
3880             ts->val_type = TEMP_VAL_REG;
3881             ts->reg = reg;
3882             /*
3883              * Temp value is modified, so the value kept in memory is
3884              * potentially not the same.
3885              */
3886             ts->mem_coherent = 0;
3887             s->reg_to_temp[reg] = ts;
3888             new_args[i] = reg;
3889         }
3890     }
3891 
3892     /* emit instruction */
3893     if (def->flags & TCG_OPF_VECTOR) {
3894         tcg_out_vec_op(s, op->opc, TCGOP_VECL(op), TCGOP_VECE(op),
3895                        new_args, const_args);
3896     } else {
3897         tcg_out_op(s, op->opc, new_args, const_args);
3898     }
3899 
3900     /* move the outputs in the correct register if needed */
3901     for (i = 0; i < nb_oargs; i++) {
3902         ts = arg_temp(op->args[i]);
3903 
3904         /* ENV should not be modified.  */
3905         tcg_debug_assert(!ts->fixed_reg);
3906 
3907         if (NEED_SYNC_ARG(i)) {
3908             temp_sync(s, ts, o_allocated_regs, 0, IS_DEAD_ARG(i));
3909         } else if (IS_DEAD_ARG(i)) {
3910             temp_dead(s, ts);
3911         }
3912     }
3913 }
3914 
3915 #ifdef TCG_TARGET_STACK_GROWSUP
3916 #define STACK_DIR(x) (-(x))
3917 #else
3918 #define STACK_DIR(x) (x)
3919 #endif
3920 
3921 static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
3922 {
3923     const int nb_oargs = TCGOP_CALLO(op);
3924     const int nb_iargs = TCGOP_CALLI(op);
3925     const TCGLifeData arg_life = op->life;
3926     int flags, nb_regs, i;
3927     TCGReg reg;
3928     TCGArg arg;
3929     TCGTemp *ts;
3930     intptr_t stack_offset;
3931     size_t call_stack_size;
3932     tcg_insn_unit *func_addr;
3933     int allocate_args;
3934     TCGRegSet allocated_regs;
3935 
3936     func_addr = (tcg_insn_unit *)(intptr_t)op->args[nb_oargs + nb_iargs];
3937     flags = op->args[nb_oargs + nb_iargs + 1];
3938 
3939     nb_regs = ARRAY_SIZE(tcg_target_call_iarg_regs);
3940     if (nb_regs > nb_iargs) {
3941         nb_regs = nb_iargs;
3942     }
3943 
3944     /* assign stack slots first */
3945     call_stack_size = (nb_iargs - nb_regs) * sizeof(tcg_target_long);
3946     call_stack_size = (call_stack_size + TCG_TARGET_STACK_ALIGN - 1) &
3947         ~(TCG_TARGET_STACK_ALIGN - 1);
3948     allocate_args = (call_stack_size > TCG_STATIC_CALL_ARGS_SIZE);
3949     if (allocate_args) {
3950         /* XXX: if more than TCG_STATIC_CALL_ARGS_SIZE is needed,
3951            preallocate call stack */
3952         tcg_abort();
3953     }
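
    /* The TCG prologue reserves TCG_STATIC_CALL_ARGS_SIZE bytes of
       stack up front, so no per-call stack adjustment is required as
       long as the outgoing arguments fit in that area.  */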
3954 
3955     stack_offset = TCG_TARGET_CALL_STACK_OFFSET;
3956     for (i = nb_regs; i < nb_iargs; i++) {
3957         arg = op->args[nb_oargs + i];
3958 #ifdef TCG_TARGET_STACK_GROWSUP
3959         stack_offset -= sizeof(tcg_target_long);
3960 #endif
3961         if (arg != TCG_CALL_DUMMY_ARG) {
3962             ts = arg_temp(arg);
3963             temp_load(s, ts, tcg_target_available_regs[ts->type],
3964                       s->reserved_regs, 0);
3965             tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK, stack_offset);
3966         }
3967 #ifndef TCG_TARGET_STACK_GROWSUP
3968         stack_offset += sizeof(tcg_target_long);
3969 #endif
3970     }
3971 
3972     /* assign input registers */
3973     allocated_regs = s->reserved_regs;
3974     for (i = 0; i < nb_regs; i++) {
3975         arg = op->args[nb_oargs + i];
3976         if (arg != TCG_CALL_DUMMY_ARG) {
3977             ts = arg_temp(arg);
3978             reg = tcg_target_call_iarg_regs[i];
3979 
3980             if (ts->val_type == TEMP_VAL_REG) {
3981                 if (ts->reg != reg) {
3982                     tcg_reg_free(s, reg, allocated_regs);
3983                     if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
3984                         /*
3985                          * Cross register class move not supported.  Sync the
3986                          * temp back to its slot and load from there.
3987                          */
3988                         temp_sync(s, ts, allocated_regs, 0, 0);
3989                         tcg_out_ld(s, ts->type, reg,
3990                                    ts->mem_base->reg, ts->mem_offset);
3991                     }
3992                 }
3993             } else {
3994                 TCGRegSet arg_set = 0;
3995 
3996                 tcg_reg_free(s, reg, allocated_regs);
3997                 tcg_regset_set_reg(arg_set, reg);
3998                 temp_load(s, ts, arg_set, allocated_regs, 0);
3999             }
4000 
4001             tcg_regset_set_reg(allocated_regs, reg);
4002         }
4003     }
4004 
4005     /* mark dead temporaries and free the associated registers */
4006     for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
4007         if (IS_DEAD_ARG(i)) {
4008             temp_dead(s, arg_temp(op->args[i]));
4009         }
4010     }
4011 
4012     /* clobber call registers */
4013     for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
4014         if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
4015             tcg_reg_free(s, i, allocated_regs);
4016         }
4017     }
4018 
4019     /* Save globals if they might be written by the helper; sync them if
4020        they might be read. */
4021     if (flags & TCG_CALL_NO_READ_GLOBALS) {
4022         /* Nothing to do */
4023     } else if (flags & TCG_CALL_NO_WRITE_GLOBALS) {
4024         sync_globals(s, allocated_regs);
4025     } else {
4026         save_globals(s, allocated_regs);
4027     }
4028 
4029     tcg_out_call(s, func_addr);
4030 
4031     /* assign output registers and emit moves if needed */
4032     for (i = 0; i < nb_oargs; i++) {
4033         arg = op->args[i];
4034         ts = arg_temp(arg);
4035 
4036         /* ENV should not be modified.  */
4037         tcg_debug_assert(!ts->fixed_reg);
4038 
4039         reg = tcg_target_call_oarg_regs[i];
4040         tcg_debug_assert(s->reg_to_temp[reg] == NULL);
4041         if (ts->val_type == TEMP_VAL_REG) {
4042             s->reg_to_temp[ts->reg] = NULL;
4043         }
4044         ts->val_type = TEMP_VAL_REG;
4045         ts->reg = reg;
4046         ts->mem_coherent = 0;
4047         s->reg_to_temp[reg] = ts;
4048         if (NEED_SYNC_ARG(i)) {
4049             temp_sync(s, ts, allocated_regs, 0, IS_DEAD_ARG(i));
4050         } else if (IS_DEAD_ARG(i)) {
4051             temp_dead(s, ts);
4052         }
4053     }
4054 }
4055 
4056 #ifdef CONFIG_PROFILER
4057 
4058 /* avoid copy/paste errors */
4059 #define PROF_ADD(to, from, field)                       \
4060     do {                                                \
4061         (to)->field += qatomic_read(&((from)->field));  \
4062     } while (0)
4063 
4064 #define PROF_MAX(to, from, field)                                       \
4065     do {                                                                \
4066         typeof((from)->field) val__ = qatomic_read(&((from)->field));   \
4067         if (val__ > (to)->field) {                                      \
4068             (to)->field = val__;                                        \
4069         }                                                               \
4070     } while (0)
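
/* For instance, PROF_ADD(prof, orig, tb_count) expands to
       (prof)->tb_count += qatomic_read(&((orig)->tb_count));
   so each field is named exactly once per call site.  */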
4071 
4072 /* Pass in a zeroed @prof */
4073 static inline
4074 void tcg_profile_snapshot(TCGProfile *prof, bool counters, bool table)
4075 {
4076     unsigned int n_ctxs = qatomic_read(&n_tcg_ctxs);
4077     unsigned int i;
4078 
4079     for (i = 0; i < n_ctxs; i++) {
4080         TCGContext *s = qatomic_read(&tcg_ctxs[i]);
4081         const TCGProfile *orig = &s->prof;
4082 
4083         if (counters) {
4084             PROF_ADD(prof, orig, cpu_exec_time);
4085             PROF_ADD(prof, orig, tb_count1);
4086             PROF_ADD(prof, orig, tb_count);
4087             PROF_ADD(prof, orig, op_count);
4088             PROF_MAX(prof, orig, op_count_max);
4089             PROF_ADD(prof, orig, temp_count);
4090             PROF_MAX(prof, orig, temp_count_max);
4091             PROF_ADD(prof, orig, del_op_count);
4092             PROF_ADD(prof, orig, code_in_len);
4093             PROF_ADD(prof, orig, code_out_len);
4094             PROF_ADD(prof, orig, search_out_len);
4095             PROF_ADD(prof, orig, interm_time);
4096             PROF_ADD(prof, orig, code_time);
4097             PROF_ADD(prof, orig, la_time);
4098             PROF_ADD(prof, orig, opt_time);
4099             PROF_ADD(prof, orig, restore_count);
4100             PROF_ADD(prof, orig, restore_time);
4101         }
4102         if (table) {
4103             int i;
4104 
4105             for (i = 0; i < NB_OPS; i++) {
4106                 PROF_ADD(prof, orig, table_op_count[i]);
4107             }
4108         }
4109     }
4110 }
4111 
4112 #undef PROF_ADD
4113 #undef PROF_MAX
4114 
4115 static void tcg_profile_snapshot_counters(TCGProfile *prof)
4116 {
4117     tcg_profile_snapshot(prof, true, false);
4118 }
4119 
4120 static void tcg_profile_snapshot_table(TCGProfile *prof)
4121 {
4122     tcg_profile_snapshot(prof, false, true);
4123 }
4124 
4125 void tcg_dump_op_count(void)
4126 {
4127     TCGProfile prof = {};
4128     int i;
4129 
4130     tcg_profile_snapshot_table(&prof);
4131     for (i = 0; i < NB_OPS; i++) {
4132         qemu_printf("%s %" PRId64 "\n", tcg_op_defs[i].name,
4133                     prof.table_op_count[i]);
4134     }
4135 }
4136 
4137 int64_t tcg_cpu_exec_time(void)
4138 {
4139     unsigned int n_ctxs = qatomic_read(&n_tcg_ctxs);
4140     unsigned int i;
4141     int64_t ret = 0;
4142 
4143     for (i = 0; i < n_ctxs; i++) {
4144         const TCGContext *s = qatomic_read(&tcg_ctxs[i]);
4145         const TCGProfile *prof = &s->prof;
4146 
4147         ret += qatomic_read(&prof->cpu_exec_time);
4148     }
4149     return ret;
4150 }
4151 #else
4152 void tcg_dump_op_count(void)
4153 {
4154     qemu_printf("[TCG profiler not compiled]\n");
4155 }
4156 
4157 int64_t tcg_cpu_exec_time(void)
4158 {
4159     error_report("%s: TCG profiler not compiled", __func__);
4160     exit(EXIT_FAILURE);
4161 }
4162 #endif
4163 
4164 
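/*
 * Returns the size in bytes of the generated code on success.  A
 * negative value means translation must be restarted: -1 if the output
 * crossed the code_gen_highwater mark, -2 if the code size outgrew the
 * 16-bit offsets recorded in gen_insn_end_off or a relocation could
 * not be resolved.
 */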
4165 int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
4166 {
4167 #ifdef CONFIG_PROFILER
4168     TCGProfile *prof = &s->prof;
4169 #endif
4170     int i, num_insns;
4171     TCGOp *op;
4172 
4173 #ifdef CONFIG_PROFILER
4174     {
4175         int n = 0;
4176 
4177         QTAILQ_FOREACH(op, &s->ops, link) {
4178             n++;
4179         }
4180         qatomic_set(&prof->op_count, prof->op_count + n);
4181         if (n > prof->op_count_max) {
4182             qatomic_set(&prof->op_count_max, n);
4183         }
4184 
4185         n = s->nb_temps;
4186         qatomic_set(&prof->temp_count, prof->temp_count + n);
4187         if (n > prof->temp_count_max) {
4188             qatomic_set(&prof->temp_count_max, n);
4189         }
4190     }
4191 #endif
4192 
4193 #ifdef DEBUG_DISAS
4194     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)
4195                  && qemu_log_in_addr_range(tb->pc))) {
4196         FILE *logfile = qemu_log_lock();
4197         qemu_log("OP:\n");
4198         tcg_dump_ops(s, false);
4199         qemu_log("\n");
4200         qemu_log_unlock(logfile);
4201     }
4202 #endif
4203 
4204 #ifdef CONFIG_DEBUG_TCG
4205     /* Ensure all labels referenced have been emitted.  */
4206     {
4207         TCGLabel *l;
4208         bool error = false;
4209 
4210         QSIMPLEQ_FOREACH(l, &s->labels, next) {
4211             if (unlikely(!l->present) && l->refs) {
4212                 qemu_log_mask(CPU_LOG_TB_OP,
4213                               "$L%d referenced but not present.\n", l->id);
4214                 error = true;
4215             }
4216         }
4217         assert(!error);
4218     }
4219 #endif
4220 
4221 #ifdef CONFIG_PROFILER
4222     qatomic_set(&prof->opt_time, prof->opt_time - profile_getclock());
4223 #endif
4224 
4225 #ifdef USE_TCG_OPTIMIZATIONS
4226     tcg_optimize(s);
4227 #endif
4228 
4229 #ifdef CONFIG_PROFILER
4230     qatomic_set(&prof->opt_time, prof->opt_time + profile_getclock());
4231     qatomic_set(&prof->la_time, prof->la_time - profile_getclock());
4232 #endif
4233 
4234     reachable_code_pass(s);
4235     liveness_pass_1(s);
4236 
4237     if (s->nb_indirects > 0) {
4238 #ifdef DEBUG_DISAS
4239         if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
4240                      && qemu_log_in_addr_range(tb->pc))) {
4241             FILE *logfile = qemu_log_lock();
4242             qemu_log("OP before indirect lowering:\n");
4243             tcg_dump_ops(s, false);
4244             qemu_log("\n");
4245             qemu_log_unlock(logfile);
4246         }
4247 #endif
4248         /* Replace indirect temps with direct temps.  */
4249         if (liveness_pass_2(s)) {
4250             /* If changes were made, re-run liveness.  */
4251             liveness_pass_1(s);
4252         }
4253     }
4254 
4255 #ifdef CONFIG_PROFILER
4256     qatomic_set(&prof->la_time, prof->la_time + profile_getclock());
4257 #endif
4258 
4259 #ifdef DEBUG_DISAS
4260     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT)
4261                  && qemu_log_in_addr_range(tb->pc))) {
4262         FILE *logfile = qemu_log_lock();
4263         qemu_log("OP after optimization and liveness analysis:\n");
4264         tcg_dump_ops(s, true);
4265         qemu_log("\n");
4266         qemu_log_unlock(logfile);
4267     }
4268 #endif
4269 
4270     tcg_reg_alloc_start(s);
4271 
4272     /*
4273      * Reset the buffer pointers when restarting after overflow.
4274      * TODO: Move this into translate-all.c with the rest of the
4275      * buffer management.  Having only this done here is confusing.
4276      */
4277     s->code_buf = tcg_splitwx_to_rw(tb->tc.ptr);
4278     s->code_ptr = s->code_buf;
4279 
4280 #ifdef TCG_TARGET_NEED_LDST_LABELS
4281     QSIMPLEQ_INIT(&s->ldst_labels);
4282 #endif
4283 #ifdef TCG_TARGET_NEED_POOL_LABELS
4284     s->pool_labels = NULL;
4285 #endif
4286 
4287     num_insns = -1;
4288     QTAILQ_FOREACH(op, &s->ops, link) {
4289         TCGOpcode opc = op->opc;
4290 
4291 #ifdef CONFIG_PROFILER
4292         qatomic_set(&prof->table_op_count[opc], prof->table_op_count[opc] + 1);
4293 #endif
4294 
4295         switch (opc) {
4296         case INDEX_op_mov_i32:
4297         case INDEX_op_mov_i64:
4298         case INDEX_op_mov_vec:
4299             tcg_reg_alloc_mov(s, op);
4300             break;
4301         case INDEX_op_movi_i32:
4302         case INDEX_op_movi_i64:
4303         case INDEX_op_dupi_vec:
4304             tcg_reg_alloc_movi(s, op);
4305             break;
4306         case INDEX_op_dup_vec:
4307             tcg_reg_alloc_dup(s, op);
4308             break;
4309         case INDEX_op_insn_start:
4310             if (num_insns >= 0) {
4311                 size_t off = tcg_current_code_size(s);
4312                 s->gen_insn_end_off[num_insns] = off;
4313                 /* Assert that we do not overflow our stored offset.  */
4314                 assert(s->gen_insn_end_off[num_insns] == off);
4315             }
4316             num_insns++;
4317             for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
4318                 target_ulong a;
4319 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
4320                 a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
4321 #else
4322                 a = op->args[i];
4323 #endif
4324                 s->gen_insn_data[num_insns][i] = a;
4325             }
4326             break;
4327         case INDEX_op_discard:
4328             temp_dead(s, arg_temp(op->args[0]));
4329             break;
4330         case INDEX_op_set_label:
4331             tcg_reg_alloc_bb_end(s, s->reserved_regs);
4332             tcg_out_label(s, arg_label(op->args[0]));
4333             break;
4334         case INDEX_op_call:
4335             tcg_reg_alloc_call(s, op);
4336             break;
4337         default:
4338             /* Sanity check that we've not introduced any unhandled opcodes. */
4339             tcg_debug_assert(tcg_op_supported(opc));
4340             /* Note: in order to speed up the code, it would be much
4341                faster to have specialized register allocator functions for
4342                some common argument patterns */
4343             tcg_reg_alloc_op(s, op);
4344             break;
4345         }
4346 #ifdef CONFIG_DEBUG_TCG
4347         check_regs(s);
4348 #endif
4349         /* Test for (pending) buffer overflow.  The assumption is that any
4350            one operation beginning below the high water mark cannot overrun
4351            the buffer completely.  Thus we can test for overflow after
4352            generating code without having to check during generation.  */
4353         if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
4354             return -1;
4355         }
4356         /* Test for TB overflow, as seen by gen_insn_end_off.  */
4357         if (unlikely(tcg_current_code_size(s) > UINT16_MAX)) {
4358             return -2;
4359         }
4360     }
4361     tcg_debug_assert(num_insns >= 0);
4362     s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);
4363 
4364     /* Generate TB finalization at the end of block */
4365 #ifdef TCG_TARGET_NEED_LDST_LABELS
4366     i = tcg_out_ldst_finalize(s);
4367     if (i < 0) {
4368         return i;
4369     }
4370 #endif
4371 #ifdef TCG_TARGET_NEED_POOL_LABELS
4372     i = tcg_out_pool_finalize(s);
4373     if (i < 0) {
4374         return i;
4375     }
4376 #endif
4377     if (!tcg_resolve_relocs(s)) {
4378         return -2;
4379     }
4380 
4381 #ifndef CONFIG_TCG_INTERPRETER
4382     /* flush instruction cache */
4383     flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
4384                         (uintptr_t)s->code_buf,
4385                         tcg_ptr_byte_diff(s->code_ptr, s->code_buf));
4386 #endif
4387 
4388     return tcg_current_code_size(s);
4389 }
4390 
4391 #ifdef CONFIG_PROFILER
4392 void tcg_dump_info(void)
4393 {
4394     TCGProfile prof = {};
4395     const TCGProfile *s;
4396     int64_t tb_count;
4397     int64_t tb_div_count;
4398     int64_t tot;
4399 
4400     tcg_profile_snapshot_counters(&prof);
4401     s = &prof;
4402     tb_count = s->tb_count;
4403     tb_div_count = tb_count ? tb_count : 1;
4404     tot = s->interm_time + s->code_time;
4405 
4406     qemu_printf("JIT cycles          %" PRId64 " (%0.3f s at 2.4 GHz)\n",
4407                 tot, tot / 2.4e9);
4408     qemu_printf("translated TBs      %" PRId64 " (aborted=%" PRId64
4409                 " %0.1f%%)\n",
4410                 tb_count, s->tb_count1 - tb_count,
4411                 (double)(s->tb_count1 - s->tb_count)
4412                 / (s->tb_count1 ? s->tb_count1 : 1) * 100.0);
4413     qemu_printf("avg ops/TB          %0.1f max=%d\n",
4414                 (double)s->op_count / tb_div_count, s->op_count_max);
4415     qemu_printf("deleted ops/TB      %0.2f\n",
4416                 (double)s->del_op_count / tb_div_count);
4417     qemu_printf("avg temps/TB        %0.2f max=%d\n",
4418                 (double)s->temp_count / tb_div_count, s->temp_count_max);
4419     qemu_printf("avg host code/TB    %0.1f\n",
4420                 (double)s->code_out_len / tb_div_count);
4421     qemu_printf("avg search data/TB  %0.1f\n",
4422                 (double)s->search_out_len / tb_div_count);
4423 
4424     qemu_printf("cycles/op           %0.1f\n",
4425                 s->op_count ? (double)tot / s->op_count : 0);
4426     qemu_printf("cycles/in byte      %0.1f\n",
4427                 s->code_in_len ? (double)tot / s->code_in_len : 0);
4428     qemu_printf("cycles/out byte     %0.1f\n",
4429                 s->code_out_len ? (double)tot / s->code_out_len : 0);
4430     qemu_printf("cycles/search byte  %0.1f\n",
4431                 s->search_out_len ? (double)tot / s->search_out_len : 0);
4432     if (tot == 0) {
4433         tot = 1;
4434     }
4435     qemu_printf("  gen_interm time   %0.1f%%\n",
4436                 (double)s->interm_time / tot * 100.0);
4437     qemu_printf("  gen_code time     %0.1f%%\n",
4438                 (double)s->code_time / tot * 100.0);
4439     qemu_printf("optim./code time    %0.1f%%\n",
4440                 (double)s->opt_time / (s->code_time ? s->code_time : 1)
4441                 * 100.0);
4442     qemu_printf("liveness/code time  %0.1f%%\n",
4443                 (double)s->la_time / (s->code_time ? s->code_time : 1) * 100.0);
4444     qemu_printf("cpu_restore count   %" PRId64 "\n",
4445                 s->restore_count);
4446     qemu_printf("  avg cycles        %0.1f\n",
4447                 s->restore_count ? (double)s->restore_time / s->restore_count : 0);
4448 }
4449 #else
4450 void tcg_dump_info(void)
4451 {
4452     qemu_printf("[TCG profiler not compiled]\n");
4453 }
4454 #endif
4455 
4456 #ifdef ELF_HOST_MACHINE
4457 /* In order to use this feature, the backend needs to do three things:
4458 
4459    (1) Define ELF_HOST_MACHINE to indicate both what value to
4460        put into the ELF image and to indicate support for the feature.
4461 
4462    (2) Define tcg_register_jit.  This should create a buffer containing
4463        the contents of a .debug_frame section that describes the post-
4464        prologue unwind info for the tcg machine.
4465 
4466    (3) Call tcg_register_jit_int, with the constructed .debug_frame.
4467 */
4468 
4469 /* Begin GDB interface.  THE FOLLOWING MUST MATCH GDB DOCS.  */
4470 typedef enum {
4471     JIT_NOACTION = 0,
4472     JIT_REGISTER_FN,
4473     JIT_UNREGISTER_FN
4474 } jit_actions_t;
4475 
4476 struct jit_code_entry {
4477     struct jit_code_entry *next_entry;
4478     struct jit_code_entry *prev_entry;
4479     const void *symfile_addr;
4480     uint64_t symfile_size;
4481 };
4482 
4483 struct jit_descriptor {
4484     uint32_t version;
4485     uint32_t action_flag;
4486     struct jit_code_entry *relevant_entry;
4487     struct jit_code_entry *first_entry;
4488 };
4489 
4490 void __jit_debug_register_code(void) __attribute__((noinline));
4491 void __jit_debug_register_code(void)
4492 {
4493     asm("");
4494 }
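
/* GDB plants a breakpoint in this deliberately non-inlined function;
   the asm("") keeps it from being optimized away, and when the
   breakpoint fires the debugger reads __jit_debug_descriptor to find
   the newly registered symfile.  */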
4495 
4496 /* Must statically initialize the version, because GDB may check
4497    the version before we can set it.  */
4498 struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };
4499 
4500 /* End GDB interface.  */
4501 
4502 static int find_string(const char *strtab, const char *str)
4503 {
4504     const char *p = strtab + 1;
4505 
4506     while (1) {
4507         if (strcmp(p, str) == 0) {
4508             return p - strtab;
4509         }
4510         p += strlen(p) + 1;
4511     }
4512 }
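
/* Note that find_string assumes STR is present in STRTAB; a missing
   string would walk past the end of the table.  All callers below pass
   literals that do appear in img_template.str.  */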
4513 
4514 static void tcg_register_jit_int(const void *buf_ptr, size_t buf_size,
4515                                  const void *debug_frame,
4516                                  size_t debug_frame_size)
4517 {
4518     struct __attribute__((packed)) DebugInfo {
4519         uint32_t  len;
4520         uint16_t  version;
4521         uint32_t  abbrev;
4522         uint8_t   ptr_size;
4523         uint8_t   cu_die;
4524         uint16_t  cu_lang;
4525         uintptr_t cu_low_pc;
4526         uintptr_t cu_high_pc;
4527         uint8_t   fn_die;
4528         char      fn_name[16];
4529         uintptr_t fn_low_pc;
4530         uintptr_t fn_high_pc;
4531         uint8_t   cu_eoc;
4532     };
4533 
4534     struct ElfImage {
4535         ElfW(Ehdr) ehdr;
4536         ElfW(Phdr) phdr;
4537         ElfW(Shdr) shdr[7];
4538         ElfW(Sym)  sym[2];
4539         struct DebugInfo di;
4540         uint8_t    da[24];
4541         char       str[80];
4542     };
4543 
4544     struct ElfImage *img;
4545 
4546     static const struct ElfImage img_template = {
4547         .ehdr = {
4548             .e_ident[EI_MAG0] = ELFMAG0,
4549             .e_ident[EI_MAG1] = ELFMAG1,
4550             .e_ident[EI_MAG2] = ELFMAG2,
4551             .e_ident[EI_MAG3] = ELFMAG3,
4552             .e_ident[EI_CLASS] = ELF_CLASS,
4553             .e_ident[EI_DATA] = ELF_DATA,
4554             .e_ident[EI_VERSION] = EV_CURRENT,
4555             .e_type = ET_EXEC,
4556             .e_machine = ELF_HOST_MACHINE,
4557             .e_version = EV_CURRENT,
4558             .e_phoff = offsetof(struct ElfImage, phdr),
4559             .e_shoff = offsetof(struct ElfImage, shdr),
4560             .e_ehsize = sizeof(ElfW(Ehdr)),
4561             .e_phentsize = sizeof(ElfW(Phdr)),
4562             .e_phnum = 1,
4563             .e_shentsize = sizeof(ElfW(Shdr)),
4564             .e_shnum = ARRAY_SIZE(img->shdr),
4565             .e_shstrndx = ARRAY_SIZE(img->shdr) - 1,
4566 #ifdef ELF_HOST_FLAGS
4567             .e_flags = ELF_HOST_FLAGS,
4568 #endif
4569 #ifdef ELF_OSABI
4570             .e_ident[EI_OSABI] = ELF_OSABI,
4571 #endif
4572         },
4573         .phdr = {
4574             .p_type = PT_LOAD,
4575             .p_flags = PF_X,
4576         },
4577         .shdr = {
4578             [0] = { .sh_type = SHT_NULL },
4579             /* Trick: The contents of code_gen_buffer are not present in
4580                this fake ELF file; that got allocated elsewhere.  Therefore
4581                we mark .text as SHT_NOBITS (similar to .bss) so that readers
4582                will not look for contents.  We can record any address.  */
4583             [1] = { /* .text */
4584                 .sh_type = SHT_NOBITS,
4585                 .sh_flags = SHF_EXECINSTR | SHF_ALLOC,
4586             },
4587             [2] = { /* .debug_info */
4588                 .sh_type = SHT_PROGBITS,
4589                 .sh_offset = offsetof(struct ElfImage, di),
4590                 .sh_size = sizeof(struct DebugInfo),
4591             },
4592             [3] = { /* .debug_abbrev */
4593                 .sh_type = SHT_PROGBITS,
4594                 .sh_offset = offsetof(struct ElfImage, da),
4595                 .sh_size = sizeof(img->da),
4596             },
4597             [4] = { /* .debug_frame */
4598                 .sh_type = SHT_PROGBITS,
4599                 .sh_offset = sizeof(struct ElfImage),
4600             },
4601             [5] = { /* .symtab */
4602                 .sh_type = SHT_SYMTAB,
4603                 .sh_offset = offsetof(struct ElfImage, sym),
4604                 .sh_size = sizeof(img->sym),
4605                 .sh_info = 1,
4606                 .sh_link = ARRAY_SIZE(img->shdr) - 1,
4607                 .sh_entsize = sizeof(ElfW(Sym)),
4608             },
4609             [6] = { /* .strtab */
4610                 .sh_type = SHT_STRTAB,
4611                 .sh_offset = offsetof(struct ElfImage, str),
4612                 .sh_size = sizeof(img->str),
4613             }
4614         },
4615         .sym = {
4616             [1] = { /* code_gen_buffer */
4617                 .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC),
4618                 .st_shndx = 1,
4619             }
4620         },
4621         .di = {
4622             .len = sizeof(struct DebugInfo) - 4,
4623             .version = 2,
4624             .ptr_size = sizeof(void *),
4625             .cu_die = 1,
4626             .cu_lang = 0x8001,  /* DW_LANG_Mips_Assembler */
4627             .fn_die = 2,
4628             .fn_name = "code_gen_buffer"
4629         },
4630         .da = {
4631             1,          /* abbrev number (the cu) */
4632             0x11, 1,    /* DW_TAG_compile_unit, has children */
4633             0x13, 0x5,  /* DW_AT_language, DW_FORM_data2 */
4634             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
4635             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
4636             0, 0,       /* end of abbrev */
4637             2,          /* abbrev number (the fn) */
4638             0x2e, 0,    /* DW_TAG_subprogram, no children */
4639             0x3, 0x8,   /* DW_AT_name, DW_FORM_string */
4640             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
4641             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
4642             0, 0,       /* end of abbrev */
4643             0           /* no more abbrev */
4644         },
4645         .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0"
4646                ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer",
4647     };
4648 
4649     /* We only need a single jit entry; statically allocate it.  */
4650     static struct jit_code_entry one_entry;
4651 
4652     uintptr_t buf = (uintptr_t)buf_ptr;
4653     size_t img_size = sizeof(struct ElfImage) + debug_frame_size;
4654     DebugFrameHeader *dfh;
4655 
4656     img = g_malloc(img_size);
4657     *img = img_template;
4658 
4659     img->phdr.p_vaddr = buf;
4660     img->phdr.p_paddr = buf;
4661     img->phdr.p_memsz = buf_size;
4662 
4663     img->shdr[1].sh_name = find_string(img->str, ".text");
4664     img->shdr[1].sh_addr = buf;
4665     img->shdr[1].sh_size = buf_size;
4666 
4667     img->shdr[2].sh_name = find_string(img->str, ".debug_info");
4668     img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev");
4669 
4670     img->shdr[4].sh_name = find_string(img->str, ".debug_frame");
4671     img->shdr[4].sh_size = debug_frame_size;
4672 
4673     img->shdr[5].sh_name = find_string(img->str, ".symtab");
4674     img->shdr[6].sh_name = find_string(img->str, ".strtab");
4675 
4676     img->sym[1].st_name = find_string(img->str, "code_gen_buffer");
4677     img->sym[1].st_value = buf;
4678     img->sym[1].st_size = buf_size;
4679 
4680     img->di.cu_low_pc = buf;
4681     img->di.cu_high_pc = buf + buf_size;
4682     img->di.fn_low_pc = buf;
4683     img->di.fn_high_pc = buf + buf_size;
4684 
4685     dfh = (DebugFrameHeader *)(img + 1);
4686     memcpy(dfh, debug_frame, debug_frame_size);
4687     dfh->fde.func_start = buf;
4688     dfh->fde.func_len = buf_size;
4689 
4690 #ifdef DEBUG_JIT
4691     /* Enable this block to be able to debug the ELF image file creation.
4692        One can use readelf, objdump, or other inspection utilities.  */
4693     {
4694         FILE *f = fopen("/tmp/qemu.jit", "w+b");
4695         if (f) {
4696             if (fwrite(img, img_size, 1, f) != 1) {
4697                 /* Avoid stupid unused return value warning for fwrite.  */
4698             }
4699             fclose(f);
4700         }
4701     }
4702 #endif
4703 
4704     one_entry.symfile_addr = img;
4705     one_entry.symfile_size = img_size;
4706 
4707     __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
4708     __jit_debug_descriptor.relevant_entry = &one_entry;
4709     __jit_debug_descriptor.first_entry = &one_entry;
4710     __jit_debug_register_code();
4711 }
4712 #else
4713 /* No support for the feature.  Provide the entry point expected by exec.c,
4714    and implement the internal function we declared earlier.  */
4715 
4716 static void tcg_register_jit_int(const void *buf, size_t size,
4717                                  const void *debug_frame,
4718                                  size_t debug_frame_size)
4719 {
4720 }
4721 
4722 void tcg_register_jit(const void *buf, size_t buf_size)
4723 {
4724 }
4725 #endif /* ELF_HOST_MACHINE */
4726 
4727 #if !TCG_TARGET_MAYBE_vec
4728 void tcg_expand_vec_op(TCGOpcode o, TCGType t, unsigned e, TCGArg a0, ...)
4729 {
4730     g_assert_not_reached();
4731 }
4732 #endif
4733