xref: /openbmc/qemu/tcg/tcg.c (revision dcb32f1d)
1 /*
2  * Tiny Code Generator for QEMU
3  *
4  * Copyright (c) 2008 Fabrice Bellard
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24 
25 /* define it to use liveness analysis (better code) */
26 #define USE_TCG_OPTIMIZATIONS
27 
28 #include "qemu/osdep.h"
29 
/* Define to dump the ELF file used to communicate with GDB.  */
31 #undef DEBUG_JIT
32 
33 #include "qemu/error-report.h"
34 #include "qemu/cutils.h"
35 #include "qemu/host-utils.h"
36 #include "qemu/qemu-print.h"
37 #include "qemu/timer.h"
38 
39 /* Note: the long term plan is to reduce the dependencies on the QEMU
40    CPU definitions. Currently they are used for qemu_ld/st
41    instructions */
42 #define NO_CPU_IO_DEFS
43 #include "cpu.h"
44 
45 #include "exec/exec-all.h"
46 
47 #if !defined(CONFIG_USER_ONLY)
48 #include "hw/boards.h"
49 #endif
50 
51 #include "tcg/tcg-op.h"
52 
/*
 * ELF class describing the host: 32-bit when uintptr_t is 32 bits wide,
 * 64-bit otherwise.
 */
#if UINTPTR_MAX == UINT32_MAX
# define ELF_CLASS  ELFCLASS32
#else
# define ELF_CLASS  ELFCLASS64
#endif
/* ELF data encoding describing the host: MSB-first on big-endian hosts. */
#ifdef HOST_WORDS_BIGENDIAN
# define ELF_DATA   ELFDATA2MSB
#else
# define ELF_DATA   ELFDATA2LSB
#endif
63 
64 #include "elf.h"
65 #include "exec/log.h"
66 #include "sysemu/sysemu.h"
67 
/*
 * Forward declarations for functions that are provided by tcg-target.inc.c
 * (textually included further down in this file) and called from here.
 */
static void tcg_target_init(TCGContext *s);
static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode);
static void tcg_target_qemu_prologue(TCGContext *s);
/* tcg_resolve_relocs() treats a false return value as failure. */
static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
                        intptr_t value, intptr_t addend);
75 
/*
 * The CIE and FDE header definitions will be common to all hosts.
 *
 * NOTE(review): unlike DebugFrameFDEHeader and DebugFrameHeader, this struct
 * is not marked QEMU_PACKED -- presumably intentional; confirm.
 */
typedef struct {
    /* First member; forced to the alignment of a host pointer. */
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t id;
    uint8_t version;
    char augmentation[1];
    uint8_t code_align;
    uint8_t data_align;
    uint8_t return_column;
} DebugFrameCIE;
86 
/*
 * Packed FDE header.  The field names suggest it describes one function by
 * start address and length -- TODO confirm against the code that fills it in.
 */
typedef struct QEMU_PACKED {
    /* First member; forced to the alignment of a host pointer. */
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t cie_offset;
    uintptr_t func_start;
    uintptr_t func_len;
} DebugFrameFDEHeader;
93 
/* A CIE immediately followed by an FDE header, packed into one object. */
typedef struct QEMU_PACKED {
    DebugFrameCIE cie;
    DebugFrameFDEHeader fde;
} DebugFrameHeader;
98 
/*
 * Forward declaration; the definition is not in this part of the file.
 * The "unused" attribute keeps the compiler quiet when nothing calls it
 * (presumably that depends on the host backend -- TODO confirm).
 */
static void tcg_register_jit_int(void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
    __attribute__((unused));
103 
/*
 * Forward declarations for functions that are both provided by and used in
 * tcg-target.inc.c.
 */
static const char *target_parse_constraint(TCGArgConstraint *ct,
                                           const char *ct_str, TCGType type);
static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
                       intptr_t arg2);
static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_movi(TCGContext *s, TCGType type,
                         TCGReg ret, tcg_target_long arg);
static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
                       const int *const_args);
/*
 * Vector code-emission hooks.  When TCG_TARGET_MAYBE_vec is true they are
 * only declared here; otherwise inline stubs are supplied that assert if
 * they are ever reached.
 */
#if TCG_TARGET_MAYBE_vec
static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                            TCGReg dst, TCGReg src);
static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg dst, TCGReg base, intptr_t offset);
static void tcg_out_dupi_vec(TCGContext *s, TCGType type,
                             TCGReg dst, tcg_target_long arg);
static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, unsigned vecl,
                           unsigned vece, const TCGArg *args,
                           const int *const_args);
#else
/* Stub: must never be called when vector support is compiled out. */
static inline bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                                   TCGReg dst, TCGReg src)
{
    g_assert_not_reached();
}
/* Stub: must never be called when vector support is compiled out. */
static inline bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, TCGReg base, intptr_t offset)
{
    g_assert_not_reached();
}
/* Stub: must never be called when vector support is compiled out. */
static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type,
                                    TCGReg dst, tcg_target_long arg)
{
    g_assert_not_reached();
}
/* Stub: must never be called when vector support is compiled out. */
static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, unsigned vecl,
                                  unsigned vece, const TCGArg *args,
                                  const int *const_args)
{
    g_assert_not_reached();
}
#endif
/* Further forward declarations; none of these is defined in this part of the file. */
static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
                       intptr_t arg2);
static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
                        TCGReg base, intptr_t ofs);
static void tcg_out_call(TCGContext *s, tcg_insn_unit *target);
static int tcg_target_const_match(tcg_target_long val, TCGType type,
                                  const TCGArgConstraint *arg_ct);
/* Only declared when TCG_TARGET_NEED_LDST_LABELS is defined. */
#ifdef TCG_TARGET_NEED_LDST_LABELS
static int tcg_out_ldst_finalize(TCGContext *s);
#endif
157 
/*
 * Number of bytes kept in reserve at the end of every region:
 * tcg_region_assign() places code_gen_highwater this far before the region
 * end, and the size accounting in tcg_region_alloc() and
 * tcg_code_capacity() subtracts it.
 */
#define TCG_HIGHWATER 1024
159 
/* Array of TCG contexts; set up in tcg_context_init(). */
static TCGContext **tcg_ctxs;
/* Number of valid entries in tcg_ctxs[]. */
static unsigned int n_tcg_ctxs;
/* Set by tcg_context_init() to the "env" global living in TCG_AREG0. */
TCGv_env cpu_env = 0;
163 
/*
 * Per-region container of translation blocks: a GTree ordered by
 * tb_tc_cmp(), plus the mutex that is held around every access to it.
 */
struct tcg_region_tree {
    QemuMutex lock;
    /* maps &tb->tc to tb; see tcg_tb_insert() */
    GTree *tree;
    /* padding to avoid false sharing is computed at run-time */
};
169 
/*
 * We divide code_gen_buffer into equally-sized "regions" that TCG threads
 * dynamically allocate from as demand dictates. Given appropriate region
 * sizing, this minimizes flushes even when some TCG threads generate a lot
 * more code than others.
 */
struct tcg_region_state {
    QemuMutex lock;

    /* fields set at init time (see tcg_region_init()) */
    void *start;          /* start of the code buffer, possibly unaligned */
    void *start_aligned;  /* start rounded up to the host page size */
    void *end;            /* page-aligned end, minus the final guard page */
    size_t n;             /* number of regions */
    size_t size; /* size of one region */
    size_t stride; /* .size + guard size */

    /* fields protected by the lock */
    size_t current; /* index of the next region to hand out */
    size_t agg_size_full; /* aggregate size of full regions */
};
191 
/* The single, file-wide description of how the code buffer is partitioned. */
static struct tcg_region_state region;
/*
 * This is an array of struct tcg_region_tree's, with padding.
 * We use void * to simplify the computation of region_trees[i]; each
 * struct is found every tree_size bytes.
 */
static void *region_trees;
/* sizeof(struct tcg_region_tree) rounded up to the data cache line size. */
static size_t tree_size;
/* Register sets, indexed by TCG type; not written in this part of the file. */
static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT];
/* Consulted by tcg_context_init() when building indirect_reg_alloc_order. */
static TCGRegSet tcg_target_call_clobber_regs;
202 
203 #if TCG_TARGET_INSN_UNIT_SIZE == 1
204 static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v)
205 {
206     *s->code_ptr++ = v;
207 }
208 
209 static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p,
210                                                       uint8_t v)
211 {
212     *p = v;
213 }
214 #endif
215 
216 #if TCG_TARGET_INSN_UNIT_SIZE <= 2
217 static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v)
218 {
219     if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
220         *s->code_ptr++ = v;
221     } else {
222         tcg_insn_unit *p = s->code_ptr;
223         memcpy(p, &v, sizeof(v));
224         s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
225     }
226 }
227 
228 static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p,
229                                                        uint16_t v)
230 {
231     if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
232         *p = v;
233     } else {
234         memcpy(p, &v, sizeof(v));
235     }
236 }
237 #endif
238 
239 #if TCG_TARGET_INSN_UNIT_SIZE <= 4
240 static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v)
241 {
242     if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
243         *s->code_ptr++ = v;
244     } else {
245         tcg_insn_unit *p = s->code_ptr;
246         memcpy(p, &v, sizeof(v));
247         s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
248     }
249 }
250 
251 static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p,
252                                                        uint32_t v)
253 {
254     if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
255         *p = v;
256     } else {
257         memcpy(p, &v, sizeof(v));
258     }
259 }
260 #endif
261 
262 #if TCG_TARGET_INSN_UNIT_SIZE <= 8
263 static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v)
264 {
265     if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
266         *s->code_ptr++ = v;
267     } else {
268         tcg_insn_unit *p = s->code_ptr;
269         memcpy(p, &v, sizeof(v));
270         s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
271     }
272 }
273 
274 static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p,
275                                                        uint64_t v)
276 {
277     if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
278         *p = v;
279     } else {
280         memcpy(p, &v, sizeof(v));
281     }
282 }
283 #endif
284 
285 /* label relocation processing */
286 
287 static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
288                           TCGLabel *l, intptr_t addend)
289 {
290     TCGRelocation *r = tcg_malloc(sizeof(TCGRelocation));
291 
292     r->type = type;
293     r->ptr = code_ptr;
294     r->addend = addend;
295     QSIMPLEQ_INSERT_TAIL(&l->relocs, r, next);
296 }
297 
298 static void tcg_out_label(TCGContext *s, TCGLabel *l, tcg_insn_unit *ptr)
299 {
300     tcg_debug_assert(!l->has_value);
301     l->has_value = 1;
302     l->u.value_ptr = ptr;
303 }
304 
305 TCGLabel *gen_new_label(void)
306 {
307     TCGContext *s = tcg_ctx;
308     TCGLabel *l = tcg_malloc(sizeof(TCGLabel));
309 
310     memset(l, 0, sizeof(TCGLabel));
311     l->id = s->nb_labels++;
312     QSIMPLEQ_INIT(&l->relocs);
313 
314     QSIMPLEQ_INSERT_TAIL(&s->labels, l, next);
315 
316     return l;
317 }
318 
319 static bool tcg_resolve_relocs(TCGContext *s)
320 {
321     TCGLabel *l;
322 
323     QSIMPLEQ_FOREACH(l, &s->labels, next) {
324         TCGRelocation *r;
325         uintptr_t value = l->u.value;
326 
327         QSIMPLEQ_FOREACH(r, &l->relocs, next) {
328             if (!patch_reloc(r->ptr, r->type, value, r->addend)) {
329                 return false;
330             }
331         }
332     }
333     return true;
334 }
335 
336 static void set_jmp_reset_offset(TCGContext *s, int which)
337 {
338     size_t off = tcg_current_code_size(s);
339     s->tb_jmp_reset_offset[which] = off;
340     /* Make sure that we didn't overflow the stored offset.  */
341     assert(s->tb_jmp_reset_offset[which] == off);
342 }
343 
344 #include "tcg-target.inc.c"
345 
346 /* compare a pointer @ptr and a tb_tc @s */
347 static int ptr_cmp_tb_tc(const void *ptr, const struct tb_tc *s)
348 {
349     if (ptr >= s->ptr + s->size) {
350         return 1;
351     } else if (ptr < s->ptr) {
352         return -1;
353     }
354     return 0;
355 }
356 
357 static gint tb_tc_cmp(gconstpointer ap, gconstpointer bp)
358 {
359     const struct tb_tc *a = ap;
360     const struct tb_tc *b = bp;
361 
362     /*
363      * When both sizes are set, we know this isn't a lookup.
364      * This is the most likely case: every TB must be inserted; lookups
365      * are a lot less frequent.
366      */
367     if (likely(a->size && b->size)) {
368         if (a->ptr > b->ptr) {
369             return 1;
370         } else if (a->ptr < b->ptr) {
371             return -1;
372         }
373         /* a->ptr == b->ptr should happen only on deletions */
374         g_assert(a->size == b->size);
375         return 0;
376     }
377     /*
378      * All lookups have either .size field set to 0.
379      * From the glib sources we see that @ap is always the lookup key. However
380      * the docs provide no guarantee, so we just mark this case as likely.
381      */
382     if (likely(a->size == 0)) {
383         return ptr_cmp_tb_tc(a->ptr, b);
384     }
385     return ptr_cmp_tb_tc(b->ptr, a);
386 }
387 
388 static void tcg_region_trees_init(void)
389 {
390     size_t i;
391 
392     tree_size = ROUND_UP(sizeof(struct tcg_region_tree), qemu_dcache_linesize);
393     region_trees = qemu_memalign(qemu_dcache_linesize, region.n * tree_size);
394     for (i = 0; i < region.n; i++) {
395         struct tcg_region_tree *rt = region_trees + i * tree_size;
396 
397         qemu_mutex_init(&rt->lock);
398         rt->tree = g_tree_new(tb_tc_cmp);
399     }
400 }
401 
402 static struct tcg_region_tree *tc_ptr_to_region_tree(void *p)
403 {
404     size_t region_idx;
405 
406     if (p < region.start_aligned) {
407         region_idx = 0;
408     } else {
409         ptrdiff_t offset = p - region.start_aligned;
410 
411         if (offset > region.stride * (region.n - 1)) {
412             region_idx = region.n - 1;
413         } else {
414             region_idx = offset / region.stride;
415         }
416     }
417     return region_trees + region_idx * tree_size;
418 }
419 
420 void tcg_tb_insert(TranslationBlock *tb)
421 {
422     struct tcg_region_tree *rt = tc_ptr_to_region_tree(tb->tc.ptr);
423 
424     qemu_mutex_lock(&rt->lock);
425     g_tree_insert(rt->tree, &tb->tc, tb);
426     qemu_mutex_unlock(&rt->lock);
427 }
428 
429 void tcg_tb_remove(TranslationBlock *tb)
430 {
431     struct tcg_region_tree *rt = tc_ptr_to_region_tree(tb->tc.ptr);
432 
433     qemu_mutex_lock(&rt->lock);
434     g_tree_remove(rt->tree, &tb->tc);
435     qemu_mutex_unlock(&rt->lock);
436 }
437 
438 /*
439  * Find the TB 'tb' such that
440  * tb->tc.ptr <= tc_ptr < tb->tc.ptr + tb->tc.size
441  * Return NULL if not found.
442  */
443 TranslationBlock *tcg_tb_lookup(uintptr_t tc_ptr)
444 {
445     struct tcg_region_tree *rt = tc_ptr_to_region_tree((void *)tc_ptr);
446     TranslationBlock *tb;
447     struct tb_tc s = { .ptr = (void *)tc_ptr };
448 
449     qemu_mutex_lock(&rt->lock);
450     tb = g_tree_lookup(rt->tree, &s);
451     qemu_mutex_unlock(&rt->lock);
452     return tb;
453 }
454 
455 static void tcg_region_tree_lock_all(void)
456 {
457     size_t i;
458 
459     for (i = 0; i < region.n; i++) {
460         struct tcg_region_tree *rt = region_trees + i * tree_size;
461 
462         qemu_mutex_lock(&rt->lock);
463     }
464 }
465 
466 static void tcg_region_tree_unlock_all(void)
467 {
468     size_t i;
469 
470     for (i = 0; i < region.n; i++) {
471         struct tcg_region_tree *rt = region_trees + i * tree_size;
472 
473         qemu_mutex_unlock(&rt->lock);
474     }
475 }
476 
477 void tcg_tb_foreach(GTraverseFunc func, gpointer user_data)
478 {
479     size_t i;
480 
481     tcg_region_tree_lock_all();
482     for (i = 0; i < region.n; i++) {
483         struct tcg_region_tree *rt = region_trees + i * tree_size;
484 
485         g_tree_foreach(rt->tree, func, user_data);
486     }
487     tcg_region_tree_unlock_all();
488 }
489 
490 size_t tcg_nb_tbs(void)
491 {
492     size_t nb_tbs = 0;
493     size_t i;
494 
495     tcg_region_tree_lock_all();
496     for (i = 0; i < region.n; i++) {
497         struct tcg_region_tree *rt = region_trees + i * tree_size;
498 
499         nb_tbs += g_tree_nnodes(rt->tree);
500     }
501     tcg_region_tree_unlock_all();
502     return nb_tbs;
503 }
504 
505 static void tcg_region_tree_reset_all(void)
506 {
507     size_t i;
508 
509     tcg_region_tree_lock_all();
510     for (i = 0; i < region.n; i++) {
511         struct tcg_region_tree *rt = region_trees + i * tree_size;
512 
513         /* Increment the refcount first so that destroy acts as a reset */
514         g_tree_ref(rt->tree);
515         g_tree_destroy(rt->tree);
516     }
517     tcg_region_tree_unlock_all();
518 }
519 
520 static void tcg_region_bounds(size_t curr_region, void **pstart, void **pend)
521 {
522     void *start, *end;
523 
524     start = region.start_aligned + curr_region * region.stride;
525     end = start + region.size;
526 
527     if (curr_region == 0) {
528         start = region.start;
529     }
530     if (curr_region == region.n - 1) {
531         end = region.end;
532     }
533 
534     *pstart = start;
535     *pend = end;
536 }
537 
538 static void tcg_region_assign(TCGContext *s, size_t curr_region)
539 {
540     void *start, *end;
541 
542     tcg_region_bounds(curr_region, &start, &end);
543 
544     s->code_gen_buffer = start;
545     s->code_gen_ptr = start;
546     s->code_gen_buffer_size = end - start;
547     s->code_gen_highwater = end - TCG_HIGHWATER;
548 }
549 
550 static bool tcg_region_alloc__locked(TCGContext *s)
551 {
552     if (region.current == region.n) {
553         return true;
554     }
555     tcg_region_assign(s, region.current);
556     region.current++;
557     return false;
558 }
559 
560 /*
561  * Request a new region once the one in use has filled up.
562  * Returns true on error.
563  */
564 static bool tcg_region_alloc(TCGContext *s)
565 {
566     bool err;
567     /* read the region size now; alloc__locked will overwrite it on success */
568     size_t size_full = s->code_gen_buffer_size;
569 
570     qemu_mutex_lock(&region.lock);
571     err = tcg_region_alloc__locked(s);
572     if (!err) {
573         region.agg_size_full += size_full - TCG_HIGHWATER;
574     }
575     qemu_mutex_unlock(&region.lock);
576     return err;
577 }
578 
579 /*
580  * Perform a context's first region allocation.
581  * This function does _not_ increment region.agg_size_full.
582  */
583 static inline bool tcg_region_initial_alloc__locked(TCGContext *s)
584 {
585     return tcg_region_alloc__locked(s);
586 }
587 
588 /* Call from a safe-work context */
589 void tcg_region_reset_all(void)
590 {
591     unsigned int n_ctxs = atomic_read(&n_tcg_ctxs);
592     unsigned int i;
593 
594     qemu_mutex_lock(&region.lock);
595     region.current = 0;
596     region.agg_size_full = 0;
597 
598     for (i = 0; i < n_ctxs; i++) {
599         TCGContext *s = atomic_read(&tcg_ctxs[i]);
600         bool err = tcg_region_initial_alloc__locked(s);
601 
602         g_assert(!err);
603     }
604     qemu_mutex_unlock(&region.lock);
605 
606     tcg_region_tree_reset_all();
607 }
608 
609 #ifdef CONFIG_USER_ONLY
/* User-mode emulation always works with exactly one region. */
static size_t tcg_n_regions(void)
{
    const size_t single_region = 1;

    return single_region;
}
614 #else
615 /*
616  * It is likely that some vCPUs will translate more code than others, so we
617  * first try to set more regions than max_cpus, with those regions being of
618  * reasonable size. If that's not possible we make do by evenly dividing
619  * the code_gen_buffer among the vCPUs.
620  */
621 static size_t tcg_n_regions(void)
622 {
623     size_t i;
624 
625     /* Use a single region if all we have is one vCPU thread */
626 #if !defined(CONFIG_USER_ONLY)
627     MachineState *ms = MACHINE(qdev_get_machine());
628     unsigned int max_cpus = ms->smp.max_cpus;
629 #endif
630     if (max_cpus == 1 || !qemu_tcg_mttcg_enabled()) {
631         return 1;
632     }
633 
634     /* Try to have more regions than max_cpus, with each region being >= 2 MB */
635     for (i = 8; i > 0; i--) {
636         size_t regions_per_thread = i;
637         size_t region_size;
638 
639         region_size = tcg_init_ctx.code_gen_buffer_size;
640         region_size /= max_cpus * regions_per_thread;
641 
642         if (region_size >= 2 * 1024u * 1024) {
643             return max_cpus * regions_per_thread;
644         }
645     }
646     /* If we can't, then just allocate one region per vCPU thread */
647     return max_cpus;
648 }
649 #endif
650 
651 /*
652  * Initializes region partitioning.
653  *
654  * Called at init time from the parent thread (i.e. the one calling
655  * tcg_context_init), after the target's TCG globals have been set.
656  *
657  * Region partitioning works by splitting code_gen_buffer into separate regions,
658  * and then assigning regions to TCG threads so that the threads can translate
659  * code in parallel without synchronization.
660  *
661  * In softmmu the number of TCG threads is bounded by max_cpus, so we use at
662  * least max_cpus regions in MTTCG. In !MTTCG we use a single region.
663  * Note that the TCG options from the command-line (i.e. -accel accel=tcg,[...])
664  * must have been parsed before calling this function, since it calls
665  * qemu_tcg_mttcg_enabled().
666  *
667  * In user-mode we use a single region.  Having multiple regions in user-mode
668  * is not supported, because the number of vCPU threads (recall that each thread
669  * spawned by the guest corresponds to a vCPU thread) is only bounded by the
670  * OS, and usually this number is huge (tens of thousands is not uncommon).
 * Thus, given this large bound on the number of vCPU threads and the fact
 * that code_gen_buffer is allocated at compile-time, we cannot guarantee
 * the availability of at least one region per vCPU thread.
674  *
675  * However, this user-mode limitation is unlikely to be a significant problem
676  * in practice. Multi-threaded guests share most if not all of their translated
677  * code, which makes parallel code generation less appealing than in softmmu.
678  */
679 void tcg_region_init(void)
680 {
681     void *buf = tcg_init_ctx.code_gen_buffer;
682     void *aligned;
683     size_t size = tcg_init_ctx.code_gen_buffer_size;
684     size_t page_size = qemu_real_host_page_size;
685     size_t region_size;
686     size_t n_regions;
687     size_t i;
688 
689     n_regions = tcg_n_regions();
690 
691     /* The first region will be 'aligned - buf' bytes larger than the others */
692     aligned = QEMU_ALIGN_PTR_UP(buf, page_size);
693     g_assert(aligned < tcg_init_ctx.code_gen_buffer + size);
694     /*
695      * Make region_size a multiple of page_size, using aligned as the start.
696      * As a result of this we might end up with a few extra pages at the end of
697      * the buffer; we will assign those to the last region.
698      */
699     region_size = (size - (aligned - buf)) / n_regions;
700     region_size = QEMU_ALIGN_DOWN(region_size, page_size);
701 
702     /* A region must have at least 2 pages; one code, one guard */
703     g_assert(region_size >= 2 * page_size);
704 
705     /* init the region struct */
706     qemu_mutex_init(&region.lock);
707     region.n = n_regions;
708     region.size = region_size - page_size;
709     region.stride = region_size;
710     region.start = buf;
711     region.start_aligned = aligned;
712     /* page-align the end, since its last page will be a guard page */
713     region.end = QEMU_ALIGN_PTR_DOWN(buf + size, page_size);
714     /* account for that last guard page */
715     region.end -= page_size;
716 
717     /* set guard pages */
718     for (i = 0; i < region.n; i++) {
719         void *start, *end;
720         int rc;
721 
722         tcg_region_bounds(i, &start, &end);
723         rc = qemu_mprotect_none(end, page_size);
724         g_assert(!rc);
725     }
726 
727     tcg_region_trees_init();
728 
729     /* In user-mode we support only one ctx, so do the initial allocation now */
730 #ifdef CONFIG_USER_ONLY
731     {
732         bool err = tcg_region_initial_alloc__locked(tcg_ctx);
733 
734         g_assert(!err);
735     }
736 #endif
737 }
738 
739 static void alloc_tcg_plugin_context(TCGContext *s)
740 {
741 #ifdef CONFIG_PLUGIN
742     s->plugin_tb = g_new0(struct qemu_plugin_tb, 1);
743     s->plugin_tb->insns =
744         g_ptr_array_new_with_free_func(qemu_plugin_insn_cleanup_fn);
745 #endif
746 }
747 
748 /*
749  * All TCG threads except the parent (i.e. the one that called tcg_context_init
750  * and registered the target's TCG globals) must register with this function
751  * before initiating translation.
752  *
753  * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation
754  * of tcg_region_init() for the reasoning behind this.
755  *
 * In softmmu each caller registers its context in tcg_ctxs[]. Note that in
 * softmmu tcg_ctxs[] does not track tcg_init_ctx, since the initial context
 * is not used anymore for translation once this function is called.
759  *
 * Not tracking tcg_init_ctx in tcg_ctxs[] in softmmu keeps code that iterates
 * over the array (e.g. tcg_code_size()) the same for both softmmu and user-mode.
762  */
763 #ifdef CONFIG_USER_ONLY
764 void tcg_register_thread(void)
765 {
766     tcg_ctx = &tcg_init_ctx;
767 }
768 #else
769 void tcg_register_thread(void)
770 {
771     MachineState *ms = MACHINE(qdev_get_machine());
772     TCGContext *s = g_malloc(sizeof(*s));
773     unsigned int i, n;
774     bool err;
775 
776     *s = tcg_init_ctx;
777 
778     /* Relink mem_base.  */
779     for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) {
780         if (tcg_init_ctx.temps[i].mem_base) {
781             ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps;
782             tcg_debug_assert(b >= 0 && b < n);
783             s->temps[i].mem_base = &s->temps[b];
784         }
785     }
786 
787     /* Claim an entry in tcg_ctxs */
788     n = atomic_fetch_inc(&n_tcg_ctxs);
789     g_assert(n < ms->smp.max_cpus);
790     atomic_set(&tcg_ctxs[n], s);
791 
792     if (n > 0) {
793         alloc_tcg_plugin_context(s);
794     }
795 
796     tcg_ctx = s;
797     qemu_mutex_lock(&region.lock);
798     err = tcg_region_initial_alloc__locked(tcg_ctx);
799     g_assert(!err);
800     qemu_mutex_unlock(&region.lock);
801 }
802 #endif /* !CONFIG_USER_ONLY */
803 
804 /*
805  * Returns the size (in bytes) of all translated code (i.e. from all regions)
806  * currently in the cache.
807  * See also: tcg_code_capacity()
808  * Do not confuse with tcg_current_code_size(); that one applies to a single
809  * TCG context.
810  */
811 size_t tcg_code_size(void)
812 {
813     unsigned int n_ctxs = atomic_read(&n_tcg_ctxs);
814     unsigned int i;
815     size_t total;
816 
817     qemu_mutex_lock(&region.lock);
818     total = region.agg_size_full;
819     for (i = 0; i < n_ctxs; i++) {
820         const TCGContext *s = atomic_read(&tcg_ctxs[i]);
821         size_t size;
822 
823         size = atomic_read(&s->code_gen_ptr) - s->code_gen_buffer;
824         g_assert(size <= s->code_gen_buffer_size);
825         total += size;
826     }
827     qemu_mutex_unlock(&region.lock);
828     return total;
829 }
830 
831 /*
832  * Returns the code capacity (in bytes) of the entire cache, i.e. including all
833  * regions.
834  * See also: tcg_code_size()
835  */
836 size_t tcg_code_capacity(void)
837 {
838     size_t guard_size, capacity;
839 
840     /* no need for synchronization; these variables are set at init time */
841     guard_size = region.stride - region.size;
842     capacity = region.end + guard_size - region.start;
843     capacity -= region.n * (guard_size + TCG_HIGHWATER);
844     return capacity;
845 }
846 
847 size_t tcg_tb_phys_invalidate_count(void)
848 {
849     unsigned int n_ctxs = atomic_read(&n_tcg_ctxs);
850     unsigned int i;
851     size_t total = 0;
852 
853     for (i = 0; i < n_ctxs; i++) {
854         const TCGContext *s = atomic_read(&tcg_ctxs[i]);
855 
856         total += atomic_read(&s->tb_phys_invalidate_count);
857     }
858     return total;
859 }
860 
861 /* pool based memory allocation */
862 void *tcg_malloc_internal(TCGContext *s, int size)
863 {
864     TCGPool *p;
865     int pool_size;
866 
867     if (size > TCG_POOL_CHUNK_SIZE) {
868         /* big malloc: insert a new pool (XXX: could optimize) */
869         p = g_malloc(sizeof(TCGPool) + size);
870         p->size = size;
871         p->next = s->pool_first_large;
872         s->pool_first_large = p;
873         return p->data;
874     } else {
875         p = s->pool_current;
876         if (!p) {
877             p = s->pool_first;
878             if (!p)
879                 goto new_pool;
880         } else {
881             if (!p->next) {
882             new_pool:
883                 pool_size = TCG_POOL_CHUNK_SIZE;
884                 p = g_malloc(sizeof(TCGPool) + pool_size);
885                 p->size = pool_size;
886                 p->next = NULL;
887                 if (s->pool_current)
888                     s->pool_current->next = p;
889                 else
890                     s->pool_first = p;
891             } else {
892                 p = p->next;
893             }
894         }
895     }
896     s->pool_current = p;
897     s->pool_cur = p->data + size;
898     s->pool_end = p->data + p->size;
899     return p->data;
900 }
901 
902 void tcg_pool_reset(TCGContext *s)
903 {
904     TCGPool *p, *t;
905     for (p = s->pool_first_large; p; p = t) {
906         t = p->next;
907         g_free(p);
908     }
909     s->pool_first_large = NULL;
910     s->pool_cur = s->pool_end = NULL;
911     s->pool_current = NULL;
912 }
913 
/*
 * Description of one helper function.  Entries live in all_helpers[] and
 * are indexed by their function pointer in helper_table.
 */
typedef struct TCGHelperInfo {
    void *func;         /* helper entry point; used as the hash table key */
    const char *name;
    unsigned flags;
    unsigned sizemask;
} TCGHelperInfo;
920 
921 #include "exec/helper-proto.h"
922 
/* Table of helpers; its initialisers are pulled in from exec/helper-tcg.h. */
static const TCGHelperInfo all_helpers[] = {
#include "exec/helper-tcg.h"
};
/* Maps a helper's function pointer to its all_helpers[] entry. */
static GHashTable *helper_table;

/*
 * Variant of tcg_target_reg_alloc_order in which the leading registers that
 * are not call-clobbered appear in reverse order; filled in by
 * tcg_context_init().
 */
static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)];
/* Forward declarations; not defined in this part of the file. */
static void process_op_defs(TCGContext *s);
static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
                                            TCGReg reg, const char *name);
932 
933 void tcg_context_init(TCGContext *s)
934 {
935     int op, total_args, n, i;
936     TCGOpDef *def;
937     TCGArgConstraint *args_ct;
938     int *sorted_args;
939     TCGTemp *ts;
940 
941     memset(s, 0, sizeof(*s));
942     s->nb_globals = 0;
943 
944     /* Count total number of arguments and allocate the corresponding
945        space */
946     total_args = 0;
947     for(op = 0; op < NB_OPS; op++) {
948         def = &tcg_op_defs[op];
949         n = def->nb_iargs + def->nb_oargs;
950         total_args += n;
951     }
952 
953     args_ct = g_malloc(sizeof(TCGArgConstraint) * total_args);
954     sorted_args = g_malloc(sizeof(int) * total_args);
955 
956     for(op = 0; op < NB_OPS; op++) {
957         def = &tcg_op_defs[op];
958         def->args_ct = args_ct;
959         def->sorted_args = sorted_args;
960         n = def->nb_iargs + def->nb_oargs;
961         sorted_args += n;
962         args_ct += n;
963     }
964 
965     /* Register helpers.  */
966     /* Use g_direct_hash/equal for direct pointer comparisons on func.  */
967     helper_table = g_hash_table_new(NULL, NULL);
968 
969     for (i = 0; i < ARRAY_SIZE(all_helpers); ++i) {
970         g_hash_table_insert(helper_table, (gpointer)all_helpers[i].func,
971                             (gpointer)&all_helpers[i]);
972     }
973 
974     tcg_target_init(s);
975     process_op_defs(s);
976 
977     /* Reverse the order of the saved registers, assuming they're all at
978        the start of tcg_target_reg_alloc_order.  */
979     for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) {
980         int r = tcg_target_reg_alloc_order[n];
981         if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) {
982             break;
983         }
984     }
985     for (i = 0; i < n; ++i) {
986         indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i];
987     }
988     for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) {
989         indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i];
990     }
991 
992     alloc_tcg_plugin_context(s);
993 
994     tcg_ctx = s;
995     /*
996      * In user-mode we simply share the init context among threads, since we
997      * use a single region. See the documentation tcg_region_init() for the
998      * reasoning behind this.
999      * In softmmu we will have at most max_cpus TCG threads.
1000      */
1001 #ifdef CONFIG_USER_ONLY
1002     tcg_ctxs = &tcg_ctx;
1003     n_tcg_ctxs = 1;
1004 #else
1005     MachineState *ms = MACHINE(qdev_get_machine());
1006     unsigned int max_cpus = ms->smp.max_cpus;
1007     tcg_ctxs = g_new(TCGContext *, max_cpus);
1008 #endif
1009 
1010     tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0));
1011     ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env");
1012     cpu_env = temp_tcgv_ptr(ts);
1013 }
1014 
1015 /*
1016  * Allocate TBs right before their corresponding translated code, making
1017  * sure that TBs and code are on different cache lines.
1018  */
1019 TranslationBlock *tcg_tb_alloc(TCGContext *s)
1020 {
1021     uintptr_t align = qemu_icache_linesize;
1022     TranslationBlock *tb;
1023     void *next;
1024 
1025  retry:
1026     tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align);
1027     next = (void *)ROUND_UP((uintptr_t)(tb + 1), align);
1028 
1029     if (unlikely(next > s->code_gen_highwater)) {
1030         if (tcg_region_alloc(s)) {
1031             return NULL;
1032         }
1033         goto retry;
1034     }
1035     atomic_set(&s->code_gen_ptr, next);
1036     s->data_gen_ptr = NULL;
1037     return tb;
1038 }
1039 
1040 void tcg_prologue_init(TCGContext *s)
1041 {
1042     size_t prologue_size, total_size;
1043     void *buf0, *buf1;
1044 
1045     /* Put the prologue at the beginning of code_gen_buffer.  */
1046     buf0 = s->code_gen_buffer;
1047     total_size = s->code_gen_buffer_size;
1048     s->code_ptr = buf0;
1049     s->code_buf = buf0;
1050     s->data_gen_ptr = NULL;
1051     s->code_gen_prologue = buf0;
1052 
1053     /* Compute a high-water mark, at which we voluntarily flush the buffer
1054        and start over.  The size here is arbitrary, significantly larger
1055        than we expect the code generation for any one opcode to require.  */
1056     s->code_gen_highwater = s->code_gen_buffer + (total_size - TCG_HIGHWATER);
1057 
1058 #ifdef TCG_TARGET_NEED_POOL_LABELS
1059     s->pool_labels = NULL;
1060 #endif
1061 
1062     /* Generate the prologue.  */
1063     tcg_target_qemu_prologue(s);
1064 
1065 #ifdef TCG_TARGET_NEED_POOL_LABELS
1066     /* Allow the prologue to put e.g. guest_base into a pool entry.  */
1067     {
1068         int result = tcg_out_pool_finalize(s);
1069         tcg_debug_assert(result == 0);
1070     }
1071 #endif
1072 
1073     buf1 = s->code_ptr;
1074     flush_icache_range((uintptr_t)buf0, (uintptr_t)buf1);
1075 
1076     /* Deduct the prologue from the buffer.  */
1077     prologue_size = tcg_current_code_size(s);
1078     s->code_gen_ptr = buf1;
1079     s->code_gen_buffer = buf1;
1080     s->code_buf = buf1;
1081     total_size -= prologue_size;
1082     s->code_gen_buffer_size = total_size;
1083 
1084     tcg_register_jit(s->code_gen_buffer, total_size);
1085 
1086 #ifdef DEBUG_DISAS
1087     if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
1088         FILE *logfile = qemu_log_lock();
1089         qemu_log("PROLOGUE: [size=%zu]\n", prologue_size);
1090         if (s->data_gen_ptr) {
1091             size_t code_size = s->data_gen_ptr - buf0;
1092             size_t data_size = prologue_size - code_size;
1093             size_t i;
1094 
1095             log_disas(buf0, code_size);
1096 
1097             for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) {
1098                 if (sizeof(tcg_target_ulong) == 8) {
1099                     qemu_log("0x%08" PRIxPTR ":  .quad  0x%016" PRIx64 "\n",
1100                              (uintptr_t)s->data_gen_ptr + i,
1101                              *(uint64_t *)(s->data_gen_ptr + i));
1102                 } else {
1103                     qemu_log("0x%08" PRIxPTR ":  .long  0x%08x\n",
1104                              (uintptr_t)s->data_gen_ptr + i,
1105                              *(uint32_t *)(s->data_gen_ptr + i));
1106                 }
1107             }
1108         } else {
1109             log_disas(buf0, prologue_size);
1110         }
1111         qemu_log("\n");
1112         qemu_log_flush();
1113         qemu_log_unlock(logfile);
1114     }
1115 #endif
1116 
1117     /* Assert that goto_ptr is implemented completely.  */
1118     if (TCG_TARGET_HAS_goto_ptr) {
1119         tcg_debug_assert(s->code_gen_epilogue != NULL);
1120     }
1121 }
1122 
1123 void tcg_func_start(TCGContext *s)
1124 {
1125     tcg_pool_reset(s);
1126     s->nb_temps = s->nb_globals;
1127 
1128     /* No temps have been previously allocated for size or locality.  */
1129     memset(s->free_temps, 0, sizeof(s->free_temps));
1130 
1131     s->nb_ops = 0;
1132     s->nb_labels = 0;
1133     s->current_frame_offset = s->frame_start;
1134 
1135 #ifdef CONFIG_DEBUG_TCG
1136     s->goto_tb_issue_mask = 0;
1137 #endif
1138 
1139     QTAILQ_INIT(&s->ops);
1140     QTAILQ_INIT(&s->free_ops);
1141     QSIMPLEQ_INIT(&s->labels);
1142 }
1143 
1144 static inline TCGTemp *tcg_temp_alloc(TCGContext *s)
1145 {
1146     int n = s->nb_temps++;
1147     tcg_debug_assert(n < TCG_MAX_TEMPS);
1148     return memset(&s->temps[n], 0, sizeof(TCGTemp));
1149 }
1150 
1151 static inline TCGTemp *tcg_global_alloc(TCGContext *s)
1152 {
1153     TCGTemp *ts;
1154 
1155     tcg_debug_assert(s->nb_globals == s->nb_temps);
1156     s->nb_globals++;
1157     ts = tcg_temp_alloc(s);
1158     ts->temp_global = 1;
1159 
1160     return ts;
1161 }
1162 
1163 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
1164                                             TCGReg reg, const char *name)
1165 {
1166     TCGTemp *ts;
1167 
1168     if (TCG_TARGET_REG_BITS == 32 && type != TCG_TYPE_I32) {
1169         tcg_abort();
1170     }
1171 
1172     ts = tcg_global_alloc(s);
1173     ts->base_type = type;
1174     ts->type = type;
1175     ts->fixed_reg = 1;
1176     ts->reg = reg;
1177     ts->name = name;
1178     tcg_regset_set_reg(s->reserved_regs, reg);
1179 
1180     return ts;
1181 }
1182 
1183 void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size)
1184 {
1185     s->frame_start = start;
1186     s->frame_end = start + size;
1187     s->frame_temp
1188         = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame");
1189 }
1190 
1191 TCGTemp *tcg_global_mem_new_internal(TCGType type, TCGv_ptr base,
1192                                      intptr_t offset, const char *name)
1193 {
1194     TCGContext *s = tcg_ctx;
1195     TCGTemp *base_ts = tcgv_ptr_temp(base);
1196     TCGTemp *ts = tcg_global_alloc(s);
1197     int indirect_reg = 0, bigendian = 0;
1198 #ifdef HOST_WORDS_BIGENDIAN
1199     bigendian = 1;
1200 #endif
1201 
1202     if (!base_ts->fixed_reg) {
1203         /* We do not support double-indirect registers.  */
1204         tcg_debug_assert(!base_ts->indirect_reg);
1205         base_ts->indirect_base = 1;
1206         s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64
1207                             ? 2 : 1);
1208         indirect_reg = 1;
1209     }
1210 
1211     if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
1212         TCGTemp *ts2 = tcg_global_alloc(s);
1213         char buf[64];
1214 
1215         ts->base_type = TCG_TYPE_I64;
1216         ts->type = TCG_TYPE_I32;
1217         ts->indirect_reg = indirect_reg;
1218         ts->mem_allocated = 1;
1219         ts->mem_base = base_ts;
1220         ts->mem_offset = offset + bigendian * 4;
1221         pstrcpy(buf, sizeof(buf), name);
1222         pstrcat(buf, sizeof(buf), "_0");
1223         ts->name = strdup(buf);
1224 
1225         tcg_debug_assert(ts2 == ts + 1);
1226         ts2->base_type = TCG_TYPE_I64;
1227         ts2->type = TCG_TYPE_I32;
1228         ts2->indirect_reg = indirect_reg;
1229         ts2->mem_allocated = 1;
1230         ts2->mem_base = base_ts;
1231         ts2->mem_offset = offset + (1 - bigendian) * 4;
1232         pstrcpy(buf, sizeof(buf), name);
1233         pstrcat(buf, sizeof(buf), "_1");
1234         ts2->name = strdup(buf);
1235     } else {
1236         ts->base_type = type;
1237         ts->type = type;
1238         ts->indirect_reg = indirect_reg;
1239         ts->mem_allocated = 1;
1240         ts->mem_base = base_ts;
1241         ts->mem_offset = offset;
1242         ts->name = name;
1243     }
1244     return ts;
1245 }
1246 
1247 TCGTemp *tcg_temp_new_internal(TCGType type, bool temp_local)
1248 {
1249     TCGContext *s = tcg_ctx;
1250     TCGTemp *ts;
1251     int idx, k;
1252 
1253     k = type + (temp_local ? TCG_TYPE_COUNT : 0);
1254     idx = find_first_bit(s->free_temps[k].l, TCG_MAX_TEMPS);
1255     if (idx < TCG_MAX_TEMPS) {
1256         /* There is already an available temp with the right type.  */
1257         clear_bit(idx, s->free_temps[k].l);
1258 
1259         ts = &s->temps[idx];
1260         ts->temp_allocated = 1;
1261         tcg_debug_assert(ts->base_type == type);
1262         tcg_debug_assert(ts->temp_local == temp_local);
1263     } else {
1264         ts = tcg_temp_alloc(s);
1265         if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
1266             TCGTemp *ts2 = tcg_temp_alloc(s);
1267 
1268             ts->base_type = type;
1269             ts->type = TCG_TYPE_I32;
1270             ts->temp_allocated = 1;
1271             ts->temp_local = temp_local;
1272 
1273             tcg_debug_assert(ts2 == ts + 1);
1274             ts2->base_type = TCG_TYPE_I64;
1275             ts2->type = TCG_TYPE_I32;
1276             ts2->temp_allocated = 1;
1277             ts2->temp_local = temp_local;
1278         } else {
1279             ts->base_type = type;
1280             ts->type = type;
1281             ts->temp_allocated = 1;
1282             ts->temp_local = temp_local;
1283         }
1284     }
1285 
1286 #if defined(CONFIG_DEBUG_TCG)
1287     s->temps_in_use++;
1288 #endif
1289     return ts;
1290 }
1291 
1292 TCGv_vec tcg_temp_new_vec(TCGType type)
1293 {
1294     TCGTemp *t;
1295 
1296 #ifdef CONFIG_DEBUG_TCG
1297     switch (type) {
1298     case TCG_TYPE_V64:
1299         assert(TCG_TARGET_HAS_v64);
1300         break;
1301     case TCG_TYPE_V128:
1302         assert(TCG_TARGET_HAS_v128);
1303         break;
1304     case TCG_TYPE_V256:
1305         assert(TCG_TARGET_HAS_v256);
1306         break;
1307     default:
1308         g_assert_not_reached();
1309     }
1310 #endif
1311 
1312     t = tcg_temp_new_internal(type, 0);
1313     return temp_tcgv_vec(t);
1314 }
1315 
1316 /* Create a new temp of the same type as an existing temp.  */
1317 TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match)
1318 {
1319     TCGTemp *t = tcgv_vec_temp(match);
1320 
1321     tcg_debug_assert(t->temp_allocated != 0);
1322 
1323     t = tcg_temp_new_internal(t->base_type, 0);
1324     return temp_tcgv_vec(t);
1325 }
1326 
1327 void tcg_temp_free_internal(TCGTemp *ts)
1328 {
1329     TCGContext *s = tcg_ctx;
1330     int k, idx;
1331 
1332 #if defined(CONFIG_DEBUG_TCG)
1333     s->temps_in_use--;
1334     if (s->temps_in_use < 0) {
1335         fprintf(stderr, "More temporaries freed than allocated!\n");
1336     }
1337 #endif
1338 
1339     tcg_debug_assert(ts->temp_global == 0);
1340     tcg_debug_assert(ts->temp_allocated != 0);
1341     ts->temp_allocated = 0;
1342 
1343     idx = temp_idx(ts);
1344     k = ts->base_type + (ts->temp_local ? TCG_TYPE_COUNT : 0);
1345     set_bit(idx, s->free_temps[k].l);
1346 }
1347 
1348 TCGv_i32 tcg_const_i32(int32_t val)
1349 {
1350     TCGv_i32 t0;
1351     t0 = tcg_temp_new_i32();
1352     tcg_gen_movi_i32(t0, val);
1353     return t0;
1354 }
1355 
1356 TCGv_i64 tcg_const_i64(int64_t val)
1357 {
1358     TCGv_i64 t0;
1359     t0 = tcg_temp_new_i64();
1360     tcg_gen_movi_i64(t0, val);
1361     return t0;
1362 }
1363 
1364 TCGv_i32 tcg_const_local_i32(int32_t val)
1365 {
1366     TCGv_i32 t0;
1367     t0 = tcg_temp_local_new_i32();
1368     tcg_gen_movi_i32(t0, val);
1369     return t0;
1370 }
1371 
1372 TCGv_i64 tcg_const_local_i64(int64_t val)
1373 {
1374     TCGv_i64 t0;
1375     t0 = tcg_temp_local_new_i64();
1376     tcg_gen_movi_i64(t0, val);
1377     return t0;
1378 }
1379 
1380 #if defined(CONFIG_DEBUG_TCG)
1381 void tcg_clear_temp_count(void)
1382 {
1383     TCGContext *s = tcg_ctx;
1384     s->temps_in_use = 0;
1385 }
1386 
1387 int tcg_check_temp_count(void)
1388 {
1389     TCGContext *s = tcg_ctx;
1390     if (s->temps_in_use) {
1391         /* Clear the count so that we don't give another
1392          * warning immediately next time around.
1393          */
1394         s->temps_in_use = 0;
1395         return 1;
1396     }
1397     return 0;
1398 }
1399 #endif
1400 
/* Return true if OP may appear in the opcode stream.
   Test the runtime variable that controls each opcode.

   Opcodes are grouped below as: always-available generic ops, 32-bit
   integer ops, 64-bit integer ops, and vector ops.  Optional opcodes
   return the value of their TCG_TARGET_HAS_* control; any opcode not
   listed explicitly is reported as supported.  */
bool tcg_op_supported(TCGOpcode op)
{
    /* Vector ops require at least one supported vector width.  */
    const bool have_vec
        = TCG_TARGET_HAS_v64 | TCG_TARGET_HAS_v128 | TCG_TARGET_HAS_v256;

    switch (op) {
    /* Unconditionally supported, type-independent opcodes.  */
    case INDEX_op_discard:
    case INDEX_op_set_label:
    case INDEX_op_call:
    case INDEX_op_br:
    case INDEX_op_mb:
    case INDEX_op_insn_start:
    case INDEX_op_exit_tb:
    case INDEX_op_goto_tb:
    case INDEX_op_qemu_ld_i32:
    case INDEX_op_qemu_st_i32:
    case INDEX_op_qemu_ld_i64:
    case INDEX_op_qemu_st_i64:
        return true;

    case INDEX_op_goto_ptr:
        return TCG_TARGET_HAS_goto_ptr;

    /* 32-bit opcodes that are always supported.  */
    case INDEX_op_mov_i32:
    case INDEX_op_movi_i32:
    case INDEX_op_setcond_i32:
    case INDEX_op_brcond_i32:
    case INDEX_op_ld8u_i32:
    case INDEX_op_ld8s_i32:
    case INDEX_op_ld16u_i32:
    case INDEX_op_ld16s_i32:
    case INDEX_op_ld_i32:
    case INDEX_op_st8_i32:
    case INDEX_op_st16_i32:
    case INDEX_op_st_i32:
    case INDEX_op_add_i32:
    case INDEX_op_sub_i32:
    case INDEX_op_mul_i32:
    case INDEX_op_and_i32:
    case INDEX_op_or_i32:
    case INDEX_op_xor_i32:
    case INDEX_op_shl_i32:
    case INDEX_op_shr_i32:
    case INDEX_op_sar_i32:
        return true;

    /* Optional 32-bit opcodes.  */
    case INDEX_op_movcond_i32:
        return TCG_TARGET_HAS_movcond_i32;
    case INDEX_op_div_i32:
    case INDEX_op_divu_i32:
        return TCG_TARGET_HAS_div_i32;
    case INDEX_op_rem_i32:
    case INDEX_op_remu_i32:
        return TCG_TARGET_HAS_rem_i32;
    case INDEX_op_div2_i32:
    case INDEX_op_divu2_i32:
        return TCG_TARGET_HAS_div2_i32;
    case INDEX_op_rotl_i32:
    case INDEX_op_rotr_i32:
        return TCG_TARGET_HAS_rot_i32;
    case INDEX_op_deposit_i32:
        return TCG_TARGET_HAS_deposit_i32;
    case INDEX_op_extract_i32:
        return TCG_TARGET_HAS_extract_i32;
    case INDEX_op_sextract_i32:
        return TCG_TARGET_HAS_sextract_i32;
    case INDEX_op_extract2_i32:
        return TCG_TARGET_HAS_extract2_i32;
    case INDEX_op_add2_i32:
        return TCG_TARGET_HAS_add2_i32;
    case INDEX_op_sub2_i32:
        return TCG_TARGET_HAS_sub2_i32;
    case INDEX_op_mulu2_i32:
        return TCG_TARGET_HAS_mulu2_i32;
    case INDEX_op_muls2_i32:
        return TCG_TARGET_HAS_muls2_i32;
    case INDEX_op_muluh_i32:
        return TCG_TARGET_HAS_muluh_i32;
    case INDEX_op_mulsh_i32:
        return TCG_TARGET_HAS_mulsh_i32;
    case INDEX_op_ext8s_i32:
        return TCG_TARGET_HAS_ext8s_i32;
    case INDEX_op_ext16s_i32:
        return TCG_TARGET_HAS_ext16s_i32;
    case INDEX_op_ext8u_i32:
        return TCG_TARGET_HAS_ext8u_i32;
    case INDEX_op_ext16u_i32:
        return TCG_TARGET_HAS_ext16u_i32;
    case INDEX_op_bswap16_i32:
        return TCG_TARGET_HAS_bswap16_i32;
    case INDEX_op_bswap32_i32:
        return TCG_TARGET_HAS_bswap32_i32;
    case INDEX_op_not_i32:
        return TCG_TARGET_HAS_not_i32;
    case INDEX_op_neg_i32:
        return TCG_TARGET_HAS_neg_i32;
    case INDEX_op_andc_i32:
        return TCG_TARGET_HAS_andc_i32;
    case INDEX_op_orc_i32:
        return TCG_TARGET_HAS_orc_i32;
    case INDEX_op_eqv_i32:
        return TCG_TARGET_HAS_eqv_i32;
    case INDEX_op_nand_i32:
        return TCG_TARGET_HAS_nand_i32;
    case INDEX_op_nor_i32:
        return TCG_TARGET_HAS_nor_i32;
    case INDEX_op_clz_i32:
        return TCG_TARGET_HAS_clz_i32;
    case INDEX_op_ctz_i32:
        return TCG_TARGET_HAS_ctz_i32;
    case INDEX_op_ctpop_i32:
        return TCG_TARGET_HAS_ctpop_i32;

    /* Double-word comparisons exist only on 32-bit hosts.  */
    case INDEX_op_brcond2_i32:
    case INDEX_op_setcond2_i32:
        return TCG_TARGET_REG_BITS == 32;

    /* Basic 64-bit opcodes: supported exactly on 64-bit hosts.  */
    case INDEX_op_mov_i64:
    case INDEX_op_movi_i64:
    case INDEX_op_setcond_i64:
    case INDEX_op_brcond_i64:
    case INDEX_op_ld8u_i64:
    case INDEX_op_ld8s_i64:
    case INDEX_op_ld16u_i64:
    case INDEX_op_ld16s_i64:
    case INDEX_op_ld32u_i64:
    case INDEX_op_ld32s_i64:
    case INDEX_op_ld_i64:
    case INDEX_op_st8_i64:
    case INDEX_op_st16_i64:
    case INDEX_op_st32_i64:
    case INDEX_op_st_i64:
    case INDEX_op_add_i64:
    case INDEX_op_sub_i64:
    case INDEX_op_mul_i64:
    case INDEX_op_and_i64:
    case INDEX_op_or_i64:
    case INDEX_op_xor_i64:
    case INDEX_op_shl_i64:
    case INDEX_op_shr_i64:
    case INDEX_op_sar_i64:
    case INDEX_op_ext_i32_i64:
    case INDEX_op_extu_i32_i64:
        return TCG_TARGET_REG_BITS == 64;

    /* Optional 64-bit opcodes.  */
    case INDEX_op_movcond_i64:
        return TCG_TARGET_HAS_movcond_i64;
    case INDEX_op_div_i64:
    case INDEX_op_divu_i64:
        return TCG_TARGET_HAS_div_i64;
    case INDEX_op_rem_i64:
    case INDEX_op_remu_i64:
        return TCG_TARGET_HAS_rem_i64;
    case INDEX_op_div2_i64:
    case INDEX_op_divu2_i64:
        return TCG_TARGET_HAS_div2_i64;
    case INDEX_op_rotl_i64:
    case INDEX_op_rotr_i64:
        return TCG_TARGET_HAS_rot_i64;
    case INDEX_op_deposit_i64:
        return TCG_TARGET_HAS_deposit_i64;
    case INDEX_op_extract_i64:
        return TCG_TARGET_HAS_extract_i64;
    case INDEX_op_sextract_i64:
        return TCG_TARGET_HAS_sextract_i64;
    case INDEX_op_extract2_i64:
        return TCG_TARGET_HAS_extract2_i64;
    case INDEX_op_extrl_i64_i32:
        return TCG_TARGET_HAS_extrl_i64_i32;
    case INDEX_op_extrh_i64_i32:
        return TCG_TARGET_HAS_extrh_i64_i32;
    case INDEX_op_ext8s_i64:
        return TCG_TARGET_HAS_ext8s_i64;
    case INDEX_op_ext16s_i64:
        return TCG_TARGET_HAS_ext16s_i64;
    case INDEX_op_ext32s_i64:
        return TCG_TARGET_HAS_ext32s_i64;
    case INDEX_op_ext8u_i64:
        return TCG_TARGET_HAS_ext8u_i64;
    case INDEX_op_ext16u_i64:
        return TCG_TARGET_HAS_ext16u_i64;
    case INDEX_op_ext32u_i64:
        return TCG_TARGET_HAS_ext32u_i64;
    case INDEX_op_bswap16_i64:
        return TCG_TARGET_HAS_bswap16_i64;
    case INDEX_op_bswap32_i64:
        return TCG_TARGET_HAS_bswap32_i64;
    case INDEX_op_bswap64_i64:
        return TCG_TARGET_HAS_bswap64_i64;
    case INDEX_op_not_i64:
        return TCG_TARGET_HAS_not_i64;
    case INDEX_op_neg_i64:
        return TCG_TARGET_HAS_neg_i64;
    case INDEX_op_andc_i64:
        return TCG_TARGET_HAS_andc_i64;
    case INDEX_op_orc_i64:
        return TCG_TARGET_HAS_orc_i64;
    case INDEX_op_eqv_i64:
        return TCG_TARGET_HAS_eqv_i64;
    case INDEX_op_nand_i64:
        return TCG_TARGET_HAS_nand_i64;
    case INDEX_op_nor_i64:
        return TCG_TARGET_HAS_nor_i64;
    case INDEX_op_clz_i64:
        return TCG_TARGET_HAS_clz_i64;
    case INDEX_op_ctz_i64:
        return TCG_TARGET_HAS_ctz_i64;
    case INDEX_op_ctpop_i64:
        return TCG_TARGET_HAS_ctpop_i64;
    case INDEX_op_add2_i64:
        return TCG_TARGET_HAS_add2_i64;
    case INDEX_op_sub2_i64:
        return TCG_TARGET_HAS_sub2_i64;
    case INDEX_op_mulu2_i64:
        return TCG_TARGET_HAS_mulu2_i64;
    case INDEX_op_muls2_i64:
        return TCG_TARGET_HAS_muls2_i64;
    case INDEX_op_muluh_i64:
        return TCG_TARGET_HAS_muluh_i64;
    case INDEX_op_mulsh_i64:
        return TCG_TARGET_HAS_mulsh_i64;

    /* Vector opcodes available whenever any vector width is.  */
    case INDEX_op_mov_vec:
    case INDEX_op_dup_vec:
    case INDEX_op_dupi_vec:
    case INDEX_op_dupm_vec:
    case INDEX_op_ld_vec:
    case INDEX_op_st_vec:
    case INDEX_op_add_vec:
    case INDEX_op_sub_vec:
    case INDEX_op_and_vec:
    case INDEX_op_or_vec:
    case INDEX_op_xor_vec:
    case INDEX_op_cmp_vec:
        return have_vec;
    /* Optional vector opcodes: need vector support plus their own flag.  */
    case INDEX_op_dup2_vec:
        return have_vec && TCG_TARGET_REG_BITS == 32;
    case INDEX_op_not_vec:
        return have_vec && TCG_TARGET_HAS_not_vec;
    case INDEX_op_neg_vec:
        return have_vec && TCG_TARGET_HAS_neg_vec;
    case INDEX_op_abs_vec:
        return have_vec && TCG_TARGET_HAS_abs_vec;
    case INDEX_op_andc_vec:
        return have_vec && TCG_TARGET_HAS_andc_vec;
    case INDEX_op_orc_vec:
        return have_vec && TCG_TARGET_HAS_orc_vec;
    case INDEX_op_mul_vec:
        return have_vec && TCG_TARGET_HAS_mul_vec;
    case INDEX_op_shli_vec:
    case INDEX_op_shri_vec:
    case INDEX_op_sari_vec:
        return have_vec && TCG_TARGET_HAS_shi_vec;
    case INDEX_op_shls_vec:
    case INDEX_op_shrs_vec:
    case INDEX_op_sars_vec:
        return have_vec && TCG_TARGET_HAS_shs_vec;
    case INDEX_op_shlv_vec:
    case INDEX_op_shrv_vec:
    case INDEX_op_sarv_vec:
        return have_vec && TCG_TARGET_HAS_shv_vec;
    case INDEX_op_ssadd_vec:
    case INDEX_op_usadd_vec:
    case INDEX_op_sssub_vec:
    case INDEX_op_ussub_vec:
        return have_vec && TCG_TARGET_HAS_sat_vec;
    case INDEX_op_smin_vec:
    case INDEX_op_umin_vec:
    case INDEX_op_smax_vec:
    case INDEX_op_umax_vec:
        return have_vec && TCG_TARGET_HAS_minmax_vec;
    case INDEX_op_bitsel_vec:
        return have_vec && TCG_TARGET_HAS_bitsel_vec;
    case INDEX_op_cmpsel_vec:
        return have_vec && TCG_TARGET_HAS_cmpsel_vec;

    default:
        /* Anything else must lie past the generic opcodes (debug builds
           assert this) and is reported as supported.  */
        tcg_debug_assert(op > INDEX_op_last_generic && op < NB_OPS);
        return true;
    }
}
1684 
/* Emit an INDEX_op_call op that calls helper FUNC with the NARGS temps in
   ARGS, storing the result in RET (NULL if there is none).

   FUNC is looked up in helper_table to obtain its flags and sizemask.
   As used below, sizemask bit 0 marks a 64-bit return value; for
   argument i, bit (i+1)*2 marks a 64-bit argument and bit (i+1)*2 + 1 a
   signed one.  The op's argument list is laid out as: return temp(s),
   input temps, the function pointer, then the flags.

   Note: we convert the 64 bit args to 32 bit and do some alignment
   and endian swap. Maybe it would be better to do the alignment
   and endian swap in tcg_reg_alloc_call(). */
void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args)
{
    int i, real_args, nb_rets, pi;
    unsigned sizemask, flags;
    TCGHelperInfo *info;
    TCGOp *op;

    /* NOTE(review): the lookup result is dereferenced without a NULL
       check, so FUNC is presumably always a registered helper --
       confirm against callers.  */
    info = g_hash_table_lookup(helper_table, (gpointer)func);
    flags = info->flags;
    sizemask = info->sizemask;

#ifdef CONFIG_PLUGIN
    /* detect non-plugin helpers */
    if (tcg_ctx->plugin_insn && unlikely(strncmp(info->name, "plugin_", 7))) {
        tcg_ctx->plugin_insn->calls_helpers = true;
    }
#endif

#if defined(__sparc__) && !defined(__arch64__) \
    && !defined(CONFIG_TCG_INTERPRETER)
    /* We have 64-bit values in one register, but need to pass as two
       separate parameters.  Split them.  */
    int orig_sizemask = sizemask;
    int orig_nargs = nargs;
    TCGv_i64 retl, reth;
    TCGTemp *split_args[MAX_OPC_PARAM];

    retl = NULL;
    reth = NULL;
    if (sizemask != 0) {
        for (i = real_args = 0; i < nargs; ++i) {
            int is_64bit = sizemask & (1 << (i+1)*2);
            if (is_64bit) {
                TCGv_i64 orig = temp_tcgv_i64(args[i]);
                TCGv_i32 h = tcg_temp_new_i32();
                TCGv_i32 l = tcg_temp_new_i32();
                tcg_gen_extr_i64_i32(l, h, orig);
                split_args[real_args++] = tcgv_i32_temp(h);
                split_args[real_args++] = tcgv_i32_temp(l);
            } else {
                split_args[real_args++] = args[i];
            }
        }
        /* From here on work with the split list; sizemask is cleared so
           the generic code below treats every argument as one slot.  */
        nargs = real_args;
        args = split_args;
        sizemask = 0;
    }
#elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
    /* Replace each 32-bit argument by a new 64-bit temp holding its
       sign- or zero-extended value; these temps are freed at the end.  */
    for (i = 0; i < nargs; ++i) {
        int is_64bit = sizemask & (1 << (i+1)*2);
        int is_signed = sizemask & (2 << (i+1)*2);
        if (!is_64bit) {
            TCGv_i64 temp = tcg_temp_new_i64();
            TCGv_i64 orig = temp_tcgv_i64(args[i]);
            if (is_signed) {
                tcg_gen_ext32s_i64(temp, orig);
            } else {
                tcg_gen_ext32u_i64(temp, orig);
            }
            args[i] = tcgv_i64_temp(temp);
        }
    }
#endif /* TCG_TARGET_EXTEND_ARGS */

    op = tcg_emit_op(INDEX_op_call);

    /* pi is the next free index in op->args[].  Outputs come first.  */
    pi = 0;
    if (ret != NULL) {
#if defined(__sparc__) && !defined(__arch64__) \
    && !defined(CONFIG_TCG_INTERPRETER)
        if (orig_sizemask & 1) {
            /* The 32-bit ABI is going to return the 64-bit value in
               the %o0/%o1 register pair.  Prepare for this by using
               two return temporaries, and reassemble below.  */
            retl = tcg_temp_new_i64();
            reth = tcg_temp_new_i64();
            op->args[pi++] = tcgv_i64_arg(reth);
            op->args[pi++] = tcgv_i64_arg(retl);
            nb_rets = 2;
        } else {
            op->args[pi++] = temp_arg(ret);
            nb_rets = 1;
        }
#else
        if (TCG_TARGET_REG_BITS < 64 && (sizemask & 1)) {
            /* 64-bit result on a 32-bit host: RET and RET + 1 are the
               two halves, ordered according to host endianness.  */
#ifdef HOST_WORDS_BIGENDIAN
            op->args[pi++] = temp_arg(ret + 1);
            op->args[pi++] = temp_arg(ret);
#else
            op->args[pi++] = temp_arg(ret);
            op->args[pi++] = temp_arg(ret + 1);
#endif
            nb_rets = 2;
        } else {
            op->args[pi++] = temp_arg(ret);
            nb_rets = 1;
        }
#endif
    } else {
        nb_rets = 0;
    }
    TCGOP_CALLO(op) = nb_rets;

    /* Inputs: real_args counts the argument slots actually emitted,
       including dummy alignment slots.  */
    real_args = 0;
    for (i = 0; i < nargs; i++) {
        int is_64bit = sizemask & (1 << (i+1)*2);
        if (TCG_TARGET_REG_BITS < 64 && is_64bit) {
#ifdef TCG_TARGET_CALL_ALIGN_ARGS
            /* some targets want aligned 64 bit args */
            if (real_args & 1) {
                op->args[pi++] = TCG_CALL_DUMMY_ARG;
                real_args++;
            }
#endif
           /* If stack grows up, then we will be placing successive
              arguments at lower addresses, which means we need to
              reverse the order compared to how we would normally
              treat either big or little-endian.  For those arguments
              that will wind up in registers, this still works for
              HPPA (the only current STACK_GROWSUP target) since the
              argument registers are *also* allocated in decreasing
              order.  If another such target is added, this logic may
              have to get more complicated to differentiate between
              stack arguments and register arguments.  */
#if defined(HOST_WORDS_BIGENDIAN) != defined(TCG_TARGET_STACK_GROWSUP)
            op->args[pi++] = temp_arg(args[i] + 1);
            op->args[pi++] = temp_arg(args[i]);
#else
            op->args[pi++] = temp_arg(args[i]);
            op->args[pi++] = temp_arg(args[i] + 1);
#endif
            real_args += 2;
            continue;
        }

        op->args[pi++] = temp_arg(args[i]);
        real_args++;
    }
    /* The function pointer and the call flags terminate the list.  */
    op->args[pi++] = (uintptr_t)func;
    op->args[pi++] = flags;
    TCGOP_CALLI(op) = real_args;

    /* Make sure the fields didn't overflow.  */
    tcg_debug_assert(TCGOP_CALLI(op) == real_args);
    tcg_debug_assert(pi <= ARRAY_SIZE(op->args));

#if defined(__sparc__) && !defined(__arch64__) \
    && !defined(CONFIG_TCG_INTERPRETER)
    /* Free all of the parts we allocated above.  */
    for (i = real_args = 0; i < orig_nargs; ++i) {
        int is_64bit = orig_sizemask & (1 << (i+1)*2);
        if (is_64bit) {
            tcg_temp_free_internal(args[real_args++]);
            tcg_temp_free_internal(args[real_args++]);
        } else {
            real_args++;
        }
    }
    if (orig_sizemask & 1) {
        /* The 32-bit ABI returned two 32-bit pieces.  Re-assemble them.
           Note that describing these as TCGv_i64 eliminates an unnecessary
           zero-extension that tcg_gen_concat_i32_i64 would create.  */
        tcg_gen_concat32_i64(temp_tcgv_i64(ret), retl, reth);
        tcg_temp_free_i64(retl);
        tcg_temp_free_i64(reth);
    }
#elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
    /* Release the extension temps created before the call was emitted.  */
    for (i = 0; i < nargs; ++i) {
        int is_64bit = sizemask & (1 << (i+1)*2);
        if (!is_64bit) {
            tcg_temp_free_internal(args[i]);
        }
    }
#endif /* TCG_TARGET_EXTEND_ARGS */
}
1863 
1864 static void tcg_reg_alloc_start(TCGContext *s)
1865 {
1866     int i, n;
1867     TCGTemp *ts;
1868 
1869     for (i = 0, n = s->nb_globals; i < n; i++) {
1870         ts = &s->temps[i];
1871         ts->val_type = (ts->fixed_reg ? TEMP_VAL_REG : TEMP_VAL_MEM);
1872     }
1873     for (n = s->nb_temps; i < n; i++) {
1874         ts = &s->temps[i];
1875         ts->val_type = (ts->temp_local ? TEMP_VAL_MEM : TEMP_VAL_DEAD);
1876         ts->mem_allocated = 0;
1877         ts->fixed_reg = 0;
1878     }
1879 
1880     memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp));
1881 }
1882 
1883 static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size,
1884                                  TCGTemp *ts)
1885 {
1886     int idx = temp_idx(ts);
1887 
1888     if (ts->temp_global) {
1889         pstrcpy(buf, buf_size, ts->name);
1890     } else if (ts->temp_local) {
1891         snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
1892     } else {
1893         snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
1894     }
1895     return buf;
1896 }
1897 
1898 static char *tcg_get_arg_str(TCGContext *s, char *buf,
1899                              int buf_size, TCGArg arg)
1900 {
1901     return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg));
1902 }
1903 
1904 /* Find helper name.  */
1905 static inline const char *tcg_find_helper(TCGContext *s, uintptr_t val)
1906 {
1907     const char *ret = NULL;
1908     if (helper_table) {
1909         TCGHelperInfo *info = g_hash_table_lookup(helper_table, (gpointer)val);
1910         if (info) {
1911             ret = info->name;
1912         }
1913     }
1914     return ret;
1915 }
1916 
1917 static const char * const cond_name[] =
1918 {
1919     [TCG_COND_NEVER] = "never",
1920     [TCG_COND_ALWAYS] = "always",
1921     [TCG_COND_EQ] = "eq",
1922     [TCG_COND_NE] = "ne",
1923     [TCG_COND_LT] = "lt",
1924     [TCG_COND_GE] = "ge",
1925     [TCG_COND_LE] = "le",
1926     [TCG_COND_GT] = "gt",
1927     [TCG_COND_LTU] = "ltu",
1928     [TCG_COND_GEU] = "geu",
1929     [TCG_COND_LEU] = "leu",
1930     [TCG_COND_GTU] = "gtu"
1931 };
1932 
1933 static const char * const ldst_name[] =
1934 {
1935     [MO_UB]   = "ub",
1936     [MO_SB]   = "sb",
1937     [MO_LEUW] = "leuw",
1938     [MO_LESW] = "lesw",
1939     [MO_LEUL] = "leul",
1940     [MO_LESL] = "lesl",
1941     [MO_LEQ]  = "leq",
1942     [MO_BEUW] = "beuw",
1943     [MO_BESW] = "besw",
1944     [MO_BEUL] = "beul",
1945     [MO_BESL] = "besl",
1946     [MO_BEQ]  = "beq",
1947 };
1948 
1949 static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
1950 #ifdef TARGET_ALIGNED_ONLY
1951     [MO_UNALN >> MO_ASHIFT]    = "un+",
1952     [MO_ALIGN >> MO_ASHIFT]    = "",
1953 #else
1954     [MO_UNALN >> MO_ASHIFT]    = "",
1955     [MO_ALIGN >> MO_ASHIFT]    = "al+",
1956 #endif
1957     [MO_ALIGN_2 >> MO_ASHIFT]  = "al2+",
1958     [MO_ALIGN_4 >> MO_ASHIFT]  = "al4+",
1959     [MO_ALIGN_8 >> MO_ASHIFT]  = "al8+",
1960     [MO_ALIGN_16 >> MO_ASHIFT] = "al16+",
1961     [MO_ALIGN_32 >> MO_ASHIFT] = "al32+",
1962     [MO_ALIGN_64 >> MO_ASHIFT] = "al64+",
1963 };
1964 
1965 static inline bool tcg_regset_single(TCGRegSet d)
1966 {
1967     return (d & (d - 1)) == 0;
1968 }
1969 
1970 static inline TCGReg tcg_regset_first(TCGRegSet d)
1971 {
1972     if (TCG_TARGET_NB_REGS <= 32) {
1973         return ctz32(d);
1974     } else {
1975         return ctz64(d);
1976     }
1977 }
1978 
1979 static void tcg_dump_ops(TCGContext *s, bool have_prefs)
1980 {
1981     char buf[128];
1982     TCGOp *op;
1983 
1984     QTAILQ_FOREACH(op, &s->ops, link) {
1985         int i, k, nb_oargs, nb_iargs, nb_cargs;
1986         const TCGOpDef *def;
1987         TCGOpcode c;
1988         int col = 0;
1989 
1990         c = op->opc;
1991         def = &tcg_op_defs[c];
1992 
1993         if (c == INDEX_op_insn_start) {
1994             nb_oargs = 0;
1995             col += qemu_log("\n ----");
1996 
1997             for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
1998                 target_ulong a;
1999 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
2000                 a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
2001 #else
2002                 a = op->args[i];
2003 #endif
2004                 col += qemu_log(" " TARGET_FMT_lx, a);
2005             }
2006         } else if (c == INDEX_op_call) {
2007             /* variable number of arguments */
2008             nb_oargs = TCGOP_CALLO(op);
2009             nb_iargs = TCGOP_CALLI(op);
2010             nb_cargs = def->nb_cargs;
2011 
2012             /* function name, flags, out args */
2013             col += qemu_log(" %s %s,$0x%" TCG_PRIlx ",$%d", def->name,
2014                             tcg_find_helper(s, op->args[nb_oargs + nb_iargs]),
2015                             op->args[nb_oargs + nb_iargs + 1], nb_oargs);
2016             for (i = 0; i < nb_oargs; i++) {
2017                 col += qemu_log(",%s", tcg_get_arg_str(s, buf, sizeof(buf),
2018                                                        op->args[i]));
2019             }
2020             for (i = 0; i < nb_iargs; i++) {
2021                 TCGArg arg = op->args[nb_oargs + i];
2022                 const char *t = "<dummy>";
2023                 if (arg != TCG_CALL_DUMMY_ARG) {
2024                     t = tcg_get_arg_str(s, buf, sizeof(buf), arg);
2025                 }
2026                 col += qemu_log(",%s", t);
2027             }
2028         } else {
2029             col += qemu_log(" %s ", def->name);
2030 
2031             nb_oargs = def->nb_oargs;
2032             nb_iargs = def->nb_iargs;
2033             nb_cargs = def->nb_cargs;
2034 
2035             if (def->flags & TCG_OPF_VECTOR) {
2036                 col += qemu_log("v%d,e%d,", 64 << TCGOP_VECL(op),
2037                                 8 << TCGOP_VECE(op));
2038             }
2039 
2040             k = 0;
2041             for (i = 0; i < nb_oargs; i++) {
2042                 if (k != 0) {
2043                     col += qemu_log(",");
2044                 }
2045                 col += qemu_log("%s", tcg_get_arg_str(s, buf, sizeof(buf),
2046                                                       op->args[k++]));
2047             }
2048             for (i = 0; i < nb_iargs; i++) {
2049                 if (k != 0) {
2050                     col += qemu_log(",");
2051                 }
2052                 col += qemu_log("%s", tcg_get_arg_str(s, buf, sizeof(buf),
2053                                                       op->args[k++]));
2054             }
2055             switch (c) {
2056             case INDEX_op_brcond_i32:
2057             case INDEX_op_setcond_i32:
2058             case INDEX_op_movcond_i32:
2059             case INDEX_op_brcond2_i32:
2060             case INDEX_op_setcond2_i32:
2061             case INDEX_op_brcond_i64:
2062             case INDEX_op_setcond_i64:
2063             case INDEX_op_movcond_i64:
2064             case INDEX_op_cmp_vec:
2065             case INDEX_op_cmpsel_vec:
2066                 if (op->args[k] < ARRAY_SIZE(cond_name)
2067                     && cond_name[op->args[k]]) {
2068                     col += qemu_log(",%s", cond_name[op->args[k++]]);
2069                 } else {
2070                     col += qemu_log(",$0x%" TCG_PRIlx, op->args[k++]);
2071                 }
2072                 i = 1;
2073                 break;
2074             case INDEX_op_qemu_ld_i32:
2075             case INDEX_op_qemu_st_i32:
2076             case INDEX_op_qemu_ld_i64:
2077             case INDEX_op_qemu_st_i64:
2078                 {
2079                     TCGMemOpIdx oi = op->args[k++];
2080                     MemOp op = get_memop(oi);
2081                     unsigned ix = get_mmuidx(oi);
2082 
2083                     if (op & ~(MO_AMASK | MO_BSWAP | MO_SSIZE)) {
2084                         col += qemu_log(",$0x%x,%u", op, ix);
2085                     } else {
2086                         const char *s_al, *s_op;
2087                         s_al = alignment_name[(op & MO_AMASK) >> MO_ASHIFT];
2088                         s_op = ldst_name[op & (MO_BSWAP | MO_SSIZE)];
2089                         col += qemu_log(",%s%s,%u", s_al, s_op, ix);
2090                     }
2091                     i = 1;
2092                 }
2093                 break;
2094             default:
2095                 i = 0;
2096                 break;
2097             }
2098             switch (c) {
2099             case INDEX_op_set_label:
2100             case INDEX_op_br:
2101             case INDEX_op_brcond_i32:
2102             case INDEX_op_brcond_i64:
2103             case INDEX_op_brcond2_i32:
2104                 col += qemu_log("%s$L%d", k ? "," : "",
2105                                 arg_label(op->args[k])->id);
2106                 i++, k++;
2107                 break;
2108             default:
2109                 break;
2110             }
2111             for (; i < nb_cargs; i++, k++) {
2112                 col += qemu_log("%s$0x%" TCG_PRIlx, k ? "," : "", op->args[k]);
2113             }
2114         }
2115 
2116         if (have_prefs || op->life) {
2117 
2118             QemuLogFile *logfile;
2119 
2120             rcu_read_lock();
2121             logfile = atomic_rcu_read(&qemu_logfile);
2122             if (logfile) {
2123                 for (; col < 40; ++col) {
2124                     putc(' ', logfile->fd);
2125                 }
2126             }
2127             rcu_read_unlock();
2128         }
2129 
2130         if (op->life) {
2131             unsigned life = op->life;
2132 
2133             if (life & (SYNC_ARG * 3)) {
2134                 qemu_log("  sync:");
2135                 for (i = 0; i < 2; ++i) {
2136                     if (life & (SYNC_ARG << i)) {
2137                         qemu_log(" %d", i);
2138                     }
2139                 }
2140             }
2141             life /= DEAD_ARG;
2142             if (life) {
2143                 qemu_log("  dead:");
2144                 for (i = 0; life; ++i, life >>= 1) {
2145                     if (life & 1) {
2146                         qemu_log(" %d", i);
2147                     }
2148                 }
2149             }
2150         }
2151 
2152         if (have_prefs) {
2153             for (i = 0; i < nb_oargs; ++i) {
2154                 TCGRegSet set = op->output_pref[i];
2155 
2156                 if (i == 0) {
2157                     qemu_log("  pref=");
2158                 } else {
2159                     qemu_log(",");
2160                 }
2161                 if (set == 0) {
2162                     qemu_log("none");
2163                 } else if (set == MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS)) {
2164                     qemu_log("all");
2165 #ifdef CONFIG_DEBUG_TCG
2166                 } else if (tcg_regset_single(set)) {
2167                     TCGReg reg = tcg_regset_first(set);
2168                     qemu_log("%s", tcg_target_reg_names[reg]);
2169 #endif
2170                 } else if (TCG_TARGET_NB_REGS <= 32) {
2171                     qemu_log("%#x", (uint32_t)set);
2172                 } else {
2173                     qemu_log("%#" PRIx64, (uint64_t)set);
2174                 }
2175             }
2176         }
2177 
2178         qemu_log("\n");
2179     }
2180 }
2181 
2182 /* we give more priority to constraints with less registers */
2183 static int get_constraint_priority(const TCGOpDef *def, int k)
2184 {
2185     const TCGArgConstraint *arg_ct;
2186 
2187     int i, n;
2188     arg_ct = &def->args_ct[k];
2189     if (arg_ct->ct & TCG_CT_ALIAS) {
2190         /* an alias is equivalent to a single register */
2191         n = 1;
2192     } else {
2193         if (!(arg_ct->ct & TCG_CT_REG))
2194             return 0;
2195         n = 0;
2196         for(i = 0; i < TCG_TARGET_NB_REGS; i++) {
2197             if (tcg_regset_test_reg(arg_ct->u.regs, i))
2198                 n++;
2199         }
2200     }
2201     return TCG_TARGET_NB_REGS - n + 1;
2202 }
2203 
2204 /* sort from highest priority to lowest */
2205 static void sort_constraints(TCGOpDef *def, int start, int n)
2206 {
2207     int i, j, p1, p2, tmp;
2208 
2209     for(i = 0; i < n; i++)
2210         def->sorted_args[start + i] = start + i;
2211     if (n <= 1)
2212         return;
2213     for(i = 0; i < n - 1; i++) {
2214         for(j = i + 1; j < n; j++) {
2215             p1 = get_constraint_priority(def, def->sorted_args[start + i]);
2216             p2 = get_constraint_priority(def, def->sorted_args[start + j]);
2217             if (p1 < p2) {
2218                 tmp = def->sorted_args[start + i];
2219                 def->sorted_args[start + i] = def->sorted_args[start + j];
2220                 def->sorted_args[start + j] = tmp;
2221             }
2222         }
2223     }
2224 }
2225 
2226 static void process_op_defs(TCGContext *s)
2227 {
2228     TCGOpcode op;
2229 
2230     for (op = 0; op < NB_OPS; op++) {
2231         TCGOpDef *def = &tcg_op_defs[op];
2232         const TCGTargetOpDef *tdefs;
2233         TCGType type;
2234         int i, nb_args;
2235 
2236         if (def->flags & TCG_OPF_NOT_PRESENT) {
2237             continue;
2238         }
2239 
2240         nb_args = def->nb_iargs + def->nb_oargs;
2241         if (nb_args == 0) {
2242             continue;
2243         }
2244 
2245         tdefs = tcg_target_op_def(op);
2246         /* Missing TCGTargetOpDef entry. */
2247         tcg_debug_assert(tdefs != NULL);
2248 
2249         type = (def->flags & TCG_OPF_64BIT ? TCG_TYPE_I64 : TCG_TYPE_I32);
2250         for (i = 0; i < nb_args; i++) {
2251             const char *ct_str = tdefs->args_ct_str[i];
2252             /* Incomplete TCGTargetOpDef entry. */
2253             tcg_debug_assert(ct_str != NULL);
2254 
2255             def->args_ct[i].u.regs = 0;
2256             def->args_ct[i].ct = 0;
2257             while (*ct_str != '\0') {
2258                 switch(*ct_str) {
2259                 case '0' ... '9':
2260                     {
2261                         int oarg = *ct_str - '0';
2262                         tcg_debug_assert(ct_str == tdefs->args_ct_str[i]);
2263                         tcg_debug_assert(oarg < def->nb_oargs);
2264                         tcg_debug_assert(def->args_ct[oarg].ct & TCG_CT_REG);
2265                         /* TCG_CT_ALIAS is for the output arguments.
2266                            The input is tagged with TCG_CT_IALIAS. */
2267                         def->args_ct[i] = def->args_ct[oarg];
2268                         def->args_ct[oarg].ct |= TCG_CT_ALIAS;
2269                         def->args_ct[oarg].alias_index = i;
2270                         def->args_ct[i].ct |= TCG_CT_IALIAS;
2271                         def->args_ct[i].alias_index = oarg;
2272                     }
2273                     ct_str++;
2274                     break;
2275                 case '&':
2276                     def->args_ct[i].ct |= TCG_CT_NEWREG;
2277                     ct_str++;
2278                     break;
2279                 case 'i':
2280                     def->args_ct[i].ct |= TCG_CT_CONST;
2281                     ct_str++;
2282                     break;
2283                 default:
2284                     ct_str = target_parse_constraint(&def->args_ct[i],
2285                                                      ct_str, type);
2286                     /* Typo in TCGTargetOpDef constraint. */
2287                     tcg_debug_assert(ct_str != NULL);
2288                 }
2289             }
2290         }
2291 
2292         /* TCGTargetOpDef entry with too much information? */
2293         tcg_debug_assert(i == TCG_MAX_OP_ARGS || tdefs->args_ct_str[i] == NULL);
2294 
2295         /* sort the constraints (XXX: this is just an heuristic) */
2296         sort_constraints(def, 0, def->nb_oargs);
2297         sort_constraints(def, def->nb_oargs, def->nb_iargs);
2298     }
2299 }
2300 
2301 void tcg_op_remove(TCGContext *s, TCGOp *op)
2302 {
2303     TCGLabel *label;
2304 
2305     switch (op->opc) {
2306     case INDEX_op_br:
2307         label = arg_label(op->args[0]);
2308         label->refs--;
2309         break;
2310     case INDEX_op_brcond_i32:
2311     case INDEX_op_brcond_i64:
2312         label = arg_label(op->args[3]);
2313         label->refs--;
2314         break;
2315     case INDEX_op_brcond2_i32:
2316         label = arg_label(op->args[5]);
2317         label->refs--;
2318         break;
2319     default:
2320         break;
2321     }
2322 
2323     QTAILQ_REMOVE(&s->ops, op, link);
2324     QTAILQ_INSERT_TAIL(&s->free_ops, op, link);
2325     s->nb_ops--;
2326 
2327 #ifdef CONFIG_PROFILER
2328     atomic_set(&s->prof.del_op_count, s->prof.del_op_count + 1);
2329 #endif
2330 }
2331 
2332 static TCGOp *tcg_op_alloc(TCGOpcode opc)
2333 {
2334     TCGContext *s = tcg_ctx;
2335     TCGOp *op;
2336 
2337     if (likely(QTAILQ_EMPTY(&s->free_ops))) {
2338         op = tcg_malloc(sizeof(TCGOp));
2339     } else {
2340         op = QTAILQ_FIRST(&s->free_ops);
2341         QTAILQ_REMOVE(&s->free_ops, op, link);
2342     }
2343     memset(op, 0, offsetof(TCGOp, link));
2344     op->opc = opc;
2345     s->nb_ops++;
2346 
2347     return op;
2348 }
2349 
2350 TCGOp *tcg_emit_op(TCGOpcode opc)
2351 {
2352     TCGOp *op = tcg_op_alloc(opc);
2353     QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
2354     return op;
2355 }
2356 
2357 TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op, TCGOpcode opc)
2358 {
2359     TCGOp *new_op = tcg_op_alloc(opc);
2360     QTAILQ_INSERT_BEFORE(old_op, new_op, link);
2361     return new_op;
2362 }
2363 
2364 TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op, TCGOpcode opc)
2365 {
2366     TCGOp *new_op = tcg_op_alloc(opc);
2367     QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link);
2368     return new_op;
2369 }
2370 
2371 /* Reachable analysis : remove unreachable code.  */
2372 static void reachable_code_pass(TCGContext *s)
2373 {
2374     TCGOp *op, *op_next;
2375     bool dead = false;
2376 
2377     QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
2378         bool remove = dead;
2379         TCGLabel *label;
2380         int call_flags;
2381 
2382         switch (op->opc) {
2383         case INDEX_op_set_label:
2384             label = arg_label(op->args[0]);
2385             if (label->refs == 0) {
2386                 /*
2387                  * While there is an occasional backward branch, virtually
2388                  * all branches generated by the translators are forward.
2389                  * Which means that generally we will have already removed
2390                  * all references to the label that will be, and there is
2391                  * little to be gained by iterating.
2392                  */
2393                 remove = true;
2394             } else {
2395                 /* Once we see a label, insns become live again.  */
2396                 dead = false;
2397                 remove = false;
2398 
2399                 /*
2400                  * Optimization can fold conditional branches to unconditional.
2401                  * If we find a label with one reference which is preceded by
2402                  * an unconditional branch to it, remove both.  This needed to
2403                  * wait until the dead code in between them was removed.
2404                  */
2405                 if (label->refs == 1) {
2406                     TCGOp *op_prev = QTAILQ_PREV(op, link);
2407                     if (op_prev->opc == INDEX_op_br &&
2408                         label == arg_label(op_prev->args[0])) {
2409                         tcg_op_remove(s, op_prev);
2410                         remove = true;
2411                     }
2412                 }
2413             }
2414             break;
2415 
2416         case INDEX_op_br:
2417         case INDEX_op_exit_tb:
2418         case INDEX_op_goto_ptr:
2419             /* Unconditional branches; everything following is dead.  */
2420             dead = true;
2421             break;
2422 
2423         case INDEX_op_call:
2424             /* Notice noreturn helper calls, raising exceptions.  */
2425             call_flags = op->args[TCGOP_CALLO(op) + TCGOP_CALLI(op) + 1];
2426             if (call_flags & TCG_CALL_NO_RETURN) {
2427                 dead = true;
2428             }
2429             break;
2430 
2431         case INDEX_op_insn_start:
2432             /* Never remove -- we need to keep these for unwind.  */
2433             remove = false;
2434             break;
2435 
2436         default:
2437             break;
2438         }
2439 
2440         if (remove) {
2441             tcg_op_remove(s, op);
2442         }
2443     }
2444 }
2445 
/*
 * Liveness state bits stored in TCGTemp.state by the liveness helpers
 * below.  The two bits may be combined (TS_DEAD | TS_MEM).
 */
#define TS_DEAD  1
#define TS_MEM   2

/*
 * Test the DEAD_ARG / SYNC_ARG bit for argument n.  Both macros expand to
 * an expression on a variable named arg_life, which must be in scope at
 * the point of use.
 */
#define IS_DEAD_ARG(n)   (arg_life & (DEAD_ARG << (n)))
#define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n)))
2451 
2452 /* For liveness_pass_1, the register preferences for a given temp.  */
2453 static inline TCGRegSet *la_temp_pref(TCGTemp *ts)
2454 {
2455     return ts->state_ptr;
2456 }
2457 
2458 /* For liveness_pass_1, reset the preferences for a given temp to the
2459  * maximal regset for its type.
2460  */
2461 static inline void la_reset_pref(TCGTemp *ts)
2462 {
2463     *la_temp_pref(ts)
2464         = (ts->state == TS_DEAD ? 0 : tcg_target_available_regs[ts->type]);
2465 }
2466 
2467 /* liveness analysis: end of function: all temps are dead, and globals
2468    should be in memory. */
2469 static void la_func_end(TCGContext *s, int ng, int nt)
2470 {
2471     int i;
2472 
2473     for (i = 0; i < ng; ++i) {
2474         s->temps[i].state = TS_DEAD | TS_MEM;
2475         la_reset_pref(&s->temps[i]);
2476     }
2477     for (i = ng; i < nt; ++i) {
2478         s->temps[i].state = TS_DEAD;
2479         la_reset_pref(&s->temps[i]);
2480     }
2481 }
2482 
2483 /* liveness analysis: end of basic block: all temps are dead, globals
2484    and local temps should be in memory. */
2485 static void la_bb_end(TCGContext *s, int ng, int nt)
2486 {
2487     int i;
2488 
2489     for (i = 0; i < ng; ++i) {
2490         s->temps[i].state = TS_DEAD | TS_MEM;
2491         la_reset_pref(&s->temps[i]);
2492     }
2493     for (i = ng; i < nt; ++i) {
2494         s->temps[i].state = (s->temps[i].temp_local
2495                              ? TS_DEAD | TS_MEM
2496                              : TS_DEAD);
2497         la_reset_pref(&s->temps[i]);
2498     }
2499 }
2500 
2501 /* liveness analysis: sync globals back to memory.  */
2502 static void la_global_sync(TCGContext *s, int ng)
2503 {
2504     int i;
2505 
2506     for (i = 0; i < ng; ++i) {
2507         int state = s->temps[i].state;
2508         s->temps[i].state = state | TS_MEM;
2509         if (state == TS_DEAD) {
2510             /* If the global was previously dead, reset prefs.  */
2511             la_reset_pref(&s->temps[i]);
2512         }
2513     }
2514 }
2515 
2516 /* liveness analysis: sync globals back to memory and kill.  */
2517 static void la_global_kill(TCGContext *s, int ng)
2518 {
2519     int i;
2520 
2521     for (i = 0; i < ng; i++) {
2522         s->temps[i].state = TS_DEAD | TS_MEM;
2523         la_reset_pref(&s->temps[i]);
2524     }
2525 }
2526 
2527 /* liveness analysis: note live globals crossing calls.  */
2528 static void la_cross_call(TCGContext *s, int nt)
2529 {
2530     TCGRegSet mask = ~tcg_target_call_clobber_regs;
2531     int i;
2532 
2533     for (i = 0; i < nt; i++) {
2534         TCGTemp *ts = &s->temps[i];
2535         if (!(ts->state & TS_DEAD)) {
2536             TCGRegSet *pset = la_temp_pref(ts);
2537             TCGRegSet set = *pset;
2538 
2539             set &= mask;
2540             /* If the combination is not possible, restart.  */
2541             if (set == 0) {
2542                 set = tcg_target_available_regs[ts->type] & mask;
2543             }
2544             *pset = set;
2545         }
2546     }
2547 }
2548 
/* Liveness analysis: update the opc_arg_life array to tell if a
   given input argument is dead.  Instructions updating dead
   temporaries are removed.  */
2552 static void liveness_pass_1(TCGContext *s)
2553 {
2554     int nb_globals = s->nb_globals;
2555     int nb_temps = s->nb_temps;
2556     TCGOp *op, *op_prev;
2557     TCGRegSet *prefs;
2558     int i;
2559 
2560     prefs = tcg_malloc(sizeof(TCGRegSet) * nb_temps);
2561     for (i = 0; i < nb_temps; ++i) {
2562         s->temps[i].state_ptr = prefs + i;
2563     }
2564 
2565     /* ??? Should be redundant with the exit_tb that ends the TB.  */
2566     la_func_end(s, nb_globals, nb_temps);
2567 
2568     QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, link, op_prev) {
2569         int nb_iargs, nb_oargs;
2570         TCGOpcode opc_new, opc_new2;
2571         bool have_opc_new2;
2572         TCGLifeData arg_life = 0;
2573         TCGTemp *ts;
2574         TCGOpcode opc = op->opc;
2575         const TCGOpDef *def = &tcg_op_defs[opc];
2576 
2577         switch (opc) {
2578         case INDEX_op_call:
2579             {
2580                 int call_flags;
2581                 int nb_call_regs;
2582 
2583                 nb_oargs = TCGOP_CALLO(op);
2584                 nb_iargs = TCGOP_CALLI(op);
2585                 call_flags = op->args[nb_oargs + nb_iargs + 1];
2586 
2587                 /* pure functions can be removed if their result is unused */
2588                 if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
2589                     for (i = 0; i < nb_oargs; i++) {
2590                         ts = arg_temp(op->args[i]);
2591                         if (ts->state != TS_DEAD) {
2592                             goto do_not_remove_call;
2593                         }
2594                     }
2595                     goto do_remove;
2596                 }
2597             do_not_remove_call:
2598 
2599                 /* Output args are dead.  */
2600                 for (i = 0; i < nb_oargs; i++) {
2601                     ts = arg_temp(op->args[i]);
2602                     if (ts->state & TS_DEAD) {
2603                         arg_life |= DEAD_ARG << i;
2604                     }
2605                     if (ts->state & TS_MEM) {
2606                         arg_life |= SYNC_ARG << i;
2607                     }
2608                     ts->state = TS_DEAD;
2609                     la_reset_pref(ts);
2610 
2611                     /* Not used -- it will be tcg_target_call_oarg_regs[i].  */
2612                     op->output_pref[i] = 0;
2613                 }
2614 
2615                 if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
2616                                     TCG_CALL_NO_READ_GLOBALS))) {
2617                     la_global_kill(s, nb_globals);
2618                 } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
2619                     la_global_sync(s, nb_globals);
2620                 }
2621 
2622                 /* Record arguments that die in this helper.  */
2623                 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
2624                     ts = arg_temp(op->args[i]);
2625                     if (ts && ts->state & TS_DEAD) {
2626                         arg_life |= DEAD_ARG << i;
2627                     }
2628                 }
2629 
2630                 /* For all live registers, remove call-clobbered prefs.  */
2631                 la_cross_call(s, nb_temps);
2632 
2633                 nb_call_regs = ARRAY_SIZE(tcg_target_call_iarg_regs);
2634 
2635                 /* Input arguments are live for preceding opcodes.  */
2636                 for (i = 0; i < nb_iargs; i++) {
2637                     ts = arg_temp(op->args[i + nb_oargs]);
2638                     if (ts && ts->state & TS_DEAD) {
2639                         /* For those arguments that die, and will be allocated
2640                          * in registers, clear the register set for that arg,
2641                          * to be filled in below.  For args that will be on
2642                          * the stack, reset to any available reg.
2643                          */
2644                         *la_temp_pref(ts)
2645                             = (i < nb_call_regs ? 0 :
2646                                tcg_target_available_regs[ts->type]);
2647                         ts->state &= ~TS_DEAD;
2648                     }
2649                 }
2650 
2651                 /* For each input argument, add its input register to prefs.
2652                    If a temp is used once, this produces a single set bit.  */
2653                 for (i = 0; i < MIN(nb_call_regs, nb_iargs); i++) {
2654                     ts = arg_temp(op->args[i + nb_oargs]);
2655                     if (ts) {
2656                         tcg_regset_set_reg(*la_temp_pref(ts),
2657                                            tcg_target_call_iarg_regs[i]);
2658                     }
2659                 }
2660             }
2661             break;
2662         case INDEX_op_insn_start:
2663             break;
2664         case INDEX_op_discard:
2665             /* mark the temporary as dead */
2666             ts = arg_temp(op->args[0]);
2667             ts->state = TS_DEAD;
2668             la_reset_pref(ts);
2669             break;
2670 
2671         case INDEX_op_add2_i32:
2672             opc_new = INDEX_op_add_i32;
2673             goto do_addsub2;
2674         case INDEX_op_sub2_i32:
2675             opc_new = INDEX_op_sub_i32;
2676             goto do_addsub2;
2677         case INDEX_op_add2_i64:
2678             opc_new = INDEX_op_add_i64;
2679             goto do_addsub2;
2680         case INDEX_op_sub2_i64:
2681             opc_new = INDEX_op_sub_i64;
2682         do_addsub2:
2683             nb_iargs = 4;
2684             nb_oargs = 2;
2685             /* Test if the high part of the operation is dead, but not
2686                the low part.  The result can be optimized to a simple
2687                add or sub.  This happens often for x86_64 guest when the
2688                cpu mode is set to 32 bit.  */
2689             if (arg_temp(op->args[1])->state == TS_DEAD) {
2690                 if (arg_temp(op->args[0])->state == TS_DEAD) {
2691                     goto do_remove;
2692                 }
2693                 /* Replace the opcode and adjust the args in place,
2694                    leaving 3 unused args at the end.  */
2695                 op->opc = opc = opc_new;
2696                 op->args[1] = op->args[2];
2697                 op->args[2] = op->args[4];
2698                 /* Fall through and mark the single-word operation live.  */
2699                 nb_iargs = 2;
2700                 nb_oargs = 1;
2701             }
2702             goto do_not_remove;
2703 
2704         case INDEX_op_mulu2_i32:
2705             opc_new = INDEX_op_mul_i32;
2706             opc_new2 = INDEX_op_muluh_i32;
2707             have_opc_new2 = TCG_TARGET_HAS_muluh_i32;
2708             goto do_mul2;
2709         case INDEX_op_muls2_i32:
2710             opc_new = INDEX_op_mul_i32;
2711             opc_new2 = INDEX_op_mulsh_i32;
2712             have_opc_new2 = TCG_TARGET_HAS_mulsh_i32;
2713             goto do_mul2;
2714         case INDEX_op_mulu2_i64:
2715             opc_new = INDEX_op_mul_i64;
2716             opc_new2 = INDEX_op_muluh_i64;
2717             have_opc_new2 = TCG_TARGET_HAS_muluh_i64;
2718             goto do_mul2;
2719         case INDEX_op_muls2_i64:
2720             opc_new = INDEX_op_mul_i64;
2721             opc_new2 = INDEX_op_mulsh_i64;
2722             have_opc_new2 = TCG_TARGET_HAS_mulsh_i64;
2723             goto do_mul2;
2724         do_mul2:
2725             nb_iargs = 2;
2726             nb_oargs = 2;
2727             if (arg_temp(op->args[1])->state == TS_DEAD) {
2728                 if (arg_temp(op->args[0])->state == TS_DEAD) {
2729                     /* Both parts of the operation are dead.  */
2730                     goto do_remove;
2731                 }
2732                 /* The high part of the operation is dead; generate the low. */
2733                 op->opc = opc = opc_new;
2734                 op->args[1] = op->args[2];
2735                 op->args[2] = op->args[3];
2736             } else if (arg_temp(op->args[0])->state == TS_DEAD && have_opc_new2) {
2737                 /* The low part of the operation is dead; generate the high. */
2738                 op->opc = opc = opc_new2;
2739                 op->args[0] = op->args[1];
2740                 op->args[1] = op->args[2];
2741                 op->args[2] = op->args[3];
2742             } else {
2743                 goto do_not_remove;
2744             }
2745             /* Mark the single-word operation live.  */
2746             nb_oargs = 1;
2747             goto do_not_remove;
2748 
2749         default:
2750             /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
2751             nb_iargs = def->nb_iargs;
2752             nb_oargs = def->nb_oargs;
2753 
2754             /* Test if the operation can be removed because all
2755                its outputs are dead. We assume that nb_oargs == 0
2756                implies side effects */
2757             if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) {
2758                 for (i = 0; i < nb_oargs; i++) {
2759                     if (arg_temp(op->args[i])->state != TS_DEAD) {
2760                         goto do_not_remove;
2761                     }
2762                 }
2763                 goto do_remove;
2764             }
2765             goto do_not_remove;
2766 
2767         do_remove:
2768             tcg_op_remove(s, op);
2769             break;
2770 
2771         do_not_remove:
2772             for (i = 0; i < nb_oargs; i++) {
2773                 ts = arg_temp(op->args[i]);
2774 
2775                 /* Remember the preference of the uses that followed.  */
2776                 op->output_pref[i] = *la_temp_pref(ts);
2777 
2778                 /* Output args are dead.  */
2779                 if (ts->state & TS_DEAD) {
2780                     arg_life |= DEAD_ARG << i;
2781                 }
2782                 if (ts->state & TS_MEM) {
2783                     arg_life |= SYNC_ARG << i;
2784                 }
2785                 ts->state = TS_DEAD;
2786                 la_reset_pref(ts);
2787             }
2788 
2789             /* If end of basic block, update.  */
2790             if (def->flags & TCG_OPF_BB_EXIT) {
2791                 la_func_end(s, nb_globals, nb_temps);
2792             } else if (def->flags & TCG_OPF_BB_END) {
2793                 la_bb_end(s, nb_globals, nb_temps);
2794             } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
2795                 la_global_sync(s, nb_globals);
2796                 if (def->flags & TCG_OPF_CALL_CLOBBER) {
2797                     la_cross_call(s, nb_temps);
2798                 }
2799             }
2800 
2801             /* Record arguments that die in this opcode.  */
2802             for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
2803                 ts = arg_temp(op->args[i]);
2804                 if (ts->state & TS_DEAD) {
2805                     arg_life |= DEAD_ARG << i;
2806                 }
2807             }
2808 
2809             /* Input arguments are live for preceding opcodes.  */
2810             for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
2811                 ts = arg_temp(op->args[i]);
2812                 if (ts->state & TS_DEAD) {
2813                     /* For operands that were dead, initially allow
2814                        all regs for the type.  */
2815                     *la_temp_pref(ts) = tcg_target_available_regs[ts->type];
2816                     ts->state &= ~TS_DEAD;
2817                 }
2818             }
2819 
2820             /* Incorporate constraints for this operand.  */
2821             switch (opc) {
2822             case INDEX_op_mov_i32:
2823             case INDEX_op_mov_i64:
2824                 /* Note that these are TCG_OPF_NOT_PRESENT and do not
2825                    have proper constraints.  That said, special case
2826                    moves to propagate preferences backward.  */
2827                 if (IS_DEAD_ARG(1)) {
2828                     *la_temp_pref(arg_temp(op->args[0]))
2829                         = *la_temp_pref(arg_temp(op->args[1]));
2830                 }
2831                 break;
2832 
2833             default:
2834                 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
2835                     const TCGArgConstraint *ct = &def->args_ct[i];
2836                     TCGRegSet set, *pset;
2837 
2838                     ts = arg_temp(op->args[i]);
2839                     pset = la_temp_pref(ts);
2840                     set = *pset;
2841 
2842                     set &= ct->u.regs;
2843                     if (ct->ct & TCG_CT_IALIAS) {
2844                         set &= op->output_pref[ct->alias_index];
2845                     }
2846                     /* If the combination is not possible, restart.  */
2847                     if (set == 0) {
2848                         set = ct->u.regs;
2849                     }
2850                     *pset = set;
2851                 }
2852                 break;
2853             }
2854             break;
2855         }
2856         op->life = arg_life;
2857     }
2858 }
2859 
2860 /* Liveness analysis: Convert indirect regs to direct temporaries.  */
2861 static bool liveness_pass_2(TCGContext *s)
2862 {
2863     int nb_globals = s->nb_globals;
2864     int nb_temps, i;
2865     bool changes = false;
2866     TCGOp *op, *op_next;
2867 
2868     /* Create a temporary for each indirect global.  */
2869     for (i = 0; i < nb_globals; ++i) {
2870         TCGTemp *its = &s->temps[i];
2871         if (its->indirect_reg) {
2872             TCGTemp *dts = tcg_temp_alloc(s);
2873             dts->type = its->type;
2874             dts->base_type = its->base_type;
2875             its->state_ptr = dts;
2876         } else {
2877             its->state_ptr = NULL;
2878         }
2879         /* All globals begin dead.  */
2880         its->state = TS_DEAD;
2881     }
2882     for (nb_temps = s->nb_temps; i < nb_temps; ++i) {
2883         TCGTemp *its = &s->temps[i];
2884         its->state_ptr = NULL;
2885         its->state = TS_DEAD;
2886     }
2887 
2888     QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
2889         TCGOpcode opc = op->opc;
2890         const TCGOpDef *def = &tcg_op_defs[opc];
2891         TCGLifeData arg_life = op->life;
2892         int nb_iargs, nb_oargs, call_flags;
2893         TCGTemp *arg_ts, *dir_ts;
2894 
2895         if (opc == INDEX_op_call) {
2896             nb_oargs = TCGOP_CALLO(op);
2897             nb_iargs = TCGOP_CALLI(op);
2898             call_flags = op->args[nb_oargs + nb_iargs + 1];
2899         } else {
2900             nb_iargs = def->nb_iargs;
2901             nb_oargs = def->nb_oargs;
2902 
2903             /* Set flags similar to how calls require.  */
2904             if (def->flags & TCG_OPF_BB_END) {
2905                 /* Like writing globals: save_globals */
2906                 call_flags = 0;
2907             } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
2908                 /* Like reading globals: sync_globals */
2909                 call_flags = TCG_CALL_NO_WRITE_GLOBALS;
2910             } else {
2911                 /* No effect on globals.  */
2912                 call_flags = (TCG_CALL_NO_READ_GLOBALS |
2913                               TCG_CALL_NO_WRITE_GLOBALS);
2914             }
2915         }
2916 
2917         /* Make sure that input arguments are available.  */
2918         for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
2919             arg_ts = arg_temp(op->args[i]);
2920             if (arg_ts) {
2921                 dir_ts = arg_ts->state_ptr;
2922                 if (dir_ts && arg_ts->state == TS_DEAD) {
2923                     TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32
2924                                       ? INDEX_op_ld_i32
2925                                       : INDEX_op_ld_i64);
2926                     TCGOp *lop = tcg_op_insert_before(s, op, lopc);
2927 
2928                     lop->args[0] = temp_arg(dir_ts);
2929                     lop->args[1] = temp_arg(arg_ts->mem_base);
2930                     lop->args[2] = arg_ts->mem_offset;
2931 
2932                     /* Loaded, but synced with memory.  */
2933                     arg_ts->state = TS_MEM;
2934                 }
2935             }
2936         }
2937 
2938         /* Perform input replacement, and mark inputs that became dead.
2939            No action is required except keeping temp_state up to date
2940            so that we reload when needed.  */
2941         for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
2942             arg_ts = arg_temp(op->args[i]);
2943             if (arg_ts) {
2944                 dir_ts = arg_ts->state_ptr;
2945                 if (dir_ts) {
2946                     op->args[i] = temp_arg(dir_ts);
2947                     changes = true;
2948                     if (IS_DEAD_ARG(i)) {
2949                         arg_ts->state = TS_DEAD;
2950                     }
2951                 }
2952             }
2953         }
2954 
2955         /* Liveness analysis should ensure that the following are
2956            all correct, for call sites and basic block end points.  */
2957         if (call_flags & TCG_CALL_NO_READ_GLOBALS) {
2958             /* Nothing to do */
2959         } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) {
2960             for (i = 0; i < nb_globals; ++i) {
2961                 /* Liveness should see that globals are synced back,
2962                    that is, either TS_DEAD or TS_MEM.  */
2963                 arg_ts = &s->temps[i];
2964                 tcg_debug_assert(arg_ts->state_ptr == 0
2965                                  || arg_ts->state != 0);
2966             }
2967         } else {
2968             for (i = 0; i < nb_globals; ++i) {
2969                 /* Liveness should see that globals are saved back,
2970                    that is, TS_DEAD, waiting to be reloaded.  */
2971                 arg_ts = &s->temps[i];
2972                 tcg_debug_assert(arg_ts->state_ptr == 0
2973                                  || arg_ts->state == TS_DEAD);
2974             }
2975         }
2976 
2977         /* Outputs become available.  */
2978         for (i = 0; i < nb_oargs; i++) {
2979             arg_ts = arg_temp(op->args[i]);
2980             dir_ts = arg_ts->state_ptr;
2981             if (!dir_ts) {
2982                 continue;
2983             }
2984             op->args[i] = temp_arg(dir_ts);
2985             changes = true;
2986 
2987             /* The output is now live and modified.  */
2988             arg_ts->state = 0;
2989 
2990             /* Sync outputs upon their last write.  */
2991             if (NEED_SYNC_ARG(i)) {
2992                 TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
2993                                   ? INDEX_op_st_i32
2994                                   : INDEX_op_st_i64);
2995                 TCGOp *sop = tcg_op_insert_after(s, op, sopc);
2996 
2997                 sop->args[0] = temp_arg(dir_ts);
2998                 sop->args[1] = temp_arg(arg_ts->mem_base);
2999                 sop->args[2] = arg_ts->mem_offset;
3000 
3001                 arg_ts->state = TS_MEM;
3002             }
3003             /* Drop outputs that are dead.  */
3004             if (IS_DEAD_ARG(i)) {
3005                 arg_ts->state = TS_DEAD;
3006             }
3007         }
3008     }
3009 
3010     return changes;
3011 }
3012 
3013 #ifdef CONFIG_DEBUG_TCG
3014 static void dump_regs(TCGContext *s)
3015 {
3016     TCGTemp *ts;
3017     int i;
3018     char buf[64];
3019 
3020     for(i = 0; i < s->nb_temps; i++) {
3021         ts = &s->temps[i];
3022         printf("  %10s: ", tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts));
3023         switch(ts->val_type) {
3024         case TEMP_VAL_REG:
3025             printf("%s", tcg_target_reg_names[ts->reg]);
3026             break;
3027         case TEMP_VAL_MEM:
3028             printf("%d(%s)", (int)ts->mem_offset,
3029                    tcg_target_reg_names[ts->mem_base->reg]);
3030             break;
3031         case TEMP_VAL_CONST:
3032             printf("$0x%" TCG_PRIlx, ts->val);
3033             break;
3034         case TEMP_VAL_DEAD:
3035             printf("D");
3036             break;
3037         default:
3038             printf("???");
3039             break;
3040         }
3041         printf("\n");
3042     }
3043 
3044     for(i = 0; i < TCG_TARGET_NB_REGS; i++) {
3045         if (s->reg_to_temp[i] != NULL) {
3046             printf("%s: %s\n",
3047                    tcg_target_reg_names[i],
3048                    tcg_get_arg_str_ptr(s, buf, sizeof(buf), s->reg_to_temp[i]));
3049         }
3050     }
3051 }
3052 
3053 static void check_regs(TCGContext *s)
3054 {
3055     int reg;
3056     int k;
3057     TCGTemp *ts;
3058     char buf[64];
3059 
3060     for (reg = 0; reg < TCG_TARGET_NB_REGS; reg++) {
3061         ts = s->reg_to_temp[reg];
3062         if (ts != NULL) {
3063             if (ts->val_type != TEMP_VAL_REG || ts->reg != reg) {
3064                 printf("Inconsistency for register %s:\n",
3065                        tcg_target_reg_names[reg]);
3066                 goto fail;
3067             }
3068         }
3069     }
3070     for (k = 0; k < s->nb_temps; k++) {
3071         ts = &s->temps[k];
3072         if (ts->val_type == TEMP_VAL_REG && !ts->fixed_reg
3073             && s->reg_to_temp[ts->reg] != ts) {
3074             printf("Inconsistency for temp %s:\n",
3075                    tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts));
3076         fail:
3077             printf("reg state:\n");
3078             dump_regs(s);
3079             tcg_abort();
3080         }
3081     }
3082 }
3083 #endif
3084 
3085 static void temp_allocate_frame(TCGContext *s, TCGTemp *ts)
3086 {
3087 #if !(defined(__sparc__) && TCG_TARGET_REG_BITS == 64)
3088     /* Sparc64 stack is accessed with offset of 2047 */
3089     s->current_frame_offset = (s->current_frame_offset +
3090                                (tcg_target_long)sizeof(tcg_target_long) - 1) &
3091         ~(sizeof(tcg_target_long) - 1);
3092 #endif
3093     if (s->current_frame_offset + (tcg_target_long)sizeof(tcg_target_long) >
3094         s->frame_end) {
3095         tcg_abort();
3096     }
3097     ts->mem_offset = s->current_frame_offset;
3098     ts->mem_base = s->frame_temp;
3099     ts->mem_allocated = 1;
3100     s->current_frame_offset += sizeof(tcg_target_long);
3101 }
3102 
3103 static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet);
3104 
3105 /* Mark a temporary as free or dead.  If 'free_or_dead' is negative,
3106    mark it free; otherwise mark it dead.  */
3107 static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead)
3108 {
3109     if (ts->fixed_reg) {
3110         return;
3111     }
3112     if (ts->val_type == TEMP_VAL_REG) {
3113         s->reg_to_temp[ts->reg] = NULL;
3114     }
3115     ts->val_type = (free_or_dead < 0
3116                     || ts->temp_local
3117                     || ts->temp_global
3118                     ? TEMP_VAL_MEM : TEMP_VAL_DEAD);
3119 }
3120 
3121 /* Mark a temporary as dead.  */
3122 static inline void temp_dead(TCGContext *s, TCGTemp *ts)
3123 {
3124     temp_free_or_dead(s, ts, 1);
3125 }
3126 
3127 /* Sync a temporary to memory. 'allocated_regs' is used in case a temporary
3128    registers needs to be allocated to store a constant.  If 'free_or_dead'
3129    is non-zero, subsequently release the temporary; if it is positive, the
3130    temp is dead; if it is negative, the temp is free.  */
3131 static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs,
3132                       TCGRegSet preferred_regs, int free_or_dead)
3133 {
3134     if (ts->fixed_reg) {
3135         return;
3136     }
3137     if (!ts->mem_coherent) {
3138         if (!ts->mem_allocated) {
3139             temp_allocate_frame(s, ts);
3140         }
3141         switch (ts->val_type) {
3142         case TEMP_VAL_CONST:
3143             /* If we're going to free the temp immediately, then we won't
3144                require it later in a register, so attempt to store the
3145                constant to memory directly.  */
3146             if (free_or_dead
3147                 && tcg_out_sti(s, ts->type, ts->val,
3148                                ts->mem_base->reg, ts->mem_offset)) {
3149                 break;
3150             }
3151             temp_load(s, ts, tcg_target_available_regs[ts->type],
3152                       allocated_regs, preferred_regs);
3153             /* fallthrough */
3154 
3155         case TEMP_VAL_REG:
3156             tcg_out_st(s, ts->type, ts->reg,
3157                        ts->mem_base->reg, ts->mem_offset);
3158             break;
3159 
3160         case TEMP_VAL_MEM:
3161             break;
3162 
3163         case TEMP_VAL_DEAD:
3164         default:
3165             tcg_abort();
3166         }
3167         ts->mem_coherent = 1;
3168     }
3169     if (free_or_dead) {
3170         temp_free_or_dead(s, ts, free_or_dead);
3171     }
3172 }
3173 
3174 /* free register 'reg' by spilling the corresponding temporary if necessary */
3175 static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs)
3176 {
3177     TCGTemp *ts = s->reg_to_temp[reg];
3178     if (ts != NULL) {
3179         temp_sync(s, ts, allocated_regs, 0, -1);
3180     }
3181 }
3182 
3183 /**
3184  * tcg_reg_alloc:
3185  * @required_regs: Set of registers in which we must allocate.
3186  * @allocated_regs: Set of registers which must be avoided.
3187  * @preferred_regs: Set of registers we should prefer.
3188  * @rev: True if we search the registers in "indirect" order.
3189  *
3190  * The allocated register must be in @required_regs & ~@allocated_regs,
3191  * but if we can put it in @preferred_regs we may save a move later.
3192  */
3193 static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs,
3194                             TCGRegSet allocated_regs,
3195                             TCGRegSet preferred_regs, bool rev)
3196 {
3197     int i, j, f, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
3198     TCGRegSet reg_ct[2];
3199     const int *order;
3200 
3201     reg_ct[1] = required_regs & ~allocated_regs;
3202     tcg_debug_assert(reg_ct[1] != 0);
3203     reg_ct[0] = reg_ct[1] & preferred_regs;
3204 
3205     /* Skip the preferred_regs option if it cannot be satisfied,
3206        or if the preference made no difference.  */
3207     f = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
3208 
3209     order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
3210 
3211     /* Try free registers, preferences first.  */
3212     for (j = f; j < 2; j++) {
3213         TCGRegSet set = reg_ct[j];
3214 
3215         if (tcg_regset_single(set)) {
3216             /* One register in the set.  */
3217             TCGReg reg = tcg_regset_first(set);
3218             if (s->reg_to_temp[reg] == NULL) {
3219                 return reg;
3220             }
3221         } else {
3222             for (i = 0; i < n; i++) {
3223                 TCGReg reg = order[i];
3224                 if (s->reg_to_temp[reg] == NULL &&
3225                     tcg_regset_test_reg(set, reg)) {
3226                     return reg;
3227                 }
3228             }
3229         }
3230     }
3231 
3232     /* We must spill something.  */
3233     for (j = f; j < 2; j++) {
3234         TCGRegSet set = reg_ct[j];
3235 
3236         if (tcg_regset_single(set)) {
3237             /* One register in the set.  */
3238             TCGReg reg = tcg_regset_first(set);
3239             tcg_reg_free(s, reg, allocated_regs);
3240             return reg;
3241         } else {
3242             for (i = 0; i < n; i++) {
3243                 TCGReg reg = order[i];
3244                 if (tcg_regset_test_reg(set, reg)) {
3245                     tcg_reg_free(s, reg, allocated_regs);
3246                     return reg;
3247                 }
3248             }
3249         }
3250     }
3251 
3252     tcg_abort();
3253 }
3254 
3255 /* Make sure the temporary is in a register.  If needed, allocate the register
3256    from DESIRED while avoiding ALLOCATED.  */
3257 static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
3258                       TCGRegSet allocated_regs, TCGRegSet preferred_regs)
3259 {
3260     TCGReg reg;
3261 
3262     switch (ts->val_type) {
3263     case TEMP_VAL_REG:
3264         return;
3265     case TEMP_VAL_CONST:
3266         reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
3267                             preferred_regs, ts->indirect_base);
3268         tcg_out_movi(s, ts->type, reg, ts->val);
3269         ts->mem_coherent = 0;
3270         break;
3271     case TEMP_VAL_MEM:
3272         reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
3273                             preferred_regs, ts->indirect_base);
3274         tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
3275         ts->mem_coherent = 1;
3276         break;
3277     case TEMP_VAL_DEAD:
3278     default:
3279         tcg_abort();
3280     }
3281     ts->reg = reg;
3282     ts->val_type = TEMP_VAL_REG;
3283     s->reg_to_temp[reg] = ts;
3284 }
3285 
3286 /* Save a temporary to memory. 'allocated_regs' is used in case a
3287    temporary registers needs to be allocated to store a constant.  */
3288 static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
3289 {
3290     /* The liveness analysis already ensures that globals are back
3291        in memory. Keep an tcg_debug_assert for safety. */
3292     tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || ts->fixed_reg);
3293 }
3294 
3295 /* save globals to their canonical location and assume they can be
3296    modified be the following code. 'allocated_regs' is used in case a
3297    temporary registers needs to be allocated to store a constant. */
3298 static void save_globals(TCGContext *s, TCGRegSet allocated_regs)
3299 {
3300     int i, n;
3301 
3302     for (i = 0, n = s->nb_globals; i < n; i++) {
3303         temp_save(s, &s->temps[i], allocated_regs);
3304     }
3305 }
3306 
3307 /* sync globals to their canonical location and assume they can be
3308    read by the following code. 'allocated_regs' is used in case a
3309    temporary registers needs to be allocated to store a constant. */
3310 static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
3311 {
3312     int i, n;
3313 
3314     for (i = 0, n = s->nb_globals; i < n; i++) {
3315         TCGTemp *ts = &s->temps[i];
3316         tcg_debug_assert(ts->val_type != TEMP_VAL_REG
3317                          || ts->fixed_reg
3318                          || ts->mem_coherent);
3319     }
3320 }
3321 
3322 /* at the end of a basic block, we assume all temporaries are dead and
3323    all globals are stored at their canonical location. */
3324 static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
3325 {
3326     int i;
3327 
3328     for (i = s->nb_globals; i < s->nb_temps; i++) {
3329         TCGTemp *ts = &s->temps[i];
3330         if (ts->temp_local) {
3331             temp_save(s, ts, allocated_regs);
3332         } else {
3333             /* The liveness analysis already ensures that temps are dead.
3334                Keep an tcg_debug_assert for safety. */
3335             tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
3336         }
3337     }
3338 
3339     save_globals(s, allocated_regs);
3340 }
3341 
3342 /*
3343  * Specialized code generation for INDEX_op_movi_*.
3344  */
3345 static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots,
3346                                   tcg_target_ulong val, TCGLifeData arg_life,
3347                                   TCGRegSet preferred_regs)
3348 {
3349     /* ENV should not be modified.  */
3350     tcg_debug_assert(!ots->fixed_reg);
3351 
3352     /* The movi is not explicitly generated here.  */
3353     if (ots->val_type == TEMP_VAL_REG) {
3354         s->reg_to_temp[ots->reg] = NULL;
3355     }
3356     ots->val_type = TEMP_VAL_CONST;
3357     ots->val = val;
3358     ots->mem_coherent = 0;
3359     if (NEED_SYNC_ARG(0)) {
3360         temp_sync(s, ots, s->reserved_regs, preferred_regs, IS_DEAD_ARG(0));
3361     } else if (IS_DEAD_ARG(0)) {
3362         temp_dead(s, ots);
3363     }
3364 }
3365 
3366 static void tcg_reg_alloc_movi(TCGContext *s, const TCGOp *op)
3367 {
3368     TCGTemp *ots = arg_temp(op->args[0]);
3369     tcg_target_ulong val = op->args[1];
3370 
3371     tcg_reg_alloc_do_movi(s, ots, val, op->life, op->output_pref[0]);
3372 }
3373 
3374 /*
3375  * Specialized code generation for INDEX_op_mov_*.
3376  */
3377 static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op)
3378 {
3379     const TCGLifeData arg_life = op->life;
3380     TCGRegSet allocated_regs, preferred_regs;
3381     TCGTemp *ts, *ots;
3382     TCGType otype, itype;
3383 
3384     allocated_regs = s->reserved_regs;
3385     preferred_regs = op->output_pref[0];
3386     ots = arg_temp(op->args[0]);
3387     ts = arg_temp(op->args[1]);
3388 
3389     /* ENV should not be modified.  */
3390     tcg_debug_assert(!ots->fixed_reg);
3391 
3392     /* Note that otype != itype for no-op truncation.  */
3393     otype = ots->type;
3394     itype = ts->type;
3395 
3396     if (ts->val_type == TEMP_VAL_CONST) {
3397         /* propagate constant or generate sti */
3398         tcg_target_ulong val = ts->val;
3399         if (IS_DEAD_ARG(1)) {
3400             temp_dead(s, ts);
3401         }
3402         tcg_reg_alloc_do_movi(s, ots, val, arg_life, preferred_regs);
3403         return;
3404     }
3405 
3406     /* If the source value is in memory we're going to be forced
3407        to have it in a register in order to perform the copy.  Copy
3408        the SOURCE value into its own register first, that way we
3409        don't have to reload SOURCE the next time it is used. */
3410     if (ts->val_type == TEMP_VAL_MEM) {
3411         temp_load(s, ts, tcg_target_available_regs[itype],
3412                   allocated_regs, preferred_regs);
3413     }
3414 
3415     tcg_debug_assert(ts->val_type == TEMP_VAL_REG);
3416     if (IS_DEAD_ARG(0)) {
3417         /* mov to a non-saved dead register makes no sense (even with
3418            liveness analysis disabled). */
3419         tcg_debug_assert(NEED_SYNC_ARG(0));
3420         if (!ots->mem_allocated) {
3421             temp_allocate_frame(s, ots);
3422         }
3423         tcg_out_st(s, otype, ts->reg, ots->mem_base->reg, ots->mem_offset);
3424         if (IS_DEAD_ARG(1)) {
3425             temp_dead(s, ts);
3426         }
3427         temp_dead(s, ots);
3428     } else {
3429         if (IS_DEAD_ARG(1) && !ts->fixed_reg) {
3430             /* the mov can be suppressed */
3431             if (ots->val_type == TEMP_VAL_REG) {
3432                 s->reg_to_temp[ots->reg] = NULL;
3433             }
3434             ots->reg = ts->reg;
3435             temp_dead(s, ts);
3436         } else {
3437             if (ots->val_type != TEMP_VAL_REG) {
3438                 /* When allocating a new register, make sure to not spill the
3439                    input one. */
3440                 tcg_regset_set_reg(allocated_regs, ts->reg);
3441                 ots->reg = tcg_reg_alloc(s, tcg_target_available_regs[otype],
3442                                          allocated_regs, preferred_regs,
3443                                          ots->indirect_base);
3444             }
3445             if (!tcg_out_mov(s, otype, ots->reg, ts->reg)) {
3446                 /*
3447                  * Cross register class move not supported.
3448                  * Store the source register into the destination slot
3449                  * and leave the destination temp as TEMP_VAL_MEM.
3450                  */
3451                 assert(!ots->fixed_reg);
3452                 if (!ts->mem_allocated) {
3453                     temp_allocate_frame(s, ots);
3454                 }
3455                 tcg_out_st(s, ts->type, ts->reg,
3456                            ots->mem_base->reg, ots->mem_offset);
3457                 ots->mem_coherent = 1;
3458                 temp_free_or_dead(s, ots, -1);
3459                 return;
3460             }
3461         }
3462         ots->val_type = TEMP_VAL_REG;
3463         ots->mem_coherent = 0;
3464         s->reg_to_temp[ots->reg] = ots;
3465         if (NEED_SYNC_ARG(0)) {
3466             temp_sync(s, ots, allocated_regs, 0, 0);
3467         }
3468     }
3469 }
3470 
3471 /*
3472  * Specialized code generation for INDEX_op_dup_vec.
3473  */
3474 static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op)
3475 {
3476     const TCGLifeData arg_life = op->life;
3477     TCGRegSet dup_out_regs, dup_in_regs;
3478     TCGTemp *its, *ots;
3479     TCGType itype, vtype;
3480     intptr_t endian_fixup;
3481     unsigned vece;
3482     bool ok;
3483 
3484     ots = arg_temp(op->args[0]);
3485     its = arg_temp(op->args[1]);
3486 
3487     /* ENV should not be modified.  */
3488     tcg_debug_assert(!ots->fixed_reg);
3489 
3490     itype = its->type;
3491     vece = TCGOP_VECE(op);
3492     vtype = TCGOP_VECL(op) + TCG_TYPE_V64;
3493 
3494     if (its->val_type == TEMP_VAL_CONST) {
3495         /* Propagate constant via movi -> dupi.  */
3496         tcg_target_ulong val = its->val;
3497         if (IS_DEAD_ARG(1)) {
3498             temp_dead(s, its);
3499         }
3500         tcg_reg_alloc_do_movi(s, ots, val, arg_life, op->output_pref[0]);
3501         return;
3502     }
3503 
3504     dup_out_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[0].u.regs;
3505     dup_in_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[1].u.regs;
3506 
3507     /* Allocate the output register now.  */
3508     if (ots->val_type != TEMP_VAL_REG) {
3509         TCGRegSet allocated_regs = s->reserved_regs;
3510 
3511         if (!IS_DEAD_ARG(1) && its->val_type == TEMP_VAL_REG) {
3512             /* Make sure to not spill the input register. */
3513             tcg_regset_set_reg(allocated_regs, its->reg);
3514         }
3515         ots->reg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
3516                                  op->output_pref[0], ots->indirect_base);
3517         ots->val_type = TEMP_VAL_REG;
3518         ots->mem_coherent = 0;
3519         s->reg_to_temp[ots->reg] = ots;
3520     }
3521 
3522     switch (its->val_type) {
3523     case TEMP_VAL_REG:
3524         /*
3525          * The dup constriaints must be broad, covering all possible VECE.
3526          * However, tcg_op_dup_vec() gets to see the VECE and we allow it
3527          * to fail, indicating that extra moves are required for that case.
3528          */
3529         if (tcg_regset_test_reg(dup_in_regs, its->reg)) {
3530             if (tcg_out_dup_vec(s, vtype, vece, ots->reg, its->reg)) {
3531                 goto done;
3532             }
3533             /* Try again from memory or a vector input register.  */
3534         }
3535         if (!its->mem_coherent) {
3536             /*
3537              * The input register is not synced, and so an extra store
3538              * would be required to use memory.  Attempt an integer-vector
3539              * register move first.  We do not have a TCGRegSet for this.
3540              */
3541             if (tcg_out_mov(s, itype, ots->reg, its->reg)) {
3542                 break;
3543             }
3544             /* Sync the temp back to its slot and load from there.  */
3545             temp_sync(s, its, s->reserved_regs, 0, 0);
3546         }
3547         /* fall through */
3548 
3549     case TEMP_VAL_MEM:
3550 #ifdef HOST_WORDS_BIGENDIAN
3551         endian_fixup = itype == TCG_TYPE_I32 ? 4 : 8;
3552         endian_fixup -= 1 << vece;
3553 #else
3554         endian_fixup = 0;
3555 #endif
3556         if (tcg_out_dupm_vec(s, vtype, vece, ots->reg, its->mem_base->reg,
3557                              its->mem_offset + endian_fixup)) {
3558             goto done;
3559         }
3560         tcg_out_ld(s, itype, ots->reg, its->mem_base->reg, its->mem_offset);
3561         break;
3562 
3563     default:
3564         g_assert_not_reached();
3565     }
3566 
3567     /* We now have a vector input register, so dup must succeed. */
3568     ok = tcg_out_dup_vec(s, vtype, vece, ots->reg, ots->reg);
3569     tcg_debug_assert(ok);
3570 
3571  done:
3572     if (IS_DEAD_ARG(1)) {
3573         temp_dead(s, its);
3574     }
3575     if (NEED_SYNC_ARG(0)) {
3576         temp_sync(s, ots, s->reserved_regs, 0, 0);
3577     }
3578     if (IS_DEAD_ARG(0)) {
3579         temp_dead(s, ots);
3580     }
3581 }
3582 
/*
 * Allocate host registers for a generic TCG op and emit the host instruction.
 *
 * @s:  translation context (register state, output buffer).
 * @op: the op to process; its liveness data is read from op->life.
 *
 * The steps are strictly ordered:
 *   1. copy the op's constant arguments verbatim into new_args[];
 *   2. for every input (visited in def->sorted_args order) either pass it as
 *      an immediate or get it into a register satisfying its constraint;
 *   3. release inputs that liveness marked dead;
 *   4. if the op ends a basic block call tcg_reg_alloc_bb_end(); otherwise
 *      free call-clobbered registers / sync globals as the op flags request
 *      and pick a register for every output;
 *   5. emit the instruction through tcg_out_op() or tcg_out_vec_op();
 *   6. sync and/or kill the outputs as liveness requires.
 */
static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
{
    /* NOTE(review): arg_life is not referenced directly below; presumably
       the IS_DEAD_ARG()/NEED_SYNC_ARG() macros read it -- confirm. */
    const TCGLifeData arg_life = op->life;
    const TCGOpDef * const def = &tcg_op_defs[op->opc];
    /* Registers that must not be picked for inputs / outputs respectively. */
    TCGRegSet i_allocated_regs;
    TCGRegSet o_allocated_regs;
    int i, k, nb_iargs, nb_oargs;
    TCGReg reg;
    TCGArg arg;
    const TCGArgConstraint *arg_ct;
    TCGTemp *ts;
    /* Arguments as handed to the backend: register numbers or immediates. */
    TCGArg new_args[TCG_MAX_OP_ARGS];
    /* const_args[i] != 0 means new_args[i] is an immediate, not a register. */
    int const_args[TCG_MAX_OP_ARGS];

    nb_oargs = def->nb_oargs;
    nb_iargs = def->nb_iargs;

    /* copy constants */
    memcpy(new_args + nb_oargs + nb_iargs,
           op->args + nb_oargs + nb_iargs,
           sizeof(TCGArg) * def->nb_cargs);

    /* Both sets start out with only the reserved registers excluded. */
    i_allocated_regs = s->reserved_regs;
    o_allocated_regs = s->reserved_regs;

    /* satisfy input constraints */
    for (k = 0; k < nb_iargs; k++) {
        TCGRegSet i_preferred_regs, o_preferred_regs;

        /* k walks the sorted order; i is the real argument index. */
        i = def->sorted_args[nb_oargs + k];
        arg = op->args[i];
        arg_ct = &def->args_ct[i];
        ts = arg_temp(arg);

        if (ts->val_type == TEMP_VAL_CONST
            && tcg_target_const_match(ts->val, ts->type, arg_ct)) {
            /* constant is OK for instruction */
            const_args[i] = 1;
            new_args[i] = ts->val;
            continue;
        }

        i_preferred_regs = o_preferred_regs = 0;
        if (arg_ct->ct & TCG_CT_IALIAS) {
            /* This input shares its register with an output: steer the
               allocation toward what that output would like. */
            o_preferred_regs = op->output_pref[arg_ct->alias_index];
            if (ts->fixed_reg) {
                /* if fixed register, we must allocate a new register
                   if the alias is not the same register */
                if (arg != op->args[arg_ct->alias_index]) {
                    goto allocate_in_reg;
                }
            } else {
                /* if the input is aliased to an output and if it is
                   not dead after the instruction, we must allocate
                   a new register and move it */
                if (!IS_DEAD_ARG(i)) {
                    goto allocate_in_reg;
                }

                /* check if the current register has already been allocated
                   for another input aliased to an output */
                if (ts->val_type == TEMP_VAL_REG) {
                    int k2, i2;
                    reg = ts->reg;
                    for (k2 = 0 ; k2 < k ; k2++) {
                        i2 = def->sorted_args[nb_oargs + k2];
                        if ((def->args_ct[i2].ct & TCG_CT_IALIAS) &&
                            reg == new_args[i2]) {
                            goto allocate_in_reg;
                        }
                    }
                }
                i_preferred_regs = o_preferred_regs;
            }
        }

        /* Common case: load the temp straight into a constraint register. */
        temp_load(s, ts, arg_ct->u.regs, i_allocated_regs, i_preferred_regs);
        reg = ts->reg;

        if (tcg_regset_test_reg(arg_ct->u.regs, reg)) {
            /* nothing to do : the constraint is satisfied */
        } else {
        allocate_in_reg:
            /* allocate a new register matching the constraint
               and move the temporary register into it */
            temp_load(s, ts, tcg_target_available_regs[ts->type],
                      i_allocated_regs, 0);
            reg = tcg_reg_alloc(s, arg_ct->u.regs, i_allocated_regs,
                                o_preferred_regs, ts->indirect_base);
            if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
                /*
                 * Cross register class move not supported.  Sync the
                 * temp back to its slot and load from there.
                 */
                temp_sync(s, ts, i_allocated_regs, 0, 0);
                tcg_out_ld(s, ts->type, reg,
                           ts->mem_base->reg, ts->mem_offset);
            }
        }
        new_args[i] = reg;
        const_args[i] = 0;
        /* Keep later inputs from being allocated on top of this one. */
        tcg_regset_set_reg(i_allocated_regs, reg);
    }

    /* mark dead temporaries and free the associated registers */
    for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
        if (IS_DEAD_ARG(i)) {
            temp_dead(s, arg_temp(op->args[i]));
        }
    }

    if (def->flags & TCG_OPF_BB_END) {
        /* No output allocation is performed on this path. */
        tcg_reg_alloc_bb_end(s, i_allocated_regs);
    } else {
        if (def->flags & TCG_OPF_CALL_CLOBBER) {
            /* XXX: permit generic clobber register list ? */
            for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
                if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
                    tcg_reg_free(s, i, i_allocated_regs);
                }
            }
        }
        if (def->flags & TCG_OPF_SIDE_EFFECTS) {
            /* sync globals if the op has side effects and might trigger
               an exception. */
            sync_globals(s, i_allocated_regs);
        }

        /* satisfy the output constraints */
        for(k = 0; k < nb_oargs; k++) {
            i = def->sorted_args[k];
            arg = op->args[i];
            arg_ct = &def->args_ct[i];
            ts = arg_temp(arg);

            /* ENV should not be modified.  */
            tcg_debug_assert(!ts->fixed_reg);

            if ((arg_ct->ct & TCG_CT_ALIAS)
                && !const_args[arg_ct->alias_index]) {
                /* Output reuses the register of its aliased input. */
                reg = new_args[arg_ct->alias_index];
            } else if (arg_ct->ct & TCG_CT_NEWREG) {
                /* Output must avoid every input register as well as the
                   outputs already chosen. */
                reg = tcg_reg_alloc(s, arg_ct->u.regs,
                                    i_allocated_regs | o_allocated_regs,
                                    op->output_pref[k], ts->indirect_base);
            } else {
                /* Only previously chosen outputs are excluded here. */
                reg = tcg_reg_alloc(s, arg_ct->u.regs, o_allocated_regs,
                                    op->output_pref[k], ts->indirect_base);
            }
            tcg_regset_set_reg(o_allocated_regs, reg);
            /* Detach the temp from whatever register it held before. */
            if (ts->val_type == TEMP_VAL_REG) {
                s->reg_to_temp[ts->reg] = NULL;
            }
            ts->val_type = TEMP_VAL_REG;
            ts->reg = reg;
            /*
             * Temp value is modified, so the value kept in memory is
             * potentially not the same.
             */
            ts->mem_coherent = 0;
            s->reg_to_temp[reg] = ts;
            new_args[i] = reg;
        }
    }

    /* emit instruction */
    if (def->flags & TCG_OPF_VECTOR) {
        tcg_out_vec_op(s, op->opc, TCGOP_VECL(op), TCGOP_VECE(op),
                       new_args, const_args);
    } else {
        tcg_out_op(s, op->opc, new_args, const_args);
    }

    /* move the outputs in the correct register if needed */
    for(i = 0; i < nb_oargs; i++) {
        ts = arg_temp(op->args[i]);

        /* ENV should not be modified.  */
        tcg_debug_assert(!ts->fixed_reg);

        if (NEED_SYNC_ARG(i)) {
            /* The last argument lets temp_sync() also kill a dead output. */
            temp_sync(s, ts, o_allocated_regs, 0, IS_DEAD_ARG(i));
        } else if (IS_DEAD_ARG(i)) {
            temp_dead(s, ts);
        }
    }
}
3770 
/*
 * STACK_DIR(x): x negated when TCG_TARGET_STACK_GROWSUP is defined,
 * x unchanged otherwise.
 * NOTE(review): tcg_reg_alloc_call() below open-codes the stack direction
 * with its own #ifdef instead of using this macro -- confirm it is still
 * needed.
 */
#ifdef TCG_TARGET_STACK_GROWSUP
#define STACK_DIR(x) (-(x))
#else
#define STACK_DIR(x) (x)
#endif
3776 
/*
 * Register allocation and code emission for a helper call op.
 *
 * @s:  translation context.
 * @op: the call op.  Its args[] hold the outputs, then the inputs, then the
 *      function address, then the call flags.
 *
 * Arguments beyond the available argument registers are stored to the call
 * stack first; the remaining ones are then placed in
 * tcg_target_call_iarg_regs[].  After dead inputs are released and the
 * call-clobbered registers freed, globals are saved or synced according to
 * the call flags, the call is emitted, and each output temp is bound to the
 * matching entry of tcg_target_call_oarg_regs[].
 *
 * Aborts via tcg_abort() if the stack arguments need more than
 * TCG_STATIC_CALL_ARGS_SIZE bytes.
 */
static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
{
    const int nb_oargs = TCGOP_CALLO(op);
    const int nb_iargs = TCGOP_CALLI(op);
    /* NOTE(review): presumably read by IS_DEAD_ARG()/NEED_SYNC_ARG(). */
    const TCGLifeData arg_life = op->life;
    int flags, nb_regs, i;
    TCGReg reg;
    TCGArg arg;
    TCGTemp *ts;
    intptr_t stack_offset;
    size_t call_stack_size;
    tcg_insn_unit *func_addr;
    int allocate_args;
    TCGRegSet allocated_regs;

    /* The two trailing args are the callee address and the call flags. */
    func_addr = (tcg_insn_unit *)(intptr_t)op->args[nb_oargs + nb_iargs];
    flags = op->args[nb_oargs + nb_iargs + 1];

    /* Number of inputs passed in registers: capped by the input count. */
    nb_regs = ARRAY_SIZE(tcg_target_call_iarg_regs);
    if (nb_regs > nb_iargs) {
        nb_regs = nb_iargs;
    }

    /* assign stack slots first */
    call_stack_size = (nb_iargs - nb_regs) * sizeof(tcg_target_long);
    /* Round up to a multiple of TCG_TARGET_STACK_ALIGN. */
    call_stack_size = (call_stack_size + TCG_TARGET_STACK_ALIGN - 1) &
        ~(TCG_TARGET_STACK_ALIGN - 1);
    allocate_args = (call_stack_size > TCG_STATIC_CALL_ARGS_SIZE);
    if (allocate_args) {
        /* XXX: if more than TCG_STATIC_CALL_ARGS_SIZE is needed,
           preallocate call stack */
        tcg_abort();
    }

    stack_offset = TCG_TARGET_CALL_STACK_OFFSET;
    for (i = nb_regs; i < nb_iargs; i++) {
        arg = op->args[nb_oargs + i];
        /* Grows-up stacks step the offset before the store, others after. */
#ifdef TCG_TARGET_STACK_GROWSUP
        stack_offset -= sizeof(tcg_target_long);
#endif
        /* Dummy args still consume a slot but store nothing. */
        if (arg != TCG_CALL_DUMMY_ARG) {
            ts = arg_temp(arg);
            temp_load(s, ts, tcg_target_available_regs[ts->type],
                      s->reserved_regs, 0);
            tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK, stack_offset);
        }
#ifndef TCG_TARGET_STACK_GROWSUP
        stack_offset += sizeof(tcg_target_long);
#endif
    }

    /* assign input registers */
    allocated_regs = s->reserved_regs;
    for (i = 0; i < nb_regs; i++) {
        arg = op->args[nb_oargs + i];
        if (arg != TCG_CALL_DUMMY_ARG) {
            ts = arg_temp(arg);
            reg = tcg_target_call_iarg_regs[i];

            if (ts->val_type == TEMP_VAL_REG) {
                /* Already in a register: move only if it is the wrong one. */
                if (ts->reg != reg) {
                    tcg_reg_free(s, reg, allocated_regs);
                    if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
                        /*
                         * Cross register class move not supported.  Sync the
                         * temp back to its slot and load from there.
                         */
                        temp_sync(s, ts, allocated_regs, 0, 0);
                        tcg_out_ld(s, ts->type, reg,
                                   ts->mem_base->reg, ts->mem_offset);
                    }
                }
            } else {
                /* Not in a register: load it, allowing only the target
                   argument register as destination. */
                TCGRegSet arg_set = 0;

                tcg_reg_free(s, reg, allocated_regs);
                tcg_regset_set_reg(arg_set, reg);
                temp_load(s, ts, arg_set, allocated_regs, 0);
            }

            /* Protect this argument register from later iterations. */
            tcg_regset_set_reg(allocated_regs, reg);
        }
    }

    /* mark dead temporaries and free the associated registers */
    for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
        if (IS_DEAD_ARG(i)) {
            temp_dead(s, arg_temp(op->args[i]));
        }
    }

    /* clobber call registers */
    for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
        if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
            tcg_reg_free(s, i, allocated_regs);
        }
    }

    /* Save globals if they might be written by the helper, sync them if
       they might be read. */
    if (flags & TCG_CALL_NO_READ_GLOBALS) {
        /* Nothing to do */
    } else if (flags & TCG_CALL_NO_WRITE_GLOBALS) {
        sync_globals(s, allocated_regs);
    } else {
        save_globals(s, allocated_regs);
    }

    tcg_out_call(s, func_addr);

    /* assign output registers and emit moves if needed */
    for(i = 0; i < nb_oargs; i++) {
        arg = op->args[i];
        ts = arg_temp(arg);

        /* ENV should not be modified.  */
        tcg_debug_assert(!ts->fixed_reg);

        reg = tcg_target_call_oarg_regs[i];
        /* The return register must be unowned at this point. */
        tcg_debug_assert(s->reg_to_temp[reg] == NULL);
        if (ts->val_type == TEMP_VAL_REG) {
            s->reg_to_temp[ts->reg] = NULL;
        }
        ts->val_type = TEMP_VAL_REG;
        ts->reg = reg;
        /* The register now holds a fresh value; memory copy is stale. */
        ts->mem_coherent = 0;
        s->reg_to_temp[reg] = ts;
        if (NEED_SYNC_ARG(i)) {
            temp_sync(s, ts, allocated_regs, 0, IS_DEAD_ARG(i));
        } else if (IS_DEAD_ARG(i)) {
            temp_dead(s, ts);
        }
    }
}
3911 
3912 #ifdef CONFIG_PROFILER
3913 
3914 /* avoid copy/paste errors */
3915 #define PROF_ADD(to, from, field)                       \
3916     do {                                                \
3917         (to)->field += atomic_read(&((from)->field));   \
3918     } while (0)
3919 
3920 #define PROF_MAX(to, from, field)                                       \
3921     do {                                                                \
3922         typeof((from)->field) val__ = atomic_read(&((from)->field));    \
3923         if (val__ > (to)->field) {                                      \
3924             (to)->field = val__;                                        \
3925         }                                                               \
3926     } while (0)
3927 
3928 /* Pass in a zero'ed @prof */
3929 static inline
3930 void tcg_profile_snapshot(TCGProfile *prof, bool counters, bool table)
3931 {
3932     unsigned int n_ctxs = atomic_read(&n_tcg_ctxs);
3933     unsigned int i;
3934 
3935     for (i = 0; i < n_ctxs; i++) {
3936         TCGContext *s = atomic_read(&tcg_ctxs[i]);
3937         const TCGProfile *orig = &s->prof;
3938 
3939         if (counters) {
3940             PROF_ADD(prof, orig, cpu_exec_time);
3941             PROF_ADD(prof, orig, tb_count1);
3942             PROF_ADD(prof, orig, tb_count);
3943             PROF_ADD(prof, orig, op_count);
3944             PROF_MAX(prof, orig, op_count_max);
3945             PROF_ADD(prof, orig, temp_count);
3946             PROF_MAX(prof, orig, temp_count_max);
3947             PROF_ADD(prof, orig, del_op_count);
3948             PROF_ADD(prof, orig, code_in_len);
3949             PROF_ADD(prof, orig, code_out_len);
3950             PROF_ADD(prof, orig, search_out_len);
3951             PROF_ADD(prof, orig, interm_time);
3952             PROF_ADD(prof, orig, code_time);
3953             PROF_ADD(prof, orig, la_time);
3954             PROF_ADD(prof, orig, opt_time);
3955             PROF_ADD(prof, orig, restore_count);
3956             PROF_ADD(prof, orig, restore_time);
3957         }
3958         if (table) {
3959             int i;
3960 
3961             for (i = 0; i < NB_OPS; i++) {
3962                 PROF_ADD(prof, orig, table_op_count[i]);
3963             }
3964         }
3965     }
3966 }
3967 
3968 #undef PROF_ADD
3969 #undef PROF_MAX
3970 
3971 static void tcg_profile_snapshot_counters(TCGProfile *prof)
3972 {
3973     tcg_profile_snapshot(prof, true, false);
3974 }
3975 
3976 static void tcg_profile_snapshot_table(TCGProfile *prof)
3977 {
3978     tcg_profile_snapshot(prof, false, true);
3979 }
3980 
3981 void tcg_dump_op_count(void)
3982 {
3983     TCGProfile prof = {};
3984     int i;
3985 
3986     tcg_profile_snapshot_table(&prof);
3987     for (i = 0; i < NB_OPS; i++) {
3988         qemu_printf("%s %" PRId64 "\n", tcg_op_defs[i].name,
3989                     prof.table_op_count[i]);
3990     }
3991 }
3992 
3993 int64_t tcg_cpu_exec_time(void)
3994 {
3995     unsigned int n_ctxs = atomic_read(&n_tcg_ctxs);
3996     unsigned int i;
3997     int64_t ret = 0;
3998 
3999     for (i = 0; i < n_ctxs; i++) {
4000         const TCGContext *s = atomic_read(&tcg_ctxs[i]);
4001         const TCGProfile *prof = &s->prof;
4002 
4003         ret += atomic_read(&prof->cpu_exec_time);
4004     }
4005     return ret;
4006 }
4007 #else
/*
 * Variant used when CONFIG_PROFILER is not defined: no op counts exist,
 * so only print a notice saying the profiler was not compiled in.
 */
void tcg_dump_op_count(void)
{
    qemu_printf("[TCG profiler not compiled]\n");
}
4012 
/*
 * Variant used when CONFIG_PROFILER is not defined: there is no value to
 * return, so report an error and terminate the process with EXIT_FAILURE.
 * This function does not return.
 */
int64_t tcg_cpu_exec_time(void)
{
    error_report("%s: TCG profiler not compiled", __func__);
    exit(EXIT_FAILURE);
}
4018 #endif
4019 
4020 
/*
 * Turn the op list held in @s into host code for @tb.
 *
 * @s:  translation context whose s->ops list is to be compiled.
 * @tb: translation block; code is emitted starting at tb->tc.ptr and
 *      tb->pc is used to filter debug logging.
 *
 * The function runs the optimizer (when USE_TCG_OPTIMIZATIONS is defined),
 * the reachability and liveness passes, then walks the op list doing
 * register allocation and code emission, and finally resolves relocations
 * and flushes the instruction cache over the generated range.
 *
 * Returns tcg_current_code_size() on success.  Returns -1 when code_ptr has
 * gone past code_gen_highwater, -2 when tcg_current_code_size() exceeds
 * UINT16_MAX or tcg_resolve_relocs() fails, or the negative value returned
 * by tcg_out_ldst_finalize() / tcg_out_pool_finalize().
 */
int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
{
#ifdef CONFIG_PROFILER
    TCGProfile *prof = &s->prof;
#endif
    int i, num_insns;
    TCGOp *op;

#ifdef CONFIG_PROFILER
    /* Account for the number of ops and temps in this block. */
    {
        int n = 0;

        QTAILQ_FOREACH(op, &s->ops, link) {
            n++;
        }
        atomic_set(&prof->op_count, prof->op_count + n);
        if (n > prof->op_count_max) {
            atomic_set(&prof->op_count_max, n);
        }

        n = s->nb_temps;
        atomic_set(&prof->temp_count, prof->temp_count + n);
        if (n > prof->temp_count_max) {
            atomic_set(&prof->temp_count_max, n);
        }
    }
#endif

#ifdef DEBUG_DISAS
    /* Dump the ops as received, before any optimization. */
    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)
                 && qemu_log_in_addr_range(tb->pc))) {
        FILE *logfile = qemu_log_lock();
        qemu_log("OP:\n");
        tcg_dump_ops(s, false);
        qemu_log("\n");
        qemu_log_unlock(logfile);
    }
#endif

#ifdef CONFIG_DEBUG_TCG
    /* Ensure all labels referenced have been emitted.  */
    {
        TCGLabel *l;
        bool error = false;

        QSIMPLEQ_FOREACH(l, &s->labels, next) {
            if (unlikely(!l->present) && l->refs) {
                qemu_log_mask(CPU_LOG_TB_OP,
                              "$L%d referenced but not present.\n", l->id);
                error = true;
            }
        }
        assert(!error);
    }
#endif

    /* Timers are accumulated as (end - start) by subtracting the clock
       before the phase and adding it back afterwards. */
#ifdef CONFIG_PROFILER
    atomic_set(&prof->opt_time, prof->opt_time - profile_getclock());
#endif

#ifdef USE_TCG_OPTIMIZATIONS
    tcg_optimize(s);
#endif

#ifdef CONFIG_PROFILER
    atomic_set(&prof->opt_time, prof->opt_time + profile_getclock());
    atomic_set(&prof->la_time, prof->la_time - profile_getclock());
#endif

    reachable_code_pass(s);
    liveness_pass_1(s);

    if (s->nb_indirects > 0) {
#ifdef DEBUG_DISAS
        if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
                     && qemu_log_in_addr_range(tb->pc))) {
            FILE *logfile = qemu_log_lock();
            qemu_log("OP before indirect lowering:\n");
            tcg_dump_ops(s, false);
            qemu_log("\n");
            qemu_log_unlock(logfile);
        }
#endif
        /* Replace indirect temps with direct temps.  */
        if (liveness_pass_2(s)) {
            /* If changes were made, re-run liveness.  */
            liveness_pass_1(s);
        }
    }

#ifdef CONFIG_PROFILER
    atomic_set(&prof->la_time, prof->la_time + profile_getclock());
#endif

#ifdef DEBUG_DISAS
    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT)
                 && qemu_log_in_addr_range(tb->pc))) {
        FILE *logfile = qemu_log_lock();
        qemu_log("OP after optimization and liveness analysis:\n");
        tcg_dump_ops(s, true);
        qemu_log("\n");
        qemu_log_unlock(logfile);
    }
#endif

    tcg_reg_alloc_start(s);

    /* Code for this block is emitted starting at tb->tc.ptr. */
    s->code_buf = tb->tc.ptr;
    s->code_ptr = tb->tc.ptr;

#ifdef TCG_TARGET_NEED_LDST_LABELS
    QSIMPLEQ_INIT(&s->ldst_labels);
#endif
#ifdef TCG_TARGET_NEED_POOL_LABELS
    s->pool_labels = NULL;
#endif

    /* -1 means no insn_start op has been seen yet. */
    num_insns = -1;
    QTAILQ_FOREACH(op, &s->ops, link) {
        TCGOpcode opc = op->opc;

#ifdef CONFIG_PROFILER
        atomic_set(&prof->table_op_count[opc], prof->table_op_count[opc] + 1);
#endif

        switch (opc) {
        case INDEX_op_mov_i32:
        case INDEX_op_mov_i64:
        case INDEX_op_mov_vec:
            tcg_reg_alloc_mov(s, op);
            break;
        case INDEX_op_movi_i32:
        case INDEX_op_movi_i64:
        case INDEX_op_dupi_vec:
            tcg_reg_alloc_movi(s, op);
            break;
        case INDEX_op_dup_vec:
            tcg_reg_alloc_dup(s, op);
            break;
        case INDEX_op_insn_start:
            /* Close the previous guest insn by recording where its host
               code ends, then start recording data for the new one. */
            if (num_insns >= 0) {
                size_t off = tcg_current_code_size(s);
                s->gen_insn_end_off[num_insns] = off;
                /* Assert that we do not overflow our stored offset.  */
                assert(s->gen_insn_end_off[num_insns] == off);
            }
            num_insns++;
            for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
                target_ulong a;
                /* When the target word is wider than a host register each
                   value is carried in two consecutive args (low, high). */
#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
                a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
#else
                a = op->args[i];
#endif
                s->gen_insn_data[num_insns][i] = a;
            }
            break;
        case INDEX_op_discard:
            temp_dead(s, arg_temp(op->args[0]));
            break;
        case INDEX_op_set_label:
            /* A label starts a new basic block: finish the current one
               before binding the label to the current output position. */
            tcg_reg_alloc_bb_end(s, s->reserved_regs);
            tcg_out_label(s, arg_label(op->args[0]), s->code_ptr);
            break;
        case INDEX_op_call:
            tcg_reg_alloc_call(s, op);
            break;
        default:
            /* Sanity check that we've not introduced any unhandled opcodes. */
            tcg_debug_assert(tcg_op_supported(opc));
            /* Note: in order to speed up the code, it would be much
               faster to have specialized register allocator functions for
               some common argument patterns */
            tcg_reg_alloc_op(s, op);
            break;
        }
#ifdef CONFIG_DEBUG_TCG
        check_regs(s);
#endif
        /* Test for (pending) buffer overflow.  The assumption is that any
           one operation beginning below the high water mark cannot overrun
           the buffer completely.  Thus we can test for overflow after
           generating code without having to check during generation.  */
        if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
            return -1;
        }
        /* Test for TB overflow, as seen by gen_insn_end_off.  */
        if (unlikely(tcg_current_code_size(s) > UINT16_MAX)) {
            return -2;
        }
    }
    /* At least one insn_start must have been seen. */
    tcg_debug_assert(num_insns >= 0);
    s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);

    /* Generate TB finalization at the end of block */
#ifdef TCG_TARGET_NEED_LDST_LABELS
    i = tcg_out_ldst_finalize(s);
    if (i < 0) {
        return i;
    }
#endif
#ifdef TCG_TARGET_NEED_POOL_LABELS
    i = tcg_out_pool_finalize(s);
    if (i < 0) {
        return i;
    }
#endif
    if (!tcg_resolve_relocs(s)) {
        return -2;
    }

    /* flush instruction cache */
    flush_icache_range((uintptr_t)s->code_buf, (uintptr_t)s->code_ptr);

    return tcg_current_code_size(s);
}
4237 
4238 #ifdef CONFIG_PROFILER
4239 void tcg_dump_info(void)
4240 {
4241     TCGProfile prof = {};
4242     const TCGProfile *s;
4243     int64_t tb_count;
4244     int64_t tb_div_count;
4245     int64_t tot;
4246 
4247     tcg_profile_snapshot_counters(&prof);
4248     s = &prof;
4249     tb_count = s->tb_count;
4250     tb_div_count = tb_count ? tb_count : 1;
4251     tot = s->interm_time + s->code_time;
4252 
4253     qemu_printf("JIT cycles          %" PRId64 " (%0.3f s at 2.4 GHz)\n",
4254                 tot, tot / 2.4e9);
4255     qemu_printf("translated TBs      %" PRId64 " (aborted=%" PRId64
4256                 " %0.1f%%)\n",
4257                 tb_count, s->tb_count1 - tb_count,
4258                 (double)(s->tb_count1 - s->tb_count)
4259                 / (s->tb_count1 ? s->tb_count1 : 1) * 100.0);
4260     qemu_printf("avg ops/TB          %0.1f max=%d\n",
4261                 (double)s->op_count / tb_div_count, s->op_count_max);
4262     qemu_printf("deleted ops/TB      %0.2f\n",
4263                 (double)s->del_op_count / tb_div_count);
4264     qemu_printf("avg temps/TB        %0.2f max=%d\n",
4265                 (double)s->temp_count / tb_div_count, s->temp_count_max);
4266     qemu_printf("avg host code/TB    %0.1f\n",
4267                 (double)s->code_out_len / tb_div_count);
4268     qemu_printf("avg search data/TB  %0.1f\n",
4269                 (double)s->search_out_len / tb_div_count);
4270 
4271     qemu_printf("cycles/op           %0.1f\n",
4272                 s->op_count ? (double)tot / s->op_count : 0);
4273     qemu_printf("cycles/in byte      %0.1f\n",
4274                 s->code_in_len ? (double)tot / s->code_in_len : 0);
4275     qemu_printf("cycles/out byte     %0.1f\n",
4276                 s->code_out_len ? (double)tot / s->code_out_len : 0);
4277     qemu_printf("cycles/search byte     %0.1f\n",
4278                 s->search_out_len ? (double)tot / s->search_out_len : 0);
4279     if (tot == 0) {
4280         tot = 1;
4281     }
4282     qemu_printf("  gen_interm time   %0.1f%%\n",
4283                 (double)s->interm_time / tot * 100.0);
4284     qemu_printf("  gen_code time     %0.1f%%\n",
4285                 (double)s->code_time / tot * 100.0);
4286     qemu_printf("optim./code time    %0.1f%%\n",
4287                 (double)s->opt_time / (s->code_time ? s->code_time : 1)
4288                 * 100.0);
4289     qemu_printf("liveness/code time  %0.1f%%\n",
4290                 (double)s->la_time / (s->code_time ? s->code_time : 1) * 100.0);
4291     qemu_printf("cpu_restore count   %" PRId64 "\n",
4292                 s->restore_count);
4293     qemu_printf("  avg cycles        %0.1f\n",
4294                 s->restore_count ? (double)s->restore_time / s->restore_count : 0);
4295 }
4296 #else
/*
 * Variant used when CONFIG_PROFILER is not defined: no statistics exist,
 * so only print a notice saying the profiler was not compiled in.
 */
void tcg_dump_info(void)
{
    qemu_printf("[TCG profiler not compiled]\n");
}
4301 #endif
4302 
4303 #ifdef ELF_HOST_MACHINE
4304 /* In order to use this feature, the backend needs to do three things:
4305 
4306    (1) Define ELF_HOST_MACHINE to indicate both what value to
4307        put into the ELF image and to indicate support for the feature.
4308 
4309    (2) Define tcg_register_jit.  This should create a buffer containing
4310        the contents of a .debug_frame section that describes the post-
4311        prologue unwind info for the tcg machine.
4312 
4313    (3) Call tcg_register_jit_int, with the constructed .debug_frame.
4314 */
4315 
4316 /* Begin GDB interface.  THE FOLLOWING MUST MATCH GDB DOCS.  */
/*
 * Values for jit_descriptor.action_flag.  Part of the GDB interface:
 * names and numeric values must stay as GDB documents them.
 */
typedef enum {
    JIT_NOACTION = 0,
    JIT_REGISTER_FN,
    JIT_UNREGISTER_FN
} jit_actions_t;
4322 
/*
 * One node of a doubly linked list of symbol files (next_entry/prev_entry),
 * each described by an address and a size.  Part of the GDB interface:
 * the layout must stay as GDB documents it.
 */
struct jit_code_entry {
    struct jit_code_entry *next_entry;
    struct jit_code_entry *prev_entry;
    const void *symfile_addr;   /* start of the in-memory symbol file */
    uint64_t symfile_size;      /* size of that symbol file */
};
4329 
/*
 * Descriptor object for the GDB interface.  The layout must stay as GDB
 * documents it.
 */
struct jit_descriptor {
    uint32_t version;
    /* NOTE(review): presumably holds a jit_actions_t value -- confirm
       against the GDB documentation. */
    uint32_t action_flag;
    struct jit_code_entry *relevant_entry;
    struct jit_code_entry *first_entry;
};
4336 
/*
 * Deliberately empty, never-inlined function that is part of the GDB
 * interface.  The empty asm statement keeps the body from being entirely
 * empty C.
 * NOTE(review): per the GDB JIT interface, GDB is expected to place a
 * breakpoint here to learn about descriptor updates -- confirm against
 * the GDB documentation.
 */
void __jit_debug_register_code(void) __attribute__((noinline));
void __jit_debug_register_code(void)
{
    asm("");
}
4342 
/* Must statically initialize the version, because GDB may check
   the version before we can set it.  The initializer sets version = 1,
   action_flag = 0 (JIT_NOACTION) and both entry pointers to null.  */
struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };
4346 
4347 /* End GDB interface.  */
4348 
/*
 * Look up @str in an ELF-style string table and return its byte offset.
 *
 * @strtab: table whose byte 0 is the leading NUL, followed by
 *          NUL-terminated names packed back to back.  The names must be
 *          followed by at least one further NUL byte (zero padding), which
 *          acts as the end-of-table marker.
 * @str:    name to search for.
 *
 * Returns the offset of the first entry equal to @str.  If the end-of-table
 * marker is reached without a match, returns 0, which is the offset of the
 * empty name; previously the search ran past the end of the table.
 */
static int find_string(const char *strtab, const char *str)
{
    /* Offset 0 is the leading NUL, so the first name starts at 1. */
    const char *p = strtab + 1;

    for (;;) {
        if (strcmp(p, str) == 0) {
            return p - strtab;
        }
        /* An empty entry that did not match is the zero padding after the
           last name: stop instead of walking off the table. */
        if (*p == '\0') {
            return 0;
        }
        p += strlen(p) + 1;
    }
}
4360 
4361 static void tcg_register_jit_int(void *buf_ptr, size_t buf_size,
4362                                  const void *debug_frame,
4363                                  size_t debug_frame_size)
4364 {
4365     struct __attribute__((packed)) DebugInfo {
4366         uint32_t  len;
4367         uint16_t  version;
4368         uint32_t  abbrev;
4369         uint8_t   ptr_size;
4370         uint8_t   cu_die;
4371         uint16_t  cu_lang;
4372         uintptr_t cu_low_pc;
4373         uintptr_t cu_high_pc;
4374         uint8_t   fn_die;
4375         char      fn_name[16];
4376         uintptr_t fn_low_pc;
4377         uintptr_t fn_high_pc;
4378         uint8_t   cu_eoc;
4379     };
4380 
4381     struct ElfImage {
4382         ElfW(Ehdr) ehdr;
4383         ElfW(Phdr) phdr;
4384         ElfW(Shdr) shdr[7];
4385         ElfW(Sym)  sym[2];
4386         struct DebugInfo di;
4387         uint8_t    da[24];
4388         char       str[80];
4389     };
4390 
4391     struct ElfImage *img;
4392 
4393     static const struct ElfImage img_template = {
4394         .ehdr = {
4395             .e_ident[EI_MAG0] = ELFMAG0,
4396             .e_ident[EI_MAG1] = ELFMAG1,
4397             .e_ident[EI_MAG2] = ELFMAG2,
4398             .e_ident[EI_MAG3] = ELFMAG3,
4399             .e_ident[EI_CLASS] = ELF_CLASS,
4400             .e_ident[EI_DATA] = ELF_DATA,
4401             .e_ident[EI_VERSION] = EV_CURRENT,
4402             .e_type = ET_EXEC,
4403             .e_machine = ELF_HOST_MACHINE,
4404             .e_version = EV_CURRENT,
4405             .e_phoff = offsetof(struct ElfImage, phdr),
4406             .e_shoff = offsetof(struct ElfImage, shdr),
4407             .e_ehsize = sizeof(ElfW(Shdr)),
4408             .e_phentsize = sizeof(ElfW(Phdr)),
4409             .e_phnum = 1,
4410             .e_shentsize = sizeof(ElfW(Shdr)),
4411             .e_shnum = ARRAY_SIZE(img->shdr),
4412             .e_shstrndx = ARRAY_SIZE(img->shdr) - 1,
4413 #ifdef ELF_HOST_FLAGS
4414             .e_flags = ELF_HOST_FLAGS,
4415 #endif
4416 #ifdef ELF_OSABI
4417             .e_ident[EI_OSABI] = ELF_OSABI,
4418 #endif
4419         },
4420         .phdr = {
4421             .p_type = PT_LOAD,
4422             .p_flags = PF_X,
4423         },
4424         .shdr = {
4425             [0] = { .sh_type = SHT_NULL },
4426             /* Trick: The contents of code_gen_buffer are not present in
4427                this fake ELF file; that got allocated elsewhere.  Therefore
4428                we mark .text as SHT_NOBITS (similar to .bss) so that readers
4429                will not look for contents.  We can record any address.  */
4430             [1] = { /* .text */
4431                 .sh_type = SHT_NOBITS,
4432                 .sh_flags = SHF_EXECINSTR | SHF_ALLOC,
4433             },
4434             [2] = { /* .debug_info */
4435                 .sh_type = SHT_PROGBITS,
4436                 .sh_offset = offsetof(struct ElfImage, di),
4437                 .sh_size = sizeof(struct DebugInfo),
4438             },
4439             [3] = { /* .debug_abbrev */
4440                 .sh_type = SHT_PROGBITS,
4441                 .sh_offset = offsetof(struct ElfImage, da),
4442                 .sh_size = sizeof(img->da),
4443             },
4444             [4] = { /* .debug_frame */
4445                 .sh_type = SHT_PROGBITS,
4446                 .sh_offset = sizeof(struct ElfImage),
4447             },
4448             [5] = { /* .symtab */
4449                 .sh_type = SHT_SYMTAB,
4450                 .sh_offset = offsetof(struct ElfImage, sym),
4451                 .sh_size = sizeof(img->sym),
4452                 .sh_info = 1,
4453                 .sh_link = ARRAY_SIZE(img->shdr) - 1,
4454                 .sh_entsize = sizeof(ElfW(Sym)),
4455             },
4456             [6] = { /* .strtab */
4457                 .sh_type = SHT_STRTAB,
4458                 .sh_offset = offsetof(struct ElfImage, str),
4459                 .sh_size = sizeof(img->str),
4460             }
4461         },
4462         .sym = {
4463             [1] = { /* code_gen_buffer */
4464                 .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC),
4465                 .st_shndx = 1,
4466             }
4467         },
4468         .di = {
4469             .len = sizeof(struct DebugInfo) - 4,
4470             .version = 2,
4471             .ptr_size = sizeof(void *),
4472             .cu_die = 1,
4473             .cu_lang = 0x8001,  /* DW_LANG_Mips_Assembler */
4474             .fn_die = 2,
4475             .fn_name = "code_gen_buffer"
4476         },
4477         .da = {
4478             1,          /* abbrev number (the cu) */
4479             0x11, 1,    /* DW_TAG_compile_unit, has children */
4480             0x13, 0x5,  /* DW_AT_language, DW_FORM_data2 */
4481             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
4482             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
4483             0, 0,       /* end of abbrev */
4484             2,          /* abbrev number (the fn) */
4485             0x2e, 0,    /* DW_TAG_subprogram, no children */
4486             0x3, 0x8,   /* DW_AT_name, DW_FORM_string */
4487             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
4488             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
4489             0, 0,       /* end of abbrev */
4490             0           /* no more abbrev */
4491         },
4492         .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0"
4493                ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer",
4494     };
4495 
4496     /* We only need a single jit entry; statically allocate it.  */
4497     static struct jit_code_entry one_entry;
4498 
4499     uintptr_t buf = (uintptr_t)buf_ptr;
4500     size_t img_size = sizeof(struct ElfImage) + debug_frame_size;
4501     DebugFrameHeader *dfh;
4502 
4503     img = g_malloc(img_size);
4504     *img = img_template;
4505 
4506     img->phdr.p_vaddr = buf;
4507     img->phdr.p_paddr = buf;
4508     img->phdr.p_memsz = buf_size;
4509 
4510     img->shdr[1].sh_name = find_string(img->str, ".text");
4511     img->shdr[1].sh_addr = buf;
4512     img->shdr[1].sh_size = buf_size;
4513 
4514     img->shdr[2].sh_name = find_string(img->str, ".debug_info");
4515     img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev");
4516 
4517     img->shdr[4].sh_name = find_string(img->str, ".debug_frame");
4518     img->shdr[4].sh_size = debug_frame_size;
4519 
4520     img->shdr[5].sh_name = find_string(img->str, ".symtab");
4521     img->shdr[6].sh_name = find_string(img->str, ".strtab");
4522 
4523     img->sym[1].st_name = find_string(img->str, "code_gen_buffer");
4524     img->sym[1].st_value = buf;
4525     img->sym[1].st_size = buf_size;
4526 
4527     img->di.cu_low_pc = buf;
4528     img->di.cu_high_pc = buf + buf_size;
4529     img->di.fn_low_pc = buf;
4530     img->di.fn_high_pc = buf + buf_size;
4531 
4532     dfh = (DebugFrameHeader *)(img + 1);
4533     memcpy(dfh, debug_frame, debug_frame_size);
4534     dfh->fde.func_start = buf;
4535     dfh->fde.func_len = buf_size;
4536 
4537 #ifdef DEBUG_JIT
4538     /* Enable this block to be able to debug the ELF image file creation.
4539        One can use readelf, objdump, or other inspection utilities.  */
4540     {
4541         FILE *f = fopen("/tmp/qemu.jit", "w+b");
4542         if (f) {
4543             if (fwrite(img, img_size, 1, f) != img_size) {
4544                 /* Avoid stupid unused return value warning for fwrite.  */
4545             }
4546             fclose(f);
4547         }
4548     }
4549 #endif
4550 
4551     one_entry.symfile_addr = img;
4552     one_entry.symfile_size = img_size;
4553 
4554     __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
4555     __jit_debug_descriptor.relevant_entry = &one_entry;
4556     __jit_debug_descriptor.first_entry = &one_entry;
4557     __jit_debug_register_code();
4558 }
4559 #else
4560 /* No support for the feature.  Provide the entry point expected by exec.c,
4561    and implement the internal function we declared earlier.  */
4562 
/*
 * No-op variant of the internal registration helper, compiled in the
 * #else branch of the ELF_HOST_MACHINE conditional.  It accepts the code
 * buffer and debug-frame arguments and ignores all of them.
 */
static void tcg_register_jit_int(void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
{
    /* Nothing to register: every argument is deliberately unused. */
    (void)buf;
    (void)size;
    (void)debug_frame;
    (void)debug_frame_size;
}
4568 
/*
 * Public entry point for the build configuration without JIT debug
 * support: takes the code buffer and its size and does nothing with them.
 */
void tcg_register_jit(void *buf, size_t buf_size)
{
    /* Feature unavailable in this configuration; arguments are ignored. */
    (void)buf;
    (void)buf_size;
}
4572 #endif /* ELF_HOST_MACHINE */
4573 
4574 #if !TCG_TARGET_MAYBE_vec
4575 void tcg_expand_vec_op(TCGOpcode o, TCGType t, unsigned e, TCGArg a0, ...)
4576 {
4577     g_assert_not_reached();
4578 }
4579 #endif
4580