xref: /openbmc/qemu/tcg/tcg.c (revision e2c546358f0eaccc001f457a9eec0ecbe9d3bed7)
1 /*
2  * Tiny Code Generator for QEMU
3  *
4  * Copyright (c) 2008 Fabrice Bellard
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24 
25 /* define it to use liveness analysis (better code) */
26 #define USE_TCG_OPTIMIZATIONS
27 
28 #include "qemu/osdep.h"
29 
/* Define to dump the ELF file used to communicate with GDB.  */
31 #undef DEBUG_JIT
32 
33 #include "qemu/error-report.h"
34 #include "qemu/cutils.h"
35 #include "qemu/host-utils.h"
36 #include "qemu/qemu-print.h"
37 #include "qemu/timer.h"
38 
39 /* Note: the long term plan is to reduce the dependencies on the QEMU
40    CPU definitions. Currently they are used for qemu_ld/st
41    instructions */
42 #define NO_CPU_IO_DEFS
43 #include "cpu.h"
44 
45 #include "exec/exec-all.h"
46 
47 #if !defined(CONFIG_USER_ONLY)
48 #include "hw/boards.h"
49 #endif
50 
51 #include "tcg-op.h"
52 
53 #if UINTPTR_MAX == UINT32_MAX
54 # define ELF_CLASS  ELFCLASS32
55 #else
56 # define ELF_CLASS  ELFCLASS64
57 #endif
58 #ifdef HOST_WORDS_BIGENDIAN
59 # define ELF_DATA   ELFDATA2MSB
60 #else
61 # define ELF_DATA   ELFDATA2LSB
62 #endif
63 
64 #include "elf.h"
65 #include "exec/log.h"
66 #include "sysemu/sysemu.h"
67 
68 /* Forward declarations for functions declared in tcg-target.inc.c and
69    used here. */
70 static void tcg_target_init(TCGContext *s);
71 static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode);
72 static void tcg_target_qemu_prologue(TCGContext *s);
73 static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
74                         intptr_t value, intptr_t addend);
75 
/* CIE header; this definition (like the FDE header's) is common to all hosts. */
typedef struct {
    /* 'len' is aligned to the size of a host pointer. */
    uint32_t len __attribute__((aligned(sizeof(void *))));
    uint32_t id;
    uint8_t version;
    char augmentation[1];
    uint8_t code_align;
    uint8_t data_align;
    uint8_t return_column;
} DebugFrameCIE;
86 
/*
 * FDE header: a packed record whose leading 'len' is aligned to the size
 * of a host pointer, followed by the CIE offset and the start/length of
 * the described function.
 */
typedef struct QEMU_PACKED {
    uint32_t len __attribute__((aligned(sizeof(void *))));
    uint32_t cie_offset;
    uintptr_t func_start;
    uintptr_t func_len;
} DebugFrameFDEHeader;
93 
94 typedef struct QEMU_PACKED {
95     DebugFrameCIE cie;
96     DebugFrameFDEHeader fde;
97 } DebugFrameHeader;
98 
99 static void tcg_register_jit_int(void *buf, size_t size,
100                                  const void *debug_frame,
101                                  size_t debug_frame_size)
102     __attribute__((unused));
103 
104 /* Forward declarations for functions declared and used in tcg-target.inc.c. */
105 static const char *target_parse_constraint(TCGArgConstraint *ct,
106                                            const char *ct_str, TCGType type);
107 static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
108                        intptr_t arg2);
109 static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
110 static void tcg_out_movi(TCGContext *s, TCGType type,
111                          TCGReg ret, tcg_target_long arg);
112 static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
113                        const int *const_args);
114 #if TCG_TARGET_MAYBE_vec
115 static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
116                             TCGReg dst, TCGReg src);
117 static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
118                              TCGReg dst, TCGReg base, intptr_t offset);
119 static void tcg_out_dupi_vec(TCGContext *s, TCGType type,
120                              TCGReg dst, tcg_target_long arg);
121 static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, unsigned vecl,
122                            unsigned vece, const TCGArg *args,
123                            const int *const_args);
124 #else
125 static inline bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
126                                    TCGReg dst, TCGReg src)
127 {
128     g_assert_not_reached();
129 }
130 static inline bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
131                                     TCGReg dst, TCGReg base, intptr_t offset)
132 {
133     g_assert_not_reached();
134 }
135 static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type,
136                                     TCGReg dst, tcg_target_long arg)
137 {
138     g_assert_not_reached();
139 }
140 static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, unsigned vecl,
141                                   unsigned vece, const TCGArg *args,
142                                   const int *const_args)
143 {
144     g_assert_not_reached();
145 }
146 #endif
147 static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
148                        intptr_t arg2);
149 static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
150                         TCGReg base, intptr_t ofs);
151 static void tcg_out_call(TCGContext *s, tcg_insn_unit *target);
152 static int tcg_target_const_match(tcg_target_long val, TCGType type,
153                                   const TCGArgConstraint *arg_ct);
154 #ifdef TCG_TARGET_NEED_LDST_LABELS
155 static int tcg_out_ldst_finalize(TCGContext *s);
156 #endif
157 
158 #define TCG_HIGHWATER 1024
159 
160 static TCGContext **tcg_ctxs;
161 static unsigned int n_tcg_ctxs;
162 TCGv_env cpu_env = 0;
163 
164 struct tcg_region_tree {
165     QemuMutex lock;
166     GTree *tree;
167     /* padding to avoid false sharing is computed at run-time */
168 };
169 
170 /*
171  * We divide code_gen_buffer into equally-sized "regions" that TCG threads
172  * dynamically allocate from as demand dictates. Given appropriate region
173  * sizing, this minimizes flushes even when some TCG threads generate a lot
174  * more code than others.
175  */
176 struct tcg_region_state {
177     QemuMutex lock;
178 
179     /* fields set at init time */
180     void *start;
181     void *start_aligned;
182     void *end;
183     size_t n;
184     size_t size; /* size of one region */
185     size_t stride; /* .size + guard size */
186 
187     /* fields protected by the lock */
188     size_t current; /* current region index */
189     size_t agg_size_full; /* aggregate size of full regions */
190 };
191 
192 static struct tcg_region_state region;
193 /*
194  * This is an array of struct tcg_region_tree's, with padding.
195  * We use void * to simplify the computation of region_trees[i]; each
196  * struct is found every tree_size bytes.
197  */
198 static void *region_trees;
199 static size_t tree_size;
200 static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT];
201 static TCGRegSet tcg_target_call_clobber_regs;
202 
203 #if TCG_TARGET_INSN_UNIT_SIZE == 1
204 static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v)
205 {
206     *s->code_ptr++ = v;
207 }
208 
209 static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p,
210                                                       uint8_t v)
211 {
212     *p = v;
213 }
214 #endif
215 
216 #if TCG_TARGET_INSN_UNIT_SIZE <= 2
217 static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v)
218 {
219     if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
220         *s->code_ptr++ = v;
221     } else {
222         tcg_insn_unit *p = s->code_ptr;
223         memcpy(p, &v, sizeof(v));
224         s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
225     }
226 }
227 
228 static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p,
229                                                        uint16_t v)
230 {
231     if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
232         *p = v;
233     } else {
234         memcpy(p, &v, sizeof(v));
235     }
236 }
237 #endif
238 
239 #if TCG_TARGET_INSN_UNIT_SIZE <= 4
240 static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v)
241 {
242     if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
243         *s->code_ptr++ = v;
244     } else {
245         tcg_insn_unit *p = s->code_ptr;
246         memcpy(p, &v, sizeof(v));
247         s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
248     }
249 }
250 
251 static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p,
252                                                        uint32_t v)
253 {
254     if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
255         *p = v;
256     } else {
257         memcpy(p, &v, sizeof(v));
258     }
259 }
260 #endif
261 
262 #if TCG_TARGET_INSN_UNIT_SIZE <= 8
263 static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v)
264 {
265     if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
266         *s->code_ptr++ = v;
267     } else {
268         tcg_insn_unit *p = s->code_ptr;
269         memcpy(p, &v, sizeof(v));
270         s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
271     }
272 }
273 
274 static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p,
275                                                        uint64_t v)
276 {
277     if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
278         *p = v;
279     } else {
280         memcpy(p, &v, sizeof(v));
281     }
282 }
283 #endif
284 
285 /* label relocation processing */
286 
287 static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
288                           TCGLabel *l, intptr_t addend)
289 {
290     TCGRelocation *r = tcg_malloc(sizeof(TCGRelocation));
291 
292     r->type = type;
293     r->ptr = code_ptr;
294     r->addend = addend;
295     QSIMPLEQ_INSERT_TAIL(&l->relocs, r, next);
296 }
297 
298 static void tcg_out_label(TCGContext *s, TCGLabel *l, tcg_insn_unit *ptr)
299 {
300     tcg_debug_assert(!l->has_value);
301     l->has_value = 1;
302     l->u.value_ptr = ptr;
303 }
304 
305 TCGLabel *gen_new_label(void)
306 {
307     TCGContext *s = tcg_ctx;
308     TCGLabel *l = tcg_malloc(sizeof(TCGLabel));
309 
310     memset(l, 0, sizeof(TCGLabel));
311     l->id = s->nb_labels++;
312     QSIMPLEQ_INIT(&l->relocs);
313 
314     QSIMPLEQ_INSERT_TAIL(&s->labels, l, next);
315 
316     return l;
317 }
318 
319 static bool tcg_resolve_relocs(TCGContext *s)
320 {
321     TCGLabel *l;
322 
323     QSIMPLEQ_FOREACH(l, &s->labels, next) {
324         TCGRelocation *r;
325         uintptr_t value = l->u.value;
326 
327         QSIMPLEQ_FOREACH(r, &l->relocs, next) {
328             if (!patch_reloc(r->ptr, r->type, value, r->addend)) {
329                 return false;
330             }
331         }
332     }
333     return true;
334 }
335 
336 static void set_jmp_reset_offset(TCGContext *s, int which)
337 {
338     size_t off = tcg_current_code_size(s);
339     s->tb_jmp_reset_offset[which] = off;
340     /* Make sure that we didn't overflow the stored offset.  */
341     assert(s->tb_jmp_reset_offset[which] == off);
342 }
343 
344 #include "tcg-target.inc.c"
345 
346 /* compare a pointer @ptr and a tb_tc @s */
347 static int ptr_cmp_tb_tc(const void *ptr, const struct tb_tc *s)
348 {
349     if (ptr >= s->ptr + s->size) {
350         return 1;
351     } else if (ptr < s->ptr) {
352         return -1;
353     }
354     return 0;
355 }
356 
357 static gint tb_tc_cmp(gconstpointer ap, gconstpointer bp)
358 {
359     const struct tb_tc *a = ap;
360     const struct tb_tc *b = bp;
361 
362     /*
363      * When both sizes are set, we know this isn't a lookup.
364      * This is the most likely case: every TB must be inserted; lookups
365      * are a lot less frequent.
366      */
367     if (likely(a->size && b->size)) {
368         if (a->ptr > b->ptr) {
369             return 1;
370         } else if (a->ptr < b->ptr) {
371             return -1;
372         }
373         /* a->ptr == b->ptr should happen only on deletions */
374         g_assert(a->size == b->size);
375         return 0;
376     }
377     /*
378      * All lookups have either .size field set to 0.
379      * From the glib sources we see that @ap is always the lookup key. However
380      * the docs provide no guarantee, so we just mark this case as likely.
381      */
382     if (likely(a->size == 0)) {
383         return ptr_cmp_tb_tc(a->ptr, b);
384     }
385     return ptr_cmp_tb_tc(b->ptr, a);
386 }
387 
388 static void tcg_region_trees_init(void)
389 {
390     size_t i;
391 
392     tree_size = ROUND_UP(sizeof(struct tcg_region_tree), qemu_dcache_linesize);
393     region_trees = qemu_memalign(qemu_dcache_linesize, region.n * tree_size);
394     for (i = 0; i < region.n; i++) {
395         struct tcg_region_tree *rt = region_trees + i * tree_size;
396 
397         qemu_mutex_init(&rt->lock);
398         rt->tree = g_tree_new(tb_tc_cmp);
399     }
400 }
401 
402 static struct tcg_region_tree *tc_ptr_to_region_tree(void *p)
403 {
404     size_t region_idx;
405 
406     if (p < region.start_aligned) {
407         region_idx = 0;
408     } else {
409         ptrdiff_t offset = p - region.start_aligned;
410 
411         if (offset > region.stride * (region.n - 1)) {
412             region_idx = region.n - 1;
413         } else {
414             region_idx = offset / region.stride;
415         }
416     }
417     return region_trees + region_idx * tree_size;
418 }
419 
420 void tcg_tb_insert(TranslationBlock *tb)
421 {
422     struct tcg_region_tree *rt = tc_ptr_to_region_tree(tb->tc.ptr);
423 
424     qemu_mutex_lock(&rt->lock);
425     g_tree_insert(rt->tree, &tb->tc, tb);
426     qemu_mutex_unlock(&rt->lock);
427 }
428 
429 void tcg_tb_remove(TranslationBlock *tb)
430 {
431     struct tcg_region_tree *rt = tc_ptr_to_region_tree(tb->tc.ptr);
432 
433     qemu_mutex_lock(&rt->lock);
434     g_tree_remove(rt->tree, &tb->tc);
435     qemu_mutex_unlock(&rt->lock);
436 }
437 
438 /*
439  * Find the TB 'tb' such that
440  * tb->tc.ptr <= tc_ptr < tb->tc.ptr + tb->tc.size
441  * Return NULL if not found.
442  */
443 TranslationBlock *tcg_tb_lookup(uintptr_t tc_ptr)
444 {
445     struct tcg_region_tree *rt = tc_ptr_to_region_tree((void *)tc_ptr);
446     TranslationBlock *tb;
447     struct tb_tc s = { .ptr = (void *)tc_ptr };
448 
449     qemu_mutex_lock(&rt->lock);
450     tb = g_tree_lookup(rt->tree, &s);
451     qemu_mutex_unlock(&rt->lock);
452     return tb;
453 }
454 
455 static void tcg_region_tree_lock_all(void)
456 {
457     size_t i;
458 
459     for (i = 0; i < region.n; i++) {
460         struct tcg_region_tree *rt = region_trees + i * tree_size;
461 
462         qemu_mutex_lock(&rt->lock);
463     }
464 }
465 
466 static void tcg_region_tree_unlock_all(void)
467 {
468     size_t i;
469 
470     for (i = 0; i < region.n; i++) {
471         struct tcg_region_tree *rt = region_trees + i * tree_size;
472 
473         qemu_mutex_unlock(&rt->lock);
474     }
475 }
476 
477 void tcg_tb_foreach(GTraverseFunc func, gpointer user_data)
478 {
479     size_t i;
480 
481     tcg_region_tree_lock_all();
482     for (i = 0; i < region.n; i++) {
483         struct tcg_region_tree *rt = region_trees + i * tree_size;
484 
485         g_tree_foreach(rt->tree, func, user_data);
486     }
487     tcg_region_tree_unlock_all();
488 }
489 
490 size_t tcg_nb_tbs(void)
491 {
492     size_t nb_tbs = 0;
493     size_t i;
494 
495     tcg_region_tree_lock_all();
496     for (i = 0; i < region.n; i++) {
497         struct tcg_region_tree *rt = region_trees + i * tree_size;
498 
499         nb_tbs += g_tree_nnodes(rt->tree);
500     }
501     tcg_region_tree_unlock_all();
502     return nb_tbs;
503 }
504 
505 static void tcg_region_tree_reset_all(void)
506 {
507     size_t i;
508 
509     tcg_region_tree_lock_all();
510     for (i = 0; i < region.n; i++) {
511         struct tcg_region_tree *rt = region_trees + i * tree_size;
512 
513         /* Increment the refcount first so that destroy acts as a reset */
514         g_tree_ref(rt->tree);
515         g_tree_destroy(rt->tree);
516     }
517     tcg_region_tree_unlock_all();
518 }
519 
520 static void tcg_region_bounds(size_t curr_region, void **pstart, void **pend)
521 {
522     void *start, *end;
523 
524     start = region.start_aligned + curr_region * region.stride;
525     end = start + region.size;
526 
527     if (curr_region == 0) {
528         start = region.start;
529     }
530     if (curr_region == region.n - 1) {
531         end = region.end;
532     }
533 
534     *pstart = start;
535     *pend = end;
536 }
537 
538 static void tcg_region_assign(TCGContext *s, size_t curr_region)
539 {
540     void *start, *end;
541 
542     tcg_region_bounds(curr_region, &start, &end);
543 
544     s->code_gen_buffer = start;
545     s->code_gen_ptr = start;
546     s->code_gen_buffer_size = end - start;
547     s->code_gen_highwater = end - TCG_HIGHWATER;
548 }
549 
550 static bool tcg_region_alloc__locked(TCGContext *s)
551 {
552     if (region.current == region.n) {
553         return true;
554     }
555     tcg_region_assign(s, region.current);
556     region.current++;
557     return false;
558 }
559 
560 /*
561  * Request a new region once the one in use has filled up.
562  * Returns true on error.
563  */
564 static bool tcg_region_alloc(TCGContext *s)
565 {
566     bool err;
567     /* read the region size now; alloc__locked will overwrite it on success */
568     size_t size_full = s->code_gen_buffer_size;
569 
570     qemu_mutex_lock(&region.lock);
571     err = tcg_region_alloc__locked(s);
572     if (!err) {
573         region.agg_size_full += size_full - TCG_HIGHWATER;
574     }
575     qemu_mutex_unlock(&region.lock);
576     return err;
577 }
578 
579 /*
580  * Perform a context's first region allocation.
581  * This function does _not_ increment region.agg_size_full.
582  */
583 static inline bool tcg_region_initial_alloc__locked(TCGContext *s)
584 {
585     return tcg_region_alloc__locked(s);
586 }
587 
588 /* Call from a safe-work context */
589 void tcg_region_reset_all(void)
590 {
591     unsigned int n_ctxs = atomic_read(&n_tcg_ctxs);
592     unsigned int i;
593 
594     qemu_mutex_lock(&region.lock);
595     region.current = 0;
596     region.agg_size_full = 0;
597 
598     for (i = 0; i < n_ctxs; i++) {
599         TCGContext *s = atomic_read(&tcg_ctxs[i]);
600         bool err = tcg_region_initial_alloc__locked(s);
601 
602         g_assert(!err);
603     }
604     qemu_mutex_unlock(&region.lock);
605 
606     tcg_region_tree_reset_all();
607 }
608 
609 #ifdef CONFIG_USER_ONLY
/* User-mode always works with exactly one region. */
static size_t tcg_n_regions(void)
{
    const size_t single_region = 1;

    return single_region;
}
614 #else
615 /*
616  * It is likely that some vCPUs will translate more code than others, so we
617  * first try to set more regions than max_cpus, with those regions being of
618  * reasonable size. If that's not possible we make do by evenly dividing
619  * the code_gen_buffer among the vCPUs.
620  */
621 static size_t tcg_n_regions(void)
622 {
623     size_t i;
624 
625     /* Use a single region if all we have is one vCPU thread */
626 #if !defined(CONFIG_USER_ONLY)
627     MachineState *ms = MACHINE(qdev_get_machine());
628     unsigned int max_cpus = ms->smp.max_cpus;
629 #endif
630     if (max_cpus == 1 || !qemu_tcg_mttcg_enabled()) {
631         return 1;
632     }
633 
634     /* Try to have more regions than max_cpus, with each region being >= 2 MB */
635     for (i = 8; i > 0; i--) {
636         size_t regions_per_thread = i;
637         size_t region_size;
638 
639         region_size = tcg_init_ctx.code_gen_buffer_size;
640         region_size /= max_cpus * regions_per_thread;
641 
642         if (region_size >= 2 * 1024u * 1024) {
643             return max_cpus * regions_per_thread;
644         }
645     }
646     /* If we can't, then just allocate one region per vCPU thread */
647     return max_cpus;
648 }
649 #endif
650 
651 /*
652  * Initializes region partitioning.
653  *
654  * Called at init time from the parent thread (i.e. the one calling
655  * tcg_context_init), after the target's TCG globals have been set.
656  *
657  * Region partitioning works by splitting code_gen_buffer into separate regions,
658  * and then assigning regions to TCG threads so that the threads can translate
659  * code in parallel without synchronization.
660  *
661  * In softmmu the number of TCG threads is bounded by max_cpus, so we use at
662  * least max_cpus regions in MTTCG. In !MTTCG we use a single region.
663  * Note that the TCG options from the command-line (i.e. -accel accel=tcg,[...])
664  * must have been parsed before calling this function, since it calls
665  * qemu_tcg_mttcg_enabled().
666  *
667  * In user-mode we use a single region.  Having multiple regions in user-mode
668  * is not supported, because the number of vCPU threads (recall that each thread
669  * spawned by the guest corresponds to a vCPU thread) is only bounded by the
670  * OS, and usually this number is huge (tens of thousands is not uncommon).
671  * Thus, given this large bound on the number of vCPU threads and the fact
672  * that code_gen_buffer is allocated at compile-time, we cannot guarantee
673  * that the availability of at least one region per vCPU thread.
674  *
675  * However, this user-mode limitation is unlikely to be a significant problem
676  * in practice. Multi-threaded guests share most if not all of their translated
677  * code, which makes parallel code generation less appealing than in softmmu.
678  */
679 void tcg_region_init(void)
680 {
681     void *buf = tcg_init_ctx.code_gen_buffer;
682     void *aligned;
683     size_t size = tcg_init_ctx.code_gen_buffer_size;
684     size_t page_size = qemu_real_host_page_size;
685     size_t region_size;
686     size_t n_regions;
687     size_t i;
688 
689     n_regions = tcg_n_regions();
690 
691     /* The first region will be 'aligned - buf' bytes larger than the others */
692     aligned = QEMU_ALIGN_PTR_UP(buf, page_size);
693     g_assert(aligned < tcg_init_ctx.code_gen_buffer + size);
694     /*
695      * Make region_size a multiple of page_size, using aligned as the start.
696      * As a result of this we might end up with a few extra pages at the end of
697      * the buffer; we will assign those to the last region.
698      */
699     region_size = (size - (aligned - buf)) / n_regions;
700     region_size = QEMU_ALIGN_DOWN(region_size, page_size);
701 
702     /* A region must have at least 2 pages; one code, one guard */
703     g_assert(region_size >= 2 * page_size);
704 
705     /* init the region struct */
706     qemu_mutex_init(&region.lock);
707     region.n = n_regions;
708     region.size = region_size - page_size;
709     region.stride = region_size;
710     region.start = buf;
711     region.start_aligned = aligned;
712     /* page-align the end, since its last page will be a guard page */
713     region.end = QEMU_ALIGN_PTR_DOWN(buf + size, page_size);
714     /* account for that last guard page */
715     region.end -= page_size;
716 
717     /* set guard pages */
718     for (i = 0; i < region.n; i++) {
719         void *start, *end;
720         int rc;
721 
722         tcg_region_bounds(i, &start, &end);
723         rc = qemu_mprotect_none(end, page_size);
724         g_assert(!rc);
725     }
726 
727     tcg_region_trees_init();
728 
729     /* In user-mode we support only one ctx, so do the initial allocation now */
730 #ifdef CONFIG_USER_ONLY
731     {
732         bool err = tcg_region_initial_alloc__locked(tcg_ctx);
733 
734         g_assert(!err);
735     }
736 #endif
737 }
738 
739 static void alloc_tcg_plugin_context(TCGContext *s)
740 {
741 #ifdef CONFIG_PLUGIN
742     s->plugin_tb = g_new0(struct qemu_plugin_tb, 1);
743     s->plugin_tb->insns =
744         g_ptr_array_new_with_free_func(qemu_plugin_insn_cleanup_fn);
745 #endif
746 }
747 
748 /*
749  * All TCG threads except the parent (i.e. the one that called tcg_context_init
750  * and registered the target's TCG globals) must register with this function
751  * before initiating translation.
752  *
753  * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation
754  * of tcg_region_init() for the reasoning behind this.
755  *
 * In softmmu each caller registers its context in tcg_ctxs[]. Note that in
 * softmmu tcg_ctxs[] does not track tcg_init_ctx, since the initial context
 * is not used anymore for translation once this function is called.
 *
 * Not tracking tcg_init_ctx in tcg_ctxs[] in softmmu keeps code that iterates
 * over the array (e.g. tcg_code_size()) the same for both softmmu and user-mode.
762  */
763 #ifdef CONFIG_USER_ONLY
764 void tcg_register_thread(void)
765 {
766     tcg_ctx = &tcg_init_ctx;
767 }
768 #else
769 void tcg_register_thread(void)
770 {
771     MachineState *ms = MACHINE(qdev_get_machine());
772     TCGContext *s = g_malloc(sizeof(*s));
773     unsigned int i, n;
774     bool err;
775 
776     *s = tcg_init_ctx;
777 
778     /* Relink mem_base.  */
779     for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) {
780         if (tcg_init_ctx.temps[i].mem_base) {
781             ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps;
782             tcg_debug_assert(b >= 0 && b < n);
783             s->temps[i].mem_base = &s->temps[b];
784         }
785     }
786 
787     /* Claim an entry in tcg_ctxs */
788     n = atomic_fetch_inc(&n_tcg_ctxs);
789     g_assert(n < ms->smp.max_cpus);
790     atomic_set(&tcg_ctxs[n], s);
791 
792     if (n > 0) {
793         alloc_tcg_plugin_context(s);
794     }
795 
796     tcg_ctx = s;
797     qemu_mutex_lock(&region.lock);
798     err = tcg_region_initial_alloc__locked(tcg_ctx);
799     g_assert(!err);
800     qemu_mutex_unlock(&region.lock);
801 }
802 #endif /* !CONFIG_USER_ONLY */
803 
804 /*
805  * Returns the size (in bytes) of all translated code (i.e. from all regions)
806  * currently in the cache.
807  * See also: tcg_code_capacity()
808  * Do not confuse with tcg_current_code_size(); that one applies to a single
809  * TCG context.
810  */
811 size_t tcg_code_size(void)
812 {
813     unsigned int n_ctxs = atomic_read(&n_tcg_ctxs);
814     unsigned int i;
815     size_t total;
816 
817     qemu_mutex_lock(&region.lock);
818     total = region.agg_size_full;
819     for (i = 0; i < n_ctxs; i++) {
820         const TCGContext *s = atomic_read(&tcg_ctxs[i]);
821         size_t size;
822 
823         size = atomic_read(&s->code_gen_ptr) - s->code_gen_buffer;
824         g_assert(size <= s->code_gen_buffer_size);
825         total += size;
826     }
827     qemu_mutex_unlock(&region.lock);
828     return total;
829 }
830 
831 /*
832  * Returns the code capacity (in bytes) of the entire cache, i.e. including all
833  * regions.
834  * See also: tcg_code_size()
835  */
836 size_t tcg_code_capacity(void)
837 {
838     size_t guard_size, capacity;
839 
840     /* no need for synchronization; these variables are set at init time */
841     guard_size = region.stride - region.size;
842     capacity = region.end + guard_size - region.start;
843     capacity -= region.n * (guard_size + TCG_HIGHWATER);
844     return capacity;
845 }
846 
847 size_t tcg_tb_phys_invalidate_count(void)
848 {
849     unsigned int n_ctxs = atomic_read(&n_tcg_ctxs);
850     unsigned int i;
851     size_t total = 0;
852 
853     for (i = 0; i < n_ctxs; i++) {
854         const TCGContext *s = atomic_read(&tcg_ctxs[i]);
855 
856         total += atomic_read(&s->tb_phys_invalidate_count);
857     }
858     return total;
859 }
860 
861 /* pool based memory allocation */
862 void *tcg_malloc_internal(TCGContext *s, int size)
863 {
864     TCGPool *p;
865     int pool_size;
866 
867     if (size > TCG_POOL_CHUNK_SIZE) {
868         /* big malloc: insert a new pool (XXX: could optimize) */
869         p = g_malloc(sizeof(TCGPool) + size);
870         p->size = size;
871         p->next = s->pool_first_large;
872         s->pool_first_large = p;
873         return p->data;
874     } else {
875         p = s->pool_current;
876         if (!p) {
877             p = s->pool_first;
878             if (!p)
879                 goto new_pool;
880         } else {
881             if (!p->next) {
882             new_pool:
883                 pool_size = TCG_POOL_CHUNK_SIZE;
884                 p = g_malloc(sizeof(TCGPool) + pool_size);
885                 p->size = pool_size;
886                 p->next = NULL;
887                 if (s->pool_current)
888                     s->pool_current->next = p;
889                 else
890                     s->pool_first = p;
891             } else {
892                 p = p->next;
893             }
894         }
895     }
896     s->pool_current = p;
897     s->pool_cur = p->data + size;
898     s->pool_end = p->data + p->size;
899     return p->data;
900 }
901 
902 void tcg_pool_reset(TCGContext *s)
903 {
904     TCGPool *p, *t;
905     for (p = s->pool_first_large; p; p = t) {
906         t = p->next;
907         g_free(p);
908     }
909     s->pool_first_large = NULL;
910     s->pool_cur = s->pool_end = NULL;
911     s->pool_current = NULL;
912 }
913 
/* Description of one helper; entries are stored in helper_table keyed by .func. */
typedef struct TCGHelperInfo {
    void *func;        /* helper entry point, used as the hash key */
    const char *name;
    unsigned flags;
    unsigned sizemask;
} TCGHelperInfo;
920 
921 #include "exec/helper-proto.h"
922 
923 static const TCGHelperInfo all_helpers[] = {
924 #include "exec/helper-tcg.h"
925 };
926 static GHashTable *helper_table;
927 
928 static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)];
929 static void process_op_defs(TCGContext *s);
930 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
931                                             TCGReg reg, const char *name);
932 
933 void tcg_context_init(TCGContext *s)
934 {
935     int op, total_args, n, i;
936     TCGOpDef *def;
937     TCGArgConstraint *args_ct;
938     int *sorted_args;
939     TCGTemp *ts;
940 
941     memset(s, 0, sizeof(*s));
942     s->nb_globals = 0;
943 
944     /* Count total number of arguments and allocate the corresponding
945        space */
946     total_args = 0;
947     for(op = 0; op < NB_OPS; op++) {
948         def = &tcg_op_defs[op];
949         n = def->nb_iargs + def->nb_oargs;
950         total_args += n;
951     }
952 
953     args_ct = g_malloc(sizeof(TCGArgConstraint) * total_args);
954     sorted_args = g_malloc(sizeof(int) * total_args);
955 
956     for(op = 0; op < NB_OPS; op++) {
957         def = &tcg_op_defs[op];
958         def->args_ct = args_ct;
959         def->sorted_args = sorted_args;
960         n = def->nb_iargs + def->nb_oargs;
961         sorted_args += n;
962         args_ct += n;
963     }
964 
965     /* Register helpers.  */
966     /* Use g_direct_hash/equal for direct pointer comparisons on func.  */
967     helper_table = g_hash_table_new(NULL, NULL);
968 
969     for (i = 0; i < ARRAY_SIZE(all_helpers); ++i) {
970         g_hash_table_insert(helper_table, (gpointer)all_helpers[i].func,
971                             (gpointer)&all_helpers[i]);
972     }
973 
974     tcg_target_init(s);
975     process_op_defs(s);
976 
977     /* Reverse the order of the saved registers, assuming they're all at
978        the start of tcg_target_reg_alloc_order.  */
979     for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) {
980         int r = tcg_target_reg_alloc_order[n];
981         if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) {
982             break;
983         }
984     }
985     for (i = 0; i < n; ++i) {
986         indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i];
987     }
988     for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) {
989         indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i];
990     }
991 
992     alloc_tcg_plugin_context(s);
993 
994     tcg_ctx = s;
995     /*
996      * In user-mode we simply share the init context among threads, since we
997      * use a single region. See the documentation tcg_region_init() for the
998      * reasoning behind this.
999      * In softmmu we will have at most max_cpus TCG threads.
1000      */
1001 #ifdef CONFIG_USER_ONLY
1002     tcg_ctxs = &tcg_ctx;
1003     n_tcg_ctxs = 1;
1004 #else
1005     MachineState *ms = MACHINE(qdev_get_machine());
1006     unsigned int max_cpus = ms->smp.max_cpus;
1007     tcg_ctxs = g_new(TCGContext *, max_cpus);
1008 #endif
1009 
1010     tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0));
1011     ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env");
1012     cpu_env = temp_tcgv_ptr(ts);
1013 }
1014 
1015 /*
1016  * Allocate TBs right before their corresponding translated code, making
1017  * sure that TBs and code are on different cache lines.
1018  */
1019 TranslationBlock *tcg_tb_alloc(TCGContext *s)
1020 {
1021     uintptr_t align = qemu_icache_linesize;
1022     TranslationBlock *tb;
1023     void *next;
1024 
1025  retry:
1026     tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align);
1027     next = (void *)ROUND_UP((uintptr_t)(tb + 1), align);
1028 
1029     if (unlikely(next > s->code_gen_highwater)) {
1030         if (tcg_region_alloc(s)) {
1031             return NULL;
1032         }
1033         goto retry;
1034     }
1035     atomic_set(&s->code_gen_ptr, next);
1036     s->data_gen_ptr = NULL;
1037     return tb;
1038 }
1039 
1040 void tcg_prologue_init(TCGContext *s)
1041 {
1042     size_t prologue_size, total_size;
1043     void *buf0, *buf1;
1044 
1045     /* Put the prologue at the beginning of code_gen_buffer.  */
1046     buf0 = s->code_gen_buffer;
1047     total_size = s->code_gen_buffer_size;
1048     s->code_ptr = buf0;
1049     s->code_buf = buf0;
1050     s->data_gen_ptr = NULL;
1051     s->code_gen_prologue = buf0;
1052 
1053     /* Compute a high-water mark, at which we voluntarily flush the buffer
1054        and start over.  The size here is arbitrary, significantly larger
1055        than we expect the code generation for any one opcode to require.  */
1056     s->code_gen_highwater = s->code_gen_buffer + (total_size - TCG_HIGHWATER);
1057 
1058 #ifdef TCG_TARGET_NEED_POOL_LABELS
1059     s->pool_labels = NULL;
1060 #endif
1061 
1062     /* Generate the prologue.  */
1063     tcg_target_qemu_prologue(s);
1064 
1065 #ifdef TCG_TARGET_NEED_POOL_LABELS
1066     /* Allow the prologue to put e.g. guest_base into a pool entry.  */
1067     {
1068         int result = tcg_out_pool_finalize(s);
1069         tcg_debug_assert(result == 0);
1070     }
1071 #endif
1072 
1073     buf1 = s->code_ptr;
1074     flush_icache_range((uintptr_t)buf0, (uintptr_t)buf1);
1075 
1076     /* Deduct the prologue from the buffer.  */
1077     prologue_size = tcg_current_code_size(s);
1078     s->code_gen_ptr = buf1;
1079     s->code_gen_buffer = buf1;
1080     s->code_buf = buf1;
1081     total_size -= prologue_size;
1082     s->code_gen_buffer_size = total_size;
1083 
1084     tcg_register_jit(s->code_gen_buffer, total_size);
1085 
1086 #ifdef DEBUG_DISAS
1087     if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
1088         qemu_log_lock();
1089         qemu_log("PROLOGUE: [size=%zu]\n", prologue_size);
1090         if (s->data_gen_ptr) {
1091             size_t code_size = s->data_gen_ptr - buf0;
1092             size_t data_size = prologue_size - code_size;
1093             size_t i;
1094 
1095             log_disas(buf0, code_size);
1096 
1097             for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) {
1098                 if (sizeof(tcg_target_ulong) == 8) {
1099                     qemu_log("0x%08" PRIxPTR ":  .quad  0x%016" PRIx64 "\n",
1100                              (uintptr_t)s->data_gen_ptr + i,
1101                              *(uint64_t *)(s->data_gen_ptr + i));
1102                 } else {
1103                     qemu_log("0x%08" PRIxPTR ":  .long  0x%08x\n",
1104                              (uintptr_t)s->data_gen_ptr + i,
1105                              *(uint32_t *)(s->data_gen_ptr + i));
1106                 }
1107             }
1108         } else {
1109             log_disas(buf0, prologue_size);
1110         }
1111         qemu_log("\n");
1112         qemu_log_flush();
1113         qemu_log_unlock();
1114     }
1115 #endif
1116 
1117     /* Assert that goto_ptr is implemented completely.  */
1118     if (TCG_TARGET_HAS_goto_ptr) {
1119         tcg_debug_assert(s->code_gen_epilogue != NULL);
1120     }
1121 }
1122 
1123 void tcg_func_start(TCGContext *s)
1124 {
1125     tcg_pool_reset(s);
1126     s->nb_temps = s->nb_globals;
1127 
1128     /* No temps have been previously allocated for size or locality.  */
1129     memset(s->free_temps, 0, sizeof(s->free_temps));
1130 
1131     s->nb_ops = 0;
1132     s->nb_labels = 0;
1133     s->current_frame_offset = s->frame_start;
1134 
1135 #ifdef CONFIG_DEBUG_TCG
1136     s->goto_tb_issue_mask = 0;
1137 #endif
1138 
1139     QTAILQ_INIT(&s->ops);
1140     QTAILQ_INIT(&s->free_ops);
1141     QSIMPLEQ_INIT(&s->labels);
1142 }
1143 
1144 static inline TCGTemp *tcg_temp_alloc(TCGContext *s)
1145 {
1146     int n = s->nb_temps++;
1147     tcg_debug_assert(n < TCG_MAX_TEMPS);
1148     return memset(&s->temps[n], 0, sizeof(TCGTemp));
1149 }
1150 
1151 static inline TCGTemp *tcg_global_alloc(TCGContext *s)
1152 {
1153     TCGTemp *ts;
1154 
1155     tcg_debug_assert(s->nb_globals == s->nb_temps);
1156     s->nb_globals++;
1157     ts = tcg_temp_alloc(s);
1158     ts->temp_global = 1;
1159 
1160     return ts;
1161 }
1162 
1163 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
1164                                             TCGReg reg, const char *name)
1165 {
1166     TCGTemp *ts;
1167 
1168     if (TCG_TARGET_REG_BITS == 32 && type != TCG_TYPE_I32) {
1169         tcg_abort();
1170     }
1171 
1172     ts = tcg_global_alloc(s);
1173     ts->base_type = type;
1174     ts->type = type;
1175     ts->fixed_reg = 1;
1176     ts->reg = reg;
1177     ts->name = name;
1178     tcg_regset_set_reg(s->reserved_regs, reg);
1179 
1180     return ts;
1181 }
1182 
1183 void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size)
1184 {
1185     s->frame_start = start;
1186     s->frame_end = start + size;
1187     s->frame_temp
1188         = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame");
1189 }
1190 
1191 TCGTemp *tcg_global_mem_new_internal(TCGType type, TCGv_ptr base,
1192                                      intptr_t offset, const char *name)
1193 {
1194     TCGContext *s = tcg_ctx;
1195     TCGTemp *base_ts = tcgv_ptr_temp(base);
1196     TCGTemp *ts = tcg_global_alloc(s);
1197     int indirect_reg = 0, bigendian = 0;
1198 #ifdef HOST_WORDS_BIGENDIAN
1199     bigendian = 1;
1200 #endif
1201 
1202     if (!base_ts->fixed_reg) {
1203         /* We do not support double-indirect registers.  */
1204         tcg_debug_assert(!base_ts->indirect_reg);
1205         base_ts->indirect_base = 1;
1206         s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64
1207                             ? 2 : 1);
1208         indirect_reg = 1;
1209     }
1210 
1211     if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
1212         TCGTemp *ts2 = tcg_global_alloc(s);
1213         char buf[64];
1214 
1215         ts->base_type = TCG_TYPE_I64;
1216         ts->type = TCG_TYPE_I32;
1217         ts->indirect_reg = indirect_reg;
1218         ts->mem_allocated = 1;
1219         ts->mem_base = base_ts;
1220         ts->mem_offset = offset + bigendian * 4;
1221         pstrcpy(buf, sizeof(buf), name);
1222         pstrcat(buf, sizeof(buf), "_0");
1223         ts->name = strdup(buf);
1224 
1225         tcg_debug_assert(ts2 == ts + 1);
1226         ts2->base_type = TCG_TYPE_I64;
1227         ts2->type = TCG_TYPE_I32;
1228         ts2->indirect_reg = indirect_reg;
1229         ts2->mem_allocated = 1;
1230         ts2->mem_base = base_ts;
1231         ts2->mem_offset = offset + (1 - bigendian) * 4;
1232         pstrcpy(buf, sizeof(buf), name);
1233         pstrcat(buf, sizeof(buf), "_1");
1234         ts2->name = strdup(buf);
1235     } else {
1236         ts->base_type = type;
1237         ts->type = type;
1238         ts->indirect_reg = indirect_reg;
1239         ts->mem_allocated = 1;
1240         ts->mem_base = base_ts;
1241         ts->mem_offset = offset;
1242         ts->name = name;
1243     }
1244     return ts;
1245 }
1246 
1247 TCGTemp *tcg_temp_new_internal(TCGType type, bool temp_local)
1248 {
1249     TCGContext *s = tcg_ctx;
1250     TCGTemp *ts;
1251     int idx, k;
1252 
1253     k = type + (temp_local ? TCG_TYPE_COUNT : 0);
1254     idx = find_first_bit(s->free_temps[k].l, TCG_MAX_TEMPS);
1255     if (idx < TCG_MAX_TEMPS) {
1256         /* There is already an available temp with the right type.  */
1257         clear_bit(idx, s->free_temps[k].l);
1258 
1259         ts = &s->temps[idx];
1260         ts->temp_allocated = 1;
1261         tcg_debug_assert(ts->base_type == type);
1262         tcg_debug_assert(ts->temp_local == temp_local);
1263     } else {
1264         ts = tcg_temp_alloc(s);
1265         if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
1266             TCGTemp *ts2 = tcg_temp_alloc(s);
1267 
1268             ts->base_type = type;
1269             ts->type = TCG_TYPE_I32;
1270             ts->temp_allocated = 1;
1271             ts->temp_local = temp_local;
1272 
1273             tcg_debug_assert(ts2 == ts + 1);
1274             ts2->base_type = TCG_TYPE_I64;
1275             ts2->type = TCG_TYPE_I32;
1276             ts2->temp_allocated = 1;
1277             ts2->temp_local = temp_local;
1278         } else {
1279             ts->base_type = type;
1280             ts->type = type;
1281             ts->temp_allocated = 1;
1282             ts->temp_local = temp_local;
1283         }
1284     }
1285 
1286 #if defined(CONFIG_DEBUG_TCG)
1287     s->temps_in_use++;
1288 #endif
1289     return ts;
1290 }
1291 
1292 TCGv_vec tcg_temp_new_vec(TCGType type)
1293 {
1294     TCGTemp *t;
1295 
1296 #ifdef CONFIG_DEBUG_TCG
1297     switch (type) {
1298     case TCG_TYPE_V64:
1299         assert(TCG_TARGET_HAS_v64);
1300         break;
1301     case TCG_TYPE_V128:
1302         assert(TCG_TARGET_HAS_v128);
1303         break;
1304     case TCG_TYPE_V256:
1305         assert(TCG_TARGET_HAS_v256);
1306         break;
1307     default:
1308         g_assert_not_reached();
1309     }
1310 #endif
1311 
1312     t = tcg_temp_new_internal(type, 0);
1313     return temp_tcgv_vec(t);
1314 }
1315 
1316 /* Create a new temp of the same type as an existing temp.  */
1317 TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match)
1318 {
1319     TCGTemp *t = tcgv_vec_temp(match);
1320 
1321     tcg_debug_assert(t->temp_allocated != 0);
1322 
1323     t = tcg_temp_new_internal(t->base_type, 0);
1324     return temp_tcgv_vec(t);
1325 }
1326 
1327 void tcg_temp_free_internal(TCGTemp *ts)
1328 {
1329     TCGContext *s = tcg_ctx;
1330     int k, idx;
1331 
1332 #if defined(CONFIG_DEBUG_TCG)
1333     s->temps_in_use--;
1334     if (s->temps_in_use < 0) {
1335         fprintf(stderr, "More temporaries freed than allocated!\n");
1336     }
1337 #endif
1338 
1339     tcg_debug_assert(ts->temp_global == 0);
1340     tcg_debug_assert(ts->temp_allocated != 0);
1341     ts->temp_allocated = 0;
1342 
1343     idx = temp_idx(ts);
1344     k = ts->base_type + (ts->temp_local ? TCG_TYPE_COUNT : 0);
1345     set_bit(idx, s->free_temps[k].l);
1346 }
1347 
1348 TCGv_i32 tcg_const_i32(int32_t val)
1349 {
1350     TCGv_i32 t0;
1351     t0 = tcg_temp_new_i32();
1352     tcg_gen_movi_i32(t0, val);
1353     return t0;
1354 }
1355 
1356 TCGv_i64 tcg_const_i64(int64_t val)
1357 {
1358     TCGv_i64 t0;
1359     t0 = tcg_temp_new_i64();
1360     tcg_gen_movi_i64(t0, val);
1361     return t0;
1362 }
1363 
1364 TCGv_i32 tcg_const_local_i32(int32_t val)
1365 {
1366     TCGv_i32 t0;
1367     t0 = tcg_temp_local_new_i32();
1368     tcg_gen_movi_i32(t0, val);
1369     return t0;
1370 }
1371 
1372 TCGv_i64 tcg_const_local_i64(int64_t val)
1373 {
1374     TCGv_i64 t0;
1375     t0 = tcg_temp_local_new_i64();
1376     tcg_gen_movi_i64(t0, val);
1377     return t0;
1378 }
1379 
1380 #if defined(CONFIG_DEBUG_TCG)
1381 void tcg_clear_temp_count(void)
1382 {
1383     TCGContext *s = tcg_ctx;
1384     s->temps_in_use = 0;
1385 }
1386 
1387 int tcg_check_temp_count(void)
1388 {
1389     TCGContext *s = tcg_ctx;
1390     if (s->temps_in_use) {
1391         /* Clear the count so that we don't give another
1392          * warning immediately next time around.
1393          */
1394         s->temps_in_use = 0;
1395         return 1;
1396     }
1397     return 0;
1398 }
1399 #endif
1400 
1401 /* Return true if OP may appear in the opcode stream.
1402    Test the runtime variable that controls each opcode.  */
1403 bool tcg_op_supported(TCGOpcode op)
1404 {
         /* Non-zero when the host supports at least one vector width. */
1405     const bool have_vec
1406         = TCG_TARGET_HAS_v64 | TCG_TARGET_HAS_v128 | TCG_TARGET_HAS_v256;
1407 
1408     switch (op) {
         /* Ops reported as supported unconditionally. */
1409     case INDEX_op_discard:
1410     case INDEX_op_set_label:
1411     case INDEX_op_call:
1412     case INDEX_op_br:
1413     case INDEX_op_mb:
1414     case INDEX_op_insn_start:
1415     case INDEX_op_exit_tb:
1416     case INDEX_op_goto_tb:
1417     case INDEX_op_qemu_ld_i32:
1418     case INDEX_op_qemu_st_i32:
1419     case INDEX_op_qemu_ld_i64:
1420     case INDEX_op_qemu_st_i64:
1421         return true;
1422 
1423     case INDEX_op_goto_ptr:
1424         return TCG_TARGET_HAS_goto_ptr;
1425 
         /* Basic 32-bit ops: reported as supported unconditionally. */
1426     case INDEX_op_mov_i32:
1427     case INDEX_op_movi_i32:
1428     case INDEX_op_setcond_i32:
1429     case INDEX_op_brcond_i32:
1430     case INDEX_op_ld8u_i32:
1431     case INDEX_op_ld8s_i32:
1432     case INDEX_op_ld16u_i32:
1433     case INDEX_op_ld16s_i32:
1434     case INDEX_op_ld_i32:
1435     case INDEX_op_st8_i32:
1436     case INDEX_op_st16_i32:
1437     case INDEX_op_st_i32:
1438     case INDEX_op_add_i32:
1439     case INDEX_op_sub_i32:
1440     case INDEX_op_mul_i32:
1441     case INDEX_op_and_i32:
1442     case INDEX_op_or_i32:
1443     case INDEX_op_xor_i32:
1444     case INDEX_op_shl_i32:
1445     case INDEX_op_shr_i32:
1446     case INDEX_op_sar_i32:
1447         return true;
1448 
         /* Optional 32-bit ops: each follows its TCG_TARGET_HAS_* setting. */
1449     case INDEX_op_movcond_i32:
1450         return TCG_TARGET_HAS_movcond_i32;
1451     case INDEX_op_div_i32:
1452     case INDEX_op_divu_i32:
1453         return TCG_TARGET_HAS_div_i32;
1454     case INDEX_op_rem_i32:
1455     case INDEX_op_remu_i32:
1456         return TCG_TARGET_HAS_rem_i32;
1457     case INDEX_op_div2_i32:
1458     case INDEX_op_divu2_i32:
1459         return TCG_TARGET_HAS_div2_i32;
1460     case INDEX_op_rotl_i32:
1461     case INDEX_op_rotr_i32:
1462         return TCG_TARGET_HAS_rot_i32;
1463     case INDEX_op_deposit_i32:
1464         return TCG_TARGET_HAS_deposit_i32;
1465     case INDEX_op_extract_i32:
1466         return TCG_TARGET_HAS_extract_i32;
1467     case INDEX_op_sextract_i32:
1468         return TCG_TARGET_HAS_sextract_i32;
1469     case INDEX_op_extract2_i32:
1470         return TCG_TARGET_HAS_extract2_i32;
1471     case INDEX_op_add2_i32:
1472         return TCG_TARGET_HAS_add2_i32;
1473     case INDEX_op_sub2_i32:
1474         return TCG_TARGET_HAS_sub2_i32;
1475     case INDEX_op_mulu2_i32:
1476         return TCG_TARGET_HAS_mulu2_i32;
1477     case INDEX_op_muls2_i32:
1478         return TCG_TARGET_HAS_muls2_i32;
1479     case INDEX_op_muluh_i32:
1480         return TCG_TARGET_HAS_muluh_i32;
1481     case INDEX_op_mulsh_i32:
1482         return TCG_TARGET_HAS_mulsh_i32;
1483     case INDEX_op_ext8s_i32:
1484         return TCG_TARGET_HAS_ext8s_i32;
1485     case INDEX_op_ext16s_i32:
1486         return TCG_TARGET_HAS_ext16s_i32;
1487     case INDEX_op_ext8u_i32:
1488         return TCG_TARGET_HAS_ext8u_i32;
1489     case INDEX_op_ext16u_i32:
1490         return TCG_TARGET_HAS_ext16u_i32;
1491     case INDEX_op_bswap16_i32:
1492         return TCG_TARGET_HAS_bswap16_i32;
1493     case INDEX_op_bswap32_i32:
1494         return TCG_TARGET_HAS_bswap32_i32;
1495     case INDEX_op_not_i32:
1496         return TCG_TARGET_HAS_not_i32;
1497     case INDEX_op_neg_i32:
1498         return TCG_TARGET_HAS_neg_i32;
1499     case INDEX_op_andc_i32:
1500         return TCG_TARGET_HAS_andc_i32;
1501     case INDEX_op_orc_i32:
1502         return TCG_TARGET_HAS_orc_i32;
1503     case INDEX_op_eqv_i32:
1504         return TCG_TARGET_HAS_eqv_i32;
1505     case INDEX_op_nand_i32:
1506         return TCG_TARGET_HAS_nand_i32;
1507     case INDEX_op_nor_i32:
1508         return TCG_TARGET_HAS_nor_i32;
1509     case INDEX_op_clz_i32:
1510         return TCG_TARGET_HAS_clz_i32;
1511     case INDEX_op_ctz_i32:
1512         return TCG_TARGET_HAS_ctz_i32;
1513     case INDEX_op_ctpop_i32:
1514         return TCG_TARGET_HAS_ctpop_i32;
1515 
         /* These two ops are reported only when host registers are 32 bits. */
1516     case INDEX_op_brcond2_i32:
1517     case INDEX_op_setcond2_i32:
1518         return TCG_TARGET_REG_BITS == 32;
1519 
         /* Basic 64-bit ops: reported only when host registers are 64 bits. */
1520     case INDEX_op_mov_i64:
1521     case INDEX_op_movi_i64:
1522     case INDEX_op_setcond_i64:
1523     case INDEX_op_brcond_i64:
1524     case INDEX_op_ld8u_i64:
1525     case INDEX_op_ld8s_i64:
1526     case INDEX_op_ld16u_i64:
1527     case INDEX_op_ld16s_i64:
1528     case INDEX_op_ld32u_i64:
1529     case INDEX_op_ld32s_i64:
1530     case INDEX_op_ld_i64:
1531     case INDEX_op_st8_i64:
1532     case INDEX_op_st16_i64:
1533     case INDEX_op_st32_i64:
1534     case INDEX_op_st_i64:
1535     case INDEX_op_add_i64:
1536     case INDEX_op_sub_i64:
1537     case INDEX_op_mul_i64:
1538     case INDEX_op_and_i64:
1539     case INDEX_op_or_i64:
1540     case INDEX_op_xor_i64:
1541     case INDEX_op_shl_i64:
1542     case INDEX_op_shr_i64:
1543     case INDEX_op_sar_i64:
1544     case INDEX_op_ext_i32_i64:
1545     case INDEX_op_extu_i32_i64:
1546         return TCG_TARGET_REG_BITS == 64;
1547 
         /* Optional 64-bit ops: each follows its TCG_TARGET_HAS_* setting. */
1548     case INDEX_op_movcond_i64:
1549         return TCG_TARGET_HAS_movcond_i64;
1550     case INDEX_op_div_i64:
1551     case INDEX_op_divu_i64:
1552         return TCG_TARGET_HAS_div_i64;
1553     case INDEX_op_rem_i64:
1554     case INDEX_op_remu_i64:
1555         return TCG_TARGET_HAS_rem_i64;
1556     case INDEX_op_div2_i64:
1557     case INDEX_op_divu2_i64:
1558         return TCG_TARGET_HAS_div2_i64;
1559     case INDEX_op_rotl_i64:
1560     case INDEX_op_rotr_i64:
1561         return TCG_TARGET_HAS_rot_i64;
1562     case INDEX_op_deposit_i64:
1563         return TCG_TARGET_HAS_deposit_i64;
1564     case INDEX_op_extract_i64:
1565         return TCG_TARGET_HAS_extract_i64;
1566     case INDEX_op_sextract_i64:
1567         return TCG_TARGET_HAS_sextract_i64;
1568     case INDEX_op_extract2_i64:
1569         return TCG_TARGET_HAS_extract2_i64;
1570     case INDEX_op_extrl_i64_i32:
1571         return TCG_TARGET_HAS_extrl_i64_i32;
1572     case INDEX_op_extrh_i64_i32:
1573         return TCG_TARGET_HAS_extrh_i64_i32;
1574     case INDEX_op_ext8s_i64:
1575         return TCG_TARGET_HAS_ext8s_i64;
1576     case INDEX_op_ext16s_i64:
1577         return TCG_TARGET_HAS_ext16s_i64;
1578     case INDEX_op_ext32s_i64:
1579         return TCG_TARGET_HAS_ext32s_i64;
1580     case INDEX_op_ext8u_i64:
1581         return TCG_TARGET_HAS_ext8u_i64;
1582     case INDEX_op_ext16u_i64:
1583         return TCG_TARGET_HAS_ext16u_i64;
1584     case INDEX_op_ext32u_i64:
1585         return TCG_TARGET_HAS_ext32u_i64;
1586     case INDEX_op_bswap16_i64:
1587         return TCG_TARGET_HAS_bswap16_i64;
1588     case INDEX_op_bswap32_i64:
1589         return TCG_TARGET_HAS_bswap32_i64;
1590     case INDEX_op_bswap64_i64:
1591         return TCG_TARGET_HAS_bswap64_i64;
1592     case INDEX_op_not_i64:
1593         return TCG_TARGET_HAS_not_i64;
1594     case INDEX_op_neg_i64:
1595         return TCG_TARGET_HAS_neg_i64;
1596     case INDEX_op_andc_i64:
1597         return TCG_TARGET_HAS_andc_i64;
1598     case INDEX_op_orc_i64:
1599         return TCG_TARGET_HAS_orc_i64;
1600     case INDEX_op_eqv_i64:
1601         return TCG_TARGET_HAS_eqv_i64;
1602     case INDEX_op_nand_i64:
1603         return TCG_TARGET_HAS_nand_i64;
1604     case INDEX_op_nor_i64:
1605         return TCG_TARGET_HAS_nor_i64;
1606     case INDEX_op_clz_i64:
1607         return TCG_TARGET_HAS_clz_i64;
1608     case INDEX_op_ctz_i64:
1609         return TCG_TARGET_HAS_ctz_i64;
1610     case INDEX_op_ctpop_i64:
1611         return TCG_TARGET_HAS_ctpop_i64;
1612     case INDEX_op_add2_i64:
1613         return TCG_TARGET_HAS_add2_i64;
1614     case INDEX_op_sub2_i64:
1615         return TCG_TARGET_HAS_sub2_i64;
1616     case INDEX_op_mulu2_i64:
1617         return TCG_TARGET_HAS_mulu2_i64;
1618     case INDEX_op_muls2_i64:
1619         return TCG_TARGET_HAS_muls2_i64;
1620     case INDEX_op_muluh_i64:
1621         return TCG_TARGET_HAS_muluh_i64;
1622     case INDEX_op_mulsh_i64:
1623         return TCG_TARGET_HAS_mulsh_i64;
1624 
         /* Vector ops that only require some vector width to be supported. */
1625     case INDEX_op_mov_vec:
1626     case INDEX_op_dup_vec:
1627     case INDEX_op_dupi_vec:
1628     case INDEX_op_dupm_vec:
1629     case INDEX_op_ld_vec:
1630     case INDEX_op_st_vec:
1631     case INDEX_op_add_vec:
1632     case INDEX_op_sub_vec:
1633     case INDEX_op_and_vec:
1634     case INDEX_op_or_vec:
1635     case INDEX_op_xor_vec:
1636     case INDEX_op_cmp_vec:
1637         return have_vec;
         /* Remaining vector ops need have_vec plus one extra condition each. */
1638     case INDEX_op_dup2_vec:
1639         return have_vec && TCG_TARGET_REG_BITS == 32;
1640     case INDEX_op_not_vec:
1641         return have_vec && TCG_TARGET_HAS_not_vec;
1642     case INDEX_op_neg_vec:
1643         return have_vec && TCG_TARGET_HAS_neg_vec;
1644     case INDEX_op_abs_vec:
1645         return have_vec && TCG_TARGET_HAS_abs_vec;
1646     case INDEX_op_andc_vec:
1647         return have_vec && TCG_TARGET_HAS_andc_vec;
1648     case INDEX_op_orc_vec:
1649         return have_vec && TCG_TARGET_HAS_orc_vec;
1650     case INDEX_op_mul_vec:
1651         return have_vec && TCG_TARGET_HAS_mul_vec;
1652     case INDEX_op_shli_vec:
1653     case INDEX_op_shri_vec:
1654     case INDEX_op_sari_vec:
1655         return have_vec && TCG_TARGET_HAS_shi_vec;
1656     case INDEX_op_shls_vec:
1657     case INDEX_op_shrs_vec:
1658     case INDEX_op_sars_vec:
1659         return have_vec && TCG_TARGET_HAS_shs_vec;
1660     case INDEX_op_shlv_vec:
1661     case INDEX_op_shrv_vec:
1662     case INDEX_op_sarv_vec:
1663         return have_vec && TCG_TARGET_HAS_shv_vec;
1664     case INDEX_op_ssadd_vec:
1665     case INDEX_op_usadd_vec:
1666     case INDEX_op_sssub_vec:
1667     case INDEX_op_ussub_vec:
1668         return have_vec && TCG_TARGET_HAS_sat_vec;
1669     case INDEX_op_smin_vec:
1670     case INDEX_op_umin_vec:
1671     case INDEX_op_smax_vec:
1672     case INDEX_op_umax_vec:
1673         return have_vec && TCG_TARGET_HAS_minmax_vec;
1674     case INDEX_op_bitsel_vec:
1675         return have_vec && TCG_TARGET_HAS_bitsel_vec;
1676     case INDEX_op_cmpsel_vec:
1677         return have_vec && TCG_TARGET_HAS_cmpsel_vec;
1678 
         /*
          * Any other opcode must lie strictly between INDEX_op_last_generic
          * and NB_OPS (asserted) and is reported as supported.
          */
1679     default:
1680         tcg_debug_assert(op > INDEX_op_last_generic && op < NB_OPS);
1681         return true;
1682     }
1683 }
1684 
1685 /* Note: we convert the 64 bit args to 32 bit and do some alignment
1686    and endian swap. Maybe it would be better to do the alignment
1687    and endian swap in tcg_reg_alloc_call(). */
/*
 * Emit an INDEX_op_call op that calls helper FUNC with the NARGS argument
 * temps in ARGS.  RET is the temp that receives the result, or NULL when
 * there is no return value.
 *
 * FUNC is looked up in helper_table and the result is dereferenced without
 * a NULL check, so FUNC must be a registered helper.
 *
 * When TCG_TARGET_EXTEND_ARGS is defined on a 64-bit host, the ARGS[]
 * entries of non-64-bit arguments are overwritten in place with
 * temporaries that are freed again before returning.
 */
1688 void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args)
1689 {
1690     int i, real_args, nb_rets, pi;
1691     unsigned sizemask, flags;
1692     TCGHelperInfo *info;
1693     TCGOp *op;
1694 
         /* Fetch the helper's call flags and size/sign mask. */
1695     info = g_hash_table_lookup(helper_table, (gpointer)func);
1696     flags = info->flags;
1697     sizemask = info->sizemask;
1698 
1699 #ifdef CONFIG_PLUGIN
1700     /* detect non-plugin helpers */
1701     if (tcg_ctx->plugin_insn && unlikely(strncmp(info->name, "plugin_", 7))) {
1702         tcg_ctx->plugin_insn->calls_helpers = true;
1703     }
1704 #endif
1705 
1706 #if defined(__sparc__) && !defined(__arch64__) \
1707     && !defined(CONFIG_TCG_INTERPRETER)
1708     /* We have 64-bit values in one register, but need to pass as two
1709        separate parameters.  Split them.  */
1710     int orig_sizemask = sizemask;
1711     int orig_nargs = nargs;
1712     TCGv_i64 retl, reth;
1713     TCGTemp *split_args[MAX_OPC_PARAM];
1714 
1715     retl = NULL;
1716     reth = NULL;
1717     if (sizemask != 0) {
1718         for (i = real_args = 0; i < nargs; ++i) {
1719             int is_64bit = sizemask & (1 << (i+1)*2);
1720             if (is_64bit) {
1721                 TCGv_i64 orig = temp_tcgv_i64(args[i]);
1722                 TCGv_i32 h = tcg_temp_new_i32();
1723                 TCGv_i32 l = tcg_temp_new_i32();
1724                 tcg_gen_extr_i64_i32(l, h, orig);
1725                 split_args[real_args++] = tcgv_i32_temp(h);
1726                 split_args[real_args++] = tcgv_i32_temp(l);
1727             } else {
1728                 split_args[real_args++] = args[i];
1729             }
1730         }
             /* From here on, work on the split list with no 64-bit entries. */
1731         nargs = real_args;
1732         args = split_args;
1733         sizemask = 0;
1734     }
1735 #elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
         /* Replace each non-64-bit argument by a sign- or zero-extended copy. */
1736     for (i = 0; i < nargs; ++i) {
1737         int is_64bit = sizemask & (1 << (i+1)*2);
1738         int is_signed = sizemask & (2 << (i+1)*2);
1739         if (!is_64bit) {
1740             TCGv_i64 temp = tcg_temp_new_i64();
1741             TCGv_i64 orig = temp_tcgv_i64(args[i]);
1742             if (is_signed) {
1743                 tcg_gen_ext32s_i64(temp, orig);
1744             } else {
1745                 tcg_gen_ext32u_i64(temp, orig);
1746             }
1747             args[i] = tcgv_i64_temp(temp);
1748         }
1749     }
1750 #endif /* TCG_TARGET_EXTEND_ARGS */
1751 
         /*
          * op->args[] is filled in this order: return temp(s), argument
          * temps (with any dummy alignment slots), FUNC, flags.
          */
1752     op = tcg_emit_op(INDEX_op_call);
1753 
         /* pi is the next free index in op->args[]. */
1754     pi = 0;
1755     if (ret != NULL) {
1756 #if defined(__sparc__) && !defined(__arch64__) \
1757     && !defined(CONFIG_TCG_INTERPRETER)
1758         if (orig_sizemask & 1) {
1759             /* The 32-bit ABI is going to return the 64-bit value in
1760                the %o0/%o1 register pair.  Prepare for this by using
1761                two return temporaries, and reassemble below.  */
1762             retl = tcg_temp_new_i64();
1763             reth = tcg_temp_new_i64();
1764             op->args[pi++] = tcgv_i64_arg(reth);
1765             op->args[pi++] = tcgv_i64_arg(retl);
1766             nb_rets = 2;
1767         } else {
1768             op->args[pi++] = temp_arg(ret);
1769             nb_rets = 1;
1770         }
1771 #else
             /*
              * A 64-bit result on a host with registers narrower than 64
              * bits uses the temp pair ret / ret + 1, ordered by host
              * endianness.
              */
1772         if (TCG_TARGET_REG_BITS < 64 && (sizemask & 1)) {
1773 #ifdef HOST_WORDS_BIGENDIAN
1774             op->args[pi++] = temp_arg(ret + 1);
1775             op->args[pi++] = temp_arg(ret);
1776 #else
1777             op->args[pi++] = temp_arg(ret);
1778             op->args[pi++] = temp_arg(ret + 1);
1779 #endif
1780             nb_rets = 2;
1781         } else {
1782             op->args[pi++] = temp_arg(ret);
1783             nb_rets = 1;
1784         }
1785 #endif
1786     } else {
1787         nb_rets = 0;
1788     }
1789     TCGOP_CALLO(op) = nb_rets;
1790 
         /* real_args counts argument slots, including dummy alignment slots. */
1791     real_args = 0;
1792     for (i = 0; i < nargs; i++) {
1793         int is_64bit = sizemask & (1 << (i+1)*2);
1794         if (TCG_TARGET_REG_BITS < 64 && is_64bit) {
1795 #ifdef TCG_TARGET_CALL_ALIGN_ARGS
1796             /* some targets want aligned 64 bit args */
1797             if (real_args & 1) {
1798                 op->args[pi++] = TCG_CALL_DUMMY_ARG;
1799                 real_args++;
1800             }
1801 #endif
1802            /* If stack grows up, then we will be placing successive
1803               arguments at lower addresses, which means we need to
1804               reverse the order compared to how we would normally
1805               treat either big or little-endian.  For those arguments
1806               that will wind up in registers, this still works for
1807               HPPA (the only current STACK_GROWSUP target) since the
1808               argument registers are *also* allocated in decreasing
1809               order.  If another such target is added, this logic may
1810               have to get more complicated to differentiate between
1811               stack arguments and register arguments.  */
1812 #if defined(HOST_WORDS_BIGENDIAN) != defined(TCG_TARGET_STACK_GROWSUP)
1813             op->args[pi++] = temp_arg(args[i] + 1);
1814             op->args[pi++] = temp_arg(args[i]);
1815 #else
1816             op->args[pi++] = temp_arg(args[i]);
1817             op->args[pi++] = temp_arg(args[i] + 1);
1818 #endif
1819             real_args += 2;
1820             continue;
1821         }
1822 
1823         op->args[pi++] = temp_arg(args[i]);
1824         real_args++;
1825     }
1826     op->args[pi++] = (uintptr_t)func;
1827     op->args[pi++] = flags;
1828     TCGOP_CALLI(op) = real_args;
1829 
1830     /* Make sure the fields didn't overflow.  */
1831     tcg_debug_assert(TCGOP_CALLI(op) == real_args);
1832     tcg_debug_assert(pi <= ARRAY_SIZE(op->args));
1833 
1834 #if defined(__sparc__) && !defined(__arch64__) \
1835     && !defined(CONFIG_TCG_INTERPRETER)
1836     /* Free all of the parts we allocated above.  */
1837     for (i = real_args = 0; i < orig_nargs; ++i) {
1838         int is_64bit = orig_sizemask & (1 << (i+1)*2);
1839         if (is_64bit) {
1840             tcg_temp_free_internal(args[real_args++]);
1841             tcg_temp_free_internal(args[real_args++]);
1842         } else {
1843             real_args++;
1844         }
1845     }
1846     if (orig_sizemask & 1) {
1847         /* The 32-bit ABI returned two 32-bit pieces.  Re-assemble them.
1848            Note that describing these as TCGv_i64 eliminates an unnecessary
1849            zero-extension that tcg_gen_concat_i32_i64 would create.  */
1850         tcg_gen_concat32_i64(temp_tcgv_i64(ret), retl, reth);
1851         tcg_temp_free_i64(retl);
1852         tcg_temp_free_i64(reth);
1853     }
1854 #elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
         /* Free the extended copies that replaced the non-64-bit arguments. */
1855     for (i = 0; i < nargs; ++i) {
1856         int is_64bit = sizemask & (1 << (i+1)*2);
1857         if (!is_64bit) {
1858             tcg_temp_free_internal(args[i]);
1859         }
1860     }
1861 #endif /* TCG_TARGET_EXTEND_ARGS */
1862 }
1863 
1864 static void tcg_reg_alloc_start(TCGContext *s)
1865 {
1866     int i, n;
1867     TCGTemp *ts;
1868 
1869     for (i = 0, n = s->nb_globals; i < n; i++) {
1870         ts = &s->temps[i];
1871         ts->val_type = (ts->fixed_reg ? TEMP_VAL_REG : TEMP_VAL_MEM);
1872     }
1873     for (n = s->nb_temps; i < n; i++) {
1874         ts = &s->temps[i];
1875         ts->val_type = (ts->temp_local ? TEMP_VAL_MEM : TEMP_VAL_DEAD);
1876         ts->mem_allocated = 0;
1877         ts->fixed_reg = 0;
1878     }
1879 
1880     memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp));
1881 }
1882 
1883 static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size,
1884                                  TCGTemp *ts)
1885 {
1886     int idx = temp_idx(ts);
1887 
1888     if (ts->temp_global) {
1889         pstrcpy(buf, buf_size, ts->name);
1890     } else if (ts->temp_local) {
1891         snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
1892     } else {
1893         snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
1894     }
1895     return buf;
1896 }
1897 
1898 static char *tcg_get_arg_str(TCGContext *s, char *buf,
1899                              int buf_size, TCGArg arg)
1900 {
1901     return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg));
1902 }
1903 
1904 /* Find helper name.  */
1905 static inline const char *tcg_find_helper(TCGContext *s, uintptr_t val)
1906 {
1907     const char *ret = NULL;
1908     if (helper_table) {
1909         TCGHelperInfo *info = g_hash_table_lookup(helper_table, (gpointer)val);
1910         if (info) {
1911             ret = info->name;
1912         }
1913     }
1914     return ret;
1915 }
1916 
1917 static const char * const cond_name[] =
1918 {
1919     [TCG_COND_NEVER] = "never",
1920     [TCG_COND_ALWAYS] = "always",
1921     [TCG_COND_EQ] = "eq",
1922     [TCG_COND_NE] = "ne",
1923     [TCG_COND_LT] = "lt",
1924     [TCG_COND_GE] = "ge",
1925     [TCG_COND_LE] = "le",
1926     [TCG_COND_GT] = "gt",
1927     [TCG_COND_LTU] = "ltu",
1928     [TCG_COND_GEU] = "geu",
1929     [TCG_COND_LEU] = "leu",
1930     [TCG_COND_GTU] = "gtu"
1931 };
1932 
1933 static const char * const ldst_name[] =
1934 {
1935     [MO_UB]   = "ub",
1936     [MO_SB]   = "sb",
1937     [MO_LEUW] = "leuw",
1938     [MO_LESW] = "lesw",
1939     [MO_LEUL] = "leul",
1940     [MO_LESL] = "lesl",
1941     [MO_LEQ]  = "leq",
1942     [MO_BEUW] = "beuw",
1943     [MO_BESW] = "besw",
1944     [MO_BEUL] = "beul",
1945     [MO_BESL] = "besl",
1946     [MO_BEQ]  = "beq",
1947 };
1948 
1949 static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
1950 #ifdef TARGET_ALIGNED_ONLY
1951     [MO_UNALN >> MO_ASHIFT]    = "un+",
1952     [MO_ALIGN >> MO_ASHIFT]    = "",
1953 #else
1954     [MO_UNALN >> MO_ASHIFT]    = "",
1955     [MO_ALIGN >> MO_ASHIFT]    = "al+",
1956 #endif
1957     [MO_ALIGN_2 >> MO_ASHIFT]  = "al2+",
1958     [MO_ALIGN_4 >> MO_ASHIFT]  = "al4+",
1959     [MO_ALIGN_8 >> MO_ASHIFT]  = "al8+",
1960     [MO_ALIGN_16 >> MO_ASHIFT] = "al16+",
1961     [MO_ALIGN_32 >> MO_ASHIFT] = "al32+",
1962     [MO_ALIGN_64 >> MO_ASHIFT] = "al64+",
1963 };
1964 
1965 static inline bool tcg_regset_single(TCGRegSet d)
1966 {
1967     return (d & (d - 1)) == 0;
1968 }
1969 
1970 static inline TCGReg tcg_regset_first(TCGRegSet d)
1971 {
1972     if (TCG_TARGET_NB_REGS <= 32) {
1973         return ctz32(d);
1974     } else {
1975         return ctz64(d);
1976     }
1977 }
1978 
1979 static void tcg_dump_ops(TCGContext *s, bool have_prefs)
1980 {
1981     char buf[128];
1982     TCGOp *op;
1983 
1984     QTAILQ_FOREACH(op, &s->ops, link) {
1985         int i, k, nb_oargs, nb_iargs, nb_cargs;
1986         const TCGOpDef *def;
1987         TCGOpcode c;
1988         int col = 0;
1989 
1990         c = op->opc;
1991         def = &tcg_op_defs[c];
1992 
1993         if (c == INDEX_op_insn_start) {
1994             nb_oargs = 0;
1995             col += qemu_log("\n ----");
1996 
1997             for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
1998                 target_ulong a;
1999 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
2000                 a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
2001 #else
2002                 a = op->args[i];
2003 #endif
2004                 col += qemu_log(" " TARGET_FMT_lx, a);
2005             }
2006         } else if (c == INDEX_op_call) {
2007             /* variable number of arguments */
2008             nb_oargs = TCGOP_CALLO(op);
2009             nb_iargs = TCGOP_CALLI(op);
2010             nb_cargs = def->nb_cargs;
2011 
2012             /* function name, flags, out args */
2013             col += qemu_log(" %s %s,$0x%" TCG_PRIlx ",$%d", def->name,
2014                             tcg_find_helper(s, op->args[nb_oargs + nb_iargs]),
2015                             op->args[nb_oargs + nb_iargs + 1], nb_oargs);
2016             for (i = 0; i < nb_oargs; i++) {
2017                 col += qemu_log(",%s", tcg_get_arg_str(s, buf, sizeof(buf),
2018                                                        op->args[i]));
2019             }
2020             for (i = 0; i < nb_iargs; i++) {
2021                 TCGArg arg = op->args[nb_oargs + i];
2022                 const char *t = "<dummy>";
2023                 if (arg != TCG_CALL_DUMMY_ARG) {
2024                     t = tcg_get_arg_str(s, buf, sizeof(buf), arg);
2025                 }
2026                 col += qemu_log(",%s", t);
2027             }
2028         } else {
2029             col += qemu_log(" %s ", def->name);
2030 
2031             nb_oargs = def->nb_oargs;
2032             nb_iargs = def->nb_iargs;
2033             nb_cargs = def->nb_cargs;
2034 
2035             if (def->flags & TCG_OPF_VECTOR) {
2036                 col += qemu_log("v%d,e%d,", 64 << TCGOP_VECL(op),
2037                                 8 << TCGOP_VECE(op));
2038             }
2039 
2040             k = 0;
2041             for (i = 0; i < nb_oargs; i++) {
2042                 if (k != 0) {
2043                     col += qemu_log(",");
2044                 }
2045                 col += qemu_log("%s", tcg_get_arg_str(s, buf, sizeof(buf),
2046                                                       op->args[k++]));
2047             }
2048             for (i = 0; i < nb_iargs; i++) {
2049                 if (k != 0) {
2050                     col += qemu_log(",");
2051                 }
2052                 col += qemu_log("%s", tcg_get_arg_str(s, buf, sizeof(buf),
2053                                                       op->args[k++]));
2054             }
2055             switch (c) {
2056             case INDEX_op_brcond_i32:
2057             case INDEX_op_setcond_i32:
2058             case INDEX_op_movcond_i32:
2059             case INDEX_op_brcond2_i32:
2060             case INDEX_op_setcond2_i32:
2061             case INDEX_op_brcond_i64:
2062             case INDEX_op_setcond_i64:
2063             case INDEX_op_movcond_i64:
2064             case INDEX_op_cmp_vec:
2065             case INDEX_op_cmpsel_vec:
2066                 if (op->args[k] < ARRAY_SIZE(cond_name)
2067                     && cond_name[op->args[k]]) {
2068                     col += qemu_log(",%s", cond_name[op->args[k++]]);
2069                 } else {
2070                     col += qemu_log(",$0x%" TCG_PRIlx, op->args[k++]);
2071                 }
2072                 i = 1;
2073                 break;
2074             case INDEX_op_qemu_ld_i32:
2075             case INDEX_op_qemu_st_i32:
2076             case INDEX_op_qemu_ld_i64:
2077             case INDEX_op_qemu_st_i64:
2078                 {
2079                     TCGMemOpIdx oi = op->args[k++];
2080                     MemOp op = get_memop(oi);
2081                     unsigned ix = get_mmuidx(oi);
2082 
2083                     if (op & ~(MO_AMASK | MO_BSWAP | MO_SSIZE)) {
2084                         col += qemu_log(",$0x%x,%u", op, ix);
2085                     } else {
2086                         const char *s_al, *s_op;
2087                         s_al = alignment_name[(op & MO_AMASK) >> MO_ASHIFT];
2088                         s_op = ldst_name[op & (MO_BSWAP | MO_SSIZE)];
2089                         col += qemu_log(",%s%s,%u", s_al, s_op, ix);
2090                     }
2091                     i = 1;
2092                 }
2093                 break;
2094             default:
2095                 i = 0;
2096                 break;
2097             }
2098             switch (c) {
2099             case INDEX_op_set_label:
2100             case INDEX_op_br:
2101             case INDEX_op_brcond_i32:
2102             case INDEX_op_brcond_i64:
2103             case INDEX_op_brcond2_i32:
2104                 col += qemu_log("%s$L%d", k ? "," : "",
2105                                 arg_label(op->args[k])->id);
2106                 i++, k++;
2107                 break;
2108             default:
2109                 break;
2110             }
2111             for (; i < nb_cargs; i++, k++) {
2112                 col += qemu_log("%s$0x%" TCG_PRIlx, k ? "," : "", op->args[k]);
2113             }
2114         }
2115 
2116         if (have_prefs || op->life) {
2117             for (; col < 40; ++col) {
2118                 putc(' ', qemu_logfile);
2119             }
2120         }
2121 
2122         if (op->life) {
2123             unsigned life = op->life;
2124 
2125             if (life & (SYNC_ARG * 3)) {
2126                 qemu_log("  sync:");
2127                 for (i = 0; i < 2; ++i) {
2128                     if (life & (SYNC_ARG << i)) {
2129                         qemu_log(" %d", i);
2130                     }
2131                 }
2132             }
2133             life /= DEAD_ARG;
2134             if (life) {
2135                 qemu_log("  dead:");
2136                 for (i = 0; life; ++i, life >>= 1) {
2137                     if (life & 1) {
2138                         qemu_log(" %d", i);
2139                     }
2140                 }
2141             }
2142         }
2143 
2144         if (have_prefs) {
2145             for (i = 0; i < nb_oargs; ++i) {
2146                 TCGRegSet set = op->output_pref[i];
2147 
2148                 if (i == 0) {
2149                     qemu_log("  pref=");
2150                 } else {
2151                     qemu_log(",");
2152                 }
2153                 if (set == 0) {
2154                     qemu_log("none");
2155                 } else if (set == MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS)) {
2156                     qemu_log("all");
2157 #ifdef CONFIG_DEBUG_TCG
2158                 } else if (tcg_regset_single(set)) {
2159                     TCGReg reg = tcg_regset_first(set);
2160                     qemu_log("%s", tcg_target_reg_names[reg]);
2161 #endif
2162                 } else if (TCG_TARGET_NB_REGS <= 32) {
2163                     qemu_log("%#x", (uint32_t)set);
2164                 } else {
2165                     qemu_log("%#" PRIx64, (uint64_t)set);
2166                 }
2167             }
2168         }
2169 
2170         qemu_log("\n");
2171     }
2172 }
2173 
2174 /* we give more priority to constraints with less registers */
2175 static int get_constraint_priority(const TCGOpDef *def, int k)
2176 {
2177     const TCGArgConstraint *arg_ct;
2178 
2179     int i, n;
2180     arg_ct = &def->args_ct[k];
2181     if (arg_ct->ct & TCG_CT_ALIAS) {
2182         /* an alias is equivalent to a single register */
2183         n = 1;
2184     } else {
2185         if (!(arg_ct->ct & TCG_CT_REG))
2186             return 0;
2187         n = 0;
2188         for(i = 0; i < TCG_TARGET_NB_REGS; i++) {
2189             if (tcg_regset_test_reg(arg_ct->u.regs, i))
2190                 n++;
2191         }
2192     }
2193     return TCG_TARGET_NB_REGS - n + 1;
2194 }
2195 
2196 /* sort from highest priority to lowest */
2197 static void sort_constraints(TCGOpDef *def, int start, int n)
2198 {
2199     int i, j, p1, p2, tmp;
2200 
2201     for(i = 0; i < n; i++)
2202         def->sorted_args[start + i] = start + i;
2203     if (n <= 1)
2204         return;
2205     for(i = 0; i < n - 1; i++) {
2206         for(j = i + 1; j < n; j++) {
2207             p1 = get_constraint_priority(def, def->sorted_args[start + i]);
2208             p2 = get_constraint_priority(def, def->sorted_args[start + j]);
2209             if (p1 < p2) {
2210                 tmp = def->sorted_args[start + i];
2211                 def->sorted_args[start + i] = def->sorted_args[start + j];
2212                 def->sorted_args[start + j] = tmp;
2213             }
2214         }
2215     }
2216 }
2217 
2218 static void process_op_defs(TCGContext *s)
2219 {
2220     TCGOpcode op;
2221 
2222     for (op = 0; op < NB_OPS; op++) {
2223         TCGOpDef *def = &tcg_op_defs[op];
2224         const TCGTargetOpDef *tdefs;
2225         TCGType type;
2226         int i, nb_args;
2227 
2228         if (def->flags & TCG_OPF_NOT_PRESENT) {
2229             continue;
2230         }
2231 
2232         nb_args = def->nb_iargs + def->nb_oargs;
2233         if (nb_args == 0) {
2234             continue;
2235         }
2236 
2237         tdefs = tcg_target_op_def(op);
2238         /* Missing TCGTargetOpDef entry. */
2239         tcg_debug_assert(tdefs != NULL);
2240 
2241         type = (def->flags & TCG_OPF_64BIT ? TCG_TYPE_I64 : TCG_TYPE_I32);
2242         for (i = 0; i < nb_args; i++) {
2243             const char *ct_str = tdefs->args_ct_str[i];
2244             /* Incomplete TCGTargetOpDef entry. */
2245             tcg_debug_assert(ct_str != NULL);
2246 
2247             def->args_ct[i].u.regs = 0;
2248             def->args_ct[i].ct = 0;
2249             while (*ct_str != '\0') {
2250                 switch(*ct_str) {
2251                 case '0' ... '9':
2252                     {
2253                         int oarg = *ct_str - '0';
2254                         tcg_debug_assert(ct_str == tdefs->args_ct_str[i]);
2255                         tcg_debug_assert(oarg < def->nb_oargs);
2256                         tcg_debug_assert(def->args_ct[oarg].ct & TCG_CT_REG);
2257                         /* TCG_CT_ALIAS is for the output arguments.
2258                            The input is tagged with TCG_CT_IALIAS. */
2259                         def->args_ct[i] = def->args_ct[oarg];
2260                         def->args_ct[oarg].ct |= TCG_CT_ALIAS;
2261                         def->args_ct[oarg].alias_index = i;
2262                         def->args_ct[i].ct |= TCG_CT_IALIAS;
2263                         def->args_ct[i].alias_index = oarg;
2264                     }
2265                     ct_str++;
2266                     break;
2267                 case '&':
2268                     def->args_ct[i].ct |= TCG_CT_NEWREG;
2269                     ct_str++;
2270                     break;
2271                 case 'i':
2272                     def->args_ct[i].ct |= TCG_CT_CONST;
2273                     ct_str++;
2274                     break;
2275                 default:
2276                     ct_str = target_parse_constraint(&def->args_ct[i],
2277                                                      ct_str, type);
2278                     /* Typo in TCGTargetOpDef constraint. */
2279                     tcg_debug_assert(ct_str != NULL);
2280                 }
2281             }
2282         }
2283 
2284         /* TCGTargetOpDef entry with too much information? */
2285         tcg_debug_assert(i == TCG_MAX_OP_ARGS || tdefs->args_ct_str[i] == NULL);
2286 
2287         /* sort the constraints (XXX: this is just an heuristic) */
2288         sort_constraints(def, 0, def->nb_oargs);
2289         sort_constraints(def, def->nb_oargs, def->nb_iargs);
2290     }
2291 }
2292 
2293 void tcg_op_remove(TCGContext *s, TCGOp *op)
2294 {
2295     TCGLabel *label;
2296 
2297     switch (op->opc) {
2298     case INDEX_op_br:
2299         label = arg_label(op->args[0]);
2300         label->refs--;
2301         break;
2302     case INDEX_op_brcond_i32:
2303     case INDEX_op_brcond_i64:
2304         label = arg_label(op->args[3]);
2305         label->refs--;
2306         break;
2307     case INDEX_op_brcond2_i32:
2308         label = arg_label(op->args[5]);
2309         label->refs--;
2310         break;
2311     default:
2312         break;
2313     }
2314 
2315     QTAILQ_REMOVE(&s->ops, op, link);
2316     QTAILQ_INSERT_TAIL(&s->free_ops, op, link);
2317     s->nb_ops--;
2318 
2319 #ifdef CONFIG_PROFILER
2320     atomic_set(&s->prof.del_op_count, s->prof.del_op_count + 1);
2321 #endif
2322 }
2323 
2324 static TCGOp *tcg_op_alloc(TCGOpcode opc)
2325 {
2326     TCGContext *s = tcg_ctx;
2327     TCGOp *op;
2328 
2329     if (likely(QTAILQ_EMPTY(&s->free_ops))) {
2330         op = tcg_malloc(sizeof(TCGOp));
2331     } else {
2332         op = QTAILQ_FIRST(&s->free_ops);
2333         QTAILQ_REMOVE(&s->free_ops, op, link);
2334     }
2335     memset(op, 0, offsetof(TCGOp, link));
2336     op->opc = opc;
2337     s->nb_ops++;
2338 
2339     return op;
2340 }
2341 
2342 TCGOp *tcg_emit_op(TCGOpcode opc)
2343 {
2344     TCGOp *op = tcg_op_alloc(opc);
2345     QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
2346     return op;
2347 }
2348 
2349 TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op, TCGOpcode opc)
2350 {
2351     TCGOp *new_op = tcg_op_alloc(opc);
2352     QTAILQ_INSERT_BEFORE(old_op, new_op, link);
2353     return new_op;
2354 }
2355 
2356 TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op, TCGOpcode opc)
2357 {
2358     TCGOp *new_op = tcg_op_alloc(opc);
2359     QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link);
2360     return new_op;
2361 }
2362 
2363 /* Reachable analysis : remove unreachable code.  */
2364 static void reachable_code_pass(TCGContext *s)
2365 {
2366     TCGOp *op, *op_next;
2367     bool dead = false;
2368 
2369     QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
2370         bool remove = dead;
2371         TCGLabel *label;
2372         int call_flags;
2373 
2374         switch (op->opc) {
2375         case INDEX_op_set_label:
2376             label = arg_label(op->args[0]);
2377             if (label->refs == 0) {
2378                 /*
2379                  * While there is an occasional backward branch, virtually
2380                  * all branches generated by the translators are forward.
2381                  * Which means that generally we will have already removed
2382                  * all references to the label that will be, and there is
2383                  * little to be gained by iterating.
2384                  */
2385                 remove = true;
2386             } else {
2387                 /* Once we see a label, insns become live again.  */
2388                 dead = false;
2389                 remove = false;
2390 
2391                 /*
2392                  * Optimization can fold conditional branches to unconditional.
2393                  * If we find a label with one reference which is preceded by
2394                  * an unconditional branch to it, remove both.  This needed to
2395                  * wait until the dead code in between them was removed.
2396                  */
2397                 if (label->refs == 1) {
2398                     TCGOp *op_prev = QTAILQ_PREV(op, link);
2399                     if (op_prev->opc == INDEX_op_br &&
2400                         label == arg_label(op_prev->args[0])) {
2401                         tcg_op_remove(s, op_prev);
2402                         remove = true;
2403                     }
2404                 }
2405             }
2406             break;
2407 
2408         case INDEX_op_br:
2409         case INDEX_op_exit_tb:
2410         case INDEX_op_goto_ptr:
2411             /* Unconditional branches; everything following is dead.  */
2412             dead = true;
2413             break;
2414 
2415         case INDEX_op_call:
2416             /* Notice noreturn helper calls, raising exceptions.  */
2417             call_flags = op->args[TCGOP_CALLO(op) + TCGOP_CALLI(op) + 1];
2418             if (call_flags & TCG_CALL_NO_RETURN) {
2419                 dead = true;
2420             }
2421             break;
2422 
2423         case INDEX_op_insn_start:
2424             /* Never remove -- we need to keep these for unwind.  */
2425             remove = false;
2426             break;
2427 
2428         default:
2429             break;
2430         }
2431 
2432         if (remove) {
2433             tcg_op_remove(s, op);
2434         }
2435     }
2436 }
2437 
/* Bits stored in TCGTemp.state by the liveness helpers below.  */
#define TS_DEAD  (1 << 0)
#define TS_MEM   (1 << 1)

/*
 * Test the dead / sync bit of argument n.  Both macros read a variable
 * named arg_life, which must be in scope where they are used.
 */
#define IS_DEAD_ARG(n)   (arg_life & (DEAD_ARG << (n)))
#define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n)))
2443 
2444 /* For liveness_pass_1, the register preferences for a given temp.  */
2445 static inline TCGRegSet *la_temp_pref(TCGTemp *ts)
2446 {
2447     return ts->state_ptr;
2448 }
2449 
2450 /* For liveness_pass_1, reset the preferences for a given temp to the
2451  * maximal regset for its type.
2452  */
2453 static inline void la_reset_pref(TCGTemp *ts)
2454 {
2455     *la_temp_pref(ts)
2456         = (ts->state == TS_DEAD ? 0 : tcg_target_available_regs[ts->type]);
2457 }
2458 
2459 /* liveness analysis: end of function: all temps are dead, and globals
2460    should be in memory. */
2461 static void la_func_end(TCGContext *s, int ng, int nt)
2462 {
2463     int i;
2464 
2465     for (i = 0; i < ng; ++i) {
2466         s->temps[i].state = TS_DEAD | TS_MEM;
2467         la_reset_pref(&s->temps[i]);
2468     }
2469     for (i = ng; i < nt; ++i) {
2470         s->temps[i].state = TS_DEAD;
2471         la_reset_pref(&s->temps[i]);
2472     }
2473 }
2474 
2475 /* liveness analysis: end of basic block: all temps are dead, globals
2476    and local temps should be in memory. */
2477 static void la_bb_end(TCGContext *s, int ng, int nt)
2478 {
2479     int i;
2480 
2481     for (i = 0; i < ng; ++i) {
2482         s->temps[i].state = TS_DEAD | TS_MEM;
2483         la_reset_pref(&s->temps[i]);
2484     }
2485     for (i = ng; i < nt; ++i) {
2486         s->temps[i].state = (s->temps[i].temp_local
2487                              ? TS_DEAD | TS_MEM
2488                              : TS_DEAD);
2489         la_reset_pref(&s->temps[i]);
2490     }
2491 }
2492 
2493 /* liveness analysis: sync globals back to memory.  */
2494 static void la_global_sync(TCGContext *s, int ng)
2495 {
2496     int i;
2497 
2498     for (i = 0; i < ng; ++i) {
2499         int state = s->temps[i].state;
2500         s->temps[i].state = state | TS_MEM;
2501         if (state == TS_DEAD) {
2502             /* If the global was previously dead, reset prefs.  */
2503             la_reset_pref(&s->temps[i]);
2504         }
2505     }
2506 }
2507 
2508 /* liveness analysis: sync globals back to memory and kill.  */
2509 static void la_global_kill(TCGContext *s, int ng)
2510 {
2511     int i;
2512 
2513     for (i = 0; i < ng; i++) {
2514         s->temps[i].state = TS_DEAD | TS_MEM;
2515         la_reset_pref(&s->temps[i]);
2516     }
2517 }
2518 
2519 /* liveness analysis: note live globals crossing calls.  */
2520 static void la_cross_call(TCGContext *s, int nt)
2521 {
2522     TCGRegSet mask = ~tcg_target_call_clobber_regs;
2523     int i;
2524 
2525     for (i = 0; i < nt; i++) {
2526         TCGTemp *ts = &s->temps[i];
2527         if (!(ts->state & TS_DEAD)) {
2528             TCGRegSet *pset = la_temp_pref(ts);
2529             TCGRegSet set = *pset;
2530 
2531             set &= mask;
2532             /* If the combination is not possible, restart.  */
2533             if (set == 0) {
2534                 set = tcg_target_available_regs[ts->type] & mask;
2535             }
2536             *pset = set;
2537         }
2538     }
2539 }
2540 
2541 /* Liveness analysis : update the opc_arg_life array to tell if a
2542    given input arguments is dead. Instructions updating dead
2543    temporaries are removed. */
2544 static void liveness_pass_1(TCGContext *s)
2545 {
2546     int nb_globals = s->nb_globals;
2547     int nb_temps = s->nb_temps;
2548     TCGOp *op, *op_prev;
2549     TCGRegSet *prefs;
2550     int i;
2551 
2552     prefs = tcg_malloc(sizeof(TCGRegSet) * nb_temps);
2553     for (i = 0; i < nb_temps; ++i) {
2554         s->temps[i].state_ptr = prefs + i;
2555     }
2556 
2557     /* ??? Should be redundant with the exit_tb that ends the TB.  */
2558     la_func_end(s, nb_globals, nb_temps);
2559 
2560     QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, link, op_prev) {
2561         int nb_iargs, nb_oargs;
2562         TCGOpcode opc_new, opc_new2;
2563         bool have_opc_new2;
2564         TCGLifeData arg_life = 0;
2565         TCGTemp *ts;
2566         TCGOpcode opc = op->opc;
2567         const TCGOpDef *def = &tcg_op_defs[opc];
2568 
2569         switch (opc) {
2570         case INDEX_op_call:
2571             {
2572                 int call_flags;
2573                 int nb_call_regs;
2574 
2575                 nb_oargs = TCGOP_CALLO(op);
2576                 nb_iargs = TCGOP_CALLI(op);
2577                 call_flags = op->args[nb_oargs + nb_iargs + 1];
2578 
2579                 /* pure functions can be removed if their result is unused */
2580                 if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
2581                     for (i = 0; i < nb_oargs; i++) {
2582                         ts = arg_temp(op->args[i]);
2583                         if (ts->state != TS_DEAD) {
2584                             goto do_not_remove_call;
2585                         }
2586                     }
2587                     goto do_remove;
2588                 }
2589             do_not_remove_call:
2590 
2591                 /* Output args are dead.  */
2592                 for (i = 0; i < nb_oargs; i++) {
2593                     ts = arg_temp(op->args[i]);
2594                     if (ts->state & TS_DEAD) {
2595                         arg_life |= DEAD_ARG << i;
2596                     }
2597                     if (ts->state & TS_MEM) {
2598                         arg_life |= SYNC_ARG << i;
2599                     }
2600                     ts->state = TS_DEAD;
2601                     la_reset_pref(ts);
2602 
2603                     /* Not used -- it will be tcg_target_call_oarg_regs[i].  */
2604                     op->output_pref[i] = 0;
2605                 }
2606 
2607                 if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
2608                                     TCG_CALL_NO_READ_GLOBALS))) {
2609                     la_global_kill(s, nb_globals);
2610                 } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
2611                     la_global_sync(s, nb_globals);
2612                 }
2613 
2614                 /* Record arguments that die in this helper.  */
2615                 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
2616                     ts = arg_temp(op->args[i]);
2617                     if (ts && ts->state & TS_DEAD) {
2618                         arg_life |= DEAD_ARG << i;
2619                     }
2620                 }
2621 
2622                 /* For all live registers, remove call-clobbered prefs.  */
2623                 la_cross_call(s, nb_temps);
2624 
2625                 nb_call_regs = ARRAY_SIZE(tcg_target_call_iarg_regs);
2626 
2627                 /* Input arguments are live for preceding opcodes.  */
2628                 for (i = 0; i < nb_iargs; i++) {
2629                     ts = arg_temp(op->args[i + nb_oargs]);
2630                     if (ts && ts->state & TS_DEAD) {
2631                         /* For those arguments that die, and will be allocated
2632                          * in registers, clear the register set for that arg,
2633                          * to be filled in below.  For args that will be on
2634                          * the stack, reset to any available reg.
2635                          */
2636                         *la_temp_pref(ts)
2637                             = (i < nb_call_regs ? 0 :
2638                                tcg_target_available_regs[ts->type]);
2639                         ts->state &= ~TS_DEAD;
2640                     }
2641                 }
2642 
2643                 /* For each input argument, add its input register to prefs.
2644                    If a temp is used once, this produces a single set bit.  */
2645                 for (i = 0; i < MIN(nb_call_regs, nb_iargs); i++) {
2646                     ts = arg_temp(op->args[i + nb_oargs]);
2647                     if (ts) {
2648                         tcg_regset_set_reg(*la_temp_pref(ts),
2649                                            tcg_target_call_iarg_regs[i]);
2650                     }
2651                 }
2652             }
2653             break;
2654         case INDEX_op_insn_start:
2655             break;
2656         case INDEX_op_discard:
2657             /* mark the temporary as dead */
2658             ts = arg_temp(op->args[0]);
2659             ts->state = TS_DEAD;
2660             la_reset_pref(ts);
2661             break;
2662 
2663         case INDEX_op_add2_i32:
2664             opc_new = INDEX_op_add_i32;
2665             goto do_addsub2;
2666         case INDEX_op_sub2_i32:
2667             opc_new = INDEX_op_sub_i32;
2668             goto do_addsub2;
2669         case INDEX_op_add2_i64:
2670             opc_new = INDEX_op_add_i64;
2671             goto do_addsub2;
2672         case INDEX_op_sub2_i64:
2673             opc_new = INDEX_op_sub_i64;
2674         do_addsub2:
2675             nb_iargs = 4;
2676             nb_oargs = 2;
2677             /* Test if the high part of the operation is dead, but not
2678                the low part.  The result can be optimized to a simple
2679                add or sub.  This happens often for x86_64 guest when the
2680                cpu mode is set to 32 bit.  */
2681             if (arg_temp(op->args[1])->state == TS_DEAD) {
2682                 if (arg_temp(op->args[0])->state == TS_DEAD) {
2683                     goto do_remove;
2684                 }
2685                 /* Replace the opcode and adjust the args in place,
2686                    leaving 3 unused args at the end.  */
2687                 op->opc = opc = opc_new;
2688                 op->args[1] = op->args[2];
2689                 op->args[2] = op->args[4];
2690                 /* Fall through and mark the single-word operation live.  */
2691                 nb_iargs = 2;
2692                 nb_oargs = 1;
2693             }
2694             goto do_not_remove;
2695 
2696         case INDEX_op_mulu2_i32:
2697             opc_new = INDEX_op_mul_i32;
2698             opc_new2 = INDEX_op_muluh_i32;
2699             have_opc_new2 = TCG_TARGET_HAS_muluh_i32;
2700             goto do_mul2;
2701         case INDEX_op_muls2_i32:
2702             opc_new = INDEX_op_mul_i32;
2703             opc_new2 = INDEX_op_mulsh_i32;
2704             have_opc_new2 = TCG_TARGET_HAS_mulsh_i32;
2705             goto do_mul2;
2706         case INDEX_op_mulu2_i64:
2707             opc_new = INDEX_op_mul_i64;
2708             opc_new2 = INDEX_op_muluh_i64;
2709             have_opc_new2 = TCG_TARGET_HAS_muluh_i64;
2710             goto do_mul2;
2711         case INDEX_op_muls2_i64:
2712             opc_new = INDEX_op_mul_i64;
2713             opc_new2 = INDEX_op_mulsh_i64;
2714             have_opc_new2 = TCG_TARGET_HAS_mulsh_i64;
2715             goto do_mul2;
2716         do_mul2:
2717             nb_iargs = 2;
2718             nb_oargs = 2;
2719             if (arg_temp(op->args[1])->state == TS_DEAD) {
2720                 if (arg_temp(op->args[0])->state == TS_DEAD) {
2721                     /* Both parts of the operation are dead.  */
2722                     goto do_remove;
2723                 }
2724                 /* The high part of the operation is dead; generate the low. */
2725                 op->opc = opc = opc_new;
2726                 op->args[1] = op->args[2];
2727                 op->args[2] = op->args[3];
2728             } else if (arg_temp(op->args[0])->state == TS_DEAD && have_opc_new2) {
2729                 /* The low part of the operation is dead; generate the high. */
2730                 op->opc = opc = opc_new2;
2731                 op->args[0] = op->args[1];
2732                 op->args[1] = op->args[2];
2733                 op->args[2] = op->args[3];
2734             } else {
2735                 goto do_not_remove;
2736             }
2737             /* Mark the single-word operation live.  */
2738             nb_oargs = 1;
2739             goto do_not_remove;
2740 
2741         default:
2742             /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
2743             nb_iargs = def->nb_iargs;
2744             nb_oargs = def->nb_oargs;
2745 
2746             /* Test if the operation can be removed because all
2747                its outputs are dead. We assume that nb_oargs == 0
2748                implies side effects */
2749             if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) {
2750                 for (i = 0; i < nb_oargs; i++) {
2751                     if (arg_temp(op->args[i])->state != TS_DEAD) {
2752                         goto do_not_remove;
2753                     }
2754                 }
2755                 goto do_remove;
2756             }
2757             goto do_not_remove;
2758 
2759         do_remove:
2760             tcg_op_remove(s, op);
2761             break;
2762 
2763         do_not_remove:
2764             for (i = 0; i < nb_oargs; i++) {
2765                 ts = arg_temp(op->args[i]);
2766 
2767                 /* Remember the preference of the uses that followed.  */
2768                 op->output_pref[i] = *la_temp_pref(ts);
2769 
2770                 /* Output args are dead.  */
2771                 if (ts->state & TS_DEAD) {
2772                     arg_life |= DEAD_ARG << i;
2773                 }
2774                 if (ts->state & TS_MEM) {
2775                     arg_life |= SYNC_ARG << i;
2776                 }
2777                 ts->state = TS_DEAD;
2778                 la_reset_pref(ts);
2779             }
2780 
2781             /* If end of basic block, update.  */
2782             if (def->flags & TCG_OPF_BB_EXIT) {
2783                 la_func_end(s, nb_globals, nb_temps);
2784             } else if (def->flags & TCG_OPF_BB_END) {
2785                 la_bb_end(s, nb_globals, nb_temps);
2786             } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
2787                 la_global_sync(s, nb_globals);
2788                 if (def->flags & TCG_OPF_CALL_CLOBBER) {
2789                     la_cross_call(s, nb_temps);
2790                 }
2791             }
2792 
2793             /* Record arguments that die in this opcode.  */
2794             for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
2795                 ts = arg_temp(op->args[i]);
2796                 if (ts->state & TS_DEAD) {
2797                     arg_life |= DEAD_ARG << i;
2798                 }
2799             }
2800 
2801             /* Input arguments are live for preceding opcodes.  */
2802             for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
2803                 ts = arg_temp(op->args[i]);
2804                 if (ts->state & TS_DEAD) {
2805                     /* For operands that were dead, initially allow
2806                        all regs for the type.  */
2807                     *la_temp_pref(ts) = tcg_target_available_regs[ts->type];
2808                     ts->state &= ~TS_DEAD;
2809                 }
2810             }
2811 
2812             /* Incorporate constraints for this operand.  */
2813             switch (opc) {
2814             case INDEX_op_mov_i32:
2815             case INDEX_op_mov_i64:
2816                 /* Note that these are TCG_OPF_NOT_PRESENT and do not
2817                    have proper constraints.  That said, special case
2818                    moves to propagate preferences backward.  */
2819                 if (IS_DEAD_ARG(1)) {
2820                     *la_temp_pref(arg_temp(op->args[0]))
2821                         = *la_temp_pref(arg_temp(op->args[1]));
2822                 }
2823                 break;
2824 
2825             default:
2826                 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
2827                     const TCGArgConstraint *ct = &def->args_ct[i];
2828                     TCGRegSet set, *pset;
2829 
2830                     ts = arg_temp(op->args[i]);
2831                     pset = la_temp_pref(ts);
2832                     set = *pset;
2833 
2834                     set &= ct->u.regs;
2835                     if (ct->ct & TCG_CT_IALIAS) {
2836                         set &= op->output_pref[ct->alias_index];
2837                     }
2838                     /* If the combination is not possible, restart.  */
2839                     if (set == 0) {
2840                         set = ct->u.regs;
2841                     }
2842                     *pset = set;
2843                 }
2844                 break;
2845             }
2846             break;
2847         }
2848         op->life = arg_life;
2849     }
2850 }
2851 
2852 /* Liveness analysis: Convert indirect regs to direct temporaries.
      *
      * Every global flagged indirect_reg is given a freshly allocated
      * "direct" temp, remembered in its state_ptr.  The op list is then
      * walked once in order:
      *   - an input naming such a global while it is TS_DEAD gets a
      *     ld_i32/ld_i64 inserted before the op to fill the direct temp;
      *   - inputs and outputs naming such a global are rewritten to the
      *     direct temp;
      *   - an output carrying the sync flag gets a st_i32/st_i64 inserted
      *     after the op.
      *
      * Returns true if at least one operand was rewritten.  */
2853 static bool liveness_pass_2(TCGContext *s)
2854 {
2855     int nb_globals = s->nb_globals;
2856     int nb_temps, i;
2857     bool changes = false;
2858     TCGOp *op, *op_next;
2859 
2860     /* Create a temporary for each indirect global.  */
2861     for (i = 0; i < nb_globals; ++i) {
2862         TCGTemp *its = &s->temps[i];
2863         if (its->indirect_reg) {
2864             TCGTemp *dts = tcg_temp_alloc(s);
2865             dts->type = its->type;
2866             dts->base_type = its->base_type;
2867             its->state_ptr = dts;
2868         } else {
2869             its->state_ptr = NULL;
2870         }
2871         /* All globals begin dead.  */
2872         its->state = TS_DEAD;
2873     }
         /* The remaining temps (i continues from nb_globals) have no direct
            counterpart and also begin dead.  nb_temps is sampled here, i.e.
            after the allocations above.  */
2874     for (nb_temps = s->nb_temps; i < nb_temps; ++i) {
2875         TCGTemp *its = &s->temps[i];
2876         its->state_ptr = NULL;
2877         its->state = TS_DEAD;
2878     }
2879 
         /* The _SAFE iterator is used because ops are inserted around the
            current one while walking.  */
2880     QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
2881         TCGOpcode opc = op->opc;
2882         const TCGOpDef *def = &tcg_op_defs[opc];
2883         TCGLifeData arg_life = op->life;
2884         int nb_iargs, nb_oargs, call_flags;
2885         TCGTemp *arg_ts, *dir_ts;
2886 
2887         if (opc == INDEX_op_call) {
                 /* Calls keep their argument counts and flags in the op.  */
2888             nb_oargs = TCGOP_CALLO(op);
2889             nb_iargs = TCGOP_CALLI(op);
2890             call_flags = op->args[nb_oargs + nb_iargs + 1];
2891         } else {
2892             nb_iargs = def->nb_iargs;
2893             nb_oargs = def->nb_oargs;
2894 
2895             /* Set flags similar to how calls require.  */
2896             if (def->flags & TCG_OPF_BB_END) {
2897                 /* Like writing globals: save_globals */
2898                 call_flags = 0;
2899             } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
2900                 /* Like reading globals: sync_globals */
2901                 call_flags = TCG_CALL_NO_WRITE_GLOBALS;
2902             } else {
2903                 /* No effect on globals.  */
2904                 call_flags = (TCG_CALL_NO_READ_GLOBALS |
2905                               TCG_CALL_NO_WRITE_GLOBALS);
2906             }
2907         }
2908 
2909         /* Make sure that input arguments are available.  */
2910         for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
2911             arg_ts = arg_temp(op->args[i]);
                 /* arg_temp() can evidently yield NULL for some inputs;
                    those are skipped.  */
2912             if (arg_ts) {
2913                 dir_ts = arg_ts->state_ptr;
2914                 if (dir_ts && arg_ts->state == TS_DEAD) {
                         /* Indirect global not currently held in its direct
                            temp: load it from its memory slot first.  */
2915                     TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32
2916                                       ? INDEX_op_ld_i32
2917                                       : INDEX_op_ld_i64);
2918                     TCGOp *lop = tcg_op_insert_before(s, op, lopc);
2919 
2920                     lop->args[0] = temp_arg(dir_ts);
2921                     lop->args[1] = temp_arg(arg_ts->mem_base);
2922                     lop->args[2] = arg_ts->mem_offset;
2923 
2924                     /* Loaded, but synced with memory.  */
2925                     arg_ts->state = TS_MEM;
2926                 }
2927             }
2928         }
2929 
2930         /* Perform input replacement, and mark inputs that became dead.
2931            No action is required except keeping temp_state up to date
2932            so that we reload when needed.  */
2933         for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
2934             arg_ts = arg_temp(op->args[i]);
2935             if (arg_ts) {
2936                 dir_ts = arg_ts->state_ptr;
2937                 if (dir_ts) {
2938                     op->args[i] = temp_arg(dir_ts);
2939                     changes = true;
2940                     if (IS_DEAD_ARG(i)) {
2941                         arg_ts->state = TS_DEAD;
2942                     }
2943                 }
2944             }
2945         }
2946 
2947         /* Liveness analysis should ensure that the following are
2948            all correct, for call sites and basic block end points.  */
2949         if (call_flags & TCG_CALL_NO_READ_GLOBALS) {
2950             /* Nothing to do */
2951         } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) {
2952             for (i = 0; i < nb_globals; ++i) {
2953                 /* Liveness should see that globals are synced back,
2954                    that is, either TS_DEAD or TS_MEM.  */
2955                 arg_ts = &s->temps[i];
2956                 tcg_debug_assert(arg_ts->state_ptr == 0
2957                                  || arg_ts->state != 0);
2958             }
2959         } else {
2960             for (i = 0; i < nb_globals; ++i) {
2961                 /* Liveness should see that globals are saved back,
2962                    that is, TS_DEAD, waiting to be reloaded.  */
2963                 arg_ts = &s->temps[i];
2964                 tcg_debug_assert(arg_ts->state_ptr == 0
2965                                  || arg_ts->state == TS_DEAD);
2966             }
2967         }
2968 
2969         /* Outputs become available.  */
2970         for (i = 0; i < nb_oargs; i++) {
2971             arg_ts = arg_temp(op->args[i]);
2972             dir_ts = arg_ts->state_ptr;
2973             if (!dir_ts) {
2974                 continue;
2975             }
2976             op->args[i] = temp_arg(dir_ts);
2977             changes = true;
2978 
2979             /* The output is now live and modified.  */
2980             arg_ts->state = 0;
2981 
2982             /* Sync outputs upon their last write.  */
2983             if (NEED_SYNC_ARG(i)) {
2984                 TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
2985                                   ? INDEX_op_st_i32
2986                                   : INDEX_op_st_i64);
2987                 TCGOp *sop = tcg_op_insert_after(s, op, sopc);
2988 
2989                 sop->args[0] = temp_arg(dir_ts);
2990                 sop->args[1] = temp_arg(arg_ts->mem_base);
2991                 sop->args[2] = arg_ts->mem_offset;
2992 
                     /* Stored back: held in the direct temp and in memory.  */
2993                 arg_ts->state = TS_MEM;
2994             }
2995             /* Drop outputs that are dead.  */
2996             if (IS_DEAD_ARG(i)) {
2997                 arg_ts->state = TS_DEAD;
2998             }
2999         }
3000     }
3001 
3002     return changes;
3003 }
3004 
3005 #ifdef CONFIG_DEBUG_TCG
/*
 * Debug helper: print to stdout where each temp currently lives (a
 * register, a memory slot, a constant, or dead), followed by one line
 * for every register that s->reg_to_temp[] maps to a temp.
 */
3006 static void dump_regs(TCGContext *s)
3007 {
3008     TCGTemp *ts;
3009     int i;
3010     char buf[64];
3011 
         /* First section: one line per temp, keyed by its printable name.  */
3012     for(i = 0; i < s->nb_temps; i++) {
3013         ts = &s->temps[i];
3014         printf("  %10s: ", tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts));
3015         switch(ts->val_type) {
3016         case TEMP_VAL_REG:
3017             printf("%s", tcg_target_reg_names[ts->reg]);
3018             break;
3019         case TEMP_VAL_MEM:
                 /* Printed as offset(base register).  */
3020             printf("%d(%s)", (int)ts->mem_offset,
3021                    tcg_target_reg_names[ts->mem_base->reg]);
3022             break;
3023         case TEMP_VAL_CONST:
3024             printf("$0x%" TCG_PRIlx, ts->val);
3025             break;
3026         case TEMP_VAL_DEAD:
3027             printf("D");
3028             break;
3029         default:
3030             printf("???");
3031             break;
3032         }
3033         printf("\n");
3034     }
3035 
         /* Second section: the reverse mapping, occupied registers only.  */
3036     for(i = 0; i < TCG_TARGET_NB_REGS; i++) {
3037         if (s->reg_to_temp[i] != NULL) {
3038             printf("%s: %s\n",
3039                    tcg_target_reg_names[i],
3040                    tcg_get_arg_str_ptr(s, buf, sizeof(buf), s->reg_to_temp[i]));
3041         }
3042     }
3043 }
3044 
/*
 * Debug consistency check between s->reg_to_temp[] and the val_type/reg
 * fields of the temps.  On the first mismatch a message and the full
 * register state are printed and tcg_abort() is called.
 */
3045 static void check_regs(TCGContext *s)
3046 {
3047     int reg;
3048     int k;
3049     TCGTemp *ts;
3050     char buf[64];
3051 
         /* Every register that claims a temp must be that temp's register.  */
3052     for (reg = 0; reg < TCG_TARGET_NB_REGS; reg++) {
3053         ts = s->reg_to_temp[reg];
3054         if (ts != NULL) {
3055             if (ts->val_type != TEMP_VAL_REG || ts->reg != reg) {
3056                 printf("Inconsistency for register %s:\n",
3057                        tcg_target_reg_names[reg]);
3058                 goto fail;
3059             }
3060         }
3061     }
         /* Every non-fixed temp held in a register must be recorded in
            reg_to_temp[] for that register.  */
3062     for (k = 0; k < s->nb_temps; k++) {
3063         ts = &s->temps[k];
3064         if (ts->val_type == TEMP_VAL_REG && !ts->fixed_reg
3065             && s->reg_to_temp[ts->reg] != ts) {
3066             printf("Inconsistency for temp %s:\n",
3067                    tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts));
                 /* Shared failure tail; the first loop jumps here too.  */
3068         fail:
3069             printf("reg state:\n");
3070             dump_regs(s);
3071             tcg_abort();
3072         }
3073     }
3074 }
3075 #endif
3076 
/*
 * Give TS a memory slot of sizeof(tcg_target_long) bytes at
 * s->current_frame_offset, based on s->frame_temp, and advance the
 * offset past it.  Calls tcg_abort() if the slot would extend beyond
 * s->frame_end.
 */
3077 static void temp_allocate_frame(TCGContext *s, TCGTemp *ts)
3078 {
3079 #if !(defined(__sparc__) && TCG_TARGET_REG_BITS == 64)
3080     /* Sparc64 stack is accessed with offset of 2047 */
         /* Everywhere else, round the offset up to a multiple of
            sizeof(tcg_target_long).  */
3081     s->current_frame_offset = (s->current_frame_offset +
3082                                (tcg_target_long)sizeof(tcg_target_long) - 1) &
3083         ~(sizeof(tcg_target_long) - 1);
3084 #endif
3085     if (s->current_frame_offset + (tcg_target_long)sizeof(tcg_target_long) >
3086         s->frame_end) {
3087         tcg_abort();
3088     }
3089     ts->mem_offset = s->current_frame_offset;
3090     ts->mem_base = s->frame_temp;
3091     ts->mem_allocated = 1;
3092     s->current_frame_offset += sizeof(tcg_target_long);
3093 }
3094 
/* Forward declaration: temp_sync() calls temp_load(), which is defined
   further down, after tcg_reg_alloc().  */
3095 static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet);
3096 
3097 /* Mark a temporary as free or dead.  If 'free_or_dead' is negative,
3098    mark it free; otherwise mark it dead.
        Temps with fixed_reg set are left untouched.  If the temp was held
        in a register, that register's reg_to_temp[] entry is cleared.
        No code is emitted here.  */
3099 static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead)
3100 {
3101     if (ts->fixed_reg) {
3102         return;
3103     }
3104     if (ts->val_type == TEMP_VAL_REG) {
3105         s->reg_to_temp[ts->reg] = NULL;
3106     }
         /* '||' binds tighter than '?:': the value is considered to live in
            memory when freeing, or when the temp is local or global;
            only other temps that die become TEMP_VAL_DEAD.  */
3107     ts->val_type = (free_or_dead < 0
3108                     || ts->temp_local
3109                     || ts->temp_global
3110                     ? TEMP_VAL_MEM : TEMP_VAL_DEAD);
3111 }
3112 
3113 /* Mark a temporary as dead.  Shorthand for temp_free_or_dead() with a
        positive 'free_or_dead' argument.  */
3114 static inline void temp_dead(TCGContext *s, TCGTemp *ts)
3115 {
3116     temp_free_or_dead(s, ts, 1);
3117 }
3118 
3119 /* Sync a temporary to memory. 'allocated_regs' is used in case a temporary
3120    register needs to be allocated to store a constant.  If 'free_or_dead'
3121    is non-zero, subsequently release the temporary; if it is positive, the
3122    temp is dead; if it is negative, the temp is free.
        'preferred_regs' is only forwarded to temp_load() when a constant has
        to be materialised in a register.  Temps with fixed_reg set are
        ignored entirely.  A store is emitted only if the temp is not already
        mem_coherent; a frame slot is allocated first when it has none.  */
3123 static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs,
3124                       TCGRegSet preferred_regs, int free_or_dead)
3125 {
3126     if (ts->fixed_reg) {
3127         return;
3128     }
3129     if (!ts->mem_coherent) {
3130         if (!ts->mem_allocated) {
3131             temp_allocate_frame(s, ts);
3132         }
3133         switch (ts->val_type) {
3134         case TEMP_VAL_CONST:
3135             /* If we're going to free the temp immediately, then we won't
3136                require it later in a register, so attempt to store the
3137                constant to memory directly.  */
3138             if (free_or_dead
3139                 && tcg_out_sti(s, ts->type, ts->val,
3140                                ts->mem_base->reg, ts->mem_offset)) {
3141                 break;
3142             }
                 /* Otherwise put the constant in a register and store that.  */
3143             temp_load(s, ts, tcg_target_available_regs[ts->type],
3144                       allocated_regs, preferred_regs);
3145             /* fallthrough */
3146 
3147         case TEMP_VAL_REG:
3148             tcg_out_st(s, ts->type, ts->reg,
3149                        ts->mem_base->reg, ts->mem_offset);
3150             break;
3151 
3152         case TEMP_VAL_MEM:
3153             break;
3154 
3155         case TEMP_VAL_DEAD:
3156         default:
3157             tcg_abort();
3158         }
3159         ts->mem_coherent = 1;
3160     }
3161     if (free_or_dead) {
3162         temp_free_or_dead(s, ts, free_or_dead);
3163     }
3164 }
3165 
3166 /* free register 'reg' by spilling the corresponding temporary if necessary.
        Nothing happens when s->reg_to_temp[reg] is NULL.  Otherwise the temp
        is synced with free_or_dead == -1, i.e. it is released as "free"
        rather than "dead".  */
3167 static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs)
3168 {
3169     TCGTemp *ts = s->reg_to_temp[reg];
3170     if (ts != NULL) {
3171         temp_sync(s, ts, allocated_regs, 0, -1);
3172     }
3173 }
3174 
3175 /**
3176  * tcg_reg_alloc:
3177  * @required_regs: Set of registers in which we must allocate.
3178  * @allocated_regs: Set of registers which must be avoided.
3179  * @preferred_regs: Set of registers we should prefer.
3180  * @rev: True if we search the registers in "indirect" order.
3181  *
3182  * The allocated register must be in @required_regs & ~@allocated_regs,
3183  * but if we can put it in @preferred_regs we may save a move later.
      *
      * A candidate with no temp in s->reg_to_temp[] is returned when one
      * exists; otherwise the first candidate in search order is emptied
      * with tcg_reg_free() and returned.  This function does not itself
      * record a new owner in s->reg_to_temp[].
3184  */
3185 static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs,
3186                             TCGRegSet allocated_regs,
3187                             TCGRegSet preferred_regs, bool rev)
3188 {
3189     int i, j, f, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
3190     TCGRegSet reg_ct[2];
3191     const int *order;
3192 
         /* reg_ct[1]: every permitted register; reg_ct[0]: the preferred
            subset of those.  */
3193     reg_ct[1] = required_regs & ~allocated_regs;
3194     tcg_debug_assert(reg_ct[1] != 0);
3195     reg_ct[0] = reg_ct[1] & preferred_regs;
3196 
3197     /* Skip the preferred_regs option if it cannot be satisfied,
3198        or if the preference made no difference.  */
         /* f is the index of the first reg_ct[] entry to consider (0 or 1).  */
3199     f = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
3200 
3201     order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
3202 
3203     /* Try free registers, preferences first.  */
3204     for (j = f; j < 2; j++) {
3205         TCGRegSet set = reg_ct[j];
3206 
3207         if (tcg_regset_single(set)) {
3208             /* One register in the set.  */
3209             TCGReg reg = tcg_regset_first(set);
3210             if (s->reg_to_temp[reg] == NULL) {
3211                 return reg;
3212             }
3213         } else {
3214             for (i = 0; i < n; i++) {
3215                 TCGReg reg = order[i];
3216                 if (s->reg_to_temp[reg] == NULL &&
3217                     tcg_regset_test_reg(set, reg)) {
3218                     return reg;
3219                 }
3220             }
3221         }
3222     }
3223 
3224     /* We must spill something.  */
3225     for (j = f; j < 2; j++) {
3226         TCGRegSet set = reg_ct[j];
3227 
3228         if (tcg_regset_single(set)) {
3229             /* One register in the set.  */
3230             TCGReg reg = tcg_regset_first(set);
3231             tcg_reg_free(s, reg, allocated_regs);
3232             return reg;
3233         } else {
3234             for (i = 0; i < n; i++) {
3235                 TCGReg reg = order[i];
3236                 if (tcg_regset_test_reg(set, reg)) {
3237                     tcg_reg_free(s, reg, allocated_regs);
3238                     return reg;
3239                 }
3240             }
3241         }
3242     }
3243 
         /* Reached only if no register of the search order is in the set.  */
3244     tcg_abort();
3245 }
3246 
3247 /* Make sure the temporary is in a register.  If needed, allocate the register
3248    from DESIRED while avoiding ALLOCATED.
        PREFERRED is passed on to tcg_reg_alloc() as a hint.  A temp that is
        already in a register is left where it is, even if that register is
        outside DESIRED.  Calls tcg_abort() for a dead temp.  */
3249 static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
3250                       TCGRegSet allocated_regs, TCGRegSet preferred_regs)
3251 {
3252     TCGReg reg;
3253 
3254     switch (ts->val_type) {
3255     case TEMP_VAL_REG:
3256         return;
3257     case TEMP_VAL_CONST:
             /* Materialise the constant; memory does not hold it.  */
3258         reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
3259                             preferred_regs, ts->indirect_base);
3260         tcg_out_movi(s, ts->type, reg, ts->val);
3261         ts->mem_coherent = 0;
3262         break;
3263     case TEMP_VAL_MEM:
             /* Reload from the slot; register and memory now agree.  */
3264         reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
3265                             preferred_regs, ts->indirect_base);
3266         tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
3267         ts->mem_coherent = 1;
3268         break;
3269     case TEMP_VAL_DEAD:
3270     default:
3271         tcg_abort();
3272     }
         /* Record the new register <-> temp association in both directions.  */
3273     ts->reg = reg;
3274     ts->val_type = TEMP_VAL_REG;
3275     s->reg_to_temp[reg] = ts;
3276 }
3277 
3278 /* Save a temporary to memory.  As written this emits no code and does not
3279    use 'allocated_regs'; it only checks the expected state.  */
3280 static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
3281 {
3282     /* The liveness analysis already ensures that globals are back
3283        in memory. Keep a tcg_debug_assert for safety. */
3284     tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || ts->fixed_reg);
3285 }
3286 
3287 /* save globals to their canonical location and assume they can be
3288    modified by the following code. 'allocated_regs' is simply passed
3289    through to temp_save() for each global. */
3290 static void save_globals(TCGContext *s, TCGRegSet allocated_regs)
3291 {
3292     int i, n;
3293 
         /* Globals occupy indices [0, nb_globals) of s->temps.  */
3294     for (i = 0, n = s->nb_globals; i < n; i++) {
3295         temp_save(s, &s->temps[i], allocated_regs);
3296     }
3297 }
3298 
3299 /* sync globals to their canonical location and assume they can be
3300    read by the following code.  As written this emits no code and does
3301    not use 'allocated_regs'; it only asserts the expected state. */
3302 static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
3303 {
3304     int i, n;
3305 
3306     for (i = 0, n = s->nb_globals; i < n; i++) {
3307         TCGTemp *ts = &s->temps[i];
             /* A global held in a non-fixed register must match memory.  */
3308         tcg_debug_assert(ts->val_type != TEMP_VAL_REG
3309                          || ts->fixed_reg
3310                          || ts->mem_coherent);
3311     }
3312 }
3313 
3314 /* at the end of a basic block, we assume all temporaries are dead and
3315    all globals are stored at their canonical location.
        Local temps are passed to temp_save(), other temps are asserted to be
        dead, and finally save_globals() is called for the globals.  */
3316 static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
3317 {
3318     int i;
3319 
         /* Non-global temps start at index nb_globals.  */
3320     for (i = s->nb_globals; i < s->nb_temps; i++) {
3321         TCGTemp *ts = &s->temps[i];
3322         if (ts->temp_local) {
3323             temp_save(s, ts, allocated_regs);
3324         } else {
3325             /* The liveness analysis already ensures that temps are dead.
3326                Keep a tcg_debug_assert for safety. */
3327             tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
3328         }
3329     }
3330 
3331     save_globals(s, allocated_regs);
3332 }
3333 
3334 /*
3335  * Specialized code generation for INDEX_op_movi_*.
      *
      * Records VAL as the constant value of OTS instead of emitting a move;
      * any register OTS was occupying is released.  Afterwards OTS is synced
      * to memory and/or marked dead as requested by ARG_LIFE for argument 0.
      * PREFERRED_REGS is only forwarded to temp_sync().
      *
      * NOTE(review): IS_DEAD_ARG()/NEED_SYNC_ARG() are defined outside this
      * view; they presumably read the parameter named 'arg_life' - confirm.
3336  */
3337 static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots,
3338                                   tcg_target_ulong val, TCGLifeData arg_life,
3339                                   TCGRegSet preferred_regs)
3340 {
3341     /* ENV should not be modified.  */
3342     tcg_debug_assert(!ots->fixed_reg);
3343 
3344     /* The movi is not explicitly generated here.  */
3345     if (ots->val_type == TEMP_VAL_REG) {
3346         s->reg_to_temp[ots->reg] = NULL;
3347     }
3348     ots->val_type = TEMP_VAL_CONST;
3349     ots->val = val;
3350     ots->mem_coherent = 0;
3351     if (NEED_SYNC_ARG(0)) {
             /* The last argument doubles as temp_sync()'s free_or_dead:
                non-zero (dead) when argument 0 dies here.  */
3352         temp_sync(s, ots, s->reserved_regs, preferred_regs, IS_DEAD_ARG(0));
3353     } else if (IS_DEAD_ARG(0)) {
3354         temp_dead(s, ots);
3355     }
3356 }
3357 
/*
 * Handle a movi op: args[0] is the destination temp and args[1] the
 * immediate.  The work is done by tcg_reg_alloc_do_movi() using the
 * op's life data and the register preference of output 0.
 */
3358 static void tcg_reg_alloc_movi(TCGContext *s, const TCGOp *op)
3359 {
3360     TCGTemp *ots = arg_temp(op->args[0]);
3361     tcg_target_ulong val = op->args[1];
3362 
3363     tcg_reg_alloc_do_movi(s, ots, val, op->life, op->output_pref[0]);
3364 }
3365 
3366 /*
3367  * Specialized code generation for INDEX_op_mov_*.
      *
      * args[0] is the destination temp (OTS), args[1] the source (TS).
      *   - A constant source is propagated via tcg_reg_alloc_do_movi().
      *   - A source in memory is first loaded into a register.
      *   - If the destination is dead (but needs a sync), the source
      *     register is stored straight into the destination's slot.
      *   - If the source dies here and is not a fixed register, the
      *     destination simply takes over the source's register.
      *   - Otherwise a register-to-register move is emitted; should the
      *     backend refuse it, the value is stored into the destination's
      *     slot and the destination is left in memory.
3368  */
3369 static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op)
3370 {
3371     const TCGLifeData arg_life = op->life;
3372     TCGRegSet allocated_regs, preferred_regs;
3373     TCGTemp *ts, *ots;
3374     TCGType otype, itype;
3375 
3376     allocated_regs = s->reserved_regs;
3377     preferred_regs = op->output_pref[0];
3378     ots = arg_temp(op->args[0]);
3379     ts = arg_temp(op->args[1]);
3380 
3381     /* ENV should not be modified.  */
3382     tcg_debug_assert(!ots->fixed_reg);
3383 
3384     /* Note that otype != itype for no-op truncation.  */
3385     otype = ots->type;
3386     itype = ts->type;
3387 
3388     if (ts->val_type == TEMP_VAL_CONST) {
3389         /* propagate constant or generate sti */
3390         tcg_target_ulong val = ts->val;
3391         if (IS_DEAD_ARG(1)) {
3392             temp_dead(s, ts);
3393         }
3394         tcg_reg_alloc_do_movi(s, ots, val, arg_life, preferred_regs);
3395         return;
3396     }
3397 
3398     /* If the source value is in memory we're going to be forced
3399        to have it in a register in order to perform the copy.  Copy
3400        the SOURCE value into its own register first, that way we
3401        don't have to reload SOURCE the next time it is used. */
3402     if (ts->val_type == TEMP_VAL_MEM) {
3403         temp_load(s, ts, tcg_target_available_regs[itype],
3404                   allocated_regs, preferred_regs);
3405     }
3406 
3407     tcg_debug_assert(ts->val_type == TEMP_VAL_REG);
3408     if (IS_DEAD_ARG(0)) {
3409         /* mov to a non-saved dead register makes no sense (even with
3410            liveness analysis disabled). */
3411         tcg_debug_assert(NEED_SYNC_ARG(0));
3412         if (!ots->mem_allocated) {
3413             temp_allocate_frame(s, ots);
3414         }
3415         tcg_out_st(s, otype, ts->reg, ots->mem_base->reg, ots->mem_offset);
3416         if (IS_DEAD_ARG(1)) {
3417             temp_dead(s, ts);
3418         }
3419         temp_dead(s, ots);
3420     } else {
3421         if (IS_DEAD_ARG(1) && !ts->fixed_reg) {
3422             /* the mov can be suppressed */
3423             if (ots->val_type == TEMP_VAL_REG) {
3424                 s->reg_to_temp[ots->reg] = NULL;
3425             }
3426             ots->reg = ts->reg;
3427             temp_dead(s, ts);
3428         } else {
3429             if (ots->val_type != TEMP_VAL_REG) {
3430                 /* When allocating a new register, make sure to not spill the
3431                    input one. */
3432                 tcg_regset_set_reg(allocated_regs, ts->reg);
3433                 ots->reg = tcg_reg_alloc(s, tcg_target_available_regs[otype],
3434                                          allocated_regs, preferred_regs,
3435                                          ots->indirect_base);
3436             }
3437             if (!tcg_out_mov(s, otype, ots->reg, ts->reg)) {
3438                 /*
3439                  * Cross register class move not supported.
3440                  * Store the source register into the destination slot
3441                  * and leave the destination temp as TEMP_VAL_MEM.
3442                  */
3443                 assert(!ots->fixed_reg);
                     /* The slot written below belongs to OTS, so it is the
                        allocation state of OTS (not of TS) that decides
                        whether a frame slot must be created.  */
3444                 if (!ots->mem_allocated) {
3445                     temp_allocate_frame(s, ots);
3446                 }
3447                 tcg_out_st(s, ts->type, ts->reg,
3448                            ots->mem_base->reg, ots->mem_offset);
3449                 ots->mem_coherent = 1;
3450                 temp_free_or_dead(s, ots, -1);
3451                 return;
3452             }
3453         }
             /* OTS now owns ots->reg; its memory copy (if any) is stale.  */
3454         ots->val_type = TEMP_VAL_REG;
3455         ots->mem_coherent = 0;
3456         s->reg_to_temp[ots->reg] = ots;
3457         if (NEED_SYNC_ARG(0)) {
3458             temp_sync(s, ots, allocated_regs, 0, 0);
3459         }
3460     }
3461 }
3462 
3463 /*
3464  * Specialized code generation for INDEX_op_dup_vec.
      *
      * args[0] is the vector destination (OTS), args[1] the scalar source
      * (ITS).  A constant source is handed to tcg_reg_alloc_do_movi().
      * Otherwise the strategies tried, in order, are: a direct dup from the
      * source register, a plain move into the output register followed by
      * a dup of the output onto itself, a dup straight from the source's
      * memory slot, and finally a load into the output register followed
      * by a dup of the output onto itself.
3465  */
3466 static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op)
3467 {
3468     const TCGLifeData arg_life = op->life;
3469     TCGRegSet dup_out_regs, dup_in_regs;
3470     TCGTemp *its, *ots;
3471     TCGType itype, vtype;
3472     intptr_t endian_fixup;
3473     unsigned vece;
3474     bool ok;
3475 
3476     ots = arg_temp(op->args[0]);
3477     its = arg_temp(op->args[1]);
3478 
3479     /* ENV should not be modified.  */
3480     tcg_debug_assert(!ots->fixed_reg);
3481 
3482     itype = its->type;
3483     vece = TCGOP_VECE(op);
3484     vtype = TCGOP_VECL(op) + TCG_TYPE_V64;
3485 
3486     if (its->val_type == TEMP_VAL_CONST) {
3487         /* Propagate constant via movi -> dupi.  */
3488         tcg_target_ulong val = its->val;
3489         if (IS_DEAD_ARG(1)) {
3490             temp_dead(s, its);
3491         }
3492         tcg_reg_alloc_do_movi(s, ots, val, arg_life, op->output_pref[0]);
3493         return;
3494     }
3495 
         /* Register constraints of dup_vec: index 0 is the output,
            index 1 the input.  */
3496     dup_out_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[0].u.regs;
3497     dup_in_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[1].u.regs;
3498 
3499     /* Allocate the output register now.  */
3500     if (ots->val_type != TEMP_VAL_REG) {
3501         TCGRegSet allocated_regs = s->reserved_regs;
3502 
3503         if (!IS_DEAD_ARG(1) && its->val_type == TEMP_VAL_REG) {
3504             /* Make sure to not spill the input register. */
3505             tcg_regset_set_reg(allocated_regs, its->reg);
3506         }
3507         ots->reg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
3508                                  op->output_pref[0], ots->indirect_base);
3509         ots->val_type = TEMP_VAL_REG;
3510         ots->mem_coherent = 0;
3511         s->reg_to_temp[ots->reg] = ots;
3512     }
3513 
3514     switch (its->val_type) {
3515     case TEMP_VAL_REG:
3516         /*
3517          * The dup constraints must be broad, covering all possible VECE.
3518          * However, tcg_out_dup_vec() gets to see the VECE and we allow it
3519          * to fail, indicating that extra moves are required for that case.
3520          */
3521         if (tcg_regset_test_reg(dup_in_regs, its->reg)) {
3522             if (tcg_out_dup_vec(s, vtype, vece, ots->reg, its->reg)) {
3523                 goto done;
3524             }
3525             /* Try again from memory or a vector input register.  */
3526         }
3527         if (!its->mem_coherent) {
3528             /*
3529              * The input register is not synced, and so an extra store
3530              * would be required to use memory.  Attempt an integer-vector
3531              * register move first.  We do not have a TCGRegSet for this.
3532              */
3533             if (tcg_out_mov(s, itype, ots->reg, its->reg)) {
                     /* Leave the switch; the dup of ots->reg onto itself
                        after the switch finishes the job.  */
3534                 break;
3535             }
3536             /* Sync the temp back to its slot and load from there.  */
3537             temp_sync(s, its, s->reserved_regs, 0, 0);
3538         }
3539         /* fall through */
3540 
3541     case TEMP_VAL_MEM:
             /* On big-endian hosts the offset is advanced by the size of the
                stored integer minus the element size (1 << vece),
                presumably so that it addresses the least significant
                element.  */
3542 #ifdef HOST_WORDS_BIGENDIAN
3543         endian_fixup = itype == TCG_TYPE_I32 ? 4 : 8;
3544         endian_fixup -= 1 << vece;
3545 #else
3546         endian_fixup = 0;
3547 #endif
3548         if (tcg_out_dupm_vec(s, vtype, vece, ots->reg, its->mem_base->reg,
3549                              its->mem_offset + endian_fixup)) {
3550             goto done;
3551         }
             /* No dup-from-memory available: load the scalar into the output
                register and duplicate it in place below.  */
3552         tcg_out_ld(s, itype, ots->reg, its->mem_base->reg, its->mem_offset);
3553         break;
3554 
3555     default:
3556         g_assert_not_reached();
3557     }
3558 
3559     /* We now have a vector input register, so dup must succeed. */
3560     ok = tcg_out_dup_vec(s, vtype, vece, ots->reg, ots->reg);
3561     tcg_debug_assert(ok);
3562 
3563  done:
         /* Apply the life data: release the input if it dies here, then
            sync and/or kill the output.  */
3564     if (IS_DEAD_ARG(1)) {
3565         temp_dead(s, its);
3566     }
3567     if (NEED_SYNC_ARG(0)) {
3568         temp_sync(s, ots, s->reserved_regs, 0, 0);
3569     }
3570     if (IS_DEAD_ARG(0)) {
3571         temp_dead(s, ots);
3572     }
3573 }
3574 
/*
 * Register-allocate and emit one generic op.
 *
 * The work is done in this order:
 *  - copy the constant arguments of the op through unchanged;
 *  - for every input, either pass it as a constant (when
 *    tcg_target_const_match() accepts it) or place it in a register
 *    that is a member of its constraint's register set;
 *  - release the inputs that are flagged dead;
 *  - handle the TCG_OPF_BB_END, TCG_OPF_CALL_CLOBBER and
 *    TCG_OPF_SIDE_EFFECTS flags of the op;
 *  - choose a register for every output;
 *  - emit the instruction with tcg_out_vec_op() or tcg_out_op();
 *  - sync or kill each output as flagged.
 */
3575 static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
3576 {
         /*
          * NOTE(review): arg_life is not named again below; presumably the
          * IS_DEAD_ARG() / NEED_SYNC_ARG() macros read it -- confirm.
          */
3577     const TCGLifeData arg_life = op->life;
3578     const TCGOpDef * const def = &tcg_op_defs[op->opc];
3579     TCGRegSet i_allocated_regs;
3580     TCGRegSet o_allocated_regs;
3581     int i, k, nb_iargs, nb_oargs;
3582     TCGReg reg;
3583     TCGArg arg;
3584     const TCGArgConstraint *arg_ct;
3585     TCGTemp *ts;
         /* Arguments and per-argument "is constant" flags handed to the emitter. */
3586     TCGArg new_args[TCG_MAX_OP_ARGS];
3587     int const_args[TCG_MAX_OP_ARGS];
3588 
3589     nb_oargs = def->nb_oargs;
3590     nb_iargs = def->nb_iargs;
3591 
3592     /* copy constants */
3593     memcpy(new_args + nb_oargs + nb_iargs,
3594            op->args + nb_oargs + nb_iargs,
3595            sizeof(TCGArg) * def->nb_cargs);
3596 
         /*
          * Both sets start as the reserved registers; each register picked
          * for an input (resp. output) below is added to its set.
          */
3597     i_allocated_regs = s->reserved_regs;
3598     o_allocated_regs = s->reserved_regs;
3599 
3600     /* satisfy input constraints */
3601     for (k = 0; k < nb_iargs; k++) {
3602         TCGRegSet i_preferred_regs, o_preferred_regs;
3603 
             /* Inputs are visited in def->sorted_args order, not argument order. */
3604         i = def->sorted_args[nb_oargs + k];
3605         arg = op->args[i];
3606         arg_ct = &def->args_ct[i];
3607         ts = arg_temp(arg);
3608 
3609         if (ts->val_type == TEMP_VAL_CONST
3610             && tcg_target_const_match(ts->val, ts->type, arg_ct)) {
3611             /* constant is OK for instruction */
3612             const_args[i] = 1;
3613             new_args[i] = ts->val;
3614             continue;
3615         }
3616 
3617         i_preferred_regs = o_preferred_regs = 0;
3618         if (arg_ct->ct & TCG_CT_IALIAS) {
                 /* Prefer whatever the aliased output prefers. */
3619             o_preferred_regs = op->output_pref[arg_ct->alias_index];
3620             if (ts->fixed_reg) {
3621                 /* if fixed register, we must allocate a new register
3622                    if the alias is not the same register */
3623                 if (arg != op->args[arg_ct->alias_index]) {
3624                     goto allocate_in_reg;
3625                 }
3626             } else {
3627                 /* if the input is aliased to an output and if it is
3628                    not dead after the instruction, we must allocate
3629                    a new register and move it */
3630                 if (!IS_DEAD_ARG(i)) {
3631                     goto allocate_in_reg;
3632                 }
3633 
3634                 /* check if the current register has already been allocated
3635                    for another input aliased to an output */
3636                 if (ts->val_type == TEMP_VAL_REG) {
3637                     int k2, i2;
3638                     reg = ts->reg;
3639                     for (k2 = 0 ; k2 < k ; k2++) {
3640                         i2 = def->sorted_args[nb_oargs + k2];
3641                         if ((def->args_ct[i2].ct & TCG_CT_IALIAS) &&
3642                             reg == new_args[i2]) {
3643                             goto allocate_in_reg;
3644                         }
3645                     }
3646                 }
3647                 i_preferred_regs = o_preferred_regs;
3648             }
3649         }
3650 
3651         temp_load(s, ts, arg_ct->u.regs, i_allocated_regs, i_preferred_regs);
3652         reg = ts->reg;
3653 
3654         if (tcg_regset_test_reg(arg_ct->u.regs, reg)) {
3655             /* nothing to do : the constraint is satisfied */
3656         } else {
             /*
              * This label sits inside the else body and is also the target
              * of the gotos in the alias handling above.
              */
3657         allocate_in_reg:
3658             /* allocate a new register matching the constraint
3659                and move the temporary register into it */
3660             temp_load(s, ts, tcg_target_available_regs[ts->type],
3661                       i_allocated_regs, 0);
3662             reg = tcg_reg_alloc(s, arg_ct->u.regs, i_allocated_regs,
3663                                 o_preferred_regs, ts->indirect_base);
3664             if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
3665                 /*
3666                  * Cross register class move not supported.  Sync the
3667                  * temp back to its slot and load from there.
3668                  */
3669                 temp_sync(s, ts, i_allocated_regs, 0, 0);
3670                 tcg_out_ld(s, ts->type, reg,
3671                            ts->mem_base->reg, ts->mem_offset);
3672             }
3673         }
3674         new_args[i] = reg;
3675         const_args[i] = 0;
3676         tcg_regset_set_reg(i_allocated_regs, reg);
3677     }
3678 
3679     /* mark dead temporaries and free the associated registers */
3680     for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
3681         if (IS_DEAD_ARG(i)) {
3682             temp_dead(s, arg_temp(op->args[i]));
3683         }
3684     }
3685 
3686     if (def->flags & TCG_OPF_BB_END) {
3687         tcg_reg_alloc_bb_end(s, i_allocated_regs);
3688     } else {
3689         if (def->flags & TCG_OPF_CALL_CLOBBER) {
3690             /* XXX: permit generic clobber register list ? */
3691             for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
3692                 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
3693                     tcg_reg_free(s, i, i_allocated_regs);
3694                 }
3695             }
3696         }
3697         if (def->flags & TCG_OPF_SIDE_EFFECTS) {
3698             /* sync globals if the op has side effects and might trigger
3699                an exception. */
3700             sync_globals(s, i_allocated_regs);
3701         }
3702 
3703         /* satisfy the output constraints */
3704         for(k = 0; k < nb_oargs; k++) {
3705             i = def->sorted_args[k];
3706             arg = op->args[i];
3707             arg_ct = &def->args_ct[i];
3708             ts = arg_temp(arg);
3709 
3710             /* ENV should not be modified.  */
3711             tcg_debug_assert(!ts->fixed_reg);
3712 
3713             if ((arg_ct->ct & TCG_CT_ALIAS)
3714                 && !const_args[arg_ct->alias_index]) {
                     /* Aliased to a non-constant input: reuse that input's register. */
3715                 reg = new_args[arg_ct->alias_index];
3716             } else if (arg_ct->ct & TCG_CT_NEWREG) {
                     /*
                      * The excluded set contains the input registers as well,
                      * so the register returned is not one assigned to an input.
                      */
3717                 reg = tcg_reg_alloc(s, arg_ct->u.regs,
3718                                     i_allocated_regs | o_allocated_regs,
3719                                     op->output_pref[k], ts->indirect_base);
3720             } else {
3721                 reg = tcg_reg_alloc(s, arg_ct->u.regs, o_allocated_regs,
3722                                     op->output_pref[k], ts->indirect_base);
3723             }
3724             tcg_regset_set_reg(o_allocated_regs, reg);
                 /* Drop the register binding the temp had before this op. */
3725             if (ts->val_type == TEMP_VAL_REG) {
3726                 s->reg_to_temp[ts->reg] = NULL;
3727             }
3728             ts->val_type = TEMP_VAL_REG;
3729             ts->reg = reg;
3730             /*
3731              * Temp value is modified, so the value kept in memory is
3732              * potentially not the same.
3733              */
3734             ts->mem_coherent = 0;
3735             s->reg_to_temp[reg] = ts;
3736             new_args[i] = reg;
3737         }
3738     }
3739 
3740     /* emit instruction */
3741     if (def->flags & TCG_OPF_VECTOR) {
3742         tcg_out_vec_op(s, op->opc, TCGOP_VECL(op), TCGOP_VECE(op),
3743                        new_args, const_args);
3744     } else {
3745         tcg_out_op(s, op->opc, new_args, const_args);
3746     }
3747 
3748     /* move the outputs in the correct register if needed */
3749     for(i = 0; i < nb_oargs; i++) {
3750         ts = arg_temp(op->args[i]);
3751 
3752         /* ENV should not be modified.  */
3753         tcg_debug_assert(!ts->fixed_reg);
3754 
3755         if (NEED_SYNC_ARG(i)) {
3756             temp_sync(s, ts, o_allocated_regs, 0, IS_DEAD_ARG(i));
3757         } else if (IS_DEAD_ARG(i)) {
3758             temp_dead(s, ts);
3759         }
3760     }
3761 }
3762 
/*
 * STACK_DIR(x) yields -(x) when TCG_TARGET_STACK_GROWSUP is defined and
 * (x) unchanged otherwise.
 */
3763 #ifdef TCG_TARGET_STACK_GROWSUP
3764 #define STACK_DIR(x) (-(x))
3765 #else
3766 #define STACK_DIR(x) (x)
3767 #endif
3768 
/*
 * Register-allocate and emit a call op.
 *
 * The work is done in this order:
 *  - arguments beyond the available argument registers are stored to
 *    stack slots relative to TCG_REG_CALL_STACK;
 *  - the remaining arguments are placed in tcg_target_call_iarg_regs[];
 *  - inputs flagged dead are released;
 *  - every register in tcg_target_call_clobber_regs is freed;
 *  - globals are left alone, synced or saved depending on the call flags;
 *  - the call is emitted with tcg_out_call();
 *  - each output temp is bound to tcg_target_call_oarg_regs[] and then
 *    synced or killed as flagged.
 *
 * Aborts through tcg_abort() if the stack arguments need more than
 * TCG_STATIC_CALL_ARGS_SIZE bytes.
 */
3769 static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
3770 {
3771     const int nb_oargs = TCGOP_CALLO(op);
3772     const int nb_iargs = TCGOP_CALLI(op);
         /*
          * NOTE(review): arg_life is not named again below; presumably the
          * IS_DEAD_ARG() / NEED_SYNC_ARG() macros read it -- confirm.
          */
3773     const TCGLifeData arg_life = op->life;
3774     int flags, nb_regs, i;
3775     TCGReg reg;
3776     TCGArg arg;
3777     TCGTemp *ts;
3778     intptr_t stack_offset;
3779     size_t call_stack_size;
3780     tcg_insn_unit *func_addr;
3781     int allocate_args;
3782     TCGRegSet allocated_regs;
3783 
         /* After the outputs and inputs, args[] holds the function address and the flags. */
3784     func_addr = (tcg_insn_unit *)(intptr_t)op->args[nb_oargs + nb_iargs];
3785     flags = op->args[nb_oargs + nb_iargs + 1];
3786 
         /* nb_regs: how many inputs go in registers (at most nb_iargs). */
3787     nb_regs = ARRAY_SIZE(tcg_target_call_iarg_regs);
3788     if (nb_regs > nb_iargs) {
3789         nb_regs = nb_iargs;
3790     }
3791 
3792     /* assign stack slots first */
3793     call_stack_size = (nb_iargs - nb_regs) * sizeof(tcg_target_long);
         /* Round up to a multiple of TCG_TARGET_STACK_ALIGN. */
3794     call_stack_size = (call_stack_size + TCG_TARGET_STACK_ALIGN - 1) &
3795         ~(TCG_TARGET_STACK_ALIGN - 1);
3796     allocate_args = (call_stack_size > TCG_STATIC_CALL_ARGS_SIZE);
3797     if (allocate_args) {
3798         /* XXX: if more than TCG_STATIC_CALL_ARGS_SIZE is needed,
3799            preallocate call stack */
3800         tcg_abort();
3801     }
3802 
3803     stack_offset = TCG_TARGET_CALL_STACK_OFFSET;
3804     for (i = nb_regs; i < nb_iargs; i++) {
3805         arg = op->args[nb_oargs + i];
             /*
              * The offset is decremented before the store when
              * TCG_TARGET_STACK_GROWSUP is defined, and incremented after
              * the store otherwise.
              */
3806 #ifdef TCG_TARGET_STACK_GROWSUP
3807         stack_offset -= sizeof(tcg_target_long);
3808 #endif
             /* A TCG_CALL_DUMMY_ARG still consumes a slot but stores nothing. */
3809         if (arg != TCG_CALL_DUMMY_ARG) {
3810             ts = arg_temp(arg);
3811             temp_load(s, ts, tcg_target_available_regs[ts->type],
3812                       s->reserved_regs, 0);
3813             tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK, stack_offset);
3814         }
3815 #ifndef TCG_TARGET_STACK_GROWSUP
3816         stack_offset += sizeof(tcg_target_long);
3817 #endif
3818     }
3819 
3820     /* assign input registers */
3821     allocated_regs = s->reserved_regs;
3822     for (i = 0; i < nb_regs; i++) {
3823         arg = op->args[nb_oargs + i];
3824         if (arg != TCG_CALL_DUMMY_ARG) {
3825             ts = arg_temp(arg);
3826             reg = tcg_target_call_iarg_regs[i];
3827 
3828             if (ts->val_type == TEMP_VAL_REG) {
                     /* Already in a register: move only if it is the wrong one. */
3829                 if (ts->reg != reg) {
3830                     tcg_reg_free(s, reg, allocated_regs);
3831                     if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
3832                         /*
3833                          * Cross register class move not supported.  Sync the
3834                          * temp back to its slot and load from there.
3835                          */
3836                         temp_sync(s, ts, allocated_regs, 0, 0);
3837                         tcg_out_ld(s, ts->type, reg,
3838                                    ts->mem_base->reg, ts->mem_offset);
3839                     }
3840                 }
3841             } else {
                     /* Not in a register: load with a desired set holding only reg. */
3842                 TCGRegSet arg_set = 0;
3843 
3844                 tcg_reg_free(s, reg, allocated_regs);
3845                 tcg_regset_set_reg(arg_set, reg);
3846                 temp_load(s, ts, arg_set, allocated_regs, 0);
3847             }
3848 
3849             tcg_regset_set_reg(allocated_regs, reg);
3850         }
3851     }
3852 
3853     /* mark dead temporaries and free the associated registers */
3854     for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
3855         if (IS_DEAD_ARG(i)) {
3856             temp_dead(s, arg_temp(op->args[i]));
3857         }
3858     }
3859 
3860     /* clobber call registers */
3861     for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
3862         if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
3863             tcg_reg_free(s, i, allocated_regs);
3864         }
3865     }
3866 
3867     /* Save globals if they might be written by the helper, sync them if
3868        they might be read. */
3869     if (flags & TCG_CALL_NO_READ_GLOBALS) {
3870         /* Nothing to do */
3871     } else if (flags & TCG_CALL_NO_WRITE_GLOBALS) {
3872         sync_globals(s, allocated_regs);
3873     } else {
3874         save_globals(s, allocated_regs);
3875     }
3876 
3877     tcg_out_call(s, func_addr);
3878 
3879     /* assign output registers and emit moves if needed */
3880     for(i = 0; i < nb_oargs; i++) {
3881         arg = op->args[i];
3882         ts = arg_temp(arg);
3883 
3884         /* ENV should not be modified.  */
3885         tcg_debug_assert(!ts->fixed_reg);
3886 
3887         reg = tcg_target_call_oarg_regs[i];
             /* The return register is expected to be unowned at this point. */
3888         tcg_debug_assert(s->reg_to_temp[reg] == NULL);
3889         if (ts->val_type == TEMP_VAL_REG) {
3890             s->reg_to_temp[ts->reg] = NULL;
3891         }
3892         ts->val_type = TEMP_VAL_REG;
3893         ts->reg = reg;
             /* The register now holds a new value, so the memory copy is stale. */
3894         ts->mem_coherent = 0;
3895         s->reg_to_temp[reg] = ts;
3896         if (NEED_SYNC_ARG(i)) {
3897             temp_sync(s, ts, allocated_regs, 0, IS_DEAD_ARG(i));
3898         } else if (IS_DEAD_ARG(i)) {
3899             temp_dead(s, ts);
3900         }
3901     }
3902 }
3903 
3904 #ifdef CONFIG_PROFILER
3905 
3906 /* avoid copy/paste errors */
3907 #define PROF_ADD(to, from, field)                       \
3908     do {                                                \
3909         (to)->field += atomic_read(&((from)->field));   \
3910     } while (0)
3911 
3912 #define PROF_MAX(to, from, field)                                       \
3913     do {                                                                \
3914         typeof((from)->field) val__ = atomic_read(&((from)->field));    \
3915         if (val__ > (to)->field) {                                      \
3916             (to)->field = val__;                                        \
3917         }                                                               \
3918     } while (0)
3919 
3920 /* Pass in a zero'ed @prof */
3921 static inline
3922 void tcg_profile_snapshot(TCGProfile *prof, bool counters, bool table)
3923 {
3924     unsigned int n_ctxs = atomic_read(&n_tcg_ctxs);
3925     unsigned int i;
3926 
3927     for (i = 0; i < n_ctxs; i++) {
3928         TCGContext *s = atomic_read(&tcg_ctxs[i]);
3929         const TCGProfile *orig = &s->prof;
3930 
3931         if (counters) {
3932             PROF_ADD(prof, orig, cpu_exec_time);
3933             PROF_ADD(prof, orig, tb_count1);
3934             PROF_ADD(prof, orig, tb_count);
3935             PROF_ADD(prof, orig, op_count);
3936             PROF_MAX(prof, orig, op_count_max);
3937             PROF_ADD(prof, orig, temp_count);
3938             PROF_MAX(prof, orig, temp_count_max);
3939             PROF_ADD(prof, orig, del_op_count);
3940             PROF_ADD(prof, orig, code_in_len);
3941             PROF_ADD(prof, orig, code_out_len);
3942             PROF_ADD(prof, orig, search_out_len);
3943             PROF_ADD(prof, orig, interm_time);
3944             PROF_ADD(prof, orig, code_time);
3945             PROF_ADD(prof, orig, la_time);
3946             PROF_ADD(prof, orig, opt_time);
3947             PROF_ADD(prof, orig, restore_count);
3948             PROF_ADD(prof, orig, restore_time);
3949         }
3950         if (table) {
3951             int i;
3952 
3953             for (i = 0; i < NB_OPS; i++) {
3954                 PROF_ADD(prof, orig, table_op_count[i]);
3955             }
3956         }
3957     }
3958 }
3959 
3960 #undef PROF_ADD
3961 #undef PROF_MAX
3962 
3963 static void tcg_profile_snapshot_counters(TCGProfile *prof)
3964 {
3965     tcg_profile_snapshot(prof, true, false);
3966 }
3967 
3968 static void tcg_profile_snapshot_table(TCGProfile *prof)
3969 {
3970     tcg_profile_snapshot(prof, false, true);
3971 }
3972 
3973 void tcg_dump_op_count(void)
3974 {
3975     TCGProfile prof = {};
3976     int i;
3977 
3978     tcg_profile_snapshot_table(&prof);
3979     for (i = 0; i < NB_OPS; i++) {
3980         qemu_printf("%s %" PRId64 "\n", tcg_op_defs[i].name,
3981                     prof.table_op_count[i]);
3982     }
3983 }
3984 
3985 int64_t tcg_cpu_exec_time(void)
3986 {
3987     unsigned int n_ctxs = atomic_read(&n_tcg_ctxs);
3988     unsigned int i;
3989     int64_t ret = 0;
3990 
3991     for (i = 0; i < n_ctxs; i++) {
3992         const TCGContext *s = atomic_read(&tcg_ctxs[i]);
3993         const TCGProfile *prof = &s->prof;
3994 
3995         ret += atomic_read(&prof->cpu_exec_time);
3996     }
3997     return ret;
3998 }
3999 #else
/*
 * Variant built when CONFIG_PROFILER is not defined: there are no opcode
 * counts to show, so only a notice is printed.
 */
void tcg_dump_op_count(void)
{
    qemu_printf("[TCG profiler not compiled]\n");
}
4004 
/*
 * Variant built when CONFIG_PROFILER is not defined: no execution time is
 * recorded, so report the error and terminate the process.  Never returns.
 */
int64_t tcg_cpu_exec_time(void)
{
    error_report("%s: TCG profiler not compiled", __func__);
    exit(EXIT_FAILURE);
}
4010 #endif
4011 
4012 
/*
 * Translate the op list held in @s into host code for @tb.
 *
 * Runs tcg_optimize() (when USE_TCG_OPTIMIZATIONS is defined), the
 * reachability and liveness passes, and then walks the ops, performing
 * register allocation and emitting code starting at tb->tc.ptr.
 *
 * Returns the size of the generated code on success.  Returns -1 when the
 * code pointer has passed s->code_gen_highwater, -2 when the code size
 * exceeds UINT16_MAX or tcg_resolve_relocs() fails, and passes through any
 * negative value returned by tcg_out_ldst_finalize() or
 * tcg_out_pool_finalize().
 */
4013 int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
4014 {
4015 #ifdef CONFIG_PROFILER
4016     TCGProfile *prof = &s->prof;
4017 #endif
4018     int i, num_insns;
4019     TCGOp *op;
4020 
4021 #ifdef CONFIG_PROFILER
         /* Account the number of ops and temps of this TB in the profile. */
4022     {
4023         int n = 0;
4024 
4025         QTAILQ_FOREACH(op, &s->ops, link) {
4026             n++;
4027         }
4028         atomic_set(&prof->op_count, prof->op_count + n);
4029         if (n > prof->op_count_max) {
4030             atomic_set(&prof->op_count_max, n);
4031         }
4032 
4033         n = s->nb_temps;
4034         atomic_set(&prof->temp_count, prof->temp_count + n);
4035         if (n > prof->temp_count_max) {
4036             atomic_set(&prof->temp_count_max, n);
4037         }
4038     }
4039 #endif
4040 
4041 #ifdef DEBUG_DISAS
4042     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)
4043                  && qemu_log_in_addr_range(tb->pc))) {
4044         qemu_log_lock();
4045         qemu_log("OP:\n");
4046         tcg_dump_ops(s, false);
4047         qemu_log("\n");
4048         qemu_log_unlock();
4049     }
4050 #endif
4051 
4052 #ifdef CONFIG_DEBUG_TCG
4053     /* Ensure all labels referenced have been emitted.  */
4054     {
4055         TCGLabel *l;
4056         bool error = false;
4057 
4058         QSIMPLEQ_FOREACH(l, &s->labels, next) {
4059             if (unlikely(!l->present) && l->refs) {
4060                 qemu_log_mask(CPU_LOG_TB_OP,
4061                               "$L%d referenced but not present.\n", l->id);
4062                 error = true;
4063             }
4064         }
4065         assert(!error);
4066     }
4067 #endif
4068 
         /*
          * Each timer is updated in two steps: the clock is subtracted
          * before the timed phase and added back after it.
          */
4069 #ifdef CONFIG_PROFILER
4070     atomic_set(&prof->opt_time, prof->opt_time - profile_getclock());
4071 #endif
4072 
4073 #ifdef USE_TCG_OPTIMIZATIONS
4074     tcg_optimize(s);
4075 #endif
4076 
4077 #ifdef CONFIG_PROFILER
4078     atomic_set(&prof->opt_time, prof->opt_time + profile_getclock());
4079     atomic_set(&prof->la_time, prof->la_time - profile_getclock());
4080 #endif
4081 
4082     reachable_code_pass(s);
4083     liveness_pass_1(s);
4084 
4085     if (s->nb_indirects > 0) {
4086 #ifdef DEBUG_DISAS
4087         if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
4088                      && qemu_log_in_addr_range(tb->pc))) {
4089             qemu_log_lock();
4090             qemu_log("OP before indirect lowering:\n");
4091             tcg_dump_ops(s, false);
4092             qemu_log("\n");
4093             qemu_log_unlock();
4094         }
4095 #endif
4096         /* Replace indirect temps with direct temps.  */
4097         if (liveness_pass_2(s)) {
4098             /* If changes were made, re-run liveness.  */
4099             liveness_pass_1(s);
4100         }
4101     }
4102 
4103 #ifdef CONFIG_PROFILER
4104     atomic_set(&prof->la_time, prof->la_time + profile_getclock());
4105 #endif
4106 
4107 #ifdef DEBUG_DISAS
4108     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT)
4109                  && qemu_log_in_addr_range(tb->pc))) {
4110         qemu_log_lock();
4111         qemu_log("OP after optimization and liveness analysis:\n");
4112         tcg_dump_ops(s, true);
4113         qemu_log("\n");
4114         qemu_log_unlock();
4115     }
4116 #endif
4117 
4118     tcg_reg_alloc_start(s);
4119 
         /* Code is emitted into the buffer that belongs to this TB. */
4120     s->code_buf = tb->tc.ptr;
4121     s->code_ptr = tb->tc.ptr;
4122 
4123 #ifdef TCG_TARGET_NEED_LDST_LABELS
4124     QSIMPLEQ_INIT(&s->ldst_labels);
4125 #endif
4126 #ifdef TCG_TARGET_NEED_POOL_LABELS
4127     s->pool_labels = NULL;
4128 #endif
4129 
         /* Starts at -1 so that the first insn_start op moves it to index 0. */
4130     num_insns = -1;
4131     QTAILQ_FOREACH(op, &s->ops, link) {
4132         TCGOpcode opc = op->opc;
4133 
4134 #ifdef CONFIG_PROFILER
4135         atomic_set(&prof->table_op_count[opc], prof->table_op_count[opc] + 1);
4136 #endif
4137 
4138         switch (opc) {
4139         case INDEX_op_mov_i32:
4140         case INDEX_op_mov_i64:
4141         case INDEX_op_mov_vec:
4142             tcg_reg_alloc_mov(s, op);
4143             break;
4144         case INDEX_op_movi_i32:
4145         case INDEX_op_movi_i64:
4146         case INDEX_op_dupi_vec:
4147             tcg_reg_alloc_movi(s, op);
4148             break;
4149         case INDEX_op_dup_vec:
4150             tcg_reg_alloc_dup(s, op);
4151             break;
4152         case INDEX_op_insn_start:
                 /*
                  * Close the previous insn (if any) by recording the current
                  * code size as its end offset, then store the start words
                  * of the new insn.
                  */
4153             if (num_insns >= 0) {
4154                 size_t off = tcg_current_code_size(s);
4155                 s->gen_insn_end_off[num_insns] = off;
4156                 /* Assert that we do not overflow our stored offset.  */
4157                 assert(s->gen_insn_end_off[num_insns] == off);
4158             }
4159             num_insns++;
4160             for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
4161                 target_ulong a;
                     /*
                      * When the target word is wider than a host register,
                      * each start word is stored as two args that are
                      * recombined here.
                      */
4162 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
4163                 a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
4164 #else
4165                 a = op->args[i];
4166 #endif
4167                 s->gen_insn_data[num_insns][i] = a;
4168             }
4169             break;
4170         case INDEX_op_discard:
4171             temp_dead(s, arg_temp(op->args[0]));
4172             break;
4173         case INDEX_op_set_label:
4174             tcg_reg_alloc_bb_end(s, s->reserved_regs);
4175             tcg_out_label(s, arg_label(op->args[0]), s->code_ptr);
4176             break;
4177         case INDEX_op_call:
4178             tcg_reg_alloc_call(s, op);
4179             break;
4180         default:
4181             /* Sanity check that we've not introduced any unhandled opcodes. */
4182             tcg_debug_assert(tcg_op_supported(opc));
4183             /* Note: in order to speed up the code, it would be much
4184                faster to have specialized register allocator functions for
4185                some common argument patterns */
4186             tcg_reg_alloc_op(s, op);
4187             break;
4188         }
4189 #ifdef CONFIG_DEBUG_TCG
4190         check_regs(s);
4191 #endif
4192         /* Test for (pending) buffer overflow.  The assumption is that any
4193            one operation beginning below the high water mark cannot overrun
4194            the buffer completely.  Thus we can test for overflow after
4195            generating code without having to check during generation.  */
4196         if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
4197             return -1;
4198         }
4199         /* Test for TB overflow, as seen by gen_insn_end_off.  */
4200         if (unlikely(tcg_current_code_size(s) > UINT16_MAX)) {
4201             return -2;
4202         }
4203     }
         /* At least one insn_start op must have been seen. */
4204     tcg_debug_assert(num_insns >= 0);
4205     s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);
4206 
4207     /* Generate TB finalization at the end of block */
4208 #ifdef TCG_TARGET_NEED_LDST_LABELS
4209     i = tcg_out_ldst_finalize(s);
4210     if (i < 0) {
4211         return i;
4212     }
4213 #endif
4214 #ifdef TCG_TARGET_NEED_POOL_LABELS
4215     i = tcg_out_pool_finalize(s);
4216     if (i < 0) {
4217         return i;
4218     }
4219 #endif
4220     if (!tcg_resolve_relocs(s)) {
4221         return -2;
4222     }
4223 
4224     /* flush instruction cache */
4225     flush_icache_range((uintptr_t)s->code_buf, (uintptr_t)s->code_ptr);
4226 
4227     return tcg_current_code_size(s);
4228 }
4229 
4230 #ifdef CONFIG_PROFILER
4231 void tcg_dump_info(void)
4232 {
4233     TCGProfile prof = {};
4234     const TCGProfile *s;
4235     int64_t tb_count;
4236     int64_t tb_div_count;
4237     int64_t tot;
4238 
4239     tcg_profile_snapshot_counters(&prof);
4240     s = &prof;
4241     tb_count = s->tb_count;
4242     tb_div_count = tb_count ? tb_count : 1;
4243     tot = s->interm_time + s->code_time;
4244 
4245     qemu_printf("JIT cycles          %" PRId64 " (%0.3f s at 2.4 GHz)\n",
4246                 tot, tot / 2.4e9);
4247     qemu_printf("translated TBs      %" PRId64 " (aborted=%" PRId64
4248                 " %0.1f%%)\n",
4249                 tb_count, s->tb_count1 - tb_count,
4250                 (double)(s->tb_count1 - s->tb_count)
4251                 / (s->tb_count1 ? s->tb_count1 : 1) * 100.0);
4252     qemu_printf("avg ops/TB          %0.1f max=%d\n",
4253                 (double)s->op_count / tb_div_count, s->op_count_max);
4254     qemu_printf("deleted ops/TB      %0.2f\n",
4255                 (double)s->del_op_count / tb_div_count);
4256     qemu_printf("avg temps/TB        %0.2f max=%d\n",
4257                 (double)s->temp_count / tb_div_count, s->temp_count_max);
4258     qemu_printf("avg host code/TB    %0.1f\n",
4259                 (double)s->code_out_len / tb_div_count);
4260     qemu_printf("avg search data/TB  %0.1f\n",
4261                 (double)s->search_out_len / tb_div_count);
4262 
4263     qemu_printf("cycles/op           %0.1f\n",
4264                 s->op_count ? (double)tot / s->op_count : 0);
4265     qemu_printf("cycles/in byte      %0.1f\n",
4266                 s->code_in_len ? (double)tot / s->code_in_len : 0);
4267     qemu_printf("cycles/out byte     %0.1f\n",
4268                 s->code_out_len ? (double)tot / s->code_out_len : 0);
4269     qemu_printf("cycles/search byte     %0.1f\n",
4270                 s->search_out_len ? (double)tot / s->search_out_len : 0);
4271     if (tot == 0) {
4272         tot = 1;
4273     }
4274     qemu_printf("  gen_interm time   %0.1f%%\n",
4275                 (double)s->interm_time / tot * 100.0);
4276     qemu_printf("  gen_code time     %0.1f%%\n",
4277                 (double)s->code_time / tot * 100.0);
4278     qemu_printf("optim./code time    %0.1f%%\n",
4279                 (double)s->opt_time / (s->code_time ? s->code_time : 1)
4280                 * 100.0);
4281     qemu_printf("liveness/code time  %0.1f%%\n",
4282                 (double)s->la_time / (s->code_time ? s->code_time : 1) * 100.0);
4283     qemu_printf("cpu_restore count   %" PRId64 "\n",
4284                 s->restore_count);
4285     qemu_printf("  avg cycles        %0.1f\n",
4286                 s->restore_count ? (double)s->restore_time / s->restore_count : 0);
4287 }
4288 #else
/*
 * Variant built when CONFIG_PROFILER is not defined: there are no
 * statistics to show, so only a notice is printed.
 */
void tcg_dump_info(void)
{
    qemu_printf("[TCG profiler not compiled]\n");
}
4293 #endif
4294 
4295 #ifdef ELF_HOST_MACHINE
4296 /* In order to use this feature, the backend needs to do three things:
4297 
4298    (1) Define ELF_HOST_MACHINE to indicate both what value to
4299        put into the ELF image and to indicate support for the feature.
4300 
4301    (2) Define tcg_register_jit.  This should create a buffer containing
4302        the contents of a .debug_frame section that describes the post-
4303        prologue unwind info for the tcg machine.
4304 
4305    (3) Call tcg_register_jit_int, with the constructed .debug_frame.
4306 */
4307 
4308 /* Begin GDB interface.  THE FOLLOWING MUST MATCH GDB DOCS.  */
/*
 * Values for jit_descriptor.action_flag.  Names and values are fixed by
 * the GDB JIT interface and must not be changed.
 */
4309 typedef enum {
4310     JIT_NOACTION = 0,
4311     JIT_REGISTER_FN,
4312     JIT_UNREGISTER_FN
4313 } jit_actions_t;
4314 
/*
 * One registered symbol file, linked into a doubly-linked list through
 * next_entry / prev_entry.  Layout is fixed by the GDB JIT interface.
 */
4315 struct jit_code_entry {
4316     struct jit_code_entry *next_entry;
4317     struct jit_code_entry *prev_entry;
         /* Address and size of the in-memory symbol file for this entry. */
4318     const void *symfile_addr;
4319     uint64_t symfile_size;
4320 };
4321 
/*
 * Descriptor examined by the debugger.  Layout is fixed by the GDB JIT
 * interface.
 */
4322 struct jit_descriptor {
4323     uint32_t version;
         /* NOTE(review): expected to hold a jit_actions_t value -- see GDB docs. */
4324     uint32_t action_flag;
4325     struct jit_code_entry *relevant_entry;
4326     struct jit_code_entry *first_entry;
4327 };
4328 
/*
 * Hook function of the GDB JIT interface.  It is deliberately empty; the
 * noinline attribute and the empty asm statement keep it as a real,
 * callable function.
 * NOTE(review): per the GDB docs the debugger places a breakpoint here to
 * learn about descriptor updates -- confirm against the GDB manual.
 */
4329 void __jit_debug_register_code(void) __attribute__((noinline));
4330 void __jit_debug_register_code(void)
4331 {
4332     asm("");
4333 }
4334 
4335 /* Must statically initialize the version, because GDB may check
4336    the version before we can set it.  */
     /* Initializer order: version = 1, action_flag = 0, both entry pointers null. */
4337 struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };
4338 
4339 /* End GDB interface.  */
4340 
/*
 * Look up @str in the string table @strtab and return its byte offset.
 *
 * @strtab is a sequence of NUL-terminated strings.  The search starts at
 * offset 1, skipping the first byte of the table.  The table is expected
 * to be followed by at least one extra NUL byte (zero padding), which
 * marks its end.
 *
 * Returns the offset of the first entry equal to @str.  If the end of the
 * table is reached without a match, returns 0 instead of scanning past the
 * end of the table.
 */
static int find_string(const char *strtab, const char *str)
{
    const char *p = strtab + 1;

    while (1) {
        if (strcmp(p, str) == 0) {
            return p - strtab;
        }
        if (*p == '\0') {
            /* Empty entry: this is the padding after the last string. */
            return 0;
        }
        p += strlen(p) + 1;
    }
}
4352 
4353 static void tcg_register_jit_int(void *buf_ptr, size_t buf_size,
4354                                  const void *debug_frame,
4355                                  size_t debug_frame_size)
4356 {
4357     struct __attribute__((packed)) DebugInfo {
4358         uint32_t  len;
4359         uint16_t  version;
4360         uint32_t  abbrev;
4361         uint8_t   ptr_size;
4362         uint8_t   cu_die;
4363         uint16_t  cu_lang;
4364         uintptr_t cu_low_pc;
4365         uintptr_t cu_high_pc;
4366         uint8_t   fn_die;
4367         char      fn_name[16];
4368         uintptr_t fn_low_pc;
4369         uintptr_t fn_high_pc;
4370         uint8_t   cu_eoc;
4371     };
4372 
4373     struct ElfImage {
4374         ElfW(Ehdr) ehdr;
4375         ElfW(Phdr) phdr;
4376         ElfW(Shdr) shdr[7];
4377         ElfW(Sym)  sym[2];
4378         struct DebugInfo di;
4379         uint8_t    da[24];
4380         char       str[80];
4381     };
4382 
4383     struct ElfImage *img;
4384 
4385     static const struct ElfImage img_template = {
4386         .ehdr = {
4387             .e_ident[EI_MAG0] = ELFMAG0,
4388             .e_ident[EI_MAG1] = ELFMAG1,
4389             .e_ident[EI_MAG2] = ELFMAG2,
4390             .e_ident[EI_MAG3] = ELFMAG3,
4391             .e_ident[EI_CLASS] = ELF_CLASS,
4392             .e_ident[EI_DATA] = ELF_DATA,
4393             .e_ident[EI_VERSION] = EV_CURRENT,
4394             .e_type = ET_EXEC,
4395             .e_machine = ELF_HOST_MACHINE,
4396             .e_version = EV_CURRENT,
4397             .e_phoff = offsetof(struct ElfImage, phdr),
4398             .e_shoff = offsetof(struct ElfImage, shdr),
4399             .e_ehsize = sizeof(ElfW(Shdr)),
4400             .e_phentsize = sizeof(ElfW(Phdr)),
4401             .e_phnum = 1,
4402             .e_shentsize = sizeof(ElfW(Shdr)),
4403             .e_shnum = ARRAY_SIZE(img->shdr),
4404             .e_shstrndx = ARRAY_SIZE(img->shdr) - 1,
4405 #ifdef ELF_HOST_FLAGS
4406             .e_flags = ELF_HOST_FLAGS,
4407 #endif
4408 #ifdef ELF_OSABI
4409             .e_ident[EI_OSABI] = ELF_OSABI,
4410 #endif
4411         },
4412         .phdr = {
4413             .p_type = PT_LOAD,
4414             .p_flags = PF_X,
4415         },
4416         .shdr = {
4417             [0] = { .sh_type = SHT_NULL },
4418             /* Trick: The contents of code_gen_buffer are not present in
4419                this fake ELF file; that got allocated elsewhere.  Therefore
4420                we mark .text as SHT_NOBITS (similar to .bss) so that readers
4421                will not look for contents.  We can record any address.  */
4422             [1] = { /* .text */
4423                 .sh_type = SHT_NOBITS,
4424                 .sh_flags = SHF_EXECINSTR | SHF_ALLOC,
4425             },
4426             [2] = { /* .debug_info */
4427                 .sh_type = SHT_PROGBITS,
4428                 .sh_offset = offsetof(struct ElfImage, di),
4429                 .sh_size = sizeof(struct DebugInfo),
4430             },
4431             [3] = { /* .debug_abbrev */
4432                 .sh_type = SHT_PROGBITS,
4433                 .sh_offset = offsetof(struct ElfImage, da),
4434                 .sh_size = sizeof(img->da),
4435             },
4436             [4] = { /* .debug_frame */
4437                 .sh_type = SHT_PROGBITS,
4438                 .sh_offset = sizeof(struct ElfImage),
4439             },
4440             [5] = { /* .symtab */
4441                 .sh_type = SHT_SYMTAB,
4442                 .sh_offset = offsetof(struct ElfImage, sym),
4443                 .sh_size = sizeof(img->sym),
4444                 .sh_info = 1,
4445                 .sh_link = ARRAY_SIZE(img->shdr) - 1,
4446                 .sh_entsize = sizeof(ElfW(Sym)),
4447             },
4448             [6] = { /* .strtab */
4449                 .sh_type = SHT_STRTAB,
4450                 .sh_offset = offsetof(struct ElfImage, str),
4451                 .sh_size = sizeof(img->str),
4452             }
4453         },
4454         .sym = {
4455             [1] = { /* code_gen_buffer */
4456                 .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC),
4457                 .st_shndx = 1,
4458             }
4459         },
4460         .di = {
4461             .len = sizeof(struct DebugInfo) - 4,
4462             .version = 2,
4463             .ptr_size = sizeof(void *),
4464             .cu_die = 1,
4465             .cu_lang = 0x8001,  /* DW_LANG_Mips_Assembler */
4466             .fn_die = 2,
4467             .fn_name = "code_gen_buffer"
4468         },
4469         .da = {
4470             1,          /* abbrev number (the cu) */
4471             0x11, 1,    /* DW_TAG_compile_unit, has children */
4472             0x13, 0x5,  /* DW_AT_language, DW_FORM_data2 */
4473             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
4474             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
4475             0, 0,       /* end of abbrev */
4476             2,          /* abbrev number (the fn) */
4477             0x2e, 0,    /* DW_TAG_subprogram, no children */
4478             0x3, 0x8,   /* DW_AT_name, DW_FORM_string */
4479             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
4480             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
4481             0, 0,       /* end of abbrev */
4482             0           /* no more abbrev */
4483         },
4484         .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0"
4485                ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer",
4486     };
4487 
4488     /* We only need a single jit entry; statically allocate it.  */
4489     static struct jit_code_entry one_entry;
4490 
4491     uintptr_t buf = (uintptr_t)buf_ptr;
4492     size_t img_size = sizeof(struct ElfImage) + debug_frame_size;
4493     DebugFrameHeader *dfh;
4494 
4495     img = g_malloc(img_size);
4496     *img = img_template;
4497 
4498     img->phdr.p_vaddr = buf;
4499     img->phdr.p_paddr = buf;
4500     img->phdr.p_memsz = buf_size;
4501 
4502     img->shdr[1].sh_name = find_string(img->str, ".text");
4503     img->shdr[1].sh_addr = buf;
4504     img->shdr[1].sh_size = buf_size;
4505 
4506     img->shdr[2].sh_name = find_string(img->str, ".debug_info");
4507     img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev");
4508 
4509     img->shdr[4].sh_name = find_string(img->str, ".debug_frame");
4510     img->shdr[4].sh_size = debug_frame_size;
4511 
4512     img->shdr[5].sh_name = find_string(img->str, ".symtab");
4513     img->shdr[6].sh_name = find_string(img->str, ".strtab");
4514 
4515     img->sym[1].st_name = find_string(img->str, "code_gen_buffer");
4516     img->sym[1].st_value = buf;
4517     img->sym[1].st_size = buf_size;
4518 
4519     img->di.cu_low_pc = buf;
4520     img->di.cu_high_pc = buf + buf_size;
4521     img->di.fn_low_pc = buf;
4522     img->di.fn_high_pc = buf + buf_size;
4523 
4524     dfh = (DebugFrameHeader *)(img + 1);
4525     memcpy(dfh, debug_frame, debug_frame_size);
4526     dfh->fde.func_start = buf;
4527     dfh->fde.func_len = buf_size;
4528 
4529 #ifdef DEBUG_JIT
4530     /* Enable this block to be able to debug the ELF image file creation.
4531        One can use readelf, objdump, or other inspection utilities.  */
4532     {
4533         FILE *f = fopen("/tmp/qemu.jit", "w+b");
4534         if (f) {
4535             if (fwrite(img, img_size, 1, f) != img_size) {
4536                 /* Avoid stupid unused return value warning for fwrite.  */
4537             }
4538             fclose(f);
4539         }
4540     }
4541 #endif
4542 
4543     one_entry.symfile_addr = img;
4544     one_entry.symfile_size = img_size;
4545 
4546     __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
4547     __jit_debug_descriptor.relevant_entry = &one_entry;
4548     __jit_debug_descriptor.first_entry = &one_entry;
4549     __jit_debug_register_code();
4550 }
4551 #else
4552 /* No support for the feature.  Provide the entry point expected by exec.c,
4553    and implement the internal function we declared earlier.  */
4554 
/*
 * Do-nothing variant of the internal JIT registration helper, compiled
 * in the #else branch of the ELF_HOST_MACHINE conditional.  It accepts
 * the same four arguments and ignores every one of them.
 */
static void tcg_register_jit_int(void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
{
    /* Nothing to register: explicitly discard all arguments.  */
    (void)buf;
    (void)size;
    (void)debug_frame;
    (void)debug_frame_size;
}
4560 
/*
 * Public entry point for the configuration without JIT debug support:
 * the buffer description is accepted and silently ignored.
 */
void tcg_register_jit(void *buf, size_t buf_size)
{
    /* Nothing to register: explicitly discard both arguments.  */
    (void)buf;
    (void)buf_size;
}
4564 #endif /* ELF_HOST_MACHINE */
4565 
4566 #if !TCG_TARGET_MAYBE_vec
4567 void tcg_expand_vec_op(TCGOpcode o, TCGType t, unsigned e, TCGArg a0, ...)
4568 {
4569     g_assert_not_reached();
4570 }
4571 #endif
4572