xref: /openbmc/qemu/tcg/tcg.c (revision ca27b5eb)
1 /*
2  * Tiny Code Generator for QEMU
3  *
4  * Copyright (c) 2008 Fabrice Bellard
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24 
25 /* define it to use liveness analysis (better code) */
26 #define USE_TCG_OPTIMIZATIONS
27 
28 #include "qemu/osdep.h"
29 
/* Define to dump the ELF file used to communicate with GDB.  */
31 #undef DEBUG_JIT
32 
33 #include "qemu/error-report.h"
34 #include "qemu/cutils.h"
35 #include "qemu/host-utils.h"
36 #include "qemu/qemu-print.h"
37 #include "qemu/timer.h"
38 
39 /* Note: the long term plan is to reduce the dependencies on the QEMU
40    CPU definitions. Currently they are used for qemu_ld/st
41    instructions */
42 #define NO_CPU_IO_DEFS
43 #include "cpu.h"
44 
45 #include "exec/exec-all.h"
46 
47 #if !defined(CONFIG_USER_ONLY)
48 #include "hw/boards.h"
49 #endif
50 
51 #include "tcg/tcg-op.h"
52 
53 #if UINTPTR_MAX == UINT32_MAX
54 # define ELF_CLASS  ELFCLASS32
55 #else
56 # define ELF_CLASS  ELFCLASS64
57 #endif
58 #ifdef HOST_WORDS_BIGENDIAN
59 # define ELF_DATA   ELFDATA2MSB
60 #else
61 # define ELF_DATA   ELFDATA2LSB
62 #endif
63 
64 #include "elf.h"
65 #include "exec/log.h"
66 #include "sysemu/sysemu.h"
67 
/*
 * Forward declarations for functions that are used in this file but whose
 * definitions are expected to come from tcg-target.inc.c, which is
 * #included further down in this translation unit.
 */
static void tcg_target_init(TCGContext *s);
static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode);
static void tcg_target_qemu_prologue(TCGContext *s);
/* Returns false when the relocation could not be applied. */
static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
                        intptr_t value, intptr_t addend);
75 
/*
 * The CIE and FDE header definitions will be common to all hosts.
 *
 * DebugFrameCIE is the CIE part.  The first member is forced to pointer-size
 * alignment, which also raises the alignment of the structure as a whole.
 * NOTE(review): field names suggest the DWARF .debug_frame CIE layout --
 * confirm against the per-host users of this type.
 */
typedef struct {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t id;
    uint8_t version;
    char augmentation[1];
    uint8_t code_align;
    uint8_t data_align;
    uint8_t return_column;
} DebugFrameCIE;
86 
/*
 * FDE header counterpart of DebugFrameCIE.  The structure is packed, with
 * the first member explicitly aligned to the size of a pointer.
 * func_start/func_len are pointer-sized integers; presumably they describe
 * the address range covered by the entry -- TODO confirm with the users.
 */
typedef struct QEMU_PACKED {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t cie_offset;
    uintptr_t func_start;
    uintptr_t func_len;
} DebugFrameFDEHeader;
93 
/* A CIE immediately followed by an FDE header, laid out without padding. */
typedef struct QEMU_PACKED {
    DebugFrameCIE cie;
    DebugFrameFDEHeader fde;
} DebugFrameHeader;
98 
/*
 * Forward declaration only; the definition is not part of this section.
 * Marked unused so that configurations which never call it do not trigger
 * an unused-function warning.
 */
static void tcg_register_jit_int(void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
    __attribute__((unused));
103 
/*
 * Forward declarations for functions that are both defined and used in
 * tcg-target.inc.c; they are declared here so that code in this file can
 * refer to them as well.
 */
static const char *target_parse_constraint(TCGArgConstraint *ct,
                                           const char *ct_str, TCGType type);
static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
                       intptr_t arg2);
static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_movi(TCGContext *s, TCGType type,
                         TCGReg ret, tcg_target_long arg);
static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
                       const int *const_args);
/*
 * Vector emitters.  When TCG_TARGET_MAYBE_vec is non-zero they are only
 * declared here (the definitions are expected from the included backend).
 * Otherwise they are provided as stubs that must never be called: each
 * stub aborts via g_assert_not_reached().
 */
#if TCG_TARGET_MAYBE_vec
static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                            TCGReg dst, TCGReg src);
static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg dst, TCGReg base, intptr_t offset);
static void tcg_out_dupi_vec(TCGContext *s, TCGType type,
                             TCGReg dst, tcg_target_long arg);
static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, unsigned vecl,
                           unsigned vece, const TCGArg *args,
                           const int *const_args);
#else
/* Stub: no vector support configured, so reaching this is a bug. */
static inline bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                                   TCGReg dst, TCGReg src)
{
    g_assert_not_reached();
}
/* Stub: no vector support configured, so reaching this is a bug. */
static inline bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, TCGReg base, intptr_t offset)
{
    g_assert_not_reached();
}
/* Stub: no vector support configured, so reaching this is a bug. */
static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type,
                                    TCGReg dst, tcg_target_long arg)
{
    g_assert_not_reached();
}
/* Stub: no vector support configured, so reaching this is a bug. */
static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, unsigned vecl,
                                  unsigned vece, const TCGArg *args,
                                  const int *const_args)
{
    g_assert_not_reached();
}
#endif
/* Further forward declarations; no definition appears in this section. */
static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
                       intptr_t arg2);
static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
                        TCGReg base, intptr_t ofs);
static void tcg_out_call(TCGContext *s, tcg_insn_unit *target);
static int tcg_target_const_match(tcg_target_long val, TCGType type,
                                  const TCGArgConstraint *arg_ct);
/* Only declared when the backend asks for load/store labels. */
#ifdef TCG_TARGET_NEED_LDST_LABELS
static int tcg_out_ldst_finalize(TCGContext *s);
#endif
157 
/*
 * Number of bytes kept in reserve at the end of each region: the
 * high-water mark of a context is set to the region end minus this value.
 */
#define TCG_HIGHWATER 1024

/* Registered TCG contexts and how many of them are currently in use. */
static TCGContext **tcg_ctxs;
static unsigned int n_tcg_ctxs;
/* Set up by tcg_context_init() to refer to the "env" global register. */
TCGv_env cpu_env = 0;
163 
/*
 * Per-region container of translation blocks: a GTree keyed by the TB's
 * tb_tc descriptor, protected by its own mutex.
 */
struct tcg_region_tree {
    QemuMutex lock;
    GTree *tree;
    /* padding to avoid false sharing is computed at run-time */
};
169 
/*
 * We divide code_gen_buffer into equally-sized "regions" that TCG threads
 * dynamically allocate from as demand dictates. Given appropriate region
 * sizing, this minimizes flushes even when some TCG threads generate a lot
 * more code than others.
 */
struct tcg_region_state {
    QemuMutex lock;

    /* fields set at init time */
    void *start;         /* start of the code buffer (may be unaligned) */
    void *start_aligned; /* start rounded up to a host page boundary */
    void *end;           /* page-aligned end, excluding the last guard page */
    size_t n;            /* number of regions */
    size_t size; /* size of one region */
    size_t stride; /* .size + guard size */

    /* fields protected by the lock */
    size_t current; /* current region index */
    size_t agg_size_full; /* aggregate size of full regions */
};
191 
/* The single, file-wide region bookkeeping instance. */
static struct tcg_region_state region;
/*
 * This is an array of struct tcg_region_tree's, with padding.
 * We use void * to simplify the computation of region_trees[i]; each
 * struct is found every tree_size bytes.
 */
static void *region_trees;
/* Size of one padded tree slot; computed in tcg_region_trees_init(). */
static size_t tree_size;
/* Register sets, indexed by TCG type; not written anywhere in this section. */
static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT];
/* Consulted by tcg_context_init() when ordering indirect registers. */
static TCGRegSet tcg_target_call_clobber_regs;
202 
#if TCG_TARGET_INSN_UNIT_SIZE == 1
/* Append the byte @v to the code stream and step the output cursor. */
static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v)
{
    tcg_insn_unit *dst = s->code_ptr;

    dst[0] = v;
    s->code_ptr = dst + 1;
}

/* Overwrite the single insn unit located at @p with @v. */
static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p,
                                                      uint8_t v)
{
    p[0] = v;
}
#endif
215 
216 #if TCG_TARGET_INSN_UNIT_SIZE <= 2
217 static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v)
218 {
219     if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
220         *s->code_ptr++ = v;
221     } else {
222         tcg_insn_unit *p = s->code_ptr;
223         memcpy(p, &v, sizeof(v));
224         s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
225     }
226 }
227 
228 static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p,
229                                                        uint16_t v)
230 {
231     if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
232         *p = v;
233     } else {
234         memcpy(p, &v, sizeof(v));
235     }
236 }
237 #endif
238 
239 #if TCG_TARGET_INSN_UNIT_SIZE <= 4
240 static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v)
241 {
242     if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
243         *s->code_ptr++ = v;
244     } else {
245         tcg_insn_unit *p = s->code_ptr;
246         memcpy(p, &v, sizeof(v));
247         s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
248     }
249 }
250 
251 static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p,
252                                                        uint32_t v)
253 {
254     if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
255         *p = v;
256     } else {
257         memcpy(p, &v, sizeof(v));
258     }
259 }
260 #endif
261 
262 #if TCG_TARGET_INSN_UNIT_SIZE <= 8
263 static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v)
264 {
265     if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
266         *s->code_ptr++ = v;
267     } else {
268         tcg_insn_unit *p = s->code_ptr;
269         memcpy(p, &v, sizeof(v));
270         s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
271     }
272 }
273 
274 static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p,
275                                                        uint64_t v)
276 {
277     if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
278         *p = v;
279     } else {
280         memcpy(p, &v, sizeof(v));
281     }
282 }
283 #endif
284 
285 /* label relocation processing */
286 
287 static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
288                           TCGLabel *l, intptr_t addend)
289 {
290     TCGRelocation *r = tcg_malloc(sizeof(TCGRelocation));
291 
292     r->type = type;
293     r->ptr = code_ptr;
294     r->addend = addend;
295     QSIMPLEQ_INSERT_TAIL(&l->relocs, r, next);
296 }
297 
298 static void tcg_out_label(TCGContext *s, TCGLabel *l, tcg_insn_unit *ptr)
299 {
300     tcg_debug_assert(!l->has_value);
301     l->has_value = 1;
302     l->u.value_ptr = ptr;
303 }
304 
305 TCGLabel *gen_new_label(void)
306 {
307     TCGContext *s = tcg_ctx;
308     TCGLabel *l = tcg_malloc(sizeof(TCGLabel));
309 
310     memset(l, 0, sizeof(TCGLabel));
311     l->id = s->nb_labels++;
312     QSIMPLEQ_INIT(&l->relocs);
313 
314     QSIMPLEQ_INSERT_TAIL(&s->labels, l, next);
315 
316     return l;
317 }
318 
319 static bool tcg_resolve_relocs(TCGContext *s)
320 {
321     TCGLabel *l;
322 
323     QSIMPLEQ_FOREACH(l, &s->labels, next) {
324         TCGRelocation *r;
325         uintptr_t value = l->u.value;
326 
327         QSIMPLEQ_FOREACH(r, &l->relocs, next) {
328             if (!patch_reloc(r->ptr, r->type, value, r->addend)) {
329                 return false;
330             }
331         }
332     }
333     return true;
334 }
335 
336 static void set_jmp_reset_offset(TCGContext *s, int which)
337 {
338     size_t off = tcg_current_code_size(s);
339     s->tb_jmp_reset_offset[which] = off;
340     /* Make sure that we didn't overflow the stored offset.  */
341     assert(s->tb_jmp_reset_offset[which] == off);
342 }
343 
344 #include "tcg-target.inc.c"
345 
346 /* compare a pointer @ptr and a tb_tc @s */
347 static int ptr_cmp_tb_tc(const void *ptr, const struct tb_tc *s)
348 {
349     if (ptr >= s->ptr + s->size) {
350         return 1;
351     } else if (ptr < s->ptr) {
352         return -1;
353     }
354     return 0;
355 }
356 
357 static gint tb_tc_cmp(gconstpointer ap, gconstpointer bp)
358 {
359     const struct tb_tc *a = ap;
360     const struct tb_tc *b = bp;
361 
362     /*
363      * When both sizes are set, we know this isn't a lookup.
364      * This is the most likely case: every TB must be inserted; lookups
365      * are a lot less frequent.
366      */
367     if (likely(a->size && b->size)) {
368         if (a->ptr > b->ptr) {
369             return 1;
370         } else if (a->ptr < b->ptr) {
371             return -1;
372         }
373         /* a->ptr == b->ptr should happen only on deletions */
374         g_assert(a->size == b->size);
375         return 0;
376     }
377     /*
378      * All lookups have either .size field set to 0.
379      * From the glib sources we see that @ap is always the lookup key. However
380      * the docs provide no guarantee, so we just mark this case as likely.
381      */
382     if (likely(a->size == 0)) {
383         return ptr_cmp_tb_tc(a->ptr, b);
384     }
385     return ptr_cmp_tb_tc(b->ptr, a);
386 }
387 
388 static void tcg_region_trees_init(void)
389 {
390     size_t i;
391 
392     tree_size = ROUND_UP(sizeof(struct tcg_region_tree), qemu_dcache_linesize);
393     region_trees = qemu_memalign(qemu_dcache_linesize, region.n * tree_size);
394     for (i = 0; i < region.n; i++) {
395         struct tcg_region_tree *rt = region_trees + i * tree_size;
396 
397         qemu_mutex_init(&rt->lock);
398         rt->tree = g_tree_new(tb_tc_cmp);
399     }
400 }
401 
402 static struct tcg_region_tree *tc_ptr_to_region_tree(void *p)
403 {
404     size_t region_idx;
405 
406     if (p < region.start_aligned) {
407         region_idx = 0;
408     } else {
409         ptrdiff_t offset = p - region.start_aligned;
410 
411         if (offset > region.stride * (region.n - 1)) {
412             region_idx = region.n - 1;
413         } else {
414             region_idx = offset / region.stride;
415         }
416     }
417     return region_trees + region_idx * tree_size;
418 }
419 
420 void tcg_tb_insert(TranslationBlock *tb)
421 {
422     struct tcg_region_tree *rt = tc_ptr_to_region_tree(tb->tc.ptr);
423 
424     qemu_mutex_lock(&rt->lock);
425     g_tree_insert(rt->tree, &tb->tc, tb);
426     qemu_mutex_unlock(&rt->lock);
427 }
428 
429 void tcg_tb_remove(TranslationBlock *tb)
430 {
431     struct tcg_region_tree *rt = tc_ptr_to_region_tree(tb->tc.ptr);
432 
433     qemu_mutex_lock(&rt->lock);
434     g_tree_remove(rt->tree, &tb->tc);
435     qemu_mutex_unlock(&rt->lock);
436 }
437 
438 /*
439  * Find the TB 'tb' such that
440  * tb->tc.ptr <= tc_ptr < tb->tc.ptr + tb->tc.size
441  * Return NULL if not found.
442  */
443 TranslationBlock *tcg_tb_lookup(uintptr_t tc_ptr)
444 {
445     struct tcg_region_tree *rt = tc_ptr_to_region_tree((void *)tc_ptr);
446     TranslationBlock *tb;
447     struct tb_tc s = { .ptr = (void *)tc_ptr };
448 
449     qemu_mutex_lock(&rt->lock);
450     tb = g_tree_lookup(rt->tree, &s);
451     qemu_mutex_unlock(&rt->lock);
452     return tb;
453 }
454 
455 static void tcg_region_tree_lock_all(void)
456 {
457     size_t i;
458 
459     for (i = 0; i < region.n; i++) {
460         struct tcg_region_tree *rt = region_trees + i * tree_size;
461 
462         qemu_mutex_lock(&rt->lock);
463     }
464 }
465 
466 static void tcg_region_tree_unlock_all(void)
467 {
468     size_t i;
469 
470     for (i = 0; i < region.n; i++) {
471         struct tcg_region_tree *rt = region_trees + i * tree_size;
472 
473         qemu_mutex_unlock(&rt->lock);
474     }
475 }
476 
477 void tcg_tb_foreach(GTraverseFunc func, gpointer user_data)
478 {
479     size_t i;
480 
481     tcg_region_tree_lock_all();
482     for (i = 0; i < region.n; i++) {
483         struct tcg_region_tree *rt = region_trees + i * tree_size;
484 
485         g_tree_foreach(rt->tree, func, user_data);
486     }
487     tcg_region_tree_unlock_all();
488 }
489 
490 size_t tcg_nb_tbs(void)
491 {
492     size_t nb_tbs = 0;
493     size_t i;
494 
495     tcg_region_tree_lock_all();
496     for (i = 0; i < region.n; i++) {
497         struct tcg_region_tree *rt = region_trees + i * tree_size;
498 
499         nb_tbs += g_tree_nnodes(rt->tree);
500     }
501     tcg_region_tree_unlock_all();
502     return nb_tbs;
503 }
504 
505 static void tcg_region_tree_reset_all(void)
506 {
507     size_t i;
508 
509     tcg_region_tree_lock_all();
510     for (i = 0; i < region.n; i++) {
511         struct tcg_region_tree *rt = region_trees + i * tree_size;
512 
513         /* Increment the refcount first so that destroy acts as a reset */
514         g_tree_ref(rt->tree);
515         g_tree_destroy(rt->tree);
516     }
517     tcg_region_tree_unlock_all();
518 }
519 
520 static void tcg_region_bounds(size_t curr_region, void **pstart, void **pend)
521 {
522     void *start, *end;
523 
524     start = region.start_aligned + curr_region * region.stride;
525     end = start + region.size;
526 
527     if (curr_region == 0) {
528         start = region.start;
529     }
530     if (curr_region == region.n - 1) {
531         end = region.end;
532     }
533 
534     *pstart = start;
535     *pend = end;
536 }
537 
538 static void tcg_region_assign(TCGContext *s, size_t curr_region)
539 {
540     void *start, *end;
541 
542     tcg_region_bounds(curr_region, &start, &end);
543 
544     s->code_gen_buffer = start;
545     s->code_gen_ptr = start;
546     s->code_gen_buffer_size = end - start;
547     s->code_gen_highwater = end - TCG_HIGHWATER;
548 }
549 
550 static bool tcg_region_alloc__locked(TCGContext *s)
551 {
552     if (region.current == region.n) {
553         return true;
554     }
555     tcg_region_assign(s, region.current);
556     region.current++;
557     return false;
558 }
559 
560 /*
561  * Request a new region once the one in use has filled up.
562  * Returns true on error.
563  */
564 static bool tcg_region_alloc(TCGContext *s)
565 {
566     bool err;
567     /* read the region size now; alloc__locked will overwrite it on success */
568     size_t size_full = s->code_gen_buffer_size;
569 
570     qemu_mutex_lock(&region.lock);
571     err = tcg_region_alloc__locked(s);
572     if (!err) {
573         region.agg_size_full += size_full - TCG_HIGHWATER;
574     }
575     qemu_mutex_unlock(&region.lock);
576     return err;
577 }
578 
579 /*
580  * Perform a context's first region allocation.
581  * This function does _not_ increment region.agg_size_full.
582  */
583 static inline bool tcg_region_initial_alloc__locked(TCGContext *s)
584 {
585     return tcg_region_alloc__locked(s);
586 }
587 
588 /* Call from a safe-work context */
589 void tcg_region_reset_all(void)
590 {
591     unsigned int n_ctxs = atomic_read(&n_tcg_ctxs);
592     unsigned int i;
593 
594     qemu_mutex_lock(&region.lock);
595     region.current = 0;
596     region.agg_size_full = 0;
597 
598     for (i = 0; i < n_ctxs; i++) {
599         TCGContext *s = atomic_read(&tcg_ctxs[i]);
600         bool err = tcg_region_initial_alloc__locked(s);
601 
602         g_assert(!err);
603     }
604     qemu_mutex_unlock(&region.lock);
605 
606     tcg_region_tree_reset_all();
607 }
608 
609 #ifdef CONFIG_USER_ONLY
610 static size_t tcg_n_regions(void)
611 {
612     return 1;
613 }
614 #else
615 /*
616  * It is likely that some vCPUs will translate more code than others, so we
617  * first try to set more regions than max_cpus, with those regions being of
618  * reasonable size. If that's not possible we make do by evenly dividing
619  * the code_gen_buffer among the vCPUs.
620  */
621 static size_t tcg_n_regions(void)
622 {
623     size_t i;
624 
625     /* Use a single region if all we have is one vCPU thread */
626 #if !defined(CONFIG_USER_ONLY)
627     MachineState *ms = MACHINE(qdev_get_machine());
628     unsigned int max_cpus = ms->smp.max_cpus;
629 #endif
630     if (max_cpus == 1 || !qemu_tcg_mttcg_enabled()) {
631         return 1;
632     }
633 
634     /* Try to have more regions than max_cpus, with each region being >= 2 MB */
635     for (i = 8; i > 0; i--) {
636         size_t regions_per_thread = i;
637         size_t region_size;
638 
639         region_size = tcg_init_ctx.code_gen_buffer_size;
640         region_size /= max_cpus * regions_per_thread;
641 
642         if (region_size >= 2 * 1024u * 1024) {
643             return max_cpus * regions_per_thread;
644         }
645     }
646     /* If we can't, then just allocate one region per vCPU thread */
647     return max_cpus;
648 }
649 #endif
650 
651 /*
652  * Initializes region partitioning.
653  *
654  * Called at init time from the parent thread (i.e. the one calling
655  * tcg_context_init), after the target's TCG globals have been set.
656  *
657  * Region partitioning works by splitting code_gen_buffer into separate regions,
658  * and then assigning regions to TCG threads so that the threads can translate
659  * code in parallel without synchronization.
660  *
661  * In softmmu the number of TCG threads is bounded by max_cpus, so we use at
662  * least max_cpus regions in MTTCG. In !MTTCG we use a single region.
663  * Note that the TCG options from the command-line (i.e. -accel accel=tcg,[...])
664  * must have been parsed before calling this function, since it calls
665  * qemu_tcg_mttcg_enabled().
666  *
667  * In user-mode we use a single region.  Having multiple regions in user-mode
668  * is not supported, because the number of vCPU threads (recall that each thread
669  * spawned by the guest corresponds to a vCPU thread) is only bounded by the
670  * OS, and usually this number is huge (tens of thousands is not uncommon).
 * Thus, given this large bound on the number of vCPU threads and the fact
 * that code_gen_buffer is allocated at compile-time, we cannot guarantee
 * the availability of at least one region per vCPU thread.
674  *
675  * However, this user-mode limitation is unlikely to be a significant problem
676  * in practice. Multi-threaded guests share most if not all of their translated
677  * code, which makes parallel code generation less appealing than in softmmu.
678  */
679 void tcg_region_init(void)
680 {
681     void *buf = tcg_init_ctx.code_gen_buffer;
682     void *aligned;
683     size_t size = tcg_init_ctx.code_gen_buffer_size;
684     size_t page_size = qemu_real_host_page_size;
685     size_t region_size;
686     size_t n_regions;
687     size_t i;
688 
689     n_regions = tcg_n_regions();
690 
691     /* The first region will be 'aligned - buf' bytes larger than the others */
692     aligned = QEMU_ALIGN_PTR_UP(buf, page_size);
693     g_assert(aligned < tcg_init_ctx.code_gen_buffer + size);
694     /*
695      * Make region_size a multiple of page_size, using aligned as the start.
696      * As a result of this we might end up with a few extra pages at the end of
697      * the buffer; we will assign those to the last region.
698      */
699     region_size = (size - (aligned - buf)) / n_regions;
700     region_size = QEMU_ALIGN_DOWN(region_size, page_size);
701 
702     /* A region must have at least 2 pages; one code, one guard */
703     g_assert(region_size >= 2 * page_size);
704 
705     /* init the region struct */
706     qemu_mutex_init(&region.lock);
707     region.n = n_regions;
708     region.size = region_size - page_size;
709     region.stride = region_size;
710     region.start = buf;
711     region.start_aligned = aligned;
712     /* page-align the end, since its last page will be a guard page */
713     region.end = QEMU_ALIGN_PTR_DOWN(buf + size, page_size);
714     /* account for that last guard page */
715     region.end -= page_size;
716 
717     /* set guard pages */
718     for (i = 0; i < region.n; i++) {
719         void *start, *end;
720         int rc;
721 
722         tcg_region_bounds(i, &start, &end);
723         rc = qemu_mprotect_none(end, page_size);
724         g_assert(!rc);
725     }
726 
727     tcg_region_trees_init();
728 
729     /* In user-mode we support only one ctx, so do the initial allocation now */
730 #ifdef CONFIG_USER_ONLY
731     {
732         bool err = tcg_region_initial_alloc__locked(tcg_ctx);
733 
734         g_assert(!err);
735     }
736 #endif
737 }
738 
739 static void alloc_tcg_plugin_context(TCGContext *s)
740 {
741 #ifdef CONFIG_PLUGIN
742     s->plugin_tb = g_new0(struct qemu_plugin_tb, 1);
743     s->plugin_tb->insns =
744         g_ptr_array_new_with_free_func(qemu_plugin_insn_cleanup_fn);
745 #endif
746 }
747 
748 /*
749  * All TCG threads except the parent (i.e. the one that called tcg_context_init
750  * and registered the target's TCG globals) must register with this function
751  * before initiating translation.
752  *
753  * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation
754  * of tcg_region_init() for the reasoning behind this.
755  *
 * In softmmu each caller registers its context in tcg_ctxs[]. Note that in
 * softmmu tcg_ctxs[] does not track tcg_init_ctx, since the initial context
 * is not used anymore for translation once this function is called.
 *
 * Not tracking tcg_init_ctx in tcg_ctxs[] in softmmu keeps code that iterates
 * over the array (e.g. tcg_code_size()) the same for both softmmu and
 * user-mode.
 */
762  */
763 #ifdef CONFIG_USER_ONLY
764 void tcg_register_thread(void)
765 {
766     tcg_ctx = &tcg_init_ctx;
767 }
768 #else
769 void tcg_register_thread(void)
770 {
771     MachineState *ms = MACHINE(qdev_get_machine());
772     TCGContext *s = g_malloc(sizeof(*s));
773     unsigned int i, n;
774     bool err;
775 
776     *s = tcg_init_ctx;
777 
778     /* Relink mem_base.  */
779     for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) {
780         if (tcg_init_ctx.temps[i].mem_base) {
781             ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps;
782             tcg_debug_assert(b >= 0 && b < n);
783             s->temps[i].mem_base = &s->temps[b];
784         }
785     }
786 
787     /* Claim an entry in tcg_ctxs */
788     n = atomic_fetch_inc(&n_tcg_ctxs);
789     g_assert(n < ms->smp.max_cpus);
790     atomic_set(&tcg_ctxs[n], s);
791 
792     if (n > 0) {
793         alloc_tcg_plugin_context(s);
794     }
795 
796     tcg_ctx = s;
797     qemu_mutex_lock(&region.lock);
798     err = tcg_region_initial_alloc__locked(tcg_ctx);
799     g_assert(!err);
800     qemu_mutex_unlock(&region.lock);
801 }
802 #endif /* !CONFIG_USER_ONLY */
803 
804 /*
805  * Returns the size (in bytes) of all translated code (i.e. from all regions)
806  * currently in the cache.
807  * See also: tcg_code_capacity()
808  * Do not confuse with tcg_current_code_size(); that one applies to a single
809  * TCG context.
810  */
811 size_t tcg_code_size(void)
812 {
813     unsigned int n_ctxs = atomic_read(&n_tcg_ctxs);
814     unsigned int i;
815     size_t total;
816 
817     qemu_mutex_lock(&region.lock);
818     total = region.agg_size_full;
819     for (i = 0; i < n_ctxs; i++) {
820         const TCGContext *s = atomic_read(&tcg_ctxs[i]);
821         size_t size;
822 
823         size = atomic_read(&s->code_gen_ptr) - s->code_gen_buffer;
824         g_assert(size <= s->code_gen_buffer_size);
825         total += size;
826     }
827     qemu_mutex_unlock(&region.lock);
828     return total;
829 }
830 
831 /*
832  * Returns the code capacity (in bytes) of the entire cache, i.e. including all
833  * regions.
834  * See also: tcg_code_size()
835  */
836 size_t tcg_code_capacity(void)
837 {
838     size_t guard_size, capacity;
839 
840     /* no need for synchronization; these variables are set at init time */
841     guard_size = region.stride - region.size;
842     capacity = region.end + guard_size - region.start;
843     capacity -= region.n * (guard_size + TCG_HIGHWATER);
844     return capacity;
845 }
846 
847 size_t tcg_tb_phys_invalidate_count(void)
848 {
849     unsigned int n_ctxs = atomic_read(&n_tcg_ctxs);
850     unsigned int i;
851     size_t total = 0;
852 
853     for (i = 0; i < n_ctxs; i++) {
854         const TCGContext *s = atomic_read(&tcg_ctxs[i]);
855 
856         total += atomic_read(&s->tb_phys_invalidate_count);
857     }
858     return total;
859 }
860 
861 /* pool based memory allocation */
862 void *tcg_malloc_internal(TCGContext *s, int size)
863 {
864     TCGPool *p;
865     int pool_size;
866 
867     if (size > TCG_POOL_CHUNK_SIZE) {
868         /* big malloc: insert a new pool (XXX: could optimize) */
869         p = g_malloc(sizeof(TCGPool) + size);
870         p->size = size;
871         p->next = s->pool_first_large;
872         s->pool_first_large = p;
873         return p->data;
874     } else {
875         p = s->pool_current;
876         if (!p) {
877             p = s->pool_first;
878             if (!p)
879                 goto new_pool;
880         } else {
881             if (!p->next) {
882             new_pool:
883                 pool_size = TCG_POOL_CHUNK_SIZE;
884                 p = g_malloc(sizeof(TCGPool) + pool_size);
885                 p->size = pool_size;
886                 p->next = NULL;
887                 if (s->pool_current)
888                     s->pool_current->next = p;
889                 else
890                     s->pool_first = p;
891             } else {
892                 p = p->next;
893             }
894         }
895     }
896     s->pool_current = p;
897     s->pool_cur = p->data + size;
898     s->pool_end = p->data + p->size;
899     return p->data;
900 }
901 
902 void tcg_pool_reset(TCGContext *s)
903 {
904     TCGPool *p, *t;
905     for (p = s->pool_first_large; p; p = t) {
906         t = p->next;
907         g_free(p);
908     }
909     s->pool_first_large = NULL;
910     s->pool_cur = s->pool_end = NULL;
911     s->pool_current = NULL;
912 }
913 
/*
 * Description of one helper function.  Entries are stored in a hash table
 * keyed by @func (see tcg_context_init()).
 */
typedef struct TCGHelperInfo {
    void *func;        /* address of the helper; used as the hash key */
    const char *name;
    unsigned flags;
    unsigned sizemask;
} TCGHelperInfo;
920 
921 #include "exec/helper-proto.h"
922 
/* Table of all helpers; its entries are generated by exec/helper-tcg.h. */
static const TCGHelperInfo all_helpers[] = {
#include "exec/helper-tcg.h"
};
/* Maps a helper's function pointer to its all_helpers[] entry. */
static GHashTable *helper_table;

/* Filled in by tcg_context_init() from tcg_target_reg_alloc_order. */
static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)];
static void process_op_defs(TCGContext *s);
static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
                                            TCGReg reg, const char *name);
932 
933 void tcg_context_init(TCGContext *s)
934 {
935     int op, total_args, n, i;
936     TCGOpDef *def;
937     TCGArgConstraint *args_ct;
938     int *sorted_args;
939     TCGTemp *ts;
940 
941     memset(s, 0, sizeof(*s));
942     s->nb_globals = 0;
943 
944     /* Count total number of arguments and allocate the corresponding
945        space */
946     total_args = 0;
947     for(op = 0; op < NB_OPS; op++) {
948         def = &tcg_op_defs[op];
949         n = def->nb_iargs + def->nb_oargs;
950         total_args += n;
951     }
952 
953     args_ct = g_malloc(sizeof(TCGArgConstraint) * total_args);
954     sorted_args = g_malloc(sizeof(int) * total_args);
955 
956     for(op = 0; op < NB_OPS; op++) {
957         def = &tcg_op_defs[op];
958         def->args_ct = args_ct;
959         def->sorted_args = sorted_args;
960         n = def->nb_iargs + def->nb_oargs;
961         sorted_args += n;
962         args_ct += n;
963     }
964 
965     /* Register helpers.  */
966     /* Use g_direct_hash/equal for direct pointer comparisons on func.  */
967     helper_table = g_hash_table_new(NULL, NULL);
968 
969     for (i = 0; i < ARRAY_SIZE(all_helpers); ++i) {
970         g_hash_table_insert(helper_table, (gpointer)all_helpers[i].func,
971                             (gpointer)&all_helpers[i]);
972     }
973 
974     tcg_target_init(s);
975     process_op_defs(s);
976 
977     /* Reverse the order of the saved registers, assuming they're all at
978        the start of tcg_target_reg_alloc_order.  */
979     for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) {
980         int r = tcg_target_reg_alloc_order[n];
981         if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) {
982             break;
983         }
984     }
985     for (i = 0; i < n; ++i) {
986         indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i];
987     }
988     for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) {
989         indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i];
990     }
991 
992     alloc_tcg_plugin_context(s);
993 
994     tcg_ctx = s;
995     /*
996      * In user-mode we simply share the init context among threads, since we
997      * use a single region. See the documentation tcg_region_init() for the
998      * reasoning behind this.
999      * In softmmu we will have at most max_cpus TCG threads.
1000      */
1001 #ifdef CONFIG_USER_ONLY
1002     tcg_ctxs = &tcg_ctx;
1003     n_tcg_ctxs = 1;
1004 #else
1005     MachineState *ms = MACHINE(qdev_get_machine());
1006     unsigned int max_cpus = ms->smp.max_cpus;
1007     tcg_ctxs = g_new(TCGContext *, max_cpus);
1008 #endif
1009 
1010     tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0));
1011     ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env");
1012     cpu_env = temp_tcgv_ptr(ts);
1013 }
1014 
1015 /*
1016  * Allocate TBs right before their corresponding translated code, making
1017  * sure that TBs and code are on different cache lines.
1018  */
1019 TranslationBlock *tcg_tb_alloc(TCGContext *s)
1020 {
1021     uintptr_t align = qemu_icache_linesize;
1022     TranslationBlock *tb;
1023     void *next;
1024 
1025  retry:
1026     tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align);
1027     next = (void *)ROUND_UP((uintptr_t)(tb + 1), align);
1028 
1029     if (unlikely(next > s->code_gen_highwater)) {
1030         if (tcg_region_alloc(s)) {
1031             return NULL;
1032         }
1033         goto retry;
1034     }
1035     atomic_set(&s->code_gen_ptr, next);
1036     s->data_gen_ptr = NULL;
1037     return tb;
1038 }
1039 
1040 void tcg_prologue_init(TCGContext *s)
1041 {
1042     size_t prologue_size, total_size;
1043     void *buf0, *buf1;
1044 
1045     /* Put the prologue at the beginning of code_gen_buffer.  */
1046     buf0 = s->code_gen_buffer;
1047     total_size = s->code_gen_buffer_size;
1048     s->code_ptr = buf0;
1049     s->code_buf = buf0;
1050     s->data_gen_ptr = NULL;
1051     s->code_gen_prologue = buf0;
1052 
1053     /* Compute a high-water mark, at which we voluntarily flush the buffer
1054        and start over.  The size here is arbitrary, significantly larger
1055        than we expect the code generation for any one opcode to require.  */
1056     s->code_gen_highwater = s->code_gen_buffer + (total_size - TCG_HIGHWATER);
1057 
1058 #ifdef TCG_TARGET_NEED_POOL_LABELS
1059     s->pool_labels = NULL;
1060 #endif
1061 
1062     /* Generate the prologue.  */
1063     tcg_target_qemu_prologue(s);
1064 
1065 #ifdef TCG_TARGET_NEED_POOL_LABELS
1066     /* Allow the prologue to put e.g. guest_base into a pool entry.  */
1067     {
1068         int result = tcg_out_pool_finalize(s);
1069         tcg_debug_assert(result == 0);
1070     }
1071 #endif
1072 
1073     buf1 = s->code_ptr;
1074     flush_icache_range((uintptr_t)buf0, (uintptr_t)buf1);
1075 
1076     /* Deduct the prologue from the buffer.  */
1077     prologue_size = tcg_current_code_size(s);
1078     s->code_gen_ptr = buf1;
1079     s->code_gen_buffer = buf1;
1080     s->code_buf = buf1;
1081     total_size -= prologue_size;
1082     s->code_gen_buffer_size = total_size;
1083 
1084     tcg_register_jit(s->code_gen_buffer, total_size);
1085 
1086 #ifdef DEBUG_DISAS
1087     if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
1088         FILE *logfile = qemu_log_lock();
1089         qemu_log("PROLOGUE: [size=%zu]\n", prologue_size);
1090         if (s->data_gen_ptr) {
1091             size_t code_size = s->data_gen_ptr - buf0;
1092             size_t data_size = prologue_size - code_size;
1093             size_t i;
1094 
1095             log_disas(buf0, code_size, NULL);
1096 
1097             for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) {
1098                 if (sizeof(tcg_target_ulong) == 8) {
1099                     qemu_log("0x%08" PRIxPTR ":  .quad  0x%016" PRIx64 "\n",
1100                              (uintptr_t)s->data_gen_ptr + i,
1101                              *(uint64_t *)(s->data_gen_ptr + i));
1102                 } else {
1103                     qemu_log("0x%08" PRIxPTR ":  .long  0x%08x\n",
1104                              (uintptr_t)s->data_gen_ptr + i,
1105                              *(uint32_t *)(s->data_gen_ptr + i));
1106                 }
1107             }
1108         } else {
1109             log_disas(buf0, prologue_size, NULL);
1110         }
1111         qemu_log("\n");
1112         qemu_log_flush();
1113         qemu_log_unlock(logfile);
1114     }
1115 #endif
1116 
1117     /* Assert that goto_ptr is implemented completely.  */
1118     if (TCG_TARGET_HAS_goto_ptr) {
1119         tcg_debug_assert(s->code_gen_epilogue != NULL);
1120     }
1121 }
1122 
1123 void tcg_func_start(TCGContext *s)
1124 {
1125     tcg_pool_reset(s);
1126     s->nb_temps = s->nb_globals;
1127 
1128     /* No temps have been previously allocated for size or locality.  */
1129     memset(s->free_temps, 0, sizeof(s->free_temps));
1130 
1131     s->nb_ops = 0;
1132     s->nb_labels = 0;
1133     s->current_frame_offset = s->frame_start;
1134 
1135 #ifdef CONFIG_DEBUG_TCG
1136     s->goto_tb_issue_mask = 0;
1137 #endif
1138 
1139     QTAILQ_INIT(&s->ops);
1140     QTAILQ_INIT(&s->free_ops);
1141     QSIMPLEQ_INIT(&s->labels);
1142 }
1143 
1144 static inline TCGTemp *tcg_temp_alloc(TCGContext *s)
1145 {
1146     int n = s->nb_temps++;
1147     tcg_debug_assert(n < TCG_MAX_TEMPS);
1148     return memset(&s->temps[n], 0, sizeof(TCGTemp));
1149 }
1150 
1151 static inline TCGTemp *tcg_global_alloc(TCGContext *s)
1152 {
1153     TCGTemp *ts;
1154 
1155     tcg_debug_assert(s->nb_globals == s->nb_temps);
1156     s->nb_globals++;
1157     ts = tcg_temp_alloc(s);
1158     ts->temp_global = 1;
1159 
1160     return ts;
1161 }
1162 
1163 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
1164                                             TCGReg reg, const char *name)
1165 {
1166     TCGTemp *ts;
1167 
1168     if (TCG_TARGET_REG_BITS == 32 && type != TCG_TYPE_I32) {
1169         tcg_abort();
1170     }
1171 
1172     ts = tcg_global_alloc(s);
1173     ts->base_type = type;
1174     ts->type = type;
1175     ts->fixed_reg = 1;
1176     ts->reg = reg;
1177     ts->name = name;
1178     tcg_regset_set_reg(s->reserved_regs, reg);
1179 
1180     return ts;
1181 }
1182 
1183 void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size)
1184 {
1185     s->frame_start = start;
1186     s->frame_end = start + size;
1187     s->frame_temp
1188         = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame");
1189 }
1190 
1191 TCGTemp *tcg_global_mem_new_internal(TCGType type, TCGv_ptr base,
1192                                      intptr_t offset, const char *name)
1193 {
1194     TCGContext *s = tcg_ctx;
1195     TCGTemp *base_ts = tcgv_ptr_temp(base);
1196     TCGTemp *ts = tcg_global_alloc(s);
1197     int indirect_reg = 0, bigendian = 0;
1198 #ifdef HOST_WORDS_BIGENDIAN
1199     bigendian = 1;
1200 #endif
1201 
1202     if (!base_ts->fixed_reg) {
1203         /* We do not support double-indirect registers.  */
1204         tcg_debug_assert(!base_ts->indirect_reg);
1205         base_ts->indirect_base = 1;
1206         s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64
1207                             ? 2 : 1);
1208         indirect_reg = 1;
1209     }
1210 
1211     if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
1212         TCGTemp *ts2 = tcg_global_alloc(s);
1213         char buf[64];
1214 
1215         ts->base_type = TCG_TYPE_I64;
1216         ts->type = TCG_TYPE_I32;
1217         ts->indirect_reg = indirect_reg;
1218         ts->mem_allocated = 1;
1219         ts->mem_base = base_ts;
1220         ts->mem_offset = offset + bigendian * 4;
1221         pstrcpy(buf, sizeof(buf), name);
1222         pstrcat(buf, sizeof(buf), "_0");
1223         ts->name = strdup(buf);
1224 
1225         tcg_debug_assert(ts2 == ts + 1);
1226         ts2->base_type = TCG_TYPE_I64;
1227         ts2->type = TCG_TYPE_I32;
1228         ts2->indirect_reg = indirect_reg;
1229         ts2->mem_allocated = 1;
1230         ts2->mem_base = base_ts;
1231         ts2->mem_offset = offset + (1 - bigendian) * 4;
1232         pstrcpy(buf, sizeof(buf), name);
1233         pstrcat(buf, sizeof(buf), "_1");
1234         ts2->name = strdup(buf);
1235     } else {
1236         ts->base_type = type;
1237         ts->type = type;
1238         ts->indirect_reg = indirect_reg;
1239         ts->mem_allocated = 1;
1240         ts->mem_base = base_ts;
1241         ts->mem_offset = offset;
1242         ts->name = name;
1243     }
1244     return ts;
1245 }
1246 
1247 TCGTemp *tcg_temp_new_internal(TCGType type, bool temp_local)
1248 {
1249     TCGContext *s = tcg_ctx;
1250     TCGTemp *ts;
1251     int idx, k;
1252 
1253     k = type + (temp_local ? TCG_TYPE_COUNT : 0);
1254     idx = find_first_bit(s->free_temps[k].l, TCG_MAX_TEMPS);
1255     if (idx < TCG_MAX_TEMPS) {
1256         /* There is already an available temp with the right type.  */
1257         clear_bit(idx, s->free_temps[k].l);
1258 
1259         ts = &s->temps[idx];
1260         ts->temp_allocated = 1;
1261         tcg_debug_assert(ts->base_type == type);
1262         tcg_debug_assert(ts->temp_local == temp_local);
1263     } else {
1264         ts = tcg_temp_alloc(s);
1265         if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
1266             TCGTemp *ts2 = tcg_temp_alloc(s);
1267 
1268             ts->base_type = type;
1269             ts->type = TCG_TYPE_I32;
1270             ts->temp_allocated = 1;
1271             ts->temp_local = temp_local;
1272 
1273             tcg_debug_assert(ts2 == ts + 1);
1274             ts2->base_type = TCG_TYPE_I64;
1275             ts2->type = TCG_TYPE_I32;
1276             ts2->temp_allocated = 1;
1277             ts2->temp_local = temp_local;
1278         } else {
1279             ts->base_type = type;
1280             ts->type = type;
1281             ts->temp_allocated = 1;
1282             ts->temp_local = temp_local;
1283         }
1284     }
1285 
1286 #if defined(CONFIG_DEBUG_TCG)
1287     s->temps_in_use++;
1288 #endif
1289     return ts;
1290 }
1291 
1292 TCGv_vec tcg_temp_new_vec(TCGType type)
1293 {
1294     TCGTemp *t;
1295 
1296 #ifdef CONFIG_DEBUG_TCG
1297     switch (type) {
1298     case TCG_TYPE_V64:
1299         assert(TCG_TARGET_HAS_v64);
1300         break;
1301     case TCG_TYPE_V128:
1302         assert(TCG_TARGET_HAS_v128);
1303         break;
1304     case TCG_TYPE_V256:
1305         assert(TCG_TARGET_HAS_v256);
1306         break;
1307     default:
1308         g_assert_not_reached();
1309     }
1310 #endif
1311 
1312     t = tcg_temp_new_internal(type, 0);
1313     return temp_tcgv_vec(t);
1314 }
1315 
1316 /* Create a new temp of the same type as an existing temp.  */
1317 TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match)
1318 {
1319     TCGTemp *t = tcgv_vec_temp(match);
1320 
1321     tcg_debug_assert(t->temp_allocated != 0);
1322 
1323     t = tcg_temp_new_internal(t->base_type, 0);
1324     return temp_tcgv_vec(t);
1325 }
1326 
1327 void tcg_temp_free_internal(TCGTemp *ts)
1328 {
1329     TCGContext *s = tcg_ctx;
1330     int k, idx;
1331 
1332 #if defined(CONFIG_DEBUG_TCG)
1333     s->temps_in_use--;
1334     if (s->temps_in_use < 0) {
1335         fprintf(stderr, "More temporaries freed than allocated!\n");
1336     }
1337 #endif
1338 
1339     tcg_debug_assert(ts->temp_global == 0);
1340     tcg_debug_assert(ts->temp_allocated != 0);
1341     ts->temp_allocated = 0;
1342 
1343     idx = temp_idx(ts);
1344     k = ts->base_type + (ts->temp_local ? TCG_TYPE_COUNT : 0);
1345     set_bit(idx, s->free_temps[k].l);
1346 }
1347 
1348 TCGv_i32 tcg_const_i32(int32_t val)
1349 {
1350     TCGv_i32 t0;
1351     t0 = tcg_temp_new_i32();
1352     tcg_gen_movi_i32(t0, val);
1353     return t0;
1354 }
1355 
1356 TCGv_i64 tcg_const_i64(int64_t val)
1357 {
1358     TCGv_i64 t0;
1359     t0 = tcg_temp_new_i64();
1360     tcg_gen_movi_i64(t0, val);
1361     return t0;
1362 }
1363 
1364 TCGv_i32 tcg_const_local_i32(int32_t val)
1365 {
1366     TCGv_i32 t0;
1367     t0 = tcg_temp_local_new_i32();
1368     tcg_gen_movi_i32(t0, val);
1369     return t0;
1370 }
1371 
1372 TCGv_i64 tcg_const_local_i64(int64_t val)
1373 {
1374     TCGv_i64 t0;
1375     t0 = tcg_temp_local_new_i64();
1376     tcg_gen_movi_i64(t0, val);
1377     return t0;
1378 }
1379 
1380 #if defined(CONFIG_DEBUG_TCG)
1381 void tcg_clear_temp_count(void)
1382 {
1383     TCGContext *s = tcg_ctx;
1384     s->temps_in_use = 0;
1385 }
1386 
1387 int tcg_check_temp_count(void)
1388 {
1389     TCGContext *s = tcg_ctx;
1390     if (s->temps_in_use) {
1391         /* Clear the count so that we don't give another
1392          * warning immediately next time around.
1393          */
1394         s->temps_in_use = 0;
1395         return 1;
1396     }
1397     return 0;
1398 }
1399 #endif
1400 
1401 /* Return true if OP may appear in the opcode stream.
1402    Test the runtime variable that controls each opcode.  */
1403 bool tcg_op_supported(TCGOpcode op)
1404 {
    /* True if the host provides any of the three vector widths.  */
1405     const bool have_vec
1406         = TCG_TARGET_HAS_v64 | TCG_TARGET_HAS_v128 | TCG_TARGET_HAS_v256;
1407 
1408     switch (op) {
    /* Opcodes that are reported as supported unconditionally.  */
1409     case INDEX_op_discard:
1410     case INDEX_op_set_label:
1411     case INDEX_op_call:
1412     case INDEX_op_br:
1413     case INDEX_op_mb:
1414     case INDEX_op_insn_start:
1415     case INDEX_op_exit_tb:
1416     case INDEX_op_goto_tb:
1417     case INDEX_op_qemu_ld_i32:
1418     case INDEX_op_qemu_st_i32:
1419     case INDEX_op_qemu_ld_i64:
1420     case INDEX_op_qemu_st_i64:
1421         return true;
1422 
1423     case INDEX_op_goto_ptr:
1424         return TCG_TARGET_HAS_goto_ptr;
1425 
    /* Basic 32-bit opcodes: reported as supported unconditionally.  */
1426     case INDEX_op_mov_i32:
1427     case INDEX_op_movi_i32:
1428     case INDEX_op_setcond_i32:
1429     case INDEX_op_brcond_i32:
1430     case INDEX_op_ld8u_i32:
1431     case INDEX_op_ld8s_i32:
1432     case INDEX_op_ld16u_i32:
1433     case INDEX_op_ld16s_i32:
1434     case INDEX_op_ld_i32:
1435     case INDEX_op_st8_i32:
1436     case INDEX_op_st16_i32:
1437     case INDEX_op_st_i32:
1438     case INDEX_op_add_i32:
1439     case INDEX_op_sub_i32:
1440     case INDEX_op_mul_i32:
1441     case INDEX_op_and_i32:
1442     case INDEX_op_or_i32:
1443     case INDEX_op_xor_i32:
1444     case INDEX_op_shl_i32:
1445     case INDEX_op_shr_i32:
1446     case INDEX_op_sar_i32:
1447         return true;
1448 
    /* 32-bit opcodes gated by the matching TCG_TARGET_HAS_* value.  */
1449     case INDEX_op_movcond_i32:
1450         return TCG_TARGET_HAS_movcond_i32;
1451     case INDEX_op_div_i32:
1452     case INDEX_op_divu_i32:
1453         return TCG_TARGET_HAS_div_i32;
1454     case INDEX_op_rem_i32:
1455     case INDEX_op_remu_i32:
1456         return TCG_TARGET_HAS_rem_i32;
1457     case INDEX_op_div2_i32:
1458     case INDEX_op_divu2_i32:
1459         return TCG_TARGET_HAS_div2_i32;
1460     case INDEX_op_rotl_i32:
1461     case INDEX_op_rotr_i32:
1462         return TCG_TARGET_HAS_rot_i32;
1463     case INDEX_op_deposit_i32:
1464         return TCG_TARGET_HAS_deposit_i32;
1465     case INDEX_op_extract_i32:
1466         return TCG_TARGET_HAS_extract_i32;
1467     case INDEX_op_sextract_i32:
1468         return TCG_TARGET_HAS_sextract_i32;
1469     case INDEX_op_extract2_i32:
1470         return TCG_TARGET_HAS_extract2_i32;
1471     case INDEX_op_add2_i32:
1472         return TCG_TARGET_HAS_add2_i32;
1473     case INDEX_op_sub2_i32:
1474         return TCG_TARGET_HAS_sub2_i32;
1475     case INDEX_op_mulu2_i32:
1476         return TCG_TARGET_HAS_mulu2_i32;
1477     case INDEX_op_muls2_i32:
1478         return TCG_TARGET_HAS_muls2_i32;
1479     case INDEX_op_muluh_i32:
1480         return TCG_TARGET_HAS_muluh_i32;
1481     case INDEX_op_mulsh_i32:
1482         return TCG_TARGET_HAS_mulsh_i32;
1483     case INDEX_op_ext8s_i32:
1484         return TCG_TARGET_HAS_ext8s_i32;
1485     case INDEX_op_ext16s_i32:
1486         return TCG_TARGET_HAS_ext16s_i32;
1487     case INDEX_op_ext8u_i32:
1488         return TCG_TARGET_HAS_ext8u_i32;
1489     case INDEX_op_ext16u_i32:
1490         return TCG_TARGET_HAS_ext16u_i32;
1491     case INDEX_op_bswap16_i32:
1492         return TCG_TARGET_HAS_bswap16_i32;
1493     case INDEX_op_bswap32_i32:
1494         return TCG_TARGET_HAS_bswap32_i32;
1495     case INDEX_op_not_i32:
1496         return TCG_TARGET_HAS_not_i32;
1497     case INDEX_op_neg_i32:
1498         return TCG_TARGET_HAS_neg_i32;
1499     case INDEX_op_andc_i32:
1500         return TCG_TARGET_HAS_andc_i32;
1501     case INDEX_op_orc_i32:
1502         return TCG_TARGET_HAS_orc_i32;
1503     case INDEX_op_eqv_i32:
1504         return TCG_TARGET_HAS_eqv_i32;
1505     case INDEX_op_nand_i32:
1506         return TCG_TARGET_HAS_nand_i32;
1507     case INDEX_op_nor_i32:
1508         return TCG_TARGET_HAS_nor_i32;
1509     case INDEX_op_clz_i32:
1510         return TCG_TARGET_HAS_clz_i32;
1511     case INDEX_op_ctz_i32:
1512         return TCG_TARGET_HAS_ctz_i32;
1513     case INDEX_op_ctpop_i32:
1514         return TCG_TARGET_HAS_ctpop_i32;
1515 
    /* Double-word compare opcodes: only reported on 32-bit hosts.  */
1516     case INDEX_op_brcond2_i32:
1517     case INDEX_op_setcond2_i32:
1518         return TCG_TARGET_REG_BITS == 32;
1519 
    /* Basic 64-bit opcodes: only reported on 64-bit hosts.  */
1520     case INDEX_op_mov_i64:
1521     case INDEX_op_movi_i64:
1522     case INDEX_op_setcond_i64:
1523     case INDEX_op_brcond_i64:
1524     case INDEX_op_ld8u_i64:
1525     case INDEX_op_ld8s_i64:
1526     case INDEX_op_ld16u_i64:
1527     case INDEX_op_ld16s_i64:
1528     case INDEX_op_ld32u_i64:
1529     case INDEX_op_ld32s_i64:
1530     case INDEX_op_ld_i64:
1531     case INDEX_op_st8_i64:
1532     case INDEX_op_st16_i64:
1533     case INDEX_op_st32_i64:
1534     case INDEX_op_st_i64:
1535     case INDEX_op_add_i64:
1536     case INDEX_op_sub_i64:
1537     case INDEX_op_mul_i64:
1538     case INDEX_op_and_i64:
1539     case INDEX_op_or_i64:
1540     case INDEX_op_xor_i64:
1541     case INDEX_op_shl_i64:
1542     case INDEX_op_shr_i64:
1543     case INDEX_op_sar_i64:
1544     case INDEX_op_ext_i32_i64:
1545     case INDEX_op_extu_i32_i64:
1546         return TCG_TARGET_REG_BITS == 64;
1547 
    /* 64-bit opcodes gated by the matching TCG_TARGET_HAS_* value.  */
1548     case INDEX_op_movcond_i64:
1549         return TCG_TARGET_HAS_movcond_i64;
1550     case INDEX_op_div_i64:
1551     case INDEX_op_divu_i64:
1552         return TCG_TARGET_HAS_div_i64;
1553     case INDEX_op_rem_i64:
1554     case INDEX_op_remu_i64:
1555         return TCG_TARGET_HAS_rem_i64;
1556     case INDEX_op_div2_i64:
1557     case INDEX_op_divu2_i64:
1558         return TCG_TARGET_HAS_div2_i64;
1559     case INDEX_op_rotl_i64:
1560     case INDEX_op_rotr_i64:
1561         return TCG_TARGET_HAS_rot_i64;
1562     case INDEX_op_deposit_i64:
1563         return TCG_TARGET_HAS_deposit_i64;
1564     case INDEX_op_extract_i64:
1565         return TCG_TARGET_HAS_extract_i64;
1566     case INDEX_op_sextract_i64:
1567         return TCG_TARGET_HAS_sextract_i64;
1568     case INDEX_op_extract2_i64:
1569         return TCG_TARGET_HAS_extract2_i64;
1570     case INDEX_op_extrl_i64_i32:
1571         return TCG_TARGET_HAS_extrl_i64_i32;
1572     case INDEX_op_extrh_i64_i32:
1573         return TCG_TARGET_HAS_extrh_i64_i32;
1574     case INDEX_op_ext8s_i64:
1575         return TCG_TARGET_HAS_ext8s_i64;
1576     case INDEX_op_ext16s_i64:
1577         return TCG_TARGET_HAS_ext16s_i64;
1578     case INDEX_op_ext32s_i64:
1579         return TCG_TARGET_HAS_ext32s_i64;
1580     case INDEX_op_ext8u_i64:
1581         return TCG_TARGET_HAS_ext8u_i64;
1582     case INDEX_op_ext16u_i64:
1583         return TCG_TARGET_HAS_ext16u_i64;
1584     case INDEX_op_ext32u_i64:
1585         return TCG_TARGET_HAS_ext32u_i64;
1586     case INDEX_op_bswap16_i64:
1587         return TCG_TARGET_HAS_bswap16_i64;
1588     case INDEX_op_bswap32_i64:
1589         return TCG_TARGET_HAS_bswap32_i64;
1590     case INDEX_op_bswap64_i64:
1591         return TCG_TARGET_HAS_bswap64_i64;
1592     case INDEX_op_not_i64:
1593         return TCG_TARGET_HAS_not_i64;
1594     case INDEX_op_neg_i64:
1595         return TCG_TARGET_HAS_neg_i64;
1596     case INDEX_op_andc_i64:
1597         return TCG_TARGET_HAS_andc_i64;
1598     case INDEX_op_orc_i64:
1599         return TCG_TARGET_HAS_orc_i64;
1600     case INDEX_op_eqv_i64:
1601         return TCG_TARGET_HAS_eqv_i64;
1602     case INDEX_op_nand_i64:
1603         return TCG_TARGET_HAS_nand_i64;
1604     case INDEX_op_nor_i64:
1605         return TCG_TARGET_HAS_nor_i64;
1606     case INDEX_op_clz_i64:
1607         return TCG_TARGET_HAS_clz_i64;
1608     case INDEX_op_ctz_i64:
1609         return TCG_TARGET_HAS_ctz_i64;
1610     case INDEX_op_ctpop_i64:
1611         return TCG_TARGET_HAS_ctpop_i64;
1612     case INDEX_op_add2_i64:
1613         return TCG_TARGET_HAS_add2_i64;
1614     case INDEX_op_sub2_i64:
1615         return TCG_TARGET_HAS_sub2_i64;
1616     case INDEX_op_mulu2_i64:
1617         return TCG_TARGET_HAS_mulu2_i64;
1618     case INDEX_op_muls2_i64:
1619         return TCG_TARGET_HAS_muls2_i64;
1620     case INDEX_op_muluh_i64:
1621         return TCG_TARGET_HAS_muluh_i64;
1622     case INDEX_op_mulsh_i64:
1623         return TCG_TARGET_HAS_mulsh_i64;
1624 
    /* Vector opcodes: all require have_vec; most additionally require
       their own TCG_TARGET_HAS_*_vec value.  */
1625     case INDEX_op_mov_vec:
1626     case INDEX_op_dup_vec:
1627     case INDEX_op_dupi_vec:
1628     case INDEX_op_dupm_vec:
1629     case INDEX_op_ld_vec:
1630     case INDEX_op_st_vec:
1631     case INDEX_op_add_vec:
1632     case INDEX_op_sub_vec:
1633     case INDEX_op_and_vec:
1634     case INDEX_op_or_vec:
1635     case INDEX_op_xor_vec:
1636     case INDEX_op_cmp_vec:
1637         return have_vec;
1638     case INDEX_op_dup2_vec:
1639         return have_vec && TCG_TARGET_REG_BITS == 32;
1640     case INDEX_op_not_vec:
1641         return have_vec && TCG_TARGET_HAS_not_vec;
1642     case INDEX_op_neg_vec:
1643         return have_vec && TCG_TARGET_HAS_neg_vec;
1644     case INDEX_op_abs_vec:
1645         return have_vec && TCG_TARGET_HAS_abs_vec;
1646     case INDEX_op_andc_vec:
1647         return have_vec && TCG_TARGET_HAS_andc_vec;
1648     case INDEX_op_orc_vec:
1649         return have_vec && TCG_TARGET_HAS_orc_vec;
1650     case INDEX_op_mul_vec:
1651         return have_vec && TCG_TARGET_HAS_mul_vec;
1652     case INDEX_op_shli_vec:
1653     case INDEX_op_shri_vec:
1654     case INDEX_op_sari_vec:
1655         return have_vec && TCG_TARGET_HAS_shi_vec;
1656     case INDEX_op_shls_vec:
1657     case INDEX_op_shrs_vec:
1658     case INDEX_op_sars_vec:
1659         return have_vec && TCG_TARGET_HAS_shs_vec;
1660     case INDEX_op_shlv_vec:
1661     case INDEX_op_shrv_vec:
1662     case INDEX_op_sarv_vec:
1663         return have_vec && TCG_TARGET_HAS_shv_vec;
1664     case INDEX_op_rotli_vec:
1665         return have_vec && TCG_TARGET_HAS_roti_vec;
1666     case INDEX_op_rotls_vec:
1667         return have_vec && TCG_TARGET_HAS_rots_vec;
1668     case INDEX_op_rotlv_vec:
1669     case INDEX_op_rotrv_vec:
1670         return have_vec && TCG_TARGET_HAS_rotv_vec;
1671     case INDEX_op_ssadd_vec:
1672     case INDEX_op_usadd_vec:
1673     case INDEX_op_sssub_vec:
1674     case INDEX_op_ussub_vec:
1675         return have_vec && TCG_TARGET_HAS_sat_vec;
1676     case INDEX_op_smin_vec:
1677     case INDEX_op_umin_vec:
1678     case INDEX_op_smax_vec:
1679     case INDEX_op_umax_vec:
1680         return have_vec && TCG_TARGET_HAS_minmax_vec;
1681     case INDEX_op_bitsel_vec:
1682         return have_vec && TCG_TARGET_HAS_bitsel_vec;
1683     case INDEX_op_cmpsel_vec:
1684         return have_vec && TCG_TARGET_HAS_cmpsel_vec;
1685 
    /* Anything not listed above is asserted (debug builds only) to lie
       after INDEX_op_last_generic and is reported as supported.
       NOTE(review): presumably these are the target-specific opcodes --
       confirm against the opcode list.  */
1686     default:
1687         tcg_debug_assert(op > INDEX_op_last_generic && op < NB_OPS);
1688         return true;
1689     }
1690 }
1691 
1692 /* Note: we convert the 64 bit args to 32 bit and do some alignment
1693    and endian swap. Maybe it would be better to do the alignment
1694    and endian swap in tcg_reg_alloc_call(). */
1695 void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args)
1696 {
    /*
     * Emit an INDEX_op_call op that calls helper FUNC with the NARGS
     * temps in ARGS.  RET receives the result and may be NULL when no
     * return value is wanted.
     *
     * As used below, the helper's sizemask encodes: bit 0 set = 64-bit
     * return value; for argument i, bit (i+1)*2 set = 64-bit argument
     * and bit (i+1)*2+1 set = signed argument.
     */
1697     int i, real_args, nb_rets, pi;
1698     unsigned sizemask, flags;
1699     TCGHelperInfo *info;
1700     TCGOp *op;
1701 
    /* FUNC must have been registered in helper_table: the lookup result
       is dereferenced without a NULL check.  */
1702     info = g_hash_table_lookup(helper_table, (gpointer)func);
1703     flags = info->flags;
1704     sizemask = info->sizemask;
1705 
1706 #ifdef CONFIG_PLUGIN
1707     /* detect non-plugin helpers */
1708     if (tcg_ctx->plugin_insn && unlikely(strncmp(info->name, "plugin_", 7))) {
1709         tcg_ctx->plugin_insn->calls_helpers = true;
1710     }
1711 #endif
1712 
1713 #if defined(__sparc__) && !defined(__arch64__) \
1714     && !defined(CONFIG_TCG_INTERPRETER)
1715     /* We have 64-bit values in one register, but need to pass as two
1716        separate parameters.  Split them.  */
1717     int orig_sizemask = sizemask;
1718     int orig_nargs = nargs;
1719     TCGv_i64 retl, reth;
1720     TCGTemp *split_args[MAX_OPC_PARAM];
1721 
1722     retl = NULL;
1723     reth = NULL;
1724     if (sizemask != 0) {
1725         for (i = real_args = 0; i < nargs; ++i) {
1726             int is_64bit = sizemask & (1 << (i+1)*2);
1727             if (is_64bit) {
1728                 TCGv_i64 orig = temp_tcgv_i64(args[i]);
1729                 TCGv_i32 h = tcg_temp_new_i32();
1730                 TCGv_i32 l = tcg_temp_new_i32();
1731                 tcg_gen_extr_i64_i32(l, h, orig);
1732                 split_args[real_args++] = tcgv_i32_temp(h);
1733                 split_args[real_args++] = tcgv_i32_temp(l);
1734             } else {
1735                 split_args[real_args++] = args[i];
1736             }
1737         }
        /* From here on work on the split list; clearing sizemask makes
           the generic code below treat every argument as one word.  */
1738         nargs = real_args;
1739         args = split_args;
1740         sizemask = 0;
1741     }
1742 #elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
    /* Replace each non-64-bit argument in ARGS by a new 64-bit temp
       holding its sign- or zero-extension; the temps are freed at the
       end of this function.  */
1743     for (i = 0; i < nargs; ++i) {
1744         int is_64bit = sizemask & (1 << (i+1)*2);
1745         int is_signed = sizemask & (2 << (i+1)*2);
1746         if (!is_64bit) {
1747             TCGv_i64 temp = tcg_temp_new_i64();
1748             TCGv_i64 orig = temp_tcgv_i64(args[i]);
1749             if (is_signed) {
1750                 tcg_gen_ext32s_i64(temp, orig);
1751             } else {
1752                 tcg_gen_ext32u_i64(temp, orig);
1753             }
1754             args[i] = tcgv_i64_temp(temp);
1755         }
1756     }
1757 #endif /* TCG_TARGET_EXTEND_ARGS */
1758 
1759     op = tcg_emit_op(INDEX_op_call);
1760 
    /* op->args[] is filled in order: return temp(s), argument temps,
       then the function pointer and the flags.  pi is the write index.  */
1761     pi = 0;
1762     if (ret != NULL) {
1763 #if defined(__sparc__) && !defined(__arch64__) \
1764     && !defined(CONFIG_TCG_INTERPRETER)
1765         if (orig_sizemask & 1) {
1766             /* The 32-bit ABI is going to return the 64-bit value in
1767                the %o0/%o1 register pair.  Prepare for this by using
1768                two return temporaries, and reassemble below.  */
1769             retl = tcg_temp_new_i64();
1770             reth = tcg_temp_new_i64();
1771             op->args[pi++] = tcgv_i64_arg(reth);
1772             op->args[pi++] = tcgv_i64_arg(retl);
1773             nb_rets = 2;
1774         } else {
1775             op->args[pi++] = temp_arg(ret);
1776             nb_rets = 1;
1777         }
1778 #else
        /* On a host with registers narrower than 64 bits, a 64-bit
           return value occupies RET and RET + 1, ordered by host
           endianness.  */
1779         if (TCG_TARGET_REG_BITS < 64 && (sizemask & 1)) {
1780 #ifdef HOST_WORDS_BIGENDIAN
1781             op->args[pi++] = temp_arg(ret + 1);
1782             op->args[pi++] = temp_arg(ret);
1783 #else
1784             op->args[pi++] = temp_arg(ret);
1785             op->args[pi++] = temp_arg(ret + 1);
1786 #endif
1787             nb_rets = 2;
1788         } else {
1789             op->args[pi++] = temp_arg(ret);
1790             nb_rets = 1;
1791         }
1792 #endif
1793     } else {
1794         nb_rets = 0;
1795     }
1796     TCGOP_CALLO(op) = nb_rets;
1797 
    /* real_args counts argument slots actually emitted, including any
       dummy alignment slots and both halves of split 64-bit values.  */
1798     real_args = 0;
1799     for (i = 0; i < nargs; i++) {
1800         int is_64bit = sizemask & (1 << (i+1)*2);
1801         if (TCG_TARGET_REG_BITS < 64 && is_64bit) {
1802 #ifdef TCG_TARGET_CALL_ALIGN_ARGS
1803             /* some targets want aligned 64 bit args */
1804             if (real_args & 1) {
1805                 op->args[pi++] = TCG_CALL_DUMMY_ARG;
1806                 real_args++;
1807             }
1808 #endif
1809            /* If stack grows up, then we will be placing successive
1810               arguments at lower addresses, which means we need to
1811               reverse the order compared to how we would normally
1812               treat either big or little-endian.  For those arguments
1813               that will wind up in registers, this still works for
1814               HPPA (the only current STACK_GROWSUP target) since the
1815               argument registers are *also* allocated in decreasing
1816               order.  If another such target is added, this logic may
1817               have to get more complicated to differentiate between
1818               stack arguments and register arguments.  */
1819 #if defined(HOST_WORDS_BIGENDIAN) != defined(TCG_TARGET_STACK_GROWSUP)
1820             op->args[pi++] = temp_arg(args[i] + 1);
1821             op->args[pi++] = temp_arg(args[i]);
1822 #else
1823             op->args[pi++] = temp_arg(args[i]);
1824             op->args[pi++] = temp_arg(args[i] + 1);
1825 #endif
1826             real_args += 2;
1827             continue;
1828         }
1829 
1830         op->args[pi++] = temp_arg(args[i]);
1831         real_args++;
1832     }
1833     op->args[pi++] = (uintptr_t)func;
1834     op->args[pi++] = flags;
1835     TCGOP_CALLI(op) = real_args;
1836 
1837     /* Make sure the fields didn't overflow.  */
    /* NOTE(review): the bound on pi is only checked here, after all the
       writes to op->args[] above have already happened.  */
1838     tcg_debug_assert(TCGOP_CALLI(op) == real_args);
1839     tcg_debug_assert(pi <= ARRAY_SIZE(op->args));
1840 
1841 #if defined(__sparc__) && !defined(__arch64__) \
1842     && !defined(CONFIG_TCG_INTERPRETER)
1843     /* Free all of the parts we allocated above.  */
1844     for (i = real_args = 0; i < orig_nargs; ++i) {
1845         int is_64bit = orig_sizemask & (1 << (i+1)*2);
1846         if (is_64bit) {
1847             tcg_temp_free_internal(args[real_args++]);
1848             tcg_temp_free_internal(args[real_args++]);
1849         } else {
1850             real_args++;
1851         }
1852     }
1853     if (orig_sizemask & 1) {
1854         /* The 32-bit ABI returned two 32-bit pieces.  Re-assemble them.
1855            Note that describing these as TCGv_i64 eliminates an unnecessary
1856            zero-extension that tcg_gen_concat_i32_i64 would create.  */
1857         tcg_gen_concat32_i64(temp_tcgv_i64(ret), retl, reth);
1858         tcg_temp_free_i64(retl);
1859         tcg_temp_free_i64(reth);
1860     }
1861 #elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
    /* Free the extension temps created before the call was emitted.  */
1862     for (i = 0; i < nargs; ++i) {
1863         int is_64bit = sizemask & (1 << (i+1)*2);
1864         if (!is_64bit) {
1865             tcg_temp_free_internal(args[i]);
1866         }
1867     }
1868 #endif /* TCG_TARGET_EXTEND_ARGS */
1869 }
1870 
1871 static void tcg_reg_alloc_start(TCGContext *s)
1872 {
1873     int i, n;
1874     TCGTemp *ts;
1875 
1876     for (i = 0, n = s->nb_globals; i < n; i++) {
1877         ts = &s->temps[i];
1878         ts->val_type = (ts->fixed_reg ? TEMP_VAL_REG : TEMP_VAL_MEM);
1879     }
1880     for (n = s->nb_temps; i < n; i++) {
1881         ts = &s->temps[i];
1882         ts->val_type = (ts->temp_local ? TEMP_VAL_MEM : TEMP_VAL_DEAD);
1883         ts->mem_allocated = 0;
1884         ts->fixed_reg = 0;
1885     }
1886 
1887     memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp));
1888 }
1889 
1890 static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size,
1891                                  TCGTemp *ts)
1892 {
1893     int idx = temp_idx(ts);
1894 
1895     if (ts->temp_global) {
1896         pstrcpy(buf, buf_size, ts->name);
1897     } else if (ts->temp_local) {
1898         snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
1899     } else {
1900         snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
1901     }
1902     return buf;
1903 }
1904 
1905 static char *tcg_get_arg_str(TCGContext *s, char *buf,
1906                              int buf_size, TCGArg arg)
1907 {
1908     return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg));
1909 }
1910 
1911 /* Find helper name.  */
1912 static inline const char *tcg_find_helper(TCGContext *s, uintptr_t val)
1913 {
1914     const char *ret = NULL;
1915     if (helper_table) {
1916         TCGHelperInfo *info = g_hash_table_lookup(helper_table, (gpointer)val);
1917         if (info) {
1918             ret = info->name;
1919         }
1920     }
1921     return ret;
1922 }
1923 
1924 static const char * const cond_name[] =
1925 {
1926     [TCG_COND_NEVER] = "never",
1927     [TCG_COND_ALWAYS] = "always",
1928     [TCG_COND_EQ] = "eq",
1929     [TCG_COND_NE] = "ne",
1930     [TCG_COND_LT] = "lt",
1931     [TCG_COND_GE] = "ge",
1932     [TCG_COND_LE] = "le",
1933     [TCG_COND_GT] = "gt",
1934     [TCG_COND_LTU] = "ltu",
1935     [TCG_COND_GEU] = "geu",
1936     [TCG_COND_LEU] = "leu",
1937     [TCG_COND_GTU] = "gtu"
1938 };
1939 
1940 static const char * const ldst_name[] =
1941 {
1942     [MO_UB]   = "ub",
1943     [MO_SB]   = "sb",
1944     [MO_LEUW] = "leuw",
1945     [MO_LESW] = "lesw",
1946     [MO_LEUL] = "leul",
1947     [MO_LESL] = "lesl",
1948     [MO_LEQ]  = "leq",
1949     [MO_BEUW] = "beuw",
1950     [MO_BESW] = "besw",
1951     [MO_BEUL] = "beul",
1952     [MO_BESL] = "besl",
1953     [MO_BEQ]  = "beq",
1954 };
1955 
1956 static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
1957 #ifdef TARGET_ALIGNED_ONLY
1958     [MO_UNALN >> MO_ASHIFT]    = "un+",
1959     [MO_ALIGN >> MO_ASHIFT]    = "",
1960 #else
1961     [MO_UNALN >> MO_ASHIFT]    = "",
1962     [MO_ALIGN >> MO_ASHIFT]    = "al+",
1963 #endif
1964     [MO_ALIGN_2 >> MO_ASHIFT]  = "al2+",
1965     [MO_ALIGN_4 >> MO_ASHIFT]  = "al4+",
1966     [MO_ALIGN_8 >> MO_ASHIFT]  = "al8+",
1967     [MO_ALIGN_16 >> MO_ASHIFT] = "al16+",
1968     [MO_ALIGN_32 >> MO_ASHIFT] = "al32+",
1969     [MO_ALIGN_64 >> MO_ASHIFT] = "al64+",
1970 };
1971 
1972 static inline bool tcg_regset_single(TCGRegSet d)
1973 {
1974     return (d & (d - 1)) == 0;
1975 }
1976 
1977 static inline TCGReg tcg_regset_first(TCGRegSet d)
1978 {
1979     if (TCG_TARGET_NB_REGS <= 32) {
1980         return ctz32(d);
1981     } else {
1982         return ctz64(d);
1983     }
1984 }
1985 
1986 static void tcg_dump_ops(TCGContext *s, bool have_prefs)
1987 {
1988     char buf[128];
1989     TCGOp *op;
1990 
1991     QTAILQ_FOREACH(op, &s->ops, link) {
1992         int i, k, nb_oargs, nb_iargs, nb_cargs;
1993         const TCGOpDef *def;
1994         TCGOpcode c;
1995         int col = 0;
1996 
1997         c = op->opc;
1998         def = &tcg_op_defs[c];
1999 
2000         if (c == INDEX_op_insn_start) {
2001             nb_oargs = 0;
2002             col += qemu_log("\n ----");
2003 
2004             for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
2005                 target_ulong a;
2006 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
2007                 a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
2008 #else
2009                 a = op->args[i];
2010 #endif
2011                 col += qemu_log(" " TARGET_FMT_lx, a);
2012             }
2013         } else if (c == INDEX_op_call) {
2014             /* variable number of arguments */
2015             nb_oargs = TCGOP_CALLO(op);
2016             nb_iargs = TCGOP_CALLI(op);
2017             nb_cargs = def->nb_cargs;
2018 
2019             /* function name, flags, out args */
2020             col += qemu_log(" %s %s,$0x%" TCG_PRIlx ",$%d", def->name,
2021                             tcg_find_helper(s, op->args[nb_oargs + nb_iargs]),
2022                             op->args[nb_oargs + nb_iargs + 1], nb_oargs);
2023             for (i = 0; i < nb_oargs; i++) {
2024                 col += qemu_log(",%s", tcg_get_arg_str(s, buf, sizeof(buf),
2025                                                        op->args[i]));
2026             }
2027             for (i = 0; i < nb_iargs; i++) {
2028                 TCGArg arg = op->args[nb_oargs + i];
2029                 const char *t = "<dummy>";
2030                 if (arg != TCG_CALL_DUMMY_ARG) {
2031                     t = tcg_get_arg_str(s, buf, sizeof(buf), arg);
2032                 }
2033                 col += qemu_log(",%s", t);
2034             }
2035         } else {
2036             col += qemu_log(" %s ", def->name);
2037 
2038             nb_oargs = def->nb_oargs;
2039             nb_iargs = def->nb_iargs;
2040             nb_cargs = def->nb_cargs;
2041 
2042             if (def->flags & TCG_OPF_VECTOR) {
2043                 col += qemu_log("v%d,e%d,", 64 << TCGOP_VECL(op),
2044                                 8 << TCGOP_VECE(op));
2045             }
2046 
2047             k = 0;
2048             for (i = 0; i < nb_oargs; i++) {
2049                 if (k != 0) {
2050                     col += qemu_log(",");
2051                 }
2052                 col += qemu_log("%s", tcg_get_arg_str(s, buf, sizeof(buf),
2053                                                       op->args[k++]));
2054             }
2055             for (i = 0; i < nb_iargs; i++) {
2056                 if (k != 0) {
2057                     col += qemu_log(",");
2058                 }
2059                 col += qemu_log("%s", tcg_get_arg_str(s, buf, sizeof(buf),
2060                                                       op->args[k++]));
2061             }
2062             switch (c) {
2063             case INDEX_op_brcond_i32:
2064             case INDEX_op_setcond_i32:
2065             case INDEX_op_movcond_i32:
2066             case INDEX_op_brcond2_i32:
2067             case INDEX_op_setcond2_i32:
2068             case INDEX_op_brcond_i64:
2069             case INDEX_op_setcond_i64:
2070             case INDEX_op_movcond_i64:
2071             case INDEX_op_cmp_vec:
2072             case INDEX_op_cmpsel_vec:
2073                 if (op->args[k] < ARRAY_SIZE(cond_name)
2074                     && cond_name[op->args[k]]) {
2075                     col += qemu_log(",%s", cond_name[op->args[k++]]);
2076                 } else {
2077                     col += qemu_log(",$0x%" TCG_PRIlx, op->args[k++]);
2078                 }
2079                 i = 1;
2080                 break;
2081             case INDEX_op_qemu_ld_i32:
2082             case INDEX_op_qemu_st_i32:
2083             case INDEX_op_qemu_ld_i64:
2084             case INDEX_op_qemu_st_i64:
2085                 {
2086                     TCGMemOpIdx oi = op->args[k++];
2087                     MemOp op = get_memop(oi);
2088                     unsigned ix = get_mmuidx(oi);
2089 
2090                     if (op & ~(MO_AMASK | MO_BSWAP | MO_SSIZE)) {
2091                         col += qemu_log(",$0x%x,%u", op, ix);
2092                     } else {
2093                         const char *s_al, *s_op;
2094                         s_al = alignment_name[(op & MO_AMASK) >> MO_ASHIFT];
2095                         s_op = ldst_name[op & (MO_BSWAP | MO_SSIZE)];
2096                         col += qemu_log(",%s%s,%u", s_al, s_op, ix);
2097                     }
2098                     i = 1;
2099                 }
2100                 break;
2101             default:
2102                 i = 0;
2103                 break;
2104             }
2105             switch (c) {
2106             case INDEX_op_set_label:
2107             case INDEX_op_br:
2108             case INDEX_op_brcond_i32:
2109             case INDEX_op_brcond_i64:
2110             case INDEX_op_brcond2_i32:
2111                 col += qemu_log("%s$L%d", k ? "," : "",
2112                                 arg_label(op->args[k])->id);
2113                 i++, k++;
2114                 break;
2115             default:
2116                 break;
2117             }
2118             for (; i < nb_cargs; i++, k++) {
2119                 col += qemu_log("%s$0x%" TCG_PRIlx, k ? "," : "", op->args[k]);
2120             }
2121         }
2122 
2123         if (have_prefs || op->life) {
2124 
2125             QemuLogFile *logfile;
2126 
2127             rcu_read_lock();
2128             logfile = atomic_rcu_read(&qemu_logfile);
2129             if (logfile) {
2130                 for (; col < 40; ++col) {
2131                     putc(' ', logfile->fd);
2132                 }
2133             }
2134             rcu_read_unlock();
2135         }
2136 
2137         if (op->life) {
2138             unsigned life = op->life;
2139 
2140             if (life & (SYNC_ARG * 3)) {
2141                 qemu_log("  sync:");
2142                 for (i = 0; i < 2; ++i) {
2143                     if (life & (SYNC_ARG << i)) {
2144                         qemu_log(" %d", i);
2145                     }
2146                 }
2147             }
2148             life /= DEAD_ARG;
2149             if (life) {
2150                 qemu_log("  dead:");
2151                 for (i = 0; life; ++i, life >>= 1) {
2152                     if (life & 1) {
2153                         qemu_log(" %d", i);
2154                     }
2155                 }
2156             }
2157         }
2158 
2159         if (have_prefs) {
2160             for (i = 0; i < nb_oargs; ++i) {
2161                 TCGRegSet set = op->output_pref[i];
2162 
2163                 if (i == 0) {
2164                     qemu_log("  pref=");
2165                 } else {
2166                     qemu_log(",");
2167                 }
2168                 if (set == 0) {
2169                     qemu_log("none");
2170                 } else if (set == MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS)) {
2171                     qemu_log("all");
2172 #ifdef CONFIG_DEBUG_TCG
2173                 } else if (tcg_regset_single(set)) {
2174                     TCGReg reg = tcg_regset_first(set);
2175                     qemu_log("%s", tcg_target_reg_names[reg]);
2176 #endif
2177                 } else if (TCG_TARGET_NB_REGS <= 32) {
2178                     qemu_log("%#x", (uint32_t)set);
2179                 } else {
2180                     qemu_log("%#" PRIx64, (uint64_t)set);
2181                 }
2182             }
2183         }
2184 
2185         qemu_log("\n");
2186     }
2187 }
2188 
2189 /* we give more priority to constraints with less registers */
2190 static int get_constraint_priority(const TCGOpDef *def, int k)
2191 {
2192     const TCGArgConstraint *arg_ct;
2193 
2194     int i, n;
2195     arg_ct = &def->args_ct[k];
2196     if (arg_ct->ct & TCG_CT_ALIAS) {
2197         /* an alias is equivalent to a single register */
2198         n = 1;
2199     } else {
2200         if (!(arg_ct->ct & TCG_CT_REG))
2201             return 0;
2202         n = 0;
2203         for(i = 0; i < TCG_TARGET_NB_REGS; i++) {
2204             if (tcg_regset_test_reg(arg_ct->u.regs, i))
2205                 n++;
2206         }
2207     }
2208     return TCG_TARGET_NB_REGS - n + 1;
2209 }
2210 
2211 /* sort from highest priority to lowest */
2212 static void sort_constraints(TCGOpDef *def, int start, int n)
2213 {
2214     int i, j, p1, p2, tmp;
2215 
2216     for(i = 0; i < n; i++)
2217         def->sorted_args[start + i] = start + i;
2218     if (n <= 1)
2219         return;
2220     for(i = 0; i < n - 1; i++) {
2221         for(j = i + 1; j < n; j++) {
2222             p1 = get_constraint_priority(def, def->sorted_args[start + i]);
2223             p2 = get_constraint_priority(def, def->sorted_args[start + j]);
2224             if (p1 < p2) {
2225                 tmp = def->sorted_args[start + i];
2226                 def->sorted_args[start + i] = def->sorted_args[start + j];
2227                 def->sorted_args[start + j] = tmp;
2228             }
2229         }
2230     }
2231 }
2232 
2233 static void process_op_defs(TCGContext *s)
2234 {
2235     TCGOpcode op;
2236 
2237     for (op = 0; op < NB_OPS; op++) {
2238         TCGOpDef *def = &tcg_op_defs[op];
2239         const TCGTargetOpDef *tdefs;
2240         TCGType type;
2241         int i, nb_args;
2242 
2243         if (def->flags & TCG_OPF_NOT_PRESENT) {
2244             continue;
2245         }
2246 
2247         nb_args = def->nb_iargs + def->nb_oargs;
2248         if (nb_args == 0) {
2249             continue;
2250         }
2251 
2252         tdefs = tcg_target_op_def(op);
2253         /* Missing TCGTargetOpDef entry. */
2254         tcg_debug_assert(tdefs != NULL);
2255 
2256         type = (def->flags & TCG_OPF_64BIT ? TCG_TYPE_I64 : TCG_TYPE_I32);
2257         for (i = 0; i < nb_args; i++) {
2258             const char *ct_str = tdefs->args_ct_str[i];
2259             /* Incomplete TCGTargetOpDef entry. */
2260             tcg_debug_assert(ct_str != NULL);
2261 
2262             def->args_ct[i].u.regs = 0;
2263             def->args_ct[i].ct = 0;
2264             while (*ct_str != '\0') {
2265                 switch(*ct_str) {
2266                 case '0' ... '9':
2267                     {
2268                         int oarg = *ct_str - '0';
2269                         tcg_debug_assert(ct_str == tdefs->args_ct_str[i]);
2270                         tcg_debug_assert(oarg < def->nb_oargs);
2271                         tcg_debug_assert(def->args_ct[oarg].ct & TCG_CT_REG);
2272                         /* TCG_CT_ALIAS is for the output arguments.
2273                            The input is tagged with TCG_CT_IALIAS. */
2274                         def->args_ct[i] = def->args_ct[oarg];
2275                         def->args_ct[oarg].ct |= TCG_CT_ALIAS;
2276                         def->args_ct[oarg].alias_index = i;
2277                         def->args_ct[i].ct |= TCG_CT_IALIAS;
2278                         def->args_ct[i].alias_index = oarg;
2279                     }
2280                     ct_str++;
2281                     break;
2282                 case '&':
2283                     def->args_ct[i].ct |= TCG_CT_NEWREG;
2284                     ct_str++;
2285                     break;
2286                 case 'i':
2287                     def->args_ct[i].ct |= TCG_CT_CONST;
2288                     ct_str++;
2289                     break;
2290                 default:
2291                     ct_str = target_parse_constraint(&def->args_ct[i],
2292                                                      ct_str, type);
2293                     /* Typo in TCGTargetOpDef constraint. */
2294                     tcg_debug_assert(ct_str != NULL);
2295                 }
2296             }
2297         }
2298 
2299         /* TCGTargetOpDef entry with too much information? */
2300         tcg_debug_assert(i == TCG_MAX_OP_ARGS || tdefs->args_ct_str[i] == NULL);
2301 
2302         /* sort the constraints (XXX: this is just an heuristic) */
2303         sort_constraints(def, 0, def->nb_oargs);
2304         sort_constraints(def, def->nb_oargs, def->nb_iargs);
2305     }
2306 }
2307 
2308 void tcg_op_remove(TCGContext *s, TCGOp *op)
2309 {
2310     TCGLabel *label;
2311 
2312     switch (op->opc) {
2313     case INDEX_op_br:
2314         label = arg_label(op->args[0]);
2315         label->refs--;
2316         break;
2317     case INDEX_op_brcond_i32:
2318     case INDEX_op_brcond_i64:
2319         label = arg_label(op->args[3]);
2320         label->refs--;
2321         break;
2322     case INDEX_op_brcond2_i32:
2323         label = arg_label(op->args[5]);
2324         label->refs--;
2325         break;
2326     default:
2327         break;
2328     }
2329 
2330     QTAILQ_REMOVE(&s->ops, op, link);
2331     QTAILQ_INSERT_TAIL(&s->free_ops, op, link);
2332     s->nb_ops--;
2333 
2334 #ifdef CONFIG_PROFILER
2335     atomic_set(&s->prof.del_op_count, s->prof.del_op_count + 1);
2336 #endif
2337 }
2338 
2339 static TCGOp *tcg_op_alloc(TCGOpcode opc)
2340 {
2341     TCGContext *s = tcg_ctx;
2342     TCGOp *op;
2343 
2344     if (likely(QTAILQ_EMPTY(&s->free_ops))) {
2345         op = tcg_malloc(sizeof(TCGOp));
2346     } else {
2347         op = QTAILQ_FIRST(&s->free_ops);
2348         QTAILQ_REMOVE(&s->free_ops, op, link);
2349     }
2350     memset(op, 0, offsetof(TCGOp, link));
2351     op->opc = opc;
2352     s->nb_ops++;
2353 
2354     return op;
2355 }
2356 
2357 TCGOp *tcg_emit_op(TCGOpcode opc)
2358 {
2359     TCGOp *op = tcg_op_alloc(opc);
2360     QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
2361     return op;
2362 }
2363 
2364 TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op, TCGOpcode opc)
2365 {
2366     TCGOp *new_op = tcg_op_alloc(opc);
2367     QTAILQ_INSERT_BEFORE(old_op, new_op, link);
2368     return new_op;
2369 }
2370 
2371 TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op, TCGOpcode opc)
2372 {
2373     TCGOp *new_op = tcg_op_alloc(opc);
2374     QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link);
2375     return new_op;
2376 }
2377 
2378 /* Reachable analysis : remove unreachable code.  */
2379 static void reachable_code_pass(TCGContext *s)
2380 {
2381     TCGOp *op, *op_next;
2382     bool dead = false;
2383 
2384     QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
2385         bool remove = dead;
2386         TCGLabel *label;
2387         int call_flags;
2388 
2389         switch (op->opc) {
2390         case INDEX_op_set_label:
2391             label = arg_label(op->args[0]);
2392             if (label->refs == 0) {
2393                 /*
2394                  * While there is an occasional backward branch, virtually
2395                  * all branches generated by the translators are forward.
2396                  * Which means that generally we will have already removed
2397                  * all references to the label that will be, and there is
2398                  * little to be gained by iterating.
2399                  */
2400                 remove = true;
2401             } else {
2402                 /* Once we see a label, insns become live again.  */
2403                 dead = false;
2404                 remove = false;
2405 
2406                 /*
2407                  * Optimization can fold conditional branches to unconditional.
2408                  * If we find a label with one reference which is preceded by
2409                  * an unconditional branch to it, remove both.  This needed to
2410                  * wait until the dead code in between them was removed.
2411                  */
2412                 if (label->refs == 1) {
2413                     TCGOp *op_prev = QTAILQ_PREV(op, link);
2414                     if (op_prev->opc == INDEX_op_br &&
2415                         label == arg_label(op_prev->args[0])) {
2416                         tcg_op_remove(s, op_prev);
2417                         remove = true;
2418                     }
2419                 }
2420             }
2421             break;
2422 
2423         case INDEX_op_br:
2424         case INDEX_op_exit_tb:
2425         case INDEX_op_goto_ptr:
2426             /* Unconditional branches; everything following is dead.  */
2427             dead = true;
2428             break;
2429 
2430         case INDEX_op_call:
2431             /* Notice noreturn helper calls, raising exceptions.  */
2432             call_flags = op->args[TCGOP_CALLO(op) + TCGOP_CALLI(op) + 1];
2433             if (call_flags & TCG_CALL_NO_RETURN) {
2434                 dead = true;
2435             }
2436             break;
2437 
2438         case INDEX_op_insn_start:
2439             /* Never remove -- we need to keep these for unwind.  */
2440             remove = false;
2441             break;
2442 
2443         default:
2444             break;
2445         }
2446 
2447         if (remove) {
2448             tcg_op_remove(s, op);
2449         }
2450     }
2451 }
2452 
/* Bits stored in a temp's 'state' field during liveness analysis.  */
#define TS_DEAD  (1 << 0)
#define TS_MEM   (1 << 1)

/*
 * Test the dead / sync bit of argument number n in the local variable
 * 'arg_life', which must be in scope wherever these macros are used.
 */
#define IS_DEAD_ARG(n)   (arg_life & (DEAD_ARG << (n)))
#define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n)))
2458 
2459 /* For liveness_pass_1, the register preferences for a given temp.  */
2460 static inline TCGRegSet *la_temp_pref(TCGTemp *ts)
2461 {
2462     return ts->state_ptr;
2463 }
2464 
2465 /* For liveness_pass_1, reset the preferences for a given temp to the
2466  * maximal regset for its type.
2467  */
2468 static inline void la_reset_pref(TCGTemp *ts)
2469 {
2470     *la_temp_pref(ts)
2471         = (ts->state == TS_DEAD ? 0 : tcg_target_available_regs[ts->type]);
2472 }
2473 
2474 /* liveness analysis: end of function: all temps are dead, and globals
2475    should be in memory. */
2476 static void la_func_end(TCGContext *s, int ng, int nt)
2477 {
2478     int i;
2479 
2480     for (i = 0; i < ng; ++i) {
2481         s->temps[i].state = TS_DEAD | TS_MEM;
2482         la_reset_pref(&s->temps[i]);
2483     }
2484     for (i = ng; i < nt; ++i) {
2485         s->temps[i].state = TS_DEAD;
2486         la_reset_pref(&s->temps[i]);
2487     }
2488 }
2489 
2490 /* liveness analysis: end of basic block: all temps are dead, globals
2491    and local temps should be in memory. */
2492 static void la_bb_end(TCGContext *s, int ng, int nt)
2493 {
2494     int i;
2495 
2496     for (i = 0; i < ng; ++i) {
2497         s->temps[i].state = TS_DEAD | TS_MEM;
2498         la_reset_pref(&s->temps[i]);
2499     }
2500     for (i = ng; i < nt; ++i) {
2501         s->temps[i].state = (s->temps[i].temp_local
2502                              ? TS_DEAD | TS_MEM
2503                              : TS_DEAD);
2504         la_reset_pref(&s->temps[i]);
2505     }
2506 }
2507 
2508 /* liveness analysis: sync globals back to memory.  */
2509 static void la_global_sync(TCGContext *s, int ng)
2510 {
2511     int i;
2512 
2513     for (i = 0; i < ng; ++i) {
2514         int state = s->temps[i].state;
2515         s->temps[i].state = state | TS_MEM;
2516         if (state == TS_DEAD) {
2517             /* If the global was previously dead, reset prefs.  */
2518             la_reset_pref(&s->temps[i]);
2519         }
2520     }
2521 }
2522 
2523 /* liveness analysis: sync globals back to memory and kill.  */
2524 static void la_global_kill(TCGContext *s, int ng)
2525 {
2526     int i;
2527 
2528     for (i = 0; i < ng; i++) {
2529         s->temps[i].state = TS_DEAD | TS_MEM;
2530         la_reset_pref(&s->temps[i]);
2531     }
2532 }
2533 
2534 /* liveness analysis: note live globals crossing calls.  */
2535 static void la_cross_call(TCGContext *s, int nt)
2536 {
2537     TCGRegSet mask = ~tcg_target_call_clobber_regs;
2538     int i;
2539 
2540     for (i = 0; i < nt; i++) {
2541         TCGTemp *ts = &s->temps[i];
2542         if (!(ts->state & TS_DEAD)) {
2543             TCGRegSet *pset = la_temp_pref(ts);
2544             TCGRegSet set = *pset;
2545 
2546             set &= mask;
2547             /* If the combination is not possible, restart.  */
2548             if (set == 0) {
2549                 set = tcg_target_available_regs[ts->type] & mask;
2550             }
2551             *pset = set;
2552         }
2553     }
2554 }
2555 
/* Liveness analysis: update the opc_arg_life array to tell if a
   given input argument is dead.  Instructions updating dead
   temporaries are removed.  */
2559 static void liveness_pass_1(TCGContext *s)
2560 {
2561     int nb_globals = s->nb_globals;
2562     int nb_temps = s->nb_temps;
2563     TCGOp *op, *op_prev;
2564     TCGRegSet *prefs;
2565     int i;
2566 
2567     prefs = tcg_malloc(sizeof(TCGRegSet) * nb_temps);
2568     for (i = 0; i < nb_temps; ++i) {
2569         s->temps[i].state_ptr = prefs + i;
2570     }
2571 
2572     /* ??? Should be redundant with the exit_tb that ends the TB.  */
2573     la_func_end(s, nb_globals, nb_temps);
2574 
2575     QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, link, op_prev) {
2576         int nb_iargs, nb_oargs;
2577         TCGOpcode opc_new, opc_new2;
2578         bool have_opc_new2;
2579         TCGLifeData arg_life = 0;
2580         TCGTemp *ts;
2581         TCGOpcode opc = op->opc;
2582         const TCGOpDef *def = &tcg_op_defs[opc];
2583 
2584         switch (opc) {
2585         case INDEX_op_call:
2586             {
2587                 int call_flags;
2588                 int nb_call_regs;
2589 
2590                 nb_oargs = TCGOP_CALLO(op);
2591                 nb_iargs = TCGOP_CALLI(op);
2592                 call_flags = op->args[nb_oargs + nb_iargs + 1];
2593 
2594                 /* pure functions can be removed if their result is unused */
2595                 if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
2596                     for (i = 0; i < nb_oargs; i++) {
2597                         ts = arg_temp(op->args[i]);
2598                         if (ts->state != TS_DEAD) {
2599                             goto do_not_remove_call;
2600                         }
2601                     }
2602                     goto do_remove;
2603                 }
2604             do_not_remove_call:
2605 
2606                 /* Output args are dead.  */
2607                 for (i = 0; i < nb_oargs; i++) {
2608                     ts = arg_temp(op->args[i]);
2609                     if (ts->state & TS_DEAD) {
2610                         arg_life |= DEAD_ARG << i;
2611                     }
2612                     if (ts->state & TS_MEM) {
2613                         arg_life |= SYNC_ARG << i;
2614                     }
2615                     ts->state = TS_DEAD;
2616                     la_reset_pref(ts);
2617 
2618                     /* Not used -- it will be tcg_target_call_oarg_regs[i].  */
2619                     op->output_pref[i] = 0;
2620                 }
2621 
2622                 if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
2623                                     TCG_CALL_NO_READ_GLOBALS))) {
2624                     la_global_kill(s, nb_globals);
2625                 } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
2626                     la_global_sync(s, nb_globals);
2627                 }
2628 
2629                 /* Record arguments that die in this helper.  */
2630                 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
2631                     ts = arg_temp(op->args[i]);
2632                     if (ts && ts->state & TS_DEAD) {
2633                         arg_life |= DEAD_ARG << i;
2634                     }
2635                 }
2636 
2637                 /* For all live registers, remove call-clobbered prefs.  */
2638                 la_cross_call(s, nb_temps);
2639 
2640                 nb_call_regs = ARRAY_SIZE(tcg_target_call_iarg_regs);
2641 
2642                 /* Input arguments are live for preceding opcodes.  */
2643                 for (i = 0; i < nb_iargs; i++) {
2644                     ts = arg_temp(op->args[i + nb_oargs]);
2645                     if (ts && ts->state & TS_DEAD) {
2646                         /* For those arguments that die, and will be allocated
2647                          * in registers, clear the register set for that arg,
2648                          * to be filled in below.  For args that will be on
2649                          * the stack, reset to any available reg.
2650                          */
2651                         *la_temp_pref(ts)
2652                             = (i < nb_call_regs ? 0 :
2653                                tcg_target_available_regs[ts->type]);
2654                         ts->state &= ~TS_DEAD;
2655                     }
2656                 }
2657 
2658                 /* For each input argument, add its input register to prefs.
2659                    If a temp is used once, this produces a single set bit.  */
2660                 for (i = 0; i < MIN(nb_call_regs, nb_iargs); i++) {
2661                     ts = arg_temp(op->args[i + nb_oargs]);
2662                     if (ts) {
2663                         tcg_regset_set_reg(*la_temp_pref(ts),
2664                                            tcg_target_call_iarg_regs[i]);
2665                     }
2666                 }
2667             }
2668             break;
2669         case INDEX_op_insn_start:
2670             break;
2671         case INDEX_op_discard:
2672             /* mark the temporary as dead */
2673             ts = arg_temp(op->args[0]);
2674             ts->state = TS_DEAD;
2675             la_reset_pref(ts);
2676             break;
2677 
2678         case INDEX_op_add2_i32:
2679             opc_new = INDEX_op_add_i32;
2680             goto do_addsub2;
2681         case INDEX_op_sub2_i32:
2682             opc_new = INDEX_op_sub_i32;
2683             goto do_addsub2;
2684         case INDEX_op_add2_i64:
2685             opc_new = INDEX_op_add_i64;
2686             goto do_addsub2;
2687         case INDEX_op_sub2_i64:
2688             opc_new = INDEX_op_sub_i64;
2689         do_addsub2:
2690             nb_iargs = 4;
2691             nb_oargs = 2;
2692             /* Test if the high part of the operation is dead, but not
2693                the low part.  The result can be optimized to a simple
2694                add or sub.  This happens often for x86_64 guest when the
2695                cpu mode is set to 32 bit.  */
2696             if (arg_temp(op->args[1])->state == TS_DEAD) {
2697                 if (arg_temp(op->args[0])->state == TS_DEAD) {
2698                     goto do_remove;
2699                 }
2700                 /* Replace the opcode and adjust the args in place,
2701                    leaving 3 unused args at the end.  */
2702                 op->opc = opc = opc_new;
2703                 op->args[1] = op->args[2];
2704                 op->args[2] = op->args[4];
2705                 /* Fall through and mark the single-word operation live.  */
2706                 nb_iargs = 2;
2707                 nb_oargs = 1;
2708             }
2709             goto do_not_remove;
2710 
2711         case INDEX_op_mulu2_i32:
2712             opc_new = INDEX_op_mul_i32;
2713             opc_new2 = INDEX_op_muluh_i32;
2714             have_opc_new2 = TCG_TARGET_HAS_muluh_i32;
2715             goto do_mul2;
2716         case INDEX_op_muls2_i32:
2717             opc_new = INDEX_op_mul_i32;
2718             opc_new2 = INDEX_op_mulsh_i32;
2719             have_opc_new2 = TCG_TARGET_HAS_mulsh_i32;
2720             goto do_mul2;
2721         case INDEX_op_mulu2_i64:
2722             opc_new = INDEX_op_mul_i64;
2723             opc_new2 = INDEX_op_muluh_i64;
2724             have_opc_new2 = TCG_TARGET_HAS_muluh_i64;
2725             goto do_mul2;
2726         case INDEX_op_muls2_i64:
2727             opc_new = INDEX_op_mul_i64;
2728             opc_new2 = INDEX_op_mulsh_i64;
2729             have_opc_new2 = TCG_TARGET_HAS_mulsh_i64;
2730             goto do_mul2;
2731         do_mul2:
2732             nb_iargs = 2;
2733             nb_oargs = 2;
2734             if (arg_temp(op->args[1])->state == TS_DEAD) {
2735                 if (arg_temp(op->args[0])->state == TS_DEAD) {
2736                     /* Both parts of the operation are dead.  */
2737                     goto do_remove;
2738                 }
2739                 /* The high part of the operation is dead; generate the low. */
2740                 op->opc = opc = opc_new;
2741                 op->args[1] = op->args[2];
2742                 op->args[2] = op->args[3];
2743             } else if (arg_temp(op->args[0])->state == TS_DEAD && have_opc_new2) {
2744                 /* The low part of the operation is dead; generate the high. */
2745                 op->opc = opc = opc_new2;
2746                 op->args[0] = op->args[1];
2747                 op->args[1] = op->args[2];
2748                 op->args[2] = op->args[3];
2749             } else {
2750                 goto do_not_remove;
2751             }
2752             /* Mark the single-word operation live.  */
2753             nb_oargs = 1;
2754             goto do_not_remove;
2755 
2756         default:
2757             /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
2758             nb_iargs = def->nb_iargs;
2759             nb_oargs = def->nb_oargs;
2760 
2761             /* Test if the operation can be removed because all
2762                its outputs are dead. We assume that nb_oargs == 0
2763                implies side effects */
2764             if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) {
2765                 for (i = 0; i < nb_oargs; i++) {
2766                     if (arg_temp(op->args[i])->state != TS_DEAD) {
2767                         goto do_not_remove;
2768                     }
2769                 }
2770                 goto do_remove;
2771             }
2772             goto do_not_remove;
2773 
2774         do_remove:
2775             tcg_op_remove(s, op);
2776             break;
2777 
2778         do_not_remove:
2779             for (i = 0; i < nb_oargs; i++) {
2780                 ts = arg_temp(op->args[i]);
2781 
2782                 /* Remember the preference of the uses that followed.  */
2783                 op->output_pref[i] = *la_temp_pref(ts);
2784 
2785                 /* Output args are dead.  */
2786                 if (ts->state & TS_DEAD) {
2787                     arg_life |= DEAD_ARG << i;
2788                 }
2789                 if (ts->state & TS_MEM) {
2790                     arg_life |= SYNC_ARG << i;
2791                 }
2792                 ts->state = TS_DEAD;
2793                 la_reset_pref(ts);
2794             }
2795 
2796             /* If end of basic block, update.  */
2797             if (def->flags & TCG_OPF_BB_EXIT) {
2798                 la_func_end(s, nb_globals, nb_temps);
2799             } else if (def->flags & TCG_OPF_BB_END) {
2800                 la_bb_end(s, nb_globals, nb_temps);
2801             } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
2802                 la_global_sync(s, nb_globals);
2803                 if (def->flags & TCG_OPF_CALL_CLOBBER) {
2804                     la_cross_call(s, nb_temps);
2805                 }
2806             }
2807 
2808             /* Record arguments that die in this opcode.  */
2809             for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
2810                 ts = arg_temp(op->args[i]);
2811                 if (ts->state & TS_DEAD) {
2812                     arg_life |= DEAD_ARG << i;
2813                 }
2814             }
2815 
2816             /* Input arguments are live for preceding opcodes.  */
2817             for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
2818                 ts = arg_temp(op->args[i]);
2819                 if (ts->state & TS_DEAD) {
2820                     /* For operands that were dead, initially allow
2821                        all regs for the type.  */
2822                     *la_temp_pref(ts) = tcg_target_available_regs[ts->type];
2823                     ts->state &= ~TS_DEAD;
2824                 }
2825             }
2826 
2827             /* Incorporate constraints for this operand.  */
2828             switch (opc) {
2829             case INDEX_op_mov_i32:
2830             case INDEX_op_mov_i64:
2831                 /* Note that these are TCG_OPF_NOT_PRESENT and do not
2832                    have proper constraints.  That said, special case
2833                    moves to propagate preferences backward.  */
2834                 if (IS_DEAD_ARG(1)) {
2835                     *la_temp_pref(arg_temp(op->args[0]))
2836                         = *la_temp_pref(arg_temp(op->args[1]));
2837                 }
2838                 break;
2839 
2840             default:
2841                 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
2842                     const TCGArgConstraint *ct = &def->args_ct[i];
2843                     TCGRegSet set, *pset;
2844 
2845                     ts = arg_temp(op->args[i]);
2846                     pset = la_temp_pref(ts);
2847                     set = *pset;
2848 
2849                     set &= ct->u.regs;
2850                     if (ct->ct & TCG_CT_IALIAS) {
2851                         set &= op->output_pref[ct->alias_index];
2852                     }
2853                     /* If the combination is not possible, restart.  */
2854                     if (set == 0) {
2855                         set = ct->u.regs;
2856                     }
2857                     *pset = set;
2858                 }
2859                 break;
2860             }
2861             break;
2862         }
2863         op->life = arg_life;
2864     }
2865 }
2866 
/*
 * Liveness analysis: Convert indirect regs to direct temporaries.
 *
 * For every global flagged indirect_reg a fresh "direct" temp is allocated
 * and recorded in the global's state_ptr.  The op list is then walked once,
 * front to back:
 *   - an ld_i32/ld_i64 op is inserted before an op that reads an indirect
 *     global whose tracked state is TS_DEAD;
 *   - every argument naming an indirect global is rewritten to name its
 *     direct temp;
 *   - an st_i32/st_i64 op is inserted after an op whose output is flagged
 *     as needing a sync in op->life.
 *
 * Returns true if at least one op argument was rewritten.
 */
static bool liveness_pass_2(TCGContext *s)
{
    int nb_globals = s->nb_globals;
    int nb_temps, i;
    bool changes = false;
    TCGOp *op, *op_next;

    /* Create a temporary for each indirect global.  */
    for (i = 0; i < nb_globals; ++i) {
        TCGTemp *its = &s->temps[i];
        if (its->indirect_reg) {
            /* The direct temp mirrors the type of the global it shadows.  */
            TCGTemp *dts = tcg_temp_alloc(s);
            dts->type = its->type;
            dts->base_type = its->base_type;
            its->state_ptr = dts;
        } else {
            /* A NULL state_ptr means "leave references to this temp alone". */
            its->state_ptr = NULL;
        }
        /* All globals begin dead.  */
        its->state = TS_DEAD;
    }
    /*
     * Continue from i == nb_globals over the remaining temps.  s->nb_temps
     * is sampled here, after the allocations above (presumably so that the
     * newly created direct temps are covered as well -- tcg_temp_alloc is
     * not visible from here).
     */
    for (nb_temps = s->nb_temps; i < nb_temps; ++i) {
        TCGTemp *its = &s->temps[i];
        its->state_ptr = NULL;
        its->state = TS_DEAD;
    }

    /* The _SAFE iterator is needed: ops are inserted and removed below.  */
    QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
        TCGOpcode opc = op->opc;
        const TCGOpDef *def = &tcg_op_defs[opc];
        /* Presumably consumed by the IS_DEAD_ARG/NEED_SYNC_ARG macros.  */
        TCGLifeData arg_life = op->life;
        int nb_iargs, nb_oargs, call_flags;
        TCGTemp *arg_ts, *dir_ts;

        if (opc == INDEX_op_call) {
            /* Calls carry their own argument counts and flags word.  */
            nb_oargs = TCGOP_CALLO(op);
            nb_iargs = TCGOP_CALLI(op);
            call_flags = op->args[nb_oargs + nb_iargs + 1];
        } else {
            nb_iargs = def->nb_iargs;
            nb_oargs = def->nb_oargs;

            /* Set flags similar to how calls require.  */
            if (def->flags & TCG_OPF_BB_END) {
                /* Like writing globals: save_globals */
                call_flags = 0;
            } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
                /* Like reading globals: sync_globals */
                call_flags = TCG_CALL_NO_WRITE_GLOBALS;
            } else {
                /* No effect on globals.  */
                call_flags = (TCG_CALL_NO_READ_GLOBALS |
                              TCG_CALL_NO_WRITE_GLOBALS);
            }
        }

        /* Make sure that input arguments are available.  */
        for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
            arg_ts = arg_temp(op->args[i]);
            if (arg_ts) {
                dir_ts = arg_ts->state_ptr;
                if (dir_ts && arg_ts->state == TS_DEAD) {
                    /* Reload the direct temp from the global's memory slot. */
                    TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32
                                      ? INDEX_op_ld_i32
                                      : INDEX_op_ld_i64);
                    TCGOp *lop = tcg_op_insert_before(s, op, lopc);

                    lop->args[0] = temp_arg(dir_ts);
                    lop->args[1] = temp_arg(arg_ts->mem_base);
                    lop->args[2] = arg_ts->mem_offset;

                    /* Loaded, but synced with memory.  */
                    arg_ts->state = TS_MEM;
                }
            }
        }

        /* Perform input replacement, and mark inputs that became dead.
           No action is required except keeping temp_state up to date
           so that we reload when needed.  */
        for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
            arg_ts = arg_temp(op->args[i]);
            if (arg_ts) {
                dir_ts = arg_ts->state_ptr;
                if (dir_ts) {
                    op->args[i] = temp_arg(dir_ts);
                    changes = true;
                    if (IS_DEAD_ARG(i)) {
                        arg_ts->state = TS_DEAD;
                    }
                }
            }
        }

        /* Liveness analysis should ensure that the following are
           all correct, for call sites and basic block end points.  */
        if (call_flags & TCG_CALL_NO_READ_GLOBALS) {
            /* Nothing to do */
        } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) {
            for (i = 0; i < nb_globals; ++i) {
                /* Liveness should see that globals are synced back,
                   that is, either TS_DEAD or TS_MEM.  */
                arg_ts = &s->temps[i];
                tcg_debug_assert(arg_ts->state_ptr == 0
                                 || arg_ts->state != 0);
            }
        } else {
            for (i = 0; i < nb_globals; ++i) {
                /* Liveness should see that globals are saved back,
                   that is, TS_DEAD, waiting to be reloaded.  */
                arg_ts = &s->temps[i];
                tcg_debug_assert(arg_ts->state_ptr == 0
                                 || arg_ts->state == TS_DEAD);
            }
        }

        /* Outputs become available.  */
        if (opc == INDEX_op_mov_i32 || opc == INDEX_op_mov_i64) {
            arg_ts = arg_temp(op->args[0]);
            dir_ts = arg_ts->state_ptr;
            if (dir_ts) {
                op->args[0] = temp_arg(dir_ts);
                changes = true;

                /* The output is now live and modified.  */
                arg_ts->state = 0;

                if (NEED_SYNC_ARG(0)) {
                    TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
                                      ? INDEX_op_st_i32
                                      : INDEX_op_st_i64);
                    TCGOp *sop = tcg_op_insert_after(s, op, sopc);
                    TCGTemp *out_ts = dir_ts;

                    if (IS_DEAD_ARG(0)) {
                        /*
                         * The destination is dead after the sync, so store
                         * the mov's source operand directly and drop the
                         * mov itself.
                         */
                        out_ts = arg_temp(op->args[1]);
                        arg_ts->state = TS_DEAD;
                        tcg_op_remove(s, op);
                    } else {
                        arg_ts->state = TS_MEM;
                    }

                    sop->args[0] = temp_arg(out_ts);
                    sop->args[1] = temp_arg(arg_ts->mem_base);
                    sop->args[2] = arg_ts->mem_offset;
                } else {
                    /* A dead output that is never synced is not expected.  */
                    tcg_debug_assert(!IS_DEAD_ARG(0));
                }
            }
        } else {
            for (i = 0; i < nb_oargs; i++) {
                arg_ts = arg_temp(op->args[i]);
                dir_ts = arg_ts->state_ptr;
                if (!dir_ts) {
                    continue;
                }
                op->args[i] = temp_arg(dir_ts);
                changes = true;

                /* The output is now live and modified.  */
                arg_ts->state = 0;

                /* Sync outputs upon their last write.  */
                if (NEED_SYNC_ARG(i)) {
                    TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
                                      ? INDEX_op_st_i32
                                      : INDEX_op_st_i64);
                    TCGOp *sop = tcg_op_insert_after(s, op, sopc);

                    sop->args[0] = temp_arg(dir_ts);
                    sop->args[1] = temp_arg(arg_ts->mem_base);
                    sop->args[2] = arg_ts->mem_offset;

                    arg_ts->state = TS_MEM;
                }
                /* Drop outputs that are dead.  */
                if (IS_DEAD_ARG(i)) {
                    arg_ts->state = TS_DEAD;
                }
            }
        }
    }

    return changes;
}
3053 
3054 #ifdef CONFIG_DEBUG_TCG
3055 static void dump_regs(TCGContext *s)
3056 {
3057     TCGTemp *ts;
3058     int i;
3059     char buf[64];
3060 
3061     for(i = 0; i < s->nb_temps; i++) {
3062         ts = &s->temps[i];
3063         printf("  %10s: ", tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts));
3064         switch(ts->val_type) {
3065         case TEMP_VAL_REG:
3066             printf("%s", tcg_target_reg_names[ts->reg]);
3067             break;
3068         case TEMP_VAL_MEM:
3069             printf("%d(%s)", (int)ts->mem_offset,
3070                    tcg_target_reg_names[ts->mem_base->reg]);
3071             break;
3072         case TEMP_VAL_CONST:
3073             printf("$0x%" TCG_PRIlx, ts->val);
3074             break;
3075         case TEMP_VAL_DEAD:
3076             printf("D");
3077             break;
3078         default:
3079             printf("???");
3080             break;
3081         }
3082         printf("\n");
3083     }
3084 
3085     for(i = 0; i < TCG_TARGET_NB_REGS; i++) {
3086         if (s->reg_to_temp[i] != NULL) {
3087             printf("%s: %s\n",
3088                    tcg_target_reg_names[i],
3089                    tcg_get_arg_str_ptr(s, buf, sizeof(buf), s->reg_to_temp[i]));
3090         }
3091     }
3092 }
3093 
3094 static void check_regs(TCGContext *s)
3095 {
3096     int reg;
3097     int k;
3098     TCGTemp *ts;
3099     char buf[64];
3100 
3101     for (reg = 0; reg < TCG_TARGET_NB_REGS; reg++) {
3102         ts = s->reg_to_temp[reg];
3103         if (ts != NULL) {
3104             if (ts->val_type != TEMP_VAL_REG || ts->reg != reg) {
3105                 printf("Inconsistency for register %s:\n",
3106                        tcg_target_reg_names[reg]);
3107                 goto fail;
3108             }
3109         }
3110     }
3111     for (k = 0; k < s->nb_temps; k++) {
3112         ts = &s->temps[k];
3113         if (ts->val_type == TEMP_VAL_REG && !ts->fixed_reg
3114             && s->reg_to_temp[ts->reg] != ts) {
3115             printf("Inconsistency for temp %s:\n",
3116                    tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts));
3117         fail:
3118             printf("reg state:\n");
3119             dump_regs(s);
3120             tcg_abort();
3121         }
3122     }
3123 }
3124 #endif
3125 
3126 static void temp_allocate_frame(TCGContext *s, TCGTemp *ts)
3127 {
3128 #if !(defined(__sparc__) && TCG_TARGET_REG_BITS == 64)
3129     /* Sparc64 stack is accessed with offset of 2047 */
3130     s->current_frame_offset = (s->current_frame_offset +
3131                                (tcg_target_long)sizeof(tcg_target_long) - 1) &
3132         ~(sizeof(tcg_target_long) - 1);
3133 #endif
3134     if (s->current_frame_offset + (tcg_target_long)sizeof(tcg_target_long) >
3135         s->frame_end) {
3136         tcg_abort();
3137     }
3138     ts->mem_offset = s->current_frame_offset;
3139     ts->mem_base = s->frame_temp;
3140     ts->mem_allocated = 1;
3141     s->current_frame_offset += sizeof(tcg_target_long);
3142 }
3143 
3144 static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet);
3145 
3146 /* Mark a temporary as free or dead.  If 'free_or_dead' is negative,
3147    mark it free; otherwise mark it dead.  */
3148 static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead)
3149 {
3150     if (ts->fixed_reg) {
3151         return;
3152     }
3153     if (ts->val_type == TEMP_VAL_REG) {
3154         s->reg_to_temp[ts->reg] = NULL;
3155     }
3156     ts->val_type = (free_or_dead < 0
3157                     || ts->temp_local
3158                     || ts->temp_global
3159                     ? TEMP_VAL_MEM : TEMP_VAL_DEAD);
3160 }
3161 
3162 /* Mark a temporary as dead.  */
3163 static inline void temp_dead(TCGContext *s, TCGTemp *ts)
3164 {
3165     temp_free_or_dead(s, ts, 1);
3166 }
3167 
3168 /* Sync a temporary to memory. 'allocated_regs' is used in case a temporary
3169    registers needs to be allocated to store a constant.  If 'free_or_dead'
3170    is non-zero, subsequently release the temporary; if it is positive, the
3171    temp is dead; if it is negative, the temp is free.  */
3172 static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs,
3173                       TCGRegSet preferred_regs, int free_or_dead)
3174 {
3175     if (ts->fixed_reg) {
3176         return;
3177     }
3178     if (!ts->mem_coherent) {
3179         if (!ts->mem_allocated) {
3180             temp_allocate_frame(s, ts);
3181         }
3182         switch (ts->val_type) {
3183         case TEMP_VAL_CONST:
3184             /* If we're going to free the temp immediately, then we won't
3185                require it later in a register, so attempt to store the
3186                constant to memory directly.  */
3187             if (free_or_dead
3188                 && tcg_out_sti(s, ts->type, ts->val,
3189                                ts->mem_base->reg, ts->mem_offset)) {
3190                 break;
3191             }
3192             temp_load(s, ts, tcg_target_available_regs[ts->type],
3193                       allocated_regs, preferred_regs);
3194             /* fallthrough */
3195 
3196         case TEMP_VAL_REG:
3197             tcg_out_st(s, ts->type, ts->reg,
3198                        ts->mem_base->reg, ts->mem_offset);
3199             break;
3200 
3201         case TEMP_VAL_MEM:
3202             break;
3203 
3204         case TEMP_VAL_DEAD:
3205         default:
3206             tcg_abort();
3207         }
3208         ts->mem_coherent = 1;
3209     }
3210     if (free_or_dead) {
3211         temp_free_or_dead(s, ts, free_or_dead);
3212     }
3213 }
3214 
3215 /* free register 'reg' by spilling the corresponding temporary if necessary */
3216 static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs)
3217 {
3218     TCGTemp *ts = s->reg_to_temp[reg];
3219     if (ts != NULL) {
3220         temp_sync(s, ts, allocated_regs, 0, -1);
3221     }
3222 }
3223 
3224 /**
3225  * tcg_reg_alloc:
3226  * @required_regs: Set of registers in which we must allocate.
3227  * @allocated_regs: Set of registers which must be avoided.
3228  * @preferred_regs: Set of registers we should prefer.
3229  * @rev: True if we search the registers in "indirect" order.
3230  *
3231  * The allocated register must be in @required_regs & ~@allocated_regs,
3232  * but if we can put it in @preferred_regs we may save a move later.
3233  */
3234 static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs,
3235                             TCGRegSet allocated_regs,
3236                             TCGRegSet preferred_regs, bool rev)
3237 {
3238     int i, j, f, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
3239     TCGRegSet reg_ct[2];
3240     const int *order;
3241 
3242     reg_ct[1] = required_regs & ~allocated_regs;
3243     tcg_debug_assert(reg_ct[1] != 0);
3244     reg_ct[0] = reg_ct[1] & preferred_regs;
3245 
3246     /* Skip the preferred_regs option if it cannot be satisfied,
3247        or if the preference made no difference.  */
3248     f = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
3249 
3250     order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
3251 
3252     /* Try free registers, preferences first.  */
3253     for (j = f; j < 2; j++) {
3254         TCGRegSet set = reg_ct[j];
3255 
3256         if (tcg_regset_single(set)) {
3257             /* One register in the set.  */
3258             TCGReg reg = tcg_regset_first(set);
3259             if (s->reg_to_temp[reg] == NULL) {
3260                 return reg;
3261             }
3262         } else {
3263             for (i = 0; i < n; i++) {
3264                 TCGReg reg = order[i];
3265                 if (s->reg_to_temp[reg] == NULL &&
3266                     tcg_regset_test_reg(set, reg)) {
3267                     return reg;
3268                 }
3269             }
3270         }
3271     }
3272 
3273     /* We must spill something.  */
3274     for (j = f; j < 2; j++) {
3275         TCGRegSet set = reg_ct[j];
3276 
3277         if (tcg_regset_single(set)) {
3278             /* One register in the set.  */
3279             TCGReg reg = tcg_regset_first(set);
3280             tcg_reg_free(s, reg, allocated_regs);
3281             return reg;
3282         } else {
3283             for (i = 0; i < n; i++) {
3284                 TCGReg reg = order[i];
3285                 if (tcg_regset_test_reg(set, reg)) {
3286                     tcg_reg_free(s, reg, allocated_regs);
3287                     return reg;
3288                 }
3289             }
3290         }
3291     }
3292 
3293     tcg_abort();
3294 }
3295 
3296 /* Make sure the temporary is in a register.  If needed, allocate the register
3297    from DESIRED while avoiding ALLOCATED.  */
3298 static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
3299                       TCGRegSet allocated_regs, TCGRegSet preferred_regs)
3300 {
3301     TCGReg reg;
3302 
3303     switch (ts->val_type) {
3304     case TEMP_VAL_REG:
3305         return;
3306     case TEMP_VAL_CONST:
3307         reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
3308                             preferred_regs, ts->indirect_base);
3309         tcg_out_movi(s, ts->type, reg, ts->val);
3310         ts->mem_coherent = 0;
3311         break;
3312     case TEMP_VAL_MEM:
3313         reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
3314                             preferred_regs, ts->indirect_base);
3315         tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
3316         ts->mem_coherent = 1;
3317         break;
3318     case TEMP_VAL_DEAD:
3319     default:
3320         tcg_abort();
3321     }
3322     ts->reg = reg;
3323     ts->val_type = TEMP_VAL_REG;
3324     s->reg_to_temp[reg] = ts;
3325 }
3326 
/*
 * Save a temporary to memory.
 *
 * No code is emitted here: the function only asserts (in builds where
 * tcg_debug_assert is active) that @ts is already TEMP_VAL_MEM or is a
 * fixed register.  'allocated_regs' is not used by this implementation.
 */
static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
{
    /* The liveness analysis already ensures that globals are back
       in memory. Keep a tcg_debug_assert for safety. */
    tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || ts->fixed_reg);
}
3335 
3336 /* save globals to their canonical location and assume they can be
3337    modified be the following code. 'allocated_regs' is used in case a
3338    temporary registers needs to be allocated to store a constant. */
3339 static void save_globals(TCGContext *s, TCGRegSet allocated_regs)
3340 {
3341     int i, n;
3342 
3343     for (i = 0, n = s->nb_globals; i < n; i++) {
3344         temp_save(s, &s->temps[i], allocated_regs);
3345     }
3346 }
3347 
3348 /* sync globals to their canonical location and assume they can be
3349    read by the following code. 'allocated_regs' is used in case a
3350    temporary registers needs to be allocated to store a constant. */
3351 static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
3352 {
3353     int i, n;
3354 
3355     for (i = 0, n = s->nb_globals; i < n; i++) {
3356         TCGTemp *ts = &s->temps[i];
3357         tcg_debug_assert(ts->val_type != TEMP_VAL_REG
3358                          || ts->fixed_reg
3359                          || ts->mem_coherent);
3360     }
3361 }
3362 
3363 /* at the end of a basic block, we assume all temporaries are dead and
3364    all globals are stored at their canonical location. */
3365 static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
3366 {
3367     int i;
3368 
3369     for (i = s->nb_globals; i < s->nb_temps; i++) {
3370         TCGTemp *ts = &s->temps[i];
3371         if (ts->temp_local) {
3372             temp_save(s, ts, allocated_regs);
3373         } else {
3374             /* The liveness analysis already ensures that temps are dead.
3375                Keep an tcg_debug_assert for safety. */
3376             tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
3377         }
3378     }
3379 
3380     save_globals(s, allocated_regs);
3381 }
3382 
3383 /*
3384  * Specialized code generation for INDEX_op_movi_*.
3385  */
3386 static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots,
3387                                   tcg_target_ulong val, TCGLifeData arg_life,
3388                                   TCGRegSet preferred_regs)
3389 {
3390     /* ENV should not be modified.  */
3391     tcg_debug_assert(!ots->fixed_reg);
3392 
3393     /* The movi is not explicitly generated here.  */
3394     if (ots->val_type == TEMP_VAL_REG) {
3395         s->reg_to_temp[ots->reg] = NULL;
3396     }
3397     ots->val_type = TEMP_VAL_CONST;
3398     ots->val = val;
3399     ots->mem_coherent = 0;
3400     if (NEED_SYNC_ARG(0)) {
3401         temp_sync(s, ots, s->reserved_regs, preferred_regs, IS_DEAD_ARG(0));
3402     } else if (IS_DEAD_ARG(0)) {
3403         temp_dead(s, ots);
3404     }
3405 }
3406 
3407 static void tcg_reg_alloc_movi(TCGContext *s, const TCGOp *op)
3408 {
3409     TCGTemp *ots = arg_temp(op->args[0]);
3410     tcg_target_ulong val = op->args[1];
3411 
3412     tcg_reg_alloc_do_movi(s, ots, val, op->life, op->output_pref[0]);
3413 }
3414 
3415 /*
3416  * Specialized code generation for INDEX_op_mov_*.
3417  */
3418 static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op)
3419 {
3420     const TCGLifeData arg_life = op->life;
3421     TCGRegSet allocated_regs, preferred_regs;
3422     TCGTemp *ts, *ots;
3423     TCGType otype, itype;
3424 
3425     allocated_regs = s->reserved_regs;
3426     preferred_regs = op->output_pref[0];
3427     ots = arg_temp(op->args[0]);
3428     ts = arg_temp(op->args[1]);
3429 
3430     /* ENV should not be modified.  */
3431     tcg_debug_assert(!ots->fixed_reg);
3432 
3433     /* Note that otype != itype for no-op truncation.  */
3434     otype = ots->type;
3435     itype = ts->type;
3436 
3437     if (ts->val_type == TEMP_VAL_CONST) {
3438         /* propagate constant or generate sti */
3439         tcg_target_ulong val = ts->val;
3440         if (IS_DEAD_ARG(1)) {
3441             temp_dead(s, ts);
3442         }
3443         tcg_reg_alloc_do_movi(s, ots, val, arg_life, preferred_regs);
3444         return;
3445     }
3446 
3447     /* If the source value is in memory we're going to be forced
3448        to have it in a register in order to perform the copy.  Copy
3449        the SOURCE value into its own register first, that way we
3450        don't have to reload SOURCE the next time it is used. */
3451     if (ts->val_type == TEMP_VAL_MEM) {
3452         temp_load(s, ts, tcg_target_available_regs[itype],
3453                   allocated_regs, preferred_regs);
3454     }
3455 
3456     tcg_debug_assert(ts->val_type == TEMP_VAL_REG);
3457     if (IS_DEAD_ARG(0)) {
3458         /* mov to a non-saved dead register makes no sense (even with
3459            liveness analysis disabled). */
3460         tcg_debug_assert(NEED_SYNC_ARG(0));
3461         if (!ots->mem_allocated) {
3462             temp_allocate_frame(s, ots);
3463         }
3464         tcg_out_st(s, otype, ts->reg, ots->mem_base->reg, ots->mem_offset);
3465         if (IS_DEAD_ARG(1)) {
3466             temp_dead(s, ts);
3467         }
3468         temp_dead(s, ots);
3469     } else {
3470         if (IS_DEAD_ARG(1) && !ts->fixed_reg) {
3471             /* the mov can be suppressed */
3472             if (ots->val_type == TEMP_VAL_REG) {
3473                 s->reg_to_temp[ots->reg] = NULL;
3474             }
3475             ots->reg = ts->reg;
3476             temp_dead(s, ts);
3477         } else {
3478             if (ots->val_type != TEMP_VAL_REG) {
3479                 /* When allocating a new register, make sure to not spill the
3480                    input one. */
3481                 tcg_regset_set_reg(allocated_regs, ts->reg);
3482                 ots->reg = tcg_reg_alloc(s, tcg_target_available_regs[otype],
3483                                          allocated_regs, preferred_regs,
3484                                          ots->indirect_base);
3485             }
3486             if (!tcg_out_mov(s, otype, ots->reg, ts->reg)) {
3487                 /*
3488                  * Cross register class move not supported.
3489                  * Store the source register into the destination slot
3490                  * and leave the destination temp as TEMP_VAL_MEM.
3491                  */
3492                 assert(!ots->fixed_reg);
3493                 if (!ts->mem_allocated) {
3494                     temp_allocate_frame(s, ots);
3495                 }
3496                 tcg_out_st(s, ts->type, ts->reg,
3497                            ots->mem_base->reg, ots->mem_offset);
3498                 ots->mem_coherent = 1;
3499                 temp_free_or_dead(s, ots, -1);
3500                 return;
3501             }
3502         }
3503         ots->val_type = TEMP_VAL_REG;
3504         ots->mem_coherent = 0;
3505         s->reg_to_temp[ots->reg] = ots;
3506         if (NEED_SYNC_ARG(0)) {
3507             temp_sync(s, ots, allocated_regs, 0, 0);
3508         }
3509     }
3510 }
3511 
/*
 * Specialized code generation for INDEX_op_dup_vec.
 *
 * args[0] is the vector output temp (ots), args[1] the input temp (its).
 * A constant input is forwarded to tcg_reg_alloc_do_movi().  Otherwise an
 * output register is allocated and the backend is asked, in turn, to
 *   1. dup directly from the input register (tcg_out_dup_vec),
 *   2. move the input register into the output register and dup in place,
 *   3. dup straight from the input's memory slot (tcg_out_dupm_vec),
 *   4. load the input into the output register and dup in place.
 * Liveness flags from op->life are applied at the end.
 */
static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op)
{
    const TCGLifeData arg_life = op->life;
    TCGRegSet dup_out_regs, dup_in_regs;
    TCGTemp *its, *ots;
    TCGType itype, vtype;
    intptr_t endian_fixup;
    unsigned vece;
    bool ok;

    ots = arg_temp(op->args[0]);
    its = arg_temp(op->args[1]);

    /* ENV should not be modified.  */
    tcg_debug_assert(!ots->fixed_reg);

    itype = its->type;
    vece = TCGOP_VECE(op);
    vtype = TCGOP_VECL(op) + TCG_TYPE_V64;

    if (its->val_type == TEMP_VAL_CONST) {
        /* Propagate constant via movi -> dupi.  */
        tcg_target_ulong val = its->val;
        if (IS_DEAD_ARG(1)) {
            temp_dead(s, its);
        }
        tcg_reg_alloc_do_movi(s, ots, val, arg_life, op->output_pref[0]);
        return;
    }

    /* Register constraints of dup_vec: operand 0 is output, 1 is input.  */
    dup_out_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[0].u.regs;
    dup_in_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[1].u.regs;

    /* Allocate the output register now.  */
    if (ots->val_type != TEMP_VAL_REG) {
        TCGRegSet allocated_regs = s->reserved_regs;

        if (!IS_DEAD_ARG(1) && its->val_type == TEMP_VAL_REG) {
            /* Make sure to not spill the input register. */
            tcg_regset_set_reg(allocated_regs, its->reg);
        }
        ots->reg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
                                 op->output_pref[0], ots->indirect_base);
        ots->val_type = TEMP_VAL_REG;
        ots->mem_coherent = 0;
        s->reg_to_temp[ots->reg] = ots;
    }

    switch (its->val_type) {
    case TEMP_VAL_REG:
        /*
         * The dup constraints must be broad, covering all possible VECE.
         * However, tcg_out_dup_vec() gets to see the VECE and we allow it
         * to fail, indicating that extra moves are required for that case.
         */
        if (tcg_regset_test_reg(dup_in_regs, its->reg)) {
            if (tcg_out_dup_vec(s, vtype, vece, ots->reg, its->reg)) {
                goto done;
            }
            /* Try again from memory or a vector input register.  */
        }
        if (!its->mem_coherent) {
            /*
             * The input register is not synced, and so an extra store
             * would be required to use memory.  Attempt an integer-vector
             * register move first.  We do not have a TCGRegSet for this.
             */
            if (tcg_out_mov(s, itype, ots->reg, its->reg)) {
                /* Value is now in ots->reg; finish with the in-place dup. */
                break;
            }
            /* Sync the temp back to its slot and load from there.  */
            temp_sync(s, its, s->reserved_regs, 0, 0);
        }
        /* fall through */

    case TEMP_VAL_MEM:
#ifdef HOST_WORDS_BIGENDIAN
        /*
         * Offset from the start of the slot to the element that is
         * duplicated: slot size in bytes minus element size (1 << vece).
         */
        endian_fixup = itype == TCG_TYPE_I32 ? 4 : 8;
        endian_fixup -= 1 << vece;
#else
        endian_fixup = 0;
#endif
        if (tcg_out_dupm_vec(s, vtype, vece, ots->reg, its->mem_base->reg,
                             its->mem_offset + endian_fixup)) {
            goto done;
        }
        /* No dup-from-memory: load into the output register instead.  */
        tcg_out_ld(s, itype, ots->reg, its->mem_base->reg, its->mem_offset);
        break;

    default:
        g_assert_not_reached();
    }

    /* We now have a vector input register, so dup must succeed. */
    ok = tcg_out_dup_vec(s, vtype, vece, ots->reg, ots->reg);
    tcg_debug_assert(ok);

 done:
    /* Apply the liveness information recorded for this op.  */
    if (IS_DEAD_ARG(1)) {
        temp_dead(s, its);
    }
    if (NEED_SYNC_ARG(0)) {
        temp_sync(s, ots, s->reserved_regs, 0, 0);
    }
    if (IS_DEAD_ARG(0)) {
        temp_dead(s, ots);
    }
}
3623 
3624 static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
3625 {
3626     const TCGLifeData arg_life = op->life;
3627     const TCGOpDef * const def = &tcg_op_defs[op->opc];
3628     TCGRegSet i_allocated_regs;
3629     TCGRegSet o_allocated_regs;
3630     int i, k, nb_iargs, nb_oargs;
3631     TCGReg reg;
3632     TCGArg arg;
3633     const TCGArgConstraint *arg_ct;
3634     TCGTemp *ts;
3635     TCGArg new_args[TCG_MAX_OP_ARGS];
3636     int const_args[TCG_MAX_OP_ARGS];
3637 
3638     nb_oargs = def->nb_oargs;
3639     nb_iargs = def->nb_iargs;
3640 
3641     /* copy constants */
3642     memcpy(new_args + nb_oargs + nb_iargs,
3643            op->args + nb_oargs + nb_iargs,
3644            sizeof(TCGArg) * def->nb_cargs);
3645 
3646     i_allocated_regs = s->reserved_regs;
3647     o_allocated_regs = s->reserved_regs;
3648 
3649     /* satisfy input constraints */
3650     for (k = 0; k < nb_iargs; k++) {
3651         TCGRegSet i_preferred_regs, o_preferred_regs;
3652 
3653         i = def->sorted_args[nb_oargs + k];
3654         arg = op->args[i];
3655         arg_ct = &def->args_ct[i];
3656         ts = arg_temp(arg);
3657 
3658         if (ts->val_type == TEMP_VAL_CONST
3659             && tcg_target_const_match(ts->val, ts->type, arg_ct)) {
3660             /* constant is OK for instruction */
3661             const_args[i] = 1;
3662             new_args[i] = ts->val;
3663             continue;
3664         }
3665 
3666         i_preferred_regs = o_preferred_regs = 0;
3667         if (arg_ct->ct & TCG_CT_IALIAS) {
3668             o_preferred_regs = op->output_pref[arg_ct->alias_index];
3669             if (ts->fixed_reg) {
3670                 /* if fixed register, we must allocate a new register
3671                    if the alias is not the same register */
3672                 if (arg != op->args[arg_ct->alias_index]) {
3673                     goto allocate_in_reg;
3674                 }
3675             } else {
3676                 /* if the input is aliased to an output and if it is
3677                    not dead after the instruction, we must allocate
3678                    a new register and move it */
3679                 if (!IS_DEAD_ARG(i)) {
3680                     goto allocate_in_reg;
3681                 }
3682 
3683                 /* check if the current register has already been allocated
3684                    for another input aliased to an output */
3685                 if (ts->val_type == TEMP_VAL_REG) {
3686                     int k2, i2;
3687                     reg = ts->reg;
3688                     for (k2 = 0 ; k2 < k ; k2++) {
3689                         i2 = def->sorted_args[nb_oargs + k2];
3690                         if ((def->args_ct[i2].ct & TCG_CT_IALIAS) &&
3691                             reg == new_args[i2]) {
3692                             goto allocate_in_reg;
3693                         }
3694                     }
3695                 }
3696                 i_preferred_regs = o_preferred_regs;
3697             }
3698         }
3699 
3700         temp_load(s, ts, arg_ct->u.regs, i_allocated_regs, i_preferred_regs);
3701         reg = ts->reg;
3702 
3703         if (tcg_regset_test_reg(arg_ct->u.regs, reg)) {
3704             /* nothing to do : the constraint is satisfied */
3705         } else {
3706         allocate_in_reg:
3707             /* allocate a new register matching the constraint
3708                and move the temporary register into it */
3709             temp_load(s, ts, tcg_target_available_regs[ts->type],
3710                       i_allocated_regs, 0);
3711             reg = tcg_reg_alloc(s, arg_ct->u.regs, i_allocated_regs,
3712                                 o_preferred_regs, ts->indirect_base);
3713             if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
3714                 /*
3715                  * Cross register class move not supported.  Sync the
3716                  * temp back to its slot and load from there.
3717                  */
3718                 temp_sync(s, ts, i_allocated_regs, 0, 0);
3719                 tcg_out_ld(s, ts->type, reg,
3720                            ts->mem_base->reg, ts->mem_offset);
3721             }
3722         }
3723         new_args[i] = reg;
3724         const_args[i] = 0;
3725         tcg_regset_set_reg(i_allocated_regs, reg);
3726     }
3727 
3728     /* mark dead temporaries and free the associated registers */
3729     for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
3730         if (IS_DEAD_ARG(i)) {
3731             temp_dead(s, arg_temp(op->args[i]));
3732         }
3733     }
3734 
3735     if (def->flags & TCG_OPF_BB_END) {
3736         tcg_reg_alloc_bb_end(s, i_allocated_regs);
3737     } else {
3738         if (def->flags & TCG_OPF_CALL_CLOBBER) {
3739             /* XXX: permit generic clobber register list ? */
3740             for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
3741                 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
3742                     tcg_reg_free(s, i, i_allocated_regs);
3743                 }
3744             }
3745         }
3746         if (def->flags & TCG_OPF_SIDE_EFFECTS) {
3747             /* sync globals if the op has side effects and might trigger
3748                an exception. */
3749             sync_globals(s, i_allocated_regs);
3750         }
3751 
3752         /* satisfy the output constraints */
3753         for(k = 0; k < nb_oargs; k++) {
3754             i = def->sorted_args[k];
3755             arg = op->args[i];
3756             arg_ct = &def->args_ct[i];
3757             ts = arg_temp(arg);
3758 
3759             /* ENV should not be modified.  */
3760             tcg_debug_assert(!ts->fixed_reg);
3761 
3762             if ((arg_ct->ct & TCG_CT_ALIAS)
3763                 && !const_args[arg_ct->alias_index]) {
3764                 reg = new_args[arg_ct->alias_index];
3765             } else if (arg_ct->ct & TCG_CT_NEWREG) {
3766                 reg = tcg_reg_alloc(s, arg_ct->u.regs,
3767                                     i_allocated_regs | o_allocated_regs,
3768                                     op->output_pref[k], ts->indirect_base);
3769             } else {
3770                 reg = tcg_reg_alloc(s, arg_ct->u.regs, o_allocated_regs,
3771                                     op->output_pref[k], ts->indirect_base);
3772             }
3773             tcg_regset_set_reg(o_allocated_regs, reg);
3774             if (ts->val_type == TEMP_VAL_REG) {
3775                 s->reg_to_temp[ts->reg] = NULL;
3776             }
3777             ts->val_type = TEMP_VAL_REG;
3778             ts->reg = reg;
3779             /*
3780              * Temp value is modified, so the value kept in memory is
3781              * potentially not the same.
3782              */
3783             ts->mem_coherent = 0;
3784             s->reg_to_temp[reg] = ts;
3785             new_args[i] = reg;
3786         }
3787     }
3788 
3789     /* emit instruction */
3790     if (def->flags & TCG_OPF_VECTOR) {
3791         tcg_out_vec_op(s, op->opc, TCGOP_VECL(op), TCGOP_VECE(op),
3792                        new_args, const_args);
3793     } else {
3794         tcg_out_op(s, op->opc, new_args, const_args);
3795     }
3796 
3797     /* move the outputs in the correct register if needed */
3798     for(i = 0; i < nb_oargs; i++) {
3799         ts = arg_temp(op->args[i]);
3800 
3801         /* ENV should not be modified.  */
3802         tcg_debug_assert(!ts->fixed_reg);
3803 
3804         if (NEED_SYNC_ARG(i)) {
3805             temp_sync(s, ts, o_allocated_regs, 0, IS_DEAD_ARG(i));
3806         } else if (IS_DEAD_ARG(i)) {
3807             temp_dead(s, ts);
3808         }
3809     }
3810 }
3811 
/*
 * STACK_DIR(x): x as is, or negated when the backend defines
 * TCG_TARGET_STACK_GROWSUP.
 */
#ifndef TCG_TARGET_STACK_GROWSUP
#define STACK_DIR(x) (x)
#else
#define STACK_DIR(x) (-(x))
#endif
3817 
3818 static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
3819 {
3820     const int nb_oargs = TCGOP_CALLO(op);
3821     const int nb_iargs = TCGOP_CALLI(op);
3822     const TCGLifeData arg_life = op->life;
3823     int flags, nb_regs, i;
3824     TCGReg reg;
3825     TCGArg arg;
3826     TCGTemp *ts;
3827     intptr_t stack_offset;
3828     size_t call_stack_size;
3829     tcg_insn_unit *func_addr;
3830     int allocate_args;
3831     TCGRegSet allocated_regs;
3832 
3833     func_addr = (tcg_insn_unit *)(intptr_t)op->args[nb_oargs + nb_iargs];
3834     flags = op->args[nb_oargs + nb_iargs + 1];
3835 
3836     nb_regs = ARRAY_SIZE(tcg_target_call_iarg_regs);
3837     if (nb_regs > nb_iargs) {
3838         nb_regs = nb_iargs;
3839     }
3840 
3841     /* assign stack slots first */
3842     call_stack_size = (nb_iargs - nb_regs) * sizeof(tcg_target_long);
3843     call_stack_size = (call_stack_size + TCG_TARGET_STACK_ALIGN - 1) &
3844         ~(TCG_TARGET_STACK_ALIGN - 1);
3845     allocate_args = (call_stack_size > TCG_STATIC_CALL_ARGS_SIZE);
3846     if (allocate_args) {
3847         /* XXX: if more than TCG_STATIC_CALL_ARGS_SIZE is needed,
3848            preallocate call stack */
3849         tcg_abort();
3850     }
3851 
3852     stack_offset = TCG_TARGET_CALL_STACK_OFFSET;
3853     for (i = nb_regs; i < nb_iargs; i++) {
3854         arg = op->args[nb_oargs + i];
3855 #ifdef TCG_TARGET_STACK_GROWSUP
3856         stack_offset -= sizeof(tcg_target_long);
3857 #endif
3858         if (arg != TCG_CALL_DUMMY_ARG) {
3859             ts = arg_temp(arg);
3860             temp_load(s, ts, tcg_target_available_regs[ts->type],
3861                       s->reserved_regs, 0);
3862             tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK, stack_offset);
3863         }
3864 #ifndef TCG_TARGET_STACK_GROWSUP
3865         stack_offset += sizeof(tcg_target_long);
3866 #endif
3867     }
3868 
3869     /* assign input registers */
3870     allocated_regs = s->reserved_regs;
3871     for (i = 0; i < nb_regs; i++) {
3872         arg = op->args[nb_oargs + i];
3873         if (arg != TCG_CALL_DUMMY_ARG) {
3874             ts = arg_temp(arg);
3875             reg = tcg_target_call_iarg_regs[i];
3876 
3877             if (ts->val_type == TEMP_VAL_REG) {
3878                 if (ts->reg != reg) {
3879                     tcg_reg_free(s, reg, allocated_regs);
3880                     if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
3881                         /*
3882                          * Cross register class move not supported.  Sync the
3883                          * temp back to its slot and load from there.
3884                          */
3885                         temp_sync(s, ts, allocated_regs, 0, 0);
3886                         tcg_out_ld(s, ts->type, reg,
3887                                    ts->mem_base->reg, ts->mem_offset);
3888                     }
3889                 }
3890             } else {
3891                 TCGRegSet arg_set = 0;
3892 
3893                 tcg_reg_free(s, reg, allocated_regs);
3894                 tcg_regset_set_reg(arg_set, reg);
3895                 temp_load(s, ts, arg_set, allocated_regs, 0);
3896             }
3897 
3898             tcg_regset_set_reg(allocated_regs, reg);
3899         }
3900     }
3901 
3902     /* mark dead temporaries and free the associated registers */
3903     for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
3904         if (IS_DEAD_ARG(i)) {
3905             temp_dead(s, arg_temp(op->args[i]));
3906         }
3907     }
3908 
3909     /* clobber call registers */
3910     for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
3911         if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
3912             tcg_reg_free(s, i, allocated_regs);
3913         }
3914     }
3915 
3916     /* Save globals if they might be written by the helper, sync them if
3917        they might be read. */
3918     if (flags & TCG_CALL_NO_READ_GLOBALS) {
3919         /* Nothing to do */
3920     } else if (flags & TCG_CALL_NO_WRITE_GLOBALS) {
3921         sync_globals(s, allocated_regs);
3922     } else {
3923         save_globals(s, allocated_regs);
3924     }
3925 
3926     tcg_out_call(s, func_addr);
3927 
3928     /* assign output registers and emit moves if needed */
3929     for(i = 0; i < nb_oargs; i++) {
3930         arg = op->args[i];
3931         ts = arg_temp(arg);
3932 
3933         /* ENV should not be modified.  */
3934         tcg_debug_assert(!ts->fixed_reg);
3935 
3936         reg = tcg_target_call_oarg_regs[i];
3937         tcg_debug_assert(s->reg_to_temp[reg] == NULL);
3938         if (ts->val_type == TEMP_VAL_REG) {
3939             s->reg_to_temp[ts->reg] = NULL;
3940         }
3941         ts->val_type = TEMP_VAL_REG;
3942         ts->reg = reg;
3943         ts->mem_coherent = 0;
3944         s->reg_to_temp[reg] = ts;
3945         if (NEED_SYNC_ARG(i)) {
3946             temp_sync(s, ts, allocated_regs, 0, IS_DEAD_ARG(i));
3947         } else if (IS_DEAD_ARG(i)) {
3948             temp_dead(s, ts);
3949         }
3950     }
3951 }
3952 
3953 #ifdef CONFIG_PROFILER
3954 
3955 /* avoid copy/paste errors */
3956 #define PROF_ADD(to, from, field)                       \
3957     do {                                                \
3958         (to)->field += atomic_read(&((from)->field));   \
3959     } while (0)
3960 
3961 #define PROF_MAX(to, from, field)                                       \
3962     do {                                                                \
3963         typeof((from)->field) val__ = atomic_read(&((from)->field));    \
3964         if (val__ > (to)->field) {                                      \
3965             (to)->field = val__;                                        \
3966         }                                                               \
3967     } while (0)
3968 
3969 /* Pass in a zero'ed @prof */
3970 static inline
3971 void tcg_profile_snapshot(TCGProfile *prof, bool counters, bool table)
3972 {
3973     unsigned int n_ctxs = atomic_read(&n_tcg_ctxs);
3974     unsigned int i;
3975 
3976     for (i = 0; i < n_ctxs; i++) {
3977         TCGContext *s = atomic_read(&tcg_ctxs[i]);
3978         const TCGProfile *orig = &s->prof;
3979 
3980         if (counters) {
3981             PROF_ADD(prof, orig, cpu_exec_time);
3982             PROF_ADD(prof, orig, tb_count1);
3983             PROF_ADD(prof, orig, tb_count);
3984             PROF_ADD(prof, orig, op_count);
3985             PROF_MAX(prof, orig, op_count_max);
3986             PROF_ADD(prof, orig, temp_count);
3987             PROF_MAX(prof, orig, temp_count_max);
3988             PROF_ADD(prof, orig, del_op_count);
3989             PROF_ADD(prof, orig, code_in_len);
3990             PROF_ADD(prof, orig, code_out_len);
3991             PROF_ADD(prof, orig, search_out_len);
3992             PROF_ADD(prof, orig, interm_time);
3993             PROF_ADD(prof, orig, code_time);
3994             PROF_ADD(prof, orig, la_time);
3995             PROF_ADD(prof, orig, opt_time);
3996             PROF_ADD(prof, orig, restore_count);
3997             PROF_ADD(prof, orig, restore_time);
3998         }
3999         if (table) {
4000             int i;
4001 
4002             for (i = 0; i < NB_OPS; i++) {
4003                 PROF_ADD(prof, orig, table_op_count[i]);
4004             }
4005         }
4006     }
4007 }
4008 
4009 #undef PROF_ADD
4010 #undef PROF_MAX
4011 
4012 static void tcg_profile_snapshot_counters(TCGProfile *prof)
4013 {
4014     tcg_profile_snapshot(prof, true, false);
4015 }
4016 
4017 static void tcg_profile_snapshot_table(TCGProfile *prof)
4018 {
4019     tcg_profile_snapshot(prof, false, true);
4020 }
4021 
4022 void tcg_dump_op_count(void)
4023 {
4024     TCGProfile prof = {};
4025     int i;
4026 
4027     tcg_profile_snapshot_table(&prof);
4028     for (i = 0; i < NB_OPS; i++) {
4029         qemu_printf("%s %" PRId64 "\n", tcg_op_defs[i].name,
4030                     prof.table_op_count[i]);
4031     }
4032 }
4033 
4034 int64_t tcg_cpu_exec_time(void)
4035 {
4036     unsigned int n_ctxs = atomic_read(&n_tcg_ctxs);
4037     unsigned int i;
4038     int64_t ret = 0;
4039 
4040     for (i = 0; i < n_ctxs; i++) {
4041         const TCGContext *s = atomic_read(&tcg_ctxs[i]);
4042         const TCGProfile *prof = &s->prof;
4043 
4044         ret += atomic_read(&prof->cpu_exec_time);
4045     }
4046     return ret;
4047 }
4048 #else
/*
 * Built without CONFIG_PROFILER: there are no opcode counts to show,
 * so only print a notice.
 */
void tcg_dump_op_count(void)
{
    qemu_printf("[TCG profiler not compiled]\n");
}
4053 
/*
 * Built without CONFIG_PROFILER: no execution time is recorded, so
 * report the error and terminate the process with EXIT_FAILURE.
 * This function does not return.
 */
int64_t tcg_cpu_exec_time(void)
{
    error_report("%s: TCG profiler not compiled", __func__);
    exit(EXIT_FAILURE);
}
4059 #endif
4060 
4061 
4062 int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
4063 {
4064 #ifdef CONFIG_PROFILER
4065     TCGProfile *prof = &s->prof;
4066 #endif
4067     int i, num_insns;
4068     TCGOp *op;
4069 
4070 #ifdef CONFIG_PROFILER
4071     {
4072         int n = 0;
4073 
4074         QTAILQ_FOREACH(op, &s->ops, link) {
4075             n++;
4076         }
4077         atomic_set(&prof->op_count, prof->op_count + n);
4078         if (n > prof->op_count_max) {
4079             atomic_set(&prof->op_count_max, n);
4080         }
4081 
4082         n = s->nb_temps;
4083         atomic_set(&prof->temp_count, prof->temp_count + n);
4084         if (n > prof->temp_count_max) {
4085             atomic_set(&prof->temp_count_max, n);
4086         }
4087     }
4088 #endif
4089 
4090 #ifdef DEBUG_DISAS
4091     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)
4092                  && qemu_log_in_addr_range(tb->pc))) {
4093         FILE *logfile = qemu_log_lock();
4094         qemu_log("OP:\n");
4095         tcg_dump_ops(s, false);
4096         qemu_log("\n");
4097         qemu_log_unlock(logfile);
4098     }
4099 #endif
4100 
4101 #ifdef CONFIG_DEBUG_TCG
4102     /* Ensure all labels referenced have been emitted.  */
4103     {
4104         TCGLabel *l;
4105         bool error = false;
4106 
4107         QSIMPLEQ_FOREACH(l, &s->labels, next) {
4108             if (unlikely(!l->present) && l->refs) {
4109                 qemu_log_mask(CPU_LOG_TB_OP,
4110                               "$L%d referenced but not present.\n", l->id);
4111                 error = true;
4112             }
4113         }
4114         assert(!error);
4115     }
4116 #endif
4117 
4118 #ifdef CONFIG_PROFILER
4119     atomic_set(&prof->opt_time, prof->opt_time - profile_getclock());
4120 #endif
4121 
4122 #ifdef USE_TCG_OPTIMIZATIONS
4123     tcg_optimize(s);
4124 #endif
4125 
4126 #ifdef CONFIG_PROFILER
4127     atomic_set(&prof->opt_time, prof->opt_time + profile_getclock());
4128     atomic_set(&prof->la_time, prof->la_time - profile_getclock());
4129 #endif
4130 
4131     reachable_code_pass(s);
4132     liveness_pass_1(s);
4133 
4134     if (s->nb_indirects > 0) {
4135 #ifdef DEBUG_DISAS
4136         if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
4137                      && qemu_log_in_addr_range(tb->pc))) {
4138             FILE *logfile = qemu_log_lock();
4139             qemu_log("OP before indirect lowering:\n");
4140             tcg_dump_ops(s, false);
4141             qemu_log("\n");
4142             qemu_log_unlock(logfile);
4143         }
4144 #endif
4145         /* Replace indirect temps with direct temps.  */
4146         if (liveness_pass_2(s)) {
4147             /* If changes were made, re-run liveness.  */
4148             liveness_pass_1(s);
4149         }
4150     }
4151 
4152 #ifdef CONFIG_PROFILER
4153     atomic_set(&prof->la_time, prof->la_time + profile_getclock());
4154 #endif
4155 
4156 #ifdef DEBUG_DISAS
4157     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT)
4158                  && qemu_log_in_addr_range(tb->pc))) {
4159         FILE *logfile = qemu_log_lock();
4160         qemu_log("OP after optimization and liveness analysis:\n");
4161         tcg_dump_ops(s, true);
4162         qemu_log("\n");
4163         qemu_log_unlock(logfile);
4164     }
4165 #endif
4166 
4167     tcg_reg_alloc_start(s);
4168 
4169     s->code_buf = tb->tc.ptr;
4170     s->code_ptr = tb->tc.ptr;
4171 
4172 #ifdef TCG_TARGET_NEED_LDST_LABELS
4173     QSIMPLEQ_INIT(&s->ldst_labels);
4174 #endif
4175 #ifdef TCG_TARGET_NEED_POOL_LABELS
4176     s->pool_labels = NULL;
4177 #endif
4178 
4179     num_insns = -1;
4180     QTAILQ_FOREACH(op, &s->ops, link) {
4181         TCGOpcode opc = op->opc;
4182 
4183 #ifdef CONFIG_PROFILER
4184         atomic_set(&prof->table_op_count[opc], prof->table_op_count[opc] + 1);
4185 #endif
4186 
4187         switch (opc) {
4188         case INDEX_op_mov_i32:
4189         case INDEX_op_mov_i64:
4190         case INDEX_op_mov_vec:
4191             tcg_reg_alloc_mov(s, op);
4192             break;
4193         case INDEX_op_movi_i32:
4194         case INDEX_op_movi_i64:
4195         case INDEX_op_dupi_vec:
4196             tcg_reg_alloc_movi(s, op);
4197             break;
4198         case INDEX_op_dup_vec:
4199             tcg_reg_alloc_dup(s, op);
4200             break;
4201         case INDEX_op_insn_start:
4202             if (num_insns >= 0) {
4203                 size_t off = tcg_current_code_size(s);
4204                 s->gen_insn_end_off[num_insns] = off;
4205                 /* Assert that we do not overflow our stored offset.  */
4206                 assert(s->gen_insn_end_off[num_insns] == off);
4207             }
4208             num_insns++;
4209             for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
4210                 target_ulong a;
4211 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
4212                 a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
4213 #else
4214                 a = op->args[i];
4215 #endif
4216                 s->gen_insn_data[num_insns][i] = a;
4217             }
4218             break;
4219         case INDEX_op_discard:
4220             temp_dead(s, arg_temp(op->args[0]));
4221             break;
4222         case INDEX_op_set_label:
4223             tcg_reg_alloc_bb_end(s, s->reserved_regs);
4224             tcg_out_label(s, arg_label(op->args[0]), s->code_ptr);
4225             break;
4226         case INDEX_op_call:
4227             tcg_reg_alloc_call(s, op);
4228             break;
4229         default:
4230             /* Sanity check that we've not introduced any unhandled opcodes. */
4231             tcg_debug_assert(tcg_op_supported(opc));
4232             /* Note: in order to speed up the code, it would be much
4233                faster to have specialized register allocator functions for
4234                some common argument patterns */
4235             tcg_reg_alloc_op(s, op);
4236             break;
4237         }
4238 #ifdef CONFIG_DEBUG_TCG
4239         check_regs(s);
4240 #endif
4241         /* Test for (pending) buffer overflow.  The assumption is that any
4242            one operation beginning below the high water mark cannot overrun
4243            the buffer completely.  Thus we can test for overflow after
4244            generating code without having to check during generation.  */
4245         if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
4246             return -1;
4247         }
4248         /* Test for TB overflow, as seen by gen_insn_end_off.  */
4249         if (unlikely(tcg_current_code_size(s) > UINT16_MAX)) {
4250             return -2;
4251         }
4252     }
4253     tcg_debug_assert(num_insns >= 0);
4254     s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);
4255 
4256     /* Generate TB finalization at the end of block */
4257 #ifdef TCG_TARGET_NEED_LDST_LABELS
4258     i = tcg_out_ldst_finalize(s);
4259     if (i < 0) {
4260         return i;
4261     }
4262 #endif
4263 #ifdef TCG_TARGET_NEED_POOL_LABELS
4264     i = tcg_out_pool_finalize(s);
4265     if (i < 0) {
4266         return i;
4267     }
4268 #endif
4269     if (!tcg_resolve_relocs(s)) {
4270         return -2;
4271     }
4272 
4273     /* flush instruction cache */
4274     flush_icache_range((uintptr_t)s->code_buf, (uintptr_t)s->code_ptr);
4275 
4276     return tcg_current_code_size(s);
4277 }
4278 
4279 #ifdef CONFIG_PROFILER
4280 void tcg_dump_info(void)
4281 {
4282     TCGProfile prof = {};
4283     const TCGProfile *s;
4284     int64_t tb_count;
4285     int64_t tb_div_count;
4286     int64_t tot;
4287 
4288     tcg_profile_snapshot_counters(&prof);
4289     s = &prof;
4290     tb_count = s->tb_count;
4291     tb_div_count = tb_count ? tb_count : 1;
4292     tot = s->interm_time + s->code_time;
4293 
4294     qemu_printf("JIT cycles          %" PRId64 " (%0.3f s at 2.4 GHz)\n",
4295                 tot, tot / 2.4e9);
4296     qemu_printf("translated TBs      %" PRId64 " (aborted=%" PRId64
4297                 " %0.1f%%)\n",
4298                 tb_count, s->tb_count1 - tb_count,
4299                 (double)(s->tb_count1 - s->tb_count)
4300                 / (s->tb_count1 ? s->tb_count1 : 1) * 100.0);
4301     qemu_printf("avg ops/TB          %0.1f max=%d\n",
4302                 (double)s->op_count / tb_div_count, s->op_count_max);
4303     qemu_printf("deleted ops/TB      %0.2f\n",
4304                 (double)s->del_op_count / tb_div_count);
4305     qemu_printf("avg temps/TB        %0.2f max=%d\n",
4306                 (double)s->temp_count / tb_div_count, s->temp_count_max);
4307     qemu_printf("avg host code/TB    %0.1f\n",
4308                 (double)s->code_out_len / tb_div_count);
4309     qemu_printf("avg search data/TB  %0.1f\n",
4310                 (double)s->search_out_len / tb_div_count);
4311 
4312     qemu_printf("cycles/op           %0.1f\n",
4313                 s->op_count ? (double)tot / s->op_count : 0);
4314     qemu_printf("cycles/in byte      %0.1f\n",
4315                 s->code_in_len ? (double)tot / s->code_in_len : 0);
4316     qemu_printf("cycles/out byte     %0.1f\n",
4317                 s->code_out_len ? (double)tot / s->code_out_len : 0);
4318     qemu_printf("cycles/search byte     %0.1f\n",
4319                 s->search_out_len ? (double)tot / s->search_out_len : 0);
4320     if (tot == 0) {
4321         tot = 1;
4322     }
4323     qemu_printf("  gen_interm time   %0.1f%%\n",
4324                 (double)s->interm_time / tot * 100.0);
4325     qemu_printf("  gen_code time     %0.1f%%\n",
4326                 (double)s->code_time / tot * 100.0);
4327     qemu_printf("optim./code time    %0.1f%%\n",
4328                 (double)s->opt_time / (s->code_time ? s->code_time : 1)
4329                 * 100.0);
4330     qemu_printf("liveness/code time  %0.1f%%\n",
4331                 (double)s->la_time / (s->code_time ? s->code_time : 1) * 100.0);
4332     qemu_printf("cpu_restore count   %" PRId64 "\n",
4333                 s->restore_count);
4334     qemu_printf("  avg cycles        %0.1f\n",
4335                 s->restore_count ? (double)s->restore_time / s->restore_count : 0);
4336 }
4337 #else
/*
 * Built without CONFIG_PROFILER: there are no statistics to show, so
 * only print a notice.
 */
void tcg_dump_info(void)
{
    qemu_printf("[TCG profiler not compiled]\n");
}
4342 #endif
4343 
4344 #ifdef ELF_HOST_MACHINE
4345 /* In order to use this feature, the backend needs to do three things:
4346 
4347    (1) Define ELF_HOST_MACHINE to indicate both what value to
4348        put into the ELF image and to indicate support for the feature.
4349 
4350    (2) Define tcg_register_jit.  This should create a buffer containing
4351        the contents of a .debug_frame section that describes the post-
4352        prologue unwind info for the tcg machine.
4353 
4354    (3) Call tcg_register_jit_int, with the constructed .debug_frame.
4355 */
4356 
/* Begin GDB interface.  THE FOLLOWING MUST MATCH GDB DOCS.  */

/* Values stored in jit_descriptor.action_flag. */
typedef enum {
    JIT_NOACTION      = 0,
    JIT_REGISTER_FN   = 1,
    JIT_UNREGISTER_FN = 2
} jit_actions_t;

/* One node of the doubly linked list of registered symbol files. */
struct jit_code_entry {
    struct jit_code_entry *next_entry;
    struct jit_code_entry *prev_entry;
    const void *symfile_addr;
    uint64_t symfile_size;
};

/* The single descriptor object, exported as __jit_debug_descriptor. */
struct jit_descriptor {
    uint32_t version;
    uint32_t action_flag;
    struct jit_code_entry *relevant_entry;
    struct jit_code_entry *first_entry;
};

/*
 * Deliberately empty, never-inlined hook.  The empty asm statement
 * keeps the body from being treated as trivially empty.
 */
void __jit_debug_register_code(void) __attribute__((noinline));
void __jit_debug_register_code(void)
{
    __asm__("");
}

/* Must statically initialize the version, because GDB may check
   the version before we can set it.  */
struct jit_descriptor __jit_debug_descriptor = {
    .version = 1,
    .action_flag = JIT_NOACTION,
    .relevant_entry = NULL,
    .first_entry = NULL,
};

/* End GDB interface.  */
4389 
/*
 * Return the offset within @strtab of the entry equal to @str.
 *
 * @strtab is walked as a sequence of NUL-terminated strings laid out
 * back to back.  The scan starts at offset 1, skipping the first byte
 * (presumably the leading NUL of an ELF string table).
 *
 * There is no bound on the scan: the caller must guarantee that @str is
 * present in the table, otherwise the walk continues past its end.
 */
static int find_string(const char *strtab, const char *str)
{
    const char *entry;

    for (entry = strtab + 1; strcmp(entry, str) != 0;
         entry += strlen(entry) + 1) {
        /* not this one: step over it and its terminator */
    }
    return entry - strtab;
}
4401 
4402 static void tcg_register_jit_int(void *buf_ptr, size_t buf_size,
4403                                  const void *debug_frame,
4404                                  size_t debug_frame_size)
4405 {
4406     struct __attribute__((packed)) DebugInfo {
4407         uint32_t  len;
4408         uint16_t  version;
4409         uint32_t  abbrev;
4410         uint8_t   ptr_size;
4411         uint8_t   cu_die;
4412         uint16_t  cu_lang;
4413         uintptr_t cu_low_pc;
4414         uintptr_t cu_high_pc;
4415         uint8_t   fn_die;
4416         char      fn_name[16];
4417         uintptr_t fn_low_pc;
4418         uintptr_t fn_high_pc;
4419         uint8_t   cu_eoc;
4420     };
4421 
4422     struct ElfImage {
4423         ElfW(Ehdr) ehdr;
4424         ElfW(Phdr) phdr;
4425         ElfW(Shdr) shdr[7];
4426         ElfW(Sym)  sym[2];
4427         struct DebugInfo di;
4428         uint8_t    da[24];
4429         char       str[80];
4430     };
4431 
4432     struct ElfImage *img;
4433 
4434     static const struct ElfImage img_template = {
4435         .ehdr = {
4436             .e_ident[EI_MAG0] = ELFMAG0,
4437             .e_ident[EI_MAG1] = ELFMAG1,
4438             .e_ident[EI_MAG2] = ELFMAG2,
4439             .e_ident[EI_MAG3] = ELFMAG3,
4440             .e_ident[EI_CLASS] = ELF_CLASS,
4441             .e_ident[EI_DATA] = ELF_DATA,
4442             .e_ident[EI_VERSION] = EV_CURRENT,
4443             .e_type = ET_EXEC,
4444             .e_machine = ELF_HOST_MACHINE,
4445             .e_version = EV_CURRENT,
4446             .e_phoff = offsetof(struct ElfImage, phdr),
4447             .e_shoff = offsetof(struct ElfImage, shdr),
4448             .e_ehsize = sizeof(ElfW(Shdr)),
4449             .e_phentsize = sizeof(ElfW(Phdr)),
4450             .e_phnum = 1,
4451             .e_shentsize = sizeof(ElfW(Shdr)),
4452             .e_shnum = ARRAY_SIZE(img->shdr),
4453             .e_shstrndx = ARRAY_SIZE(img->shdr) - 1,
4454 #ifdef ELF_HOST_FLAGS
4455             .e_flags = ELF_HOST_FLAGS,
4456 #endif
4457 #ifdef ELF_OSABI
4458             .e_ident[EI_OSABI] = ELF_OSABI,
4459 #endif
4460         },
4461         .phdr = {
4462             .p_type = PT_LOAD,
4463             .p_flags = PF_X,
4464         },
4465         .shdr = {
4466             [0] = { .sh_type = SHT_NULL },
4467             /* Trick: The contents of code_gen_buffer are not present in
4468                this fake ELF file; that got allocated elsewhere.  Therefore
4469                we mark .text as SHT_NOBITS (similar to .bss) so that readers
4470                will not look for contents.  We can record any address.  */
4471             [1] = { /* .text */
4472                 .sh_type = SHT_NOBITS,
4473                 .sh_flags = SHF_EXECINSTR | SHF_ALLOC,
4474             },
4475             [2] = { /* .debug_info */
4476                 .sh_type = SHT_PROGBITS,
4477                 .sh_offset = offsetof(struct ElfImage, di),
4478                 .sh_size = sizeof(struct DebugInfo),
4479             },
4480             [3] = { /* .debug_abbrev */
4481                 .sh_type = SHT_PROGBITS,
4482                 .sh_offset = offsetof(struct ElfImage, da),
4483                 .sh_size = sizeof(img->da),
4484             },
4485             [4] = { /* .debug_frame */
4486                 .sh_type = SHT_PROGBITS,
4487                 .sh_offset = sizeof(struct ElfImage),
4488             },
4489             [5] = { /* .symtab */
4490                 .sh_type = SHT_SYMTAB,
4491                 .sh_offset = offsetof(struct ElfImage, sym),
4492                 .sh_size = sizeof(img->sym),
4493                 .sh_info = 1,
4494                 .sh_link = ARRAY_SIZE(img->shdr) - 1,
4495                 .sh_entsize = sizeof(ElfW(Sym)),
4496             },
4497             [6] = { /* .strtab */
4498                 .sh_type = SHT_STRTAB,
4499                 .sh_offset = offsetof(struct ElfImage, str),
4500                 .sh_size = sizeof(img->str),
4501             }
4502         },
4503         .sym = {
4504             [1] = { /* code_gen_buffer */
4505                 .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC),
4506                 .st_shndx = 1,
4507             }
4508         },
4509         .di = {
4510             .len = sizeof(struct DebugInfo) - 4,
4511             .version = 2,
4512             .ptr_size = sizeof(void *),
4513             .cu_die = 1,
4514             .cu_lang = 0x8001,  /* DW_LANG_Mips_Assembler */
4515             .fn_die = 2,
4516             .fn_name = "code_gen_buffer"
4517         },
4518         .da = {
4519             1,          /* abbrev number (the cu) */
4520             0x11, 1,    /* DW_TAG_compile_unit, has children */
4521             0x13, 0x5,  /* DW_AT_language, DW_FORM_data2 */
4522             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
4523             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
4524             0, 0,       /* end of abbrev */
4525             2,          /* abbrev number (the fn) */
4526             0x2e, 0,    /* DW_TAG_subprogram, no children */
4527             0x3, 0x8,   /* DW_AT_name, DW_FORM_string */
4528             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
4529             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
4530             0, 0,       /* end of abbrev */
4531             0           /* no more abbrev */
4532         },
4533         .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0"
4534                ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer",
4535     };
4536 
4537     /* We only need a single jit entry; statically allocate it.  */
4538     static struct jit_code_entry one_entry;
4539 
4540     uintptr_t buf = (uintptr_t)buf_ptr;
4541     size_t img_size = sizeof(struct ElfImage) + debug_frame_size;
4542     DebugFrameHeader *dfh;
4543 
4544     img = g_malloc(img_size);
4545     *img = img_template;
4546 
4547     img->phdr.p_vaddr = buf;
4548     img->phdr.p_paddr = buf;
4549     img->phdr.p_memsz = buf_size;
4550 
4551     img->shdr[1].sh_name = find_string(img->str, ".text");
4552     img->shdr[1].sh_addr = buf;
4553     img->shdr[1].sh_size = buf_size;
4554 
4555     img->shdr[2].sh_name = find_string(img->str, ".debug_info");
4556     img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev");
4557 
4558     img->shdr[4].sh_name = find_string(img->str, ".debug_frame");
4559     img->shdr[4].sh_size = debug_frame_size;
4560 
4561     img->shdr[5].sh_name = find_string(img->str, ".symtab");
4562     img->shdr[6].sh_name = find_string(img->str, ".strtab");
4563 
4564     img->sym[1].st_name = find_string(img->str, "code_gen_buffer");
4565     img->sym[1].st_value = buf;
4566     img->sym[1].st_size = buf_size;
4567 
4568     img->di.cu_low_pc = buf;
4569     img->di.cu_high_pc = buf + buf_size;
4570     img->di.fn_low_pc = buf;
4571     img->di.fn_high_pc = buf + buf_size;
4572 
4573     dfh = (DebugFrameHeader *)(img + 1);
4574     memcpy(dfh, debug_frame, debug_frame_size);
4575     dfh->fde.func_start = buf;
4576     dfh->fde.func_len = buf_size;
4577 
4578 #ifdef DEBUG_JIT
4579     /* Enable this block to be able to debug the ELF image file creation.
4580        One can use readelf, objdump, or other inspection utilities.  */
4581     {
4582         FILE *f = fopen("/tmp/qemu.jit", "w+b");
4583         if (f) {
4584             if (fwrite(img, img_size, 1, f) != img_size) {
4585                 /* Avoid stupid unused return value warning for fwrite.  */
4586             }
4587             fclose(f);
4588         }
4589     }
4590 #endif
4591 
4592     one_entry.symfile_addr = img;
4593     one_entry.symfile_size = img_size;
4594 
4595     __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
4596     __jit_debug_descriptor.relevant_entry = &one_entry;
4597     __jit_debug_descriptor.first_entry = &one_entry;
4598     __jit_debug_register_code();
4599 }
4600 #else
4601 /* No support for the feature.  Provide the entry point expected by exec.c,
4602    and implement the internal function we declared earlier.  */
4603 
/*
 * Stub for hosts without JIT debug support: all arguments are ignored
 * and nothing is registered.
 */
static void tcg_register_jit_int(void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
{
    (void)buf;
    (void)size;
    (void)debug_frame;
    (void)debug_frame_size;
}
4609 
/*
 * Public entry point when JIT debug registration is unavailable:
 * accepts the code buffer description and does nothing with it.
 */
void tcg_register_jit(void *buf, size_t buf_size)
{
    (void)buf;
    (void)buf_size;
}
4613 #endif /* ELF_HOST_MACHINE */
4614 
4615 #if !TCG_TARGET_MAYBE_vec
4616 void tcg_expand_vec_op(TCGOpcode o, TCGType t, unsigned e, TCGArg a0, ...)
4617 {
4618     g_assert_not_reached();
4619 }
4620 #endif
4621