/*
 * Tiny Code Generator for QEMU
 *
 * Copyright (c) 2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

/* Define this to use liveness analysis (better code). */
#define USE_TCG_OPTIMIZATIONS

#include "qemu/osdep.h"

/* Define to dump the ELF file used to communicate with GDB.  */
#undef DEBUG_JIT

#include "qemu/error-report.h"
#include "qemu/cutils.h"
#include "qemu/host-utils.h"
#include "qemu/qemu-print.h"
#include "qemu/timer.h"

/* Note: the long term plan is to reduce the dependencies on the QEMU
   CPU definitions. Currently they are used for qemu_ld/st
   instructions */
#define NO_CPU_IO_DEFS
#include "cpu.h"

#include "exec/cpu-common.h"
#include "exec/exec-all.h"

#include "tcg-op.h"

#if UINTPTR_MAX == UINT32_MAX
# define ELF_CLASS  ELFCLASS32
#else
# define ELF_CLASS  ELFCLASS64
#endif
#ifdef HOST_WORDS_BIGENDIAN
# define ELF_DATA   ELFDATA2MSB
#else
# define ELF_DATA   ELFDATA2LSB
#endif

#include "elf.h"
#include "exec/log.h"
#include "sysemu/sysemu.h"

/* Forward declarations for functions declared in tcg-target.inc.c and
   used here. */
static void tcg_target_init(TCGContext *s);
static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode);
static void tcg_target_qemu_prologue(TCGContext *s);
static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
                        intptr_t value, intptr_t addend);

/* The CIE and FDE header definitions will be common to all hosts.  */
typedef struct {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t id;
    uint8_t version;
    char augmentation[1];
    uint8_t code_align;
    uint8_t data_align;
    uint8_t return_column;
} DebugFrameCIE;

typedef struct QEMU_PACKED {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t cie_offset;
    uintptr_t func_start;
    uintptr_t func_len;
} DebugFrameFDEHeader;

typedef struct QEMU_PACKED {
    DebugFrameCIE cie;
    DebugFrameFDEHeader fde;
} DebugFrameHeader;

static void tcg_register_jit_int(void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
    __attribute__((unused));

/* Forward declarations for functions declared and used in tcg-target.inc.c. */
static const char *target_parse_constraint(TCGArgConstraint *ct,
                                           const char *ct_str, TCGType type);
static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
                       intptr_t arg2);
static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_movi(TCGContext *s, TCGType type,
                         TCGReg ret, tcg_target_long arg);
static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
                       const int *const_args);
#if TCG_TARGET_MAYBE_vec
static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                            TCGReg dst, TCGReg src);
static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg dst, TCGReg base, intptr_t offset);
static void tcg_out_dupi_vec(TCGContext *s, TCGType type,
                             TCGReg dst, tcg_target_long arg);
static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, unsigned vecl,
                           unsigned vece, const TCGArg *args,
                           const int *const_args);
#else
static inline bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                                   TCGReg dst, TCGReg src)
{
    g_assert_not_reached();
}
static inline bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, TCGReg base, intptr_t offset)
{
    g_assert_not_reached();
}
static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type,
                                    TCGReg dst, tcg_target_long arg)
{
    g_assert_not_reached();
}
static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, unsigned vecl,
                                  unsigned vece, const TCGArg *args,
                                  const int *const_args)
{
    g_assert_not_reached();
}
#endif
static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
                       intptr_t arg2);
static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
                        TCGReg base, intptr_t ofs);
static void tcg_out_call(TCGContext *s, tcg_insn_unit *target);
static int tcg_target_const_match(tcg_target_long val, TCGType type,
                                  const TCGArgConstraint *arg_ct);
#ifdef TCG_TARGET_NEED_LDST_LABELS
static int tcg_out_ldst_finalize(TCGContext *s);
#endif

#define TCG_HIGHWATER 1024

static TCGContext **tcg_ctxs;
static unsigned int n_tcg_ctxs;
TCGv_env cpu_env = 0;

struct tcg_region_tree {
    QemuMutex lock;
    GTree *tree;
    /* padding to avoid false sharing is computed at run-time */
};

/*
 * We divide code_gen_buffer into equally-sized "regions" that TCG threads
 * dynamically allocate from as demand dictates. Given appropriate region
 * sizing, this minimizes flushes even when some TCG threads generate a lot
 * more code than others.
 */
struct tcg_region_state {
    QemuMutex lock;

    /* fields set at init time */
    void *start;
    void *start_aligned;
    void *end;
    size_t n;
    size_t size; /* size of one region */
    size_t stride; /* .size + guard size */

    /* fields protected by the lock */
    size_t current; /* current region index */
    size_t agg_size_full; /* aggregate size of full regions */
};
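
/*
 * Illustrative layout sketch (not part of the code): how the fields above
 * carve up code_gen_buffer.  With n regions of 'size' usable bytes, each
 * followed by a guard page, and 'stride' == size + guard:
 *
 *   start   start_aligned                                            end
 *   |<slack>|<----- size ----->|<guard>|<----- size ----->|<guard>| ...
 *           |<-------- stride -------->|
 *
 * Region 0 also owns the slack between start and start_aligned, and the
 * last region owns any leftover pages at the tail of the buffer; see
 * tcg_region_bounds().
 */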

static struct tcg_region_state region;
/*
 * This is an array of struct tcg_region_tree's, with padding.
 * We use void * to simplify the computation of region_trees[i]; each
 * struct is found every tree_size bytes.
 */
static void *region_trees;
static size_t tree_size;
static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT];
static TCGRegSet tcg_target_call_clobber_regs;

#if TCG_TARGET_INSN_UNIT_SIZE == 1
static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v)
{
    *s->code_ptr++ = v;
}

static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p,
                                                      uint8_t v)
{
    *p = v;
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 2
static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p,
                                                       uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 4
static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p,
                                                       uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 8
static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p,
                                                       uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif
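
/*
 * Usage sketch (illustrative, not part of this file): a backend whose
 * tcg_insn_unit is 1 byte, such as i386, emits opcode bytes and
 * immediates separately:
 *
 *     tcg_out8(s, 0xe9);      // e.g. an x86 'jmp rel32' opcode byte
 *     tcg_out32(s, disp);     // followed by a 32-bit displacement
 *
 * A backend with 4-byte units (e.g. aarch64) emits one insn word per
 * tcg_out32() call.  The tcg_patch* helpers rewrite an already-emitted
 * unit in place once a relocation target becomes known.
 */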

/* label relocation processing */

static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
                          TCGLabel *l, intptr_t addend)
{
    TCGRelocation *r = tcg_malloc(sizeof(TCGRelocation));

    r->type = type;
    r->ptr = code_ptr;
    r->addend = addend;
    QSIMPLEQ_INSERT_TAIL(&l->relocs, r, next);
}

static void tcg_out_label(TCGContext *s, TCGLabel *l, tcg_insn_unit *ptr)
{
    tcg_debug_assert(!l->has_value);
    l->has_value = 1;
    l->u.value_ptr = ptr;
}

TCGLabel *gen_new_label(void)
{
    TCGContext *s = tcg_ctx;
    TCGLabel *l = tcg_malloc(sizeof(TCGLabel));

    memset(l, 0, sizeof(TCGLabel));
    l->id = s->nb_labels++;
    QSIMPLEQ_INIT(&l->relocs);

    QSIMPLEQ_INSERT_TAIL(&s->labels, l, next);

    return l;
}
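
/*
 * Typical front-end usage (sketch; 'val' is a hypothetical temp): pair
 * gen_new_label() with a conditional branch and gen_set_label(), e.g.
 *
 *     TCGLabel *over = gen_new_label();
 *     tcg_gen_brcondi_i32(TCG_COND_EQ, val, 0, over);
 *     ...                              // skipped when val == 0
 *     gen_set_label(over);
 *
 * A branch emitted before its label is resolved is recorded via
 * tcg_out_reloc() and patched by tcg_resolve_relocs() below once the
 * label's address is known.
 */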

static bool tcg_resolve_relocs(TCGContext *s)
{
    TCGLabel *l;

    QSIMPLEQ_FOREACH(l, &s->labels, next) {
        TCGRelocation *r;
        uintptr_t value = l->u.value;

        QSIMPLEQ_FOREACH(r, &l->relocs, next) {
            if (!patch_reloc(r->ptr, r->type, value, r->addend)) {
                return false;
            }
        }
    }
    return true;
}

static void set_jmp_reset_offset(TCGContext *s, int which)
{
    size_t off = tcg_current_code_size(s);
    s->tb_jmp_reset_offset[which] = off;
    /* Make sure that we didn't overflow the stored offset.  */
    assert(s->tb_jmp_reset_offset[which] == off);
}

#include "tcg-target.inc.c"

/* compare a pointer @ptr and a tb_tc @s */
static int ptr_cmp_tb_tc(const void *ptr, const struct tb_tc *s)
{
    if (ptr >= s->ptr + s->size) {
        return 1;
    } else if (ptr < s->ptr) {
        return -1;
    }
    return 0;
}

static gint tb_tc_cmp(gconstpointer ap, gconstpointer bp)
{
    const struct tb_tc *a = ap;
    const struct tb_tc *b = bp;

    /*
     * When both sizes are set, we know this isn't a lookup.
     * This is the most likely case: every TB must be inserted; lookups
     * are a lot less frequent.
     */
    if (likely(a->size && b->size)) {
        if (a->ptr > b->ptr) {
            return 1;
        } else if (a->ptr < b->ptr) {
            return -1;
        }
        /* a->ptr == b->ptr should happen only on deletions */
        g_assert(a->size == b->size);
        return 0;
    }
    /*
     * All lookups have one of the two .size fields set to 0.
     * From the glib sources we see that @ap is always the lookup key. However
     * the docs provide no guarantee, so we just mark this case as likely.
     */
    if (likely(a->size == 0)) {
        return ptr_cmp_tb_tc(a->ptr, b);
    }
    return ptr_cmp_tb_tc(b->ptr, a);
}

static void tcg_region_trees_init(void)
{
    size_t i;

    tree_size = ROUND_UP(sizeof(struct tcg_region_tree), qemu_dcache_linesize);
    region_trees = qemu_memalign(qemu_dcache_linesize, region.n * tree_size);
    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        qemu_mutex_init(&rt->lock);
        rt->tree = g_tree_new(tb_tc_cmp);
    }
}

static struct tcg_region_tree *tc_ptr_to_region_tree(void *p)
{
    size_t region_idx;

    if (p < region.start_aligned) {
        region_idx = 0;
    } else {
        ptrdiff_t offset = p - region.start_aligned;

        if (offset > region.stride * (region.n - 1)) {
            region_idx = region.n - 1;
        } else {
            region_idx = offset / region.stride;
        }
    }
    return region_trees + region_idx * tree_size;
}

void tcg_tb_insert(TranslationBlock *tb)
{
    struct tcg_region_tree *rt = tc_ptr_to_region_tree(tb->tc.ptr);

    qemu_mutex_lock(&rt->lock);
    g_tree_insert(rt->tree, &tb->tc, tb);
    qemu_mutex_unlock(&rt->lock);
}

void tcg_tb_remove(TranslationBlock *tb)
{
    struct tcg_region_tree *rt = tc_ptr_to_region_tree(tb->tc.ptr);

    qemu_mutex_lock(&rt->lock);
    g_tree_remove(rt->tree, &tb->tc);
    qemu_mutex_unlock(&rt->lock);
}

/*
 * Find the TB 'tb' such that
 * tb->tc.ptr <= tc_ptr < tb->tc.ptr + tb->tc.size
 * Return NULL if not found.
 */
TranslationBlock *tcg_tb_lookup(uintptr_t tc_ptr)
{
    struct tcg_region_tree *rt = tc_ptr_to_region_tree((void *)tc_ptr);
    TranslationBlock *tb;
    struct tb_tc s = { .ptr = (void *)tc_ptr };

    qemu_mutex_lock(&rt->lock);
    tb = g_tree_lookup(rt->tree, &s);
    qemu_mutex_unlock(&rt->lock);
    return tb;
}
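
/*
 * Example use (sketch; 'retaddr' is a hypothetical host PC that lies
 * inside generated code, e.g. a helper's GETPC() value):
 *
 *     TranslationBlock *tb = tcg_tb_lookup(retaddr);
 *     if (tb) {
 *         ...                 // e.g. re-derive guest state from tb
 *     }
 *
 * The lookup key has .size == 0, which is what tb_tc_cmp() keys on to
 * distinguish lookups from insertions.
 */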

static void tcg_region_tree_lock_all(void)
{
    size_t i;

    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        qemu_mutex_lock(&rt->lock);
    }
}

static void tcg_region_tree_unlock_all(void)
{
    size_t i;

    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        qemu_mutex_unlock(&rt->lock);
    }
}

void tcg_tb_foreach(GTraverseFunc func, gpointer user_data)
{
    size_t i;

    tcg_region_tree_lock_all();
    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        g_tree_foreach(rt->tree, func, user_data);
    }
    tcg_region_tree_unlock_all();
}

size_t tcg_nb_tbs(void)
{
    size_t nb_tbs = 0;
    size_t i;

    tcg_region_tree_lock_all();
    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        nb_tbs += g_tree_nnodes(rt->tree);
    }
    tcg_region_tree_unlock_all();
    return nb_tbs;
}

static void tcg_region_tree_reset_all(void)
{
    size_t i;

    tcg_region_tree_lock_all();
    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        /* Increment the refcount first so that destroy acts as a reset */
        g_tree_ref(rt->tree);
        g_tree_destroy(rt->tree);
    }
    tcg_region_tree_unlock_all();
}

static void tcg_region_bounds(size_t curr_region, void **pstart, void **pend)
{
    void *start, *end;

    start = region.start_aligned + curr_region * region.stride;
    end = start + region.size;

    if (curr_region == 0) {
        start = region.start;
    }
    if (curr_region == region.n - 1) {
        end = region.end;
    }

    *pstart = start;
    *pend = end;
}

static void tcg_region_assign(TCGContext *s, size_t curr_region)
{
    void *start, *end;

    tcg_region_bounds(curr_region, &start, &end);

    s->code_gen_buffer = start;
    s->code_gen_ptr = start;
    s->code_gen_buffer_size = end - start;
    s->code_gen_highwater = end - TCG_HIGHWATER;
}

static bool tcg_region_alloc__locked(TCGContext *s)
{
    if (region.current == region.n) {
        return true;
    }
    tcg_region_assign(s, region.current);
    region.current++;
    return false;
}

/*
 * Request a new region once the one in use has filled up.
 * Returns true on error.
 */
static bool tcg_region_alloc(TCGContext *s)
{
    bool err;
    /* read the region size now; alloc__locked will overwrite it on success */
    size_t size_full = s->code_gen_buffer_size;

    qemu_mutex_lock(&region.lock);
    err = tcg_region_alloc__locked(s);
    if (!err) {
        region.agg_size_full += size_full - TCG_HIGHWATER;
    }
    qemu_mutex_unlock(&region.lock);
    return err;
}

/*
 * Perform a context's first region allocation.
 * This function does _not_ increment region.agg_size_full.
 */
static inline bool tcg_region_initial_alloc__locked(TCGContext *s)
{
    return tcg_region_alloc__locked(s);
}

/* Call from a safe-work context */
void tcg_region_reset_all(void)
{
    unsigned int n_ctxs = atomic_read(&n_tcg_ctxs);
    unsigned int i;

    qemu_mutex_lock(&region.lock);
    region.current = 0;
    region.agg_size_full = 0;

    for (i = 0; i < n_ctxs; i++) {
        TCGContext *s = atomic_read(&tcg_ctxs[i]);
        bool err = tcg_region_initial_alloc__locked(s);

        g_assert(!err);
    }
    qemu_mutex_unlock(&region.lock);

    tcg_region_tree_reset_all();
}

#ifdef CONFIG_USER_ONLY
static size_t tcg_n_regions(void)
{
    return 1;
}
#else
/*
 * It is likely that some vCPUs will translate more code than others, so we
 * first try to set more regions than max_cpus, with those regions being of
 * reasonable size. If that's not possible we make do by evenly dividing
 * the code_gen_buffer among the vCPUs.
 */
static size_t tcg_n_regions(void)
{
    size_t i;

    /* Use a single region if all we have is one vCPU thread */
    if (max_cpus == 1 || !qemu_tcg_mttcg_enabled()) {
        return 1;
    }

    /* Try to have more regions than max_cpus, with each region being >= 2 MB */
    for (i = 8; i > 0; i--) {
        size_t regions_per_thread = i;
        size_t region_size;

        region_size = tcg_init_ctx.code_gen_buffer_size;
        region_size /= max_cpus * regions_per_thread;

        if (region_size >= 2 * 1024u * 1024) {
            return max_cpus * regions_per_thread;
        }
    }
    /* If we can't, then just allocate one region per vCPU thread */
    return max_cpus;
}
#endif
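
/*
 * Worked example for tcg_n_regions() (hypothetical numbers): with a
 * 512 MB code_gen_buffer and max_cpus == 8, the first iteration already
 * succeeds: 512 MB / (8 * 8) = 8 MB >= 2 MB, so 64 regions are used.
 * With a 64 MB buffer the loop steps down to i == 4, where
 * 64 MB / (8 * 4) = 2 MB, giving 32 regions.
 */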

/*
 * Initializes region partitioning.
 *
 * Called at init time from the parent thread (i.e. the one calling
 * tcg_context_init), after the target's TCG globals have been set.
 *
 * Region partitioning works by splitting code_gen_buffer into separate regions,
 * and then assigning regions to TCG threads so that the threads can translate
 * code in parallel without synchronization.
 *
 * In softmmu the number of TCG threads is bounded by max_cpus, so we use at
 * least max_cpus regions in MTTCG. In !MTTCG we use a single region.
 * Note that the TCG options from the command-line (i.e. -accel accel=tcg,[...])
 * must have been parsed before calling this function, since it calls
 * qemu_tcg_mttcg_enabled().
 *
 * In user-mode we use a single region.  Having multiple regions in user-mode
 * is not supported, because the number of vCPU threads (recall that each thread
 * spawned by the guest corresponds to a vCPU thread) is only bounded by the
 * OS, and usually this number is huge (tens of thousands is not uncommon).
 * Thus, given this large bound on the number of vCPU threads and the fact
 * that code_gen_buffer is allocated at compile-time, we cannot guarantee
 * the availability of at least one region per vCPU thread.
 *
 * However, this user-mode limitation is unlikely to be a significant problem
 * in practice. Multi-threaded guests share most if not all of their translated
 * code, which makes parallel code generation less appealing than in softmmu.
 */
void tcg_region_init(void)
{
    void *buf = tcg_init_ctx.code_gen_buffer;
    void *aligned;
    size_t size = tcg_init_ctx.code_gen_buffer_size;
    size_t page_size = qemu_real_host_page_size;
    size_t region_size;
    size_t n_regions;
    size_t i;

    n_regions = tcg_n_regions();

    /* The first region will be 'aligned - buf' bytes larger than the others */
    aligned = QEMU_ALIGN_PTR_UP(buf, page_size);
    g_assert(aligned < tcg_init_ctx.code_gen_buffer + size);
    /*
     * Make region_size a multiple of page_size, using aligned as the start.
     * As a result of this we might end up with a few extra pages at the end of
     * the buffer; we will assign those to the last region.
     */
    region_size = (size - (aligned - buf)) / n_regions;
    region_size = QEMU_ALIGN_DOWN(region_size, page_size);

    /* A region must have at least 2 pages; one code, one guard */
    g_assert(region_size >= 2 * page_size);

    /* init the region struct */
    qemu_mutex_init(&region.lock);
    region.n = n_regions;
    region.size = region_size - page_size;
    region.stride = region_size;
    region.start = buf;
    region.start_aligned = aligned;
    /* page-align the end, since its last page will be a guard page */
    region.end = QEMU_ALIGN_PTR_DOWN(buf + size, page_size);
    /* account for that last guard page */
    region.end -= page_size;

    /* set guard pages */
    for (i = 0; i < region.n; i++) {
        void *start, *end;
        int rc;

        tcg_region_bounds(i, &start, &end);
        rc = qemu_mprotect_none(end, page_size);
        g_assert(!rc);
    }

    tcg_region_trees_init();

    /* In user-mode we support only one ctx, so do the initial allocation now */
#ifdef CONFIG_USER_ONLY
    {
        bool err = tcg_region_initial_alloc__locked(tcg_ctx);

        g_assert(!err);
    }
#endif
}

/*
 * All TCG threads except the parent (i.e. the one that called tcg_context_init
 * and registered the target's TCG globals) must register with this function
 * before initiating translation.
 *
 * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation
 * of tcg_region_init() for the reasoning behind this.
 *
 * In softmmu each caller registers its context in tcg_ctxs[]. Note that in
 * softmmu tcg_ctxs[] does not track tcg_init_ctx, since the initial context
 * is not used anymore for translation once this function is called.
 *
 * Not tracking tcg_init_ctx in tcg_ctxs[] in softmmu keeps code that iterates
 * over the array (e.g. tcg_code_size()) the same for both softmmu and
 * user-mode.
 */
#ifdef CONFIG_USER_ONLY
void tcg_register_thread(void)
{
    tcg_ctx = &tcg_init_ctx;
}
#else
void tcg_register_thread(void)
{
    TCGContext *s = g_malloc(sizeof(*s));
    unsigned int i, n;
    bool err;

    *s = tcg_init_ctx;

    /* Relink mem_base.  */
    for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) {
        if (tcg_init_ctx.temps[i].mem_base) {
            ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps;
            tcg_debug_assert(b >= 0 && b < n);
            s->temps[i].mem_base = &s->temps[b];
        }
    }

    /* Claim an entry in tcg_ctxs */
    n = atomic_fetch_inc(&n_tcg_ctxs);
    g_assert(n < max_cpus);
    atomic_set(&tcg_ctxs[n], s);

    tcg_ctx = s;
    qemu_mutex_lock(&region.lock);
    err = tcg_region_initial_alloc__locked(tcg_ctx);
    g_assert(!err);
    qemu_mutex_unlock(&region.lock);
}
#endif /* !CONFIG_USER_ONLY */

/*
 * Returns the size (in bytes) of all translated code (i.e. from all regions)
 * currently in the cache.
 * See also: tcg_code_capacity()
 * Do not confuse with tcg_current_code_size(); that one applies to a single
 * TCG context.
 */
size_t tcg_code_size(void)
{
    unsigned int n_ctxs = atomic_read(&n_tcg_ctxs);
    unsigned int i;
    size_t total;

    qemu_mutex_lock(&region.lock);
    total = region.agg_size_full;
    for (i = 0; i < n_ctxs; i++) {
        const TCGContext *s = atomic_read(&tcg_ctxs[i]);
        size_t size;

        size = atomic_read(&s->code_gen_ptr) - s->code_gen_buffer;
        g_assert(size <= s->code_gen_buffer_size);
        total += size;
    }
    qemu_mutex_unlock(&region.lock);
    return total;
}

/*
 * Returns the code capacity (in bytes) of the entire cache, i.e. including all
 * regions.
 * See also: tcg_code_size()
 */
size_t tcg_code_capacity(void)
{
    size_t guard_size, capacity;

    /* no need for synchronization; these variables are set at init time */
    guard_size = region.stride - region.size;
    capacity = region.end + guard_size - region.start;
    capacity -= region.n * (guard_size + TCG_HIGHWATER);
    return capacity;
}
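
/*
 * Worked example (hypothetical numbers): with 16 regions, 4 KB pages
 * (so guard_size == 4096) and TCG_HIGHWATER == 1024, the capacity is
 * the span covered by the regions minus, per region, its guard page and
 * the high-water slack that tcg_region_assign() keeps free below
 * code_gen_highwater:
 *
 *     capacity = (end + 4096 - start) - 16 * (4096 + 1024)
 */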

size_t tcg_tb_phys_invalidate_count(void)
{
    unsigned int n_ctxs = atomic_read(&n_tcg_ctxs);
    unsigned int i;
    size_t total = 0;

    for (i = 0; i < n_ctxs; i++) {
        const TCGContext *s = atomic_read(&tcg_ctxs[i]);

        total += atomic_read(&s->tb_phys_invalidate_count);
    }
    return total;
}

/* pool based memory allocation */
void *tcg_malloc_internal(TCGContext *s, int size)
{
    TCGPool *p;
    int pool_size;

    if (size > TCG_POOL_CHUNK_SIZE) {
        /* big malloc: insert a new pool (XXX: could optimize) */
        p = g_malloc(sizeof(TCGPool) + size);
        p->size = size;
        p->next = s->pool_first_large;
        s->pool_first_large = p;
        return p->data;
    } else {
        p = s->pool_current;
        if (!p) {
            p = s->pool_first;
            if (!p) {
                goto new_pool;
            }
        } else {
            if (!p->next) {
            new_pool:
                pool_size = TCG_POOL_CHUNK_SIZE;
                p = g_malloc(sizeof(TCGPool) + pool_size);
                p->size = pool_size;
                p->next = NULL;
                if (s->pool_current) {
                    s->pool_current->next = p;
                } else {
                    s->pool_first = p;
                }
            } else {
                p = p->next;
            }
        }
    }
    s->pool_current = p;
    s->pool_cur = p->data + size;
    s->pool_end = p->data + p->size;
    return p->data;
}

void tcg_pool_reset(TCGContext *s)
{
    TCGPool *p, *t;
    for (p = s->pool_first_large; p; p = t) {
        t = p->next;
        g_free(p);
    }
    s->pool_first_large = NULL;
    s->pool_cur = s->pool_end = NULL;
    s->pool_current = NULL;
}

typedef struct TCGHelperInfo {
    void *func;
    const char *name;
    unsigned flags;
    unsigned sizemask;
} TCGHelperInfo;

#include "exec/helper-proto.h"

static const TCGHelperInfo all_helpers[] = {
#include "exec/helper-tcg.h"
};
static GHashTable *helper_table;
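
/*
 * Lookup sketch: helpers are keyed by their function pointer, so
 * retrieval (see tcg_gen_callN() below) is a direct-pointer hash
 * lookup:
 *
 *     TCGHelperInfo *info
 *         = g_hash_table_lookup(helper_table, (gpointer)func);
 *
 * where func is the address recorded for each entry of all_helpers[]
 * by the helper-tcg.h expansion.
 */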

static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)];
static void process_op_defs(TCGContext *s);
static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
                                            TCGReg reg, const char *name);

void tcg_context_init(TCGContext *s)
{
    int op, total_args, n, i;
    TCGOpDef *def;
    TCGArgConstraint *args_ct;
    int *sorted_args;
    TCGTemp *ts;

    memset(s, 0, sizeof(*s));
    s->nb_globals = 0;

    /* Count total number of arguments and allocate the corresponding
       space */
    total_args = 0;
    for (op = 0; op < NB_OPS; op++) {
        def = &tcg_op_defs[op];
        n = def->nb_iargs + def->nb_oargs;
        total_args += n;
    }

    args_ct = g_malloc(sizeof(TCGArgConstraint) * total_args);
    sorted_args = g_malloc(sizeof(int) * total_args);

    for (op = 0; op < NB_OPS; op++) {
        def = &tcg_op_defs[op];
        def->args_ct = args_ct;
        def->sorted_args = sorted_args;
        n = def->nb_iargs + def->nb_oargs;
        sorted_args += n;
        args_ct += n;
    }

    /* Register helpers.  */
    /* Use g_direct_hash/equal for direct pointer comparisons on func.  */
    helper_table = g_hash_table_new(NULL, NULL);

    for (i = 0; i < ARRAY_SIZE(all_helpers); ++i) {
        g_hash_table_insert(helper_table, (gpointer)all_helpers[i].func,
                            (gpointer)&all_helpers[i]);
    }

    tcg_target_init(s);
    process_op_defs(s);

    /* Reverse the order of the saved registers, assuming they're all at
       the start of tcg_target_reg_alloc_order.  */
    for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) {
        int r = tcg_target_reg_alloc_order[n];
        if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) {
            break;
        }
    }
    for (i = 0; i < n; ++i) {
        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i];
    }
    for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) {
        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i];
    }

    tcg_ctx = s;
    /*
     * In user-mode we simply share the init context among threads, since we
     * use a single region. See the documentation of tcg_region_init() for
     * the reasoning behind this.
     * In softmmu we will have at most max_cpus TCG threads.
     */
#ifdef CONFIG_USER_ONLY
    tcg_ctxs = &tcg_ctx;
    n_tcg_ctxs = 1;
#else
    tcg_ctxs = g_new(TCGContext *, max_cpus);
#endif

    tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0));
    ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env");
    cpu_env = temp_tcgv_ptr(ts);
}

/*
 * Allocate TBs right before their corresponding translated code, making
 * sure that TBs and code are on different cache lines.
 */
TranslationBlock *tcg_tb_alloc(TCGContext *s)
{
    uintptr_t align = qemu_icache_linesize;
    TranslationBlock *tb;
    void *next;

 retry:
    tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align);
    next = (void *)ROUND_UP((uintptr_t)(tb + 1), align);

    if (unlikely(next > s->code_gen_highwater)) {
        if (tcg_region_alloc(s)) {
            return NULL;
        }
        goto retry;
    }
    atomic_set(&s->code_gen_ptr, next);
    s->data_gen_ptr = NULL;
    return tb;
}
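
/*
 * Example of the alignment above (illustrative numbers): with a 64-byte
 * icache line and code_gen_ptr == 0x1008, the TB descriptor is placed
 * at 0x1040, and 'next' (where translated code will start) is rounded
 * up to the next 64-byte boundary at or after the end of the
 * descriptor, so descriptor and code never share a cache line.
 */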

void tcg_prologue_init(TCGContext *s)
{
    size_t prologue_size, total_size;
    void *buf0, *buf1;

    /* Put the prologue at the beginning of code_gen_buffer.  */
    buf0 = s->code_gen_buffer;
    total_size = s->code_gen_buffer_size;
    s->code_ptr = buf0;
    s->code_buf = buf0;
    s->data_gen_ptr = NULL;
    s->code_gen_prologue = buf0;

    /* Compute a high-water mark, at which we voluntarily flush the buffer
       and start over.  The size here is arbitrary, significantly larger
       than we expect the code generation for any one opcode to require.  */
    s->code_gen_highwater = s->code_gen_buffer + (total_size - TCG_HIGHWATER);

#ifdef TCG_TARGET_NEED_POOL_LABELS
    s->pool_labels = NULL;
#endif

    /* Generate the prologue.  */
    tcg_target_qemu_prologue(s);

#ifdef TCG_TARGET_NEED_POOL_LABELS
    /* Allow the prologue to put e.g. guest_base into a pool entry.  */
    {
        int result = tcg_out_pool_finalize(s);
        tcg_debug_assert(result == 0);
    }
#endif

    buf1 = s->code_ptr;
    flush_icache_range((uintptr_t)buf0, (uintptr_t)buf1);

    /* Deduct the prologue from the buffer.  */
    prologue_size = tcg_current_code_size(s);
    s->code_gen_ptr = buf1;
    s->code_gen_buffer = buf1;
    s->code_buf = buf1;
    total_size -= prologue_size;
    s->code_gen_buffer_size = total_size;

    tcg_register_jit(s->code_gen_buffer, total_size);

#ifdef DEBUG_DISAS
    if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
        qemu_log_lock();
        qemu_log("PROLOGUE: [size=%zu]\n", prologue_size);
        if (s->data_gen_ptr) {
            size_t code_size = s->data_gen_ptr - buf0;
            size_t data_size = prologue_size - code_size;
            size_t i;

            log_disas(buf0, code_size);

            for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) {
                if (sizeof(tcg_target_ulong) == 8) {
                    qemu_log("0x%08" PRIxPTR ":  .quad  0x%016" PRIx64 "\n",
                             (uintptr_t)s->data_gen_ptr + i,
                             *(uint64_t *)(s->data_gen_ptr + i));
                } else {
                    qemu_log("0x%08" PRIxPTR ":  .long  0x%08x\n",
                             (uintptr_t)s->data_gen_ptr + i,
                             *(uint32_t *)(s->data_gen_ptr + i));
                }
            }
        } else {
            log_disas(buf0, prologue_size);
        }
        qemu_log("\n");
        qemu_log_flush();
        qemu_log_unlock();
    }
#endif

    /* Assert that goto_ptr is implemented completely.  */
    if (TCG_TARGET_HAS_goto_ptr) {
        tcg_debug_assert(s->code_gen_epilogue != NULL);
    }
}

void tcg_func_start(TCGContext *s)
{
    tcg_pool_reset(s);
    s->nb_temps = s->nb_globals;

    /* No temps have been previously allocated for size or locality.  */
    memset(s->free_temps, 0, sizeof(s->free_temps));

    s->nb_ops = 0;
    s->nb_labels = 0;
    s->current_frame_offset = s->frame_start;

#ifdef CONFIG_DEBUG_TCG
    s->goto_tb_issue_mask = 0;
#endif

    QTAILQ_INIT(&s->ops);
    QTAILQ_INIT(&s->free_ops);
    QSIMPLEQ_INIT(&s->labels);
}

static inline TCGTemp *tcg_temp_alloc(TCGContext *s)
{
    int n = s->nb_temps++;
    tcg_debug_assert(n < TCG_MAX_TEMPS);
    return memset(&s->temps[n], 0, sizeof(TCGTemp));
}

static inline TCGTemp *tcg_global_alloc(TCGContext *s)
{
    TCGTemp *ts;

    tcg_debug_assert(s->nb_globals == s->nb_temps);
    s->nb_globals++;
    ts = tcg_temp_alloc(s);
    ts->temp_global = 1;

    return ts;
}

static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
                                            TCGReg reg, const char *name)
{
    TCGTemp *ts;

    if (TCG_TARGET_REG_BITS == 32 && type != TCG_TYPE_I32) {
        tcg_abort();
    }

    ts = tcg_global_alloc(s);
    ts->base_type = type;
    ts->type = type;
    ts->fixed_reg = 1;
    ts->reg = reg;
    ts->name = name;
    tcg_regset_set_reg(s->reserved_regs, reg);

    return ts;
}

void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size)
{
    s->frame_start = start;
    s->frame_end = start + size;
    s->frame_temp
        = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame");
}

TCGTemp *tcg_global_mem_new_internal(TCGType type, TCGv_ptr base,
                                     intptr_t offset, const char *name)
{
    TCGContext *s = tcg_ctx;
    TCGTemp *base_ts = tcgv_ptr_temp(base);
    TCGTemp *ts = tcg_global_alloc(s);
    int indirect_reg = 0, bigendian = 0;
#ifdef HOST_WORDS_BIGENDIAN
    bigendian = 1;
#endif

    if (!base_ts->fixed_reg) {
        /* We do not support double-indirect registers.  */
        tcg_debug_assert(!base_ts->indirect_reg);
        base_ts->indirect_base = 1;
        s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64
                            ? 2 : 1);
        indirect_reg = 1;
    }

    if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
        TCGTemp *ts2 = tcg_global_alloc(s);
        char buf[64];

        ts->base_type = TCG_TYPE_I64;
        ts->type = TCG_TYPE_I32;
        ts->indirect_reg = indirect_reg;
        ts->mem_allocated = 1;
        ts->mem_base = base_ts;
        ts->mem_offset = offset + bigendian * 4;
        pstrcpy(buf, sizeof(buf), name);
        pstrcat(buf, sizeof(buf), "_0");
        ts->name = strdup(buf);

        tcg_debug_assert(ts2 == ts + 1);
        ts2->base_type = TCG_TYPE_I64;
        ts2->type = TCG_TYPE_I32;
        ts2->indirect_reg = indirect_reg;
        ts2->mem_allocated = 1;
        ts2->mem_base = base_ts;
        ts2->mem_offset = offset + (1 - bigendian) * 4;
        pstrcpy(buf, sizeof(buf), name);
        pstrcat(buf, sizeof(buf), "_1");
        ts2->name = strdup(buf);
    } else {
        ts->base_type = type;
        ts->type = type;
        ts->indirect_reg = indirect_reg;
        ts->mem_allocated = 1;
        ts->mem_base = base_ts;
        ts->mem_offset = offset;
        ts->name = name;
    }
    return ts;
}

TCGTemp *tcg_temp_new_internal(TCGType type, bool temp_local)
{
    TCGContext *s = tcg_ctx;
    TCGTemp *ts;
    int idx, k;

    k = type + (temp_local ? TCG_TYPE_COUNT : 0);
    idx = find_first_bit(s->free_temps[k].l, TCG_MAX_TEMPS);
    if (idx < TCG_MAX_TEMPS) {
        /* There is already an available temp with the right type.  */
        clear_bit(idx, s->free_temps[k].l);

        ts = &s->temps[idx];
        ts->temp_allocated = 1;
        tcg_debug_assert(ts->base_type == type);
        tcg_debug_assert(ts->temp_local == temp_local);
    } else {
        ts = tcg_temp_alloc(s);
        if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
            TCGTemp *ts2 = tcg_temp_alloc(s);

            ts->base_type = type;
            ts->type = TCG_TYPE_I32;
            ts->temp_allocated = 1;
            ts->temp_local = temp_local;

            tcg_debug_assert(ts2 == ts + 1);
            ts2->base_type = TCG_TYPE_I64;
            ts2->type = TCG_TYPE_I32;
            ts2->temp_allocated = 1;
            ts2->temp_local = temp_local;
        } else {
            ts->base_type = type;
            ts->type = type;
            ts->temp_allocated = 1;
            ts->temp_local = temp_local;
        }
    }

#if defined(CONFIG_DEBUG_TCG)
    s->temps_in_use++;
#endif
    return ts;
}

TCGv_vec tcg_temp_new_vec(TCGType type)
{
    TCGTemp *t;

#ifdef CONFIG_DEBUG_TCG
    switch (type) {
    case TCG_TYPE_V64:
        assert(TCG_TARGET_HAS_v64);
        break;
    case TCG_TYPE_V128:
        assert(TCG_TARGET_HAS_v128);
        break;
    case TCG_TYPE_V256:
        assert(TCG_TARGET_HAS_v256);
        break;
    default:
        g_assert_not_reached();
    }
#endif

    t = tcg_temp_new_internal(type, 0);
    return temp_tcgv_vec(t);
}

/* Create a new temp of the same type as an existing temp.  */
TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match)
{
    TCGTemp *t = tcgv_vec_temp(match);

    tcg_debug_assert(t->temp_allocated != 0);

    t = tcg_temp_new_internal(t->base_type, 0);
    return temp_tcgv_vec(t);
}

void tcg_temp_free_internal(TCGTemp *ts)
{
    TCGContext *s = tcg_ctx;
    int k, idx;

#if defined(CONFIG_DEBUG_TCG)
    s->temps_in_use--;
    if (s->temps_in_use < 0) {
        fprintf(stderr, "More temporaries freed than allocated!\n");
    }
#endif

    tcg_debug_assert(ts->temp_global == 0);
    tcg_debug_assert(ts->temp_allocated != 0);
    ts->temp_allocated = 0;

    idx = temp_idx(ts);
    k = ts->base_type + (ts->temp_local ? TCG_TYPE_COUNT : 0);
    set_bit(idx, s->free_temps[k].l);
}

TCGv_i32 tcg_const_i32(int32_t val)
{
    TCGv_i32 t0;
    t0 = tcg_temp_new_i32();
    tcg_gen_movi_i32(t0, val);
    return t0;
}

TCGv_i64 tcg_const_i64(int64_t val)
{
    TCGv_i64 t0;
    t0 = tcg_temp_new_i64();
    tcg_gen_movi_i64(t0, val);
    return t0;
}
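
/*
 * Usage sketch ('dst' and 'src' are hypothetical temps): each
 * tcg_const_* call allocates a fresh temporary, so the caller is
 * expected to free it once the constant has been consumed:
 *
 *     TCGv_i32 four = tcg_const_i32(4);
 *     tcg_gen_add_i32(dst, src, four);
 *     tcg_temp_free_i32(four);
 */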

TCGv_i32 tcg_const_local_i32(int32_t val)
{
    TCGv_i32 t0;
    t0 = tcg_temp_local_new_i32();
    tcg_gen_movi_i32(t0, val);
    return t0;
}

TCGv_i64 tcg_const_local_i64(int64_t val)
{
    TCGv_i64 t0;
    t0 = tcg_temp_local_new_i64();
    tcg_gen_movi_i64(t0, val);
    return t0;
}

#if defined(CONFIG_DEBUG_TCG)
void tcg_clear_temp_count(void)
{
    TCGContext *s = tcg_ctx;
    s->temps_in_use = 0;
}

int tcg_check_temp_count(void)
{
    TCGContext *s = tcg_ctx;
    if (s->temps_in_use) {
        /* Clear the count so that we don't give another
         * warning immediately next time around.
         */
        s->temps_in_use = 0;
        return 1;
    }
    return 0;
}
#endif

/* Return true if OP may appear in the opcode stream.
   Test the runtime variable that controls each opcode.  */
bool tcg_op_supported(TCGOpcode op)
{
    const bool have_vec
        = TCG_TARGET_HAS_v64 | TCG_TARGET_HAS_v128 | TCG_TARGET_HAS_v256;

    switch (op) {
    case INDEX_op_discard:
    case INDEX_op_set_label:
    case INDEX_op_call:
    case INDEX_op_br:
    case INDEX_op_mb:
    case INDEX_op_insn_start:
    case INDEX_op_exit_tb:
    case INDEX_op_goto_tb:
    case INDEX_op_qemu_ld_i32:
    case INDEX_op_qemu_st_i32:
    case INDEX_op_qemu_ld_i64:
    case INDEX_op_qemu_st_i64:
        return true;

    case INDEX_op_goto_ptr:
        return TCG_TARGET_HAS_goto_ptr;

    case INDEX_op_mov_i32:
    case INDEX_op_movi_i32:
    case INDEX_op_setcond_i32:
    case INDEX_op_brcond_i32:
    case INDEX_op_ld8u_i32:
    case INDEX_op_ld8s_i32:
    case INDEX_op_ld16u_i32:
    case INDEX_op_ld16s_i32:
    case INDEX_op_ld_i32:
    case INDEX_op_st8_i32:
    case INDEX_op_st16_i32:
    case INDEX_op_st_i32:
    case INDEX_op_add_i32:
    case INDEX_op_sub_i32:
    case INDEX_op_mul_i32:
    case INDEX_op_and_i32:
    case INDEX_op_or_i32:
    case INDEX_op_xor_i32:
    case INDEX_op_shl_i32:
    case INDEX_op_shr_i32:
    case INDEX_op_sar_i32:
        return true;

    case INDEX_op_movcond_i32:
        return TCG_TARGET_HAS_movcond_i32;
    case INDEX_op_div_i32:
    case INDEX_op_divu_i32:
        return TCG_TARGET_HAS_div_i32;
    case INDEX_op_rem_i32:
    case INDEX_op_remu_i32:
        return TCG_TARGET_HAS_rem_i32;
    case INDEX_op_div2_i32:
    case INDEX_op_divu2_i32:
        return TCG_TARGET_HAS_div2_i32;
    case INDEX_op_rotl_i32:
    case INDEX_op_rotr_i32:
        return TCG_TARGET_HAS_rot_i32;
    case INDEX_op_deposit_i32:
        return TCG_TARGET_HAS_deposit_i32;
    case INDEX_op_extract_i32:
        return TCG_TARGET_HAS_extract_i32;
    case INDEX_op_sextract_i32:
        return TCG_TARGET_HAS_sextract_i32;
    case INDEX_op_extract2_i32:
        return TCG_TARGET_HAS_extract2_i32;
    case INDEX_op_add2_i32:
        return TCG_TARGET_HAS_add2_i32;
    case INDEX_op_sub2_i32:
        return TCG_TARGET_HAS_sub2_i32;
    case INDEX_op_mulu2_i32:
        return TCG_TARGET_HAS_mulu2_i32;
    case INDEX_op_muls2_i32:
        return TCG_TARGET_HAS_muls2_i32;
    case INDEX_op_muluh_i32:
        return TCG_TARGET_HAS_muluh_i32;
    case INDEX_op_mulsh_i32:
        return TCG_TARGET_HAS_mulsh_i32;
    case INDEX_op_ext8s_i32:
        return TCG_TARGET_HAS_ext8s_i32;
    case INDEX_op_ext16s_i32:
        return TCG_TARGET_HAS_ext16s_i32;
    case INDEX_op_ext8u_i32:
        return TCG_TARGET_HAS_ext8u_i32;
    case INDEX_op_ext16u_i32:
        return TCG_TARGET_HAS_ext16u_i32;
    case INDEX_op_bswap16_i32:
        return TCG_TARGET_HAS_bswap16_i32;
    case INDEX_op_bswap32_i32:
        return TCG_TARGET_HAS_bswap32_i32;
    case INDEX_op_not_i32:
        return TCG_TARGET_HAS_not_i32;
    case INDEX_op_neg_i32:
        return TCG_TARGET_HAS_neg_i32;
    case INDEX_op_andc_i32:
        return TCG_TARGET_HAS_andc_i32;
    case INDEX_op_orc_i32:
        return TCG_TARGET_HAS_orc_i32;
    case INDEX_op_eqv_i32:
        return TCG_TARGET_HAS_eqv_i32;
    case INDEX_op_nand_i32:
        return TCG_TARGET_HAS_nand_i32;
    case INDEX_op_nor_i32:
        return TCG_TARGET_HAS_nor_i32;
    case INDEX_op_clz_i32:
        return TCG_TARGET_HAS_clz_i32;
    case INDEX_op_ctz_i32:
        return TCG_TARGET_HAS_ctz_i32;
    case INDEX_op_ctpop_i32:
        return TCG_TARGET_HAS_ctpop_i32;

    case INDEX_op_brcond2_i32:
    case INDEX_op_setcond2_i32:
        return TCG_TARGET_REG_BITS == 32;

    case INDEX_op_mov_i64:
    case INDEX_op_movi_i64:
    case INDEX_op_setcond_i64:
    case INDEX_op_brcond_i64:
    case INDEX_op_ld8u_i64:
    case INDEX_op_ld8s_i64:
    case INDEX_op_ld16u_i64:
    case INDEX_op_ld16s_i64:
    case INDEX_op_ld32u_i64:
    case INDEX_op_ld32s_i64:
    case INDEX_op_ld_i64:
    case INDEX_op_st8_i64:
    case INDEX_op_st16_i64:
    case INDEX_op_st32_i64:
    case INDEX_op_st_i64:
    case INDEX_op_add_i64:
    case INDEX_op_sub_i64:
    case INDEX_op_mul_i64:
    case INDEX_op_and_i64:
    case INDEX_op_or_i64:
    case INDEX_op_xor_i64:
    case INDEX_op_shl_i64:
    case INDEX_op_shr_i64:
    case INDEX_op_sar_i64:
    case INDEX_op_ext_i32_i64:
    case INDEX_op_extu_i32_i64:
        return TCG_TARGET_REG_BITS == 64;

    case INDEX_op_movcond_i64:
        return TCG_TARGET_HAS_movcond_i64;
    case INDEX_op_div_i64:
    case INDEX_op_divu_i64:
        return TCG_TARGET_HAS_div_i64;
    case INDEX_op_rem_i64:
    case INDEX_op_remu_i64:
        return TCG_TARGET_HAS_rem_i64;
    case INDEX_op_div2_i64:
    case INDEX_op_divu2_i64:
        return TCG_TARGET_HAS_div2_i64;
    case INDEX_op_rotl_i64:
    case INDEX_op_rotr_i64:
        return TCG_TARGET_HAS_rot_i64;
    case INDEX_op_deposit_i64:
        return TCG_TARGET_HAS_deposit_i64;
    case INDEX_op_extract_i64:
        return TCG_TARGET_HAS_extract_i64;
    case INDEX_op_sextract_i64:
        return TCG_TARGET_HAS_sextract_i64;
    case INDEX_op_extract2_i64:
        return TCG_TARGET_HAS_extract2_i64;
    case INDEX_op_extrl_i64_i32:
        return TCG_TARGET_HAS_extrl_i64_i32;
    case INDEX_op_extrh_i64_i32:
        return TCG_TARGET_HAS_extrh_i64_i32;
    case INDEX_op_ext8s_i64:
        return TCG_TARGET_HAS_ext8s_i64;
    case INDEX_op_ext16s_i64:
        return TCG_TARGET_HAS_ext16s_i64;
    case INDEX_op_ext32s_i64:
        return TCG_TARGET_HAS_ext32s_i64;
    case INDEX_op_ext8u_i64:
        return TCG_TARGET_HAS_ext8u_i64;
    case INDEX_op_ext16u_i64:
        return TCG_TARGET_HAS_ext16u_i64;
    case INDEX_op_ext32u_i64:
        return TCG_TARGET_HAS_ext32u_i64;
    case INDEX_op_bswap16_i64:
        return TCG_TARGET_HAS_bswap16_i64;
    case INDEX_op_bswap32_i64:
        return TCG_TARGET_HAS_bswap32_i64;
    case INDEX_op_bswap64_i64:
        return TCG_TARGET_HAS_bswap64_i64;
    case INDEX_op_not_i64:
        return TCG_TARGET_HAS_not_i64;
    case INDEX_op_neg_i64:
        return TCG_TARGET_HAS_neg_i64;
    case INDEX_op_andc_i64:
        return TCG_TARGET_HAS_andc_i64;
    case INDEX_op_orc_i64:
        return TCG_TARGET_HAS_orc_i64;
    case INDEX_op_eqv_i64:
        return TCG_TARGET_HAS_eqv_i64;
    case INDEX_op_nand_i64:
        return TCG_TARGET_HAS_nand_i64;
    case INDEX_op_nor_i64:
        return TCG_TARGET_HAS_nor_i64;
    case INDEX_op_clz_i64:
        return TCG_TARGET_HAS_clz_i64;
    case INDEX_op_ctz_i64:
        return TCG_TARGET_HAS_ctz_i64;
    case INDEX_op_ctpop_i64:
        return TCG_TARGET_HAS_ctpop_i64;
    case INDEX_op_add2_i64:
        return TCG_TARGET_HAS_add2_i64;
    case INDEX_op_sub2_i64:
        return TCG_TARGET_HAS_sub2_i64;
    case INDEX_op_mulu2_i64:
        return TCG_TARGET_HAS_mulu2_i64;
    case INDEX_op_muls2_i64:
        return TCG_TARGET_HAS_muls2_i64;
    case INDEX_op_muluh_i64:
        return TCG_TARGET_HAS_muluh_i64;
    case INDEX_op_mulsh_i64:
        return TCG_TARGET_HAS_mulsh_i64;

    case INDEX_op_mov_vec:
    case INDEX_op_dup_vec:
    case INDEX_op_dupi_vec:
    case INDEX_op_dupm_vec:
    case INDEX_op_ld_vec:
    case INDEX_op_st_vec:
    case INDEX_op_add_vec:
    case INDEX_op_sub_vec:
    case INDEX_op_and_vec:
    case INDEX_op_or_vec:
    case INDEX_op_xor_vec:
    case INDEX_op_cmp_vec:
        return have_vec;
    case INDEX_op_dup2_vec:
        return have_vec && TCG_TARGET_REG_BITS == 32;
    case INDEX_op_not_vec:
        return have_vec && TCG_TARGET_HAS_not_vec;
    case INDEX_op_neg_vec:
        return have_vec && TCG_TARGET_HAS_neg_vec;
    case INDEX_op_abs_vec:
        return have_vec && TCG_TARGET_HAS_abs_vec;
    case INDEX_op_andc_vec:
        return have_vec && TCG_TARGET_HAS_andc_vec;
    case INDEX_op_orc_vec:
        return have_vec && TCG_TARGET_HAS_orc_vec;
    case INDEX_op_mul_vec:
        return have_vec && TCG_TARGET_HAS_mul_vec;
    case INDEX_op_shli_vec:
    case INDEX_op_shri_vec:
    case INDEX_op_sari_vec:
        return have_vec && TCG_TARGET_HAS_shi_vec;
    case INDEX_op_shls_vec:
    case INDEX_op_shrs_vec:
    case INDEX_op_sars_vec:
        return have_vec && TCG_TARGET_HAS_shs_vec;
    case INDEX_op_shlv_vec:
    case INDEX_op_shrv_vec:
    case INDEX_op_sarv_vec:
        return have_vec && TCG_TARGET_HAS_shv_vec;
    case INDEX_op_ssadd_vec:
    case INDEX_op_usadd_vec:
    case INDEX_op_sssub_vec:
    case INDEX_op_ussub_vec:
        return have_vec && TCG_TARGET_HAS_sat_vec;
    case INDEX_op_smin_vec:
    case INDEX_op_umin_vec:
    case INDEX_op_smax_vec:
    case INDEX_op_umax_vec:
        return have_vec && TCG_TARGET_HAS_minmax_vec;

    default:
        tcg_debug_assert(op > INDEX_op_last_generic && op < NB_OPS);
        return true;
    }
}
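
/*
 * Usage sketch: generic expansion code can probe an opcode before
 * emitting it and provide a fallback otherwise (hypothetical example):
 *
 *     if (tcg_op_supported(INDEX_op_ctpop_i32)) {
 *         ...                 // emit ctpop directly
 *     } else {
 *         ...                 // expand via shifts and masks
 *     }
 */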
1655 
1656 /* Note: we convert the 64 bit args to 32 bit and do some alignment
1657    and endian swap. Maybe it would be better to do the alignment
1658    and endian swap in tcg_reg_alloc_call(). */
1659 void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args)
1660 {
1661     int i, real_args, nb_rets, pi;
1662     unsigned sizemask, flags;
1663     TCGHelperInfo *info;
1664     TCGOp *op;
1665 
1666     info = g_hash_table_lookup(helper_table, (gpointer)func);
1667     flags = info->flags;
1668     sizemask = info->sizemask;
1669 
1670 #if defined(__sparc__) && !defined(__arch64__) \
1671     && !defined(CONFIG_TCG_INTERPRETER)
1672     /* We have 64-bit values in one register, but need to pass as two
1673        separate parameters.  Split them.  */
1674     int orig_sizemask = sizemask;
1675     int orig_nargs = nargs;
1676     TCGv_i64 retl, reth;
1677     TCGTemp *split_args[MAX_OPC_PARAM];
1678 
1679     retl = NULL;
1680     reth = NULL;
1681     if (sizemask != 0) {
1682         for (i = real_args = 0; i < nargs; ++i) {
1683             int is_64bit = sizemask & (1 << (i+1)*2);
1684             if (is_64bit) {
1685                 TCGv_i64 orig = temp_tcgv_i64(args[i]);
1686                 TCGv_i32 h = tcg_temp_new_i32();
1687                 TCGv_i32 l = tcg_temp_new_i32();
1688                 tcg_gen_extr_i64_i32(l, h, orig);
1689                 split_args[real_args++] = tcgv_i32_temp(h);
1690                 split_args[real_args++] = tcgv_i32_temp(l);
1691             } else {
1692                 split_args[real_args++] = args[i];
1693             }
1694         }
1695         nargs = real_args;
1696         args = split_args;
1697         sizemask = 0;
1698     }
1699 #elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
1700     for (i = 0; i < nargs; ++i) {
1701         int is_64bit = sizemask & (1 << (i+1)*2);
1702         int is_signed = sizemask & (2 << (i+1)*2);
1703         if (!is_64bit) {
1704             TCGv_i64 temp = tcg_temp_new_i64();
1705             TCGv_i64 orig = temp_tcgv_i64(args[i]);
1706             if (is_signed) {
1707                 tcg_gen_ext32s_i64(temp, orig);
1708             } else {
1709                 tcg_gen_ext32u_i64(temp, orig);
1710             }
1711             args[i] = tcgv_i64_temp(temp);
1712         }
1713     }
1714 #endif /* TCG_TARGET_EXTEND_ARGS */
1715 
1716     op = tcg_emit_op(INDEX_op_call);
1717 
1718     pi = 0;
1719     if (ret != NULL) {
1720 #if defined(__sparc__) && !defined(__arch64__) \
1721     && !defined(CONFIG_TCG_INTERPRETER)
1722         if (orig_sizemask & 1) {
1723             /* The 32-bit ABI is going to return the 64-bit value in
1724                the %o0/%o1 register pair.  Prepare for this by using
1725                two return temporaries, and reassemble below.  */
1726             retl = tcg_temp_new_i64();
1727             reth = tcg_temp_new_i64();
1728             op->args[pi++] = tcgv_i64_arg(reth);
1729             op->args[pi++] = tcgv_i64_arg(retl);
1730             nb_rets = 2;
1731         } else {
1732             op->args[pi++] = temp_arg(ret);
1733             nb_rets = 1;
1734         }
1735 #else
1736         if (TCG_TARGET_REG_BITS < 64 && (sizemask & 1)) {
1737 #ifdef HOST_WORDS_BIGENDIAN
1738             op->args[pi++] = temp_arg(ret + 1);
1739             op->args[pi++] = temp_arg(ret);
1740 #else
1741             op->args[pi++] = temp_arg(ret);
1742             op->args[pi++] = temp_arg(ret + 1);
1743 #endif
1744             nb_rets = 2;
1745         } else {
1746             op->args[pi++] = temp_arg(ret);
1747             nb_rets = 1;
1748         }
1749 #endif
1750     } else {
1751         nb_rets = 0;
1752     }
1753     TCGOP_CALLO(op) = nb_rets;
1754 
1755     real_args = 0;
1756     for (i = 0; i < nargs; i++) {
1757         int is_64bit = sizemask & (1 << (i+1)*2);
1758         if (TCG_TARGET_REG_BITS < 64 && is_64bit) {
1759 #ifdef TCG_TARGET_CALL_ALIGN_ARGS
1760             /* some targets want aligned 64-bit args */
1761             if (real_args & 1) {
1762                 op->args[pi++] = TCG_CALL_DUMMY_ARG;
1763                 real_args++;
1764             }
1765 #endif
1766             /* If stack grows up, then we will be placing successive
1767                arguments at lower addresses, which means we need to
1768                reverse the order compared to how we would normally
1769                treat either big or little-endian.  For those arguments
1770                that will wind up in registers, this still works for
1771                HPPA (the only current STACK_GROWSUP target) since the
1772                argument registers are *also* allocated in decreasing
1773                order.  If another such target is added, this logic may
1774                have to get more complicated to differentiate between
1775                stack arguments and register arguments.  */
1776 #if defined(HOST_WORDS_BIGENDIAN) != defined(TCG_TARGET_STACK_GROWSUP)
1777             op->args[pi++] = temp_arg(args[i] + 1);
1778             op->args[pi++] = temp_arg(args[i]);
1779 #else
1780             op->args[pi++] = temp_arg(args[i]);
1781             op->args[pi++] = temp_arg(args[i] + 1);
1782 #endif
1783             real_args += 2;
1784             continue;
1785         }
1786 
1787         op->args[pi++] = temp_arg(args[i]);
1788         real_args++;
1789     }
1790     op->args[pi++] = (uintptr_t)func;
1791     op->args[pi++] = flags;
1792     TCGOP_CALLI(op) = real_args;
1793 
1794     /* Make sure the fields didn't overflow.  */
1795     tcg_debug_assert(TCGOP_CALLI(op) == real_args);
1796     tcg_debug_assert(pi <= ARRAY_SIZE(op->args));
1797 
1798 #if defined(__sparc__) && !defined(__arch64__) \
1799     && !defined(CONFIG_TCG_INTERPRETER)
1800     /* Free all of the parts we allocated above.  */
1801     for (i = real_args = 0; i < orig_nargs; ++i) {
1802         int is_64bit = orig_sizemask & (1 << (i+1)*2);
1803         if (is_64bit) {
1804             tcg_temp_free_internal(args[real_args++]);
1805             tcg_temp_free_internal(args[real_args++]);
1806         } else {
1807             real_args++;
1808         }
1809     }
1810     if (orig_sizemask & 1) {
1811         /* The 32-bit ABI returned two 32-bit pieces.  Re-assemble them.
1812            Note that describing these as TCGv_i64 eliminates an unnecessary
1813            zero-extension that tcg_gen_concat_i32_i64 would create.  */
1814         tcg_gen_concat32_i64(temp_tcgv_i64(ret), retl, reth);
1815         tcg_temp_free_i64(retl);
1816         tcg_temp_free_i64(reth);
1817     }
1818 #elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
1819     for (i = 0; i < nargs; ++i) {
1820         int is_64bit = sizemask & (1 << (i+1)*2);
1821         if (!is_64bit) {
1822             tcg_temp_free_internal(args[i]);
1823         }
1824     }
1825 #endif /* TCG_TARGET_EXTEND_ARGS */
1826 }
1827 
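/*
 * The sizemask encoding used throughout tcg_gen_callN(), restated as
 * a sketch (editorial, not part of the build): bits 0/1 describe the
 * return value, and the bit pair at position 2*(i+1) describes
 * argument i -- low bit "is 64-bit", high bit "is signed".
 */
#if 0
static inline bool sizemask_is_64bit(unsigned sizemask, int i)
{
    /* Width bit for argument i; i == -1 selects the return value.  */
    return sizemask & (1u << ((i + 1) * 2));
}

static inline bool sizemask_is_signed(unsigned sizemask, int i)
{
    /* Sign bit for argument i; i == -1 selects the return value.  */
    return sizemask & (2u << ((i + 1) * 2));
}
#endif
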
1828 static void tcg_reg_alloc_start(TCGContext *s)
1829 {
1830     int i, n;
1831     TCGTemp *ts;
1832 
1833     for (i = 0, n = s->nb_globals; i < n; i++) {
1834         ts = &s->temps[i];
1835         ts->val_type = (ts->fixed_reg ? TEMP_VAL_REG : TEMP_VAL_MEM);
1836     }
1837     for (n = s->nb_temps; i < n; i++) {
1838         ts = &s->temps[i];
1839         ts->val_type = (ts->temp_local ? TEMP_VAL_MEM : TEMP_VAL_DEAD);
1840         ts->mem_allocated = 0;
1841         ts->fixed_reg = 0;
1842     }
1843 
1844     memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp));
1845 }
1846 
1847 static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size,
1848                                  TCGTemp *ts)
1849 {
1850     int idx = temp_idx(ts);
1851 
1852     if (ts->temp_global) {
1853         pstrcpy(buf, buf_size, ts->name);
1854     } else if (ts->temp_local) {
1855         snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
1856     } else {
1857         snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
1858     }
1859     return buf;
1860 }
1861 
1862 static char *tcg_get_arg_str(TCGContext *s, char *buf,
1863                              int buf_size, TCGArg arg)
1864 {
1865     return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg));
1866 }
1867 
1868 /* Find helper name.  */
1869 static inline const char *tcg_find_helper(TCGContext *s, uintptr_t val)
1870 {
1871     const char *ret = NULL;
1872     if (helper_table) {
1873         TCGHelperInfo *info = g_hash_table_lookup(helper_table, (gpointer)val);
1874         if (info) {
1875             ret = info->name;
1876         }
1877     }
1878     return ret;
1879 }
1880 
1881 static const char * const cond_name[] =
1882 {
1883     [TCG_COND_NEVER] = "never",
1884     [TCG_COND_ALWAYS] = "always",
1885     [TCG_COND_EQ] = "eq",
1886     [TCG_COND_NE] = "ne",
1887     [TCG_COND_LT] = "lt",
1888     [TCG_COND_GE] = "ge",
1889     [TCG_COND_LE] = "le",
1890     [TCG_COND_GT] = "gt",
1891     [TCG_COND_LTU] = "ltu",
1892     [TCG_COND_GEU] = "geu",
1893     [TCG_COND_LEU] = "leu",
1894     [TCG_COND_GTU] = "gtu"
1895 };
1896 
1897 static const char * const ldst_name[] =
1898 {
1899     [MO_UB]   = "ub",
1900     [MO_SB]   = "sb",
1901     [MO_LEUW] = "leuw",
1902     [MO_LESW] = "lesw",
1903     [MO_LEUL] = "leul",
1904     [MO_LESL] = "lesl",
1905     [MO_LEQ]  = "leq",
1906     [MO_BEUW] = "beuw",
1907     [MO_BESW] = "besw",
1908     [MO_BEUL] = "beul",
1909     [MO_BESL] = "besl",
1910     [MO_BEQ]  = "beq",
1911 };
1912 
1913 static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
1914 #ifdef ALIGNED_ONLY
1915     [MO_UNALN >> MO_ASHIFT]    = "un+",
1916     [MO_ALIGN >> MO_ASHIFT]    = "",
1917 #else
1918     [MO_UNALN >> MO_ASHIFT]    = "",
1919     [MO_ALIGN >> MO_ASHIFT]    = "al+",
1920 #endif
1921     [MO_ALIGN_2 >> MO_ASHIFT]  = "al2+",
1922     [MO_ALIGN_4 >> MO_ASHIFT]  = "al4+",
1923     [MO_ALIGN_8 >> MO_ASHIFT]  = "al8+",
1924     [MO_ALIGN_16 >> MO_ASHIFT] = "al16+",
1925     [MO_ALIGN_32 >> MO_ASHIFT] = "al32+",
1926     [MO_ALIGN_64 >> MO_ASHIFT] = "al64+",
1927 };
1928 
1929 static inline bool tcg_regset_single(TCGRegSet d)
1930 {
1931     return (d & (d - 1)) == 0;
1932 }
1933 
1934 static inline TCGReg tcg_regset_first(TCGRegSet d)
1935 {
1936     if (TCG_TARGET_NB_REGS <= 32) {
1937         return ctz32(d);
1938     } else {
1939         return ctz64(d);
1940     }
1941 }
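
/*
 * Aside (editorial): tcg_regset_single() is the usual power-of-two
 * test -- clearing the lowest set bit of a one-bit set yields zero,
 * e.g. 0x20 & 0x1f == 0.  It also returns true for the empty set;
 * callers only apply it to non-empty sets.
 */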
1942 
1943 static void tcg_dump_ops(TCGContext *s, bool have_prefs)
1944 {
1945     char buf[128];
1946     TCGOp *op;
1947 
1948     QTAILQ_FOREACH(op, &s->ops, link) {
1949         int i, k, nb_oargs, nb_iargs, nb_cargs;
1950         const TCGOpDef *def;
1951         TCGOpcode c;
1952         int col = 0;
1953 
1954         c = op->opc;
1955         def = &tcg_op_defs[c];
1956 
1957         if (c == INDEX_op_insn_start) {
1958             nb_oargs = 0;
1959             col += qemu_log("\n ----");
1960 
1961             for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
1962                 target_ulong a;
1963 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
1964                 a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
1965 #else
1966                 a = op->args[i];
1967 #endif
1968                 col += qemu_log(" " TARGET_FMT_lx, a);
1969             }
1970         } else if (c == INDEX_op_call) {
1971             /* variable number of arguments */
1972             nb_oargs = TCGOP_CALLO(op);
1973             nb_iargs = TCGOP_CALLI(op);
1974             nb_cargs = def->nb_cargs;
1975 
1976             /* function name, flags, out args */
1977             col += qemu_log(" %s %s,$0x%" TCG_PRIlx ",$%d", def->name,
1978                             tcg_find_helper(s, op->args[nb_oargs + nb_iargs]),
1979                             op->args[nb_oargs + nb_iargs + 1], nb_oargs);
1980             for (i = 0; i < nb_oargs; i++) {
1981                 col += qemu_log(",%s", tcg_get_arg_str(s, buf, sizeof(buf),
1982                                                        op->args[i]));
1983             }
1984             for (i = 0; i < nb_iargs; i++) {
1985                 TCGArg arg = op->args[nb_oargs + i];
1986                 const char *t = "<dummy>";
1987                 if (arg != TCG_CALL_DUMMY_ARG) {
1988                     t = tcg_get_arg_str(s, buf, sizeof(buf), arg);
1989                 }
1990                 col += qemu_log(",%s", t);
1991             }
1992         } else {
1993             col += qemu_log(" %s ", def->name);
1994 
1995             nb_oargs = def->nb_oargs;
1996             nb_iargs = def->nb_iargs;
1997             nb_cargs = def->nb_cargs;
1998 
1999             if (def->flags & TCG_OPF_VECTOR) {
2000                 col += qemu_log("v%d,e%d,", 64 << TCGOP_VECL(op),
2001                                 8 << TCGOP_VECE(op));
2002             }
2003 
2004             k = 0;
2005             for (i = 0; i < nb_oargs; i++) {
2006                 if (k != 0) {
2007                     col += qemu_log(",");
2008                 }
2009                 col += qemu_log("%s", tcg_get_arg_str(s, buf, sizeof(buf),
2010                                                       op->args[k++]));
2011             }
2012             for (i = 0; i < nb_iargs; i++) {
2013                 if (k != 0) {
2014                     col += qemu_log(",");
2015                 }
2016                 col += qemu_log("%s", tcg_get_arg_str(s, buf, sizeof(buf),
2017                                                       op->args[k++]));
2018             }
2019             switch (c) {
2020             case INDEX_op_brcond_i32:
2021             case INDEX_op_setcond_i32:
2022             case INDEX_op_movcond_i32:
2023             case INDEX_op_brcond2_i32:
2024             case INDEX_op_setcond2_i32:
2025             case INDEX_op_brcond_i64:
2026             case INDEX_op_setcond_i64:
2027             case INDEX_op_movcond_i64:
2028             case INDEX_op_cmp_vec:
2029                 if (op->args[k] < ARRAY_SIZE(cond_name)
2030                     && cond_name[op->args[k]]) {
2031                     col += qemu_log(",%s", cond_name[op->args[k++]]);
2032                 } else {
2033                     col += qemu_log(",$0x%" TCG_PRIlx, op->args[k++]);
2034                 }
2035                 i = 1;
2036                 break;
2037             case INDEX_op_qemu_ld_i32:
2038             case INDEX_op_qemu_st_i32:
2039             case INDEX_op_qemu_ld_i64:
2040             case INDEX_op_qemu_st_i64:
2041                 {
2042                     TCGMemOpIdx oi = op->args[k++];
2043                     TCGMemOp mop = get_memop(oi);
2044                     unsigned ix = get_mmuidx(oi);
2045 
2046                     if (mop & ~(MO_AMASK | MO_BSWAP | MO_SSIZE)) {
2047                         col += qemu_log(",$0x%x,%u", mop, ix);
2048                     } else {
2049                         const char *s_al, *s_op;
2050                         s_al = alignment_name[(mop & MO_AMASK) >> MO_ASHIFT];
2051                         s_op = ldst_name[mop & (MO_BSWAP | MO_SSIZE)];
2052                         col += qemu_log(",%s%s,%u", s_al, s_op, ix);
2053                     }
2054                     i = 1;
2055                 }
2056                 break;
2057             default:
2058                 i = 0;
2059                 break;
2060             }
2061             switch (c) {
2062             case INDEX_op_set_label:
2063             case INDEX_op_br:
2064             case INDEX_op_brcond_i32:
2065             case INDEX_op_brcond_i64:
2066             case INDEX_op_brcond2_i32:
2067                 col += qemu_log("%s$L%d", k ? "," : "",
2068                                 arg_label(op->args[k])->id);
2069                 i++, k++;
2070                 break;
2071             default:
2072                 break;
2073             }
2074             for (; i < nb_cargs; i++, k++) {
2075                 col += qemu_log("%s$0x%" TCG_PRIlx, k ? "," : "", op->args[k]);
2076             }
2077         }
2078 
2079         if (have_prefs || op->life) {
2080             for (; col < 40; ++col) {
2081                 putc(' ', qemu_logfile);
2082             }
2083         }
2084 
2085         if (op->life) {
2086             unsigned life = op->life;
2087 
2088             if (life & (SYNC_ARG * 3)) {
2089                 qemu_log("  sync:");
2090                 for (i = 0; i < 2; ++i) {
2091                     if (life & (SYNC_ARG << i)) {
2092                         qemu_log(" %d", i);
2093                     }
2094                 }
2095             }
2096             life /= DEAD_ARG;
2097             if (life) {
2098                 qemu_log("  dead:");
2099                 for (i = 0; life; ++i, life >>= 1) {
2100                     if (life & 1) {
2101                         qemu_log(" %d", i);
2102                     }
2103                 }
2104             }
2105         }
2106 
2107         if (have_prefs) {
2108             for (i = 0; i < nb_oargs; ++i) {
2109                 TCGRegSet set = op->output_pref[i];
2110 
2111                 if (i == 0) {
2112                     qemu_log("  pref=");
2113                 } else {
2114                     qemu_log(",");
2115                 }
2116                 if (set == 0) {
2117                     qemu_log("none");
2118                 } else if (set == MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS)) {
2119                     qemu_log("all");
2120 #ifdef CONFIG_DEBUG_TCG
2121                 } else if (tcg_regset_single(set)) {
2122                     TCGReg reg = tcg_regset_first(set);
2123                     qemu_log("%s", tcg_target_reg_names[reg]);
2124 #endif
2125                 } else if (TCG_TARGET_NB_REGS <= 32) {
2126                     qemu_log("%#x", (uint32_t)set);
2127                 } else {
2128                     qemu_log("%#" PRIx64, (uint64_t)set);
2129                 }
2130             }
2131         }
2132 
2133         qemu_log("\n");
2134     }
2135 }
2136 
2137 /* we give more priority to constraints with fewer registers */
2138 static int get_constraint_priority(const TCGOpDef *def, int k)
2139 {
2140     const TCGArgConstraint *arg_ct;
2141 
2142     int i, n;
2143     arg_ct = &def->args_ct[k];
2144     if (arg_ct->ct & TCG_CT_ALIAS) {
2145         /* an alias is equivalent to a single register */
2146         n = 1;
2147     } else {
2148         if (!(arg_ct->ct & TCG_CT_REG))
2149             return 0;
2150         n = 0;
2151         for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
2152             if (tcg_regset_test_reg(arg_ct->u.regs, i))
2153                 n++;
2154         }
2155     }
2156     return TCG_TARGET_NB_REGS - n + 1;
2157 }
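
/*
 * Worked example (editorial): with 16 target registers, an operand
 * constrained to a single register has priority 16 - 1 + 1 = 16, one
 * allowing all 16 registers has priority 16 - 16 + 1 = 1, and an
 * aliased output counts as a single register.  sort_constraints()
 * below therefore places the most constrained operands first.
 */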
2158 
2159 /* sort from highest priority to lowest */
2160 static void sort_constraints(TCGOpDef *def, int start, int n)
2161 {
2162     int i, j, p1, p2, tmp;
2163 
2164     for (i = 0; i < n; i++)
2165         def->sorted_args[start + i] = start + i;
2166     if (n <= 1)
2167         return;
2168     for (i = 0; i < n - 1; i++) {
2169         for (j = i + 1; j < n; j++) {
2170             p1 = get_constraint_priority(def, def->sorted_args[start + i]);
2171             p2 = get_constraint_priority(def, def->sorted_args[start + j]);
2172             if (p1 < p2) {
2173                 tmp = def->sorted_args[start + i];
2174                 def->sorted_args[start + i] = def->sorted_args[start + j];
2175                 def->sorted_args[start + j] = tmp;
2176             }
2177         }
2178     }
2179 }
2180 
2181 static void process_op_defs(TCGContext *s)
2182 {
2183     TCGOpcode op;
2184 
2185     for (op = 0; op < NB_OPS; op++) {
2186         TCGOpDef *def = &tcg_op_defs[op];
2187         const TCGTargetOpDef *tdefs;
2188         TCGType type;
2189         int i, nb_args;
2190 
2191         if (def->flags & TCG_OPF_NOT_PRESENT) {
2192             continue;
2193         }
2194 
2195         nb_args = def->nb_iargs + def->nb_oargs;
2196         if (nb_args == 0) {
2197             continue;
2198         }
2199 
2200         tdefs = tcg_target_op_def(op);
2201         /* Missing TCGTargetOpDef entry. */
2202         tcg_debug_assert(tdefs != NULL);
2203 
2204         type = (def->flags & TCG_OPF_64BIT ? TCG_TYPE_I64 : TCG_TYPE_I32);
2205         for (i = 0; i < nb_args; i++) {
2206             const char *ct_str = tdefs->args_ct_str[i];
2207             /* Incomplete TCGTargetOpDef entry. */
2208             tcg_debug_assert(ct_str != NULL);
2209 
2210             def->args_ct[i].u.regs = 0;
2211             def->args_ct[i].ct = 0;
2212             while (*ct_str != '\0') {
2213                 switch (*ct_str) {
2214                 case '0' ... '9':
2215                     {
2216                         int oarg = *ct_str - '0';
2217                         tcg_debug_assert(ct_str == tdefs->args_ct_str[i]);
2218                         tcg_debug_assert(oarg < def->nb_oargs);
2219                         tcg_debug_assert(def->args_ct[oarg].ct & TCG_CT_REG);
2220                         /* TCG_CT_ALIAS is for the output arguments.
2221                            The input is tagged with TCG_CT_IALIAS. */
2222                         def->args_ct[i] = def->args_ct[oarg];
2223                         def->args_ct[oarg].ct |= TCG_CT_ALIAS;
2224                         def->args_ct[oarg].alias_index = i;
2225                         def->args_ct[i].ct |= TCG_CT_IALIAS;
2226                         def->args_ct[i].alias_index = oarg;
2227                     }
2228                     ct_str++;
2229                     break;
2230                 case '&':
2231                     def->args_ct[i].ct |= TCG_CT_NEWREG;
2232                     ct_str++;
2233                     break;
2234                 case 'i':
2235                     def->args_ct[i].ct |= TCG_CT_CONST;
2236                     ct_str++;
2237                     break;
2238                 default:
2239                     ct_str = target_parse_constraint(&def->args_ct[i],
2240                                                      ct_str, type);
2241                     /* Typo in TCGTargetOpDef constraint. */
2242                     tcg_debug_assert(ct_str != NULL);
2243                 }
2244             }
2245         }
2246 
2247         /* TCGTargetOpDef entry with too much information? */
2248         tcg_debug_assert(i == TCG_MAX_OP_ARGS || tdefs->args_ct_str[i] == NULL);
2249 
2250         /* sort the constraints (XXX: this is just a heuristic) */
2251         sort_constraints(def, 0, def->nb_oargs);
2252         sort_constraints(def, def->nb_oargs, def->nb_iargs);
2253     }
2254 }
2255 
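/*
 * Constraint-string example (editorial sketch): a backend entry such as
 *
 *     { INDEX_op_sub_i32, { "r", "0", "ri" } }
 *
 * parses as: output 0 in any register; input 1 aliased to output 0
 * (the TCG_CT_ALIAS/TCG_CT_IALIAS pair set above); input 2 either a
 * register or a constant (TCG_CT_CONST from 'i', plus a register set
 * from target_parse_constraint()).
 */
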
2256 void tcg_op_remove(TCGContext *s, TCGOp *op)
2257 {
2258     TCGLabel *label;
2259 
2260     switch (op->opc) {
2261     case INDEX_op_br:
2262         label = arg_label(op->args[0]);
2263         label->refs--;
2264         break;
2265     case INDEX_op_brcond_i32:
2266     case INDEX_op_brcond_i64:
2267         label = arg_label(op->args[3]);
2268         label->refs--;
2269         break;
2270     case INDEX_op_brcond2_i32:
2271         label = arg_label(op->args[5]);
2272         label->refs--;
2273         break;
2274     default:
2275         break;
2276     }
2277 
2278     QTAILQ_REMOVE(&s->ops, op, link);
2279     QTAILQ_INSERT_TAIL(&s->free_ops, op, link);
2280     s->nb_ops--;
2281 
2282 #ifdef CONFIG_PROFILER
2283     atomic_set(&s->prof.del_op_count, s->prof.del_op_count + 1);
2284 #endif
2285 }
2286 
2287 static TCGOp *tcg_op_alloc(TCGOpcode opc)
2288 {
2289     TCGContext *s = tcg_ctx;
2290     TCGOp *op;
2291 
2292     if (likely(QTAILQ_EMPTY(&s->free_ops))) {
2293         op = tcg_malloc(sizeof(TCGOp));
2294     } else {
2295         op = QTAILQ_FIRST(&s->free_ops);
2296         QTAILQ_REMOVE(&s->free_ops, op, link);
2297     }
2298     memset(op, 0, offsetof(TCGOp, link));
2299     op->opc = opc;
2300     s->nb_ops++;
2301 
2302     return op;
2303 }
2304 
2305 TCGOp *tcg_emit_op(TCGOpcode opc)
2306 {
2307     TCGOp *op = tcg_op_alloc(opc);
2308     QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
2309     return op;
2310 }
2311 
2312 TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op, TCGOpcode opc)
2313 {
2314     TCGOp *new_op = tcg_op_alloc(opc);
2315     QTAILQ_INSERT_BEFORE(old_op, new_op, link);
2316     return new_op;
2317 }
2318 
2319 TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op, TCGOpcode opc)
2320 {
2321     TCGOp *new_op = tcg_op_alloc(opc);
2322     QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link);
2323     return new_op;
2324 }
2325 
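/*
 * Typical pass-internal use of the helpers above (editorial sketch):
 * replace an op with a mov by inserting the new op beside it and
 * deleting the original.  The opcode choice is illustrative only.
 */
#if 0
static void replace_with_mov_i32(TCGContext *s, TCGOp *op,
                                 TCGArg dst, TCGArg src)
{
    TCGOp *mov = tcg_op_insert_before(s, op, INDEX_op_mov_i32);

    mov->args[0] = dst;
    mov->args[1] = src;
    tcg_op_remove(s, op);   /* the old op goes back on the free list */
}
#endif
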
2326 /* Reachability analysis: remove unreachable code.  */
2327 static void reachable_code_pass(TCGContext *s)
2328 {
2329     TCGOp *op, *op_next;
2330     bool dead = false;
2331 
2332     QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
2333         bool remove = dead;
2334         TCGLabel *label;
2335         int call_flags;
2336 
2337         switch (op->opc) {
2338         case INDEX_op_set_label:
2339             label = arg_label(op->args[0]);
2340             if (label->refs == 0) {
2341                 /*
2342                  * While there is an occasional backward branch, virtually
2343                  * all branches generated by the translators are forward.
2344                  * Which means that generally we will have already removed
2345                  * all references to this label by the time we reach it, and
2346                  * there is little to be gained by iterating.
2347                  */
2348                 remove = true;
2349             } else {
2350                 /* Once we see a label, insns become live again.  */
2351                 dead = false;
2352                 remove = false;
2353 
2354                 /*
2355                  * Optimization can fold conditional branches to unconditional.
2356                  * If we find a label with one reference which is preceded by
2357                  * an unconditional branch to it, remove both.  This had to
2358                  * wait until the dead code between them was removed.
2359                  */
2360                 if (label->refs == 1) {
2361                     TCGOp *op_prev = QTAILQ_PREV(op, link);
2362                     if (op_prev->opc == INDEX_op_br &&
2363                         label == arg_label(op_prev->args[0])) {
2364                         tcg_op_remove(s, op_prev);
2365                         remove = true;
2366                     }
2367                 }
2368             }
2369             break;
2370 
2371         case INDEX_op_br:
2372         case INDEX_op_exit_tb:
2373         case INDEX_op_goto_ptr:
2374             /* Unconditional branches; everything following is dead.  */
2375             dead = true;
2376             break;
2377 
2378         case INDEX_op_call:
2379             /* Notice noreturn helper calls, raising exceptions.  */
2380             call_flags = op->args[TCGOP_CALLO(op) + TCGOP_CALLI(op) + 1];
2381             if (call_flags & TCG_CALL_NO_RETURN) {
2382                 dead = true;
2383             }
2384             break;
2385 
2386         case INDEX_op_insn_start:
2387             /* Never remove -- we need to keep these for unwind.  */
2388             remove = false;
2389             break;
2390 
2391         default:
2392             break;
2393         }
2394 
2395         if (remove) {
2396             tcg_op_remove(s, op);
2397         }
2398     }
2399 }
2400 
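/*
 * Example of the pattern this pass cleans up (editorial): after the
 * optimizer folds a conditional branch to an unconditional one,
 *
 *     br $L1
 *     mov_i32 t2, t3       <- unreachable, removed
 *     set_label $L1        <- one reference left, from the br just
 *                             above it, so both br and label go too
 *
 * leaving straight-line fall-through code.
 */
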
2401 #define TS_DEAD  1
2402 #define TS_MEM   2
2403 
2404 #define IS_DEAD_ARG(n)   (arg_life & (DEAD_ARG << (n)))
2405 #define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n)))
2406 
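/*
 * State sketch for liveness (editorial).  Walking the ops backwards,
 * ts->state combines two bits:
 *   0                 live: the value is read by a later op
 *   TS_MEM            live, and the memory copy must be valid here
 *   TS_DEAD           dead: not read again before the next write
 *   TS_DEAD | TS_MEM  dead, but must reside in memory at this point
 *                     (e.g. globals at basic-block and function end)
 */
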
2407 /* For liveness_pass_1, the register preferences for a given temp.  */
2408 static inline TCGRegSet *la_temp_pref(TCGTemp *ts)
2409 {
2410     return ts->state_ptr;
2411 }
2412 
2413 /* For liveness_pass_1, reset the preferences for a given temp to the
2414  * maximal regset for its type.
2415  */
2416 static inline void la_reset_pref(TCGTemp *ts)
2417 {
2418     *la_temp_pref(ts)
2419         = (ts->state == TS_DEAD ? 0 : tcg_target_available_regs[ts->type]);
2420 }
2421 
2422 /* liveness analysis: end of function: all temps are dead, and globals
2423    should be in memory. */
2424 static void la_func_end(TCGContext *s, int ng, int nt)
2425 {
2426     int i;
2427 
2428     for (i = 0; i < ng; ++i) {
2429         s->temps[i].state = TS_DEAD | TS_MEM;
2430         la_reset_pref(&s->temps[i]);
2431     }
2432     for (i = ng; i < nt; ++i) {
2433         s->temps[i].state = TS_DEAD;
2434         la_reset_pref(&s->temps[i]);
2435     }
2436 }
2437 
2438 /* liveness analysis: end of basic block: all temps are dead, globals
2439    and local temps should be in memory. */
2440 static void la_bb_end(TCGContext *s, int ng, int nt)
2441 {
2442     int i;
2443 
2444     for (i = 0; i < ng; ++i) {
2445         s->temps[i].state = TS_DEAD | TS_MEM;
2446         la_reset_pref(&s->temps[i]);
2447     }
2448     for (i = ng; i < nt; ++i) {
2449         s->temps[i].state = (s->temps[i].temp_local
2450                              ? TS_DEAD | TS_MEM
2451                              : TS_DEAD);
2452         la_reset_pref(&s->temps[i]);
2453     }
2454 }
2455 
2456 /* liveness analysis: sync globals back to memory.  */
2457 static void la_global_sync(TCGContext *s, int ng)
2458 {
2459     int i;
2460 
2461     for (i = 0; i < ng; ++i) {
2462         int state = s->temps[i].state;
2463         s->temps[i].state = state | TS_MEM;
2464         if (state == TS_DEAD) {
2465             /* If the global was previously dead, reset prefs.  */
2466             la_reset_pref(&s->temps[i]);
2467         }
2468     }
2469 }
2470 
2471 /* liveness analysis: sync globals back to memory and kill.  */
2472 static void la_global_kill(TCGContext *s, int ng)
2473 {
2474     int i;
2475 
2476     for (i = 0; i < ng; i++) {
2477         s->temps[i].state = TS_DEAD | TS_MEM;
2478         la_reset_pref(&s->temps[i]);
2479     }
2480 }
2481 
2482 /* liveness analysis: note live temporaries crossing calls.  */
2483 static void la_cross_call(TCGContext *s, int nt)
2484 {
2485     TCGRegSet mask = ~tcg_target_call_clobber_regs;
2486     int i;
2487 
2488     for (i = 0; i < nt; i++) {
2489         TCGTemp *ts = &s->temps[i];
2490         if (!(ts->state & TS_DEAD)) {
2491             TCGRegSet *pset = la_temp_pref(ts);
2492             TCGRegSet set = *pset;
2493 
2494             set &= mask;
2495             /* If the combination is not possible, restart.  */
2496             if (set == 0) {
2497                 set = tcg_target_available_regs[ts->type] & mask;
2498             }
2499             *pset = set;
2500         }
2501     }
2502 }
2503 
2504 /* Liveness analysis: update each op->life mask to tell whether a
2505    given input argument is dead. Instructions updating dead
2506    temporaries are removed. */
2507 static void liveness_pass_1(TCGContext *s)
2508 {
2509     int nb_globals = s->nb_globals;
2510     int nb_temps = s->nb_temps;
2511     TCGOp *op, *op_prev;
2512     TCGRegSet *prefs;
2513     int i;
2514 
2515     prefs = tcg_malloc(sizeof(TCGRegSet) * nb_temps);
2516     for (i = 0; i < nb_temps; ++i) {
2517         s->temps[i].state_ptr = prefs + i;
2518     }
2519 
2520     /* ??? Should be redundant with the exit_tb that ends the TB.  */
2521     la_func_end(s, nb_globals, nb_temps);
2522 
2523     QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, link, op_prev) {
2524         int nb_iargs, nb_oargs;
2525         TCGOpcode opc_new, opc_new2;
2526         bool have_opc_new2;
2527         TCGLifeData arg_life = 0;
2528         TCGTemp *ts;
2529         TCGOpcode opc = op->opc;
2530         const TCGOpDef *def = &tcg_op_defs[opc];
2531 
2532         switch (opc) {
2533         case INDEX_op_call:
2534             {
2535                 int call_flags;
2536                 int nb_call_regs;
2537 
2538                 nb_oargs = TCGOP_CALLO(op);
2539                 nb_iargs = TCGOP_CALLI(op);
2540                 call_flags = op->args[nb_oargs + nb_iargs + 1];
2541 
2542                 /* pure functions can be removed if their result is unused */
2543                 if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
2544                     for (i = 0; i < nb_oargs; i++) {
2545                         ts = arg_temp(op->args[i]);
2546                         if (ts->state != TS_DEAD) {
2547                             goto do_not_remove_call;
2548                         }
2549                     }
2550                     goto do_remove;
2551                 }
2552             do_not_remove_call:
2553 
2554                 /* Output args are dead.  */
2555                 for (i = 0; i < nb_oargs; i++) {
2556                     ts = arg_temp(op->args[i]);
2557                     if (ts->state & TS_DEAD) {
2558                         arg_life |= DEAD_ARG << i;
2559                     }
2560                     if (ts->state & TS_MEM) {
2561                         arg_life |= SYNC_ARG << i;
2562                     }
2563                     ts->state = TS_DEAD;
2564                     la_reset_pref(ts);
2565 
2566                     /* Not used -- it will be tcg_target_call_oarg_regs[i].  */
2567                     op->output_pref[i] = 0;
2568                 }
2569 
2570                 if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
2571                                     TCG_CALL_NO_READ_GLOBALS))) {
2572                     la_global_kill(s, nb_globals);
2573                 } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
2574                     la_global_sync(s, nb_globals);
2575                 }
2576 
2577                 /* Record arguments that die in this helper.  */
2578                 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
2579                     ts = arg_temp(op->args[i]);
2580                     if (ts && ts->state & TS_DEAD) {
2581                         arg_life |= DEAD_ARG << i;
2582                     }
2583                 }
2584 
2585                 /* For all live registers, remove call-clobbered prefs.  */
2586                 la_cross_call(s, nb_temps);
2587 
2588                 nb_call_regs = ARRAY_SIZE(tcg_target_call_iarg_regs);
2589 
2590                 /* Input arguments are live for preceding opcodes.  */
2591                 for (i = 0; i < nb_iargs; i++) {
2592                     ts = arg_temp(op->args[i + nb_oargs]);
2593                     if (ts && ts->state & TS_DEAD) {
2594                         /* For those arguments that die, and will be allocated
2595                          * in registers, clear the register set for that arg,
2596                          * to be filled in below.  For args that will be on
2597                          * the stack, reset to any available reg.
2598                          */
2599                         *la_temp_pref(ts)
2600                             = (i < nb_call_regs ? 0 :
2601                                tcg_target_available_regs[ts->type]);
2602                         ts->state &= ~TS_DEAD;
2603                     }
2604                 }
2605 
2606                 /* For each input argument, add its input register to prefs.
2607                    If a temp is used once, this produces a single set bit.  */
2608                 for (i = 0; i < MIN(nb_call_regs, nb_iargs); i++) {
2609                     ts = arg_temp(op->args[i + nb_oargs]);
2610                     if (ts) {
2611                         tcg_regset_set_reg(*la_temp_pref(ts),
2612                                            tcg_target_call_iarg_regs[i]);
2613                     }
2614                 }
2615             }
2616             break;
2617         case INDEX_op_insn_start:
2618             break;
2619         case INDEX_op_discard:
2620             /* mark the temporary as dead */
2621             ts = arg_temp(op->args[0]);
2622             ts->state = TS_DEAD;
2623             la_reset_pref(ts);
2624             break;
2625 
2626         case INDEX_op_add2_i32:
2627             opc_new = INDEX_op_add_i32;
2628             goto do_addsub2;
2629         case INDEX_op_sub2_i32:
2630             opc_new = INDEX_op_sub_i32;
2631             goto do_addsub2;
2632         case INDEX_op_add2_i64:
2633             opc_new = INDEX_op_add_i64;
2634             goto do_addsub2;
2635         case INDEX_op_sub2_i64:
2636             opc_new = INDEX_op_sub_i64;
2637         do_addsub2:
2638             nb_iargs = 4;
2639             nb_oargs = 2;
2640             /* Test if the high part of the operation is dead, but not
2641                the low part.  The result can be optimized to a simple
2642                add or sub.  This happens often for an x86_64 guest when
2643                the cpu mode is set to 32-bit.  */
2644             if (arg_temp(op->args[1])->state == TS_DEAD) {
2645                 if (arg_temp(op->args[0])->state == TS_DEAD) {
2646                     goto do_remove;
2647                 }
2648                 /* Replace the opcode and adjust the args in place,
2649                    leaving 3 unused args at the end.  */
2650                 op->opc = opc = opc_new;
2651                 op->args[1] = op->args[2];
2652                 op->args[2] = op->args[4];
2653                 /* Fall through and mark the single-word operation live.  */
2654                 nb_iargs = 2;
2655                 nb_oargs = 1;
2656             }
2657             goto do_not_remove;
2658 
2659         case INDEX_op_mulu2_i32:
2660             opc_new = INDEX_op_mul_i32;
2661             opc_new2 = INDEX_op_muluh_i32;
2662             have_opc_new2 = TCG_TARGET_HAS_muluh_i32;
2663             goto do_mul2;
2664         case INDEX_op_muls2_i32:
2665             opc_new = INDEX_op_mul_i32;
2666             opc_new2 = INDEX_op_mulsh_i32;
2667             have_opc_new2 = TCG_TARGET_HAS_mulsh_i32;
2668             goto do_mul2;
2669         case INDEX_op_mulu2_i64:
2670             opc_new = INDEX_op_mul_i64;
2671             opc_new2 = INDEX_op_muluh_i64;
2672             have_opc_new2 = TCG_TARGET_HAS_muluh_i64;
2673             goto do_mul2;
2674         case INDEX_op_muls2_i64:
2675             opc_new = INDEX_op_mul_i64;
2676             opc_new2 = INDEX_op_mulsh_i64;
2677             have_opc_new2 = TCG_TARGET_HAS_mulsh_i64;
2678             goto do_mul2;
2679         do_mul2:
2680             nb_iargs = 2;
2681             nb_oargs = 2;
2682             if (arg_temp(op->args[1])->state == TS_DEAD) {
2683                 if (arg_temp(op->args[0])->state == TS_DEAD) {
2684                     /* Both parts of the operation are dead.  */
2685                     goto do_remove;
2686                 }
2687                 /* The high part of the operation is dead; generate the low. */
2688                 op->opc = opc = opc_new;
2689                 op->args[1] = op->args[2];
2690                 op->args[2] = op->args[3];
2691             } else if (arg_temp(op->args[0])->state == TS_DEAD && have_opc_new2) {
2692                 /* The low part of the operation is dead; generate the high. */
2693                 op->opc = opc = opc_new2;
2694                 op->args[0] = op->args[1];
2695                 op->args[1] = op->args[2];
2696                 op->args[2] = op->args[3];
2697             } else {
2698                 goto do_not_remove;
2699             }
2700             /* Mark the single-word operation live.  */
2701             nb_oargs = 1;
2702             goto do_not_remove;
2703 
2704         default:
2705             /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
2706             nb_iargs = def->nb_iargs;
2707             nb_oargs = def->nb_oargs;
2708 
2709             /* Test if the operation can be removed because all
2710                its outputs are dead. We assume that nb_oargs == 0
2711                implies side effects.  */
2712             if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) {
2713                 for (i = 0; i < nb_oargs; i++) {
2714                     if (arg_temp(op->args[i])->state != TS_DEAD) {
2715                         goto do_not_remove;
2716                     }
2717                 }
2718                 goto do_remove;
2719             }
2720             goto do_not_remove;
2721 
2722         do_remove:
2723             tcg_op_remove(s, op);
2724             break;
2725 
2726         do_not_remove:
2727             for (i = 0; i < nb_oargs; i++) {
2728                 ts = arg_temp(op->args[i]);
2729 
2730                 /* Remember the preference of the uses that followed.  */
2731                 op->output_pref[i] = *la_temp_pref(ts);
2732 
2733                 /* Output args are dead.  */
2734                 if (ts->state & TS_DEAD) {
2735                     arg_life |= DEAD_ARG << i;
2736                 }
2737                 if (ts->state & TS_MEM) {
2738                     arg_life |= SYNC_ARG << i;
2739                 }
2740                 ts->state = TS_DEAD;
2741                 la_reset_pref(ts);
2742             }
2743 
2744             /* If end of basic block, update.  */
2745             if (def->flags & TCG_OPF_BB_EXIT) {
2746                 la_func_end(s, nb_globals, nb_temps);
2747             } else if (def->flags & TCG_OPF_BB_END) {
2748                 la_bb_end(s, nb_globals, nb_temps);
2749             } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
2750                 la_global_sync(s, nb_globals);
2751                 if (def->flags & TCG_OPF_CALL_CLOBBER) {
2752                     la_cross_call(s, nb_temps);
2753                 }
2754             }
2755 
2756             /* Record arguments that die in this opcode.  */
2757             for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
2758                 ts = arg_temp(op->args[i]);
2759                 if (ts->state & TS_DEAD) {
2760                     arg_life |= DEAD_ARG << i;
2761                 }
2762             }
2763 
2764             /* Input arguments are live for preceding opcodes.  */
2765             for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
2766                 ts = arg_temp(op->args[i]);
2767                 if (ts->state & TS_DEAD) {
2768                     /* For operands that were dead, initially allow
2769                        all regs for the type.  */
2770                     *la_temp_pref(ts) = tcg_target_available_regs[ts->type];
2771                     ts->state &= ~TS_DEAD;
2772                 }
2773             }
2774 
2775             /* Incorporate constraints for this operand.  */
2776             switch (opc) {
2777             case INDEX_op_mov_i32:
2778             case INDEX_op_mov_i64:
2779                 /* Note that these are TCG_OPF_NOT_PRESENT and do not
2780                    have proper constraints.  That said, special case
2781                    moves to propagate preferences backward.  */
2782                 if (IS_DEAD_ARG(1)) {
2783                     *la_temp_pref(arg_temp(op->args[0]))
2784                         = *la_temp_pref(arg_temp(op->args[1]));
2785                 }
2786                 break;
2787 
2788             default:
2789                 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
2790                     const TCGArgConstraint *ct = &def->args_ct[i];
2791                     TCGRegSet set, *pset;
2792 
2793                     ts = arg_temp(op->args[i]);
2794                     pset = la_temp_pref(ts);
2795                     set = *pset;
2796 
2797                     set &= ct->u.regs;
2798                     if (ct->ct & TCG_CT_IALIAS) {
2799                         set &= op->output_pref[ct->alias_index];
2800                     }
2801                     /* If the combination is not possible, restart.  */
2802                     if (set == 0) {
2803                         set = ct->u.regs;
2804                     }
2805                     *pset = set;
2806                 }
2807                 break;
2808             }
2809             break;
2810         }
2811         op->life = arg_life;
2812     }
2813 }
2814 
2815 /* Liveness analysis: Convert indirect regs to direct temporaries.  */
2816 static bool liveness_pass_2(TCGContext *s)
2817 {
2818     int nb_globals = s->nb_globals;
2819     int nb_temps, i;
2820     bool changes = false;
2821     TCGOp *op, *op_next;
2822 
2823     /* Create a temporary for each indirect global.  */
2824     for (i = 0; i < nb_globals; ++i) {
2825         TCGTemp *its = &s->temps[i];
2826         if (its->indirect_reg) {
2827             TCGTemp *dts = tcg_temp_alloc(s);
2828             dts->type = its->type;
2829             dts->base_type = its->base_type;
2830             its->state_ptr = dts;
2831         } else {
2832             its->state_ptr = NULL;
2833         }
2834         /* All globals begin dead.  */
2835         its->state = TS_DEAD;
2836     }
2837     for (nb_temps = s->nb_temps; i < nb_temps; ++i) {
2838         TCGTemp *its = &s->temps[i];
2839         its->state_ptr = NULL;
2840         its->state = TS_DEAD;
2841     }
2842 
2843     QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
2844         TCGOpcode opc = op->opc;
2845         const TCGOpDef *def = &tcg_op_defs[opc];
2846         TCGLifeData arg_life = op->life;
2847         int nb_iargs, nb_oargs, call_flags;
2848         TCGTemp *arg_ts, *dir_ts;
2849 
2850         if (opc == INDEX_op_call) {
2851             nb_oargs = TCGOP_CALLO(op);
2852             nb_iargs = TCGOP_CALLI(op);
2853             call_flags = op->args[nb_oargs + nb_iargs + 1];
2854         } else {
2855             nb_iargs = def->nb_iargs;
2856             nb_oargs = def->nb_oargs;
2857 
2858             /* Set flags similar to how calls require.  */
2859             if (def->flags & TCG_OPF_BB_END) {
2860                 /* Like writing globals: save_globals */
2861                 call_flags = 0;
2862             } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
2863                 /* Like reading globals: sync_globals */
2864                 call_flags = TCG_CALL_NO_WRITE_GLOBALS;
2865             } else {
2866                 /* No effect on globals.  */
2867                 call_flags = (TCG_CALL_NO_READ_GLOBALS |
2868                               TCG_CALL_NO_WRITE_GLOBALS);
2869             }
2870         }
2871 
2872         /* Make sure that input arguments are available.  */
2873         for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
2874             arg_ts = arg_temp(op->args[i]);
2875             if (arg_ts) {
2876                 dir_ts = arg_ts->state_ptr;
2877                 if (dir_ts && arg_ts->state == TS_DEAD) {
2878                     TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32
2879                                       ? INDEX_op_ld_i32
2880                                       : INDEX_op_ld_i64);
2881                     TCGOp *lop = tcg_op_insert_before(s, op, lopc);
2882 
2883                     lop->args[0] = temp_arg(dir_ts);
2884                     lop->args[1] = temp_arg(arg_ts->mem_base);
2885                     lop->args[2] = arg_ts->mem_offset;
2886 
2887                     /* Loaded, but synced with memory.  */
2888                     arg_ts->state = TS_MEM;
2889                 }
2890             }
2891         }
2892 
2893         /* Perform input replacement, and mark inputs that became dead.
2894            No action is required except keeping arg_ts->state up to date
2895            so that we reload when needed.  */
2896         for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
2897             arg_ts = arg_temp(op->args[i]);
2898             if (arg_ts) {
2899                 dir_ts = arg_ts->state_ptr;
2900                 if (dir_ts) {
2901                     op->args[i] = temp_arg(dir_ts);
2902                     changes = true;
2903                     if (IS_DEAD_ARG(i)) {
2904                         arg_ts->state = TS_DEAD;
2905                     }
2906                 }
2907             }
2908         }
2909 
2910         /* Liveness analysis should ensure that the following are
2911            all correct, for call sites and basic block end points.  */
2912         if (call_flags & TCG_CALL_NO_READ_GLOBALS) {
2913             /* Nothing to do */
2914         } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) {
2915             for (i = 0; i < nb_globals; ++i) {
2916                 /* Liveness should see that globals are synced back,
2917                    that is, either TS_DEAD or TS_MEM.  */
2918                 arg_ts = &s->temps[i];
2919                 tcg_debug_assert(arg_ts->state_ptr == 0
2920                                  || arg_ts->state != 0);
2921             }
2922         } else {
2923             for (i = 0; i < nb_globals; ++i) {
2924                 /* Liveness should see that globals are saved back,
2925                    that is, TS_DEAD, waiting to be reloaded.  */
2926                 arg_ts = &s->temps[i];
2927                 tcg_debug_assert(arg_ts->state_ptr == 0
2928                                  || arg_ts->state == TS_DEAD);
2929             }
2930         }
2931 
2932         /* Outputs become available.  */
2933         for (i = 0; i < nb_oargs; i++) {
2934             arg_ts = arg_temp(op->args[i]);
2935             dir_ts = arg_ts->state_ptr;
2936             if (!dir_ts) {
2937                 continue;
2938             }
2939             op->args[i] = temp_arg(dir_ts);
2940             changes = true;
2941 
2942             /* The output is now live and modified.  */
2943             arg_ts->state = 0;
2944 
2945             /* Sync outputs upon their last write.  */
2946             if (NEED_SYNC_ARG(i)) {
2947                 TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
2948                                   ? INDEX_op_st_i32
2949                                   : INDEX_op_st_i64);
2950                 TCGOp *sop = tcg_op_insert_after(s, op, sopc);
2951 
2952                 sop->args[0] = temp_arg(dir_ts);
2953                 sop->args[1] = temp_arg(arg_ts->mem_base);
2954                 sop->args[2] = arg_ts->mem_offset;
2955 
2956                 arg_ts->state = TS_MEM;
2957             }
2958             /* Drop outputs that are dead.  */
2959             if (IS_DEAD_ARG(i)) {
2960                 arg_ts->state = TS_DEAD;
2961             }
2962         }
2963     }
2964 
2965     return changes;
2966 }
2967 
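/*
 * Pass 2 transformation example (editorial sketch): for an indirect
 * global G with direct shadow temp D and memory base B,
 *
 *     add_i32 G, G, t0
 *
 * becomes
 *
 *     ld_i32  D, B, $off   (inserted because the input was TS_DEAD)
 *     add_i32 D, D, t0
 *     st_i32  D, B, $off   (inserted because the output needs sync)
 */
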
2968 #ifdef CONFIG_DEBUG_TCG
2969 static void dump_regs(TCGContext *s)
2970 {
2971     TCGTemp *ts;
2972     int i;
2973     char buf[64];
2974 
2975     for (i = 0; i < s->nb_temps; i++) {
2976         ts = &s->temps[i];
2977         printf("  %10s: ", tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts));
2978         switch (ts->val_type) {
2979         case TEMP_VAL_REG:
2980             printf("%s", tcg_target_reg_names[ts->reg]);
2981             break;
2982         case TEMP_VAL_MEM:
2983             printf("%d(%s)", (int)ts->mem_offset,
2984                    tcg_target_reg_names[ts->mem_base->reg]);
2985             break;
2986         case TEMP_VAL_CONST:
2987             printf("$0x%" TCG_PRIlx, ts->val);
2988             break;
2989         case TEMP_VAL_DEAD:
2990             printf("D");
2991             break;
2992         default:
2993             printf("???");
2994             break;
2995         }
2996         printf("\n");
2997     }
2998 
2999     for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
3000         if (s->reg_to_temp[i] != NULL) {
3001             printf("%s: %s\n",
3002                    tcg_target_reg_names[i],
3003                    tcg_get_arg_str_ptr(s, buf, sizeof(buf), s->reg_to_temp[i]));
3004         }
3005     }
3006 }
3007 
3008 static void check_regs(TCGContext *s)
3009 {
3010     int reg;
3011     int k;
3012     TCGTemp *ts;
3013     char buf[64];
3014 
3015     for (reg = 0; reg < TCG_TARGET_NB_REGS; reg++) {
3016         ts = s->reg_to_temp[reg];
3017         if (ts != NULL) {
3018             if (ts->val_type != TEMP_VAL_REG || ts->reg != reg) {
3019                 printf("Inconsistency for register %s:\n",
3020                        tcg_target_reg_names[reg]);
3021                 goto fail;
3022             }
3023         }
3024     }
3025     for (k = 0; k < s->nb_temps; k++) {
3026         ts = &s->temps[k];
3027         if (ts->val_type == TEMP_VAL_REG && !ts->fixed_reg
3028             && s->reg_to_temp[ts->reg] != ts) {
3029             printf("Inconsistency for temp %s:\n",
3030                    tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts));
3031         fail:
3032             printf("reg state:\n");
3033             dump_regs(s);
3034             tcg_abort();
3035         }
3036     }
3037 }
3038 #endif
3039 
3040 static void temp_allocate_frame(TCGContext *s, TCGTemp *ts)
3041 {
3042 #if !(defined(__sparc__) && TCG_TARGET_REG_BITS == 64)
3043     /* Sparc64 stack is accessed with an offset of 2047 */
3044     s->current_frame_offset = (s->current_frame_offset +
3045                                (tcg_target_long)sizeof(tcg_target_long) - 1) &
3046         ~(sizeof(tcg_target_long) - 1);
3047 #endif
3048     if (s->current_frame_offset + (tcg_target_long)sizeof(tcg_target_long) >
3049         s->frame_end) {
3050         tcg_abort();
3051     }
3052     ts->mem_offset = s->current_frame_offset;
3053     ts->mem_base = s->frame_temp;
3054     ts->mem_allocated = 1;
3055     s->current_frame_offset += sizeof(tcg_target_long);
3056 }
3057 
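/*
 * Worked example of the rounding above (editorial): on a 64-bit host,
 * sizeof(tcg_target_long) == 8, so a current_frame_offset of 12
 * rounds up as (12 + 7) & ~7 = 16, and the new slot occupies frame
 * bytes [16, 24).
 */
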
3058 static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet);
3059 
3060 /* Mark a temporary as free or dead.  If 'free_or_dead' is negative,
3061    mark it free; otherwise mark it dead.  */
3062 static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead)
3063 {
3064     if (ts->fixed_reg) {
3065         return;
3066     }
3067     if (ts->val_type == TEMP_VAL_REG) {
3068         s->reg_to_temp[ts->reg] = NULL;
3069     }
3070     ts->val_type = (free_or_dead < 0
3071                     || ts->temp_local
3072                     || ts->temp_global
3073                     ? TEMP_VAL_MEM : TEMP_VAL_DEAD);
3074 }
3075 
3076 /* Mark a temporary as dead.  */
3077 static inline void temp_dead(TCGContext *s, TCGTemp *ts)
3078 {
3079     temp_free_or_dead(s, ts, 1);
3080 }
3081 
3082 /* Sync a temporary to memory. 'allocated_regs' is used in case a temporary
3083    register needs to be allocated to store a constant.  If 'free_or_dead'
3084    is non-zero, subsequently release the temporary; if it is positive, the
3085    temp is dead; if it is negative, the temp is free.  */
3086 static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs,
3087                       TCGRegSet preferred_regs, int free_or_dead)
3088 {
3089     if (ts->fixed_reg) {
3090         return;
3091     }
3092     if (!ts->mem_coherent) {
3093         if (!ts->mem_allocated) {
3094             temp_allocate_frame(s, ts);
3095         }
3096         switch (ts->val_type) {
3097         case TEMP_VAL_CONST:
3098             /* If we're going to free the temp immediately, then we won't
3099                require it later in a register, so attempt to store the
3100                constant to memory directly.  */
3101             if (free_or_dead
3102                 && tcg_out_sti(s, ts->type, ts->val,
3103                                ts->mem_base->reg, ts->mem_offset)) {
3104                 break;
3105             }
3106             temp_load(s, ts, tcg_target_available_regs[ts->type],
3107                       allocated_regs, preferred_regs);
3108             /* fallthrough */
3109 
3110         case TEMP_VAL_REG:
3111             tcg_out_st(s, ts->type, ts->reg,
3112                        ts->mem_base->reg, ts->mem_offset);
3113             break;
3114 
3115         case TEMP_VAL_MEM:
3116             break;
3117 
3118         case TEMP_VAL_DEAD:
3119         default:
3120             tcg_abort();
3121         }
3122         ts->mem_coherent = 1;
3123     }
3124     if (free_or_dead) {
3125         temp_free_or_dead(s, ts, free_or_dead);
3126     }
3127 }
3128 
3129 /* free register 'reg' by spilling the corresponding temporary if necessary */
3130 static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs)
3131 {
3132     TCGTemp *ts = s->reg_to_temp[reg];
3133     if (ts != NULL) {
3134         temp_sync(s, ts, allocated_regs, 0, -1);
3135     }
3136 }
3137 
3138 /**
3139  * tcg_reg_alloc:
3140  * @required_regs: Set of registers in which we must allocate.
3141  * @allocated_regs: Set of registers which must be avoided.
3142  * @preferred_regs: Set of registers we should prefer.
3143  * @rev: True if we search the registers in "indirect" order.
3144  *
3145  * The allocated register must be in @required_regs & ~@allocated_regs,
3146  * but if we can put it in @preferred_regs we may save a move later.
3147  */
3148 static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs,
3149                             TCGRegSet allocated_regs,
3150                             TCGRegSet preferred_regs, bool rev)
3151 {
3152     int i, j, f, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
3153     TCGRegSet reg_ct[2];
3154     const int *order;
3155 
3156     reg_ct[1] = required_regs & ~allocated_regs;
3157     tcg_debug_assert(reg_ct[1] != 0);
3158     reg_ct[0] = reg_ct[1] & preferred_regs;
3159 
3160     /* Skip the preferred_regs option if it cannot be satisfied,
3161        or if the preference made no difference.  */
3162     f = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
3163 
3164     order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
3165 
3166     /* Try free registers, preferences first.  */
3167     for (j = f; j < 2; j++) {
3168         TCGRegSet set = reg_ct[j];
3169 
3170         if (tcg_regset_single(set)) {
3171             /* One register in the set.  */
3172             TCGReg reg = tcg_regset_first(set);
3173             if (s->reg_to_temp[reg] == NULL) {
3174                 return reg;
3175             }
3176         } else {
3177             for (i = 0; i < n; i++) {
3178                 TCGReg reg = order[i];
3179                 if (s->reg_to_temp[reg] == NULL &&
3180                     tcg_regset_test_reg(set, reg)) {
3181                     return reg;
3182                 }
3183             }
3184         }
3185     }
3186 
3187     /* We must spill something.  */
3188     for (j = f; j < 2; j++) {
3189         TCGRegSet set = reg_ct[j];
3190 
3191         if (tcg_regset_single(set)) {
3192             /* One register in the set.  */
3193             TCGReg reg = tcg_regset_first(set);
3194             tcg_reg_free(s, reg, allocated_regs);
3195             return reg;
3196         } else {
3197             for (i = 0; i < n; i++) {
3198                 TCGReg reg = order[i];
3199                 if (tcg_regset_test_reg(set, reg)) {
3200                     tcg_reg_free(s, reg, allocated_regs);
3201                     return reg;
3202                 }
3203             }
3204         }
3205     }
3206 
3207     tcg_abort();
3208 }
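
/*
 * Worked example of the search above (hypothetical three-register set,
 * not tied to any real backend): with required_regs = {r0, r1, r2},
 * allocated_regs = {r1} and preferred_regs = {r2}, we get
 * reg_ct[1] = {r0, r2} and reg_ct[0] = {r2}, so f == 0 and the
 * preference pass runs first.  A free r2 is returned immediately; if
 * r2 is occupied but r0 is free, the second pass returns r0; only when
 * both are busy does the spill loop run, again preferring r2.
 */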
3209 
3210 /* Make sure the temporary is in a register.  If needed, allocate one from
3211    DESIRED while avoiding ALLOCATED, preferring PREFERRED when possible.  */
3212 static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
3213                       TCGRegSet allocated_regs, TCGRegSet preferred_regs)
3214 {
3215     TCGReg reg;
3216 
3217     switch (ts->val_type) {
3218     case TEMP_VAL_REG:
3219         return;
3220     case TEMP_VAL_CONST:
3221         reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
3222                             preferred_regs, ts->indirect_base);
3223         tcg_out_movi(s, ts->type, reg, ts->val);
3224         ts->mem_coherent = 0;
3225         break;
3226     case TEMP_VAL_MEM:
3227         reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
3228                             preferred_regs, ts->indirect_base);
3229         tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
3230         ts->mem_coherent = 1;
3231         break;
3232     case TEMP_VAL_DEAD:
3233     default:
3234         tcg_abort();
3235     }
3236     ts->reg = reg;
3237     ts->val_type = TEMP_VAL_REG;
3238     s->reg_to_temp[reg] = ts;
3239 }
3240 
3241 /* Save a temporary to memory. 'allocated_regs' is used in case a
3242    temporary register needs to be allocated to store a constant.  */
3243 static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
3244 {
3245     /* The liveness analysis already ensures that globals are back
3246        in memory. Keep a tcg_debug_assert for safety. */
3247     tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || ts->fixed_reg);
3248 }
3249 
3250 /* save globals to their canonical location and assume they can be
3251    modified by the following code. 'allocated_regs' is used in case a
3252    temporary register needs to be allocated to store a constant. */
3253 static void save_globals(TCGContext *s, TCGRegSet allocated_regs)
3254 {
3255     int i, n;
3256 
3257     for (i = 0, n = s->nb_globals; i < n; i++) {
3258         temp_save(s, &s->temps[i], allocated_regs);
3259     }
3260 }
3261 
3262 /* sync globals to their canonical location and assume they can be
3263    read by the following code. 'allocated_regs' is used in case a
3264    temporary register needs to be allocated to store a constant. */
3265 static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
3266 {
3267     int i, n;
3268 
3269     for (i = 0, n = s->nb_globals; i < n; i++) {
3270         TCGTemp *ts = &s->temps[i];
3271         tcg_debug_assert(ts->val_type != TEMP_VAL_REG
3272                          || ts->fixed_reg
3273                          || ts->mem_coherent);
3274     }
3275 }
3276 
3277 /* at the end of a basic block, we assume all temporaries are dead and
3278    all globals are stored at their canonical location. */
3279 static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
3280 {
3281     int i;
3282 
3283     for (i = s->nb_globals; i < s->nb_temps; i++) {
3284         TCGTemp *ts = &s->temps[i];
3285         if (ts->temp_local) {
3286             temp_save(s, ts, allocated_regs);
3287         } else {
3288             /* The liveness analysis already ensures that temps are dead.
3289                Keep a tcg_debug_assert for safety. */
3290             tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
3291         }
3292     }
3293 
3294     save_globals(s, allocated_regs);
3295 }
3296 
3297 /*
3298  * Specialized code generation for INDEX_op_movi_*.
3299  */
3300 static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots,
3301                                   tcg_target_ulong val, TCGLifeData arg_life,
3302                                   TCGRegSet preferred_regs)
3303 {
3304     /* ENV should not be modified.  */
3305     tcg_debug_assert(!ots->fixed_reg);
3306 
3307     /* The movi is not explicitly generated here.  */
3308     if (ots->val_type == TEMP_VAL_REG) {
3309         s->reg_to_temp[ots->reg] = NULL;
3310     }
3311     ots->val_type = TEMP_VAL_CONST;
3312     ots->val = val;
3313     ots->mem_coherent = 0;
3314     if (NEED_SYNC_ARG(0)) {
3315         temp_sync(s, ots, s->reserved_regs, preferred_regs, IS_DEAD_ARG(0));
3316     } else if (IS_DEAD_ARG(0)) {
3317         temp_dead(s, ots);
3318     }
3319 }
3320 
3321 static void tcg_reg_alloc_movi(TCGContext *s, const TCGOp *op)
3322 {
3323     TCGTemp *ots = arg_temp(op->args[0]);
3324     tcg_target_ulong val = op->args[1];
3325 
3326     tcg_reg_alloc_do_movi(s, ots, val, op->life, op->output_pref[0]);
3327 }
3328 
3329 /*
3330  * Specialized code generation for INDEX_op_mov_*.
3331  */
3332 static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op)
3333 {
3334     const TCGLifeData arg_life = op->life;
3335     TCGRegSet allocated_regs, preferred_regs;
3336     TCGTemp *ts, *ots;
3337     TCGType otype, itype;
3338 
3339     allocated_regs = s->reserved_regs;
3340     preferred_regs = op->output_pref[0];
3341     ots = arg_temp(op->args[0]);
3342     ts = arg_temp(op->args[1]);
3343 
3344     /* ENV should not be modified.  */
3345     tcg_debug_assert(!ots->fixed_reg);
3346 
3347     /* Note that otype != itype for no-op truncation.  */
3348     otype = ots->type;
3349     itype = ts->type;
3350 
3351     if (ts->val_type == TEMP_VAL_CONST) {
3352         /* propagate constant or generate sti */
3353         tcg_target_ulong val = ts->val;
3354         if (IS_DEAD_ARG(1)) {
3355             temp_dead(s, ts);
3356         }
3357         tcg_reg_alloc_do_movi(s, ots, val, arg_life, preferred_regs);
3358         return;
3359     }
3360 
3361     /* If the source value is in memory we're going to be forced
3362        to have it in a register in order to perform the copy.  Copy
3363        the SOURCE value into its own register first; that way we
3364        don't have to reload SOURCE the next time it is used. */
3365     if (ts->val_type == TEMP_VAL_MEM) {
3366         temp_load(s, ts, tcg_target_available_regs[itype],
3367                   allocated_regs, preferred_regs);
3368     }
3369 
3370     tcg_debug_assert(ts->val_type == TEMP_VAL_REG);
3371     if (IS_DEAD_ARG(0)) {
3372         /* mov to a non-saved dead register makes no sense (even with
3373            liveness analysis disabled). */
3374         tcg_debug_assert(NEED_SYNC_ARG(0));
3375         if (!ots->mem_allocated) {
3376             temp_allocate_frame(s, ots);
3377         }
3378         tcg_out_st(s, otype, ts->reg, ots->mem_base->reg, ots->mem_offset);
3379         if (IS_DEAD_ARG(1)) {
3380             temp_dead(s, ts);
3381         }
3382         temp_dead(s, ots);
3383     } else {
3384         if (IS_DEAD_ARG(1) && !ts->fixed_reg) {
3385             /* the mov can be suppressed */
3386             if (ots->val_type == TEMP_VAL_REG) {
3387                 s->reg_to_temp[ots->reg] = NULL;
3388             }
3389             ots->reg = ts->reg;
3390             temp_dead(s, ts);
3391         } else {
3392             if (ots->val_type != TEMP_VAL_REG) {
3393                 /* When allocating a new register, make sure to not spill the
3394                    input one. */
3395                 tcg_regset_set_reg(allocated_regs, ts->reg);
3396                 ots->reg = tcg_reg_alloc(s, tcg_target_available_regs[otype],
3397                                          allocated_regs, preferred_regs,
3398                                          ots->indirect_base);
3399             }
3400             if (!tcg_out_mov(s, otype, ots->reg, ts->reg)) {
3401                 /*
3402                  * Cross register class move not supported.
3403                  * Store the source register into the destination slot
3404                  * and leave the destination temp as TEMP_VAL_MEM.
3405                  */
3406                 assert(!ots->fixed_reg);
3407                 if (!ots->mem_allocated) {
3408                     temp_allocate_frame(s, ots);
3409                 }
3410                 tcg_out_st(s, ts->type, ts->reg,
3411                            ots->mem_base->reg, ots->mem_offset);
3412                 ots->mem_coherent = 1;
3413                 temp_free_or_dead(s, ots, -1);
3414                 return;
3415             }
3416         }
3417         ots->val_type = TEMP_VAL_REG;
3418         ots->mem_coherent = 0;
3419         s->reg_to_temp[ots->reg] = ots;
3420         if (NEED_SYNC_ARG(0)) {
3421             temp_sync(s, ots, allocated_regs, 0, 0);
3422         }
3423     }
3424 }
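
/*
 * Illustrative sketch of the IS_DEAD_ARG(1) shortcut above (assumed
 * example, not from the source): for "mov t1, t0" where t0 lives in
 * some register and dies at this op, t1 is simply retargeted to t0's
 * register and no host instruction is emitted; a real reg-to-reg move
 * is generated only when t0 stays live or sits in a fixed register.
 */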
3425 
3426 /*
3427  * Specialized code generation for INDEX_op_dup_vec.
3428  */
3429 static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op)
3430 {
3431     const TCGLifeData arg_life = op->life;
3432     TCGRegSet dup_out_regs, dup_in_regs;
3433     TCGTemp *its, *ots;
3434     TCGType itype, vtype;
3435     intptr_t endian_fixup;
3436     unsigned vece;
3437     bool ok;
3438 
3439     ots = arg_temp(op->args[0]);
3440     its = arg_temp(op->args[1]);
3441 
3442     /* ENV should not be modified.  */
3443     tcg_debug_assert(!ots->fixed_reg);
3444 
3445     itype = its->type;
3446     vece = TCGOP_VECE(op);
3447     vtype = TCGOP_VECL(op) + TCG_TYPE_V64;
3448 
3449     if (its->val_type == TEMP_VAL_CONST) {
3450         /* Propagate constant via movi -> dupi.  */
3451         tcg_target_ulong val = its->val;
3452         if (IS_DEAD_ARG(1)) {
3453             temp_dead(s, its);
3454         }
3455         tcg_reg_alloc_do_movi(s, ots, val, arg_life, op->output_pref[0]);
3456         return;
3457     }
3458 
3459     dup_out_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[0].u.regs;
3460     dup_in_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[1].u.regs;
3461 
3462     /* Allocate the output register now.  */
3463     if (ots->val_type != TEMP_VAL_REG) {
3464         TCGRegSet allocated_regs = s->reserved_regs;
3465 
3466         if (!IS_DEAD_ARG(1) && its->val_type == TEMP_VAL_REG) {
3467             /* Make sure to not spill the input register. */
3468             tcg_regset_set_reg(allocated_regs, its->reg);
3469         }
3470         ots->reg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
3471                                  op->output_pref[0], ots->indirect_base);
3472         ots->val_type = TEMP_VAL_REG;
3473         ots->mem_coherent = 0;
3474         s->reg_to_temp[ots->reg] = ots;
3475     }
3476 
3477     switch (its->val_type) {
3478     case TEMP_VAL_REG:
3479         /*
3480          * The dup constraints must be broad, covering all possible VECE.
3481          * However, tcg_out_dup_vec() gets to see the VECE and we allow it
3482          * to fail, indicating that extra moves are required for that case.
3483          */
3484         if (tcg_regset_test_reg(dup_in_regs, its->reg)) {
3485             if (tcg_out_dup_vec(s, vtype, vece, ots->reg, its->reg)) {
3486                 goto done;
3487             }
3488             /* Try again from memory or a vector input register.  */
3489         }
3490         if (!its->mem_coherent) {
3491             /*
3492              * The input register is not synced, and so an extra store
3493              * would be required to use memory.  Attempt an integer-vector
3494              * register move first.  We do not have a TCGRegSet for this.
3495              */
3496             if (tcg_out_mov(s, itype, ots->reg, its->reg)) {
3497                 break;
3498             }
3499             /* Sync the temp back to its slot and load from there.  */
3500             temp_sync(s, its, s->reserved_regs, 0, 0);
3501         }
3502         /* fall through */
3503 
3504     case TEMP_VAL_MEM:
3505 #ifdef HOST_WORDS_BIGENDIAN
3506         endian_fixup = itype == TCG_TYPE_I32 ? 4 : 8;
3507         endian_fixup -= 1 << vece;
3508 #else
3509         endian_fixup = 0;
3510 #endif
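        /*
         * Worked example of the fixup above: on a big-endian host with
         * itype == TCG_TYPE_I64 and vece == MO_8, endian_fixup is
         * 8 - 1 == 7, pointing dupm at the low-order byte, which
         * big-endian storage places at the highest address of the
         * eight-byte slot.
         */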
3511         if (tcg_out_dupm_vec(s, vtype, vece, ots->reg, its->mem_base->reg,
3512                              its->mem_offset + endian_fixup)) {
3513             goto done;
3514         }
3515         tcg_out_ld(s, itype, ots->reg, its->mem_base->reg, its->mem_offset);
3516         break;
3517 
3518     default:
3519         g_assert_not_reached();
3520     }
3521 
3522     /* We now have a vector input register, so dup must succeed. */
3523     ok = tcg_out_dup_vec(s, vtype, vece, ots->reg, ots->reg);
3524     tcg_debug_assert(ok);
3525 
3526  done:
3527     if (IS_DEAD_ARG(1)) {
3528         temp_dead(s, its);
3529     }
3530     if (NEED_SYNC_ARG(0)) {
3531         temp_sync(s, ots, s->reserved_regs, 0, 0);
3532     }
3533     if (IS_DEAD_ARG(0)) {
3534         temp_dead(s, ots);
3535     }
3536 }
3537 
3538 static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
3539 {
3540     const TCGLifeData arg_life = op->life;
3541     const TCGOpDef * const def = &tcg_op_defs[op->opc];
3542     TCGRegSet i_allocated_regs;
3543     TCGRegSet o_allocated_regs;
3544     int i, k, nb_iargs, nb_oargs;
3545     TCGReg reg;
3546     TCGArg arg;
3547     const TCGArgConstraint *arg_ct;
3548     TCGTemp *ts;
3549     TCGArg new_args[TCG_MAX_OP_ARGS];
3550     int const_args[TCG_MAX_OP_ARGS];
3551 
3552     nb_oargs = def->nb_oargs;
3553     nb_iargs = def->nb_iargs;
3554 
3555     /* copy constants */
3556     memcpy(new_args + nb_oargs + nb_iargs,
3557            op->args + nb_oargs + nb_iargs,
3558            sizeof(TCGArg) * def->nb_cargs);
3559 
3560     i_allocated_regs = s->reserved_regs;
3561     o_allocated_regs = s->reserved_regs;
3562 
3563     /* satisfy input constraints */
3564     for (k = 0; k < nb_iargs; k++) {
3565         TCGRegSet i_preferred_regs, o_preferred_regs;
3566 
3567         i = def->sorted_args[nb_oargs + k];
3568         arg = op->args[i];
3569         arg_ct = &def->args_ct[i];
3570         ts = arg_temp(arg);
3571 
3572         if (ts->val_type == TEMP_VAL_CONST
3573             && tcg_target_const_match(ts->val, ts->type, arg_ct)) {
3574             /* constant is OK for instruction */
3575             const_args[i] = 1;
3576             new_args[i] = ts->val;
3577             continue;
3578         }
3579 
3580         i_preferred_regs = o_preferred_regs = 0;
3581         if (arg_ct->ct & TCG_CT_IALIAS) {
3582             o_preferred_regs = op->output_pref[arg_ct->alias_index];
3583             if (ts->fixed_reg) {
3584                 /* if fixed register, we must allocate a new register
3585                    if the alias is not the same register */
3586                 if (arg != op->args[arg_ct->alias_index]) {
3587                     goto allocate_in_reg;
3588                 }
3589             } else {
3590                 /* if the input is aliased to an output and if it is
3591                    not dead after the instruction, we must allocate
3592                    a new register and move it */
3593                 if (!IS_DEAD_ARG(i)) {
3594                     goto allocate_in_reg;
3595                 }
3596 
3597                 /* check if the current register has already been allocated
3598                    for another input aliased to an output */
3599                 if (ts->val_type == TEMP_VAL_REG) {
3600                     int k2, i2;
3601                     reg = ts->reg;
3602                     for (k2 = 0 ; k2 < k ; k2++) {
3603                         i2 = def->sorted_args[nb_oargs + k2];
3604                         if ((def->args_ct[i2].ct & TCG_CT_IALIAS) &&
3605                             reg == new_args[i2]) {
3606                             goto allocate_in_reg;
3607                         }
3608                     }
3609                 }
3610                 i_preferred_regs = o_preferred_regs;
3611             }
3612         }
3613 
3614         temp_load(s, ts, arg_ct->u.regs, i_allocated_regs, i_preferred_regs);
3615         reg = ts->reg;
3616 
3617         if (tcg_regset_test_reg(arg_ct->u.regs, reg)) {
3618             /* nothing to do: the constraint is satisfied */
3619         } else {
3620         allocate_in_reg:
3621             /* allocate a new register matching the constraint
3622                and move the temporary register into it */
3623             temp_load(s, ts, tcg_target_available_regs[ts->type],
3624                       i_allocated_regs, 0);
3625             reg = tcg_reg_alloc(s, arg_ct->u.regs, i_allocated_regs,
3626                                 o_preferred_regs, ts->indirect_base);
3627             if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
3628                 /*
3629                  * Cross register class move not supported.  Sync the
3630                  * temp back to its slot and load from there.
3631                  */
3632                 temp_sync(s, ts, i_allocated_regs, 0, 0);
3633                 tcg_out_ld(s, ts->type, reg,
3634                            ts->mem_base->reg, ts->mem_offset);
3635             }
3636         }
3637         new_args[i] = reg;
3638         const_args[i] = 0;
3639         tcg_regset_set_reg(i_allocated_regs, reg);
3640     }
3641 
3642     /* mark dead temporaries and free the associated registers */
3643     for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
3644         if (IS_DEAD_ARG(i)) {
3645             temp_dead(s, arg_temp(op->args[i]));
3646         }
3647     }
3648 
3649     if (def->flags & TCG_OPF_BB_END) {
3650         tcg_reg_alloc_bb_end(s, i_allocated_regs);
3651     } else {
3652         if (def->flags & TCG_OPF_CALL_CLOBBER) {
3653             /* XXX: permit generic clobber register list? */
3654             for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
3655                 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
3656                     tcg_reg_free(s, i, i_allocated_regs);
3657                 }
3658             }
3659         }
3660         if (def->flags & TCG_OPF_SIDE_EFFECTS) {
3661             /* sync globals if the op has side effects and might trigger
3662                an exception. */
3663             sync_globals(s, i_allocated_regs);
3664         }
3665 
3666         /* satisfy the output constraints */
3667         for (k = 0; k < nb_oargs; k++) {
3668             i = def->sorted_args[k];
3669             arg = op->args[i];
3670             arg_ct = &def->args_ct[i];
3671             ts = arg_temp(arg);
3672 
3673             /* ENV should not be modified.  */
3674             tcg_debug_assert(!ts->fixed_reg);
3675 
3676             if ((arg_ct->ct & TCG_CT_ALIAS)
3677                 && !const_args[arg_ct->alias_index]) {
3678                 reg = new_args[arg_ct->alias_index];
3679             } else if (arg_ct->ct & TCG_CT_NEWREG) {
3680                 reg = tcg_reg_alloc(s, arg_ct->u.regs,
3681                                     i_allocated_regs | o_allocated_regs,
3682                                     op->output_pref[k], ts->indirect_base);
3683             } else {
3684                 reg = tcg_reg_alloc(s, arg_ct->u.regs, o_allocated_regs,
3685                                     op->output_pref[k], ts->indirect_base);
3686             }
3687             tcg_regset_set_reg(o_allocated_regs, reg);
3688             if (ts->val_type == TEMP_VAL_REG) {
3689                 s->reg_to_temp[ts->reg] = NULL;
3690             }
3691             ts->val_type = TEMP_VAL_REG;
3692             ts->reg = reg;
3693             /*
3694              * Temp value is modified, so the value kept in memory is
3695              * potentially not the same.
3696              */
3697             ts->mem_coherent = 0;
3698             s->reg_to_temp[reg] = ts;
3699             new_args[i] = reg;
3700         }
3701     }
3702 
3703     /* emit instruction */
3704     if (def->flags & TCG_OPF_VECTOR) {
3705         tcg_out_vec_op(s, op->opc, TCGOP_VECL(op), TCGOP_VECE(op),
3706                        new_args, const_args);
3707     } else {
3708         tcg_out_op(s, op->opc, new_args, const_args);
3709     }
3710 
3711     /* move the outputs in the correct register if needed */
3712     for (i = 0; i < nb_oargs; i++) {
3713         ts = arg_temp(op->args[i]);
3714 
3715         /* ENV should not be modified.  */
3716         tcg_debug_assert(!ts->fixed_reg);
3717 
3718         if (NEED_SYNC_ARG(i)) {
3719             temp_sync(s, ts, o_allocated_regs, 0, IS_DEAD_ARG(i));
3720         } else if (IS_DEAD_ARG(i)) {
3721             temp_dead(s, ts);
3722         }
3723     }
3724 }
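
/*
 * Example of the alias handling above (hypothetical constraint, for
 * illustration): a two-address style constraint "0" on an input marks
 * it TCG_CT_IALIAS with the first output.  If that input is still live
 * after the op, the allocate_in_reg path copies it into a fresh
 * register first, so overwriting the shared output register cannot
 * clobber a value that is needed later.
 */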
3725 
3726 #ifdef TCG_TARGET_STACK_GROWSUP
3727 #define STACK_DIR(x) (-(x))
3728 #else
3729 #define STACK_DIR(x) (x)
3730 #endif
3731 
3732 static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
3733 {
3734     const int nb_oargs = TCGOP_CALLO(op);
3735     const int nb_iargs = TCGOP_CALLI(op);
3736     const TCGLifeData arg_life = op->life;
3737     int flags, nb_regs, i;
3738     TCGReg reg;
3739     TCGArg arg;
3740     TCGTemp *ts;
3741     intptr_t stack_offset;
3742     size_t call_stack_size;
3743     tcg_insn_unit *func_addr;
3744     int allocate_args;
3745     TCGRegSet allocated_regs;
3746 
3747     func_addr = (tcg_insn_unit *)(intptr_t)op->args[nb_oargs + nb_iargs];
3748     flags = op->args[nb_oargs + nb_iargs + 1];
3749 
3750     nb_regs = ARRAY_SIZE(tcg_target_call_iarg_regs);
3751     if (nb_regs > nb_iargs) {
3752         nb_regs = nb_iargs;
3753     }
3754 
3755     /* assign stack slots first */
3756     call_stack_size = (nb_iargs - nb_regs) * sizeof(tcg_target_long);
3757     call_stack_size = (call_stack_size + TCG_TARGET_STACK_ALIGN - 1) &
3758         ~(TCG_TARGET_STACK_ALIGN - 1);
3759     allocate_args = (call_stack_size > TCG_STATIC_CALL_ARGS_SIZE);
3760     if (allocate_args) {
3761         /* XXX: if more than TCG_STATIC_CALL_ARGS_SIZE is needed,
3762            preallocate call stack */
3763         tcg_abort();
3764     }
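    /*
     * Example of the computation above (hypothetical host, for
     * illustration): ten integer arguments with six argument registers
     * leave four stack words, i.e. 32 bytes on a 64-bit host; with
     * TCG_TARGET_STACK_ALIGN == 16 that is already aligned and
     * comfortably below TCG_STATIC_CALL_ARGS_SIZE, so no dynamic
     * allocation is required.
     */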
3765 
3766     stack_offset = TCG_TARGET_CALL_STACK_OFFSET;
3767     for (i = nb_regs; i < nb_iargs; i++) {
3768         arg = op->args[nb_oargs + i];
3769 #ifdef TCG_TARGET_STACK_GROWSUP
3770         stack_offset -= sizeof(tcg_target_long);
3771 #endif
3772         if (arg != TCG_CALL_DUMMY_ARG) {
3773             ts = arg_temp(arg);
3774             temp_load(s, ts, tcg_target_available_regs[ts->type],
3775                       s->reserved_regs, 0);
3776             tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK, stack_offset);
3777         }
3778 #ifndef TCG_TARGET_STACK_GROWSUP
3779         stack_offset += sizeof(tcg_target_long);
3780 #endif
3781     }
3782 
3783     /* assign input registers */
3784     allocated_regs = s->reserved_regs;
3785     for (i = 0; i < nb_regs; i++) {
3786         arg = op->args[nb_oargs + i];
3787         if (arg != TCG_CALL_DUMMY_ARG) {
3788             ts = arg_temp(arg);
3789             reg = tcg_target_call_iarg_regs[i];
3790 
3791             if (ts->val_type == TEMP_VAL_REG) {
3792                 if (ts->reg != reg) {
3793                     tcg_reg_free(s, reg, allocated_regs);
3794                     if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
3795                         /*
3796                          * Cross register class move not supported.  Sync the
3797                          * temp back to its slot and load from there.
3798                          */
3799                         temp_sync(s, ts, allocated_regs, 0, 0);
3800                         tcg_out_ld(s, ts->type, reg,
3801                                    ts->mem_base->reg, ts->mem_offset);
3802                     }
3803                 }
3804             } else {
3805                 TCGRegSet arg_set = 0;
3806 
3807                 tcg_reg_free(s, reg, allocated_regs);
3808                 tcg_regset_set_reg(arg_set, reg);
3809                 temp_load(s, ts, arg_set, allocated_regs, 0);
3810             }
3811 
3812             tcg_regset_set_reg(allocated_regs, reg);
3813         }
3814     }
3815 
3816     /* mark dead temporaries and free the associated registers */
3817     for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
3818         if (IS_DEAD_ARG(i)) {
3819             temp_dead(s, arg_temp(op->args[i]));
3820         }
3821     }
3822 
3823     /* clobber call registers */
3824     for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
3825         if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
3826             tcg_reg_free(s, i, allocated_regs);
3827         }
3828     }
3829 
3830     /* Save globals if they might be written by the helper; sync them if
3831        they might be read. */
3832     if (flags & TCG_CALL_NO_READ_GLOBALS) {
3833         /* Nothing to do */
3834     } else if (flags & TCG_CALL_NO_WRITE_GLOBALS) {
3835         sync_globals(s, allocated_regs);
3836     } else {
3837         save_globals(s, allocated_regs);
3838     }
3839 
3840     tcg_out_call(s, func_addr);
3841 
3842     /* assign output registers and emit moves if needed */
3843     for (i = 0; i < nb_oargs; i++) {
3844         arg = op->args[i];
3845         ts = arg_temp(arg);
3846 
3847         /* ENV should not be modified.  */
3848         tcg_debug_assert(!ts->fixed_reg);
3849 
3850         reg = tcg_target_call_oarg_regs[i];
3851         tcg_debug_assert(s->reg_to_temp[reg] == NULL);
3852         if (ts->val_type == TEMP_VAL_REG) {
3853             s->reg_to_temp[ts->reg] = NULL;
3854         }
3855         ts->val_type = TEMP_VAL_REG;
3856         ts->reg = reg;
3857         ts->mem_coherent = 0;
3858         s->reg_to_temp[reg] = ts;
3859         if (NEED_SYNC_ARG(i)) {
3860             temp_sync(s, ts, allocated_regs, 0, IS_DEAD_ARG(i));
3861         } else if (IS_DEAD_ARG(i)) {
3862             temp_dead(s, ts);
3863         }
3864     }
3865 }
3866 
3867 #ifdef CONFIG_PROFILER
3868 
3869 /* avoid copy/paste errors */
3870 #define PROF_ADD(to, from, field)                       \
3871     do {                                                \
3872         (to)->field += atomic_read(&((from)->field));   \
3873     } while (0)
3874 
3875 #define PROF_MAX(to, from, field)                                       \
3876     do {                                                                \
3877         typeof((from)->field) val__ = atomic_read(&((from)->field));    \
3878         if (val__ > (to)->field) {                                      \
3879             (to)->field = val__;                                        \
3880         }                                                               \
3881     } while (0)
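
/* Both helpers use atomic_read() because vCPU threads may update the
   per-context counters concurrently; the snapshot below is therefore a
   sum of racy reads rather than a consistent point-in-time view, which
   is acceptable for statistics.  */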
3882 
3883 /* Pass in a zeroed @prof.  */
3884 static inline
3885 void tcg_profile_snapshot(TCGProfile *prof, bool counters, bool table)
3886 {
3887     unsigned int n_ctxs = atomic_read(&n_tcg_ctxs);
3888     unsigned int i;
3889 
3890     for (i = 0; i < n_ctxs; i++) {
3891         TCGContext *s = atomic_read(&tcg_ctxs[i]);
3892         const TCGProfile *orig = &s->prof;
3893 
3894         if (counters) {
3895             PROF_ADD(prof, orig, cpu_exec_time);
3896             PROF_ADD(prof, orig, tb_count1);
3897             PROF_ADD(prof, orig, tb_count);
3898             PROF_ADD(prof, orig, op_count);
3899             PROF_MAX(prof, orig, op_count_max);
3900             PROF_ADD(prof, orig, temp_count);
3901             PROF_MAX(prof, orig, temp_count_max);
3902             PROF_ADD(prof, orig, del_op_count);
3903             PROF_ADD(prof, orig, code_in_len);
3904             PROF_ADD(prof, orig, code_out_len);
3905             PROF_ADD(prof, orig, search_out_len);
3906             PROF_ADD(prof, orig, interm_time);
3907             PROF_ADD(prof, orig, code_time);
3908             PROF_ADD(prof, orig, la_time);
3909             PROF_ADD(prof, orig, opt_time);
3910             PROF_ADD(prof, orig, restore_count);
3911             PROF_ADD(prof, orig, restore_time);
3912         }
3913         if (table) {
3914             int i;
3915 
3916             for (i = 0; i < NB_OPS; i++) {
3917                 PROF_ADD(prof, orig, table_op_count[i]);
3918             }
3919         }
3920     }
3921 }
3922 
3923 #undef PROF_ADD
3924 #undef PROF_MAX
3925 
3926 static void tcg_profile_snapshot_counters(TCGProfile *prof)
3927 {
3928     tcg_profile_snapshot(prof, true, false);
3929 }
3930 
3931 static void tcg_profile_snapshot_table(TCGProfile *prof)
3932 {
3933     tcg_profile_snapshot(prof, false, true);
3934 }
3935 
3936 void tcg_dump_op_count(void)
3937 {
3938     TCGProfile prof = {};
3939     int i;
3940 
3941     tcg_profile_snapshot_table(&prof);
3942     for (i = 0; i < NB_OPS; i++) {
3943         qemu_printf("%s %" PRId64 "\n", tcg_op_defs[i].name,
3944                     prof.table_op_count[i]);
3945     }
3946 }
3947 
3948 int64_t tcg_cpu_exec_time(void)
3949 {
3950     unsigned int n_ctxs = atomic_read(&n_tcg_ctxs);
3951     unsigned int i;
3952     int64_t ret = 0;
3953 
3954     for (i = 0; i < n_ctxs; i++) {
3955         const TCGContext *s = atomic_read(&tcg_ctxs[i]);
3956         const TCGProfile *prof = &s->prof;
3957 
3958         ret += atomic_read(&prof->cpu_exec_time);
3959     }
3960     return ret;
3961 }
3962 #else
3963 void tcg_dump_op_count(void)
3964 {
3965     qemu_printf("[TCG profiler not compiled]\n");
3966 }
3967 
3968 int64_t tcg_cpu_exec_time(void)
3969 {
3970     error_report("%s: TCG profiler not compiled", __func__);
3971     exit(EXIT_FAILURE);
3972 }
3973 #endif
3974 
3975 
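/*
 * Generate host code for @tb from the ops accumulated in @s.  Returns
 * the size in bytes of the generated code, or a negative value when
 * the caller must retry: -1 if the code buffer high-water mark was
 * crossed, -2 if instruction offsets or relocations overflowed their
 * encoding (see the overflow checks in the main loop below).
 */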
3976 int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
3977 {
3978 #ifdef CONFIG_PROFILER
3979     TCGProfile *prof = &s->prof;
3980 #endif
3981     int i, num_insns;
3982     TCGOp *op;
3983 
3984 #ifdef CONFIG_PROFILER
3985     {
3986         int n = 0;
3987 
3988         QTAILQ_FOREACH(op, &s->ops, link) {
3989             n++;
3990         }
3991         atomic_set(&prof->op_count, prof->op_count + n);
3992         if (n > prof->op_count_max) {
3993             atomic_set(&prof->op_count_max, n);
3994         }
3995 
3996         n = s->nb_temps;
3997         atomic_set(&prof->temp_count, prof->temp_count + n);
3998         if (n > prof->temp_count_max) {
3999             atomic_set(&prof->temp_count_max, n);
4000         }
4001     }
4002 #endif
4003 
4004 #ifdef DEBUG_DISAS
4005     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)
4006                  && qemu_log_in_addr_range(tb->pc))) {
4007         qemu_log_lock();
4008         qemu_log("OP:\n");
4009         tcg_dump_ops(s, false);
4010         qemu_log("\n");
4011         qemu_log_unlock();
4012     }
4013 #endif
4014 
4015 #ifdef CONFIG_DEBUG_TCG
4016     /* Ensure all labels referenced have been emitted.  */
4017     {
4018         TCGLabel *l;
4019         bool error = false;
4020 
4021         QSIMPLEQ_FOREACH(l, &s->labels, next) {
4022             if (unlikely(!l->present) && l->refs) {
4023                 qemu_log_mask(CPU_LOG_TB_OP,
4024                               "$L%d referenced but not present.\n", l->id);
4025                 error = true;
4026             }
4027         }
4028         assert(!error);
4029     }
4030 #endif
4031 
4032 #ifdef CONFIG_PROFILER
4033     atomic_set(&prof->opt_time, prof->opt_time - profile_getclock());
4034 #endif
4035 
4036 #ifdef USE_TCG_OPTIMIZATIONS
4037     tcg_optimize(s);
4038 #endif
4039 
4040 #ifdef CONFIG_PROFILER
4041     atomic_set(&prof->opt_time, prof->opt_time + profile_getclock());
4042     atomic_set(&prof->la_time, prof->la_time - profile_getclock());
4043 #endif
4044 
4045     reachable_code_pass(s);
4046     liveness_pass_1(s);
4047 
4048     if (s->nb_indirects > 0) {
4049 #ifdef DEBUG_DISAS
4050         if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
4051                      && qemu_log_in_addr_range(tb->pc))) {
4052             qemu_log_lock();
4053             qemu_log("OP before indirect lowering:\n");
4054             tcg_dump_ops(s, false);
4055             qemu_log("\n");
4056             qemu_log_unlock();
4057         }
4058 #endif
4059         /* Replace indirect temps with direct temps.  */
4060         if (liveness_pass_2(s)) {
4061             /* If changes were made, re-run liveness.  */
4062             liveness_pass_1(s);
4063         }
4064     }
4065 
4066 #ifdef CONFIG_PROFILER
4067     atomic_set(&prof->la_time, prof->la_time + profile_getclock());
4068 #endif
4069 
4070 #ifdef DEBUG_DISAS
4071     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT)
4072                  && qemu_log_in_addr_range(tb->pc))) {
4073         qemu_log_lock();
4074         qemu_log("OP after optimization and liveness analysis:\n");
4075         tcg_dump_ops(s, true);
4076         qemu_log("\n");
4077         qemu_log_unlock();
4078     }
4079 #endif
4080 
4081     tcg_reg_alloc_start(s);
4082 
4083     s->code_buf = tb->tc.ptr;
4084     s->code_ptr = tb->tc.ptr;
4085 
4086 #ifdef TCG_TARGET_NEED_LDST_LABELS
4087     QSIMPLEQ_INIT(&s->ldst_labels);
4088 #endif
4089 #ifdef TCG_TARGET_NEED_POOL_LABELS
4090     s->pool_labels = NULL;
4091 #endif
4092 
4093     num_insns = -1;
4094     QTAILQ_FOREACH(op, &s->ops, link) {
4095         TCGOpcode opc = op->opc;
4096 
4097 #ifdef CONFIG_PROFILER
4098         atomic_set(&prof->table_op_count[opc], prof->table_op_count[opc] + 1);
4099 #endif
4100 
4101         switch (opc) {
4102         case INDEX_op_mov_i32:
4103         case INDEX_op_mov_i64:
4104         case INDEX_op_mov_vec:
4105             tcg_reg_alloc_mov(s, op);
4106             break;
4107         case INDEX_op_movi_i32:
4108         case INDEX_op_movi_i64:
4109         case INDEX_op_dupi_vec:
4110             tcg_reg_alloc_movi(s, op);
4111             break;
4112         case INDEX_op_dup_vec:
4113             tcg_reg_alloc_dup(s, op);
4114             break;
4115         case INDEX_op_insn_start:
4116             if (num_insns >= 0) {
4117                 size_t off = tcg_current_code_size(s);
4118                 s->gen_insn_end_off[num_insns] = off;
4119                 /* Assert that we do not overflow our stored offset.  */
4120                 assert(s->gen_insn_end_off[num_insns] == off);
4121             }
4122             num_insns++;
4123             for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
4124                 target_ulong a;
4125 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
4126                 a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
4127 #else
4128                 a = op->args[i];
4129 #endif
4130                 s->gen_insn_data[num_insns][i] = a;
4131             }
4132             break;
4133         case INDEX_op_discard:
4134             temp_dead(s, arg_temp(op->args[0]));
4135             break;
4136         case INDEX_op_set_label:
4137             tcg_reg_alloc_bb_end(s, s->reserved_regs);
4138             tcg_out_label(s, arg_label(op->args[0]), s->code_ptr);
4139             break;
4140         case INDEX_op_call:
4141             tcg_reg_alloc_call(s, op);
4142             break;
4143         default:
4144             /* Sanity check that we've not introduced any unhandled opcodes. */
4145             tcg_debug_assert(tcg_op_supported(opc));
4146             /* Note: in order to speed up the code, it would be much
4147                faster to have specialized register allocator functions for
4148                some common argument patterns */
4149             tcg_reg_alloc_op(s, op);
4150             break;
4151         }
4152 #ifdef CONFIG_DEBUG_TCG
4153         check_regs(s);
4154 #endif
4155         /* Test for (pending) buffer overflow.  The assumption is that any
4156            one operation beginning below the high water mark cannot overrun
4157            the buffer completely.  Thus we can test for overflow after
4158            generating code without having to check during generation.  */
4159         if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
4160             return -1;
4161         }
4162         /* Test for TB overflow, as seen by gen_insn_end_off.  */
4163         if (unlikely(tcg_current_code_size(s) > UINT16_MAX)) {
4164             return -2;
4165         }
4166     }
4167     tcg_debug_assert(num_insns >= 0);
4168     s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);
4169 
4170     /* Generate TB finalization at the end of block */
4171 #ifdef TCG_TARGET_NEED_LDST_LABELS
4172     i = tcg_out_ldst_finalize(s);
4173     if (i < 0) {
4174         return i;
4175     }
4176 #endif
4177 #ifdef TCG_TARGET_NEED_POOL_LABELS
4178     i = tcg_out_pool_finalize(s);
4179     if (i < 0) {
4180         return i;
4181     }
4182 #endif
4183     if (!tcg_resolve_relocs(s)) {
4184         return -2;
4185     }
4186 
4187     /* flush instruction cache */
4188     flush_icache_range((uintptr_t)s->code_buf, (uintptr_t)s->code_ptr);
4189 
4190     return tcg_current_code_size(s);
4191 }
4192 
4193 #ifdef CONFIG_PROFILER
4194 void tcg_dump_info(void)
4195 {
4196     TCGProfile prof = {};
4197     const TCGProfile *s;
4198     int64_t tb_count;
4199     int64_t tb_div_count;
4200     int64_t tot;
4201 
4202     tcg_profile_snapshot_counters(&prof);
4203     s = &prof;
4204     tb_count = s->tb_count;
4205     tb_div_count = tb_count ? tb_count : 1;
4206     tot = s->interm_time + s->code_time;
4207 
4208     qemu_printf("JIT cycles          %" PRId64 " (%0.3f s at 2.4 GHz)\n",
4209                 tot, tot / 2.4e9);
4210     qemu_printf("translated TBs      %" PRId64 " (aborted=%" PRId64
4211                 " %0.1f%%)\n",
4212                 tb_count, s->tb_count1 - tb_count,
4213                 (double)(s->tb_count1 - s->tb_count)
4214                 / (s->tb_count1 ? s->tb_count1 : 1) * 100.0);
4215     qemu_printf("avg ops/TB          %0.1f max=%d\n",
4216                 (double)s->op_count / tb_div_count, s->op_count_max);
4217     qemu_printf("deleted ops/TB      %0.2f\n",
4218                 (double)s->del_op_count / tb_div_count);
4219     qemu_printf("avg temps/TB        %0.2f max=%d\n",
4220                 (double)s->temp_count / tb_div_count, s->temp_count_max);
4221     qemu_printf("avg host code/TB    %0.1f\n",
4222                 (double)s->code_out_len / tb_div_count);
4223     qemu_printf("avg search data/TB  %0.1f\n",
4224                 (double)s->search_out_len / tb_div_count);
4225 
4226     qemu_printf("cycles/op           %0.1f\n",
4227                 s->op_count ? (double)tot / s->op_count : 0);
4228     qemu_printf("cycles/in byte      %0.1f\n",
4229                 s->code_in_len ? (double)tot / s->code_in_len : 0);
4230     qemu_printf("cycles/out byte     %0.1f\n",
4231                 s->code_out_len ? (double)tot / s->code_out_len : 0);
4232     qemu_printf("cycles/search byte     %0.1f\n",
4233                 s->search_out_len ? (double)tot / s->search_out_len : 0);
4234     if (tot == 0) {
4235         tot = 1;
4236     }
4237     qemu_printf("  gen_interm time   %0.1f%%\n",
4238                 (double)s->interm_time / tot * 100.0);
4239     qemu_printf("  gen_code time     %0.1f%%\n",
4240                 (double)s->code_time / tot * 100.0);
4241     qemu_printf("optim./code time    %0.1f%%\n",
4242                 (double)s->opt_time / (s->code_time ? s->code_time : 1)
4243                 * 100.0);
4244     qemu_printf("liveness/code time  %0.1f%%\n",
4245                 (double)s->la_time / (s->code_time ? s->code_time : 1) * 100.0);
4246     qemu_printf("cpu_restore count   %" PRId64 "\n",
4247                 s->restore_count);
4248     qemu_printf("  avg cycles        %0.1f\n",
4249                 s->restore_count ? (double)s->restore_time / s->restore_count : 0);
4250 }
4251 #else
4252 void tcg_dump_info(void)
4253 {
4254     qemu_printf("[TCG profiler not compiled]\n");
4255 }
4256 #endif
4257 
4258 #ifdef ELF_HOST_MACHINE
4259 /* In order to use this feature, the backend needs to do three things:
4260 
4261    (1) Define ELF_HOST_MACHINE to indicate both what value to
4262        put into the ELF image and to indicate support for the feature.
4263 
4264    (2) Define tcg_register_jit.  This should create a buffer containing
4265        the contents of a .debug_frame section that describes the post-
4266        prologue unwind info for the tcg machine.
4267 
4268    (3) Call tcg_register_jit_int, with the constructed .debug_frame.
4269 */
4270 
4271 /* Begin GDB interface.  THE FOLLOWING MUST MATCH GDB DOCS.  */
4272 typedef enum {
4273     JIT_NOACTION = 0,
4274     JIT_REGISTER_FN,
4275     JIT_UNREGISTER_FN
4276 } jit_actions_t;
4277 
4278 struct jit_code_entry {
4279     struct jit_code_entry *next_entry;
4280     struct jit_code_entry *prev_entry;
4281     const void *symfile_addr;
4282     uint64_t symfile_size;
4283 };
4284 
4285 struct jit_descriptor {
4286     uint32_t version;
4287     uint32_t action_flag;
4288     struct jit_code_entry *relevant_entry;
4289     struct jit_code_entry *first_entry;
4290 };
4291 
4292 void __jit_debug_register_code(void) __attribute__((noinline));
4293 void __jit_debug_register_code(void)
4294 {
4295     asm("");
4296 }
4297 
4298 /* Must statically initialize the version, because GDB may check
4299    the version before we can set it.  */
4300 struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };
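
/* Per the GDB JIT interface, the debugger sets a breakpoint inside
   __jit_debug_register_code and, each time it fires, re-reads
   __jit_debug_descriptor to pick up the new symfile entry; the empty
   asm("") above keeps the function from being optimized away.  */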
4301 
4302 /* End GDB interface.  */
4303 
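/* Return the offset of @str within @strtab.  Callers must only pass
   strings that are present in the table: there is no length bound, so
   an absent string would walk off the end.  */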
4304 static int find_string(const char *strtab, const char *str)
4305 {
4306     const char *p = strtab + 1;
4307 
4308     while (1) {
4309         if (strcmp(p, str) == 0) {
4310             return p - strtab;
4311         }
4312         p += strlen(p) + 1;
4313     }
4314 }
4315 
4316 static void tcg_register_jit_int(void *buf_ptr, size_t buf_size,
4317                                  const void *debug_frame,
4318                                  size_t debug_frame_size)
4319 {
4320     struct __attribute__((packed)) DebugInfo {
4321         uint32_t  len;
4322         uint16_t  version;
4323         uint32_t  abbrev;
4324         uint8_t   ptr_size;
4325         uint8_t   cu_die;
4326         uint16_t  cu_lang;
4327         uintptr_t cu_low_pc;
4328         uintptr_t cu_high_pc;
4329         uint8_t   fn_die;
4330         char      fn_name[16];
4331         uintptr_t fn_low_pc;
4332         uintptr_t fn_high_pc;
4333         uint8_t   cu_eoc;
4334     };
4335 
4336     struct ElfImage {
4337         ElfW(Ehdr) ehdr;
4338         ElfW(Phdr) phdr;
4339         ElfW(Shdr) shdr[7];
4340         ElfW(Sym)  sym[2];
4341         struct DebugInfo di;
4342         uint8_t    da[24];
4343         char       str[80];
4344     };
4345 
4346     struct ElfImage *img;
4347 
4348     static const struct ElfImage img_template = {
4349         .ehdr = {
4350             .e_ident[EI_MAG0] = ELFMAG0,
4351             .e_ident[EI_MAG1] = ELFMAG1,
4352             .e_ident[EI_MAG2] = ELFMAG2,
4353             .e_ident[EI_MAG3] = ELFMAG3,
4354             .e_ident[EI_CLASS] = ELF_CLASS,
4355             .e_ident[EI_DATA] = ELF_DATA,
4356             .e_ident[EI_VERSION] = EV_CURRENT,
4357             .e_type = ET_EXEC,
4358             .e_machine = ELF_HOST_MACHINE,
4359             .e_version = EV_CURRENT,
4360             .e_phoff = offsetof(struct ElfImage, phdr),
4361             .e_shoff = offsetof(struct ElfImage, shdr),
4362             .e_ehsize = sizeof(ElfW(Ehdr)),
4363             .e_phentsize = sizeof(ElfW(Phdr)),
4364             .e_phnum = 1,
4365             .e_shentsize = sizeof(ElfW(Shdr)),
4366             .e_shnum = ARRAY_SIZE(img->shdr),
4367             .e_shstrndx = ARRAY_SIZE(img->shdr) - 1,
4368 #ifdef ELF_HOST_FLAGS
4369             .e_flags = ELF_HOST_FLAGS,
4370 #endif
4371 #ifdef ELF_OSABI
4372             .e_ident[EI_OSABI] = ELF_OSABI,
4373 #endif
4374         },
4375         .phdr = {
4376             .p_type = PT_LOAD,
4377             .p_flags = PF_X,
4378         },
4379         .shdr = {
4380             [0] = { .sh_type = SHT_NULL },
4381             /* Trick: The contents of code_gen_buffer are not present in
4382                this fake ELF file; that got allocated elsewhere.  Therefore
4383                we mark .text as SHT_NOBITS (similar to .bss) so that readers
4384                will not look for contents.  We can record any address.  */
4385             [1] = { /* .text */
4386                 .sh_type = SHT_NOBITS,
4387                 .sh_flags = SHF_EXECINSTR | SHF_ALLOC,
4388             },
4389             [2] = { /* .debug_info */
4390                 .sh_type = SHT_PROGBITS,
4391                 .sh_offset = offsetof(struct ElfImage, di),
4392                 .sh_size = sizeof(struct DebugInfo),
4393             },
4394             [3] = { /* .debug_abbrev */
4395                 .sh_type = SHT_PROGBITS,
4396                 .sh_offset = offsetof(struct ElfImage, da),
4397                 .sh_size = sizeof(img->da),
4398             },
4399             [4] = { /* .debug_frame */
4400                 .sh_type = SHT_PROGBITS,
4401                 .sh_offset = sizeof(struct ElfImage),
4402             },
4403             [5] = { /* .symtab */
4404                 .sh_type = SHT_SYMTAB,
4405                 .sh_offset = offsetof(struct ElfImage, sym),
4406                 .sh_size = sizeof(img->sym),
4407                 .sh_info = 1,
4408                 .sh_link = ARRAY_SIZE(img->shdr) - 1,
4409                 .sh_entsize = sizeof(ElfW(Sym)),
4410             },
4411             [6] = { /* .strtab */
4412                 .sh_type = SHT_STRTAB,
4413                 .sh_offset = offsetof(struct ElfImage, str),
4414                 .sh_size = sizeof(img->str),
4415             }
4416         },
4417         .sym = {
4418             [1] = { /* code_gen_buffer */
4419                 .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC),
4420                 .st_shndx = 1,
4421             }
4422         },
4423         .di = {
4424             .len = sizeof(struct DebugInfo) - 4,
4425             .version = 2,
4426             .ptr_size = sizeof(void *),
4427             .cu_die = 1,
4428             .cu_lang = 0x8001,  /* DW_LANG_Mips_Assembler */
4429             .fn_die = 2,
4430             .fn_name = "code_gen_buffer"
4431         },
4432         .da = {
4433             1,          /* abbrev number (the cu) */
4434             0x11, 1,    /* DW_TAG_compile_unit, has children */
4435             0x13, 0x5,  /* DW_AT_language, DW_FORM_data2 */
4436             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
4437             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
4438             0, 0,       /* end of abbrev */
4439             2,          /* abbrev number (the fn) */
4440             0x2e, 0,    /* DW_TAG_subprogram, no children */
4441             0x3, 0x8,   /* DW_AT_name, DW_FORM_string */
4442             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
4443             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
4444             0, 0,       /* end of abbrev */
4445             0           /* no more abbrev */
4446         },
4447         .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0"
4448                ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer",
4449     };
4450 
4451     /* We only need a single jit entry; statically allocate it.  */
4452     static struct jit_code_entry one_entry;
4453 
4454     uintptr_t buf = (uintptr_t)buf_ptr;
4455     size_t img_size = sizeof(struct ElfImage) + debug_frame_size;
4456     DebugFrameHeader *dfh;
4457 
4458     img = g_malloc(img_size);
4459     *img = img_template;
4460 
4461     img->phdr.p_vaddr = buf;
4462     img->phdr.p_paddr = buf;
4463     img->phdr.p_memsz = buf_size;
4464 
4465     img->shdr[1].sh_name = find_string(img->str, ".text");
4466     img->shdr[1].sh_addr = buf;
4467     img->shdr[1].sh_size = buf_size;
4468 
4469     img->shdr[2].sh_name = find_string(img->str, ".debug_info");
4470     img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev");
4471 
4472     img->shdr[4].sh_name = find_string(img->str, ".debug_frame");
4473     img->shdr[4].sh_size = debug_frame_size;
4474 
4475     img->shdr[5].sh_name = find_string(img->str, ".symtab");
4476     img->shdr[6].sh_name = find_string(img->str, ".strtab");
4477 
4478     img->sym[1].st_name = find_string(img->str, "code_gen_buffer");
4479     img->sym[1].st_value = buf;
4480     img->sym[1].st_size = buf_size;
4481 
4482     img->di.cu_low_pc = buf;
4483     img->di.cu_high_pc = buf + buf_size;
4484     img->di.fn_low_pc = buf;
4485     img->di.fn_high_pc = buf + buf_size;
4486 
4487     dfh = (DebugFrameHeader *)(img + 1);
4488     memcpy(dfh, debug_frame, debug_frame_size);
4489     dfh->fde.func_start = buf;
4490     dfh->fde.func_len = buf_size;
4491 
4492 #ifdef DEBUG_JIT
4493     /* Enable this block to debug the ELF image file creation.
4494        One can use readelf, objdump, or other inspection utilities.  */
4495     {
4496         FILE *f = fopen("/tmp/qemu.jit", "w+b");
4497         if (f) {
4498             if (fwrite(img, img_size, 1, f) != 1) {
4499                 /* Avoid stupid unused return value warning for fwrite.  */
4500             }
4501             fclose(f);
4502         }
4503     }
4504 #endif
4505 
4506     one_entry.symfile_addr = img;
4507     one_entry.symfile_size = img_size;
4508 
4509     __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
4510     __jit_debug_descriptor.relevant_entry = &one_entry;
4511     __jit_debug_descriptor.first_entry = &one_entry;
4512     __jit_debug_register_code();
4513 }
4514 #else
4515 /* No support for the feature.  Provide the entry point expected by exec.c,
4516    and implement the internal function we declared earlier.  */
4517 
4518 static void tcg_register_jit_int(void *buf, size_t size,
4519                                  const void *debug_frame,
4520                                  size_t debug_frame_size)
4521 {
4522 }
4523 
4524 void tcg_register_jit(void *buf, size_t buf_size)
4525 {
4526 }
4527 #endif /* ELF_HOST_MACHINE */
4528 
4529 #if !TCG_TARGET_MAYBE_vec
4530 void tcg_expand_vec_op(TCGOpcode o, TCGType t, unsigned e, TCGArg a0, ...)
4531 {
4532     g_assert_not_reached();
4533 }
4534 #endif
4535