xref: /openbmc/qemu/tcg/tcg.c (revision 5ee5c14c)
1 /*
2  * Tiny Code Generator for QEMU
3  *
4  * Copyright (c) 2008 Fabrice Bellard
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24 
25 /* define it to use liveness analysis (better code) */
26 #define USE_TCG_OPTIMIZATIONS
27 
28 #include "qemu/osdep.h"
29 
30 /* Define to jump the ELF file used to communicate with GDB.  */
31 #undef DEBUG_JIT
32 
33 #include "qemu/error-report.h"
34 #include "qemu/cutils.h"
35 #include "qemu/host-utils.h"
36 #include "qemu/qemu-print.h"
37 #include "qemu/timer.h"
38 
39 /* Note: the long term plan is to reduce the dependencies on the QEMU
40    CPU definitions. Currently they are used for qemu_ld/st
41    instructions */
42 #define NO_CPU_IO_DEFS
43 #include "cpu.h"
44 
45 #include "exec/cpu-common.h"
46 #include "exec/exec-all.h"
47 
48 #include "tcg-op.h"
49 
50 #if UINTPTR_MAX == UINT32_MAX
51 # define ELF_CLASS  ELFCLASS32
52 #else
53 # define ELF_CLASS  ELFCLASS64
54 #endif
55 #ifdef HOST_WORDS_BIGENDIAN
56 # define ELF_DATA   ELFDATA2MSB
57 #else
58 # define ELF_DATA   ELFDATA2LSB
59 #endif
60 
61 #include "elf.h"
62 #include "exec/log.h"
63 #include "sysemu/sysemu.h"
64 
65 /* Forward declarations for functions declared in tcg-target.inc.c and
66    used here. */
67 static void tcg_target_init(TCGContext *s);
68 static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode);
69 static void tcg_target_qemu_prologue(TCGContext *s);
70 static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
71                         intptr_t value, intptr_t addend);
72 
73 /* The CIE and FDE header definitions will be common to all hosts.  */
74 typedef struct {
75     uint32_t len __attribute__((aligned((sizeof(void *)))));
76     uint32_t id;
77     uint8_t version;
78     char augmentation[1];
79     uint8_t code_align;
80     uint8_t data_align;
81     uint8_t return_column;
82 } DebugFrameCIE;
83 
84 typedef struct QEMU_PACKED {
85     uint32_t len __attribute__((aligned((sizeof(void *)))));
86     uint32_t cie_offset;
87     uintptr_t func_start;
88     uintptr_t func_len;
89 } DebugFrameFDEHeader;
90 
91 typedef struct QEMU_PACKED {
92     DebugFrameCIE cie;
93     DebugFrameFDEHeader fde;
94 } DebugFrameHeader;
95 
96 static void tcg_register_jit_int(void *buf, size_t size,
97                                  const void *debug_frame,
98                                  size_t debug_frame_size)
99     __attribute__((unused));
100 
101 /* Forward declarations for functions declared and used in tcg-target.inc.c. */
102 static const char *target_parse_constraint(TCGArgConstraint *ct,
103                                            const char *ct_str, TCGType type);
104 static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
105                        intptr_t arg2);
106 static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
107 static void tcg_out_movi(TCGContext *s, TCGType type,
108                          TCGReg ret, tcg_target_long arg);
109 static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
110                        const int *const_args);
111 #if TCG_TARGET_MAYBE_vec
112 static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
113                             TCGReg dst, TCGReg src);
114 static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
115                              TCGReg dst, TCGReg base, intptr_t offset);
116 static void tcg_out_dupi_vec(TCGContext *s, TCGType type,
117                              TCGReg dst, tcg_target_long arg);
118 static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, unsigned vecl,
119                            unsigned vece, const TCGArg *args,
120                            const int *const_args);
121 #else
122 static inline bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
123                                    TCGReg dst, TCGReg src)
124 {
125     g_assert_not_reached();
126 }
127 static inline bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
128                                     TCGReg dst, TCGReg base, intptr_t offset)
129 {
130     g_assert_not_reached();
131 }
132 static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type,
133                                     TCGReg dst, tcg_target_long arg)
134 {
135     g_assert_not_reached();
136 }
137 static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, unsigned vecl,
138                                   unsigned vece, const TCGArg *args,
139                                   const int *const_args)
140 {
141     g_assert_not_reached();
142 }
143 #endif
144 static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
145                        intptr_t arg2);
146 static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
147                         TCGReg base, intptr_t ofs);
148 static void tcg_out_call(TCGContext *s, tcg_insn_unit *target);
149 static int tcg_target_const_match(tcg_target_long val, TCGType type,
150                                   const TCGArgConstraint *arg_ct);
151 #ifdef TCG_TARGET_NEED_LDST_LABELS
152 static int tcg_out_ldst_finalize(TCGContext *s);
153 #endif
154 
155 #define TCG_HIGHWATER 1024
156 
157 static TCGContext **tcg_ctxs;
158 static unsigned int n_tcg_ctxs;
159 TCGv_env cpu_env = 0;
160 
161 struct tcg_region_tree {
162     QemuMutex lock;
163     GTree *tree;
164     /* padding to avoid false sharing is computed at run-time */
165 };
166 
167 /*
168  * We divide code_gen_buffer into equally-sized "regions" that TCG threads
169  * dynamically allocate from as demand dictates. Given appropriate region
170  * sizing, this minimizes flushes even when some TCG threads generate a lot
171  * more code than others.
172  */
173 struct tcg_region_state {
174     QemuMutex lock;
175 
176     /* fields set at init time */
177     void *start;
178     void *start_aligned;
179     void *end;
180     size_t n;
181     size_t size; /* size of one region */
182     size_t stride; /* .size + guard size */
183 
184     /* fields protected by the lock */
185     size_t current; /* current region index */
186     size_t agg_size_full; /* aggregate size of full regions */
187 };
188 
189 static struct tcg_region_state region;
190 /*
191  * This is an array of struct tcg_region_tree's, with padding.
192  * We use void * to simplify the computation of region_trees[i]; each
193  * struct is found every tree_size bytes.
194  */
195 static void *region_trees;
196 static size_t tree_size;
197 static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT];
198 static TCGRegSet tcg_target_call_clobber_regs;
199 
200 #if TCG_TARGET_INSN_UNIT_SIZE == 1
201 static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v)
202 {
203     *s->code_ptr++ = v;
204 }
205 
206 static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p,
207                                                       uint8_t v)
208 {
209     *p = v;
210 }
211 #endif
212 
213 #if TCG_TARGET_INSN_UNIT_SIZE <= 2
214 static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v)
215 {
216     if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
217         *s->code_ptr++ = v;
218     } else {
219         tcg_insn_unit *p = s->code_ptr;
220         memcpy(p, &v, sizeof(v));
221         s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
222     }
223 }
224 
225 static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p,
226                                                        uint16_t v)
227 {
228     if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
229         *p = v;
230     } else {
231         memcpy(p, &v, sizeof(v));
232     }
233 }
234 #endif
235 
236 #if TCG_TARGET_INSN_UNIT_SIZE <= 4
237 static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v)
238 {
239     if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
240         *s->code_ptr++ = v;
241     } else {
242         tcg_insn_unit *p = s->code_ptr;
243         memcpy(p, &v, sizeof(v));
244         s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
245     }
246 }
247 
248 static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p,
249                                                        uint32_t v)
250 {
251     if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
252         *p = v;
253     } else {
254         memcpy(p, &v, sizeof(v));
255     }
256 }
257 #endif
258 
259 #if TCG_TARGET_INSN_UNIT_SIZE <= 8
260 static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v)
261 {
262     if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
263         *s->code_ptr++ = v;
264     } else {
265         tcg_insn_unit *p = s->code_ptr;
266         memcpy(p, &v, sizeof(v));
267         s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
268     }
269 }
270 
271 static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p,
272                                                        uint64_t v)
273 {
274     if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
275         *p = v;
276     } else {
277         memcpy(p, &v, sizeof(v));
278     }
279 }
280 #endif
281 
282 /* label relocation processing */
283 
284 static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
285                           TCGLabel *l, intptr_t addend)
286 {
287     TCGRelocation *r = tcg_malloc(sizeof(TCGRelocation));
288 
289     r->type = type;
290     r->ptr = code_ptr;
291     r->addend = addend;
292     QSIMPLEQ_INSERT_TAIL(&l->relocs, r, next);
293 }
294 
295 static void tcg_out_label(TCGContext *s, TCGLabel *l, tcg_insn_unit *ptr)
296 {
297     tcg_debug_assert(!l->has_value);
298     l->has_value = 1;
299     l->u.value_ptr = ptr;
300 }
301 
302 TCGLabel *gen_new_label(void)
303 {
304     TCGContext *s = tcg_ctx;
305     TCGLabel *l = tcg_malloc(sizeof(TCGLabel));
306 
307     memset(l, 0, sizeof(TCGLabel));
308     l->id = s->nb_labels++;
309     QSIMPLEQ_INIT(&l->relocs);
310 
311     QSIMPLEQ_INSERT_TAIL(&s->labels, l, next);
312 
313     return l;
314 }
315 
316 static bool tcg_resolve_relocs(TCGContext *s)
317 {
318     TCGLabel *l;
319 
320     QSIMPLEQ_FOREACH(l, &s->labels, next) {
321         TCGRelocation *r;
322         uintptr_t value = l->u.value;
323 
324         QSIMPLEQ_FOREACH(r, &l->relocs, next) {
325             if (!patch_reloc(r->ptr, r->type, value, r->addend)) {
326                 return false;
327             }
328         }
329     }
330     return true;
331 }
332 
333 static void set_jmp_reset_offset(TCGContext *s, int which)
334 {
335     size_t off = tcg_current_code_size(s);
336     s->tb_jmp_reset_offset[which] = off;
337     /* Make sure that we didn't overflow the stored offset.  */
338     assert(s->tb_jmp_reset_offset[which] == off);
339 }
340 
341 #include "tcg-target.inc.c"
342 
343 /* compare a pointer @ptr and a tb_tc @s */
344 static int ptr_cmp_tb_tc(const void *ptr, const struct tb_tc *s)
345 {
346     if (ptr >= s->ptr + s->size) {
347         return 1;
348     } else if (ptr < s->ptr) {
349         return -1;
350     }
351     return 0;
352 }
353 
354 static gint tb_tc_cmp(gconstpointer ap, gconstpointer bp)
355 {
356     const struct tb_tc *a = ap;
357     const struct tb_tc *b = bp;
358 
359     /*
360      * When both sizes are set, we know this isn't a lookup.
361      * This is the most likely case: every TB must be inserted; lookups
362      * are a lot less frequent.
363      */
364     if (likely(a->size && b->size)) {
365         if (a->ptr > b->ptr) {
366             return 1;
367         } else if (a->ptr < b->ptr) {
368             return -1;
369         }
370         /* a->ptr == b->ptr should happen only on deletions */
371         g_assert(a->size == b->size);
372         return 0;
373     }
374     /*
375      * All lookups have either .size field set to 0.
376      * From the glib sources we see that @ap is always the lookup key. However
377      * the docs provide no guarantee, so we just mark this case as likely.
378      */
379     if (likely(a->size == 0)) {
380         return ptr_cmp_tb_tc(a->ptr, b);
381     }
382     return ptr_cmp_tb_tc(b->ptr, a);
383 }
384 
385 static void tcg_region_trees_init(void)
386 {
387     size_t i;
388 
389     tree_size = ROUND_UP(sizeof(struct tcg_region_tree), qemu_dcache_linesize);
390     region_trees = qemu_memalign(qemu_dcache_linesize, region.n * tree_size);
391     for (i = 0; i < region.n; i++) {
392         struct tcg_region_tree *rt = region_trees + i * tree_size;
393 
394         qemu_mutex_init(&rt->lock);
395         rt->tree = g_tree_new(tb_tc_cmp);
396     }
397 }
398 
399 static struct tcg_region_tree *tc_ptr_to_region_tree(void *p)
400 {
401     size_t region_idx;
402 
403     if (p < region.start_aligned) {
404         region_idx = 0;
405     } else {
406         ptrdiff_t offset = p - region.start_aligned;
407 
408         if (offset > region.stride * (region.n - 1)) {
409             region_idx = region.n - 1;
410         } else {
411             region_idx = offset / region.stride;
412         }
413     }
414     return region_trees + region_idx * tree_size;
415 }
416 
417 void tcg_tb_insert(TranslationBlock *tb)
418 {
419     struct tcg_region_tree *rt = tc_ptr_to_region_tree(tb->tc.ptr);
420 
421     qemu_mutex_lock(&rt->lock);
422     g_tree_insert(rt->tree, &tb->tc, tb);
423     qemu_mutex_unlock(&rt->lock);
424 }
425 
426 void tcg_tb_remove(TranslationBlock *tb)
427 {
428     struct tcg_region_tree *rt = tc_ptr_to_region_tree(tb->tc.ptr);
429 
430     qemu_mutex_lock(&rt->lock);
431     g_tree_remove(rt->tree, &tb->tc);
432     qemu_mutex_unlock(&rt->lock);
433 }
434 
435 /*
436  * Find the TB 'tb' such that
437  * tb->tc.ptr <= tc_ptr < tb->tc.ptr + tb->tc.size
438  * Return NULL if not found.
439  */
440 TranslationBlock *tcg_tb_lookup(uintptr_t tc_ptr)
441 {
442     struct tcg_region_tree *rt = tc_ptr_to_region_tree((void *)tc_ptr);
443     TranslationBlock *tb;
444     struct tb_tc s = { .ptr = (void *)tc_ptr };
445 
446     qemu_mutex_lock(&rt->lock);
447     tb = g_tree_lookup(rt->tree, &s);
448     qemu_mutex_unlock(&rt->lock);
449     return tb;
450 }
451 
452 static void tcg_region_tree_lock_all(void)
453 {
454     size_t i;
455 
456     for (i = 0; i < region.n; i++) {
457         struct tcg_region_tree *rt = region_trees + i * tree_size;
458 
459         qemu_mutex_lock(&rt->lock);
460     }
461 }
462 
463 static void tcg_region_tree_unlock_all(void)
464 {
465     size_t i;
466 
467     for (i = 0; i < region.n; i++) {
468         struct tcg_region_tree *rt = region_trees + i * tree_size;
469 
470         qemu_mutex_unlock(&rt->lock);
471     }
472 }
473 
474 void tcg_tb_foreach(GTraverseFunc func, gpointer user_data)
475 {
476     size_t i;
477 
478     tcg_region_tree_lock_all();
479     for (i = 0; i < region.n; i++) {
480         struct tcg_region_tree *rt = region_trees + i * tree_size;
481 
482         g_tree_foreach(rt->tree, func, user_data);
483     }
484     tcg_region_tree_unlock_all();
485 }
486 
487 size_t tcg_nb_tbs(void)
488 {
489     size_t nb_tbs = 0;
490     size_t i;
491 
492     tcg_region_tree_lock_all();
493     for (i = 0; i < region.n; i++) {
494         struct tcg_region_tree *rt = region_trees + i * tree_size;
495 
496         nb_tbs += g_tree_nnodes(rt->tree);
497     }
498     tcg_region_tree_unlock_all();
499     return nb_tbs;
500 }
501 
502 static void tcg_region_tree_reset_all(void)
503 {
504     size_t i;
505 
506     tcg_region_tree_lock_all();
507     for (i = 0; i < region.n; i++) {
508         struct tcg_region_tree *rt = region_trees + i * tree_size;
509 
510         /* Increment the refcount first so that destroy acts as a reset */
511         g_tree_ref(rt->tree);
512         g_tree_destroy(rt->tree);
513     }
514     tcg_region_tree_unlock_all();
515 }
516 
517 static void tcg_region_bounds(size_t curr_region, void **pstart, void **pend)
518 {
519     void *start, *end;
520 
521     start = region.start_aligned + curr_region * region.stride;
522     end = start + region.size;
523 
524     if (curr_region == 0) {
525         start = region.start;
526     }
527     if (curr_region == region.n - 1) {
528         end = region.end;
529     }
530 
531     *pstart = start;
532     *pend = end;
533 }
534 
535 static void tcg_region_assign(TCGContext *s, size_t curr_region)
536 {
537     void *start, *end;
538 
539     tcg_region_bounds(curr_region, &start, &end);
540 
541     s->code_gen_buffer = start;
542     s->code_gen_ptr = start;
543     s->code_gen_buffer_size = end - start;
544     s->code_gen_highwater = end - TCG_HIGHWATER;
545 }
546 
547 static bool tcg_region_alloc__locked(TCGContext *s)
548 {
549     if (region.current == region.n) {
550         return true;
551     }
552     tcg_region_assign(s, region.current);
553     region.current++;
554     return false;
555 }
556 
557 /*
558  * Request a new region once the one in use has filled up.
559  * Returns true on error.
560  */
561 static bool tcg_region_alloc(TCGContext *s)
562 {
563     bool err;
564     /* read the region size now; alloc__locked will overwrite it on success */
565     size_t size_full = s->code_gen_buffer_size;
566 
567     qemu_mutex_lock(&region.lock);
568     err = tcg_region_alloc__locked(s);
569     if (!err) {
570         region.agg_size_full += size_full - TCG_HIGHWATER;
571     }
572     qemu_mutex_unlock(&region.lock);
573     return err;
574 }
575 
576 /*
577  * Perform a context's first region allocation.
578  * This function does _not_ increment region.agg_size_full.
579  */
580 static inline bool tcg_region_initial_alloc__locked(TCGContext *s)
581 {
582     return tcg_region_alloc__locked(s);
583 }
584 
585 /* Call from a safe-work context */
586 void tcg_region_reset_all(void)
587 {
588     unsigned int n_ctxs = atomic_read(&n_tcg_ctxs);
589     unsigned int i;
590 
591     qemu_mutex_lock(&region.lock);
592     region.current = 0;
593     region.agg_size_full = 0;
594 
595     for (i = 0; i < n_ctxs; i++) {
596         TCGContext *s = atomic_read(&tcg_ctxs[i]);
597         bool err = tcg_region_initial_alloc__locked(s);
598 
599         g_assert(!err);
600     }
601     qemu_mutex_unlock(&region.lock);
602 
603     tcg_region_tree_reset_all();
604 }
605 
606 #ifdef CONFIG_USER_ONLY
607 static size_t tcg_n_regions(void)
608 {
609     return 1;
610 }
611 #else
612 /*
613  * It is likely that some vCPUs will translate more code than others, so we
614  * first try to set more regions than max_cpus, with those regions being of
615  * reasonable size. If that's not possible we make do by evenly dividing
616  * the code_gen_buffer among the vCPUs.
617  */
618 static size_t tcg_n_regions(void)
619 {
620     size_t i;
621 
622     /* Use a single region if all we have is one vCPU thread */
623     if (max_cpus == 1 || !qemu_tcg_mttcg_enabled()) {
624         return 1;
625     }
626 
627     /* Try to have more regions than max_cpus, with each region being >= 2 MB */
628     for (i = 8; i > 0; i--) {
629         size_t regions_per_thread = i;
630         size_t region_size;
631 
632         region_size = tcg_init_ctx.code_gen_buffer_size;
633         region_size /= max_cpus * regions_per_thread;
634 
635         if (region_size >= 2 * 1024u * 1024) {
636             return max_cpus * regions_per_thread;
637         }
638     }
639     /* If we can't, then just allocate one region per vCPU thread */
640     return max_cpus;
641 }
642 #endif
643 
644 /*
645  * Initializes region partitioning.
646  *
647  * Called at init time from the parent thread (i.e. the one calling
648  * tcg_context_init), after the target's TCG globals have been set.
649  *
650  * Region partitioning works by splitting code_gen_buffer into separate regions,
651  * and then assigning regions to TCG threads so that the threads can translate
652  * code in parallel without synchronization.
653  *
654  * In softmmu the number of TCG threads is bounded by max_cpus, so we use at
655  * least max_cpus regions in MTTCG. In !MTTCG we use a single region.
656  * Note that the TCG options from the command-line (i.e. -accel accel=tcg,[...])
657  * must have been parsed before calling this function, since it calls
658  * qemu_tcg_mttcg_enabled().
659  *
660  * In user-mode we use a single region.  Having multiple regions in user-mode
661  * is not supported, because the number of vCPU threads (recall that each thread
662  * spawned by the guest corresponds to a vCPU thread) is only bounded by the
663  * OS, and usually this number is huge (tens of thousands is not uncommon).
664  * Thus, given this large bound on the number of vCPU threads and the fact
665  * that code_gen_buffer is allocated at compile-time, we cannot guarantee
666  * that the availability of at least one region per vCPU thread.
667  *
668  * However, this user-mode limitation is unlikely to be a significant problem
669  * in practice. Multi-threaded guests share most if not all of their translated
670  * code, which makes parallel code generation less appealing than in softmmu.
671  */
672 void tcg_region_init(void)
673 {
674     void *buf = tcg_init_ctx.code_gen_buffer;
675     void *aligned;
676     size_t size = tcg_init_ctx.code_gen_buffer_size;
677     size_t page_size = qemu_real_host_page_size;
678     size_t region_size;
679     size_t n_regions;
680     size_t i;
681 
682     n_regions = tcg_n_regions();
683 
684     /* The first region will be 'aligned - buf' bytes larger than the others */
685     aligned = QEMU_ALIGN_PTR_UP(buf, page_size);
686     g_assert(aligned < tcg_init_ctx.code_gen_buffer + size);
687     /*
688      * Make region_size a multiple of page_size, using aligned as the start.
689      * As a result of this we might end up with a few extra pages at the end of
690      * the buffer; we will assign those to the last region.
691      */
692     region_size = (size - (aligned - buf)) / n_regions;
693     region_size = QEMU_ALIGN_DOWN(region_size, page_size);
694 
695     /* A region must have at least 2 pages; one code, one guard */
696     g_assert(region_size >= 2 * page_size);
697 
698     /* init the region struct */
699     qemu_mutex_init(&region.lock);
700     region.n = n_regions;
701     region.size = region_size - page_size;
702     region.stride = region_size;
703     region.start = buf;
704     region.start_aligned = aligned;
705     /* page-align the end, since its last page will be a guard page */
706     region.end = QEMU_ALIGN_PTR_DOWN(buf + size, page_size);
707     /* account for that last guard page */
708     region.end -= page_size;
709 
710     /* set guard pages */
711     for (i = 0; i < region.n; i++) {
712         void *start, *end;
713         int rc;
714 
715         tcg_region_bounds(i, &start, &end);
716         rc = qemu_mprotect_none(end, page_size);
717         g_assert(!rc);
718     }
719 
720     tcg_region_trees_init();
721 
722     /* In user-mode we support only one ctx, so do the initial allocation now */
723 #ifdef CONFIG_USER_ONLY
724     {
725         bool err = tcg_region_initial_alloc__locked(tcg_ctx);
726 
727         g_assert(!err);
728     }
729 #endif
730 }
731 
732 /*
733  * All TCG threads except the parent (i.e. the one that called tcg_context_init
734  * and registered the target's TCG globals) must register with this function
735  * before initiating translation.
736  *
737  * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation
738  * of tcg_region_init() for the reasoning behind this.
739  *
740  * In softmmu each caller registers its context in tcg_ctxs[]. Note that in
741  * softmmu tcg_ctxs[] does not track tcg_ctx_init, since the initial context
742  * is not used anymore for translation once this function is called.
743  *
744  * Not tracking tcg_init_ctx in tcg_ctxs[] in softmmu keeps code that iterates
745  * over the array (e.g. tcg_code_size() the same for both softmmu and user-mode.
746  */
747 #ifdef CONFIG_USER_ONLY
748 void tcg_register_thread(void)
749 {
750     tcg_ctx = &tcg_init_ctx;
751 }
752 #else
753 void tcg_register_thread(void)
754 {
755     TCGContext *s = g_malloc(sizeof(*s));
756     unsigned int i, n;
757     bool err;
758 
759     *s = tcg_init_ctx;
760 
761     /* Relink mem_base.  */
762     for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) {
763         if (tcg_init_ctx.temps[i].mem_base) {
764             ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps;
765             tcg_debug_assert(b >= 0 && b < n);
766             s->temps[i].mem_base = &s->temps[b];
767         }
768     }
769 
770     /* Claim an entry in tcg_ctxs */
771     n = atomic_fetch_inc(&n_tcg_ctxs);
772     g_assert(n < max_cpus);
773     atomic_set(&tcg_ctxs[n], s);
774 
775     tcg_ctx = s;
776     qemu_mutex_lock(&region.lock);
777     err = tcg_region_initial_alloc__locked(tcg_ctx);
778     g_assert(!err);
779     qemu_mutex_unlock(&region.lock);
780 }
781 #endif /* !CONFIG_USER_ONLY */
782 
783 /*
784  * Returns the size (in bytes) of all translated code (i.e. from all regions)
785  * currently in the cache.
786  * See also: tcg_code_capacity()
787  * Do not confuse with tcg_current_code_size(); that one applies to a single
788  * TCG context.
789  */
790 size_t tcg_code_size(void)
791 {
792     unsigned int n_ctxs = atomic_read(&n_tcg_ctxs);
793     unsigned int i;
794     size_t total;
795 
796     qemu_mutex_lock(&region.lock);
797     total = region.agg_size_full;
798     for (i = 0; i < n_ctxs; i++) {
799         const TCGContext *s = atomic_read(&tcg_ctxs[i]);
800         size_t size;
801 
802         size = atomic_read(&s->code_gen_ptr) - s->code_gen_buffer;
803         g_assert(size <= s->code_gen_buffer_size);
804         total += size;
805     }
806     qemu_mutex_unlock(&region.lock);
807     return total;
808 }
809 
810 /*
811  * Returns the code capacity (in bytes) of the entire cache, i.e. including all
812  * regions.
813  * See also: tcg_code_size()
814  */
815 size_t tcg_code_capacity(void)
816 {
817     size_t guard_size, capacity;
818 
819     /* no need for synchronization; these variables are set at init time */
820     guard_size = region.stride - region.size;
821     capacity = region.end + guard_size - region.start;
822     capacity -= region.n * (guard_size + TCG_HIGHWATER);
823     return capacity;
824 }
825 
826 size_t tcg_tb_phys_invalidate_count(void)
827 {
828     unsigned int n_ctxs = atomic_read(&n_tcg_ctxs);
829     unsigned int i;
830     size_t total = 0;
831 
832     for (i = 0; i < n_ctxs; i++) {
833         const TCGContext *s = atomic_read(&tcg_ctxs[i]);
834 
835         total += atomic_read(&s->tb_phys_invalidate_count);
836     }
837     return total;
838 }
839 
840 /* pool based memory allocation */
841 void *tcg_malloc_internal(TCGContext *s, int size)
842 {
843     TCGPool *p;
844     int pool_size;
845 
846     if (size > TCG_POOL_CHUNK_SIZE) {
847         /* big malloc: insert a new pool (XXX: could optimize) */
848         p = g_malloc(sizeof(TCGPool) + size);
849         p->size = size;
850         p->next = s->pool_first_large;
851         s->pool_first_large = p;
852         return p->data;
853     } else {
854         p = s->pool_current;
855         if (!p) {
856             p = s->pool_first;
857             if (!p)
858                 goto new_pool;
859         } else {
860             if (!p->next) {
861             new_pool:
862                 pool_size = TCG_POOL_CHUNK_SIZE;
863                 p = g_malloc(sizeof(TCGPool) + pool_size);
864                 p->size = pool_size;
865                 p->next = NULL;
866                 if (s->pool_current)
867                     s->pool_current->next = p;
868                 else
869                     s->pool_first = p;
870             } else {
871                 p = p->next;
872             }
873         }
874     }
875     s->pool_current = p;
876     s->pool_cur = p->data + size;
877     s->pool_end = p->data + p->size;
878     return p->data;
879 }
880 
881 void tcg_pool_reset(TCGContext *s)
882 {
883     TCGPool *p, *t;
884     for (p = s->pool_first_large; p; p = t) {
885         t = p->next;
886         g_free(p);
887     }
888     s->pool_first_large = NULL;
889     s->pool_cur = s->pool_end = NULL;
890     s->pool_current = NULL;
891 }
892 
893 typedef struct TCGHelperInfo {
894     void *func;
895     const char *name;
896     unsigned flags;
897     unsigned sizemask;
898 } TCGHelperInfo;
899 
900 #include "exec/helper-proto.h"
901 
902 static const TCGHelperInfo all_helpers[] = {
903 #include "exec/helper-tcg.h"
904 };
905 static GHashTable *helper_table;
906 
907 static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)];
908 static void process_op_defs(TCGContext *s);
909 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
910                                             TCGReg reg, const char *name);
911 
912 void tcg_context_init(TCGContext *s)
913 {
914     int op, total_args, n, i;
915     TCGOpDef *def;
916     TCGArgConstraint *args_ct;
917     int *sorted_args;
918     TCGTemp *ts;
919 
920     memset(s, 0, sizeof(*s));
921     s->nb_globals = 0;
922 
923     /* Count total number of arguments and allocate the corresponding
924        space */
925     total_args = 0;
926     for(op = 0; op < NB_OPS; op++) {
927         def = &tcg_op_defs[op];
928         n = def->nb_iargs + def->nb_oargs;
929         total_args += n;
930     }
931 
932     args_ct = g_malloc(sizeof(TCGArgConstraint) * total_args);
933     sorted_args = g_malloc(sizeof(int) * total_args);
934 
935     for(op = 0; op < NB_OPS; op++) {
936         def = &tcg_op_defs[op];
937         def->args_ct = args_ct;
938         def->sorted_args = sorted_args;
939         n = def->nb_iargs + def->nb_oargs;
940         sorted_args += n;
941         args_ct += n;
942     }
943 
944     /* Register helpers.  */
945     /* Use g_direct_hash/equal for direct pointer comparisons on func.  */
946     helper_table = g_hash_table_new(NULL, NULL);
947 
948     for (i = 0; i < ARRAY_SIZE(all_helpers); ++i) {
949         g_hash_table_insert(helper_table, (gpointer)all_helpers[i].func,
950                             (gpointer)&all_helpers[i]);
951     }
952 
953     tcg_target_init(s);
954     process_op_defs(s);
955 
956     /* Reverse the order of the saved registers, assuming they're all at
957        the start of tcg_target_reg_alloc_order.  */
958     for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) {
959         int r = tcg_target_reg_alloc_order[n];
960         if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) {
961             break;
962         }
963     }
964     for (i = 0; i < n; ++i) {
965         indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i];
966     }
967     for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) {
968         indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i];
969     }
970 
971     tcg_ctx = s;
972     /*
973      * In user-mode we simply share the init context among threads, since we
974      * use a single region. See the documentation tcg_region_init() for the
975      * reasoning behind this.
976      * In softmmu we will have at most max_cpus TCG threads.
977      */
978 #ifdef CONFIG_USER_ONLY
979     tcg_ctxs = &tcg_ctx;
980     n_tcg_ctxs = 1;
981 #else
982     tcg_ctxs = g_new(TCGContext *, max_cpus);
983 #endif
984 
985     tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0));
986     ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env");
987     cpu_env = temp_tcgv_ptr(ts);
988 }
989 
990 /*
991  * Allocate TBs right before their corresponding translated code, making
992  * sure that TBs and code are on different cache lines.
993  */
994 TranslationBlock *tcg_tb_alloc(TCGContext *s)
995 {
996     uintptr_t align = qemu_icache_linesize;
997     TranslationBlock *tb;
998     void *next;
999 
1000  retry:
1001     tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align);
1002     next = (void *)ROUND_UP((uintptr_t)(tb + 1), align);
1003 
1004     if (unlikely(next > s->code_gen_highwater)) {
1005         if (tcg_region_alloc(s)) {
1006             return NULL;
1007         }
1008         goto retry;
1009     }
1010     atomic_set(&s->code_gen_ptr, next);
1011     s->data_gen_ptr = NULL;
1012     return tb;
1013 }
1014 
1015 void tcg_prologue_init(TCGContext *s)
1016 {
1017     size_t prologue_size, total_size;
1018     void *buf0, *buf1;
1019 
1020     /* Put the prologue at the beginning of code_gen_buffer.  */
1021     buf0 = s->code_gen_buffer;
1022     total_size = s->code_gen_buffer_size;
1023     s->code_ptr = buf0;
1024     s->code_buf = buf0;
1025     s->data_gen_ptr = NULL;
1026     s->code_gen_prologue = buf0;
1027 
1028     /* Compute a high-water mark, at which we voluntarily flush the buffer
1029        and start over.  The size here is arbitrary, significantly larger
1030        than we expect the code generation for any one opcode to require.  */
1031     s->code_gen_highwater = s->code_gen_buffer + (total_size - TCG_HIGHWATER);
1032 
1033 #ifdef TCG_TARGET_NEED_POOL_LABELS
1034     s->pool_labels = NULL;
1035 #endif
1036 
1037     /* Generate the prologue.  */
1038     tcg_target_qemu_prologue(s);
1039 
1040 #ifdef TCG_TARGET_NEED_POOL_LABELS
1041     /* Allow the prologue to put e.g. guest_base into a pool entry.  */
1042     {
1043         int result = tcg_out_pool_finalize(s);
1044         tcg_debug_assert(result == 0);
1045     }
1046 #endif
1047 
1048     buf1 = s->code_ptr;
1049     flush_icache_range((uintptr_t)buf0, (uintptr_t)buf1);
1050 
1051     /* Deduct the prologue from the buffer.  */
1052     prologue_size = tcg_current_code_size(s);
1053     s->code_gen_ptr = buf1;
1054     s->code_gen_buffer = buf1;
1055     s->code_buf = buf1;
1056     total_size -= prologue_size;
1057     s->code_gen_buffer_size = total_size;
1058 
1059     tcg_register_jit(s->code_gen_buffer, total_size);
1060 
1061 #ifdef DEBUG_DISAS
1062     if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
1063         qemu_log_lock();
1064         qemu_log("PROLOGUE: [size=%zu]\n", prologue_size);
1065         if (s->data_gen_ptr) {
1066             size_t code_size = s->data_gen_ptr - buf0;
1067             size_t data_size = prologue_size - code_size;
1068             size_t i;
1069 
1070             log_disas(buf0, code_size);
1071 
1072             for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) {
1073                 if (sizeof(tcg_target_ulong) == 8) {
1074                     qemu_log("0x%08" PRIxPTR ":  .quad  0x%016" PRIx64 "\n",
1075                              (uintptr_t)s->data_gen_ptr + i,
1076                              *(uint64_t *)(s->data_gen_ptr + i));
1077                 } else {
1078                     qemu_log("0x%08" PRIxPTR ":  .long  0x%08x\n",
1079                              (uintptr_t)s->data_gen_ptr + i,
1080                              *(uint32_t *)(s->data_gen_ptr + i));
1081                 }
1082             }
1083         } else {
1084             log_disas(buf0, prologue_size);
1085         }
1086         qemu_log("\n");
1087         qemu_log_flush();
1088         qemu_log_unlock();
1089     }
1090 #endif
1091 
1092     /* Assert that goto_ptr is implemented completely.  */
1093     if (TCG_TARGET_HAS_goto_ptr) {
1094         tcg_debug_assert(s->code_gen_epilogue != NULL);
1095     }
1096 }
1097 
1098 void tcg_func_start(TCGContext *s)
1099 {
1100     tcg_pool_reset(s);
1101     s->nb_temps = s->nb_globals;
1102 
1103     /* No temps have been previously allocated for size or locality.  */
1104     memset(s->free_temps, 0, sizeof(s->free_temps));
1105 
1106     s->nb_ops = 0;
1107     s->nb_labels = 0;
1108     s->current_frame_offset = s->frame_start;
1109 
1110 #ifdef CONFIG_DEBUG_TCG
1111     s->goto_tb_issue_mask = 0;
1112 #endif
1113 
1114     QTAILQ_INIT(&s->ops);
1115     QTAILQ_INIT(&s->free_ops);
1116     QSIMPLEQ_INIT(&s->labels);
1117 }
1118 
1119 static inline TCGTemp *tcg_temp_alloc(TCGContext *s)
1120 {
1121     int n = s->nb_temps++;
1122     tcg_debug_assert(n < TCG_MAX_TEMPS);
1123     return memset(&s->temps[n], 0, sizeof(TCGTemp));
1124 }
1125 
1126 static inline TCGTemp *tcg_global_alloc(TCGContext *s)
1127 {
1128     TCGTemp *ts;
1129 
1130     tcg_debug_assert(s->nb_globals == s->nb_temps);
1131     s->nb_globals++;
1132     ts = tcg_temp_alloc(s);
1133     ts->temp_global = 1;
1134 
1135     return ts;
1136 }
1137 
1138 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
1139                                             TCGReg reg, const char *name)
1140 {
1141     TCGTemp *ts;
1142 
1143     if (TCG_TARGET_REG_BITS == 32 && type != TCG_TYPE_I32) {
1144         tcg_abort();
1145     }
1146 
1147     ts = tcg_global_alloc(s);
1148     ts->base_type = type;
1149     ts->type = type;
1150     ts->fixed_reg = 1;
1151     ts->reg = reg;
1152     ts->name = name;
1153     tcg_regset_set_reg(s->reserved_regs, reg);
1154 
1155     return ts;
1156 }
1157 
1158 void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size)
1159 {
1160     s->frame_start = start;
1161     s->frame_end = start + size;
1162     s->frame_temp
1163         = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame");
1164 }
1165 
1166 TCGTemp *tcg_global_mem_new_internal(TCGType type, TCGv_ptr base,
1167                                      intptr_t offset, const char *name)
1168 {
1169     TCGContext *s = tcg_ctx;
1170     TCGTemp *base_ts = tcgv_ptr_temp(base);
1171     TCGTemp *ts = tcg_global_alloc(s);
1172     int indirect_reg = 0, bigendian = 0;
1173 #ifdef HOST_WORDS_BIGENDIAN
1174     bigendian = 1;
1175 #endif
1176 
1177     if (!base_ts->fixed_reg) {
1178         /* We do not support double-indirect registers.  */
1179         tcg_debug_assert(!base_ts->indirect_reg);
1180         base_ts->indirect_base = 1;
1181         s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64
1182                             ? 2 : 1);
1183         indirect_reg = 1;
1184     }
1185 
1186     if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
1187         TCGTemp *ts2 = tcg_global_alloc(s);
1188         char buf[64];
1189 
1190         ts->base_type = TCG_TYPE_I64;
1191         ts->type = TCG_TYPE_I32;
1192         ts->indirect_reg = indirect_reg;
1193         ts->mem_allocated = 1;
1194         ts->mem_base = base_ts;
1195         ts->mem_offset = offset + bigendian * 4;
1196         pstrcpy(buf, sizeof(buf), name);
1197         pstrcat(buf, sizeof(buf), "_0");
1198         ts->name = strdup(buf);
1199 
1200         tcg_debug_assert(ts2 == ts + 1);
1201         ts2->base_type = TCG_TYPE_I64;
1202         ts2->type = TCG_TYPE_I32;
1203         ts2->indirect_reg = indirect_reg;
1204         ts2->mem_allocated = 1;
1205         ts2->mem_base = base_ts;
1206         ts2->mem_offset = offset + (1 - bigendian) * 4;
1207         pstrcpy(buf, sizeof(buf), name);
1208         pstrcat(buf, sizeof(buf), "_1");
1209         ts2->name = strdup(buf);
1210     } else {
1211         ts->base_type = type;
1212         ts->type = type;
1213         ts->indirect_reg = indirect_reg;
1214         ts->mem_allocated = 1;
1215         ts->mem_base = base_ts;
1216         ts->mem_offset = offset;
1217         ts->name = name;
1218     }
1219     return ts;
1220 }
1221 
1222 TCGTemp *tcg_temp_new_internal(TCGType type, bool temp_local)
1223 {
1224     TCGContext *s = tcg_ctx;
1225     TCGTemp *ts;
1226     int idx, k;
1227 
1228     k = type + (temp_local ? TCG_TYPE_COUNT : 0);
1229     idx = find_first_bit(s->free_temps[k].l, TCG_MAX_TEMPS);
1230     if (idx < TCG_MAX_TEMPS) {
1231         /* There is already an available temp with the right type.  */
1232         clear_bit(idx, s->free_temps[k].l);
1233 
1234         ts = &s->temps[idx];
1235         ts->temp_allocated = 1;
1236         tcg_debug_assert(ts->base_type == type);
1237         tcg_debug_assert(ts->temp_local == temp_local);
1238     } else {
1239         ts = tcg_temp_alloc(s);
1240         if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
1241             TCGTemp *ts2 = tcg_temp_alloc(s);
1242 
1243             ts->base_type = type;
1244             ts->type = TCG_TYPE_I32;
1245             ts->temp_allocated = 1;
1246             ts->temp_local = temp_local;
1247 
1248             tcg_debug_assert(ts2 == ts + 1);
1249             ts2->base_type = TCG_TYPE_I64;
1250             ts2->type = TCG_TYPE_I32;
1251             ts2->temp_allocated = 1;
1252             ts2->temp_local = temp_local;
1253         } else {
1254             ts->base_type = type;
1255             ts->type = type;
1256             ts->temp_allocated = 1;
1257             ts->temp_local = temp_local;
1258         }
1259     }
1260 
1261 #if defined(CONFIG_DEBUG_TCG)
1262     s->temps_in_use++;
1263 #endif
1264     return ts;
1265 }
1266 
1267 TCGv_vec tcg_temp_new_vec(TCGType type)
1268 {
1269     TCGTemp *t;
1270 
1271 #ifdef CONFIG_DEBUG_TCG
1272     switch (type) {
1273     case TCG_TYPE_V64:
1274         assert(TCG_TARGET_HAS_v64);
1275         break;
1276     case TCG_TYPE_V128:
1277         assert(TCG_TARGET_HAS_v128);
1278         break;
1279     case TCG_TYPE_V256:
1280         assert(TCG_TARGET_HAS_v256);
1281         break;
1282     default:
1283         g_assert_not_reached();
1284     }
1285 #endif
1286 
1287     t = tcg_temp_new_internal(type, 0);
1288     return temp_tcgv_vec(t);
1289 }
1290 
1291 /* Create a new temp of the same type as an existing temp.  */
1292 TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match)
1293 {
1294     TCGTemp *t = tcgv_vec_temp(match);
1295 
1296     tcg_debug_assert(t->temp_allocated != 0);
1297 
1298     t = tcg_temp_new_internal(t->base_type, 0);
1299     return temp_tcgv_vec(t);
1300 }
1301 
1302 void tcg_temp_free_internal(TCGTemp *ts)
1303 {
1304     TCGContext *s = tcg_ctx;
1305     int k, idx;
1306 
1307 #if defined(CONFIG_DEBUG_TCG)
1308     s->temps_in_use--;
1309     if (s->temps_in_use < 0) {
1310         fprintf(stderr, "More temporaries freed than allocated!\n");
1311     }
1312 #endif
1313 
1314     tcg_debug_assert(ts->temp_global == 0);
1315     tcg_debug_assert(ts->temp_allocated != 0);
1316     ts->temp_allocated = 0;
1317 
1318     idx = temp_idx(ts);
1319     k = ts->base_type + (ts->temp_local ? TCG_TYPE_COUNT : 0);
1320     set_bit(idx, s->free_temps[k].l);
1321 }
1322 
1323 TCGv_i32 tcg_const_i32(int32_t val)
1324 {
1325     TCGv_i32 t0;
1326     t0 = tcg_temp_new_i32();
1327     tcg_gen_movi_i32(t0, val);
1328     return t0;
1329 }
1330 
1331 TCGv_i64 tcg_const_i64(int64_t val)
1332 {
1333     TCGv_i64 t0;
1334     t0 = tcg_temp_new_i64();
1335     tcg_gen_movi_i64(t0, val);
1336     return t0;
1337 }
1338 
1339 TCGv_i32 tcg_const_local_i32(int32_t val)
1340 {
1341     TCGv_i32 t0;
1342     t0 = tcg_temp_local_new_i32();
1343     tcg_gen_movi_i32(t0, val);
1344     return t0;
1345 }
1346 
1347 TCGv_i64 tcg_const_local_i64(int64_t val)
1348 {
1349     TCGv_i64 t0;
1350     t0 = tcg_temp_local_new_i64();
1351     tcg_gen_movi_i64(t0, val);
1352     return t0;
1353 }
1354 
1355 #if defined(CONFIG_DEBUG_TCG)
1356 void tcg_clear_temp_count(void)
1357 {
1358     TCGContext *s = tcg_ctx;
1359     s->temps_in_use = 0;
1360 }
1361 
1362 int tcg_check_temp_count(void)
1363 {
1364     TCGContext *s = tcg_ctx;
1365     if (s->temps_in_use) {
1366         /* Clear the count so that we don't give another
1367          * warning immediately next time around.
1368          */
1369         s->temps_in_use = 0;
1370         return 1;
1371     }
1372     return 0;
1373 }
1374 #endif
1375 
1376 /* Return true if OP may appear in the opcode stream.
1377    Test the runtime variable that controls each opcode.  */
1378 bool tcg_op_supported(TCGOpcode op)
1379 {
1380     const bool have_vec
1381         = TCG_TARGET_HAS_v64 | TCG_TARGET_HAS_v128 | TCG_TARGET_HAS_v256;
1382 
1383     switch (op) {
1384     case INDEX_op_discard:
1385     case INDEX_op_set_label:
1386     case INDEX_op_call:
1387     case INDEX_op_br:
1388     case INDEX_op_mb:
1389     case INDEX_op_insn_start:
1390     case INDEX_op_exit_tb:
1391     case INDEX_op_goto_tb:
1392     case INDEX_op_qemu_ld_i32:
1393     case INDEX_op_qemu_st_i32:
1394     case INDEX_op_qemu_ld_i64:
1395     case INDEX_op_qemu_st_i64:
1396         return true;
1397 
1398     case INDEX_op_goto_ptr:
1399         return TCG_TARGET_HAS_goto_ptr;
1400 
1401     case INDEX_op_mov_i32:
1402     case INDEX_op_movi_i32:
1403     case INDEX_op_setcond_i32:
1404     case INDEX_op_brcond_i32:
1405     case INDEX_op_ld8u_i32:
1406     case INDEX_op_ld8s_i32:
1407     case INDEX_op_ld16u_i32:
1408     case INDEX_op_ld16s_i32:
1409     case INDEX_op_ld_i32:
1410     case INDEX_op_st8_i32:
1411     case INDEX_op_st16_i32:
1412     case INDEX_op_st_i32:
1413     case INDEX_op_add_i32:
1414     case INDEX_op_sub_i32:
1415     case INDEX_op_mul_i32:
1416     case INDEX_op_and_i32:
1417     case INDEX_op_or_i32:
1418     case INDEX_op_xor_i32:
1419     case INDEX_op_shl_i32:
1420     case INDEX_op_shr_i32:
1421     case INDEX_op_sar_i32:
1422         return true;
1423 
1424     case INDEX_op_movcond_i32:
1425         return TCG_TARGET_HAS_movcond_i32;
1426     case INDEX_op_div_i32:
1427     case INDEX_op_divu_i32:
1428         return TCG_TARGET_HAS_div_i32;
1429     case INDEX_op_rem_i32:
1430     case INDEX_op_remu_i32:
1431         return TCG_TARGET_HAS_rem_i32;
1432     case INDEX_op_div2_i32:
1433     case INDEX_op_divu2_i32:
1434         return TCG_TARGET_HAS_div2_i32;
1435     case INDEX_op_rotl_i32:
1436     case INDEX_op_rotr_i32:
1437         return TCG_TARGET_HAS_rot_i32;
1438     case INDEX_op_deposit_i32:
1439         return TCG_TARGET_HAS_deposit_i32;
1440     case INDEX_op_extract_i32:
1441         return TCG_TARGET_HAS_extract_i32;
1442     case INDEX_op_sextract_i32:
1443         return TCG_TARGET_HAS_sextract_i32;
1444     case INDEX_op_extract2_i32:
1445         return TCG_TARGET_HAS_extract2_i32;
1446     case INDEX_op_add2_i32:
1447         return TCG_TARGET_HAS_add2_i32;
1448     case INDEX_op_sub2_i32:
1449         return TCG_TARGET_HAS_sub2_i32;
1450     case INDEX_op_mulu2_i32:
1451         return TCG_TARGET_HAS_mulu2_i32;
1452     case INDEX_op_muls2_i32:
1453         return TCG_TARGET_HAS_muls2_i32;
1454     case INDEX_op_muluh_i32:
1455         return TCG_TARGET_HAS_muluh_i32;
1456     case INDEX_op_mulsh_i32:
1457         return TCG_TARGET_HAS_mulsh_i32;
1458     case INDEX_op_ext8s_i32:
1459         return TCG_TARGET_HAS_ext8s_i32;
1460     case INDEX_op_ext16s_i32:
1461         return TCG_TARGET_HAS_ext16s_i32;
1462     case INDEX_op_ext8u_i32:
1463         return TCG_TARGET_HAS_ext8u_i32;
1464     case INDEX_op_ext16u_i32:
1465         return TCG_TARGET_HAS_ext16u_i32;
1466     case INDEX_op_bswap16_i32:
1467         return TCG_TARGET_HAS_bswap16_i32;
1468     case INDEX_op_bswap32_i32:
1469         return TCG_TARGET_HAS_bswap32_i32;
1470     case INDEX_op_not_i32:
1471         return TCG_TARGET_HAS_not_i32;
1472     case INDEX_op_neg_i32:
1473         return TCG_TARGET_HAS_neg_i32;
1474     case INDEX_op_andc_i32:
1475         return TCG_TARGET_HAS_andc_i32;
1476     case INDEX_op_orc_i32:
1477         return TCG_TARGET_HAS_orc_i32;
1478     case INDEX_op_eqv_i32:
1479         return TCG_TARGET_HAS_eqv_i32;
1480     case INDEX_op_nand_i32:
1481         return TCG_TARGET_HAS_nand_i32;
1482     case INDEX_op_nor_i32:
1483         return TCG_TARGET_HAS_nor_i32;
1484     case INDEX_op_clz_i32:
1485         return TCG_TARGET_HAS_clz_i32;
1486     case INDEX_op_ctz_i32:
1487         return TCG_TARGET_HAS_ctz_i32;
1488     case INDEX_op_ctpop_i32:
1489         return TCG_TARGET_HAS_ctpop_i32;
1490 
1491     case INDEX_op_brcond2_i32:
1492     case INDEX_op_setcond2_i32:
1493         return TCG_TARGET_REG_BITS == 32;
1494 
1495     case INDEX_op_mov_i64:
1496     case INDEX_op_movi_i64:
1497     case INDEX_op_setcond_i64:
1498     case INDEX_op_brcond_i64:
1499     case INDEX_op_ld8u_i64:
1500     case INDEX_op_ld8s_i64:
1501     case INDEX_op_ld16u_i64:
1502     case INDEX_op_ld16s_i64:
1503     case INDEX_op_ld32u_i64:
1504     case INDEX_op_ld32s_i64:
1505     case INDEX_op_ld_i64:
1506     case INDEX_op_st8_i64:
1507     case INDEX_op_st16_i64:
1508     case INDEX_op_st32_i64:
1509     case INDEX_op_st_i64:
1510     case INDEX_op_add_i64:
1511     case INDEX_op_sub_i64:
1512     case INDEX_op_mul_i64:
1513     case INDEX_op_and_i64:
1514     case INDEX_op_or_i64:
1515     case INDEX_op_xor_i64:
1516     case INDEX_op_shl_i64:
1517     case INDEX_op_shr_i64:
1518     case INDEX_op_sar_i64:
1519     case INDEX_op_ext_i32_i64:
1520     case INDEX_op_extu_i32_i64:
1521         return TCG_TARGET_REG_BITS == 64;
1522 
1523     case INDEX_op_movcond_i64:
1524         return TCG_TARGET_HAS_movcond_i64;
1525     case INDEX_op_div_i64:
1526     case INDEX_op_divu_i64:
1527         return TCG_TARGET_HAS_div_i64;
1528     case INDEX_op_rem_i64:
1529     case INDEX_op_remu_i64:
1530         return TCG_TARGET_HAS_rem_i64;
1531     case INDEX_op_div2_i64:
1532     case INDEX_op_divu2_i64:
1533         return TCG_TARGET_HAS_div2_i64;
1534     case INDEX_op_rotl_i64:
1535     case INDEX_op_rotr_i64:
1536         return TCG_TARGET_HAS_rot_i64;
1537     case INDEX_op_deposit_i64:
1538         return TCG_TARGET_HAS_deposit_i64;
1539     case INDEX_op_extract_i64:
1540         return TCG_TARGET_HAS_extract_i64;
1541     case INDEX_op_sextract_i64:
1542         return TCG_TARGET_HAS_sextract_i64;
1543     case INDEX_op_extract2_i64:
1544         return TCG_TARGET_HAS_extract2_i64;
1545     case INDEX_op_extrl_i64_i32:
1546         return TCG_TARGET_HAS_extrl_i64_i32;
1547     case INDEX_op_extrh_i64_i32:
1548         return TCG_TARGET_HAS_extrh_i64_i32;
1549     case INDEX_op_ext8s_i64:
1550         return TCG_TARGET_HAS_ext8s_i64;
1551     case INDEX_op_ext16s_i64:
1552         return TCG_TARGET_HAS_ext16s_i64;
1553     case INDEX_op_ext32s_i64:
1554         return TCG_TARGET_HAS_ext32s_i64;
1555     case INDEX_op_ext8u_i64:
1556         return TCG_TARGET_HAS_ext8u_i64;
1557     case INDEX_op_ext16u_i64:
1558         return TCG_TARGET_HAS_ext16u_i64;
1559     case INDEX_op_ext32u_i64:
1560         return TCG_TARGET_HAS_ext32u_i64;
1561     case INDEX_op_bswap16_i64:
1562         return TCG_TARGET_HAS_bswap16_i64;
1563     case INDEX_op_bswap32_i64:
1564         return TCG_TARGET_HAS_bswap32_i64;
1565     case INDEX_op_bswap64_i64:
1566         return TCG_TARGET_HAS_bswap64_i64;
1567     case INDEX_op_not_i64:
1568         return TCG_TARGET_HAS_not_i64;
1569     case INDEX_op_neg_i64:
1570         return TCG_TARGET_HAS_neg_i64;
1571     case INDEX_op_andc_i64:
1572         return TCG_TARGET_HAS_andc_i64;
1573     case INDEX_op_orc_i64:
1574         return TCG_TARGET_HAS_orc_i64;
1575     case INDEX_op_eqv_i64:
1576         return TCG_TARGET_HAS_eqv_i64;
1577     case INDEX_op_nand_i64:
1578         return TCG_TARGET_HAS_nand_i64;
1579     case INDEX_op_nor_i64:
1580         return TCG_TARGET_HAS_nor_i64;
1581     case INDEX_op_clz_i64:
1582         return TCG_TARGET_HAS_clz_i64;
1583     case INDEX_op_ctz_i64:
1584         return TCG_TARGET_HAS_ctz_i64;
1585     case INDEX_op_ctpop_i64:
1586         return TCG_TARGET_HAS_ctpop_i64;
1587     case INDEX_op_add2_i64:
1588         return TCG_TARGET_HAS_add2_i64;
1589     case INDEX_op_sub2_i64:
1590         return TCG_TARGET_HAS_sub2_i64;
1591     case INDEX_op_mulu2_i64:
1592         return TCG_TARGET_HAS_mulu2_i64;
1593     case INDEX_op_muls2_i64:
1594         return TCG_TARGET_HAS_muls2_i64;
1595     case INDEX_op_muluh_i64:
1596         return TCG_TARGET_HAS_muluh_i64;
1597     case INDEX_op_mulsh_i64:
1598         return TCG_TARGET_HAS_mulsh_i64;
1599 
1600     case INDEX_op_mov_vec:
1601     case INDEX_op_dup_vec:
1602     case INDEX_op_dupi_vec:
1603     case INDEX_op_dupm_vec:
1604     case INDEX_op_ld_vec:
1605     case INDEX_op_st_vec:
1606     case INDEX_op_add_vec:
1607     case INDEX_op_sub_vec:
1608     case INDEX_op_and_vec:
1609     case INDEX_op_or_vec:
1610     case INDEX_op_xor_vec:
1611     case INDEX_op_cmp_vec:
1612         return have_vec;
1613     case INDEX_op_dup2_vec:
1614         return have_vec && TCG_TARGET_REG_BITS == 32;
1615     case INDEX_op_not_vec:
1616         return have_vec && TCG_TARGET_HAS_not_vec;
1617     case INDEX_op_neg_vec:
1618         return have_vec && TCG_TARGET_HAS_neg_vec;
1619     case INDEX_op_andc_vec:
1620         return have_vec && TCG_TARGET_HAS_andc_vec;
1621     case INDEX_op_orc_vec:
1622         return have_vec && TCG_TARGET_HAS_orc_vec;
1623     case INDEX_op_mul_vec:
1624         return have_vec && TCG_TARGET_HAS_mul_vec;
1625     case INDEX_op_shli_vec:
1626     case INDEX_op_shri_vec:
1627     case INDEX_op_sari_vec:
1628         return have_vec && TCG_TARGET_HAS_shi_vec;
1629     case INDEX_op_shls_vec:
1630     case INDEX_op_shrs_vec:
1631     case INDEX_op_sars_vec:
1632         return have_vec && TCG_TARGET_HAS_shs_vec;
1633     case INDEX_op_shlv_vec:
1634     case INDEX_op_shrv_vec:
1635     case INDEX_op_sarv_vec:
1636         return have_vec && TCG_TARGET_HAS_shv_vec;
1637     case INDEX_op_ssadd_vec:
1638     case INDEX_op_usadd_vec:
1639     case INDEX_op_sssub_vec:
1640     case INDEX_op_ussub_vec:
1641         return have_vec && TCG_TARGET_HAS_sat_vec;
1642     case INDEX_op_smin_vec:
1643     case INDEX_op_umin_vec:
1644     case INDEX_op_smax_vec:
1645     case INDEX_op_umax_vec:
1646         return have_vec && TCG_TARGET_HAS_minmax_vec;
1647 
1648     default:
1649         tcg_debug_assert(op > INDEX_op_last_generic && op < NB_OPS);
1650         return true;
1651     }
1652 }
1653 
1654 /* Note: we convert the 64 bit args to 32 bit and do some alignment
1655    and endian swap. Maybe it would be better to do the alignment
1656    and endian swap in tcg_reg_alloc_call(). */
1657 void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args)
1658 {
1659     int i, real_args, nb_rets, pi;
1660     unsigned sizemask, flags;
1661     TCGHelperInfo *info;
1662     TCGOp *op;
1663 
1664     info = g_hash_table_lookup(helper_table, (gpointer)func);
1665     flags = info->flags;
1666     sizemask = info->sizemask;
1667 
1668 #if defined(__sparc__) && !defined(__arch64__) \
1669     && !defined(CONFIG_TCG_INTERPRETER)
1670     /* We have 64-bit values in one register, but need to pass as two
1671        separate parameters.  Split them.  */
1672     int orig_sizemask = sizemask;
1673     int orig_nargs = nargs;
1674     TCGv_i64 retl, reth;
1675     TCGTemp *split_args[MAX_OPC_PARAM];
1676 
1677     retl = NULL;
1678     reth = NULL;
1679     if (sizemask != 0) {
1680         for (i = real_args = 0; i < nargs; ++i) {
1681             int is_64bit = sizemask & (1 << (i+1)*2);
1682             if (is_64bit) {
1683                 TCGv_i64 orig = temp_tcgv_i64(args[i]);
1684                 TCGv_i32 h = tcg_temp_new_i32();
1685                 TCGv_i32 l = tcg_temp_new_i32();
1686                 tcg_gen_extr_i64_i32(l, h, orig);
1687                 split_args[real_args++] = tcgv_i32_temp(h);
1688                 split_args[real_args++] = tcgv_i32_temp(l);
1689             } else {
1690                 split_args[real_args++] = args[i];
1691             }
1692         }
1693         nargs = real_args;
1694         args = split_args;
1695         sizemask = 0;
1696     }
1697 #elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
1698     for (i = 0; i < nargs; ++i) {
1699         int is_64bit = sizemask & (1 << (i+1)*2);
1700         int is_signed = sizemask & (2 << (i+1)*2);
1701         if (!is_64bit) {
1702             TCGv_i64 temp = tcg_temp_new_i64();
1703             TCGv_i64 orig = temp_tcgv_i64(args[i]);
1704             if (is_signed) {
1705                 tcg_gen_ext32s_i64(temp, orig);
1706             } else {
1707                 tcg_gen_ext32u_i64(temp, orig);
1708             }
1709             args[i] = tcgv_i64_temp(temp);
1710         }
1711     }
1712 #endif /* TCG_TARGET_EXTEND_ARGS */
1713 
1714     op = tcg_emit_op(INDEX_op_call);
1715 
1716     pi = 0;
1717     if (ret != NULL) {
1718 #if defined(__sparc__) && !defined(__arch64__) \
1719     && !defined(CONFIG_TCG_INTERPRETER)
1720         if (orig_sizemask & 1) {
1721             /* The 32-bit ABI is going to return the 64-bit value in
1722                the %o0/%o1 register pair.  Prepare for this by using
1723                two return temporaries, and reassemble below.  */
1724             retl = tcg_temp_new_i64();
1725             reth = tcg_temp_new_i64();
1726             op->args[pi++] = tcgv_i64_arg(reth);
1727             op->args[pi++] = tcgv_i64_arg(retl);
1728             nb_rets = 2;
1729         } else {
1730             op->args[pi++] = temp_arg(ret);
1731             nb_rets = 1;
1732         }
1733 #else
1734         if (TCG_TARGET_REG_BITS < 64 && (sizemask & 1)) {
1735 #ifdef HOST_WORDS_BIGENDIAN
1736             op->args[pi++] = temp_arg(ret + 1);
1737             op->args[pi++] = temp_arg(ret);
1738 #else
1739             op->args[pi++] = temp_arg(ret);
1740             op->args[pi++] = temp_arg(ret + 1);
1741 #endif
1742             nb_rets = 2;
1743         } else {
1744             op->args[pi++] = temp_arg(ret);
1745             nb_rets = 1;
1746         }
1747 #endif
1748     } else {
1749         nb_rets = 0;
1750     }
1751     TCGOP_CALLO(op) = nb_rets;
1752 
1753     real_args = 0;
1754     for (i = 0; i < nargs; i++) {
1755         int is_64bit = sizemask & (1 << (i+1)*2);
1756         if (TCG_TARGET_REG_BITS < 64 && is_64bit) {
1757 #ifdef TCG_TARGET_CALL_ALIGN_ARGS
1758             /* some targets want aligned 64 bit args */
1759             if (real_args & 1) {
1760                 op->args[pi++] = TCG_CALL_DUMMY_ARG;
1761                 real_args++;
1762             }
1763 #endif
1764            /* If stack grows up, then we will be placing successive
1765               arguments at lower addresses, which means we need to
1766               reverse the order compared to how we would normally
1767               treat either big or little-endian.  For those arguments
1768               that will wind up in registers, this still works for
1769               HPPA (the only current STACK_GROWSUP target) since the
1770               argument registers are *also* allocated in decreasing
1771               order.  If another such target is added, this logic may
1772               have to get more complicated to differentiate between
1773               stack arguments and register arguments.  */
1774 #if defined(HOST_WORDS_BIGENDIAN) != defined(TCG_TARGET_STACK_GROWSUP)
1775             op->args[pi++] = temp_arg(args[i] + 1);
1776             op->args[pi++] = temp_arg(args[i]);
1777 #else
1778             op->args[pi++] = temp_arg(args[i]);
1779             op->args[pi++] = temp_arg(args[i] + 1);
1780 #endif
1781             real_args += 2;
1782             continue;
1783         }
1784 
1785         op->args[pi++] = temp_arg(args[i]);
1786         real_args++;
1787     }
1788     op->args[pi++] = (uintptr_t)func;
1789     op->args[pi++] = flags;
1790     TCGOP_CALLI(op) = real_args;
1791 
1792     /* Make sure the fields didn't overflow.  */
1793     tcg_debug_assert(TCGOP_CALLI(op) == real_args);
1794     tcg_debug_assert(pi <= ARRAY_SIZE(op->args));
1795 
1796 #if defined(__sparc__) && !defined(__arch64__) \
1797     && !defined(CONFIG_TCG_INTERPRETER)
1798     /* Free all of the parts we allocated above.  */
1799     for (i = real_args = 0; i < orig_nargs; ++i) {
1800         int is_64bit = orig_sizemask & (1 << (i+1)*2);
1801         if (is_64bit) {
1802             tcg_temp_free_internal(args[real_args++]);
1803             tcg_temp_free_internal(args[real_args++]);
1804         } else {
1805             real_args++;
1806         }
1807     }
1808     if (orig_sizemask & 1) {
1809         /* The 32-bit ABI returned two 32-bit pieces.  Re-assemble them.
1810            Note that describing these as TCGv_i64 eliminates an unnecessary
1811            zero-extension that tcg_gen_concat_i32_i64 would create.  */
1812         tcg_gen_concat32_i64(temp_tcgv_i64(ret), retl, reth);
1813         tcg_temp_free_i64(retl);
1814         tcg_temp_free_i64(reth);
1815     }
1816 #elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
1817     for (i = 0; i < nargs; ++i) {
1818         int is_64bit = sizemask & (1 << (i+1)*2);
1819         if (!is_64bit) {
1820             tcg_temp_free_internal(args[i]);
1821         }
1822     }
1823 #endif /* TCG_TARGET_EXTEND_ARGS */
1824 }
1825 
1826 static void tcg_reg_alloc_start(TCGContext *s)
1827 {
1828     int i, n;
1829     TCGTemp *ts;
1830 
1831     for (i = 0, n = s->nb_globals; i < n; i++) {
1832         ts = &s->temps[i];
1833         ts->val_type = (ts->fixed_reg ? TEMP_VAL_REG : TEMP_VAL_MEM);
1834     }
1835     for (n = s->nb_temps; i < n; i++) {
1836         ts = &s->temps[i];
1837         ts->val_type = (ts->temp_local ? TEMP_VAL_MEM : TEMP_VAL_DEAD);
1838         ts->mem_allocated = 0;
1839         ts->fixed_reg = 0;
1840     }
1841 
1842     memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp));
1843 }
1844 
1845 static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size,
1846                                  TCGTemp *ts)
1847 {
1848     int idx = temp_idx(ts);
1849 
1850     if (ts->temp_global) {
1851         pstrcpy(buf, buf_size, ts->name);
1852     } else if (ts->temp_local) {
1853         snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
1854     } else {
1855         snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
1856     }
1857     return buf;
1858 }
1859 
1860 static char *tcg_get_arg_str(TCGContext *s, char *buf,
1861                              int buf_size, TCGArg arg)
1862 {
1863     return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg));
1864 }
1865 
1866 /* Find helper name.  */
1867 static inline const char *tcg_find_helper(TCGContext *s, uintptr_t val)
1868 {
1869     const char *ret = NULL;
1870     if (helper_table) {
1871         TCGHelperInfo *info = g_hash_table_lookup(helper_table, (gpointer)val);
1872         if (info) {
1873             ret = info->name;
1874         }
1875     }
1876     return ret;
1877 }
1878 
1879 static const char * const cond_name[] =
1880 {
1881     [TCG_COND_NEVER] = "never",
1882     [TCG_COND_ALWAYS] = "always",
1883     [TCG_COND_EQ] = "eq",
1884     [TCG_COND_NE] = "ne",
1885     [TCG_COND_LT] = "lt",
1886     [TCG_COND_GE] = "ge",
1887     [TCG_COND_LE] = "le",
1888     [TCG_COND_GT] = "gt",
1889     [TCG_COND_LTU] = "ltu",
1890     [TCG_COND_GEU] = "geu",
1891     [TCG_COND_LEU] = "leu",
1892     [TCG_COND_GTU] = "gtu"
1893 };
1894 
1895 static const char * const ldst_name[] =
1896 {
1897     [MO_UB]   = "ub",
1898     [MO_SB]   = "sb",
1899     [MO_LEUW] = "leuw",
1900     [MO_LESW] = "lesw",
1901     [MO_LEUL] = "leul",
1902     [MO_LESL] = "lesl",
1903     [MO_LEQ]  = "leq",
1904     [MO_BEUW] = "beuw",
1905     [MO_BESW] = "besw",
1906     [MO_BEUL] = "beul",
1907     [MO_BESL] = "besl",
1908     [MO_BEQ]  = "beq",
1909 };
1910 
1911 static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
1912 #ifdef ALIGNED_ONLY
1913     [MO_UNALN >> MO_ASHIFT]    = "un+",
1914     [MO_ALIGN >> MO_ASHIFT]    = "",
1915 #else
1916     [MO_UNALN >> MO_ASHIFT]    = "",
1917     [MO_ALIGN >> MO_ASHIFT]    = "al+",
1918 #endif
1919     [MO_ALIGN_2 >> MO_ASHIFT]  = "al2+",
1920     [MO_ALIGN_4 >> MO_ASHIFT]  = "al4+",
1921     [MO_ALIGN_8 >> MO_ASHIFT]  = "al8+",
1922     [MO_ALIGN_16 >> MO_ASHIFT] = "al16+",
1923     [MO_ALIGN_32 >> MO_ASHIFT] = "al32+",
1924     [MO_ALIGN_64 >> MO_ASHIFT] = "al64+",
1925 };
1926 
1927 static inline bool tcg_regset_single(TCGRegSet d)
1928 {
1929     return (d & (d - 1)) == 0;
1930 }
1931 
1932 static inline TCGReg tcg_regset_first(TCGRegSet d)
1933 {
1934     if (TCG_TARGET_NB_REGS <= 32) {
1935         return ctz32(d);
1936     } else {
1937         return ctz64(d);
1938     }
1939 }
1940 
1941 static void tcg_dump_ops(TCGContext *s, bool have_prefs)
1942 {
1943     char buf[128];
1944     TCGOp *op;
1945 
1946     QTAILQ_FOREACH(op, &s->ops, link) {
1947         int i, k, nb_oargs, nb_iargs, nb_cargs;
1948         const TCGOpDef *def;
1949         TCGOpcode c;
1950         int col = 0;
1951 
1952         c = op->opc;
1953         def = &tcg_op_defs[c];
1954 
1955         if (c == INDEX_op_insn_start) {
1956             nb_oargs = 0;
1957             col += qemu_log("\n ----");
1958 
1959             for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
1960                 target_ulong a;
1961 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
1962                 a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
1963 #else
1964                 a = op->args[i];
1965 #endif
1966                 col += qemu_log(" " TARGET_FMT_lx, a);
1967             }
1968         } else if (c == INDEX_op_call) {
1969             /* variable number of arguments */
1970             nb_oargs = TCGOP_CALLO(op);
1971             nb_iargs = TCGOP_CALLI(op);
1972             nb_cargs = def->nb_cargs;
1973 
1974             /* function name, flags, out args */
1975             col += qemu_log(" %s %s,$0x%" TCG_PRIlx ",$%d", def->name,
1976                             tcg_find_helper(s, op->args[nb_oargs + nb_iargs]),
1977                             op->args[nb_oargs + nb_iargs + 1], nb_oargs);
1978             for (i = 0; i < nb_oargs; i++) {
1979                 col += qemu_log(",%s", tcg_get_arg_str(s, buf, sizeof(buf),
1980                                                        op->args[i]));
1981             }
1982             for (i = 0; i < nb_iargs; i++) {
1983                 TCGArg arg = op->args[nb_oargs + i];
1984                 const char *t = "<dummy>";
1985                 if (arg != TCG_CALL_DUMMY_ARG) {
1986                     t = tcg_get_arg_str(s, buf, sizeof(buf), arg);
1987                 }
1988                 col += qemu_log(",%s", t);
1989             }
1990         } else {
1991             col += qemu_log(" %s ", def->name);
1992 
1993             nb_oargs = def->nb_oargs;
1994             nb_iargs = def->nb_iargs;
1995             nb_cargs = def->nb_cargs;
1996 
1997             if (def->flags & TCG_OPF_VECTOR) {
1998                 col += qemu_log("v%d,e%d,", 64 << TCGOP_VECL(op),
1999                                 8 << TCGOP_VECE(op));
2000             }
2001 
2002             k = 0;
2003             for (i = 0; i < nb_oargs; i++) {
2004                 if (k != 0) {
2005                     col += qemu_log(",");
2006                 }
2007                 col += qemu_log("%s", tcg_get_arg_str(s, buf, sizeof(buf),
2008                                                       op->args[k++]));
2009             }
2010             for (i = 0; i < nb_iargs; i++) {
2011                 if (k != 0) {
2012                     col += qemu_log(",");
2013                 }
2014                 col += qemu_log("%s", tcg_get_arg_str(s, buf, sizeof(buf),
2015                                                       op->args[k++]));
2016             }
2017             switch (c) {
2018             case INDEX_op_brcond_i32:
2019             case INDEX_op_setcond_i32:
2020             case INDEX_op_movcond_i32:
2021             case INDEX_op_brcond2_i32:
2022             case INDEX_op_setcond2_i32:
2023             case INDEX_op_brcond_i64:
2024             case INDEX_op_setcond_i64:
2025             case INDEX_op_movcond_i64:
2026             case INDEX_op_cmp_vec:
2027                 if (op->args[k] < ARRAY_SIZE(cond_name)
2028                     && cond_name[op->args[k]]) {
2029                     col += qemu_log(",%s", cond_name[op->args[k++]]);
2030                 } else {
2031                     col += qemu_log(",$0x%" TCG_PRIlx, op->args[k++]);
2032                 }
2033                 i = 1;
2034                 break;
2035             case INDEX_op_qemu_ld_i32:
2036             case INDEX_op_qemu_st_i32:
2037             case INDEX_op_qemu_ld_i64:
2038             case INDEX_op_qemu_st_i64:
2039                 {
2040                     TCGMemOpIdx oi = op->args[k++];
2041                     TCGMemOp op = get_memop(oi);
2042                     unsigned ix = get_mmuidx(oi);
2043 
2044                     if (op & ~(MO_AMASK | MO_BSWAP | MO_SSIZE)) {
2045                         col += qemu_log(",$0x%x,%u", op, ix);
2046                     } else {
2047                         const char *s_al, *s_op;
2048                         s_al = alignment_name[(op & MO_AMASK) >> MO_ASHIFT];
2049                         s_op = ldst_name[op & (MO_BSWAP | MO_SSIZE)];
2050                         col += qemu_log(",%s%s,%u", s_al, s_op, ix);
2051                     }
2052                     i = 1;
2053                 }
2054                 break;
2055             default:
2056                 i = 0;
2057                 break;
2058             }
2059             switch (c) {
2060             case INDEX_op_set_label:
2061             case INDEX_op_br:
2062             case INDEX_op_brcond_i32:
2063             case INDEX_op_brcond_i64:
2064             case INDEX_op_brcond2_i32:
2065                 col += qemu_log("%s$L%d", k ? "," : "",
2066                                 arg_label(op->args[k])->id);
2067                 i++, k++;
2068                 break;
2069             default:
2070                 break;
2071             }
2072             for (; i < nb_cargs; i++, k++) {
2073                 col += qemu_log("%s$0x%" TCG_PRIlx, k ? "," : "", op->args[k]);
2074             }
2075         }
2076 
2077         if (have_prefs || op->life) {
2078             for (; col < 40; ++col) {
2079                 putc(' ', qemu_logfile);
2080             }
2081         }
2082 
2083         if (op->life) {
2084             unsigned life = op->life;
2085 
2086             if (life & (SYNC_ARG * 3)) {
2087                 qemu_log("  sync:");
2088                 for (i = 0; i < 2; ++i) {
2089                     if (life & (SYNC_ARG << i)) {
2090                         qemu_log(" %d", i);
2091                     }
2092                 }
2093             }
2094             life /= DEAD_ARG;
2095             if (life) {
2096                 qemu_log("  dead:");
2097                 for (i = 0; life; ++i, life >>= 1) {
2098                     if (life & 1) {
2099                         qemu_log(" %d", i);
2100                     }
2101                 }
2102             }
2103         }
2104 
2105         if (have_prefs) {
2106             for (i = 0; i < nb_oargs; ++i) {
2107                 TCGRegSet set = op->output_pref[i];
2108 
2109                 if (i == 0) {
2110                     qemu_log("  pref=");
2111                 } else {
2112                     qemu_log(",");
2113                 }
2114                 if (set == 0) {
2115                     qemu_log("none");
2116                 } else if (set == MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS)) {
2117                     qemu_log("all");
2118 #ifdef CONFIG_DEBUG_TCG
2119                 } else if (tcg_regset_single(set)) {
2120                     TCGReg reg = tcg_regset_first(set);
2121                     qemu_log("%s", tcg_target_reg_names[reg]);
2122 #endif
2123                 } else if (TCG_TARGET_NB_REGS <= 32) {
2124                     qemu_log("%#x", (uint32_t)set);
2125                 } else {
2126                     qemu_log("%#" PRIx64, (uint64_t)set);
2127                 }
2128             }
2129         }
2130 
2131         qemu_log("\n");
2132     }
2133 }
2134 
2135 /* we give more priority to constraints with less registers */
2136 static int get_constraint_priority(const TCGOpDef *def, int k)
2137 {
2138     const TCGArgConstraint *arg_ct;
2139 
2140     int i, n;
2141     arg_ct = &def->args_ct[k];
2142     if (arg_ct->ct & TCG_CT_ALIAS) {
2143         /* an alias is equivalent to a single register */
2144         n = 1;
2145     } else {
2146         if (!(arg_ct->ct & TCG_CT_REG))
2147             return 0;
2148         n = 0;
2149         for(i = 0; i < TCG_TARGET_NB_REGS; i++) {
2150             if (tcg_regset_test_reg(arg_ct->u.regs, i))
2151                 n++;
2152         }
2153     }
2154     return TCG_TARGET_NB_REGS - n + 1;
2155 }
2156 
2157 /* sort from highest priority to lowest */
2158 static void sort_constraints(TCGOpDef *def, int start, int n)
2159 {
2160     int i, j, p1, p2, tmp;
2161 
2162     for(i = 0; i < n; i++)
2163         def->sorted_args[start + i] = start + i;
2164     if (n <= 1)
2165         return;
2166     for(i = 0; i < n - 1; i++) {
2167         for(j = i + 1; j < n; j++) {
2168             p1 = get_constraint_priority(def, def->sorted_args[start + i]);
2169             p2 = get_constraint_priority(def, def->sorted_args[start + j]);
2170             if (p1 < p2) {
2171                 tmp = def->sorted_args[start + i];
2172                 def->sorted_args[start + i] = def->sorted_args[start + j];
2173                 def->sorted_args[start + j] = tmp;
2174             }
2175         }
2176     }
2177 }
2178 
2179 static void process_op_defs(TCGContext *s)
2180 {
2181     TCGOpcode op;
2182 
2183     for (op = 0; op < NB_OPS; op++) {
2184         TCGOpDef *def = &tcg_op_defs[op];
2185         const TCGTargetOpDef *tdefs;
2186         TCGType type;
2187         int i, nb_args;
2188 
2189         if (def->flags & TCG_OPF_NOT_PRESENT) {
2190             continue;
2191         }
2192 
2193         nb_args = def->nb_iargs + def->nb_oargs;
2194         if (nb_args == 0) {
2195             continue;
2196         }
2197 
2198         tdefs = tcg_target_op_def(op);
2199         /* Missing TCGTargetOpDef entry. */
2200         tcg_debug_assert(tdefs != NULL);
2201 
2202         type = (def->flags & TCG_OPF_64BIT ? TCG_TYPE_I64 : TCG_TYPE_I32);
2203         for (i = 0; i < nb_args; i++) {
2204             const char *ct_str = tdefs->args_ct_str[i];
2205             /* Incomplete TCGTargetOpDef entry. */
2206             tcg_debug_assert(ct_str != NULL);
2207 
2208             def->args_ct[i].u.regs = 0;
2209             def->args_ct[i].ct = 0;
2210             while (*ct_str != '\0') {
2211                 switch(*ct_str) {
2212                 case '0' ... '9':
2213                     {
2214                         int oarg = *ct_str - '0';
2215                         tcg_debug_assert(ct_str == tdefs->args_ct_str[i]);
2216                         tcg_debug_assert(oarg < def->nb_oargs);
2217                         tcg_debug_assert(def->args_ct[oarg].ct & TCG_CT_REG);
2218                         /* TCG_CT_ALIAS is for the output arguments.
2219                            The input is tagged with TCG_CT_IALIAS. */
2220                         def->args_ct[i] = def->args_ct[oarg];
2221                         def->args_ct[oarg].ct |= TCG_CT_ALIAS;
2222                         def->args_ct[oarg].alias_index = i;
2223                         def->args_ct[i].ct |= TCG_CT_IALIAS;
2224                         def->args_ct[i].alias_index = oarg;
2225                     }
2226                     ct_str++;
2227                     break;
2228                 case '&':
2229                     def->args_ct[i].ct |= TCG_CT_NEWREG;
2230                     ct_str++;
2231                     break;
2232                 case 'i':
2233                     def->args_ct[i].ct |= TCG_CT_CONST;
2234                     ct_str++;
2235                     break;
2236                 default:
2237                     ct_str = target_parse_constraint(&def->args_ct[i],
2238                                                      ct_str, type);
2239                     /* Typo in TCGTargetOpDef constraint. */
2240                     tcg_debug_assert(ct_str != NULL);
2241                 }
2242             }
2243         }
2244 
2245         /* TCGTargetOpDef entry with too much information? */
2246         tcg_debug_assert(i == TCG_MAX_OP_ARGS || tdefs->args_ct_str[i] == NULL);
2247 
2248         /* sort the constraints (XXX: this is just an heuristic) */
2249         sort_constraints(def, 0, def->nb_oargs);
2250         sort_constraints(def, def->nb_oargs, def->nb_iargs);
2251     }
2252 }
2253 
2254 void tcg_op_remove(TCGContext *s, TCGOp *op)
2255 {
2256     TCGLabel *label;
2257 
2258     switch (op->opc) {
2259     case INDEX_op_br:
2260         label = arg_label(op->args[0]);
2261         label->refs--;
2262         break;
2263     case INDEX_op_brcond_i32:
2264     case INDEX_op_brcond_i64:
2265         label = arg_label(op->args[3]);
2266         label->refs--;
2267         break;
2268     case INDEX_op_brcond2_i32:
2269         label = arg_label(op->args[5]);
2270         label->refs--;
2271         break;
2272     default:
2273         break;
2274     }
2275 
2276     QTAILQ_REMOVE(&s->ops, op, link);
2277     QTAILQ_INSERT_TAIL(&s->free_ops, op, link);
2278     s->nb_ops--;
2279 
2280 #ifdef CONFIG_PROFILER
2281     atomic_set(&s->prof.del_op_count, s->prof.del_op_count + 1);
2282 #endif
2283 }
2284 
2285 static TCGOp *tcg_op_alloc(TCGOpcode opc)
2286 {
2287     TCGContext *s = tcg_ctx;
2288     TCGOp *op;
2289 
2290     if (likely(QTAILQ_EMPTY(&s->free_ops))) {
2291         op = tcg_malloc(sizeof(TCGOp));
2292     } else {
2293         op = QTAILQ_FIRST(&s->free_ops);
2294         QTAILQ_REMOVE(&s->free_ops, op, link);
2295     }
2296     memset(op, 0, offsetof(TCGOp, link));
2297     op->opc = opc;
2298     s->nb_ops++;
2299 
2300     return op;
2301 }
2302 
2303 TCGOp *tcg_emit_op(TCGOpcode opc)
2304 {
2305     TCGOp *op = tcg_op_alloc(opc);
2306     QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
2307     return op;
2308 }
2309 
2310 TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op, TCGOpcode opc)
2311 {
2312     TCGOp *new_op = tcg_op_alloc(opc);
2313     QTAILQ_INSERT_BEFORE(old_op, new_op, link);
2314     return new_op;
2315 }
2316 
2317 TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op, TCGOpcode opc)
2318 {
2319     TCGOp *new_op = tcg_op_alloc(opc);
2320     QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link);
2321     return new_op;
2322 }
2323 
2324 /* Reachable analysis : remove unreachable code.  */
2325 static void reachable_code_pass(TCGContext *s)
2326 {
2327     TCGOp *op, *op_next;
2328     bool dead = false;
2329 
2330     QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
2331         bool remove = dead;
2332         TCGLabel *label;
2333         int call_flags;
2334 
2335         switch (op->opc) {
2336         case INDEX_op_set_label:
2337             label = arg_label(op->args[0]);
2338             if (label->refs == 0) {
2339                 /*
2340                  * While there is an occasional backward branch, virtually
2341                  * all branches generated by the translators are forward.
2342                  * Which means that generally we will have already removed
2343                  * all references to the label that will be, and there is
2344                  * little to be gained by iterating.
2345                  */
2346                 remove = true;
2347             } else {
2348                 /* Once we see a label, insns become live again.  */
2349                 dead = false;
2350                 remove = false;
2351 
2352                 /*
2353                  * Optimization can fold conditional branches to unconditional.
2354                  * If we find a label with one reference which is preceded by
2355                  * an unconditional branch to it, remove both.  This needed to
2356                  * wait until the dead code in between them was removed.
2357                  */
2358                 if (label->refs == 1) {
2359                     TCGOp *op_prev = QTAILQ_PREV(op, link);
2360                     if (op_prev->opc == INDEX_op_br &&
2361                         label == arg_label(op_prev->args[0])) {
2362                         tcg_op_remove(s, op_prev);
2363                         remove = true;
2364                     }
2365                 }
2366             }
2367             break;
2368 
2369         case INDEX_op_br:
2370         case INDEX_op_exit_tb:
2371         case INDEX_op_goto_ptr:
2372             /* Unconditional branches; everything following is dead.  */
2373             dead = true;
2374             break;
2375 
2376         case INDEX_op_call:
2377             /* Notice noreturn helper calls, raising exceptions.  */
2378             call_flags = op->args[TCGOP_CALLO(op) + TCGOP_CALLI(op) + 1];
2379             if (call_flags & TCG_CALL_NO_RETURN) {
2380                 dead = true;
2381             }
2382             break;
2383 
2384         case INDEX_op_insn_start:
2385             /* Never remove -- we need to keep these for unwind.  */
2386             remove = false;
2387             break;
2388 
2389         default:
2390             break;
2391         }
2392 
2393         if (remove) {
2394             tcg_op_remove(s, op);
2395         }
2396     }
2397 }
2398 
2399 #define TS_DEAD  1
2400 #define TS_MEM   2
2401 
2402 #define IS_DEAD_ARG(n)   (arg_life & (DEAD_ARG << (n)))
2403 #define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n)))
2404 
2405 /* For liveness_pass_1, the register preferences for a given temp.  */
2406 static inline TCGRegSet *la_temp_pref(TCGTemp *ts)
2407 {
2408     return ts->state_ptr;
2409 }
2410 
2411 /* For liveness_pass_1, reset the preferences for a given temp to the
2412  * maximal regset for its type.
2413  */
2414 static inline void la_reset_pref(TCGTemp *ts)
2415 {
2416     *la_temp_pref(ts)
2417         = (ts->state == TS_DEAD ? 0 : tcg_target_available_regs[ts->type]);
2418 }
2419 
2420 /* liveness analysis: end of function: all temps are dead, and globals
2421    should be in memory. */
2422 static void la_func_end(TCGContext *s, int ng, int nt)
2423 {
2424     int i;
2425 
2426     for (i = 0; i < ng; ++i) {
2427         s->temps[i].state = TS_DEAD | TS_MEM;
2428         la_reset_pref(&s->temps[i]);
2429     }
2430     for (i = ng; i < nt; ++i) {
2431         s->temps[i].state = TS_DEAD;
2432         la_reset_pref(&s->temps[i]);
2433     }
2434 }
2435 
2436 /* liveness analysis: end of basic block: all temps are dead, globals
2437    and local temps should be in memory. */
2438 static void la_bb_end(TCGContext *s, int ng, int nt)
2439 {
2440     int i;
2441 
2442     for (i = 0; i < ng; ++i) {
2443         s->temps[i].state = TS_DEAD | TS_MEM;
2444         la_reset_pref(&s->temps[i]);
2445     }
2446     for (i = ng; i < nt; ++i) {
2447         s->temps[i].state = (s->temps[i].temp_local
2448                              ? TS_DEAD | TS_MEM
2449                              : TS_DEAD);
2450         la_reset_pref(&s->temps[i]);
2451     }
2452 }
2453 
2454 /* liveness analysis: sync globals back to memory.  */
2455 static void la_global_sync(TCGContext *s, int ng)
2456 {
2457     int i;
2458 
2459     for (i = 0; i < ng; ++i) {
2460         int state = s->temps[i].state;
2461         s->temps[i].state = state | TS_MEM;
2462         if (state == TS_DEAD) {
2463             /* If the global was previously dead, reset prefs.  */
2464             la_reset_pref(&s->temps[i]);
2465         }
2466     }
2467 }
2468 
2469 /* liveness analysis: sync globals back to memory and kill.  */
2470 static void la_global_kill(TCGContext *s, int ng)
2471 {
2472     int i;
2473 
2474     for (i = 0; i < ng; i++) {
2475         s->temps[i].state = TS_DEAD | TS_MEM;
2476         la_reset_pref(&s->temps[i]);
2477     }
2478 }
2479 
2480 /* liveness analysis: note live globals crossing calls.  */
2481 static void la_cross_call(TCGContext *s, int nt)
2482 {
2483     TCGRegSet mask = ~tcg_target_call_clobber_regs;
2484     int i;
2485 
2486     for (i = 0; i < nt; i++) {
2487         TCGTemp *ts = &s->temps[i];
2488         if (!(ts->state & TS_DEAD)) {
2489             TCGRegSet *pset = la_temp_pref(ts);
2490             TCGRegSet set = *pset;
2491 
2492             set &= mask;
2493             /* If the combination is not possible, restart.  */
2494             if (set == 0) {
2495                 set = tcg_target_available_regs[ts->type] & mask;
2496             }
2497             *pset = set;
2498         }
2499     }
2500 }
2501 
2502 /* Liveness analysis : update the opc_arg_life array to tell if a
2503    given input arguments is dead. Instructions updating dead
2504    temporaries are removed. */
2505 static void liveness_pass_1(TCGContext *s)
2506 {
2507     int nb_globals = s->nb_globals;
2508     int nb_temps = s->nb_temps;
2509     TCGOp *op, *op_prev;
2510     TCGRegSet *prefs;
2511     int i;
2512 
2513     prefs = tcg_malloc(sizeof(TCGRegSet) * nb_temps);
2514     for (i = 0; i < nb_temps; ++i) {
2515         s->temps[i].state_ptr = prefs + i;
2516     }
2517 
2518     /* ??? Should be redundant with the exit_tb that ends the TB.  */
2519     la_func_end(s, nb_globals, nb_temps);
2520 
2521     QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, link, op_prev) {
2522         int nb_iargs, nb_oargs;
2523         TCGOpcode opc_new, opc_new2;
2524         bool have_opc_new2;
2525         TCGLifeData arg_life = 0;
2526         TCGTemp *ts;
2527         TCGOpcode opc = op->opc;
2528         const TCGOpDef *def = &tcg_op_defs[opc];
2529 
2530         switch (opc) {
2531         case INDEX_op_call:
2532             {
2533                 int call_flags;
2534                 int nb_call_regs;
2535 
2536                 nb_oargs = TCGOP_CALLO(op);
2537                 nb_iargs = TCGOP_CALLI(op);
2538                 call_flags = op->args[nb_oargs + nb_iargs + 1];
2539 
2540                 /* pure functions can be removed if their result is unused */
2541                 if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
2542                     for (i = 0; i < nb_oargs; i++) {
2543                         ts = arg_temp(op->args[i]);
2544                         if (ts->state != TS_DEAD) {
2545                             goto do_not_remove_call;
2546                         }
2547                     }
2548                     goto do_remove;
2549                 }
2550             do_not_remove_call:
2551 
2552                 /* Output args are dead.  */
2553                 for (i = 0; i < nb_oargs; i++) {
2554                     ts = arg_temp(op->args[i]);
2555                     if (ts->state & TS_DEAD) {
2556                         arg_life |= DEAD_ARG << i;
2557                     }
2558                     if (ts->state & TS_MEM) {
2559                         arg_life |= SYNC_ARG << i;
2560                     }
2561                     ts->state = TS_DEAD;
2562                     la_reset_pref(ts);
2563 
2564                     /* Not used -- it will be tcg_target_call_oarg_regs[i].  */
2565                     op->output_pref[i] = 0;
2566                 }
2567 
2568                 if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
2569                                     TCG_CALL_NO_READ_GLOBALS))) {
2570                     la_global_kill(s, nb_globals);
2571                 } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
2572                     la_global_sync(s, nb_globals);
2573                 }
2574 
2575                 /* Record arguments that die in this helper.  */
2576                 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
2577                     ts = arg_temp(op->args[i]);
2578                     if (ts && ts->state & TS_DEAD) {
2579                         arg_life |= DEAD_ARG << i;
2580                     }
2581                 }
2582 
2583                 /* For all live registers, remove call-clobbered prefs.  */
2584                 la_cross_call(s, nb_temps);
2585 
2586                 nb_call_regs = ARRAY_SIZE(tcg_target_call_iarg_regs);
2587 
2588                 /* Input arguments are live for preceding opcodes.  */
2589                 for (i = 0; i < nb_iargs; i++) {
2590                     ts = arg_temp(op->args[i + nb_oargs]);
2591                     if (ts && ts->state & TS_DEAD) {
2592                         /* For those arguments that die, and will be allocated
2593                          * in registers, clear the register set for that arg,
2594                          * to be filled in below.  For args that will be on
2595                          * the stack, reset to any available reg.
2596                          */
2597                         *la_temp_pref(ts)
2598                             = (i < nb_call_regs ? 0 :
2599                                tcg_target_available_regs[ts->type]);
2600                         ts->state &= ~TS_DEAD;
2601                     }
2602                 }
2603 
2604                 /* For each input argument, add its input register to prefs.
2605                    If a temp is used once, this produces a single set bit.  */
2606                 for (i = 0; i < MIN(nb_call_regs, nb_iargs); i++) {
2607                     ts = arg_temp(op->args[i + nb_oargs]);
2608                     if (ts) {
2609                         tcg_regset_set_reg(*la_temp_pref(ts),
2610                                            tcg_target_call_iarg_regs[i]);
2611                     }
2612                 }
2613             }
2614             break;
2615         case INDEX_op_insn_start:
2616             break;
2617         case INDEX_op_discard:
2618             /* mark the temporary as dead */
2619             ts = arg_temp(op->args[0]);
2620             ts->state = TS_DEAD;
2621             la_reset_pref(ts);
2622             break;
2623 
2624         case INDEX_op_add2_i32:
2625             opc_new = INDEX_op_add_i32;
2626             goto do_addsub2;
2627         case INDEX_op_sub2_i32:
2628             opc_new = INDEX_op_sub_i32;
2629             goto do_addsub2;
2630         case INDEX_op_add2_i64:
2631             opc_new = INDEX_op_add_i64;
2632             goto do_addsub2;
2633         case INDEX_op_sub2_i64:
2634             opc_new = INDEX_op_sub_i64;
2635         do_addsub2:
2636             nb_iargs = 4;
2637             nb_oargs = 2;
2638             /* Test if the high part of the operation is dead, but not
2639                the low part.  The result can be optimized to a simple
2640                add or sub.  This happens often for x86_64 guest when the
2641                cpu mode is set to 32 bit.  */
2642             if (arg_temp(op->args[1])->state == TS_DEAD) {
2643                 if (arg_temp(op->args[0])->state == TS_DEAD) {
2644                     goto do_remove;
2645                 }
2646                 /* Replace the opcode and adjust the args in place,
2647                    leaving 3 unused args at the end.  */
2648                 op->opc = opc = opc_new;
2649                 op->args[1] = op->args[2];
2650                 op->args[2] = op->args[4];
2651                 /* Fall through and mark the single-word operation live.  */
2652                 nb_iargs = 2;
2653                 nb_oargs = 1;
2654             }
2655             goto do_not_remove;
2656 
2657         case INDEX_op_mulu2_i32:
2658             opc_new = INDEX_op_mul_i32;
2659             opc_new2 = INDEX_op_muluh_i32;
2660             have_opc_new2 = TCG_TARGET_HAS_muluh_i32;
2661             goto do_mul2;
2662         case INDEX_op_muls2_i32:
2663             opc_new = INDEX_op_mul_i32;
2664             opc_new2 = INDEX_op_mulsh_i32;
2665             have_opc_new2 = TCG_TARGET_HAS_mulsh_i32;
2666             goto do_mul2;
2667         case INDEX_op_mulu2_i64:
2668             opc_new = INDEX_op_mul_i64;
2669             opc_new2 = INDEX_op_muluh_i64;
2670             have_opc_new2 = TCG_TARGET_HAS_muluh_i64;
2671             goto do_mul2;
2672         case INDEX_op_muls2_i64:
2673             opc_new = INDEX_op_mul_i64;
2674             opc_new2 = INDEX_op_mulsh_i64;
2675             have_opc_new2 = TCG_TARGET_HAS_mulsh_i64;
2676             goto do_mul2;
2677         do_mul2:
2678             nb_iargs = 2;
2679             nb_oargs = 2;
2680             if (arg_temp(op->args[1])->state == TS_DEAD) {
2681                 if (arg_temp(op->args[0])->state == TS_DEAD) {
2682                     /* Both parts of the operation are dead.  */
2683                     goto do_remove;
2684                 }
2685                 /* The high part of the operation is dead; generate the low. */
2686                 op->opc = opc = opc_new;
2687                 op->args[1] = op->args[2];
2688                 op->args[2] = op->args[3];
2689             } else if (arg_temp(op->args[0])->state == TS_DEAD && have_opc_new2) {
2690                 /* The low part of the operation is dead; generate the high. */
2691                 op->opc = opc = opc_new2;
2692                 op->args[0] = op->args[1];
2693                 op->args[1] = op->args[2];
2694                 op->args[2] = op->args[3];
2695             } else {
2696                 goto do_not_remove;
2697             }
2698             /* Mark the single-word operation live.  */
2699             nb_oargs = 1;
2700             goto do_not_remove;
2701 
2702         default:
2703             /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
2704             nb_iargs = def->nb_iargs;
2705             nb_oargs = def->nb_oargs;
2706 
2707             /* Test if the operation can be removed because all
2708                its outputs are dead. We assume that nb_oargs == 0
2709                implies side effects */
2710             if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) {
2711                 for (i = 0; i < nb_oargs; i++) {
2712                     if (arg_temp(op->args[i])->state != TS_DEAD) {
2713                         goto do_not_remove;
2714                     }
2715                 }
2716                 goto do_remove;
2717             }
2718             goto do_not_remove;
2719 
2720         do_remove:
2721             tcg_op_remove(s, op);
2722             break;
2723 
2724         do_not_remove:
2725             for (i = 0; i < nb_oargs; i++) {
2726                 ts = arg_temp(op->args[i]);
2727 
2728                 /* Remember the preference of the uses that followed.  */
2729                 op->output_pref[i] = *la_temp_pref(ts);
2730 
2731                 /* Output args are dead.  */
2732                 if (ts->state & TS_DEAD) {
2733                     arg_life |= DEAD_ARG << i;
2734                 }
2735                 if (ts->state & TS_MEM) {
2736                     arg_life |= SYNC_ARG << i;
2737                 }
2738                 ts->state = TS_DEAD;
2739                 la_reset_pref(ts);
2740             }
2741 
2742             /* If end of basic block, update.  */
2743             if (def->flags & TCG_OPF_BB_EXIT) {
2744                 la_func_end(s, nb_globals, nb_temps);
2745             } else if (def->flags & TCG_OPF_BB_END) {
2746                 la_bb_end(s, nb_globals, nb_temps);
2747             } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
2748                 la_global_sync(s, nb_globals);
2749                 if (def->flags & TCG_OPF_CALL_CLOBBER) {
2750                     la_cross_call(s, nb_temps);
2751                 }
2752             }
2753 
2754             /* Record arguments that die in this opcode.  */
2755             for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
2756                 ts = arg_temp(op->args[i]);
2757                 if (ts->state & TS_DEAD) {
2758                     arg_life |= DEAD_ARG << i;
2759                 }
2760             }
2761 
2762             /* Input arguments are live for preceding opcodes.  */
2763             for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
2764                 ts = arg_temp(op->args[i]);
2765                 if (ts->state & TS_DEAD) {
2766                     /* For operands that were dead, initially allow
2767                        all regs for the type.  */
2768                     *la_temp_pref(ts) = tcg_target_available_regs[ts->type];
2769                     ts->state &= ~TS_DEAD;
2770                 }
2771             }
2772 
2773             /* Incorporate constraints for this operand.  */
2774             switch (opc) {
2775             case INDEX_op_mov_i32:
2776             case INDEX_op_mov_i64:
2777                 /* Note that these are TCG_OPF_NOT_PRESENT and do not
2778                    have proper constraints.  That said, special case
2779                    moves to propagate preferences backward.  */
2780                 if (IS_DEAD_ARG(1)) {
2781                     *la_temp_pref(arg_temp(op->args[0]))
2782                         = *la_temp_pref(arg_temp(op->args[1]));
2783                 }
2784                 break;
2785 
2786             default:
2787                 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
2788                     const TCGArgConstraint *ct = &def->args_ct[i];
2789                     TCGRegSet set, *pset;
2790 
2791                     ts = arg_temp(op->args[i]);
2792                     pset = la_temp_pref(ts);
2793                     set = *pset;
2794 
2795                     set &= ct->u.regs;
2796                     if (ct->ct & TCG_CT_IALIAS) {
2797                         set &= op->output_pref[ct->alias_index];
2798                     }
2799                     /* If the combination is not possible, restart.  */
2800                     if (set == 0) {
2801                         set = ct->u.regs;
2802                     }
2803                     *pset = set;
2804                 }
2805                 break;
2806             }
2807             break;
2808         }
2809         op->life = arg_life;
2810     }
2811 }
2812 
2813 /* Liveness analysis: Convert indirect regs to direct temporaries.  */
2814 static bool liveness_pass_2(TCGContext *s)
2815 {
2816     int nb_globals = s->nb_globals;
2817     int nb_temps, i;
2818     bool changes = false;
2819     TCGOp *op, *op_next;
2820 
2821     /* Create a temporary for each indirect global.  */
2822     for (i = 0; i < nb_globals; ++i) {
2823         TCGTemp *its = &s->temps[i];
2824         if (its->indirect_reg) {
2825             TCGTemp *dts = tcg_temp_alloc(s);
2826             dts->type = its->type;
2827             dts->base_type = its->base_type;
2828             its->state_ptr = dts;
2829         } else {
2830             its->state_ptr = NULL;
2831         }
2832         /* All globals begin dead.  */
2833         its->state = TS_DEAD;
2834     }
2835     for (nb_temps = s->nb_temps; i < nb_temps; ++i) {
2836         TCGTemp *its = &s->temps[i];
2837         its->state_ptr = NULL;
2838         its->state = TS_DEAD;
2839     }
2840 
2841     QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
2842         TCGOpcode opc = op->opc;
2843         const TCGOpDef *def = &tcg_op_defs[opc];
2844         TCGLifeData arg_life = op->life;
2845         int nb_iargs, nb_oargs, call_flags;
2846         TCGTemp *arg_ts, *dir_ts;
2847 
2848         if (opc == INDEX_op_call) {
2849             nb_oargs = TCGOP_CALLO(op);
2850             nb_iargs = TCGOP_CALLI(op);
2851             call_flags = op->args[nb_oargs + nb_iargs + 1];
2852         } else {
2853             nb_iargs = def->nb_iargs;
2854             nb_oargs = def->nb_oargs;
2855 
2856             /* Set flags similar to how calls require.  */
2857             if (def->flags & TCG_OPF_BB_END) {
2858                 /* Like writing globals: save_globals */
2859                 call_flags = 0;
2860             } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
2861                 /* Like reading globals: sync_globals */
2862                 call_flags = TCG_CALL_NO_WRITE_GLOBALS;
2863             } else {
2864                 /* No effect on globals.  */
2865                 call_flags = (TCG_CALL_NO_READ_GLOBALS |
2866                               TCG_CALL_NO_WRITE_GLOBALS);
2867             }
2868         }
2869 
2870         /* Make sure that input arguments are available.  */
2871         for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
2872             arg_ts = arg_temp(op->args[i]);
2873             if (arg_ts) {
2874                 dir_ts = arg_ts->state_ptr;
2875                 if (dir_ts && arg_ts->state == TS_DEAD) {
2876                     TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32
2877                                       ? INDEX_op_ld_i32
2878                                       : INDEX_op_ld_i64);
2879                     TCGOp *lop = tcg_op_insert_before(s, op, lopc);
2880 
2881                     lop->args[0] = temp_arg(dir_ts);
2882                     lop->args[1] = temp_arg(arg_ts->mem_base);
2883                     lop->args[2] = arg_ts->mem_offset;
2884 
2885                     /* Loaded, but synced with memory.  */
2886                     arg_ts->state = TS_MEM;
2887                 }
2888             }
2889         }
2890 
2891         /* Perform input replacement, and mark inputs that became dead.
2892            No action is required except keeping temp_state up to date
2893            so that we reload when needed.  */
2894         for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
2895             arg_ts = arg_temp(op->args[i]);
2896             if (arg_ts) {
2897                 dir_ts = arg_ts->state_ptr;
2898                 if (dir_ts) {
2899                     op->args[i] = temp_arg(dir_ts);
2900                     changes = true;
2901                     if (IS_DEAD_ARG(i)) {
2902                         arg_ts->state = TS_DEAD;
2903                     }
2904                 }
2905             }
2906         }
2907 
2908         /* Liveness analysis should ensure that the following are
2909            all correct, for call sites and basic block end points.  */
2910         if (call_flags & TCG_CALL_NO_READ_GLOBALS) {
2911             /* Nothing to do */
2912         } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) {
2913             for (i = 0; i < nb_globals; ++i) {
2914                 /* Liveness should see that globals are synced back,
2915                    that is, either TS_DEAD or TS_MEM.  */
2916                 arg_ts = &s->temps[i];
2917                 tcg_debug_assert(arg_ts->state_ptr == 0
2918                                  || arg_ts->state != 0);
2919             }
2920         } else {
2921             for (i = 0; i < nb_globals; ++i) {
2922                 /* Liveness should see that globals are saved back,
2923                    that is, TS_DEAD, waiting to be reloaded.  */
2924                 arg_ts = &s->temps[i];
2925                 tcg_debug_assert(arg_ts->state_ptr == 0
2926                                  || arg_ts->state == TS_DEAD);
2927             }
2928         }
2929 
2930         /* Outputs become available.  */
2931         for (i = 0; i < nb_oargs; i++) {
2932             arg_ts = arg_temp(op->args[i]);
2933             dir_ts = arg_ts->state_ptr;
2934             if (!dir_ts) {
2935                 continue;
2936             }
2937             op->args[i] = temp_arg(dir_ts);
2938             changes = true;
2939 
2940             /* The output is now live and modified.  */
2941             arg_ts->state = 0;
2942 
2943             /* Sync outputs upon their last write.  */
2944             if (NEED_SYNC_ARG(i)) {
2945                 TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
2946                                   ? INDEX_op_st_i32
2947                                   : INDEX_op_st_i64);
2948                 TCGOp *sop = tcg_op_insert_after(s, op, sopc);
2949 
2950                 sop->args[0] = temp_arg(dir_ts);
2951                 sop->args[1] = temp_arg(arg_ts->mem_base);
2952                 sop->args[2] = arg_ts->mem_offset;
2953 
2954                 arg_ts->state = TS_MEM;
2955             }
2956             /* Drop outputs that are dead.  */
2957             if (IS_DEAD_ARG(i)) {
2958                 arg_ts->state = TS_DEAD;
2959             }
2960         }
2961     }
2962 
2963     return changes;
2964 }
2965 
2966 #ifdef CONFIG_DEBUG_TCG
2967 static void dump_regs(TCGContext *s)
2968 {
2969     TCGTemp *ts;
2970     int i;
2971     char buf[64];
2972 
2973     for(i = 0; i < s->nb_temps; i++) {
2974         ts = &s->temps[i];
2975         printf("  %10s: ", tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts));
2976         switch(ts->val_type) {
2977         case TEMP_VAL_REG:
2978             printf("%s", tcg_target_reg_names[ts->reg]);
2979             break;
2980         case TEMP_VAL_MEM:
2981             printf("%d(%s)", (int)ts->mem_offset,
2982                    tcg_target_reg_names[ts->mem_base->reg]);
2983             break;
2984         case TEMP_VAL_CONST:
2985             printf("$0x%" TCG_PRIlx, ts->val);
2986             break;
2987         case TEMP_VAL_DEAD:
2988             printf("D");
2989             break;
2990         default:
2991             printf("???");
2992             break;
2993         }
2994         printf("\n");
2995     }
2996 
2997     for(i = 0; i < TCG_TARGET_NB_REGS; i++) {
2998         if (s->reg_to_temp[i] != NULL) {
2999             printf("%s: %s\n",
3000                    tcg_target_reg_names[i],
3001                    tcg_get_arg_str_ptr(s, buf, sizeof(buf), s->reg_to_temp[i]));
3002         }
3003     }
3004 }
3005 
3006 static void check_regs(TCGContext *s)
3007 {
3008     int reg;
3009     int k;
3010     TCGTemp *ts;
3011     char buf[64];
3012 
3013     for (reg = 0; reg < TCG_TARGET_NB_REGS; reg++) {
3014         ts = s->reg_to_temp[reg];
3015         if (ts != NULL) {
3016             if (ts->val_type != TEMP_VAL_REG || ts->reg != reg) {
3017                 printf("Inconsistency for register %s:\n",
3018                        tcg_target_reg_names[reg]);
3019                 goto fail;
3020             }
3021         }
3022     }
3023     for (k = 0; k < s->nb_temps; k++) {
3024         ts = &s->temps[k];
3025         if (ts->val_type == TEMP_VAL_REG && !ts->fixed_reg
3026             && s->reg_to_temp[ts->reg] != ts) {
3027             printf("Inconsistency for temp %s:\n",
3028                    tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts));
3029         fail:
3030             printf("reg state:\n");
3031             dump_regs(s);
3032             tcg_abort();
3033         }
3034     }
3035 }
3036 #endif
3037 
3038 static void temp_allocate_frame(TCGContext *s, TCGTemp *ts)
3039 {
3040 #if !(defined(__sparc__) && TCG_TARGET_REG_BITS == 64)
3041     /* Sparc64 stack is accessed with offset of 2047 */
3042     s->current_frame_offset = (s->current_frame_offset +
3043                                (tcg_target_long)sizeof(tcg_target_long) - 1) &
3044         ~(sizeof(tcg_target_long) - 1);
3045 #endif
3046     if (s->current_frame_offset + (tcg_target_long)sizeof(tcg_target_long) >
3047         s->frame_end) {
3048         tcg_abort();
3049     }
3050     ts->mem_offset = s->current_frame_offset;
3051     ts->mem_base = s->frame_temp;
3052     ts->mem_allocated = 1;
3053     s->current_frame_offset += sizeof(tcg_target_long);
3054 }
3055 
3056 static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet);
3057 
3058 /* Mark a temporary as free or dead.  If 'free_or_dead' is negative,
3059    mark it free; otherwise mark it dead.  */
3060 static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead)
3061 {
3062     if (ts->fixed_reg) {
3063         return;
3064     }
3065     if (ts->val_type == TEMP_VAL_REG) {
3066         s->reg_to_temp[ts->reg] = NULL;
3067     }
3068     ts->val_type = (free_or_dead < 0
3069                     || ts->temp_local
3070                     || ts->temp_global
3071                     ? TEMP_VAL_MEM : TEMP_VAL_DEAD);
3072 }
3073 
3074 /* Mark a temporary as dead.  */
3075 static inline void temp_dead(TCGContext *s, TCGTemp *ts)
3076 {
3077     temp_free_or_dead(s, ts, 1);
3078 }
3079 
3080 /* Sync a temporary to memory. 'allocated_regs' is used in case a temporary
3081    registers needs to be allocated to store a constant.  If 'free_or_dead'
3082    is non-zero, subsequently release the temporary; if it is positive, the
3083    temp is dead; if it is negative, the temp is free.  */
3084 static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs,
3085                       TCGRegSet preferred_regs, int free_or_dead)
3086 {
3087     if (ts->fixed_reg) {
3088         return;
3089     }
3090     if (!ts->mem_coherent) {
3091         if (!ts->mem_allocated) {
3092             temp_allocate_frame(s, ts);
3093         }
3094         switch (ts->val_type) {
3095         case TEMP_VAL_CONST:
3096             /* If we're going to free the temp immediately, then we won't
3097                require it later in a register, so attempt to store the
3098                constant to memory directly.  */
3099             if (free_or_dead
3100                 && tcg_out_sti(s, ts->type, ts->val,
3101                                ts->mem_base->reg, ts->mem_offset)) {
3102                 break;
3103             }
3104             temp_load(s, ts, tcg_target_available_regs[ts->type],
3105                       allocated_regs, preferred_regs);
3106             /* fallthrough */
3107 
3108         case TEMP_VAL_REG:
3109             tcg_out_st(s, ts->type, ts->reg,
3110                        ts->mem_base->reg, ts->mem_offset);
3111             break;
3112 
3113         case TEMP_VAL_MEM:
3114             break;
3115 
3116         case TEMP_VAL_DEAD:
3117         default:
3118             tcg_abort();
3119         }
3120         ts->mem_coherent = 1;
3121     }
3122     if (free_or_dead) {
3123         temp_free_or_dead(s, ts, free_or_dead);
3124     }
3125 }
3126 
3127 /* free register 'reg' by spilling the corresponding temporary if necessary */
3128 static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs)
3129 {
3130     TCGTemp *ts = s->reg_to_temp[reg];
3131     if (ts != NULL) {
3132         temp_sync(s, ts, allocated_regs, 0, -1);
3133     }
3134 }
3135 
3136 /**
3137  * tcg_reg_alloc:
3138  * @required_regs: Set of registers in which we must allocate.
3139  * @allocated_regs: Set of registers which must be avoided.
3140  * @preferred_regs: Set of registers we should prefer.
3141  * @rev: True if we search the registers in "indirect" order.
3142  *
3143  * The allocated register must be in @required_regs & ~@allocated_regs,
3144  * but if we can put it in @preferred_regs we may save a move later.
3145  */
3146 static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs,
3147                             TCGRegSet allocated_regs,
3148                             TCGRegSet preferred_regs, bool rev)
3149 {
3150     int i, j, f, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
3151     TCGRegSet reg_ct[2];
3152     const int *order;
3153 
3154     reg_ct[1] = required_regs & ~allocated_regs;
3155     tcg_debug_assert(reg_ct[1] != 0);
3156     reg_ct[0] = reg_ct[1] & preferred_regs;
3157 
3158     /* Skip the preferred_regs option if it cannot be satisfied,
3159        or if the preference made no difference.  */
3160     f = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
3161 
3162     order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
3163 
3164     /* Try free registers, preferences first.  */
3165     for (j = f; j < 2; j++) {
3166         TCGRegSet set = reg_ct[j];
3167 
3168         if (tcg_regset_single(set)) {
3169             /* One register in the set.  */
3170             TCGReg reg = tcg_regset_first(set);
3171             if (s->reg_to_temp[reg] == NULL) {
3172                 return reg;
3173             }
3174         } else {
3175             for (i = 0; i < n; i++) {
3176                 TCGReg reg = order[i];
3177                 if (s->reg_to_temp[reg] == NULL &&
3178                     tcg_regset_test_reg(set, reg)) {
3179                     return reg;
3180                 }
3181             }
3182         }
3183     }
3184 
3185     /* We must spill something.  */
3186     for (j = f; j < 2; j++) {
3187         TCGRegSet set = reg_ct[j];
3188 
3189         if (tcg_regset_single(set)) {
3190             /* One register in the set.  */
3191             TCGReg reg = tcg_regset_first(set);
3192             tcg_reg_free(s, reg, allocated_regs);
3193             return reg;
3194         } else {
3195             for (i = 0; i < n; i++) {
3196                 TCGReg reg = order[i];
3197                 if (tcg_regset_test_reg(set, reg)) {
3198                     tcg_reg_free(s, reg, allocated_regs);
3199                     return reg;
3200                 }
3201             }
3202         }
3203     }
3204 
3205     tcg_abort();
3206 }
3207 
3208 /* Make sure the temporary is in a register.  If needed, allocate the register
3209    from DESIRED while avoiding ALLOCATED.  */
3210 static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
3211                       TCGRegSet allocated_regs, TCGRegSet preferred_regs)
3212 {
3213     TCGReg reg;
3214 
3215     switch (ts->val_type) {
3216     case TEMP_VAL_REG:
3217         return;
3218     case TEMP_VAL_CONST:
3219         reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
3220                             preferred_regs, ts->indirect_base);
3221         tcg_out_movi(s, ts->type, reg, ts->val);
3222         ts->mem_coherent = 0;
3223         break;
3224     case TEMP_VAL_MEM:
3225         reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
3226                             preferred_regs, ts->indirect_base);
3227         tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
3228         ts->mem_coherent = 1;
3229         break;
3230     case TEMP_VAL_DEAD:
3231     default:
3232         tcg_abort();
3233     }
3234     ts->reg = reg;
3235     ts->val_type = TEMP_VAL_REG;
3236     s->reg_to_temp[reg] = ts;
3237 }
3238 
3239 /* Save a temporary to memory. 'allocated_regs' is used in case a
3240    temporary registers needs to be allocated to store a constant.  */
3241 static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
3242 {
3243     /* The liveness analysis already ensures that globals are back
3244        in memory. Keep an tcg_debug_assert for safety. */
3245     tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || ts->fixed_reg);
3246 }
3247 
3248 /* save globals to their canonical location and assume they can be
3249    modified be the following code. 'allocated_regs' is used in case a
3250    temporary registers needs to be allocated to store a constant. */
3251 static void save_globals(TCGContext *s, TCGRegSet allocated_regs)
3252 {
3253     int i, n;
3254 
3255     for (i = 0, n = s->nb_globals; i < n; i++) {
3256         temp_save(s, &s->temps[i], allocated_regs);
3257     }
3258 }
3259 
3260 /* sync globals to their canonical location and assume they can be
3261    read by the following code. 'allocated_regs' is used in case a
3262    temporary registers needs to be allocated to store a constant. */
3263 static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
3264 {
3265     int i, n;
3266 
3267     for (i = 0, n = s->nb_globals; i < n; i++) {
3268         TCGTemp *ts = &s->temps[i];
3269         tcg_debug_assert(ts->val_type != TEMP_VAL_REG
3270                          || ts->fixed_reg
3271                          || ts->mem_coherent);
3272     }
3273 }
3274 
3275 /* at the end of a basic block, we assume all temporaries are dead and
3276    all globals are stored at their canonical location. */
3277 static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
3278 {
3279     int i;
3280 
3281     for (i = s->nb_globals; i < s->nb_temps; i++) {
3282         TCGTemp *ts = &s->temps[i];
3283         if (ts->temp_local) {
3284             temp_save(s, ts, allocated_regs);
3285         } else {
3286             /* The liveness analysis already ensures that temps are dead.
3287                Keep an tcg_debug_assert for safety. */
3288             tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
3289         }
3290     }
3291 
3292     save_globals(s, allocated_regs);
3293 }
3294 
3295 /*
3296  * Specialized code generation for INDEX_op_movi_*.
3297  */
3298 static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots,
3299                                   tcg_target_ulong val, TCGLifeData arg_life,
3300                                   TCGRegSet preferred_regs)
3301 {
3302     /* ENV should not be modified.  */
3303     tcg_debug_assert(!ots->fixed_reg);
3304 
3305     /* The movi is not explicitly generated here.  */
3306     if (ots->val_type == TEMP_VAL_REG) {
3307         s->reg_to_temp[ots->reg] = NULL;
3308     }
3309     ots->val_type = TEMP_VAL_CONST;
3310     ots->val = val;
3311     ots->mem_coherent = 0;
3312     if (NEED_SYNC_ARG(0)) {
3313         temp_sync(s, ots, s->reserved_regs, preferred_regs, IS_DEAD_ARG(0));
3314     } else if (IS_DEAD_ARG(0)) {
3315         temp_dead(s, ots);
3316     }
3317 }
3318 
3319 static void tcg_reg_alloc_movi(TCGContext *s, const TCGOp *op)
3320 {
3321     TCGTemp *ots = arg_temp(op->args[0]);
3322     tcg_target_ulong val = op->args[1];
3323 
3324     tcg_reg_alloc_do_movi(s, ots, val, op->life, op->output_pref[0]);
3325 }
3326 
3327 /*
3328  * Specialized code generation for INDEX_op_mov_*.
3329  */
3330 static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op)
3331 {
3332     const TCGLifeData arg_life = op->life;
3333     TCGRegSet allocated_regs, preferred_regs;
3334     TCGTemp *ts, *ots;
3335     TCGType otype, itype;
3336 
3337     allocated_regs = s->reserved_regs;
3338     preferred_regs = op->output_pref[0];
3339     ots = arg_temp(op->args[0]);
3340     ts = arg_temp(op->args[1]);
3341 
3342     /* ENV should not be modified.  */
3343     tcg_debug_assert(!ots->fixed_reg);
3344 
3345     /* Note that otype != itype for no-op truncation.  */
3346     otype = ots->type;
3347     itype = ts->type;
3348 
3349     if (ts->val_type == TEMP_VAL_CONST) {
3350         /* propagate constant or generate sti */
3351         tcg_target_ulong val = ts->val;
3352         if (IS_DEAD_ARG(1)) {
3353             temp_dead(s, ts);
3354         }
3355         tcg_reg_alloc_do_movi(s, ots, val, arg_life, preferred_regs);
3356         return;
3357     }
3358 
3359     /* If the source value is in memory we're going to be forced
3360        to have it in a register in order to perform the copy.  Copy
3361        the SOURCE value into its own register first, that way we
3362        don't have to reload SOURCE the next time it is used. */
3363     if (ts->val_type == TEMP_VAL_MEM) {
3364         temp_load(s, ts, tcg_target_available_regs[itype],
3365                   allocated_regs, preferred_regs);
3366     }
3367 
3368     tcg_debug_assert(ts->val_type == TEMP_VAL_REG);
3369     if (IS_DEAD_ARG(0)) {
3370         /* mov to a non-saved dead register makes no sense (even with
3371            liveness analysis disabled). */
3372         tcg_debug_assert(NEED_SYNC_ARG(0));
3373         if (!ots->mem_allocated) {
3374             temp_allocate_frame(s, ots);
3375         }
3376         tcg_out_st(s, otype, ts->reg, ots->mem_base->reg, ots->mem_offset);
3377         if (IS_DEAD_ARG(1)) {
3378             temp_dead(s, ts);
3379         }
3380         temp_dead(s, ots);
3381     } else {
3382         if (IS_DEAD_ARG(1) && !ts->fixed_reg) {
3383             /* the mov can be suppressed */
3384             if (ots->val_type == TEMP_VAL_REG) {
3385                 s->reg_to_temp[ots->reg] = NULL;
3386             }
3387             ots->reg = ts->reg;
3388             temp_dead(s, ts);
3389         } else {
3390             if (ots->val_type != TEMP_VAL_REG) {
3391                 /* When allocating a new register, make sure to not spill the
3392                    input one. */
3393                 tcg_regset_set_reg(allocated_regs, ts->reg);
3394                 ots->reg = tcg_reg_alloc(s, tcg_target_available_regs[otype],
3395                                          allocated_regs, preferred_regs,
3396                                          ots->indirect_base);
3397             }
3398             if (!tcg_out_mov(s, otype, ots->reg, ts->reg)) {
3399                 /*
3400                  * Cross register class move not supported.
3401                  * Store the source register into the destination slot
3402                  * and leave the destination temp as TEMP_VAL_MEM.
3403                  */
3404                 assert(!ots->fixed_reg);
3405                 if (!ts->mem_allocated) {
3406                     temp_allocate_frame(s, ots);
3407                 }
3408                 tcg_out_st(s, ts->type, ts->reg,
3409                            ots->mem_base->reg, ots->mem_offset);
3410                 ots->mem_coherent = 1;
3411                 temp_free_or_dead(s, ots, -1);
3412                 return;
3413             }
3414         }
3415         ots->val_type = TEMP_VAL_REG;
3416         ots->mem_coherent = 0;
3417         s->reg_to_temp[ots->reg] = ots;
3418         if (NEED_SYNC_ARG(0)) {
3419             temp_sync(s, ots, allocated_regs, 0, 0);
3420         }
3421     }
3422 }
3423 
3424 /*
3425  * Specialized code generation for INDEX_op_dup_vec.
3426  */
3427 static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op)
3428 {
3429     const TCGLifeData arg_life = op->life;
3430     TCGRegSet dup_out_regs, dup_in_regs;
3431     TCGTemp *its, *ots;
3432     TCGType itype, vtype;
3433     intptr_t endian_fixup;
3434     unsigned vece;
3435     bool ok;
3436 
3437     ots = arg_temp(op->args[0]);
3438     its = arg_temp(op->args[1]);
3439 
3440     /* ENV should not be modified.  */
3441     tcg_debug_assert(!ots->fixed_reg);
3442 
3443     itype = its->type;
3444     vece = TCGOP_VECE(op);
3445     vtype = TCGOP_VECL(op) + TCG_TYPE_V64;
3446 
3447     if (its->val_type == TEMP_VAL_CONST) {
3448         /* Propagate constant via movi -> dupi.  */
3449         tcg_target_ulong val = its->val;
3450         if (IS_DEAD_ARG(1)) {
3451             temp_dead(s, its);
3452         }
3453         tcg_reg_alloc_do_movi(s, ots, val, arg_life, op->output_pref[0]);
3454         return;
3455     }
3456 
3457     dup_out_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[0].u.regs;
3458     dup_in_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[1].u.regs;
3459 
3460     /* Allocate the output register now.  */
3461     if (ots->val_type != TEMP_VAL_REG) {
3462         TCGRegSet allocated_regs = s->reserved_regs;
3463 
3464         if (!IS_DEAD_ARG(1) && its->val_type == TEMP_VAL_REG) {
3465             /* Make sure to not spill the input register. */
3466             tcg_regset_set_reg(allocated_regs, its->reg);
3467         }
3468         ots->reg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
3469                                  op->output_pref[0], ots->indirect_base);
3470         ots->val_type = TEMP_VAL_REG;
3471         ots->mem_coherent = 0;
3472         s->reg_to_temp[ots->reg] = ots;
3473     }
3474 
3475     switch (its->val_type) {
3476     case TEMP_VAL_REG:
3477         /*
3478          * The dup constriaints must be broad, covering all possible VECE.
3479          * However, tcg_op_dup_vec() gets to see the VECE and we allow it
3480          * to fail, indicating that extra moves are required for that case.
3481          */
3482         if (tcg_regset_test_reg(dup_in_regs, its->reg)) {
3483             if (tcg_out_dup_vec(s, vtype, vece, ots->reg, its->reg)) {
3484                 goto done;
3485             }
3486             /* Try again from memory or a vector input register.  */
3487         }
3488         if (!its->mem_coherent) {
3489             /*
3490              * The input register is not synced, and so an extra store
3491              * would be required to use memory.  Attempt an integer-vector
3492              * register move first.  We do not have a TCGRegSet for this.
3493              */
3494             if (tcg_out_mov(s, itype, ots->reg, its->reg)) {
3495                 break;
3496             }
3497             /* Sync the temp back to its slot and load from there.  */
3498             temp_sync(s, its, s->reserved_regs, 0, 0);
3499         }
3500         /* fall through */
3501 
3502     case TEMP_VAL_MEM:
3503 #ifdef HOST_WORDS_BIGENDIAN
3504         endian_fixup = itype == TCG_TYPE_I32 ? 4 : 8;
3505         endian_fixup -= 1 << vece;
3506 #else
3507         endian_fixup = 0;
3508 #endif
3509         if (tcg_out_dupm_vec(s, vtype, vece, ots->reg, its->mem_base->reg,
3510                              its->mem_offset + endian_fixup)) {
3511             goto done;
3512         }
3513         tcg_out_ld(s, itype, ots->reg, its->mem_base->reg, its->mem_offset);
3514         break;
3515 
3516     default:
3517         g_assert_not_reached();
3518     }
3519 
3520     /* We now have a vector input register, so dup must succeed. */
3521     ok = tcg_out_dup_vec(s, vtype, vece, ots->reg, ots->reg);
3522     tcg_debug_assert(ok);
3523 
3524  done:
3525     if (IS_DEAD_ARG(1)) {
3526         temp_dead(s, its);
3527     }
3528     if (NEED_SYNC_ARG(0)) {
3529         temp_sync(s, ots, s->reserved_regs, 0, 0);
3530     }
3531     if (IS_DEAD_ARG(0)) {
3532         temp_dead(s, ots);
3533     }
3534 }
3535 
3536 static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
3537 {
3538     const TCGLifeData arg_life = op->life;
3539     const TCGOpDef * const def = &tcg_op_defs[op->opc];
3540     TCGRegSet i_allocated_regs;
3541     TCGRegSet o_allocated_regs;
3542     int i, k, nb_iargs, nb_oargs;
3543     TCGReg reg;
3544     TCGArg arg;
3545     const TCGArgConstraint *arg_ct;
3546     TCGTemp *ts;
3547     TCGArg new_args[TCG_MAX_OP_ARGS];
3548     int const_args[TCG_MAX_OP_ARGS];
3549 
3550     nb_oargs = def->nb_oargs;
3551     nb_iargs = def->nb_iargs;
3552 
3553     /* copy constants */
3554     memcpy(new_args + nb_oargs + nb_iargs,
3555            op->args + nb_oargs + nb_iargs,
3556            sizeof(TCGArg) * def->nb_cargs);
3557 
3558     i_allocated_regs = s->reserved_regs;
3559     o_allocated_regs = s->reserved_regs;
3560 
3561     /* satisfy input constraints */
3562     for (k = 0; k < nb_iargs; k++) {
3563         TCGRegSet i_preferred_regs, o_preferred_regs;
3564 
3565         i = def->sorted_args[nb_oargs + k];
3566         arg = op->args[i];
3567         arg_ct = &def->args_ct[i];
3568         ts = arg_temp(arg);
3569 
3570         if (ts->val_type == TEMP_VAL_CONST
3571             && tcg_target_const_match(ts->val, ts->type, arg_ct)) {
3572             /* constant is OK for instruction */
3573             const_args[i] = 1;
3574             new_args[i] = ts->val;
3575             continue;
3576         }
3577 
3578         i_preferred_regs = o_preferred_regs = 0;
3579         if (arg_ct->ct & TCG_CT_IALIAS) {
3580             o_preferred_regs = op->output_pref[arg_ct->alias_index];
3581             if (ts->fixed_reg) {
3582                 /* if fixed register, we must allocate a new register
3583                    if the alias is not the same register */
3584                 if (arg != op->args[arg_ct->alias_index]) {
3585                     goto allocate_in_reg;
3586                 }
3587             } else {
3588                 /* if the input is aliased to an output and if it is
3589                    not dead after the instruction, we must allocate
3590                    a new register and move it */
3591                 if (!IS_DEAD_ARG(i)) {
3592                     goto allocate_in_reg;
3593                 }
3594 
3595                 /* check if the current register has already been allocated
3596                    for another input aliased to an output */
3597                 if (ts->val_type == TEMP_VAL_REG) {
3598                     int k2, i2;
3599                     reg = ts->reg;
3600                     for (k2 = 0 ; k2 < k ; k2++) {
3601                         i2 = def->sorted_args[nb_oargs + k2];
3602                         if ((def->args_ct[i2].ct & TCG_CT_IALIAS) &&
3603                             reg == new_args[i2]) {
3604                             goto allocate_in_reg;
3605                         }
3606                     }
3607                 }
3608                 i_preferred_regs = o_preferred_regs;
3609             }
3610         }
3611 
3612         temp_load(s, ts, arg_ct->u.regs, i_allocated_regs, i_preferred_regs);
3613         reg = ts->reg;
3614 
3615         if (tcg_regset_test_reg(arg_ct->u.regs, reg)) {
3616             /* nothing to do : the constraint is satisfied */
3617         } else {
3618         allocate_in_reg:
3619             /* allocate a new register matching the constraint
3620                and move the temporary register into it */
3621             temp_load(s, ts, tcg_target_available_regs[ts->type],
3622                       i_allocated_regs, 0);
3623             reg = tcg_reg_alloc(s, arg_ct->u.regs, i_allocated_regs,
3624                                 o_preferred_regs, ts->indirect_base);
3625             if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
3626                 /*
3627                  * Cross register class move not supported.  Sync the
3628                  * temp back to its slot and load from there.
3629                  */
3630                 temp_sync(s, ts, i_allocated_regs, 0, 0);
3631                 tcg_out_ld(s, ts->type, reg,
3632                            ts->mem_base->reg, ts->mem_offset);
3633             }
3634         }
3635         new_args[i] = reg;
3636         const_args[i] = 0;
3637         tcg_regset_set_reg(i_allocated_regs, reg);
3638     }
3639 
3640     /* mark dead temporaries and free the associated registers */
3641     for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
3642         if (IS_DEAD_ARG(i)) {
3643             temp_dead(s, arg_temp(op->args[i]));
3644         }
3645     }
3646 
3647     if (def->flags & TCG_OPF_BB_END) {
3648         tcg_reg_alloc_bb_end(s, i_allocated_regs);
3649     } else {
3650         if (def->flags & TCG_OPF_CALL_CLOBBER) {
3651             /* XXX: permit generic clobber register list ? */
3652             for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
3653                 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
3654                     tcg_reg_free(s, i, i_allocated_regs);
3655                 }
3656             }
3657         }
3658         if (def->flags & TCG_OPF_SIDE_EFFECTS) {
3659             /* sync globals if the op has side effects and might trigger
3660                an exception. */
3661             sync_globals(s, i_allocated_regs);
3662         }
3663 
3664         /* satisfy the output constraints */
3665         for(k = 0; k < nb_oargs; k++) {
3666             i = def->sorted_args[k];
3667             arg = op->args[i];
3668             arg_ct = &def->args_ct[i];
3669             ts = arg_temp(arg);
3670 
3671             /* ENV should not be modified.  */
3672             tcg_debug_assert(!ts->fixed_reg);
3673 
3674             if ((arg_ct->ct & TCG_CT_ALIAS)
3675                 && !const_args[arg_ct->alias_index]) {
3676                 reg = new_args[arg_ct->alias_index];
3677             } else if (arg_ct->ct & TCG_CT_NEWREG) {
3678                 reg = tcg_reg_alloc(s, arg_ct->u.regs,
3679                                     i_allocated_regs | o_allocated_regs,
3680                                     op->output_pref[k], ts->indirect_base);
3681             } else {
3682                 reg = tcg_reg_alloc(s, arg_ct->u.regs, o_allocated_regs,
3683                                     op->output_pref[k], ts->indirect_base);
3684             }
3685             tcg_regset_set_reg(o_allocated_regs, reg);
3686             if (ts->val_type == TEMP_VAL_REG) {
3687                 s->reg_to_temp[ts->reg] = NULL;
3688             }
3689             ts->val_type = TEMP_VAL_REG;
3690             ts->reg = reg;
3691             /*
3692              * Temp value is modified, so the value kept in memory is
3693              * potentially not the same.
3694              */
3695             ts->mem_coherent = 0;
3696             s->reg_to_temp[reg] = ts;
3697             new_args[i] = reg;
3698         }
3699     }
3700 
3701     /* emit instruction */
3702     if (def->flags & TCG_OPF_VECTOR) {
3703         tcg_out_vec_op(s, op->opc, TCGOP_VECL(op), TCGOP_VECE(op),
3704                        new_args, const_args);
3705     } else {
3706         tcg_out_op(s, op->opc, new_args, const_args);
3707     }
3708 
3709     /* move the outputs in the correct register if needed */
3710     for(i = 0; i < nb_oargs; i++) {
3711         ts = arg_temp(op->args[i]);
3712 
3713         /* ENV should not be modified.  */
3714         tcg_debug_assert(!ts->fixed_reg);
3715 
3716         if (NEED_SYNC_ARG(i)) {
3717             temp_sync(s, ts, o_allocated_regs, 0, IS_DEAD_ARG(i));
3718         } else if (IS_DEAD_ARG(i)) {
3719             temp_dead(s, ts);
3720         }
3721     }
3722 }
3723 
3724 #ifdef TCG_TARGET_STACK_GROWSUP
3725 #define STACK_DIR(x) (-(x))
3726 #else
3727 #define STACK_DIR(x) (x)
3728 #endif
3729 
3730 static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
3731 {
3732     const int nb_oargs = TCGOP_CALLO(op);
3733     const int nb_iargs = TCGOP_CALLI(op);
3734     const TCGLifeData arg_life = op->life;
3735     int flags, nb_regs, i;
3736     TCGReg reg;
3737     TCGArg arg;
3738     TCGTemp *ts;
3739     intptr_t stack_offset;
3740     size_t call_stack_size;
3741     tcg_insn_unit *func_addr;
3742     int allocate_args;
3743     TCGRegSet allocated_regs;
3744 
3745     func_addr = (tcg_insn_unit *)(intptr_t)op->args[nb_oargs + nb_iargs];
3746     flags = op->args[nb_oargs + nb_iargs + 1];
3747 
3748     nb_regs = ARRAY_SIZE(tcg_target_call_iarg_regs);
3749     if (nb_regs > nb_iargs) {
3750         nb_regs = nb_iargs;
3751     }
3752 
3753     /* assign stack slots first */
3754     call_stack_size = (nb_iargs - nb_regs) * sizeof(tcg_target_long);
3755     call_stack_size = (call_stack_size + TCG_TARGET_STACK_ALIGN - 1) &
3756         ~(TCG_TARGET_STACK_ALIGN - 1);
3757     allocate_args = (call_stack_size > TCG_STATIC_CALL_ARGS_SIZE);
3758     if (allocate_args) {
3759         /* XXX: if more than TCG_STATIC_CALL_ARGS_SIZE is needed,
3760            preallocate call stack */
3761         tcg_abort();
3762     }
3763 
3764     stack_offset = TCG_TARGET_CALL_STACK_OFFSET;
3765     for (i = nb_regs; i < nb_iargs; i++) {
3766         arg = op->args[nb_oargs + i];
3767 #ifdef TCG_TARGET_STACK_GROWSUP
3768         stack_offset -= sizeof(tcg_target_long);
3769 #endif
3770         if (arg != TCG_CALL_DUMMY_ARG) {
3771             ts = arg_temp(arg);
3772             temp_load(s, ts, tcg_target_available_regs[ts->type],
3773                       s->reserved_regs, 0);
3774             tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK, stack_offset);
3775         }
3776 #ifndef TCG_TARGET_STACK_GROWSUP
3777         stack_offset += sizeof(tcg_target_long);
3778 #endif
3779     }
3780 
3781     /* assign input registers */
3782     allocated_regs = s->reserved_regs;
3783     for (i = 0; i < nb_regs; i++) {
3784         arg = op->args[nb_oargs + i];
3785         if (arg != TCG_CALL_DUMMY_ARG) {
3786             ts = arg_temp(arg);
3787             reg = tcg_target_call_iarg_regs[i];
3788 
3789             if (ts->val_type == TEMP_VAL_REG) {
3790                 if (ts->reg != reg) {
3791                     tcg_reg_free(s, reg, allocated_regs);
3792                     if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
3793                         /*
3794                          * Cross register class move not supported.  Sync the
3795                          * temp back to its slot and load from there.
3796                          */
3797                         temp_sync(s, ts, allocated_regs, 0, 0);
3798                         tcg_out_ld(s, ts->type, reg,
3799                                    ts->mem_base->reg, ts->mem_offset);
3800                     }
3801                 }
3802             } else {
3803                 TCGRegSet arg_set = 0;
3804 
3805                 tcg_reg_free(s, reg, allocated_regs);
3806                 tcg_regset_set_reg(arg_set, reg);
3807                 temp_load(s, ts, arg_set, allocated_regs, 0);
3808             }
3809 
3810             tcg_regset_set_reg(allocated_regs, reg);
3811         }
3812     }
3813 
3814     /* mark dead temporaries and free the associated registers */
3815     for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
3816         if (IS_DEAD_ARG(i)) {
3817             temp_dead(s, arg_temp(op->args[i]));
3818         }
3819     }
3820 
3821     /* clobber call registers */
3822     for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
3823         if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
3824             tcg_reg_free(s, i, allocated_regs);
3825         }
3826     }
3827 
3828     /* Save globals if they might be written by the helper, sync them if
3829        they might be read. */
3830     if (flags & TCG_CALL_NO_READ_GLOBALS) {
3831         /* Nothing to do */
3832     } else if (flags & TCG_CALL_NO_WRITE_GLOBALS) {
3833         sync_globals(s, allocated_regs);
3834     } else {
3835         save_globals(s, allocated_regs);
3836     }
3837 
3838     tcg_out_call(s, func_addr);
3839 
3840     /* assign output registers and emit moves if needed */
3841     for(i = 0; i < nb_oargs; i++) {
3842         arg = op->args[i];
3843         ts = arg_temp(arg);
3844 
3845         /* ENV should not be modified.  */
3846         tcg_debug_assert(!ts->fixed_reg);
3847 
3848         reg = tcg_target_call_oarg_regs[i];
3849         tcg_debug_assert(s->reg_to_temp[reg] == NULL);
3850         if (ts->val_type == TEMP_VAL_REG) {
3851             s->reg_to_temp[ts->reg] = NULL;
3852         }
3853         ts->val_type = TEMP_VAL_REG;
3854         ts->reg = reg;
3855         ts->mem_coherent = 0;
3856         s->reg_to_temp[reg] = ts;
3857         if (NEED_SYNC_ARG(i)) {
3858             temp_sync(s, ts, allocated_regs, 0, IS_DEAD_ARG(i));
3859         } else if (IS_DEAD_ARG(i)) {
3860             temp_dead(s, ts);
3861         }
3862     }
3863 }
3864 
3865 #ifdef CONFIG_PROFILER
3866 
3867 /* avoid copy/paste errors */
3868 #define PROF_ADD(to, from, field)                       \
3869     do {                                                \
3870         (to)->field += atomic_read(&((from)->field));   \
3871     } while (0)
3872 
3873 #define PROF_MAX(to, from, field)                                       \
3874     do {                                                                \
3875         typeof((from)->field) val__ = atomic_read(&((from)->field));    \
3876         if (val__ > (to)->field) {                                      \
3877             (to)->field = val__;                                        \
3878         }                                                               \
3879     } while (0)
3880 
3881 /* Pass in a zero'ed @prof */
3882 static inline
3883 void tcg_profile_snapshot(TCGProfile *prof, bool counters, bool table)
3884 {
3885     unsigned int n_ctxs = atomic_read(&n_tcg_ctxs);
3886     unsigned int i;
3887 
3888     for (i = 0; i < n_ctxs; i++) {
3889         TCGContext *s = atomic_read(&tcg_ctxs[i]);
3890         const TCGProfile *orig = &s->prof;
3891 
3892         if (counters) {
3893             PROF_ADD(prof, orig, cpu_exec_time);
3894             PROF_ADD(prof, orig, tb_count1);
3895             PROF_ADD(prof, orig, tb_count);
3896             PROF_ADD(prof, orig, op_count);
3897             PROF_MAX(prof, orig, op_count_max);
3898             PROF_ADD(prof, orig, temp_count);
3899             PROF_MAX(prof, orig, temp_count_max);
3900             PROF_ADD(prof, orig, del_op_count);
3901             PROF_ADD(prof, orig, code_in_len);
3902             PROF_ADD(prof, orig, code_out_len);
3903             PROF_ADD(prof, orig, search_out_len);
3904             PROF_ADD(prof, orig, interm_time);
3905             PROF_ADD(prof, orig, code_time);
3906             PROF_ADD(prof, orig, la_time);
3907             PROF_ADD(prof, orig, opt_time);
3908             PROF_ADD(prof, orig, restore_count);
3909             PROF_ADD(prof, orig, restore_time);
3910         }
3911         if (table) {
3912             int i;
3913 
3914             for (i = 0; i < NB_OPS; i++) {
3915                 PROF_ADD(prof, orig, table_op_count[i]);
3916             }
3917         }
3918     }
3919 }
3920 
3921 #undef PROF_ADD
3922 #undef PROF_MAX
3923 
3924 static void tcg_profile_snapshot_counters(TCGProfile *prof)
3925 {
3926     tcg_profile_snapshot(prof, true, false);
3927 }
3928 
3929 static void tcg_profile_snapshot_table(TCGProfile *prof)
3930 {
3931     tcg_profile_snapshot(prof, false, true);
3932 }
3933 
3934 void tcg_dump_op_count(void)
3935 {
3936     TCGProfile prof = {};
3937     int i;
3938 
3939     tcg_profile_snapshot_table(&prof);
3940     for (i = 0; i < NB_OPS; i++) {
3941         qemu_printf("%s %" PRId64 "\n", tcg_op_defs[i].name,
3942                     prof.table_op_count[i]);
3943     }
3944 }
3945 
3946 int64_t tcg_cpu_exec_time(void)
3947 {
3948     unsigned int n_ctxs = atomic_read(&n_tcg_ctxs);
3949     unsigned int i;
3950     int64_t ret = 0;
3951 
3952     for (i = 0; i < n_ctxs; i++) {
3953         const TCGContext *s = atomic_read(&tcg_ctxs[i]);
3954         const TCGProfile *prof = &s->prof;
3955 
3956         ret += atomic_read(&prof->cpu_exec_time);
3957     }
3958     return ret;
3959 }
3960 #else
3961 void tcg_dump_op_count(void)
3962 {
3963     qemu_printf("[TCG profiler not compiled]\n");
3964 }
3965 
3966 int64_t tcg_cpu_exec_time(void)
3967 {
3968     error_report("%s: TCG profiler not compiled", __func__);
3969     exit(EXIT_FAILURE);
3970 }
3971 #endif
3972 
3973 
3974 int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
3975 {
3976 #ifdef CONFIG_PROFILER
3977     TCGProfile *prof = &s->prof;
3978 #endif
3979     int i, num_insns;
3980     TCGOp *op;
3981 
3982 #ifdef CONFIG_PROFILER
3983     {
3984         int n = 0;
3985 
3986         QTAILQ_FOREACH(op, &s->ops, link) {
3987             n++;
3988         }
3989         atomic_set(&prof->op_count, prof->op_count + n);
3990         if (n > prof->op_count_max) {
3991             atomic_set(&prof->op_count_max, n);
3992         }
3993 
3994         n = s->nb_temps;
3995         atomic_set(&prof->temp_count, prof->temp_count + n);
3996         if (n > prof->temp_count_max) {
3997             atomic_set(&prof->temp_count_max, n);
3998         }
3999     }
4000 #endif
4001 
4002 #ifdef DEBUG_DISAS
4003     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)
4004                  && qemu_log_in_addr_range(tb->pc))) {
4005         qemu_log_lock();
4006         qemu_log("OP:\n");
4007         tcg_dump_ops(s, false);
4008         qemu_log("\n");
4009         qemu_log_unlock();
4010     }
4011 #endif
4012 
4013 #ifdef CONFIG_DEBUG_TCG
4014     /* Ensure all labels referenced have been emitted.  */
4015     {
4016         TCGLabel *l;
4017         bool error = false;
4018 
4019         QSIMPLEQ_FOREACH(l, &s->labels, next) {
4020             if (unlikely(!l->present) && l->refs) {
4021                 qemu_log_mask(CPU_LOG_TB_OP,
4022                               "$L%d referenced but not present.\n", l->id);
4023                 error = true;
4024             }
4025         }
4026         assert(!error);
4027     }
4028 #endif
4029 
4030 #ifdef CONFIG_PROFILER
4031     atomic_set(&prof->opt_time, prof->opt_time - profile_getclock());
4032 #endif
4033 
4034 #ifdef USE_TCG_OPTIMIZATIONS
4035     tcg_optimize(s);
4036 #endif
4037 
4038 #ifdef CONFIG_PROFILER
4039     atomic_set(&prof->opt_time, prof->opt_time + profile_getclock());
4040     atomic_set(&prof->la_time, prof->la_time - profile_getclock());
4041 #endif
4042 
4043     reachable_code_pass(s);
4044     liveness_pass_1(s);
4045 
4046     if (s->nb_indirects > 0) {
4047 #ifdef DEBUG_DISAS
4048         if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
4049                      && qemu_log_in_addr_range(tb->pc))) {
4050             qemu_log_lock();
4051             qemu_log("OP before indirect lowering:\n");
4052             tcg_dump_ops(s, false);
4053             qemu_log("\n");
4054             qemu_log_unlock();
4055         }
4056 #endif
4057         /* Replace indirect temps with direct temps.  */
4058         if (liveness_pass_2(s)) {
4059             /* If changes were made, re-run liveness.  */
4060             liveness_pass_1(s);
4061         }
4062     }
4063 
4064 #ifdef CONFIG_PROFILER
4065     atomic_set(&prof->la_time, prof->la_time + profile_getclock());
4066 #endif
4067 
4068 #ifdef DEBUG_DISAS
4069     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT)
4070                  && qemu_log_in_addr_range(tb->pc))) {
4071         qemu_log_lock();
4072         qemu_log("OP after optimization and liveness analysis:\n");
4073         tcg_dump_ops(s, true);
4074         qemu_log("\n");
4075         qemu_log_unlock();
4076     }
4077 #endif
4078 
4079     tcg_reg_alloc_start(s);
4080 
4081     s->code_buf = tb->tc.ptr;
4082     s->code_ptr = tb->tc.ptr;
4083 
4084 #ifdef TCG_TARGET_NEED_LDST_LABELS
4085     QSIMPLEQ_INIT(&s->ldst_labels);
4086 #endif
4087 #ifdef TCG_TARGET_NEED_POOL_LABELS
4088     s->pool_labels = NULL;
4089 #endif
4090 
4091     num_insns = -1;
4092     QTAILQ_FOREACH(op, &s->ops, link) {
4093         TCGOpcode opc = op->opc;
4094 
4095 #ifdef CONFIG_PROFILER
4096         atomic_set(&prof->table_op_count[opc], prof->table_op_count[opc] + 1);
4097 #endif
4098 
4099         switch (opc) {
4100         case INDEX_op_mov_i32:
4101         case INDEX_op_mov_i64:
4102         case INDEX_op_mov_vec:
4103             tcg_reg_alloc_mov(s, op);
4104             break;
4105         case INDEX_op_movi_i32:
4106         case INDEX_op_movi_i64:
4107         case INDEX_op_dupi_vec:
4108             tcg_reg_alloc_movi(s, op);
4109             break;
4110         case INDEX_op_dup_vec:
4111             tcg_reg_alloc_dup(s, op);
4112             break;
4113         case INDEX_op_insn_start:
4114             if (num_insns >= 0) {
4115                 size_t off = tcg_current_code_size(s);
4116                 s->gen_insn_end_off[num_insns] = off;
4117                 /* Assert that we do not overflow our stored offset.  */
4118                 assert(s->gen_insn_end_off[num_insns] == off);
4119             }
4120             num_insns++;
4121             for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
4122                 target_ulong a;
4123 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
4124                 a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
4125 #else
4126                 a = op->args[i];
4127 #endif
4128                 s->gen_insn_data[num_insns][i] = a;
4129             }
4130             break;
4131         case INDEX_op_discard:
4132             temp_dead(s, arg_temp(op->args[0]));
4133             break;
4134         case INDEX_op_set_label:
4135             tcg_reg_alloc_bb_end(s, s->reserved_regs);
4136             tcg_out_label(s, arg_label(op->args[0]), s->code_ptr);
4137             break;
4138         case INDEX_op_call:
4139             tcg_reg_alloc_call(s, op);
4140             break;
4141         default:
4142             /* Sanity check that we've not introduced any unhandled opcodes. */
4143             tcg_debug_assert(tcg_op_supported(opc));
4144             /* Note: in order to speed up the code, it would be much
4145                faster to have specialized register allocator functions for
4146                some common argument patterns */
4147             tcg_reg_alloc_op(s, op);
4148             break;
4149         }
4150 #ifdef CONFIG_DEBUG_TCG
4151         check_regs(s);
4152 #endif
4153         /* Test for (pending) buffer overflow.  The assumption is that any
4154            one operation beginning below the high water mark cannot overrun
4155            the buffer completely.  Thus we can test for overflow after
4156            generating code without having to check during generation.  */
4157         if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
4158             return -1;
4159         }
4160         /* Test for TB overflow, as seen by gen_insn_end_off.  */
4161         if (unlikely(tcg_current_code_size(s) > UINT16_MAX)) {
4162             return -2;
4163         }
4164     }
4165     tcg_debug_assert(num_insns >= 0);
4166     s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);
4167 
4168     /* Generate TB finalization at the end of block */
4169 #ifdef TCG_TARGET_NEED_LDST_LABELS
4170     i = tcg_out_ldst_finalize(s);
4171     if (i < 0) {
4172         return i;
4173     }
4174 #endif
4175 #ifdef TCG_TARGET_NEED_POOL_LABELS
4176     i = tcg_out_pool_finalize(s);
4177     if (i < 0) {
4178         return i;
4179     }
4180 #endif
4181     if (!tcg_resolve_relocs(s)) {
4182         return -2;
4183     }
4184 
4185     /* flush instruction cache */
4186     flush_icache_range((uintptr_t)s->code_buf, (uintptr_t)s->code_ptr);
4187 
4188     return tcg_current_code_size(s);
4189 }
4190 
4191 #ifdef CONFIG_PROFILER
4192 void tcg_dump_info(void)
4193 {
4194     TCGProfile prof = {};
4195     const TCGProfile *s;
4196     int64_t tb_count;
4197     int64_t tb_div_count;
4198     int64_t tot;
4199 
4200     tcg_profile_snapshot_counters(&prof);
4201     s = &prof;
4202     tb_count = s->tb_count;
4203     tb_div_count = tb_count ? tb_count : 1;
4204     tot = s->interm_time + s->code_time;
4205 
4206     qemu_printf("JIT cycles          %" PRId64 " (%0.3f s at 2.4 GHz)\n",
4207                 tot, tot / 2.4e9);
4208     qemu_printf("translated TBs      %" PRId64 " (aborted=%" PRId64
4209                 " %0.1f%%)\n",
4210                 tb_count, s->tb_count1 - tb_count,
4211                 (double)(s->tb_count1 - s->tb_count)
4212                 / (s->tb_count1 ? s->tb_count1 : 1) * 100.0);
4213     qemu_printf("avg ops/TB          %0.1f max=%d\n",
4214                 (double)s->op_count / tb_div_count, s->op_count_max);
4215     qemu_printf("deleted ops/TB      %0.2f\n",
4216                 (double)s->del_op_count / tb_div_count);
4217     qemu_printf("avg temps/TB        %0.2f max=%d\n",
4218                 (double)s->temp_count / tb_div_count, s->temp_count_max);
4219     qemu_printf("avg host code/TB    %0.1f\n",
4220                 (double)s->code_out_len / tb_div_count);
4221     qemu_printf("avg search data/TB  %0.1f\n",
4222                 (double)s->search_out_len / tb_div_count);
4223 
4224     qemu_printf("cycles/op           %0.1f\n",
4225                 s->op_count ? (double)tot / s->op_count : 0);
4226     qemu_printf("cycles/in byte      %0.1f\n",
4227                 s->code_in_len ? (double)tot / s->code_in_len : 0);
4228     qemu_printf("cycles/out byte     %0.1f\n",
4229                 s->code_out_len ? (double)tot / s->code_out_len : 0);
4230     qemu_printf("cycles/search byte     %0.1f\n",
4231                 s->search_out_len ? (double)tot / s->search_out_len : 0);
4232     if (tot == 0) {
4233         tot = 1;
4234     }
4235     qemu_printf("  gen_interm time   %0.1f%%\n",
4236                 (double)s->interm_time / tot * 100.0);
4237     qemu_printf("  gen_code time     %0.1f%%\n",
4238                 (double)s->code_time / tot * 100.0);
4239     qemu_printf("optim./code time    %0.1f%%\n",
4240                 (double)s->opt_time / (s->code_time ? s->code_time : 1)
4241                 * 100.0);
4242     qemu_printf("liveness/code time  %0.1f%%\n",
4243                 (double)s->la_time / (s->code_time ? s->code_time : 1) * 100.0);
4244     qemu_printf("cpu_restore count   %" PRId64 "\n",
4245                 s->restore_count);
4246     qemu_printf("  avg cycles        %0.1f\n",
4247                 s->restore_count ? (double)s->restore_time / s->restore_count : 0);
4248 }
4249 #else
4250 void tcg_dump_info(void)
4251 {
4252     qemu_printf("[TCG profiler not compiled]\n");
4253 }
4254 #endif
4255 
4256 #ifdef ELF_HOST_MACHINE
4257 /* In order to use this feature, the backend needs to do three things:
4258 
4259    (1) Define ELF_HOST_MACHINE to indicate both what value to
4260        put into the ELF image and to indicate support for the feature.
4261 
4262    (2) Define tcg_register_jit.  This should create a buffer containing
4263        the contents of a .debug_frame section that describes the post-
4264        prologue unwind info for the tcg machine.
4265 
4266    (3) Call tcg_register_jit_int, with the constructed .debug_frame.
4267 */
4268 
4269 /* Begin GDB interface.  THE FOLLOWING MUST MATCH GDB DOCS.  */
4270 typedef enum {
4271     JIT_NOACTION = 0,
4272     JIT_REGISTER_FN,
4273     JIT_UNREGISTER_FN
4274 } jit_actions_t;
4275 
4276 struct jit_code_entry {
4277     struct jit_code_entry *next_entry;
4278     struct jit_code_entry *prev_entry;
4279     const void *symfile_addr;
4280     uint64_t symfile_size;
4281 };
4282 
4283 struct jit_descriptor {
4284     uint32_t version;
4285     uint32_t action_flag;
4286     struct jit_code_entry *relevant_entry;
4287     struct jit_code_entry *first_entry;
4288 };
4289 
4290 void __jit_debug_register_code(void) __attribute__((noinline));
4291 void __jit_debug_register_code(void)
4292 {
4293     asm("");
4294 }
4295 
4296 /* Must statically initialize the version, because GDB may check
4297    the version before we can set it.  */
4298 struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };
4299 
4300 /* End GDB interface.  */
4301 
4302 static int find_string(const char *strtab, const char *str)
4303 {
4304     const char *p = strtab + 1;
4305 
4306     while (1) {
4307         if (strcmp(p, str) == 0) {
4308             return p - strtab;
4309         }
4310         p += strlen(p) + 1;
4311     }
4312 }
4313 
4314 static void tcg_register_jit_int(void *buf_ptr, size_t buf_size,
4315                                  const void *debug_frame,
4316                                  size_t debug_frame_size)
4317 {
4318     struct __attribute__((packed)) DebugInfo {
4319         uint32_t  len;
4320         uint16_t  version;
4321         uint32_t  abbrev;
4322         uint8_t   ptr_size;
4323         uint8_t   cu_die;
4324         uint16_t  cu_lang;
4325         uintptr_t cu_low_pc;
4326         uintptr_t cu_high_pc;
4327         uint8_t   fn_die;
4328         char      fn_name[16];
4329         uintptr_t fn_low_pc;
4330         uintptr_t fn_high_pc;
4331         uint8_t   cu_eoc;
4332     };
4333 
4334     struct ElfImage {
4335         ElfW(Ehdr) ehdr;
4336         ElfW(Phdr) phdr;
4337         ElfW(Shdr) shdr[7];
4338         ElfW(Sym)  sym[2];
4339         struct DebugInfo di;
4340         uint8_t    da[24];
4341         char       str[80];
4342     };
4343 
4344     struct ElfImage *img;
4345 
4346     static const struct ElfImage img_template = {
4347         .ehdr = {
4348             .e_ident[EI_MAG0] = ELFMAG0,
4349             .e_ident[EI_MAG1] = ELFMAG1,
4350             .e_ident[EI_MAG2] = ELFMAG2,
4351             .e_ident[EI_MAG3] = ELFMAG3,
4352             .e_ident[EI_CLASS] = ELF_CLASS,
4353             .e_ident[EI_DATA] = ELF_DATA,
4354             .e_ident[EI_VERSION] = EV_CURRENT,
4355             .e_type = ET_EXEC,
4356             .e_machine = ELF_HOST_MACHINE,
4357             .e_version = EV_CURRENT,
4358             .e_phoff = offsetof(struct ElfImage, phdr),
4359             .e_shoff = offsetof(struct ElfImage, shdr),
4360             .e_ehsize = sizeof(ElfW(Shdr)),
4361             .e_phentsize = sizeof(ElfW(Phdr)),
4362             .e_phnum = 1,
4363             .e_shentsize = sizeof(ElfW(Shdr)),
4364             .e_shnum = ARRAY_SIZE(img->shdr),
4365             .e_shstrndx = ARRAY_SIZE(img->shdr) - 1,
4366 #ifdef ELF_HOST_FLAGS
4367             .e_flags = ELF_HOST_FLAGS,
4368 #endif
4369 #ifdef ELF_OSABI
4370             .e_ident[EI_OSABI] = ELF_OSABI,
4371 #endif
4372         },
4373         .phdr = {
4374             .p_type = PT_LOAD,
4375             .p_flags = PF_X,
4376         },
4377         .shdr = {
4378             [0] = { .sh_type = SHT_NULL },
4379             /* Trick: The contents of code_gen_buffer are not present in
4380                this fake ELF file; that got allocated elsewhere.  Therefore
4381                we mark .text as SHT_NOBITS (similar to .bss) so that readers
4382                will not look for contents.  We can record any address.  */
4383             [1] = { /* .text */
4384                 .sh_type = SHT_NOBITS,
4385                 .sh_flags = SHF_EXECINSTR | SHF_ALLOC,
4386             },
4387             [2] = { /* .debug_info */
4388                 .sh_type = SHT_PROGBITS,
4389                 .sh_offset = offsetof(struct ElfImage, di),
4390                 .sh_size = sizeof(struct DebugInfo),
4391             },
4392             [3] = { /* .debug_abbrev */
4393                 .sh_type = SHT_PROGBITS,
4394                 .sh_offset = offsetof(struct ElfImage, da),
4395                 .sh_size = sizeof(img->da),
4396             },
4397             [4] = { /* .debug_frame */
4398                 .sh_type = SHT_PROGBITS,
4399                 .sh_offset = sizeof(struct ElfImage),
4400             },
4401             [5] = { /* .symtab */
4402                 .sh_type = SHT_SYMTAB,
4403                 .sh_offset = offsetof(struct ElfImage, sym),
4404                 .sh_size = sizeof(img->sym),
4405                 .sh_info = 1,
4406                 .sh_link = ARRAY_SIZE(img->shdr) - 1,
4407                 .sh_entsize = sizeof(ElfW(Sym)),
4408             },
4409             [6] = { /* .strtab */
4410                 .sh_type = SHT_STRTAB,
4411                 .sh_offset = offsetof(struct ElfImage, str),
4412                 .sh_size = sizeof(img->str),
4413             }
4414         },
4415         .sym = {
4416             [1] = { /* code_gen_buffer */
4417                 .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC),
4418                 .st_shndx = 1,
4419             }
4420         },
4421         .di = {
4422             .len = sizeof(struct DebugInfo) - 4,
4423             .version = 2,
4424             .ptr_size = sizeof(void *),
4425             .cu_die = 1,
4426             .cu_lang = 0x8001,  /* DW_LANG_Mips_Assembler */
4427             .fn_die = 2,
4428             .fn_name = "code_gen_buffer"
4429         },
4430         .da = {
4431             1,          /* abbrev number (the cu) */
4432             0x11, 1,    /* DW_TAG_compile_unit, has children */
4433             0x13, 0x5,  /* DW_AT_language, DW_FORM_data2 */
4434             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
4435             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
4436             0, 0,       /* end of abbrev */
4437             2,          /* abbrev number (the fn) */
4438             0x2e, 0,    /* DW_TAG_subprogram, no children */
4439             0x3, 0x8,   /* DW_AT_name, DW_FORM_string */
4440             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
4441             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
4442             0, 0,       /* end of abbrev */
4443             0           /* no more abbrev */
4444         },
4445         .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0"
4446                ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer",
4447     };
4448 
4449     /* We only need a single jit entry; statically allocate it.  */
4450     static struct jit_code_entry one_entry;
4451 
4452     uintptr_t buf = (uintptr_t)buf_ptr;
4453     size_t img_size = sizeof(struct ElfImage) + debug_frame_size;
4454     DebugFrameHeader *dfh;
4455 
4456     img = g_malloc(img_size);
4457     *img = img_template;
4458 
4459     img->phdr.p_vaddr = buf;
4460     img->phdr.p_paddr = buf;
4461     img->phdr.p_memsz = buf_size;
4462 
4463     img->shdr[1].sh_name = find_string(img->str, ".text");
4464     img->shdr[1].sh_addr = buf;
4465     img->shdr[1].sh_size = buf_size;
4466 
4467     img->shdr[2].sh_name = find_string(img->str, ".debug_info");
4468     img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev");
4469 
4470     img->shdr[4].sh_name = find_string(img->str, ".debug_frame");
4471     img->shdr[4].sh_size = debug_frame_size;
4472 
4473     img->shdr[5].sh_name = find_string(img->str, ".symtab");
4474     img->shdr[6].sh_name = find_string(img->str, ".strtab");
4475 
4476     img->sym[1].st_name = find_string(img->str, "code_gen_buffer");
4477     img->sym[1].st_value = buf;
4478     img->sym[1].st_size = buf_size;
4479 
4480     img->di.cu_low_pc = buf;
4481     img->di.cu_high_pc = buf + buf_size;
4482     img->di.fn_low_pc = buf;
4483     img->di.fn_high_pc = buf + buf_size;
4484 
4485     dfh = (DebugFrameHeader *)(img + 1);
4486     memcpy(dfh, debug_frame, debug_frame_size);
4487     dfh->fde.func_start = buf;
4488     dfh->fde.func_len = buf_size;
4489 
4490 #ifdef DEBUG_JIT
4491     /* Enable this block to be able to debug the ELF image file creation.
4492        One can use readelf, objdump, or other inspection utilities.  */
4493     {
4494         FILE *f = fopen("/tmp/qemu.jit", "w+b");
4495         if (f) {
4496             if (fwrite(img, img_size, 1, f) != img_size) {
4497                 /* Avoid stupid unused return value warning for fwrite.  */
4498             }
4499             fclose(f);
4500         }
4501     }
4502 #endif
4503 
4504     one_entry.symfile_addr = img;
4505     one_entry.symfile_size = img_size;
4506 
4507     __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
4508     __jit_debug_descriptor.relevant_entry = &one_entry;
4509     __jit_debug_descriptor.first_entry = &one_entry;
4510     __jit_debug_register_code();
4511 }
4512 #else
4513 /* No support for the feature.  Provide the entry point expected by exec.c,
4514    and implement the internal function we declared earlier.  */
4515 
4516 static void tcg_register_jit_int(void *buf, size_t size,
4517                                  const void *debug_frame,
4518                                  size_t debug_frame_size)
4519 {
4520 }
4521 
4522 void tcg_register_jit(void *buf, size_t buf_size)
4523 {
4524 }
4525 #endif /* ELF_HOST_MACHINE */
4526 
4527 #if !TCG_TARGET_MAYBE_vec
4528 void tcg_expand_vec_op(TCGOpcode o, TCGType t, unsigned e, TCGArg a0, ...)
4529 {
4530     g_assert_not_reached();
4531 }
4532 #endif
4533