xref: /openbmc/qemu/tcg/tcg.c (revision 671872b6731ed746f025566e3ef2bc8d5ec1a779)
1 /*
2  * Tiny Code Generator for QEMU
3  *
4  * Copyright (c) 2008 Fabrice Bellard
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24 
25 /* define it to use liveness analysis (better code) */
26 #define USE_TCG_OPTIMIZATIONS
27 
28 #include "qemu/osdep.h"
29 
30 /* Define to dump the ELF file used to communicate with GDB.  */
31 #undef DEBUG_JIT
32 
33 #include "qemu/error-report.h"
34 #include "qemu/cutils.h"
35 #include "qemu/host-utils.h"
36 #include "qemu/timer.h"
37 
38 /* Note: the long term plan is to reduce the dependencies on the QEMU
39    CPU definitions. Currently they are used for qemu_ld/st
40    instructions */
41 #define NO_CPU_IO_DEFS
42 #include "cpu.h"
43 
44 #include "exec/cpu-common.h"
45 #include "exec/exec-all.h"
46 
47 #include "tcg-op.h"
48 
49 #if UINTPTR_MAX == UINT32_MAX
50 # define ELF_CLASS  ELFCLASS32
51 #else
52 # define ELF_CLASS  ELFCLASS64
53 #endif
54 #ifdef HOST_WORDS_BIGENDIAN
55 # define ELF_DATA   ELFDATA2MSB
56 #else
57 # define ELF_DATA   ELFDATA2LSB
58 #endif
59 
60 #include "elf.h"
61 #include "exec/log.h"
62 #include "sysemu/sysemu.h"
63 
64 /* Forward declarations for functions declared in tcg-target.inc.c and
65    used here. */
66 static void tcg_target_init(TCGContext *s);
67 static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode);
68 static void tcg_target_qemu_prologue(TCGContext *s);
69 static void patch_reloc(tcg_insn_unit *code_ptr, int type,
70                         intptr_t value, intptr_t addend);
71 
72 /* The CIE and FDE header definitions will be common to all hosts.  */
/*
 * Fixed-size header of the CIE record of the debug frame data.
 * 'len' is forced to pointer alignment, which in turn makes the whole
 * structure pointer-aligned.
 * NOTE(review): field names appear to follow the DWARF .debug_frame CIE
 * layout -- confirm against the DWARF specification before changing them.
 */
typedef struct {
    uint32_t len __attribute__((aligned(sizeof(void *))));
    uint32_t id;
    uint8_t version;
    char augmentation[1];
    uint8_t code_align;
    uint8_t data_align;
    uint8_t return_column;
} DebugFrameCIE;
82 
/*
 * Fixed-size header of the FDE record of the debug frame data.
 * The structure is packed; 'len' is still forced to pointer alignment.
 * 'func_start' and 'func_len' are pointer-sized integers.
 * NOTE(review): presumably they describe the address range of the
 * generated code covered by this FDE -- confirm where they are filled in.
 */
typedef struct QEMU_PACKED {
    uint32_t len __attribute__((aligned(sizeof(void *))));
    uint32_t cie_offset;
    uintptr_t func_start;
    uintptr_t func_len;
} DebugFrameFDEHeader;
89 
90 typedef struct QEMU_PACKED {
91     DebugFrameCIE cie;
92     DebugFrameFDEHeader fde;
93 } DebugFrameHeader;
94 
95 static void tcg_register_jit_int(void *buf, size_t size,
96                                  const void *debug_frame,
97                                  size_t debug_frame_size)
98     __attribute__((unused));
99 
100 /* Forward declarations for functions declared and used in tcg-target.inc.c. */
101 static const char *target_parse_constraint(TCGArgConstraint *ct,
102                                            const char *ct_str, TCGType type);
103 static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
104                        intptr_t arg2);
105 static void tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
106 static void tcg_out_movi(TCGContext *s, TCGType type,
107                          TCGReg ret, tcg_target_long arg);
108 static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
109                        const int *const_args);
110 #if TCG_TARGET_MAYBE_vec
111 static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, unsigned vecl,
112                            unsigned vece, const TCGArg *args,
113                            const int *const_args);
114 #else
115 static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, unsigned vecl,
116                                   unsigned vece, const TCGArg *args,
117                                   const int *const_args)
118 {
119     g_assert_not_reached();
120 }
121 #endif
122 static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
123                        intptr_t arg2);
124 static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
125                         TCGReg base, intptr_t ofs);
126 static void tcg_out_call(TCGContext *s, tcg_insn_unit *target);
127 static int tcg_target_const_match(tcg_target_long val, TCGType type,
128                                   const TCGArgConstraint *arg_ct);
129 #ifdef TCG_TARGET_NEED_LDST_LABELS
130 static bool tcg_out_ldst_finalize(TCGContext *s);
131 #endif
132 
133 #define TCG_HIGHWATER 1024
134 
135 static TCGContext **tcg_ctxs;
136 static unsigned int n_tcg_ctxs;
137 TCGv_env cpu_env = 0;
138 
139 struct tcg_region_tree {
140     QemuMutex lock;
141     GTree *tree;
142     /* padding to avoid false sharing is computed at run-time */
143 };
144 
145 /*
146  * We divide code_gen_buffer into equally-sized "regions" that TCG threads
147  * dynamically allocate from as demand dictates. Given appropriate region
148  * sizing, this minimizes flushes even when some TCG threads generate a lot
149  * more code than others.
150  */
151 struct tcg_region_state {
152     QemuMutex lock;
153 
154     /* fields set at init time */
155     void *start;
156     void *start_aligned;
157     void *end;
158     size_t n;
159     size_t size; /* size of one region */
160     size_t stride; /* .size + guard size */
161 
162     /* fields protected by the lock */
163     size_t current; /* current region index */
164     size_t agg_size_full; /* aggregate size of full regions */
165 };
166 
167 static struct tcg_region_state region;
168 /*
169  * This is an array of struct tcg_region_tree's, with padding.
170  * We use void * to simplify the computation of region_trees[i]; each
171  * struct is found every tree_size bytes.
172  */
173 static void *region_trees;
174 static size_t tree_size;
175 static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT];
176 static TCGRegSet tcg_target_call_clobber_regs;
177 
178 #if TCG_TARGET_INSN_UNIT_SIZE == 1
179 static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v)
180 {
181     *s->code_ptr++ = v;
182 }
183 
184 static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p,
185                                                       uint8_t v)
186 {
187     *p = v;
188 }
189 #endif
190 
191 #if TCG_TARGET_INSN_UNIT_SIZE <= 2
192 static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v)
193 {
194     if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
195         *s->code_ptr++ = v;
196     } else {
197         tcg_insn_unit *p = s->code_ptr;
198         memcpy(p, &v, sizeof(v));
199         s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
200     }
201 }
202 
203 static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p,
204                                                        uint16_t v)
205 {
206     if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
207         *p = v;
208     } else {
209         memcpy(p, &v, sizeof(v));
210     }
211 }
212 #endif
213 
214 #if TCG_TARGET_INSN_UNIT_SIZE <= 4
215 static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v)
216 {
217     if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
218         *s->code_ptr++ = v;
219     } else {
220         tcg_insn_unit *p = s->code_ptr;
221         memcpy(p, &v, sizeof(v));
222         s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
223     }
224 }
225 
226 static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p,
227                                                        uint32_t v)
228 {
229     if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
230         *p = v;
231     } else {
232         memcpy(p, &v, sizeof(v));
233     }
234 }
235 #endif
236 
237 #if TCG_TARGET_INSN_UNIT_SIZE <= 8
238 static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v)
239 {
240     if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
241         *s->code_ptr++ = v;
242     } else {
243         tcg_insn_unit *p = s->code_ptr;
244         memcpy(p, &v, sizeof(v));
245         s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
246     }
247 }
248 
249 static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p,
250                                                        uint64_t v)
251 {
252     if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
253         *p = v;
254     } else {
255         memcpy(p, &v, sizeof(v));
256     }
257 }
258 #endif
259 
260 /* label relocation processing */
261 
262 static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
263                           TCGLabel *l, intptr_t addend)
264 {
265     TCGRelocation *r;
266 
267     if (l->has_value) {
268         /* FIXME: This may break relocations on RISC targets that
269            modify instruction fields in place.  The caller may not have
270            written the initial value.  */
271         patch_reloc(code_ptr, type, l->u.value, addend);
272     } else {
273         /* add a new relocation entry */
274         r = tcg_malloc(sizeof(TCGRelocation));
275         r->type = type;
276         r->ptr = code_ptr;
277         r->addend = addend;
278         r->next = l->u.first_reloc;
279         l->u.first_reloc = r;
280     }
281 }
282 
283 static void tcg_out_label(TCGContext *s, TCGLabel *l, tcg_insn_unit *ptr)
284 {
285     intptr_t value = (intptr_t)ptr;
286     TCGRelocation *r;
287 
288     tcg_debug_assert(!l->has_value);
289 
290     for (r = l->u.first_reloc; r != NULL; r = r->next) {
291         patch_reloc(r->ptr, r->type, value, r->addend);
292     }
293 
294     l->has_value = 1;
295     l->u.value_ptr = ptr;
296 }
297 
298 TCGLabel *gen_new_label(void)
299 {
300     TCGContext *s = tcg_ctx;
301     TCGLabel *l = tcg_malloc(sizeof(TCGLabel));
302 
303     *l = (TCGLabel){
304         .id = s->nb_labels++
305     };
306 
307     return l;
308 }
309 
310 static void set_jmp_reset_offset(TCGContext *s, int which)
311 {
312     size_t off = tcg_current_code_size(s);
313     s->tb_jmp_reset_offset[which] = off;
314     /* Make sure that we didn't overflow the stored offset.  */
315     assert(s->tb_jmp_reset_offset[which] == off);
316 }
317 
318 #include "tcg-target.inc.c"
319 
320 /* compare a pointer @ptr and a tb_tc @s */
321 static int ptr_cmp_tb_tc(const void *ptr, const struct tb_tc *s)
322 {
323     if (ptr >= s->ptr + s->size) {
324         return 1;
325     } else if (ptr < s->ptr) {
326         return -1;
327     }
328     return 0;
329 }
330 
331 static gint tb_tc_cmp(gconstpointer ap, gconstpointer bp)
332 {
333     const struct tb_tc *a = ap;
334     const struct tb_tc *b = bp;
335 
336     /*
337      * When both sizes are set, we know this isn't a lookup.
338      * This is the most likely case: every TB must be inserted; lookups
339      * are a lot less frequent.
340      */
341     if (likely(a->size && b->size)) {
342         if (a->ptr > b->ptr) {
343             return 1;
344         } else if (a->ptr < b->ptr) {
345             return -1;
346         }
347         /* a->ptr == b->ptr should happen only on deletions */
348         g_assert(a->size == b->size);
349         return 0;
350     }
351     /*
352      * All lookups have either .size field set to 0.
353      * From the glib sources we see that @ap is always the lookup key. However
354      * the docs provide no guarantee, so we just mark this case as likely.
355      */
356     if (likely(a->size == 0)) {
357         return ptr_cmp_tb_tc(a->ptr, b);
358     }
359     return ptr_cmp_tb_tc(b->ptr, a);
360 }
361 
362 static void tcg_region_trees_init(void)
363 {
364     size_t i;
365 
366     tree_size = ROUND_UP(sizeof(struct tcg_region_tree), qemu_dcache_linesize);
367     region_trees = qemu_memalign(qemu_dcache_linesize, region.n * tree_size);
368     for (i = 0; i < region.n; i++) {
369         struct tcg_region_tree *rt = region_trees + i * tree_size;
370 
371         qemu_mutex_init(&rt->lock);
372         rt->tree = g_tree_new(tb_tc_cmp);
373     }
374 }
375 
376 static struct tcg_region_tree *tc_ptr_to_region_tree(void *p)
377 {
378     size_t region_idx;
379 
380     if (p < region.start_aligned) {
381         region_idx = 0;
382     } else {
383         ptrdiff_t offset = p - region.start_aligned;
384 
385         if (offset > region.stride * (region.n - 1)) {
386             region_idx = region.n - 1;
387         } else {
388             region_idx = offset / region.stride;
389         }
390     }
391     return region_trees + region_idx * tree_size;
392 }
393 
394 void tcg_tb_insert(TranslationBlock *tb)
395 {
396     struct tcg_region_tree *rt = tc_ptr_to_region_tree(tb->tc.ptr);
397 
398     qemu_mutex_lock(&rt->lock);
399     g_tree_insert(rt->tree, &tb->tc, tb);
400     qemu_mutex_unlock(&rt->lock);
401 }
402 
403 void tcg_tb_remove(TranslationBlock *tb)
404 {
405     struct tcg_region_tree *rt = tc_ptr_to_region_tree(tb->tc.ptr);
406 
407     qemu_mutex_lock(&rt->lock);
408     g_tree_remove(rt->tree, &tb->tc);
409     qemu_mutex_unlock(&rt->lock);
410 }
411 
412 /*
413  * Find the TB 'tb' such that
414  * tb->tc.ptr <= tc_ptr < tb->tc.ptr + tb->tc.size
415  * Return NULL if not found.
416  */
417 TranslationBlock *tcg_tb_lookup(uintptr_t tc_ptr)
418 {
419     struct tcg_region_tree *rt = tc_ptr_to_region_tree((void *)tc_ptr);
420     TranslationBlock *tb;
421     struct tb_tc s = { .ptr = (void *)tc_ptr };
422 
423     qemu_mutex_lock(&rt->lock);
424     tb = g_tree_lookup(rt->tree, &s);
425     qemu_mutex_unlock(&rt->lock);
426     return tb;
427 }
428 
429 static void tcg_region_tree_lock_all(void)
430 {
431     size_t i;
432 
433     for (i = 0; i < region.n; i++) {
434         struct tcg_region_tree *rt = region_trees + i * tree_size;
435 
436         qemu_mutex_lock(&rt->lock);
437     }
438 }
439 
440 static void tcg_region_tree_unlock_all(void)
441 {
442     size_t i;
443 
444     for (i = 0; i < region.n; i++) {
445         struct tcg_region_tree *rt = region_trees + i * tree_size;
446 
447         qemu_mutex_unlock(&rt->lock);
448     }
449 }
450 
451 void tcg_tb_foreach(GTraverseFunc func, gpointer user_data)
452 {
453     size_t i;
454 
455     tcg_region_tree_lock_all();
456     for (i = 0; i < region.n; i++) {
457         struct tcg_region_tree *rt = region_trees + i * tree_size;
458 
459         g_tree_foreach(rt->tree, func, user_data);
460     }
461     tcg_region_tree_unlock_all();
462 }
463 
464 size_t tcg_nb_tbs(void)
465 {
466     size_t nb_tbs = 0;
467     size_t i;
468 
469     tcg_region_tree_lock_all();
470     for (i = 0; i < region.n; i++) {
471         struct tcg_region_tree *rt = region_trees + i * tree_size;
472 
473         nb_tbs += g_tree_nnodes(rt->tree);
474     }
475     tcg_region_tree_unlock_all();
476     return nb_tbs;
477 }
478 
479 static void tcg_region_tree_reset_all(void)
480 {
481     size_t i;
482 
483     tcg_region_tree_lock_all();
484     for (i = 0; i < region.n; i++) {
485         struct tcg_region_tree *rt = region_trees + i * tree_size;
486 
487         /* Increment the refcount first so that destroy acts as a reset */
488         g_tree_ref(rt->tree);
489         g_tree_destroy(rt->tree);
490     }
491     tcg_region_tree_unlock_all();
492 }
493 
494 static void tcg_region_bounds(size_t curr_region, void **pstart, void **pend)
495 {
496     void *start, *end;
497 
498     start = region.start_aligned + curr_region * region.stride;
499     end = start + region.size;
500 
501     if (curr_region == 0) {
502         start = region.start;
503     }
504     if (curr_region == region.n - 1) {
505         end = region.end;
506     }
507 
508     *pstart = start;
509     *pend = end;
510 }
511 
512 static void tcg_region_assign(TCGContext *s, size_t curr_region)
513 {
514     void *start, *end;
515 
516     tcg_region_bounds(curr_region, &start, &end);
517 
518     s->code_gen_buffer = start;
519     s->code_gen_ptr = start;
520     s->code_gen_buffer_size = end - start;
521     s->code_gen_highwater = end - TCG_HIGHWATER;
522 }
523 
524 static bool tcg_region_alloc__locked(TCGContext *s)
525 {
526     if (region.current == region.n) {
527         return true;
528     }
529     tcg_region_assign(s, region.current);
530     region.current++;
531     return false;
532 }
533 
534 /*
535  * Request a new region once the one in use has filled up.
536  * Returns true on error.
537  */
538 static bool tcg_region_alloc(TCGContext *s)
539 {
540     bool err;
541     /* read the region size now; alloc__locked will overwrite it on success */
542     size_t size_full = s->code_gen_buffer_size;
543 
544     qemu_mutex_lock(&region.lock);
545     err = tcg_region_alloc__locked(s);
546     if (!err) {
547         region.agg_size_full += size_full - TCG_HIGHWATER;
548     }
549     qemu_mutex_unlock(&region.lock);
550     return err;
551 }
552 
553 /*
554  * Perform a context's first region allocation.
555  * This function does _not_ increment region.agg_size_full.
556  */
557 static inline bool tcg_region_initial_alloc__locked(TCGContext *s)
558 {
559     return tcg_region_alloc__locked(s);
560 }
561 
562 /* Call from a safe-work context */
563 void tcg_region_reset_all(void)
564 {
565     unsigned int n_ctxs = atomic_read(&n_tcg_ctxs);
566     unsigned int i;
567 
568     qemu_mutex_lock(&region.lock);
569     region.current = 0;
570     region.agg_size_full = 0;
571 
572     for (i = 0; i < n_ctxs; i++) {
573         TCGContext *s = atomic_read(&tcg_ctxs[i]);
574         bool err = tcg_region_initial_alloc__locked(s);
575 
576         g_assert(!err);
577     }
578     qemu_mutex_unlock(&region.lock);
579 
580     tcg_region_tree_reset_all();
581 }
582 
583 #ifdef CONFIG_USER_ONLY
584 static size_t tcg_n_regions(void)
585 {
586     return 1;
587 }
588 #else
589 /*
590  * It is likely that some vCPUs will translate more code than others, so we
591  * first try to set more regions than max_cpus, with those regions being of
592  * reasonable size. If that's not possible we make do by evenly dividing
593  * the code_gen_buffer among the vCPUs.
594  */
595 static size_t tcg_n_regions(void)
596 {
597     size_t i;
598 
599     /* Use a single region if all we have is one vCPU thread */
600     if (max_cpus == 1 || !qemu_tcg_mttcg_enabled()) {
601         return 1;
602     }
603 
604     /* Try to have more regions than max_cpus, with each region being >= 2 MB */
605     for (i = 8; i > 0; i--) {
606         size_t regions_per_thread = i;
607         size_t region_size;
608 
609         region_size = tcg_init_ctx.code_gen_buffer_size;
610         region_size /= max_cpus * regions_per_thread;
611 
612         if (region_size >= 2 * 1024u * 1024) {
613             return max_cpus * regions_per_thread;
614         }
615     }
616     /* If we can't, then just allocate one region per vCPU thread */
617     return max_cpus;
618 }
619 #endif
620 
621 /*
622  * Initializes region partitioning.
623  *
624  * Called at init time from the parent thread (i.e. the one calling
625  * tcg_context_init), after the target's TCG globals have been set.
626  *
627  * Region partitioning works by splitting code_gen_buffer into separate regions,
628  * and then assigning regions to TCG threads so that the threads can translate
629  * code in parallel without synchronization.
630  *
631  * In softmmu the number of TCG threads is bounded by max_cpus, so we use at
632  * least max_cpus regions in MTTCG. In !MTTCG we use a single region.
633  * Note that the TCG options from the command-line (i.e. -accel accel=tcg,[...])
634  * must have been parsed before calling this function, since it calls
635  * qemu_tcg_mttcg_enabled().
636  *
637  * In user-mode we use a single region.  Having multiple regions in user-mode
638  * is not supported, because the number of vCPU threads (recall that each thread
639  * spawned by the guest corresponds to a vCPU thread) is only bounded by the
640  * OS, and usually this number is huge (tens of thousands is not uncommon).
641  * Thus, given this large bound on the number of vCPU threads and the fact
642  * that code_gen_buffer is allocated at compile-time, we cannot guarantee
643  * the availability of at least one region per vCPU thread.
644  *
645  * However, this user-mode limitation is unlikely to be a significant problem
646  * in practice. Multi-threaded guests share most if not all of their translated
647  * code, which makes parallel code generation less appealing than in softmmu.
648  */
649 void tcg_region_init(void)
650 {
651     void *buf = tcg_init_ctx.code_gen_buffer;
652     void *aligned;
653     size_t size = tcg_init_ctx.code_gen_buffer_size;
654     size_t page_size = qemu_real_host_page_size;
655     size_t region_size;
656     size_t n_regions;
657     size_t i;
658 
659     n_regions = tcg_n_regions();
660 
661     /* The first region will be 'aligned - buf' bytes larger than the others */
662     aligned = QEMU_ALIGN_PTR_UP(buf, page_size);
663     g_assert(aligned < tcg_init_ctx.code_gen_buffer + size);
664     /*
665      * Make region_size a multiple of page_size, using aligned as the start.
666      * As a result of this we might end up with a few extra pages at the end of
667      * the buffer; we will assign those to the last region.
668      */
669     region_size = (size - (aligned - buf)) / n_regions;
670     region_size = QEMU_ALIGN_DOWN(region_size, page_size);
671 
672     /* A region must have at least 2 pages; one code, one guard */
673     g_assert(region_size >= 2 * page_size);
674 
675     /* init the region struct */
676     qemu_mutex_init(&region.lock);
677     region.n = n_regions;
678     region.size = region_size - page_size;
679     region.stride = region_size;
680     region.start = buf;
681     region.start_aligned = aligned;
682     /* page-align the end, since its last page will be a guard page */
683     region.end = QEMU_ALIGN_PTR_DOWN(buf + size, page_size);
684     /* account for that last guard page */
685     region.end -= page_size;
686 
687     /* set guard pages */
688     for (i = 0; i < region.n; i++) {
689         void *start, *end;
690         int rc;
691 
692         tcg_region_bounds(i, &start, &end);
693         rc = qemu_mprotect_none(end, page_size);
694         g_assert(!rc);
695     }
696 
697     tcg_region_trees_init();
698 
699     /* In user-mode we support only one ctx, so do the initial allocation now */
700 #ifdef CONFIG_USER_ONLY
701     {
702         bool err = tcg_region_initial_alloc__locked(tcg_ctx);
703 
704         g_assert(!err);
705     }
706 #endif
707 }
708 
709 /*
710  * All TCG threads except the parent (i.e. the one that called tcg_context_init
711  * and registered the target's TCG globals) must register with this function
712  * before initiating translation.
713  *
714  * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation
715  * of tcg_region_init() for the reasoning behind this.
716  *
717  * In softmmu each caller registers its context in tcg_ctxs[]. Note that in
718  * softmmu tcg_ctxs[] does not track tcg_init_ctx, since the initial context
719  * is not used anymore for translation once this function is called.
720  *
721  * Not tracking tcg_init_ctx in tcg_ctxs[] in softmmu keeps code that iterates
722  * over the array (e.g. tcg_code_size()) the same for both softmmu and user-mode.
723  */
724 #ifdef CONFIG_USER_ONLY
725 void tcg_register_thread(void)
726 {
727     tcg_ctx = &tcg_init_ctx;
728 }
729 #else
730 void tcg_register_thread(void)
731 {
732     TCGContext *s = g_malloc(sizeof(*s));
733     unsigned int i, n;
734     bool err;
735 
736     *s = tcg_init_ctx;
737 
738     /* Relink mem_base.  */
739     for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) {
740         if (tcg_init_ctx.temps[i].mem_base) {
741             ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps;
742             tcg_debug_assert(b >= 0 && b < n);
743             s->temps[i].mem_base = &s->temps[b];
744         }
745     }
746 
747     /* Claim an entry in tcg_ctxs */
748     n = atomic_fetch_inc(&n_tcg_ctxs);
749     g_assert(n < max_cpus);
750     atomic_set(&tcg_ctxs[n], s);
751 
752     tcg_ctx = s;
753     qemu_mutex_lock(&region.lock);
754     err = tcg_region_initial_alloc__locked(tcg_ctx);
755     g_assert(!err);
756     qemu_mutex_unlock(&region.lock);
757 }
758 #endif /* !CONFIG_USER_ONLY */
759 
760 /*
761  * Returns the size (in bytes) of all translated code (i.e. from all regions)
762  * currently in the cache.
763  * See also: tcg_code_capacity()
764  * Do not confuse with tcg_current_code_size(); that one applies to a single
765  * TCG context.
766  */
767 size_t tcg_code_size(void)
768 {
769     unsigned int n_ctxs = atomic_read(&n_tcg_ctxs);
770     unsigned int i;
771     size_t total;
772 
773     qemu_mutex_lock(&region.lock);
774     total = region.agg_size_full;
775     for (i = 0; i < n_ctxs; i++) {
776         const TCGContext *s = atomic_read(&tcg_ctxs[i]);
777         size_t size;
778 
779         size = atomic_read(&s->code_gen_ptr) - s->code_gen_buffer;
780         g_assert(size <= s->code_gen_buffer_size);
781         total += size;
782     }
783     qemu_mutex_unlock(&region.lock);
784     return total;
785 }
786 
787 /*
788  * Returns the code capacity (in bytes) of the entire cache, i.e. including all
789  * regions.
790  * See also: tcg_code_size()
791  */
792 size_t tcg_code_capacity(void)
793 {
794     size_t guard_size, capacity;
795 
796     /* no need for synchronization; these variables are set at init time */
797     guard_size = region.stride - region.size;
798     capacity = region.end + guard_size - region.start;
799     capacity -= region.n * (guard_size + TCG_HIGHWATER);
800     return capacity;
801 }
802 
803 size_t tcg_tb_phys_invalidate_count(void)
804 {
805     unsigned int n_ctxs = atomic_read(&n_tcg_ctxs);
806     unsigned int i;
807     size_t total = 0;
808 
809     for (i = 0; i < n_ctxs; i++) {
810         const TCGContext *s = atomic_read(&tcg_ctxs[i]);
811 
812         total += atomic_read(&s->tb_phys_invalidate_count);
813     }
814     return total;
815 }
816 
817 /* pool based memory allocation */
818 void *tcg_malloc_internal(TCGContext *s, int size)
819 {
820     TCGPool *p;
821     int pool_size;
822 
823     if (size > TCG_POOL_CHUNK_SIZE) {
824         /* big malloc: insert a new pool (XXX: could optimize) */
825         p = g_malloc(sizeof(TCGPool) + size);
826         p->size = size;
827         p->next = s->pool_first_large;
828         s->pool_first_large = p;
829         return p->data;
830     } else {
831         p = s->pool_current;
832         if (!p) {
833             p = s->pool_first;
834             if (!p)
835                 goto new_pool;
836         } else {
837             if (!p->next) {
838             new_pool:
839                 pool_size = TCG_POOL_CHUNK_SIZE;
840                 p = g_malloc(sizeof(TCGPool) + pool_size);
841                 p->size = pool_size;
842                 p->next = NULL;
843                 if (s->pool_current)
844                     s->pool_current->next = p;
845                 else
846                     s->pool_first = p;
847             } else {
848                 p = p->next;
849             }
850         }
851     }
852     s->pool_current = p;
853     s->pool_cur = p->data + size;
854     s->pool_end = p->data + p->size;
855     return p->data;
856 }
857 
858 void tcg_pool_reset(TCGContext *s)
859 {
860     TCGPool *p, *t;
861     for (p = s->pool_first_large; p; p = t) {
862         t = p->next;
863         g_free(p);
864     }
865     s->pool_first_large = NULL;
866     s->pool_cur = s->pool_end = NULL;
867     s->pool_current = NULL;
868 }
869 
/* Description of one helper function, as listed in all_helpers[]. */
typedef struct TCGHelperInfo {
    void *func;         /* helper entry point; also the helper_table key */
    const char *name;   /* NOTE(review): presumably the helper's name */
    unsigned flags;     /* NOTE(review): meaning not visible here -- confirm */
    unsigned sizemask;  /* NOTE(review): meaning not visible here -- confirm */
} TCGHelperInfo;
876 
877 #include "exec/helper-proto.h"
878 
879 static const TCGHelperInfo all_helpers[] = {
880 #include "exec/helper-tcg.h"
881 };
882 static GHashTable *helper_table;
883 
884 static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)];
885 static void process_op_defs(TCGContext *s);
886 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
887                                             TCGReg reg, const char *name);
888 
889 void tcg_context_init(TCGContext *s)
890 {
891     int op, total_args, n, i;
892     TCGOpDef *def;
893     TCGArgConstraint *args_ct;
894     int *sorted_args;
895     TCGTemp *ts;
896 
897     memset(s, 0, sizeof(*s));
898     s->nb_globals = 0;
899 
900     /* Count total number of arguments and allocate the corresponding
901        space */
902     total_args = 0;
903     for(op = 0; op < NB_OPS; op++) {
904         def = &tcg_op_defs[op];
905         n = def->nb_iargs + def->nb_oargs;
906         total_args += n;
907     }
908 
909     args_ct = g_malloc(sizeof(TCGArgConstraint) * total_args);
910     sorted_args = g_malloc(sizeof(int) * total_args);
911 
912     for(op = 0; op < NB_OPS; op++) {
913         def = &tcg_op_defs[op];
914         def->args_ct = args_ct;
915         def->sorted_args = sorted_args;
916         n = def->nb_iargs + def->nb_oargs;
917         sorted_args += n;
918         args_ct += n;
919     }
920 
921     /* Register helpers.  */
922     /* Use g_direct_hash/equal for direct pointer comparisons on func.  */
923     helper_table = g_hash_table_new(NULL, NULL);
924 
925     for (i = 0; i < ARRAY_SIZE(all_helpers); ++i) {
926         g_hash_table_insert(helper_table, (gpointer)all_helpers[i].func,
927                             (gpointer)&all_helpers[i]);
928     }
929 
930     tcg_target_init(s);
931     process_op_defs(s);
932 
933     /* Reverse the order of the saved registers, assuming they're all at
934        the start of tcg_target_reg_alloc_order.  */
935     for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) {
936         int r = tcg_target_reg_alloc_order[n];
937         if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) {
938             break;
939         }
940     }
941     for (i = 0; i < n; ++i) {
942         indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i];
943     }
944     for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) {
945         indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i];
946     }
947 
948     tcg_ctx = s;
949     /*
950      * In user-mode we simply share the init context among threads, since we
951      * use a single region. See the documentation tcg_region_init() for the
952      * reasoning behind this.
953      * In softmmu we will have at most max_cpus TCG threads.
954      */
955 #ifdef CONFIG_USER_ONLY
956     tcg_ctxs = &tcg_ctx;
957     n_tcg_ctxs = 1;
958 #else
959     tcg_ctxs = g_new(TCGContext *, max_cpus);
960 #endif
961 
962     tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0));
963     ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env");
964     cpu_env = temp_tcgv_ptr(ts);
965 }
966 
967 /*
968  * Allocate TBs right before their corresponding translated code, making
969  * sure that TBs and code are on different cache lines.
970  */
971 TranslationBlock *tcg_tb_alloc(TCGContext *s)
972 {
973     uintptr_t align = qemu_icache_linesize;
974     TranslationBlock *tb;
975     void *next;
976 
977  retry:
978     tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align);
979     next = (void *)ROUND_UP((uintptr_t)(tb + 1), align);
980 
981     if (unlikely(next > s->code_gen_highwater)) {
982         if (tcg_region_alloc(s)) {
983             return NULL;
984         }
985         goto retry;
986     }
987     atomic_set(&s->code_gen_ptr, next);
988     s->data_gen_ptr = NULL;
989     return tb;
990 }
991 
/*
 * Emit the host prologue at the very start of the code buffer of @s and
 * then remove that space from the buffer.
 *
 * On return s->code_gen_ptr, s->code_gen_buffer and s->code_buf all point
 * just past the prologue, and s->code_gen_buffer_size has been reduced by
 * the prologue size.  The remaining buffer is handed to tcg_register_jit().
 * When DEBUG_DISAS is defined and CPU_LOG_TB_OUT_ASM logging is enabled,
 * the generated prologue is also written to the log.
 */
992 void tcg_prologue_init(TCGContext *s)
993 {
994     size_t prologue_size, total_size;
995     void *buf0, *buf1;
996 
997     /* Put the prologue at the beginning of code_gen_buffer.  */
998     buf0 = s->code_gen_buffer;
999     total_size = s->code_gen_buffer_size;
1000     s->code_ptr = buf0;
1001     s->code_buf = buf0;
1002     s->data_gen_ptr = NULL;
1003     s->code_gen_prologue = buf0;
1004 
1005     /* Compute a high-water mark, at which we voluntarily flush the buffer
1006        and start over.  The size here is arbitrary, significantly larger
1007        than we expect the code generation for any one opcode to require.  */
1008     s->code_gen_highwater = s->code_gen_buffer + (total_size - TCG_HIGHWATER);
1009 
1010 #ifdef TCG_TARGET_NEED_POOL_LABELS
1011     s->pool_labels = NULL;
1012 #endif
1013 
1014     /* Generate the prologue.  */
1015     tcg_target_qemu_prologue(s);
1016 
1017 #ifdef TCG_TARGET_NEED_POOL_LABELS
1018     /* Allow the prologue to put e.g. guest_base into a pool entry.  */
1019     {
1020         bool ok = tcg_out_pool_finalize(s);
1021         tcg_debug_assert(ok);
1022     }
1023 #endif
1024 
    /* buf1 is the first byte after everything emitted for the prologue.  */
1025     buf1 = s->code_ptr;
1026     flush_icache_range((uintptr_t)buf0, (uintptr_t)buf1);
1027 
1028     /* Deduct the prologue from the buffer.  */
    /* NOTE(review): tcg_current_code_size() presumably measures from
       s->code_buf, so it has to run before code_buf is advanced below --
       confirm against its definition.  */
1029     prologue_size = tcg_current_code_size(s);
1030     s->code_gen_ptr = buf1;
1031     s->code_gen_buffer = buf1;
1032     s->code_buf = buf1;
1033     total_size -= prologue_size;
1034     s->code_gen_buffer_size = total_size;
1035 
1036     tcg_register_jit(s->code_gen_buffer, total_size);
1037 
1038 #ifdef DEBUG_DISAS
1039     if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
1040         qemu_log_lock();
1041         qemu_log("PROLOGUE: [size=%zu]\n", prologue_size);
        /* A non-NULL data_gen_ptr splits the prologue in two: bytes before
           it are disassembled as code, bytes from it onwards are dumped as
           raw host words.  */
1042         if (s->data_gen_ptr) {
1043             size_t code_size = s->data_gen_ptr - buf0;
1044             size_t data_size = prologue_size - code_size;
1045             size_t i;
1046 
1047             log_disas(buf0, code_size);
1048 
1049             for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) {
1050                 if (sizeof(tcg_target_ulong) == 8) {
1051                     qemu_log("0x%08" PRIxPTR ":  .quad  0x%016" PRIx64 "\n",
1052                              (uintptr_t)s->data_gen_ptr + i,
1053                              *(uint64_t *)(s->data_gen_ptr + i));
1054                 } else {
1055                     qemu_log("0x%08" PRIxPTR ":  .long  0x%08x\n",
1056                              (uintptr_t)s->data_gen_ptr + i,
1057                              *(uint32_t *)(s->data_gen_ptr + i));
1058                 }
1059             }
1060         } else {
1061             log_disas(buf0, prologue_size);
1062         }
1063         qemu_log("\n");
1064         qemu_log_flush();
1065         qemu_log_unlock();
1066     }
1067 #endif
1068 
1069     /* Assert that goto_ptr is implemented completely.  */
1070     if (TCG_TARGET_HAS_goto_ptr) {
1071         tcg_debug_assert(s->code_gen_epilogue != NULL);
1072     }
1073 }
1074 
1075 void tcg_func_start(TCGContext *s)
1076 {
1077     tcg_pool_reset(s);
1078     s->nb_temps = s->nb_globals;
1079 
1080     /* No temps have been previously allocated for size or locality.  */
1081     memset(s->free_temps, 0, sizeof(s->free_temps));
1082 
1083     s->nb_ops = 0;
1084     s->nb_labels = 0;
1085     s->current_frame_offset = s->frame_start;
1086 
1087 #ifdef CONFIG_DEBUG_TCG
1088     s->goto_tb_issue_mask = 0;
1089 #endif
1090 
1091     QTAILQ_INIT(&s->ops);
1092     QTAILQ_INIT(&s->free_ops);
1093 }
1094 
1095 static inline TCGTemp *tcg_temp_alloc(TCGContext *s)
1096 {
1097     int n = s->nb_temps++;
1098     tcg_debug_assert(n < TCG_MAX_TEMPS);
1099     return memset(&s->temps[n], 0, sizeof(TCGTemp));
1100 }
1101 
1102 static inline TCGTemp *tcg_global_alloc(TCGContext *s)
1103 {
1104     TCGTemp *ts;
1105 
1106     tcg_debug_assert(s->nb_globals == s->nb_temps);
1107     s->nb_globals++;
1108     ts = tcg_temp_alloc(s);
1109     ts->temp_global = 1;
1110 
1111     return ts;
1112 }
1113 
1114 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
1115                                             TCGReg reg, const char *name)
1116 {
1117     TCGTemp *ts;
1118 
1119     if (TCG_TARGET_REG_BITS == 32 && type != TCG_TYPE_I32) {
1120         tcg_abort();
1121     }
1122 
1123     ts = tcg_global_alloc(s);
1124     ts->base_type = type;
1125     ts->type = type;
1126     ts->fixed_reg = 1;
1127     ts->reg = reg;
1128     ts->name = name;
1129     tcg_regset_set_reg(s->reserved_regs, reg);
1130 
1131     return ts;
1132 }
1133 
1134 void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size)
1135 {
1136     s->frame_start = start;
1137     s->frame_end = start + size;
1138     s->frame_temp
1139         = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame");
1140 }
1141 
1142 TCGTemp *tcg_global_mem_new_internal(TCGType type, TCGv_ptr base,
1143                                      intptr_t offset, const char *name)
1144 {
1145     TCGContext *s = tcg_ctx;
1146     TCGTemp *base_ts = tcgv_ptr_temp(base);
1147     TCGTemp *ts = tcg_global_alloc(s);
1148     int indirect_reg = 0, bigendian = 0;
1149 #ifdef HOST_WORDS_BIGENDIAN
1150     bigendian = 1;
1151 #endif
1152 
1153     if (!base_ts->fixed_reg) {
1154         /* We do not support double-indirect registers.  */
1155         tcg_debug_assert(!base_ts->indirect_reg);
1156         base_ts->indirect_base = 1;
1157         s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64
1158                             ? 2 : 1);
1159         indirect_reg = 1;
1160     }
1161 
1162     if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
1163         TCGTemp *ts2 = tcg_global_alloc(s);
1164         char buf[64];
1165 
1166         ts->base_type = TCG_TYPE_I64;
1167         ts->type = TCG_TYPE_I32;
1168         ts->indirect_reg = indirect_reg;
1169         ts->mem_allocated = 1;
1170         ts->mem_base = base_ts;
1171         ts->mem_offset = offset + bigendian * 4;
1172         pstrcpy(buf, sizeof(buf), name);
1173         pstrcat(buf, sizeof(buf), "_0");
1174         ts->name = strdup(buf);
1175 
1176         tcg_debug_assert(ts2 == ts + 1);
1177         ts2->base_type = TCG_TYPE_I64;
1178         ts2->type = TCG_TYPE_I32;
1179         ts2->indirect_reg = indirect_reg;
1180         ts2->mem_allocated = 1;
1181         ts2->mem_base = base_ts;
1182         ts2->mem_offset = offset + (1 - bigendian) * 4;
1183         pstrcpy(buf, sizeof(buf), name);
1184         pstrcat(buf, sizeof(buf), "_1");
1185         ts2->name = strdup(buf);
1186     } else {
1187         ts->base_type = type;
1188         ts->type = type;
1189         ts->indirect_reg = indirect_reg;
1190         ts->mem_allocated = 1;
1191         ts->mem_base = base_ts;
1192         ts->mem_offset = offset;
1193         ts->name = name;
1194     }
1195     return ts;
1196 }
1197 
1198 TCGTemp *tcg_temp_new_internal(TCGType type, bool temp_local)
1199 {
1200     TCGContext *s = tcg_ctx;
1201     TCGTemp *ts;
1202     int idx, k;
1203 
1204     k = type + (temp_local ? TCG_TYPE_COUNT : 0);
1205     idx = find_first_bit(s->free_temps[k].l, TCG_MAX_TEMPS);
1206     if (idx < TCG_MAX_TEMPS) {
1207         /* There is already an available temp with the right type.  */
1208         clear_bit(idx, s->free_temps[k].l);
1209 
1210         ts = &s->temps[idx];
1211         ts->temp_allocated = 1;
1212         tcg_debug_assert(ts->base_type == type);
1213         tcg_debug_assert(ts->temp_local == temp_local);
1214     } else {
1215         ts = tcg_temp_alloc(s);
1216         if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
1217             TCGTemp *ts2 = tcg_temp_alloc(s);
1218 
1219             ts->base_type = type;
1220             ts->type = TCG_TYPE_I32;
1221             ts->temp_allocated = 1;
1222             ts->temp_local = temp_local;
1223 
1224             tcg_debug_assert(ts2 == ts + 1);
1225             ts2->base_type = TCG_TYPE_I64;
1226             ts2->type = TCG_TYPE_I32;
1227             ts2->temp_allocated = 1;
1228             ts2->temp_local = temp_local;
1229         } else {
1230             ts->base_type = type;
1231             ts->type = type;
1232             ts->temp_allocated = 1;
1233             ts->temp_local = temp_local;
1234         }
1235     }
1236 
1237 #if defined(CONFIG_DEBUG_TCG)
1238     s->temps_in_use++;
1239 #endif
1240     return ts;
1241 }
1242 
1243 TCGv_vec tcg_temp_new_vec(TCGType type)
1244 {
1245     TCGTemp *t;
1246 
1247 #ifdef CONFIG_DEBUG_TCG
1248     switch (type) {
1249     case TCG_TYPE_V64:
1250         assert(TCG_TARGET_HAS_v64);
1251         break;
1252     case TCG_TYPE_V128:
1253         assert(TCG_TARGET_HAS_v128);
1254         break;
1255     case TCG_TYPE_V256:
1256         assert(TCG_TARGET_HAS_v256);
1257         break;
1258     default:
1259         g_assert_not_reached();
1260     }
1261 #endif
1262 
1263     t = tcg_temp_new_internal(type, 0);
1264     return temp_tcgv_vec(t);
1265 }
1266 
1267 /* Create a new temp of the same type as an existing temp.  */
1268 TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match)
1269 {
1270     TCGTemp *t = tcgv_vec_temp(match);
1271 
1272     tcg_debug_assert(t->temp_allocated != 0);
1273 
1274     t = tcg_temp_new_internal(t->base_type, 0);
1275     return temp_tcgv_vec(t);
1276 }
1277 
1278 void tcg_temp_free_internal(TCGTemp *ts)
1279 {
1280     TCGContext *s = tcg_ctx;
1281     int k, idx;
1282 
1283 #if defined(CONFIG_DEBUG_TCG)
1284     s->temps_in_use--;
1285     if (s->temps_in_use < 0) {
1286         fprintf(stderr, "More temporaries freed than allocated!\n");
1287     }
1288 #endif
1289 
1290     tcg_debug_assert(ts->temp_global == 0);
1291     tcg_debug_assert(ts->temp_allocated != 0);
1292     ts->temp_allocated = 0;
1293 
1294     idx = temp_idx(ts);
1295     k = ts->base_type + (ts->temp_local ? TCG_TYPE_COUNT : 0);
1296     set_bit(idx, s->free_temps[k].l);
1297 }
1298 
1299 TCGv_i32 tcg_const_i32(int32_t val)
1300 {
1301     TCGv_i32 t0;
1302     t0 = tcg_temp_new_i32();
1303     tcg_gen_movi_i32(t0, val);
1304     return t0;
1305 }
1306 
1307 TCGv_i64 tcg_const_i64(int64_t val)
1308 {
1309     TCGv_i64 t0;
1310     t0 = tcg_temp_new_i64();
1311     tcg_gen_movi_i64(t0, val);
1312     return t0;
1313 }
1314 
1315 TCGv_i32 tcg_const_local_i32(int32_t val)
1316 {
1317     TCGv_i32 t0;
1318     t0 = tcg_temp_local_new_i32();
1319     tcg_gen_movi_i32(t0, val);
1320     return t0;
1321 }
1322 
1323 TCGv_i64 tcg_const_local_i64(int64_t val)
1324 {
1325     TCGv_i64 t0;
1326     t0 = tcg_temp_local_new_i64();
1327     tcg_gen_movi_i64(t0, val);
1328     return t0;
1329 }
1330 
1331 #if defined(CONFIG_DEBUG_TCG)
1332 void tcg_clear_temp_count(void)
1333 {
1334     TCGContext *s = tcg_ctx;
1335     s->temps_in_use = 0;
1336 }
1337 
1338 int tcg_check_temp_count(void)
1339 {
1340     TCGContext *s = tcg_ctx;
1341     if (s->temps_in_use) {
1342         /* Clear the count so that we don't give another
1343          * warning immediately next time around.
1344          */
1345         s->temps_in_use = 0;
1346         return 1;
1347     }
1348     return 0;
1349 }
1350 #endif
1351 
1352 /* Return true if OP may appear in the opcode stream.
1353    Test the runtime variable that controls each opcode.  */
/* Each opcode is answered in one of three ways below: unconditionally
   true, the value of the opcode's TCG_TARGET_HAS_* flag, or a test on
   TCG_TARGET_REG_BITS.  Vector opcodes additionally require at least one
   of the TCG_TARGET_HAS_v64/v128/v256 flags.  */
1354 bool tcg_op_supported(TCGOpcode op)
1355 {
    /* True when the host advertises any vector width at all.  */
1356     const bool have_vec
1357         = TCG_TARGET_HAS_v64 | TCG_TARGET_HAS_v128 | TCG_TARGET_HAS_v256;
1358 
1359     switch (op) {
    /* Control flow, calls, barriers and guest memory access: always
       reported as supported.  */
1360     case INDEX_op_discard:
1361     case INDEX_op_set_label:
1362     case INDEX_op_call:
1363     case INDEX_op_br:
1364     case INDEX_op_mb:
1365     case INDEX_op_insn_start:
1366     case INDEX_op_exit_tb:
1367     case INDEX_op_goto_tb:
1368     case INDEX_op_qemu_ld_i32:
1369     case INDEX_op_qemu_st_i32:
1370     case INDEX_op_qemu_ld_i64:
1371     case INDEX_op_qemu_st_i64:
1372         return true;
1373 
1374     case INDEX_op_goto_ptr:
1375         return TCG_TARGET_HAS_goto_ptr;
1376 
    /* Basic 32-bit opcodes: always reported as supported.  */
1377     case INDEX_op_mov_i32:
1378     case INDEX_op_movi_i32:
1379     case INDEX_op_setcond_i32:
1380     case INDEX_op_brcond_i32:
1381     case INDEX_op_ld8u_i32:
1382     case INDEX_op_ld8s_i32:
1383     case INDEX_op_ld16u_i32:
1384     case INDEX_op_ld16s_i32:
1385     case INDEX_op_ld_i32:
1386     case INDEX_op_st8_i32:
1387     case INDEX_op_st16_i32:
1388     case INDEX_op_st_i32:
1389     case INDEX_op_add_i32:
1390     case INDEX_op_sub_i32:
1391     case INDEX_op_mul_i32:
1392     case INDEX_op_and_i32:
1393     case INDEX_op_or_i32:
1394     case INDEX_op_xor_i32:
1395     case INDEX_op_shl_i32:
1396     case INDEX_op_shr_i32:
1397     case INDEX_op_sar_i32:
1398         return true;
1399 
    /* Optional 32-bit opcodes: each follows its TCG_TARGET_HAS_* flag.  */
1400     case INDEX_op_movcond_i32:
1401         return TCG_TARGET_HAS_movcond_i32;
1402     case INDEX_op_div_i32:
1403     case INDEX_op_divu_i32:
1404         return TCG_TARGET_HAS_div_i32;
1405     case INDEX_op_rem_i32:
1406     case INDEX_op_remu_i32:
1407         return TCG_TARGET_HAS_rem_i32;
1408     case INDEX_op_div2_i32:
1409     case INDEX_op_divu2_i32:
1410         return TCG_TARGET_HAS_div2_i32;
1411     case INDEX_op_rotl_i32:
1412     case INDEX_op_rotr_i32:
1413         return TCG_TARGET_HAS_rot_i32;
1414     case INDEX_op_deposit_i32:
1415         return TCG_TARGET_HAS_deposit_i32;
1416     case INDEX_op_extract_i32:
1417         return TCG_TARGET_HAS_extract_i32;
1418     case INDEX_op_sextract_i32:
1419         return TCG_TARGET_HAS_sextract_i32;
1420     case INDEX_op_add2_i32:
1421         return TCG_TARGET_HAS_add2_i32;
1422     case INDEX_op_sub2_i32:
1423         return TCG_TARGET_HAS_sub2_i32;
1424     case INDEX_op_mulu2_i32:
1425         return TCG_TARGET_HAS_mulu2_i32;
1426     case INDEX_op_muls2_i32:
1427         return TCG_TARGET_HAS_muls2_i32;
1428     case INDEX_op_muluh_i32:
1429         return TCG_TARGET_HAS_muluh_i32;
1430     case INDEX_op_mulsh_i32:
1431         return TCG_TARGET_HAS_mulsh_i32;
1432     case INDEX_op_ext8s_i32:
1433         return TCG_TARGET_HAS_ext8s_i32;
1434     case INDEX_op_ext16s_i32:
1435         return TCG_TARGET_HAS_ext16s_i32;
1436     case INDEX_op_ext8u_i32:
1437         return TCG_TARGET_HAS_ext8u_i32;
1438     case INDEX_op_ext16u_i32:
1439         return TCG_TARGET_HAS_ext16u_i32;
1440     case INDEX_op_bswap16_i32:
1441         return TCG_TARGET_HAS_bswap16_i32;
1442     case INDEX_op_bswap32_i32:
1443         return TCG_TARGET_HAS_bswap32_i32;
1444     case INDEX_op_not_i32:
1445         return TCG_TARGET_HAS_not_i32;
1446     case INDEX_op_neg_i32:
1447         return TCG_TARGET_HAS_neg_i32;
1448     case INDEX_op_andc_i32:
1449         return TCG_TARGET_HAS_andc_i32;
1450     case INDEX_op_orc_i32:
1451         return TCG_TARGET_HAS_orc_i32;
1452     case INDEX_op_eqv_i32:
1453         return TCG_TARGET_HAS_eqv_i32;
1454     case INDEX_op_nand_i32:
1455         return TCG_TARGET_HAS_nand_i32;
1456     case INDEX_op_nor_i32:
1457         return TCG_TARGET_HAS_nor_i32;
1458     case INDEX_op_clz_i32:
1459         return TCG_TARGET_HAS_clz_i32;
1460     case INDEX_op_ctz_i32:
1461         return TCG_TARGET_HAS_ctz_i32;
1462     case INDEX_op_ctpop_i32:
1463         return TCG_TARGET_HAS_ctpop_i32;
1464 
    /* Double-word compare opcodes: reported only for 32-bit hosts.  */
1465     case INDEX_op_brcond2_i32:
1466     case INDEX_op_setcond2_i32:
1467         return TCG_TARGET_REG_BITS == 32;
1468 
    /* Basic 64-bit opcodes: reported only for 64-bit hosts.  */
1469     case INDEX_op_mov_i64:
1470     case INDEX_op_movi_i64:
1471     case INDEX_op_setcond_i64:
1472     case INDEX_op_brcond_i64:
1473     case INDEX_op_ld8u_i64:
1474     case INDEX_op_ld8s_i64:
1475     case INDEX_op_ld16u_i64:
1476     case INDEX_op_ld16s_i64:
1477     case INDEX_op_ld32u_i64:
1478     case INDEX_op_ld32s_i64:
1479     case INDEX_op_ld_i64:
1480     case INDEX_op_st8_i64:
1481     case INDEX_op_st16_i64:
1482     case INDEX_op_st32_i64:
1483     case INDEX_op_st_i64:
1484     case INDEX_op_add_i64:
1485     case INDEX_op_sub_i64:
1486     case INDEX_op_mul_i64:
1487     case INDEX_op_and_i64:
1488     case INDEX_op_or_i64:
1489     case INDEX_op_xor_i64:
1490     case INDEX_op_shl_i64:
1491     case INDEX_op_shr_i64:
1492     case INDEX_op_sar_i64:
1493     case INDEX_op_ext_i32_i64:
1494     case INDEX_op_extu_i32_i64:
1495         return TCG_TARGET_REG_BITS == 64;
1496 
    /* Optional 64-bit opcodes: each follows its TCG_TARGET_HAS_* flag.  */
1497     case INDEX_op_movcond_i64:
1498         return TCG_TARGET_HAS_movcond_i64;
1499     case INDEX_op_div_i64:
1500     case INDEX_op_divu_i64:
1501         return TCG_TARGET_HAS_div_i64;
1502     case INDEX_op_rem_i64:
1503     case INDEX_op_remu_i64:
1504         return TCG_TARGET_HAS_rem_i64;
1505     case INDEX_op_div2_i64:
1506     case INDEX_op_divu2_i64:
1507         return TCG_TARGET_HAS_div2_i64;
1508     case INDEX_op_rotl_i64:
1509     case INDEX_op_rotr_i64:
1510         return TCG_TARGET_HAS_rot_i64;
1511     case INDEX_op_deposit_i64:
1512         return TCG_TARGET_HAS_deposit_i64;
1513     case INDEX_op_extract_i64:
1514         return TCG_TARGET_HAS_extract_i64;
1515     case INDEX_op_sextract_i64:
1516         return TCG_TARGET_HAS_sextract_i64;
1517     case INDEX_op_extrl_i64_i32:
1518         return TCG_TARGET_HAS_extrl_i64_i32;
1519     case INDEX_op_extrh_i64_i32:
1520         return TCG_TARGET_HAS_extrh_i64_i32;
1521     case INDEX_op_ext8s_i64:
1522         return TCG_TARGET_HAS_ext8s_i64;
1523     case INDEX_op_ext16s_i64:
1524         return TCG_TARGET_HAS_ext16s_i64;
1525     case INDEX_op_ext32s_i64:
1526         return TCG_TARGET_HAS_ext32s_i64;
1527     case INDEX_op_ext8u_i64:
1528         return TCG_TARGET_HAS_ext8u_i64;
1529     case INDEX_op_ext16u_i64:
1530         return TCG_TARGET_HAS_ext16u_i64;
1531     case INDEX_op_ext32u_i64:
1532         return TCG_TARGET_HAS_ext32u_i64;
1533     case INDEX_op_bswap16_i64:
1534         return TCG_TARGET_HAS_bswap16_i64;
1535     case INDEX_op_bswap32_i64:
1536         return TCG_TARGET_HAS_bswap32_i64;
1537     case INDEX_op_bswap64_i64:
1538         return TCG_TARGET_HAS_bswap64_i64;
1539     case INDEX_op_not_i64:
1540         return TCG_TARGET_HAS_not_i64;
1541     case INDEX_op_neg_i64:
1542         return TCG_TARGET_HAS_neg_i64;
1543     case INDEX_op_andc_i64:
1544         return TCG_TARGET_HAS_andc_i64;
1545     case INDEX_op_orc_i64:
1546         return TCG_TARGET_HAS_orc_i64;
1547     case INDEX_op_eqv_i64:
1548         return TCG_TARGET_HAS_eqv_i64;
1549     case INDEX_op_nand_i64:
1550         return TCG_TARGET_HAS_nand_i64;
1551     case INDEX_op_nor_i64:
1552         return TCG_TARGET_HAS_nor_i64;
1553     case INDEX_op_clz_i64:
1554         return TCG_TARGET_HAS_clz_i64;
1555     case INDEX_op_ctz_i64:
1556         return TCG_TARGET_HAS_ctz_i64;
1557     case INDEX_op_ctpop_i64:
1558         return TCG_TARGET_HAS_ctpop_i64;
1559     case INDEX_op_add2_i64:
1560         return TCG_TARGET_HAS_add2_i64;
1561     case INDEX_op_sub2_i64:
1562         return TCG_TARGET_HAS_sub2_i64;
1563     case INDEX_op_mulu2_i64:
1564         return TCG_TARGET_HAS_mulu2_i64;
1565     case INDEX_op_muls2_i64:
1566         return TCG_TARGET_HAS_muls2_i64;
1567     case INDEX_op_muluh_i64:
1568         return TCG_TARGET_HAS_muluh_i64;
1569     case INDEX_op_mulsh_i64:
1570         return TCG_TARGET_HAS_mulsh_i64;
1571 
    /* Vector opcodes: the basic set needs only have_vec; the rest need
       have_vec plus their own flag (or a 32-bit host for dup2).  */
1572     case INDEX_op_mov_vec:
1573     case INDEX_op_dup_vec:
1574     case INDEX_op_dupi_vec:
1575     case INDEX_op_ld_vec:
1576     case INDEX_op_st_vec:
1577     case INDEX_op_add_vec:
1578     case INDEX_op_sub_vec:
1579     case INDEX_op_and_vec:
1580     case INDEX_op_or_vec:
1581     case INDEX_op_xor_vec:
1582     case INDEX_op_cmp_vec:
1583         return have_vec;
1584     case INDEX_op_dup2_vec:
1585         return have_vec && TCG_TARGET_REG_BITS == 32;
1586     case INDEX_op_not_vec:
1587         return have_vec && TCG_TARGET_HAS_not_vec;
1588     case INDEX_op_neg_vec:
1589         return have_vec && TCG_TARGET_HAS_neg_vec;
1590     case INDEX_op_andc_vec:
1591         return have_vec && TCG_TARGET_HAS_andc_vec;
1592     case INDEX_op_orc_vec:
1593         return have_vec && TCG_TARGET_HAS_orc_vec;
1594     case INDEX_op_mul_vec:
1595         return have_vec && TCG_TARGET_HAS_mul_vec;
1596     case INDEX_op_shli_vec:
1597     case INDEX_op_shri_vec:
1598     case INDEX_op_sari_vec:
1599         return have_vec && TCG_TARGET_HAS_shi_vec;
1600     case INDEX_op_shls_vec:
1601     case INDEX_op_shrs_vec:
1602     case INDEX_op_sars_vec:
1603         return have_vec && TCG_TARGET_HAS_shs_vec;
1604     case INDEX_op_shlv_vec:
1605     case INDEX_op_shrv_vec:
1606     case INDEX_op_sarv_vec:
1607         return have_vec && TCG_TARGET_HAS_shv_vec;
1608 
    /* Anything else must lie strictly between INDEX_op_last_generic and
       NB_OPS (presumably backend-specific opcodes -- confirm) and is
       reported as supported.  */
1609     default:
1610         tcg_debug_assert(op > INDEX_op_last_generic && op < NB_OPS);
1611         return true;
1612     }
1613 }
1614 
1615 /* Note: we convert the 64 bit args to 32 bit and do some alignment
1616    and endian swap. Maybe it would be better to do the alignment
1617    and endian swap in tcg_reg_alloc_call(). */
/*
 * Emit an INDEX_op_call op that calls helper @func with @nargs arguments
 * from @args, storing the result in @ret (NULL for no result).
 *
 * The helper's flags and sizemask are looked up in helper_table by @func.
 * As used below, sizemask bit 0 marks a 64-bit return value; for argument
 * i, bit (i+1)*2 marks a 64-bit argument and bit (i+1)*2+1 a signed one.
 *
 * The op's args[] are filled in this order: return temps, argument temps
 * (with TCG_CALL_DUMMY_ARG padding where alignment requires it), the
 * function address, then the flags.  TCGOP_CALLO and TCGOP_CALLI receive
 * the number of return and argument slots.
 *
 * NOTE(review): the lookup result is dereferenced without a NULL check,
 * so @func must have been registered in helper_table.
 * With TCG_TARGET_EXTEND_ARGS on a 64-bit host, entries of the caller's
 * @args array are overwritten with temporaries that are freed on exit.
 */
1618 void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args)
1619 {
1620     int i, real_args, nb_rets, pi;
1621     unsigned sizemask, flags;
1622     TCGHelperInfo *info;
1623     TCGOp *op;
1624 
1625     info = g_hash_table_lookup(helper_table, (gpointer)func);
1626     flags = info->flags;
1627     sizemask = info->sizemask;
1628 
1629 #if defined(__sparc__) && !defined(__arch64__) \
1630     && !defined(CONFIG_TCG_INTERPRETER)
1631     /* We have 64-bit values in one register, but need to pass as two
1632        separate parameters.  Split them.  */
1633     int orig_sizemask = sizemask;
1634     int orig_nargs = nargs;
1635     TCGv_i64 retl, reth;
1636     TCGTemp *split_args[MAX_OPC_PARAM];
1637 
1638     retl = NULL;
1639     reth = NULL;
1640     if (sizemask != 0) {
1641         for (i = real_args = 0; i < nargs; ++i) {
1642             int is_64bit = sizemask & (1 << (i+1)*2);
1643             if (is_64bit) {
1644                 TCGv_i64 orig = temp_tcgv_i64(args[i]);
1645                 TCGv_i32 h = tcg_temp_new_i32();
1646                 TCGv_i32 l = tcg_temp_new_i32();
1647                 tcg_gen_extr_i64_i32(l, h, orig);
                /* The high half is passed first, then the low half.  */
1648                 split_args[real_args++] = tcgv_i32_temp(h);
1649                 split_args[real_args++] = tcgv_i32_temp(l);
1650             } else {
1651                 split_args[real_args++] = args[i];
1652             }
1653         }
        /* From here on every argument is treated as a 32-bit one.  */
1654         nargs = real_args;
1655         args = split_args;
1656         sizemask = 0;
1657     }
1658 #elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
    /* Replace each 32-bit argument by a 64-bit temp holding its sign- or
       zero-extension, chosen by the argument's signed bit.  */
1659     for (i = 0; i < nargs; ++i) {
1660         int is_64bit = sizemask & (1 << (i+1)*2);
1661         int is_signed = sizemask & (2 << (i+1)*2);
1662         if (!is_64bit) {
1663             TCGv_i64 temp = tcg_temp_new_i64();
1664             TCGv_i64 orig = temp_tcgv_i64(args[i]);
1665             if (is_signed) {
1666                 tcg_gen_ext32s_i64(temp, orig);
1667             } else {
1668                 tcg_gen_ext32u_i64(temp, orig);
1669             }
1670             args[i] = tcgv_i64_temp(temp);
1671         }
1672     }
1673 #endif /* TCG_TARGET_EXTEND_ARGS */
1674 
1675     op = tcg_emit_op(INDEX_op_call);
1676 
    /* pi is the next free index in op->args[].  */
1677     pi = 0;
1678     if (ret != NULL) {
1679 #if defined(__sparc__) && !defined(__arch64__) \
1680     && !defined(CONFIG_TCG_INTERPRETER)
1681         if (orig_sizemask & 1) {
1682             /* The 32-bit ABI is going to return the 64-bit value in
1683                the %o0/%o1 register pair.  Prepare for this by using
1684                two return temporaries, and reassemble below.  */
1685             retl = tcg_temp_new_i64();
1686             reth = tcg_temp_new_i64();
1687             op->args[pi++] = tcgv_i64_arg(reth);
1688             op->args[pi++] = tcgv_i64_arg(retl);
1689             nb_rets = 2;
1690         } else {
1691             op->args[pi++] = temp_arg(ret);
1692             nb_rets = 1;
1693         }
1694 #else
        /* A 64-bit result on a narrower host occupies ret and ret + 1,
           ordered according to host endianness.  */
1695         if (TCG_TARGET_REG_BITS < 64 && (sizemask & 1)) {
1696 #ifdef HOST_WORDS_BIGENDIAN
1697             op->args[pi++] = temp_arg(ret + 1);
1698             op->args[pi++] = temp_arg(ret);
1699 #else
1700             op->args[pi++] = temp_arg(ret);
1701             op->args[pi++] = temp_arg(ret + 1);
1702 #endif
1703             nb_rets = 2;
1704         } else {
1705             op->args[pi++] = temp_arg(ret);
1706             nb_rets = 1;
1707         }
1708 #endif
1709     } else {
1710         nb_rets = 0;
1711     }
1712     TCGOP_CALLO(op) = nb_rets;
1713 
    /* real_args counts argument slots, including any dummy padding.  */
1714     real_args = 0;
1715     for (i = 0; i < nargs; i++) {
1716         int is_64bit = sizemask & (1 << (i+1)*2);
1717         if (TCG_TARGET_REG_BITS < 64 && is_64bit) {
1718 #ifdef TCG_TARGET_CALL_ALIGN_ARGS
1719             /* some targets want aligned 64 bit args */
1720             if (real_args & 1) {
1721                 op->args[pi++] = TCG_CALL_DUMMY_ARG;
1722                 real_args++;
1723             }
1724 #endif
1725            /* If stack grows up, then we will be placing successive
1726               arguments at lower addresses, which means we need to
1727               reverse the order compared to how we would normally
1728               treat either big or little-endian.  For those arguments
1729               that will wind up in registers, this still works for
1730               HPPA (the only current STACK_GROWSUP target) since the
1731               argument registers are *also* allocated in decreasing
1732               order.  If another such target is added, this logic may
1733               have to get more complicated to differentiate between
1734               stack arguments and register arguments.  */
1735 #if defined(HOST_WORDS_BIGENDIAN) != defined(TCG_TARGET_STACK_GROWSUP)
1736             op->args[pi++] = temp_arg(args[i] + 1);
1737             op->args[pi++] = temp_arg(args[i]);
1738 #else
1739             op->args[pi++] = temp_arg(args[i]);
1740             op->args[pi++] = temp_arg(args[i] + 1);
1741 #endif
1742             real_args += 2;
1743             continue;
1744         }
1745 
1746         op->args[pi++] = temp_arg(args[i]);
1747         real_args++;
1748     }
    /* The function address and the helper flags follow the arguments.  */
1749     op->args[pi++] = (uintptr_t)func;
1750     op->args[pi++] = flags;
1751     TCGOP_CALLI(op) = real_args;
1752 
1753     /* Make sure the fields didn't overflow.  */
1754     tcg_debug_assert(TCGOP_CALLI(op) == real_args);
1755     tcg_debug_assert(pi <= ARRAY_SIZE(op->args));
1756 
1757 #if defined(__sparc__) && !defined(__arch64__) \
1758     && !defined(CONFIG_TCG_INTERPRETER)
1759     /* Free all of the parts we allocated above.  */
1760     for (i = real_args = 0; i < orig_nargs; ++i) {
1761         int is_64bit = orig_sizemask & (1 << (i+1)*2);
1762         if (is_64bit) {
1763             tcg_temp_free_internal(args[real_args++]);
1764             tcg_temp_free_internal(args[real_args++]);
1765         } else {
1766             real_args++;
1767         }
1768     }
1769     if (orig_sizemask & 1) {
1770         /* The 32-bit ABI returned two 32-bit pieces.  Re-assemble them.
1771            Note that describing these as TCGv_i64 eliminates an unnecessary
1772            zero-extension that tcg_gen_concat_i32_i64 would create.  */
1773         tcg_gen_concat32_i64(temp_tcgv_i64(ret), retl, reth);
1774         tcg_temp_free_i64(retl);
1775         tcg_temp_free_i64(reth);
1776     }
1777 #elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
    /* Free the extension temps that replaced the 32-bit arguments.  */
1778     for (i = 0; i < nargs; ++i) {
1779         int is_64bit = sizemask & (1 << (i+1)*2);
1780         if (!is_64bit) {
1781             tcg_temp_free_internal(args[i]);
1782         }
1783     }
1784 #endif /* TCG_TARGET_EXTEND_ARGS */
1785 }
1786 
1787 static void tcg_reg_alloc_start(TCGContext *s)
1788 {
1789     int i, n;
1790     TCGTemp *ts;
1791 
1792     for (i = 0, n = s->nb_globals; i < n; i++) {
1793         ts = &s->temps[i];
1794         ts->val_type = (ts->fixed_reg ? TEMP_VAL_REG : TEMP_VAL_MEM);
1795     }
1796     for (n = s->nb_temps; i < n; i++) {
1797         ts = &s->temps[i];
1798         ts->val_type = (ts->temp_local ? TEMP_VAL_MEM : TEMP_VAL_DEAD);
1799         ts->mem_allocated = 0;
1800         ts->fixed_reg = 0;
1801     }
1802 
1803     memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp));
1804 }
1805 
1806 static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size,
1807                                  TCGTemp *ts)
1808 {
1809     int idx = temp_idx(ts);
1810 
1811     if (ts->temp_global) {
1812         pstrcpy(buf, buf_size, ts->name);
1813     } else if (ts->temp_local) {
1814         snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
1815     } else {
1816         snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
1817     }
1818     return buf;
1819 }
1820 
1821 static char *tcg_get_arg_str(TCGContext *s, char *buf,
1822                              int buf_size, TCGArg arg)
1823 {
1824     return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg));
1825 }
1826 
1827 /* Find helper name.  */
1828 static inline const char *tcg_find_helper(TCGContext *s, uintptr_t val)
1829 {
1830     const char *ret = NULL;
1831     if (helper_table) {
1832         TCGHelperInfo *info = g_hash_table_lookup(helper_table, (gpointer)val);
1833         if (info) {
1834             ret = info->name;
1835         }
1836     }
1837     return ret;
1838 }
1839 
/* Printable names of the TCG comparison conditions, indexed by the
   TCG_COND_* value.  Any index not listed here is left as NULL.  */
1840 static const char * const cond_name[] =
1841 {
1842     [TCG_COND_NEVER] = "never",
1843     [TCG_COND_ALWAYS] = "always",
1844     [TCG_COND_EQ] = "eq",
1845     [TCG_COND_NE] = "ne",
1846     [TCG_COND_LT] = "lt",
1847     [TCG_COND_GE] = "ge",
1848     [TCG_COND_LE] = "le",
1849     [TCG_COND_GT] = "gt",
1850     [TCG_COND_LTU] = "ltu",
1851     [TCG_COND_GEU] = "geu",
1852     [TCG_COND_LEU] = "leu",
1853     [TCG_COND_GTU] = "gtu"
1854 };
1855 
/* Printable names of guest load/store memory operations, indexed by the
   MO_* size/sign/endianness value.  Any index not listed here is left
   as NULL.  */
1856 static const char * const ldst_name[] =
1857 {
1858     [MO_UB]   = "ub",
1859     [MO_SB]   = "sb",
1860     [MO_LEUW] = "leuw",
1861     [MO_LESW] = "lesw",
1862     [MO_LEUL] = "leul",
1863     [MO_LESL] = "lesl",
1864     [MO_LEQ]  = "leq",
1865     [MO_BEUW] = "beuw",
1866     [MO_BESW] = "besw",
1867     [MO_BEUL] = "beul",
1868     [MO_BESL] = "besl",
1869     [MO_BEQ]  = "beq",
1870 };
1871 
/* Printable prefixes for the alignment field of a memory operation,
   indexed by the MO_AMASK bits shifted down by MO_ASHIFT.  Which of
   MO_UNALN and MO_ALIGN prints as the empty string depends on
   ALIGNED_ONLY (presumably the empty one is the target's default --
   confirm).  */
1872 static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
1873 #ifdef ALIGNED_ONLY
1874     [MO_UNALN >> MO_ASHIFT]    = "un+",
1875     [MO_ALIGN >> MO_ASHIFT]    = "",
1876 #else
1877     [MO_UNALN >> MO_ASHIFT]    = "",
1878     [MO_ALIGN >> MO_ASHIFT]    = "al+",
1879 #endif
1880     [MO_ALIGN_2 >> MO_ASHIFT]  = "al2+",
1881     [MO_ALIGN_4 >> MO_ASHIFT]  = "al4+",
1882     [MO_ALIGN_8 >> MO_ASHIFT]  = "al8+",
1883     [MO_ALIGN_16 >> MO_ASHIFT] = "al16+",
1884     [MO_ALIGN_32 >> MO_ASHIFT] = "al32+",
1885     [MO_ALIGN_64 >> MO_ASHIFT] = "al64+",
1886 };
1887 
1888 void tcg_dump_ops(TCGContext *s)
1889 {
1890     char buf[128];
1891     TCGOp *op;
1892 
1893     QTAILQ_FOREACH(op, &s->ops, link) {
1894         int i, k, nb_oargs, nb_iargs, nb_cargs;
1895         const TCGOpDef *def;
1896         TCGOpcode c;
1897         int col = 0;
1898 
1899         c = op->opc;
1900         def = &tcg_op_defs[c];
1901 
1902         if (c == INDEX_op_insn_start) {
1903             col += qemu_log("\n ----");
1904 
1905             for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
1906                 target_ulong a;
1907 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
1908                 a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
1909 #else
1910                 a = op->args[i];
1911 #endif
1912                 col += qemu_log(" " TARGET_FMT_lx, a);
1913             }
1914         } else if (c == INDEX_op_call) {
1915             /* variable number of arguments */
1916             nb_oargs = TCGOP_CALLO(op);
1917             nb_iargs = TCGOP_CALLI(op);
1918             nb_cargs = def->nb_cargs;
1919 
1920             /* function name, flags, out args */
1921             col += qemu_log(" %s %s,$0x%" TCG_PRIlx ",$%d", def->name,
1922                             tcg_find_helper(s, op->args[nb_oargs + nb_iargs]),
1923                             op->args[nb_oargs + nb_iargs + 1], nb_oargs);
1924             for (i = 0; i < nb_oargs; i++) {
1925                 col += qemu_log(",%s", tcg_get_arg_str(s, buf, sizeof(buf),
1926                                                        op->args[i]));
1927             }
1928             for (i = 0; i < nb_iargs; i++) {
1929                 TCGArg arg = op->args[nb_oargs + i];
1930                 const char *t = "<dummy>";
1931                 if (arg != TCG_CALL_DUMMY_ARG) {
1932                     t = tcg_get_arg_str(s, buf, sizeof(buf), arg);
1933                 }
1934                 col += qemu_log(",%s", t);
1935             }
1936         } else {
1937             col += qemu_log(" %s ", def->name);
1938 
1939             nb_oargs = def->nb_oargs;
1940             nb_iargs = def->nb_iargs;
1941             nb_cargs = def->nb_cargs;
1942 
1943             if (def->flags & TCG_OPF_VECTOR) {
1944                 col += qemu_log("v%d,e%d,", 64 << TCGOP_VECL(op),
1945                                 8 << TCGOP_VECE(op));
1946             }
1947 
1948             k = 0;
1949             for (i = 0; i < nb_oargs; i++) {
1950                 if (k != 0) {
1951                     col += qemu_log(",");
1952                 }
1953                 col += qemu_log("%s", tcg_get_arg_str(s, buf, sizeof(buf),
1954                                                       op->args[k++]));
1955             }
1956             for (i = 0; i < nb_iargs; i++) {
1957                 if (k != 0) {
1958                     col += qemu_log(",");
1959                 }
1960                 col += qemu_log("%s", tcg_get_arg_str(s, buf, sizeof(buf),
1961                                                       op->args[k++]));
1962             }
1963             switch (c) {
1964             case INDEX_op_brcond_i32:
1965             case INDEX_op_setcond_i32:
1966             case INDEX_op_movcond_i32:
1967             case INDEX_op_brcond2_i32:
1968             case INDEX_op_setcond2_i32:
1969             case INDEX_op_brcond_i64:
1970             case INDEX_op_setcond_i64:
1971             case INDEX_op_movcond_i64:
1972             case INDEX_op_cmp_vec:
1973                 if (op->args[k] < ARRAY_SIZE(cond_name)
1974                     && cond_name[op->args[k]]) {
1975                     col += qemu_log(",%s", cond_name[op->args[k++]]);
1976                 } else {
1977                     col += qemu_log(",$0x%" TCG_PRIlx, op->args[k++]);
1978                 }
1979                 i = 1;
1980                 break;
1981             case INDEX_op_qemu_ld_i32:
1982             case INDEX_op_qemu_st_i32:
1983             case INDEX_op_qemu_ld_i64:
1984             case INDEX_op_qemu_st_i64:
1985                 {
1986                     TCGMemOpIdx oi = op->args[k++];
1987                     TCGMemOp op = get_memop(oi);
1988                     unsigned ix = get_mmuidx(oi);
1989 
1990                     if (op & ~(MO_AMASK | MO_BSWAP | MO_SSIZE)) {
1991                         col += qemu_log(",$0x%x,%u", op, ix);
1992                     } else {
1993                         const char *s_al, *s_op;
1994                         s_al = alignment_name[(op & MO_AMASK) >> MO_ASHIFT];
1995                         s_op = ldst_name[op & (MO_BSWAP | MO_SSIZE)];
1996                         col += qemu_log(",%s%s,%u", s_al, s_op, ix);
1997                     }
1998                     i = 1;
1999                 }
2000                 break;
2001             default:
2002                 i = 0;
2003                 break;
2004             }
2005             switch (c) {
2006             case INDEX_op_set_label:
2007             case INDEX_op_br:
2008             case INDEX_op_brcond_i32:
2009             case INDEX_op_brcond_i64:
2010             case INDEX_op_brcond2_i32:
2011                 col += qemu_log("%s$L%d", k ? "," : "",
2012                                 arg_label(op->args[k])->id);
2013                 i++, k++;
2014                 break;
2015             default:
2016                 break;
2017             }
2018             for (; i < nb_cargs; i++, k++) {
2019                 col += qemu_log("%s$0x%" TCG_PRIlx, k ? "," : "", op->args[k]);
2020             }
2021         }
2022         if (op->life) {
2023             unsigned life = op->life;
2024 
2025             for (; col < 48; ++col) {
2026                 putc(' ', qemu_logfile);
2027             }
2028 
2029             if (life & (SYNC_ARG * 3)) {
2030                 qemu_log("  sync:");
2031                 for (i = 0; i < 2; ++i) {
2032                     if (life & (SYNC_ARG << i)) {
2033                         qemu_log(" %d", i);
2034                     }
2035                 }
2036             }
2037             life /= DEAD_ARG;
2038             if (life) {
2039                 qemu_log("  dead:");
2040                 for (i = 0; life; ++i, life >>= 1) {
2041                     if (life & 1) {
2042                         qemu_log(" %d", i);
2043                     }
2044                 }
2045             }
2046         }
2047         qemu_log("\n");
2048     }
2049 }
2050 
2051 /* we give more priority to constraints with less registers */
2052 static int get_constraint_priority(const TCGOpDef *def, int k)
2053 {
2054     const TCGArgConstraint *arg_ct;
2055 
2056     int i, n;
2057     arg_ct = &def->args_ct[k];
2058     if (arg_ct->ct & TCG_CT_ALIAS) {
2059         /* an alias is equivalent to a single register */
2060         n = 1;
2061     } else {
2062         if (!(arg_ct->ct & TCG_CT_REG))
2063             return 0;
2064         n = 0;
2065         for(i = 0; i < TCG_TARGET_NB_REGS; i++) {
2066             if (tcg_regset_test_reg(arg_ct->u.regs, i))
2067                 n++;
2068         }
2069     }
2070     return TCG_TARGET_NB_REGS - n + 1;
2071 }
2072 
2073 /* sort from highest priority to lowest */
2074 static void sort_constraints(TCGOpDef *def, int start, int n)
2075 {
2076     int i, j, p1, p2, tmp;
2077 
2078     for(i = 0; i < n; i++)
2079         def->sorted_args[start + i] = start + i;
2080     if (n <= 1)
2081         return;
2082     for(i = 0; i < n - 1; i++) {
2083         for(j = i + 1; j < n; j++) {
2084             p1 = get_constraint_priority(def, def->sorted_args[start + i]);
2085             p2 = get_constraint_priority(def, def->sorted_args[start + j]);
2086             if (p1 < p2) {
2087                 tmp = def->sorted_args[start + i];
2088                 def->sorted_args[start + i] = def->sorted_args[start + j];
2089                 def->sorted_args[start + j] = tmp;
2090             }
2091         }
2092     }
2093 }
2094 
2095 static void process_op_defs(TCGContext *s)
2096 {
2097     TCGOpcode op;
2098 
2099     for (op = 0; op < NB_OPS; op++) {
2100         TCGOpDef *def = &tcg_op_defs[op];
2101         const TCGTargetOpDef *tdefs;
2102         TCGType type;
2103         int i, nb_args;
2104 
2105         if (def->flags & TCG_OPF_NOT_PRESENT) {
2106             continue;
2107         }
2108 
2109         nb_args = def->nb_iargs + def->nb_oargs;
2110         if (nb_args == 0) {
2111             continue;
2112         }
2113 
2114         tdefs = tcg_target_op_def(op);
2115         /* Missing TCGTargetOpDef entry. */
2116         tcg_debug_assert(tdefs != NULL);
2117 
2118         type = (def->flags & TCG_OPF_64BIT ? TCG_TYPE_I64 : TCG_TYPE_I32);
2119         for (i = 0; i < nb_args; i++) {
2120             const char *ct_str = tdefs->args_ct_str[i];
2121             /* Incomplete TCGTargetOpDef entry. */
2122             tcg_debug_assert(ct_str != NULL);
2123 
2124             def->args_ct[i].u.regs = 0;
2125             def->args_ct[i].ct = 0;
2126             while (*ct_str != '\0') {
2127                 switch(*ct_str) {
2128                 case '0' ... '9':
2129                     {
2130                         int oarg = *ct_str - '0';
2131                         tcg_debug_assert(ct_str == tdefs->args_ct_str[i]);
2132                         tcg_debug_assert(oarg < def->nb_oargs);
2133                         tcg_debug_assert(def->args_ct[oarg].ct & TCG_CT_REG);
2134                         /* TCG_CT_ALIAS is for the output arguments.
2135                            The input is tagged with TCG_CT_IALIAS. */
2136                         def->args_ct[i] = def->args_ct[oarg];
2137                         def->args_ct[oarg].ct |= TCG_CT_ALIAS;
2138                         def->args_ct[oarg].alias_index = i;
2139                         def->args_ct[i].ct |= TCG_CT_IALIAS;
2140                         def->args_ct[i].alias_index = oarg;
2141                     }
2142                     ct_str++;
2143                     break;
2144                 case '&':
2145                     def->args_ct[i].ct |= TCG_CT_NEWREG;
2146                     ct_str++;
2147                     break;
2148                 case 'i':
2149                     def->args_ct[i].ct |= TCG_CT_CONST;
2150                     ct_str++;
2151                     break;
2152                 default:
2153                     ct_str = target_parse_constraint(&def->args_ct[i],
2154                                                      ct_str, type);
2155                     /* Typo in TCGTargetOpDef constraint. */
2156                     tcg_debug_assert(ct_str != NULL);
2157                 }
2158             }
2159         }
2160 
2161         /* TCGTargetOpDef entry with too much information? */
2162         tcg_debug_assert(i == TCG_MAX_OP_ARGS || tdefs->args_ct_str[i] == NULL);
2163 
2164         /* sort the constraints (XXX: this is just an heuristic) */
2165         sort_constraints(def, 0, def->nb_oargs);
2166         sort_constraints(def, def->nb_oargs, def->nb_iargs);
2167     }
2168 }
2169 
2170 void tcg_op_remove(TCGContext *s, TCGOp *op)
2171 {
2172     QTAILQ_REMOVE(&s->ops, op, link);
2173     QTAILQ_INSERT_TAIL(&s->free_ops, op, link);
2174     s->nb_ops--;
2175 
2176 #ifdef CONFIG_PROFILER
2177     atomic_set(&s->prof.del_op_count, s->prof.del_op_count + 1);
2178 #endif
2179 }
2180 
2181 static TCGOp *tcg_op_alloc(TCGOpcode opc)
2182 {
2183     TCGContext *s = tcg_ctx;
2184     TCGOp *op;
2185 
2186     if (likely(QTAILQ_EMPTY(&s->free_ops))) {
2187         op = tcg_malloc(sizeof(TCGOp));
2188     } else {
2189         op = QTAILQ_FIRST(&s->free_ops);
2190         QTAILQ_REMOVE(&s->free_ops, op, link);
2191     }
2192     memset(op, 0, offsetof(TCGOp, link));
2193     op->opc = opc;
2194     s->nb_ops++;
2195 
2196     return op;
2197 }
2198 
2199 TCGOp *tcg_emit_op(TCGOpcode opc)
2200 {
2201     TCGOp *op = tcg_op_alloc(opc);
2202     QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
2203     return op;
2204 }
2205 
2206 TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op,
2207                             TCGOpcode opc, int nargs)
2208 {
2209     TCGOp *new_op = tcg_op_alloc(opc);
2210     QTAILQ_INSERT_BEFORE(old_op, new_op, link);
2211     return new_op;
2212 }
2213 
2214 TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op,
2215                            TCGOpcode opc, int nargs)
2216 {
2217     TCGOp *new_op = tcg_op_alloc(opc);
2218     QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link);
2219     return new_op;
2220 }
2221 
/* Liveness state bits; the liveness passes store them, alone or OR-ed
   together, in TCGTemp.state. */
2222 #define TS_DEAD  1
2223 #define TS_MEM   2
2224 
/* Test the dead / sync bit for argument 'n'.  Both macros read a
   variable named 'arg_life', which must be in scope at the point of use. */
2225 #define IS_DEAD_ARG(n)   (arg_life & (DEAD_ARG << (n)))
2226 #define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n)))
2227 
2228 /* liveness analysis: end of function: all temps are dead, and globals
2229    should be in memory. */
2230 static void tcg_la_func_end(TCGContext *s)
2231 {
2232     int ng = s->nb_globals;
2233     int nt = s->nb_temps;
2234     int i;
2235 
2236     for (i = 0; i < ng; ++i) {
2237         s->temps[i].state = TS_DEAD | TS_MEM;
2238     }
2239     for (i = ng; i < nt; ++i) {
2240         s->temps[i].state = TS_DEAD;
2241     }
2242 }
2243 
2244 /* liveness analysis: end of basic block: all temps are dead, globals
2245    and local temps should be in memory. */
2246 static void tcg_la_bb_end(TCGContext *s)
2247 {
2248     int ng = s->nb_globals;
2249     int nt = s->nb_temps;
2250     int i;
2251 
2252     for (i = 0; i < ng; ++i) {
2253         s->temps[i].state = TS_DEAD | TS_MEM;
2254     }
2255     for (i = ng; i < nt; ++i) {
2256         s->temps[i].state = (s->temps[i].temp_local
2257                              ? TS_DEAD | TS_MEM
2258                              : TS_DEAD);
2259     }
2260 }
2261 
2262 /* Liveness analysis, pass 1: walk the ops from last to first, tracking
2263    in TCGTemp.state whether each temp is dead (TS_DEAD) and/or must be
2264    in memory (TS_MEM).  For every op that is kept, op->life records
        which arguments are dead (DEAD_ARG << i) and which outputs need a
        sync (SYNC_ARG << i).  Ops whose outputs are all dead and which
        have no side effects are removed; double-word add/sub/mul ops with
        one dead half are narrowed to the single-word form. */
2265 static void liveness_pass_1(TCGContext *s)
2266 {
2267     int nb_globals = s->nb_globals;
2268     TCGOp *op, *op_prev;
2269 
         /* The walk is backwards, so start from the end-of-function state. */
2270     tcg_la_func_end(s);
2271 
         /* The _SAFE iterator is used because do_remove unlinks the
            current op. */
2272     QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, TCGOpHead, link, op_prev) {
2273         int i, nb_iargs, nb_oargs;
2274         TCGOpcode opc_new, opc_new2;
2275         bool have_opc_new2;
2276         TCGLifeData arg_life = 0;
2277         TCGTemp *arg_ts;
2278         TCGOpcode opc = op->opc;
2279         const TCGOpDef *def = &tcg_op_defs[opc];
2280 
2281         switch (opc) {
2282         case INDEX_op_call:
2283             {
2284                 int call_flags;
2285 
2286                 nb_oargs = TCGOP_CALLO(op);
2287                 nb_iargs = TCGOP_CALLI(op);
                     /* The flags word sits two slots past the last input
                        argument. */
2288                 call_flags = op->args[nb_oargs + nb_iargs + 1];
2289 
2290                 /* pure functions can be removed if their result is unused */
2291                 if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
2292                     for (i = 0; i < nb_oargs; i++) {
2293                         arg_ts = arg_temp(op->args[i]);
                             /* Exact comparison: an output that is dead but
                                still has TS_MEM set keeps the call. */
2294                         if (arg_ts->state != TS_DEAD) {
2295                             goto do_not_remove_call;
2296                         }
2297                     }
                         /* Jumps to the removal code in the default case. */
2298                     goto do_remove;
2299                 } else {
2300                 do_not_remove_call:
2301 
2302                     /* output args are dead */
2303                     for (i = 0; i < nb_oargs; i++) {
2304                         arg_ts = arg_temp(op->args[i]);
2305                         if (arg_ts->state & TS_DEAD) {
2306                             arg_life |= DEAD_ARG << i;
2307                         }
2308                         if (arg_ts->state & TS_MEM) {
2309                             arg_life |= SYNC_ARG << i;
2310                         }
2311                         arg_ts->state = TS_DEAD;
2312                     }
2313 
2314                     if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
2315                                         TCG_CALL_NO_READ_GLOBALS))) {
2316                         /* globals should go back to memory */
2317                         for (i = 0; i < nb_globals; i++) {
2318                             s->temps[i].state = TS_DEAD | TS_MEM;
2319                         }
2320                     } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
2321                         /* globals should be synced to memory */
2322                         for (i = 0; i < nb_globals; i++) {
2323                             s->temps[i].state |= TS_MEM;
2324                         }
2325                     }
2326 
2327                     /* record arguments that die in this helper */
                         /* arg_temp() may yield NULL for a call input --
                            presumably for TCG_CALL_DUMMY_ARG; confirm. */
2328                     for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
2329                         arg_ts = arg_temp(op->args[i]);
2330                         if (arg_ts && arg_ts->state & TS_DEAD) {
2331                             arg_life |= DEAD_ARG << i;
2332                         }
2333                     }
2334                     /* input arguments are live for preceding opcodes */
2335                     for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
2336                         arg_ts = arg_temp(op->args[i]);
2337                         if (arg_ts) {
2338                             arg_ts->state &= ~TS_DEAD;
2339                         }
2340                     }
2341                 }
2342             }
2343             break;
2344         case INDEX_op_insn_start:
2345             break;
2346         case INDEX_op_discard:
2347             /* mark the temporary as dead */
2348             arg_temp(op->args[0])->state = TS_DEAD;
2349             break;
2350 
2351         case INDEX_op_add2_i32:
2352             opc_new = INDEX_op_add_i32;
2353             goto do_addsub2;
2354         case INDEX_op_sub2_i32:
2355             opc_new = INDEX_op_sub_i32;
2356             goto do_addsub2;
2357         case INDEX_op_add2_i64:
2358             opc_new = INDEX_op_add_i64;
2359             goto do_addsub2;
2360         case INDEX_op_sub2_i64:
2361             opc_new = INDEX_op_sub_i64;
2362         do_addsub2:
2363             nb_iargs = 4;
2364             nb_oargs = 2;
2365             /* Test if the high part of the operation is dead, but not
2366                the low part.  The result can be optimized to a simple
2367                add or sub.  This happens often for x86_64 guest when the
2368                cpu mode is set to 32 bit.  */
2369             if (arg_temp(op->args[1])->state == TS_DEAD) {
2370                 if (arg_temp(op->args[0])->state == TS_DEAD) {
2371                     goto do_remove;
2372                 }
2373                 /* Replace the opcode and adjust the args in place,
2374                    leaving 3 unused args at the end.  */
2375                 op->opc = opc = opc_new;
                     /* The single-word op keeps output args[0] and takes
                        the inputs found at args[2] and args[4]. */
2376                 op->args[1] = op->args[2];
2377                 op->args[2] = op->args[4];
2378                 /* Fall through and mark the single-word operation live.  */
2379                 nb_iargs = 2;
2380                 nb_oargs = 1;
2381             }
2382             goto do_not_remove;
2383 
2384         case INDEX_op_mulu2_i32:
2385             opc_new = INDEX_op_mul_i32;
2386             opc_new2 = INDEX_op_muluh_i32;
2387             have_opc_new2 = TCG_TARGET_HAS_muluh_i32;
2388             goto do_mul2;
2389         case INDEX_op_muls2_i32:
2390             opc_new = INDEX_op_mul_i32;
2391             opc_new2 = INDEX_op_mulsh_i32;
2392             have_opc_new2 = TCG_TARGET_HAS_mulsh_i32;
2393             goto do_mul2;
2394         case INDEX_op_mulu2_i64:
2395             opc_new = INDEX_op_mul_i64;
2396             opc_new2 = INDEX_op_muluh_i64;
2397             have_opc_new2 = TCG_TARGET_HAS_muluh_i64;
2398             goto do_mul2;
2399         case INDEX_op_muls2_i64:
2400             opc_new = INDEX_op_mul_i64;
2401             opc_new2 = INDEX_op_mulsh_i64;
2402             have_opc_new2 = TCG_TARGET_HAS_mulsh_i64;
2403             goto do_mul2;
2404         do_mul2:
2405             nb_iargs = 2;
2406             nb_oargs = 2;
2407             if (arg_temp(op->args[1])->state == TS_DEAD) {
2408                 if (arg_temp(op->args[0])->state == TS_DEAD) {
2409                     /* Both parts of the operation are dead.  */
2410                     goto do_remove;
2411                 }
2412                 /* The high part of the operation is dead; generate the low. */
2413                 op->opc = opc = opc_new;
2414                 op->args[1] = op->args[2];
2415                 op->args[2] = op->args[3];
2416             } else if (arg_temp(op->args[0])->state == TS_DEAD && have_opc_new2) {
2417                 /* The low part of the operation is dead; generate the high. */
2418                 op->opc = opc = opc_new2;
2419                 op->args[0] = op->args[1];
2420                 op->args[1] = op->args[2];
2421                 op->args[2] = op->args[3];
2422             } else {
                     /* Both halves are needed, or only the high half is
                        and the high-only opcode is unavailable: keep the
                        double-word op unchanged. */
2423                 goto do_not_remove;
2424             }
2425             /* Mark the single-word operation live.  */
2426             nb_oargs = 1;
2427             goto do_not_remove;
2428 
2429         default:
2430             /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
2431             nb_iargs = def->nb_iargs;
2432             nb_oargs = def->nb_oargs;
2433 
2434             /* Test if the operation can be removed because all
2435                its outputs are dead. We assume that nb_oargs == 0
2436                implies side effects */
2437             if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) {
2438                 for (i = 0; i < nb_oargs; i++) {
2439                     if (arg_temp(op->args[i])->state != TS_DEAD) {
2440                         goto do_not_remove;
2441                     }
2442                 }
                 /* Also reached by goto from the call, add2/sub2 and mul2
                    cases above. */
2443             do_remove:
2444                 tcg_op_remove(s, op);
2445             } else {
                 /* Note: 'def' still refers to the opcode the op had on
                    entry, even if op->opc was narrowed above. */
2446             do_not_remove:
2447                 /* output args are dead */
2448                 for (i = 0; i < nb_oargs; i++) {
2449                     arg_ts = arg_temp(op->args[i]);
2450                     if (arg_ts->state & TS_DEAD) {
2451                         arg_life |= DEAD_ARG << i;
2452                     }
2453                     if (arg_ts->state & TS_MEM) {
2454                         arg_life |= SYNC_ARG << i;
2455                     }
2456                     arg_ts->state = TS_DEAD;
2457                 }
2458 
2459                 /* if end of basic block, update */
2460                 if (def->flags & TCG_OPF_BB_END) {
2461                     tcg_la_bb_end(s);
2462                 } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
2463                     /* globals should be synced to memory */
2464                     for (i = 0; i < nb_globals; i++) {
2465                         s->temps[i].state |= TS_MEM;
2466                     }
2467                 }
2468 
2469                 /* record arguments that die in this opcode */
2470                 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
2471                     arg_ts = arg_temp(op->args[i]);
2472                     if (arg_ts->state & TS_DEAD) {
2473                         arg_life |= DEAD_ARG << i;
2474                     }
2475                 }
2476                 /* input arguments are live for preceding opcodes */
2477                 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
2478                     arg_temp(op->args[i])->state &= ~TS_DEAD;
2479                 }
2480             }
2481             break;
2482         }
             /* Also executed for ops removed above, where arg_life is
                still 0. */
2483         op->life = arg_life;
2484     }
2485 }
2486 
2487 /* Liveness analysis: Convert indirect regs to direct temporaries.
        Each global with indirect_reg set gets a newly allocated temp,
        reachable through state_ptr.  Walking the ops forwards, arguments
        that name such a global are rewritten to the direct temp; a load
        op is inserted before the first use while the global is TS_DEAD,
        and a store op after an output that op->life marks for sync.
        Returns true if any op argument was rewritten.  */
2488 static bool liveness_pass_2(TCGContext *s)
2489 {
2490     int nb_globals = s->nb_globals;
2491     int nb_temps, i;
2492     bool changes = false;
2493     TCGOp *op, *op_next;
2494 
2495     /* Create a temporary for each indirect global.  */
2496     for (i = 0; i < nb_globals; ++i) {
2497         TCGTemp *its = &s->temps[i];
2498         if (its->indirect_reg) {
2499             TCGTemp *dts = tcg_temp_alloc(s);
2500             dts->type = its->type;
2501             dts->base_type = its->base_type;
2502             its->state_ptr = dts;
2503         } else {
2504             its->state_ptr = NULL;
2505         }
2506         /* All globals begin dead.  */
2507         its->state = TS_DEAD;
2508     }
         /* s->nb_temps is read only after the loop above, so this
            presumably also covers the temps just allocated -- assumes
            tcg_temp_alloc() bumps nb_temps; confirm. */
2509     for (nb_temps = s->nb_temps; i < nb_temps; ++i) {
2510         TCGTemp *its = &s->temps[i];
2511         its->state_ptr = NULL;
2512         its->state = TS_DEAD;
2513     }
2514 
         /* The _SAFE iterator fetches the successor up front, so a store
            inserted after 'op' below is not itself visited. */
2515     QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
2516         TCGOpcode opc = op->opc;
2517         const TCGOpDef *def = &tcg_op_defs[opc];
             /* Read by the IS_DEAD_ARG / NEED_SYNC_ARG macros below. */
2518         TCGLifeData arg_life = op->life;
2519         int nb_iargs, nb_oargs, call_flags;
2520         TCGTemp *arg_ts, *dir_ts;
2521 
2522         if (opc == INDEX_op_call) {
2523             nb_oargs = TCGOP_CALLO(op);
2524             nb_iargs = TCGOP_CALLI(op);
2525             call_flags = op->args[nb_oargs + nb_iargs + 1];
2526         } else {
2527             nb_iargs = def->nb_iargs;
2528             nb_oargs = def->nb_oargs;
2529 
2530             /* Set flags similar to how calls require.  */
2531             if (def->flags & TCG_OPF_BB_END) {
2532                 /* Like writing globals: save_globals */
2533                 call_flags = 0;
2534             } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
2535                 /* Like reading globals: sync_globals */
2536                 call_flags = TCG_CALL_NO_WRITE_GLOBALS;
2537             } else {
2538                 /* No effect on globals.  */
2539                 call_flags = (TCG_CALL_NO_READ_GLOBALS |
2540                               TCG_CALL_NO_WRITE_GLOBALS);
2541             }
2542         }
2543 
2544         /* Make sure that input arguments are available.  */
2545         for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
2546             arg_ts = arg_temp(op->args[i]);
2547             if (arg_ts) {
2548                 dir_ts = arg_ts->state_ptr;
                     /* Only indirect globals have a non-NULL state_ptr. */
2549                 if (dir_ts && arg_ts->state == TS_DEAD) {
2550                     TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32
2551                                       ? INDEX_op_ld_i32
2552                                       : INDEX_op_ld_i64);
2553                     TCGOp *lop = tcg_op_insert_before(s, op, lopc, 3);
2554 
                         /* Load args: destination temp, base temp, offset. */
2555                     lop->args[0] = temp_arg(dir_ts);
2556                     lop->args[1] = temp_arg(arg_ts->mem_base);
2557                     lop->args[2] = arg_ts->mem_offset;
2558 
2559                     /* Loaded, but synced with memory.  */
2560                     arg_ts->state = TS_MEM;
2561                 }
2562             }
2563         }
2564 
2565         /* Perform input replacement, and mark inputs that became dead.
2566            No action is required except keeping temp_state up to date
2567            so that we reload when needed.  */
2568         for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
2569             arg_ts = arg_temp(op->args[i]);
2570             if (arg_ts) {
2571                 dir_ts = arg_ts->state_ptr;
2572                 if (dir_ts) {
2573                     op->args[i] = temp_arg(dir_ts);
2574                     changes = true;
2575                     if (IS_DEAD_ARG(i)) {
2576                         arg_ts->state = TS_DEAD;
2577                     }
2578                 }
2579             }
2580         }
2581 
2582         /* Liveness analysis should ensure that the following are
2583            all correct, for call sites and basic block end points.  */
2584         if (call_flags & TCG_CALL_NO_READ_GLOBALS) {
2585             /* Nothing to do */
2586         } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) {
2587             for (i = 0; i < nb_globals; ++i) {
2588                 /* Liveness should see that globals are synced back,
2589                    that is, either TS_DEAD or TS_MEM.  */
2590                 arg_ts = &s->temps[i];
2591                 tcg_debug_assert(arg_ts->state_ptr == 0
2592                                  || arg_ts->state != 0);
2593             }
2594         } else {
2595             for (i = 0; i < nb_globals; ++i) {
2596                 /* Liveness should see that globals are saved back,
2597                    that is, TS_DEAD, waiting to be reloaded.  */
2598                 arg_ts = &s->temps[i];
2599                 tcg_debug_assert(arg_ts->state_ptr == 0
2600                                  || arg_ts->state == TS_DEAD);
2601             }
2602         }
2603 
2604         /* Outputs become available.  */
2605         for (i = 0; i < nb_oargs; i++) {
2606             arg_ts = arg_temp(op->args[i]);
2607             dir_ts = arg_ts->state_ptr;
2608             if (!dir_ts) {
2609                 continue;
2610             }
2611             op->args[i] = temp_arg(dir_ts);
2612             changes = true;
2613 
2614             /* The output is now live and modified.  */
2615             arg_ts->state = 0;
2616 
2617             /* Sync outputs upon their last write.  */
2618             if (NEED_SYNC_ARG(i)) {
2619                 TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
2620                                   ? INDEX_op_st_i32
2621                                   : INDEX_op_st_i64);
2622                 TCGOp *sop = tcg_op_insert_after(s, op, sopc, 3);
2623 
                     /* Store args: source temp, base temp, offset. */
2624                 sop->args[0] = temp_arg(dir_ts);
2625                 sop->args[1] = temp_arg(arg_ts->mem_base);
2626                 sop->args[2] = arg_ts->mem_offset;
2627 
2628                 arg_ts->state = TS_MEM;
2629             }
2630             /* Drop outputs that are dead.  */
2631             if (IS_DEAD_ARG(i)) {
2632                 arg_ts->state = TS_DEAD;
2633             }
2634         }
2635     }
2636 
2637     return changes;
2638 }
2639 
2640 #ifdef CONFIG_DEBUG_TCG
2641 static void dump_regs(TCGContext *s)
2642 {
2643     TCGTemp *ts;
2644     int i;
2645     char buf[64];
2646 
2647     for(i = 0; i < s->nb_temps; i++) {
2648         ts = &s->temps[i];
2649         printf("  %10s: ", tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts));
2650         switch(ts->val_type) {
2651         case TEMP_VAL_REG:
2652             printf("%s", tcg_target_reg_names[ts->reg]);
2653             break;
2654         case TEMP_VAL_MEM:
2655             printf("%d(%s)", (int)ts->mem_offset,
2656                    tcg_target_reg_names[ts->mem_base->reg]);
2657             break;
2658         case TEMP_VAL_CONST:
2659             printf("$0x%" TCG_PRIlx, ts->val);
2660             break;
2661         case TEMP_VAL_DEAD:
2662             printf("D");
2663             break;
2664         default:
2665             printf("???");
2666             break;
2667         }
2668         printf("\n");
2669     }
2670 
2671     for(i = 0; i < TCG_TARGET_NB_REGS; i++) {
2672         if (s->reg_to_temp[i] != NULL) {
2673             printf("%s: %s\n",
2674                    tcg_target_reg_names[i],
2675                    tcg_get_arg_str_ptr(s, buf, sizeof(buf), s->reg_to_temp[i]));
2676         }
2677     }
2678 }
2679 
2680 static void check_regs(TCGContext *s)
2681 {
2682     int reg;
2683     int k;
2684     TCGTemp *ts;
2685     char buf[64];
2686 
2687     for (reg = 0; reg < TCG_TARGET_NB_REGS; reg++) {
2688         ts = s->reg_to_temp[reg];
2689         if (ts != NULL) {
2690             if (ts->val_type != TEMP_VAL_REG || ts->reg != reg) {
2691                 printf("Inconsistency for register %s:\n",
2692                        tcg_target_reg_names[reg]);
2693                 goto fail;
2694             }
2695         }
2696     }
2697     for (k = 0; k < s->nb_temps; k++) {
2698         ts = &s->temps[k];
2699         if (ts->val_type == TEMP_VAL_REG && !ts->fixed_reg
2700             && s->reg_to_temp[ts->reg] != ts) {
2701             printf("Inconsistency for temp %s:\n",
2702                    tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts));
2703         fail:
2704             printf("reg state:\n");
2705             dump_regs(s);
2706             tcg_abort();
2707         }
2708     }
2709 }
2710 #endif
2711 
2712 static void temp_allocate_frame(TCGContext *s, TCGTemp *ts)
2713 {
2714 #if !(defined(__sparc__) && TCG_TARGET_REG_BITS == 64)
2715     /* Sparc64 stack is accessed with offset of 2047 */
2716     s->current_frame_offset = (s->current_frame_offset +
2717                                (tcg_target_long)sizeof(tcg_target_long) - 1) &
2718         ~(sizeof(tcg_target_long) - 1);
2719 #endif
2720     if (s->current_frame_offset + (tcg_target_long)sizeof(tcg_target_long) >
2721         s->frame_end) {
2722         tcg_abort();
2723     }
2724     ts->mem_offset = s->current_frame_offset;
2725     ts->mem_base = s->frame_temp;
2726     ts->mem_allocated = 1;
2727     s->current_frame_offset += sizeof(tcg_target_long);
2728 }
2729 
2730 static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet);
2731 
2732 /* Mark a temporary as free or dead.  If 'free_or_dead' is negative,
2733    mark it free; otherwise mark it dead.  */
2734 static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead)
2735 {
2736     if (ts->fixed_reg) {
2737         return;
2738     }
2739     if (ts->val_type == TEMP_VAL_REG) {
2740         s->reg_to_temp[ts->reg] = NULL;
2741     }
2742     ts->val_type = (free_or_dead < 0
2743                     || ts->temp_local
2744                     || ts->temp_global
2745                     ? TEMP_VAL_MEM : TEMP_VAL_DEAD);
2746 }
2747 
2748 /* Mark a temporary as dead.  */
2749 static inline void temp_dead(TCGContext *s, TCGTemp *ts)
2750 {
2751     temp_free_or_dead(s, ts, 1);
2752 }
2753 
2754 /* Sync a temporary to memory. 'allocated_regs' is used in case a temporary
2755    registers needs to be allocated to store a constant.  If 'free_or_dead'
2756    is non-zero, subsequently release the temporary; if it is positive, the
2757    temp is dead; if it is negative, the temp is free.  */
2758 static void temp_sync(TCGContext *s, TCGTemp *ts,
2759                       TCGRegSet allocated_regs, int free_or_dead)
2760 {
2761     if (ts->fixed_reg) {
2762         return;
2763     }
2764     if (!ts->mem_coherent) {
2765         if (!ts->mem_allocated) {
2766             temp_allocate_frame(s, ts);
2767         }
2768         switch (ts->val_type) {
2769         case TEMP_VAL_CONST:
2770             /* If we're going to free the temp immediately, then we won't
2771                require it later in a register, so attempt to store the
2772                constant to memory directly.  */
2773             if (free_or_dead
2774                 && tcg_out_sti(s, ts->type, ts->val,
2775                                ts->mem_base->reg, ts->mem_offset)) {
2776                 break;
2777             }
2778             temp_load(s, ts, tcg_target_available_regs[ts->type],
2779                       allocated_regs);
2780             /* fallthrough */
2781 
2782         case TEMP_VAL_REG:
2783             tcg_out_st(s, ts->type, ts->reg,
2784                        ts->mem_base->reg, ts->mem_offset);
2785             break;
2786 
2787         case TEMP_VAL_MEM:
2788             break;
2789 
2790         case TEMP_VAL_DEAD:
2791         default:
2792             tcg_abort();
2793         }
2794         ts->mem_coherent = 1;
2795     }
2796     if (free_or_dead) {
2797         temp_free_or_dead(s, ts, free_or_dead);
2798     }
2799 }
2800 
2801 /* free register 'reg' by spilling the corresponding temporary if necessary */
2802 static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs)
2803 {
2804     TCGTemp *ts = s->reg_to_temp[reg];
2805     if (ts != NULL) {
2806         temp_sync(s, ts, allocated_regs, -1);
2807     }
2808 }
2809 
2810 /* Allocate a register belonging to reg1 & ~reg2 */
2811 static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet desired_regs,
2812                             TCGRegSet allocated_regs, bool rev)
2813 {
2814     int i, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
2815     const int *order;
2816     TCGReg reg;
2817     TCGRegSet reg_ct;
2818 
2819     reg_ct = desired_regs & ~allocated_regs;
2820     order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
2821 
2822     /* first try free registers */
2823     for(i = 0; i < n; i++) {
2824         reg = order[i];
2825         if (tcg_regset_test_reg(reg_ct, reg) && s->reg_to_temp[reg] == NULL)
2826             return reg;
2827     }
2828 
2829     /* XXX: do better spill choice */
2830     for(i = 0; i < n; i++) {
2831         reg = order[i];
2832         if (tcg_regset_test_reg(reg_ct, reg)) {
2833             tcg_reg_free(s, reg, allocated_regs);
2834             return reg;
2835         }
2836     }
2837 
2838     tcg_abort();
2839 }
2840 
2841 /* Make sure the temporary is in a register.  If needed, allocate the register
2842    from DESIRED while avoiding ALLOCATED.  */
2843 static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
2844                       TCGRegSet allocated_regs)
2845 {
2846     TCGReg reg;
2847 
2848     switch (ts->val_type) {
2849     case TEMP_VAL_REG:
2850         return;
2851     case TEMP_VAL_CONST:
2852         reg = tcg_reg_alloc(s, desired_regs, allocated_regs, ts->indirect_base);
2853         tcg_out_movi(s, ts->type, reg, ts->val);
2854         ts->mem_coherent = 0;
2855         break;
2856     case TEMP_VAL_MEM:
2857         reg = tcg_reg_alloc(s, desired_regs, allocated_regs, ts->indirect_base);
2858         tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
2859         ts->mem_coherent = 1;
2860         break;
2861     case TEMP_VAL_DEAD:
2862     default:
2863         tcg_abort();
2864     }
2865     ts->reg = reg;
2866     ts->val_type = TEMP_VAL_REG;
2867     s->reg_to_temp[reg] = ts;
2868 }
2869 
2870 /* Save a temporary to memory. 'allocated_regs' is used in case a
2871    temporary registers needs to be allocated to store a constant.  */
2872 static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
2873 {
2874     /* The liveness analysis already ensures that globals are back
2875        in memory. Keep an tcg_debug_assert for safety. */
2876     tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || ts->fixed_reg);
2877 }
2878 
2879 /* save globals to their canonical location and assume they can be
2880    modified be the following code. 'allocated_regs' is used in case a
2881    temporary registers needs to be allocated to store a constant. */
2882 static void save_globals(TCGContext *s, TCGRegSet allocated_regs)
2883 {
2884     int i, n;
2885 
2886     for (i = 0, n = s->nb_globals; i < n; i++) {
2887         temp_save(s, &s->temps[i], allocated_regs);
2888     }
2889 }
2890 
2891 /* sync globals to their canonical location and assume they can be
2892    read by the following code. 'allocated_regs' is used in case a
2893    temporary registers needs to be allocated to store a constant. */
2894 static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
2895 {
2896     int i, n;
2897 
2898     for (i = 0, n = s->nb_globals; i < n; i++) {
2899         TCGTemp *ts = &s->temps[i];
2900         tcg_debug_assert(ts->val_type != TEMP_VAL_REG
2901                          || ts->fixed_reg
2902                          || ts->mem_coherent);
2903     }
2904 }
2905 
2906 /* at the end of a basic block, we assume all temporaries are dead and
2907    all globals are stored at their canonical location. */
2908 static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
2909 {
2910     int i;
2911 
2912     for (i = s->nb_globals; i < s->nb_temps; i++) {
2913         TCGTemp *ts = &s->temps[i];
2914         if (ts->temp_local) {
2915             temp_save(s, ts, allocated_regs);
2916         } else {
2917             /* The liveness analysis already ensures that temps are dead.
2918                Keep an tcg_debug_assert for safety. */
2919             tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
2920         }
2921     }
2922 
2923     save_globals(s, allocated_regs);
2924 }
2925 
2926 static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots,
2927                                   tcg_target_ulong val, TCGLifeData arg_life)
2928 {
2929     if (ots->fixed_reg) {
2930         /* For fixed registers, we do not do any constant propagation.  */
2931         tcg_out_movi(s, ots->type, ots->reg, val);
2932         return;
2933     }
2934 
2935     /* The movi is not explicitly generated here.  */
2936     if (ots->val_type == TEMP_VAL_REG) {
2937         s->reg_to_temp[ots->reg] = NULL;
2938     }
2939     ots->val_type = TEMP_VAL_CONST;
2940     ots->val = val;
2941     ots->mem_coherent = 0;
2942     if (NEED_SYNC_ARG(0)) {
2943         temp_sync(s, ots, s->reserved_regs, IS_DEAD_ARG(0));
2944     } else if (IS_DEAD_ARG(0)) {
2945         temp_dead(s, ots);
2946     }
2947 }
2948 
2949 static void tcg_reg_alloc_movi(TCGContext *s, const TCGOp *op)
2950 {
2951     TCGTemp *ots = arg_temp(op->args[0]);
2952     tcg_target_ulong val = op->args[1];
2953 
2954     tcg_reg_alloc_do_movi(s, ots, val, op->life);
2955 }
2956 
2957 static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op)
2958 {
2959     const TCGLifeData arg_life = op->life;
2960     TCGRegSet allocated_regs;
2961     TCGTemp *ts, *ots;
2962     TCGType otype, itype;
2963 
2964     allocated_regs = s->reserved_regs;
2965     ots = arg_temp(op->args[0]);
2966     ts = arg_temp(op->args[1]);
2967 
2968     /* Note that otype != itype for no-op truncation.  */
2969     otype = ots->type;
2970     itype = ts->type;
2971 
2972     if (ts->val_type == TEMP_VAL_CONST) {
2973         /* propagate constant or generate sti */
2974         tcg_target_ulong val = ts->val;
2975         if (IS_DEAD_ARG(1)) {
2976             temp_dead(s, ts);
2977         }
2978         tcg_reg_alloc_do_movi(s, ots, val, arg_life);
2979         return;
2980     }
2981 
2982     /* If the source value is in memory we're going to be forced
2983        to have it in a register in order to perform the copy.  Copy
2984        the SOURCE value into its own register first, that way we
2985        don't have to reload SOURCE the next time it is used. */
2986     if (ts->val_type == TEMP_VAL_MEM) {
2987         temp_load(s, ts, tcg_target_available_regs[itype], allocated_regs);
2988     }
2989 
2990     tcg_debug_assert(ts->val_type == TEMP_VAL_REG);
2991     if (IS_DEAD_ARG(0) && !ots->fixed_reg) {
2992         /* mov to a non-saved dead register makes no sense (even with
2993            liveness analysis disabled). */
2994         tcg_debug_assert(NEED_SYNC_ARG(0));
2995         if (!ots->mem_allocated) {
2996             temp_allocate_frame(s, ots);
2997         }
2998         tcg_out_st(s, otype, ts->reg, ots->mem_base->reg, ots->mem_offset);
2999         if (IS_DEAD_ARG(1)) {
3000             temp_dead(s, ts);
3001         }
3002         temp_dead(s, ots);
3003     } else {
3004         if (IS_DEAD_ARG(1) && !ts->fixed_reg && !ots->fixed_reg) {
3005             /* the mov can be suppressed */
3006             if (ots->val_type == TEMP_VAL_REG) {
3007                 s->reg_to_temp[ots->reg] = NULL;
3008             }
3009             ots->reg = ts->reg;
3010             temp_dead(s, ts);
3011         } else {
3012             if (ots->val_type != TEMP_VAL_REG) {
3013                 /* When allocating a new register, make sure to not spill the
3014                    input one. */
3015                 tcg_regset_set_reg(allocated_regs, ts->reg);
3016                 ots->reg = tcg_reg_alloc(s, tcg_target_available_regs[otype],
3017                                          allocated_regs, ots->indirect_base);
3018             }
3019             tcg_out_mov(s, otype, ots->reg, ts->reg);
3020         }
3021         ots->val_type = TEMP_VAL_REG;
3022         ots->mem_coherent = 0;
3023         s->reg_to_temp[ots->reg] = ots;
3024         if (NEED_SYNC_ARG(0)) {
3025             temp_sync(s, ots, allocated_regs, 0);
3026         }
3027     }
3028 }
3029 
3030 static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
3031 {
3032     const TCGLifeData arg_life = op->life;
3033     const TCGOpDef * const def = &tcg_op_defs[op->opc];
3034     TCGRegSet i_allocated_regs;
3035     TCGRegSet o_allocated_regs;
3036     int i, k, nb_iargs, nb_oargs;
3037     TCGReg reg;
3038     TCGArg arg;
3039     const TCGArgConstraint *arg_ct;
3040     TCGTemp *ts;
3041     TCGArg new_args[TCG_MAX_OP_ARGS];
3042     int const_args[TCG_MAX_OP_ARGS];
3043 
3044     nb_oargs = def->nb_oargs;
3045     nb_iargs = def->nb_iargs;
3046 
3047     /* copy constants */
3048     memcpy(new_args + nb_oargs + nb_iargs,
3049            op->args + nb_oargs + nb_iargs,
3050            sizeof(TCGArg) * def->nb_cargs);
3051 
3052     i_allocated_regs = s->reserved_regs;
3053     o_allocated_regs = s->reserved_regs;
3054 
3055     /* satisfy input constraints */
3056     for (k = 0; k < nb_iargs; k++) {
3057         i = def->sorted_args[nb_oargs + k];
3058         arg = op->args[i];
3059         arg_ct = &def->args_ct[i];
3060         ts = arg_temp(arg);
3061 
3062         if (ts->val_type == TEMP_VAL_CONST
3063             && tcg_target_const_match(ts->val, ts->type, arg_ct)) {
3064             /* constant is OK for instruction */
3065             const_args[i] = 1;
3066             new_args[i] = ts->val;
3067             goto iarg_end;
3068         }
3069 
3070         temp_load(s, ts, arg_ct->u.regs, i_allocated_regs);
3071 
3072         if (arg_ct->ct & TCG_CT_IALIAS) {
3073             if (ts->fixed_reg) {
3074                 /* if fixed register, we must allocate a new register
3075                    if the alias is not the same register */
3076                 if (arg != op->args[arg_ct->alias_index])
3077                     goto allocate_in_reg;
3078             } else {
3079                 /* if the input is aliased to an output and if it is
3080                    not dead after the instruction, we must allocate
3081                    a new register and move it */
3082                 if (!IS_DEAD_ARG(i)) {
3083                     goto allocate_in_reg;
3084                 }
3085                 /* check if the current register has already been allocated
3086                    for another input aliased to an output */
3087                 int k2, i2;
3088                 for (k2 = 0 ; k2 < k ; k2++) {
3089                     i2 = def->sorted_args[nb_oargs + k2];
3090                     if ((def->args_ct[i2].ct & TCG_CT_IALIAS) &&
3091                         (new_args[i2] == ts->reg)) {
3092                         goto allocate_in_reg;
3093                     }
3094                 }
3095             }
3096         }
3097         reg = ts->reg;
3098         if (tcg_regset_test_reg(arg_ct->u.regs, reg)) {
3099             /* nothing to do : the constraint is satisfied */
3100         } else {
3101         allocate_in_reg:
3102             /* allocate a new register matching the constraint
3103                and move the temporary register into it */
3104             reg = tcg_reg_alloc(s, arg_ct->u.regs, i_allocated_regs,
3105                                 ts->indirect_base);
3106             tcg_out_mov(s, ts->type, reg, ts->reg);
3107         }
3108         new_args[i] = reg;
3109         const_args[i] = 0;
3110         tcg_regset_set_reg(i_allocated_regs, reg);
3111     iarg_end: ;
3112     }
3113 
3114     /* mark dead temporaries and free the associated registers */
3115     for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
3116         if (IS_DEAD_ARG(i)) {
3117             temp_dead(s, arg_temp(op->args[i]));
3118         }
3119     }
3120 
3121     if (def->flags & TCG_OPF_BB_END) {
3122         tcg_reg_alloc_bb_end(s, i_allocated_regs);
3123     } else {
3124         if (def->flags & TCG_OPF_CALL_CLOBBER) {
3125             /* XXX: permit generic clobber register list ? */
3126             for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
3127                 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
3128                     tcg_reg_free(s, i, i_allocated_regs);
3129                 }
3130             }
3131         }
3132         if (def->flags & TCG_OPF_SIDE_EFFECTS) {
3133             /* sync globals if the op has side effects and might trigger
3134                an exception. */
3135             sync_globals(s, i_allocated_regs);
3136         }
3137 
3138         /* satisfy the output constraints */
3139         for(k = 0; k < nb_oargs; k++) {
3140             i = def->sorted_args[k];
3141             arg = op->args[i];
3142             arg_ct = &def->args_ct[i];
3143             ts = arg_temp(arg);
3144             if ((arg_ct->ct & TCG_CT_ALIAS)
3145                 && !const_args[arg_ct->alias_index]) {
3146                 reg = new_args[arg_ct->alias_index];
3147             } else if (arg_ct->ct & TCG_CT_NEWREG) {
3148                 reg = tcg_reg_alloc(s, arg_ct->u.regs,
3149                                     i_allocated_regs | o_allocated_regs,
3150                                     ts->indirect_base);
3151             } else {
3152                 /* if fixed register, we try to use it */
3153                 reg = ts->reg;
3154                 if (ts->fixed_reg &&
3155                     tcg_regset_test_reg(arg_ct->u.regs, reg)) {
3156                     goto oarg_end;
3157                 }
3158                 reg = tcg_reg_alloc(s, arg_ct->u.regs, o_allocated_regs,
3159                                     ts->indirect_base);
3160             }
3161             tcg_regset_set_reg(o_allocated_regs, reg);
3162             /* if a fixed register is used, then a move will be done afterwards */
3163             if (!ts->fixed_reg) {
3164                 if (ts->val_type == TEMP_VAL_REG) {
3165                     s->reg_to_temp[ts->reg] = NULL;
3166                 }
3167                 ts->val_type = TEMP_VAL_REG;
3168                 ts->reg = reg;
3169                 /* temp value is modified, so the value kept in memory is
3170                    potentially not the same */
3171                 ts->mem_coherent = 0;
3172                 s->reg_to_temp[reg] = ts;
3173             }
3174         oarg_end:
3175             new_args[i] = reg;
3176         }
3177     }
3178 
3179     /* emit instruction */
3180     if (def->flags & TCG_OPF_VECTOR) {
3181         tcg_out_vec_op(s, op->opc, TCGOP_VECL(op), TCGOP_VECE(op),
3182                        new_args, const_args);
3183     } else {
3184         tcg_out_op(s, op->opc, new_args, const_args);
3185     }
3186 
3187     /* move the outputs in the correct register if needed */
3188     for(i = 0; i < nb_oargs; i++) {
3189         ts = arg_temp(op->args[i]);
3190         reg = new_args[i];
3191         if (ts->fixed_reg && ts->reg != reg) {
3192             tcg_out_mov(s, ts->type, ts->reg, reg);
3193         }
3194         if (NEED_SYNC_ARG(i)) {
3195             temp_sync(s, ts, o_allocated_regs, IS_DEAD_ARG(i));
3196         } else if (IS_DEAD_ARG(i)) {
3197             temp_dead(s, ts);
3198         }
3199     }
3200 }
3201 
/* STACK_DIR(x) yields x unchanged, or -x when the target defines
   TCG_TARGET_STACK_GROWSUP.  */
#ifndef TCG_TARGET_STACK_GROWSUP
#define STACK_DIR(x) (x)
#else
#define STACK_DIR(x) (-(x))
#endif
3207 
3208 static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
3209 {
3210     const int nb_oargs = TCGOP_CALLO(op);
3211     const int nb_iargs = TCGOP_CALLI(op);
3212     const TCGLifeData arg_life = op->life;
3213     int flags, nb_regs, i;
3214     TCGReg reg;
3215     TCGArg arg;
3216     TCGTemp *ts;
3217     intptr_t stack_offset;
3218     size_t call_stack_size;
3219     tcg_insn_unit *func_addr;
3220     int allocate_args;
3221     TCGRegSet allocated_regs;
3222 
3223     func_addr = (tcg_insn_unit *)(intptr_t)op->args[nb_oargs + nb_iargs];
3224     flags = op->args[nb_oargs + nb_iargs + 1];
3225 
3226     nb_regs = ARRAY_SIZE(tcg_target_call_iarg_regs);
3227     if (nb_regs > nb_iargs) {
3228         nb_regs = nb_iargs;
3229     }
3230 
3231     /* assign stack slots first */
3232     call_stack_size = (nb_iargs - nb_regs) * sizeof(tcg_target_long);
3233     call_stack_size = (call_stack_size + TCG_TARGET_STACK_ALIGN - 1) &
3234         ~(TCG_TARGET_STACK_ALIGN - 1);
3235     allocate_args = (call_stack_size > TCG_STATIC_CALL_ARGS_SIZE);
3236     if (allocate_args) {
3237         /* XXX: if more than TCG_STATIC_CALL_ARGS_SIZE is needed,
3238            preallocate call stack */
3239         tcg_abort();
3240     }
3241 
3242     stack_offset = TCG_TARGET_CALL_STACK_OFFSET;
3243     for (i = nb_regs; i < nb_iargs; i++) {
3244         arg = op->args[nb_oargs + i];
3245 #ifdef TCG_TARGET_STACK_GROWSUP
3246         stack_offset -= sizeof(tcg_target_long);
3247 #endif
3248         if (arg != TCG_CALL_DUMMY_ARG) {
3249             ts = arg_temp(arg);
3250             temp_load(s, ts, tcg_target_available_regs[ts->type],
3251                       s->reserved_regs);
3252             tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK, stack_offset);
3253         }
3254 #ifndef TCG_TARGET_STACK_GROWSUP
3255         stack_offset += sizeof(tcg_target_long);
3256 #endif
3257     }
3258 
3259     /* assign input registers */
3260     allocated_regs = s->reserved_regs;
3261     for (i = 0; i < nb_regs; i++) {
3262         arg = op->args[nb_oargs + i];
3263         if (arg != TCG_CALL_DUMMY_ARG) {
3264             ts = arg_temp(arg);
3265             reg = tcg_target_call_iarg_regs[i];
3266             tcg_reg_free(s, reg, allocated_regs);
3267 
3268             if (ts->val_type == TEMP_VAL_REG) {
3269                 if (ts->reg != reg) {
3270                     tcg_out_mov(s, ts->type, reg, ts->reg);
3271                 }
3272             } else {
3273                 TCGRegSet arg_set = 0;
3274 
3275                 tcg_regset_set_reg(arg_set, reg);
3276                 temp_load(s, ts, arg_set, allocated_regs);
3277             }
3278 
3279             tcg_regset_set_reg(allocated_regs, reg);
3280         }
3281     }
3282 
3283     /* mark dead temporaries and free the associated registers */
3284     for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
3285         if (IS_DEAD_ARG(i)) {
3286             temp_dead(s, arg_temp(op->args[i]));
3287         }
3288     }
3289 
3290     /* clobber call registers */
3291     for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
3292         if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
3293             tcg_reg_free(s, i, allocated_regs);
3294         }
3295     }
3296 
3297     /* Save globals if they might be written by the helper, sync them if
3298        they might be read. */
3299     if (flags & TCG_CALL_NO_READ_GLOBALS) {
3300         /* Nothing to do */
3301     } else if (flags & TCG_CALL_NO_WRITE_GLOBALS) {
3302         sync_globals(s, allocated_regs);
3303     } else {
3304         save_globals(s, allocated_regs);
3305     }
3306 
3307     tcg_out_call(s, func_addr);
3308 
3309     /* assign output registers and emit moves if needed */
3310     for(i = 0; i < nb_oargs; i++) {
3311         arg = op->args[i];
3312         ts = arg_temp(arg);
3313         reg = tcg_target_call_oarg_regs[i];
3314         tcg_debug_assert(s->reg_to_temp[reg] == NULL);
3315 
3316         if (ts->fixed_reg) {
3317             if (ts->reg != reg) {
3318                 tcg_out_mov(s, ts->type, ts->reg, reg);
3319             }
3320         } else {
3321             if (ts->val_type == TEMP_VAL_REG) {
3322                 s->reg_to_temp[ts->reg] = NULL;
3323             }
3324             ts->val_type = TEMP_VAL_REG;
3325             ts->reg = reg;
3326             ts->mem_coherent = 0;
3327             s->reg_to_temp[reg] = ts;
3328             if (NEED_SYNC_ARG(i)) {
3329                 temp_sync(s, ts, allocated_regs, IS_DEAD_ARG(i));
3330             } else if (IS_DEAD_ARG(i)) {
3331                 temp_dead(s, ts);
3332             }
3333         }
3334     }
3335 }
3336 
3337 #ifdef CONFIG_PROFILER
3338 
3339 /* avoid copy/paste errors */
3340 #define PROF_ADD(to, from, field)                       \
3341     do {                                                \
3342         (to)->field += atomic_read(&((from)->field));   \
3343     } while (0)
3344 
3345 #define PROF_MAX(to, from, field)                                       \
3346     do {                                                                \
3347         typeof((from)->field) val__ = atomic_read(&((from)->field));    \
3348         if (val__ > (to)->field) {                                      \
3349             (to)->field = val__;                                        \
3350         }                                                               \
3351     } while (0)
3352 
3353 /* Pass in a zero'ed @prof */
3354 static inline
3355 void tcg_profile_snapshot(TCGProfile *prof, bool counters, bool table)
3356 {
3357     unsigned int n_ctxs = atomic_read(&n_tcg_ctxs);
3358     unsigned int i;
3359 
3360     for (i = 0; i < n_ctxs; i++) {
3361         TCGContext *s = atomic_read(&tcg_ctxs[i]);
3362         const TCGProfile *orig = &s->prof;
3363 
3364         if (counters) {
3365             PROF_ADD(prof, orig, cpu_exec_time);
3366             PROF_ADD(prof, orig, tb_count1);
3367             PROF_ADD(prof, orig, tb_count);
3368             PROF_ADD(prof, orig, op_count);
3369             PROF_MAX(prof, orig, op_count_max);
3370             PROF_ADD(prof, orig, temp_count);
3371             PROF_MAX(prof, orig, temp_count_max);
3372             PROF_ADD(prof, orig, del_op_count);
3373             PROF_ADD(prof, orig, code_in_len);
3374             PROF_ADD(prof, orig, code_out_len);
3375             PROF_ADD(prof, orig, search_out_len);
3376             PROF_ADD(prof, orig, interm_time);
3377             PROF_ADD(prof, orig, code_time);
3378             PROF_ADD(prof, orig, la_time);
3379             PROF_ADD(prof, orig, opt_time);
3380             PROF_ADD(prof, orig, restore_count);
3381             PROF_ADD(prof, orig, restore_time);
3382         }
3383         if (table) {
3384             int i;
3385 
3386             for (i = 0; i < NB_OPS; i++) {
3387                 PROF_ADD(prof, orig, table_op_count[i]);
3388             }
3389         }
3390     }
3391 }
3392 
3393 #undef PROF_ADD
3394 #undef PROF_MAX
3395 
3396 static void tcg_profile_snapshot_counters(TCGProfile *prof)
3397 {
3398     tcg_profile_snapshot(prof, true, false);
3399 }
3400 
3401 static void tcg_profile_snapshot_table(TCGProfile *prof)
3402 {
3403     tcg_profile_snapshot(prof, false, true);
3404 }
3405 
3406 void tcg_dump_op_count(FILE *f, fprintf_function cpu_fprintf)
3407 {
3408     TCGProfile prof = {};
3409     int i;
3410 
3411     tcg_profile_snapshot_table(&prof);
3412     for (i = 0; i < NB_OPS; i++) {
3413         cpu_fprintf(f, "%s %" PRId64 "\n", tcg_op_defs[i].name,
3414                     prof.table_op_count[i]);
3415     }
3416 }
3417 
3418 int64_t tcg_cpu_exec_time(void)
3419 {
3420     unsigned int n_ctxs = atomic_read(&n_tcg_ctxs);
3421     unsigned int i;
3422     int64_t ret = 0;
3423 
3424     for (i = 0; i < n_ctxs; i++) {
3425         const TCGContext *s = atomic_read(&tcg_ctxs[i]);
3426         const TCGProfile *prof = &s->prof;
3427 
3428         ret += atomic_read(&prof->cpu_exec_time);
3429     }
3430     return ret;
3431 }
3432 #else
3433 void tcg_dump_op_count(FILE *f, fprintf_function cpu_fprintf)
3434 {
3435     cpu_fprintf(f, "[TCG profiler not compiled]\n");
3436 }
3437 
/* Build without CONFIG_PROFILER: no execution time is available.
   Report the error and terminate the process with EXIT_FAILURE; this
   function does not return.  */
int64_t tcg_cpu_exec_time(void)
{
    error_report("%s: TCG profiler not compiled", __func__);
    exit(EXIT_FAILURE);
}
3443 #endif
3444 
3445 
3446 int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
3447 {
3448 #ifdef CONFIG_PROFILER
3449     TCGProfile *prof = &s->prof;
3450 #endif
3451     int i, num_insns;
3452     TCGOp *op;
3453 
3454 #ifdef CONFIG_PROFILER
3455     {
3456         int n = 0;
3457 
3458         QTAILQ_FOREACH(op, &s->ops, link) {
3459             n++;
3460         }
3461         atomic_set(&prof->op_count, prof->op_count + n);
3462         if (n > prof->op_count_max) {
3463             atomic_set(&prof->op_count_max, n);
3464         }
3465 
3466         n = s->nb_temps;
3467         atomic_set(&prof->temp_count, prof->temp_count + n);
3468         if (n > prof->temp_count_max) {
3469             atomic_set(&prof->temp_count_max, n);
3470         }
3471     }
3472 #endif
3473 
3474 #ifdef DEBUG_DISAS
3475     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)
3476                  && qemu_log_in_addr_range(tb->pc))) {
3477         qemu_log_lock();
3478         qemu_log("OP:\n");
3479         tcg_dump_ops(s);
3480         qemu_log("\n");
3481         qemu_log_unlock();
3482     }
3483 #endif
3484 
3485 #ifdef CONFIG_PROFILER
3486     atomic_set(&prof->opt_time, prof->opt_time - profile_getclock());
3487 #endif
3488 
3489 #ifdef USE_TCG_OPTIMIZATIONS
3490     tcg_optimize(s);
3491 #endif
3492 
3493 #ifdef CONFIG_PROFILER
3494     atomic_set(&prof->opt_time, prof->opt_time + profile_getclock());
3495     atomic_set(&prof->la_time, prof->la_time - profile_getclock());
3496 #endif
3497 
3498     liveness_pass_1(s);
3499 
3500     if (s->nb_indirects > 0) {
3501 #ifdef DEBUG_DISAS
3502         if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
3503                      && qemu_log_in_addr_range(tb->pc))) {
3504             qemu_log_lock();
3505             qemu_log("OP before indirect lowering:\n");
3506             tcg_dump_ops(s);
3507             qemu_log("\n");
3508             qemu_log_unlock();
3509         }
3510 #endif
3511         /* Replace indirect temps with direct temps.  */
3512         if (liveness_pass_2(s)) {
3513             /* If changes were made, re-run liveness.  */
3514             liveness_pass_1(s);
3515         }
3516     }
3517 
3518 #ifdef CONFIG_PROFILER
3519     atomic_set(&prof->la_time, prof->la_time + profile_getclock());
3520 #endif
3521 
3522 #ifdef DEBUG_DISAS
3523     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT)
3524                  && qemu_log_in_addr_range(tb->pc))) {
3525         qemu_log_lock();
3526         qemu_log("OP after optimization and liveness analysis:\n");
3527         tcg_dump_ops(s);
3528         qemu_log("\n");
3529         qemu_log_unlock();
3530     }
3531 #endif
3532 
3533     tcg_reg_alloc_start(s);
3534 
3535     s->code_buf = tb->tc.ptr;
3536     s->code_ptr = tb->tc.ptr;
3537 
3538 #ifdef TCG_TARGET_NEED_LDST_LABELS
3539     QSIMPLEQ_INIT(&s->ldst_labels);
3540 #endif
3541 #ifdef TCG_TARGET_NEED_POOL_LABELS
3542     s->pool_labels = NULL;
3543 #endif
3544 
3545     num_insns = -1;
3546     QTAILQ_FOREACH(op, &s->ops, link) {
3547         TCGOpcode opc = op->opc;
3548 
3549 #ifdef CONFIG_PROFILER
3550         atomic_set(&prof->table_op_count[opc], prof->table_op_count[opc] + 1);
3551 #endif
3552 
3553         switch (opc) {
3554         case INDEX_op_mov_i32:
3555         case INDEX_op_mov_i64:
3556         case INDEX_op_mov_vec:
3557             tcg_reg_alloc_mov(s, op);
3558             break;
3559         case INDEX_op_movi_i32:
3560         case INDEX_op_movi_i64:
3561         case INDEX_op_dupi_vec:
3562             tcg_reg_alloc_movi(s, op);
3563             break;
3564         case INDEX_op_insn_start:
3565             if (num_insns >= 0) {
3566                 size_t off = tcg_current_code_size(s);
3567                 s->gen_insn_end_off[num_insns] = off;
3568                 /* Assert that we do not overflow our stored offset.  */
3569                 assert(s->gen_insn_end_off[num_insns] == off);
3570             }
3571             num_insns++;
3572             for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
3573                 target_ulong a;
3574 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
3575                 a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
3576 #else
3577                 a = op->args[i];
3578 #endif
3579                 s->gen_insn_data[num_insns][i] = a;
3580             }
3581             break;
3582         case INDEX_op_discard:
3583             temp_dead(s, arg_temp(op->args[0]));
3584             break;
3585         case INDEX_op_set_label:
3586             tcg_reg_alloc_bb_end(s, s->reserved_regs);
3587             tcg_out_label(s, arg_label(op->args[0]), s->code_ptr);
3588             break;
3589         case INDEX_op_call:
3590             tcg_reg_alloc_call(s, op);
3591             break;
3592         default:
3593             /* Sanity check that we've not introduced any unhandled opcodes. */
3594             tcg_debug_assert(tcg_op_supported(opc));
3595             /* Note: in order to speed up the code, it would be much
3596                faster to have specialized register allocator functions for
3597                some common argument patterns */
3598             tcg_reg_alloc_op(s, op);
3599             break;
3600         }
3601 #ifdef CONFIG_DEBUG_TCG
3602         check_regs(s);
3603 #endif
3604         /* Test for (pending) buffer overflow.  The assumption is that any
3605            one operation beginning below the high water mark cannot overrun
3606            the buffer completely.  Thus we can test for overflow after
3607            generating code without having to check during generation.  */
3608         if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
3609             return -1;
3610         }
3611     }
3612     tcg_debug_assert(num_insns >= 0);
3613     s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);
3614 
3615     /* Generate TB finalization at the end of block */
3616 #ifdef TCG_TARGET_NEED_LDST_LABELS
3617     if (!tcg_out_ldst_finalize(s)) {
3618         return -1;
3619     }
3620 #endif
3621 #ifdef TCG_TARGET_NEED_POOL_LABELS
3622     if (!tcg_out_pool_finalize(s)) {
3623         return -1;
3624     }
3625 #endif
3626 
3627     /* flush instruction cache */
3628     flush_icache_range((uintptr_t)s->code_buf, (uintptr_t)s->code_ptr);
3629 
3630     return tcg_current_code_size(s);
3631 }
3632 
3633 #ifdef CONFIG_PROFILER
3634 void tcg_dump_info(FILE *f, fprintf_function cpu_fprintf)
3635 {
3636     TCGProfile prof = {};
3637     const TCGProfile *s;
3638     int64_t tb_count;
3639     int64_t tb_div_count;
3640     int64_t tot;
3641 
3642     tcg_profile_snapshot_counters(&prof);
3643     s = &prof;
3644     tb_count = s->tb_count;
3645     tb_div_count = tb_count ? tb_count : 1;
3646     tot = s->interm_time + s->code_time;
3647 
3648     cpu_fprintf(f, "JIT cycles          %" PRId64 " (%0.3f s at 2.4 GHz)\n",
3649                 tot, tot / 2.4e9);
3650     cpu_fprintf(f, "translated TBs      %" PRId64 " (aborted=%" PRId64 " %0.1f%%)\n",
3651                 tb_count, s->tb_count1 - tb_count,
3652                 (double)(s->tb_count1 - s->tb_count)
3653                 / (s->tb_count1 ? s->tb_count1 : 1) * 100.0);
3654     cpu_fprintf(f, "avg ops/TB          %0.1f max=%d\n",
3655                 (double)s->op_count / tb_div_count, s->op_count_max);
3656     cpu_fprintf(f, "deleted ops/TB      %0.2f\n",
3657                 (double)s->del_op_count / tb_div_count);
3658     cpu_fprintf(f, "avg temps/TB        %0.2f max=%d\n",
3659                 (double)s->temp_count / tb_div_count, s->temp_count_max);
3660     cpu_fprintf(f, "avg host code/TB    %0.1f\n",
3661                 (double)s->code_out_len / tb_div_count);
3662     cpu_fprintf(f, "avg search data/TB  %0.1f\n",
3663                 (double)s->search_out_len / tb_div_count);
3664 
3665     cpu_fprintf(f, "cycles/op           %0.1f\n",
3666                 s->op_count ? (double)tot / s->op_count : 0);
3667     cpu_fprintf(f, "cycles/in byte      %0.1f\n",
3668                 s->code_in_len ? (double)tot / s->code_in_len : 0);
3669     cpu_fprintf(f, "cycles/out byte     %0.1f\n",
3670                 s->code_out_len ? (double)tot / s->code_out_len : 0);
3671     cpu_fprintf(f, "cycles/search byte     %0.1f\n",
3672                 s->search_out_len ? (double)tot / s->search_out_len : 0);
3673     if (tot == 0) {
3674         tot = 1;
3675     }
3676     cpu_fprintf(f, "  gen_interm time   %0.1f%%\n",
3677                 (double)s->interm_time / tot * 100.0);
3678     cpu_fprintf(f, "  gen_code time     %0.1f%%\n",
3679                 (double)s->code_time / tot * 100.0);
3680     cpu_fprintf(f, "optim./code time    %0.1f%%\n",
3681                 (double)s->opt_time / (s->code_time ? s->code_time : 1)
3682                 * 100.0);
3683     cpu_fprintf(f, "liveness/code time  %0.1f%%\n",
3684                 (double)s->la_time / (s->code_time ? s->code_time : 1) * 100.0);
3685     cpu_fprintf(f, "cpu_restore count   %" PRId64 "\n",
3686                 s->restore_count);
3687     cpu_fprintf(f, "  avg cycles        %0.1f\n",
3688                 s->restore_count ? (double)s->restore_time / s->restore_count : 0);
3689 }
3690 #else
/* Variant built when CONFIG_PROFILER is not defined: no counters are
   available, so a single fixed notice is printed to @f via @cpu_fprintf.  */
3691 void tcg_dump_info(FILE *f, fprintf_function cpu_fprintf)
3692 {
3693     cpu_fprintf(f, "[TCG profiler not compiled]\n");
3694 }
3695 #endif
3696 
3697 #ifdef ELF_HOST_MACHINE
3698 /* In order to use this feature, the backend needs to do three things:
3699 
3700    (1) Define ELF_HOST_MACHINE to indicate both what value to
3701        put into the ELF image and to indicate support for the feature.
3702 
3703    (2) Define tcg_register_jit.  This should create a buffer containing
3704        the contents of a .debug_frame section that describes the post-
3705        prologue unwind info for the tcg machine.
3706 
3707    (3) Call tcg_register_jit_int, with the constructed .debug_frame.
3708 */
3709 
3710 /* Begin GDB interface.  THE FOLLOWING MUST MATCH GDB DOCS.  */
/* Values for jit_descriptor.action_flag.  In the code visible here only
   JIT_REGISTER_FN is ever stored (by tcg_register_jit_int).  */
3711 typedef enum {
3712     JIT_NOACTION = 0,
3713     JIT_REGISTER_FN,
3714     JIT_UNREGISTER_FN
3715 } jit_actions_t;
3716 
/* One registered symbol file.  tcg_register_jit_int fills in only
   symfile_addr and symfile_size of its single, statically allocated
   entry; the two list links are never assigned there.  */
3717 struct jit_code_entry {
3718     struct jit_code_entry *next_entry;
3719     struct jit_code_entry *prev_entry;
3720     const void *symfile_addr;   /* start of the in-memory ELF image */
3721     uint64_t symfile_size;      /* size of that image in bytes */
3722 };
3723 
/* Type of the global __jit_debug_descriptor object.  */
3724 struct jit_descriptor {
3725     uint32_t version;       /* statically initialized to 1 */
3726     uint32_t action_flag;   /* holds a jit_actions_t value */
3727     /* Both pointers are set to the single jit_code_entry when the
3727-note: see tcg_register_jit_int.  */
3727     struct jit_code_entry *relevant_entry;
3728     struct jit_code_entry *first_entry;
3729 };
3730 
/* Hook called by tcg_register_jit_int once __jit_debug_descriptor has been
   updated.  The body is intentionally empty; noinline and the empty asm
   statement presumably keep the call from being optimized away so that a
   debugger can intercept it (confirm against the GDB JIT interface
   documentation).  */
3731 void __jit_debug_register_code(void) __attribute__((noinline));
3732 void __jit_debug_register_code(void)
3733 {
3734     asm("");
3735 }
3736 
3737 /* Must statically initialize the version, because GDB may check
3738    the version before we can set it.  */
3739 struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };
3740 
3741 /* End GDB interface.  */
3742 
/*
 * Return the offset of the name @str inside the ELF string table @strtab.
 *
 * The table is a sequence of NUL-terminated names.  Offset 0 holds the
 * empty name, so the scan starts at offset 1.  An empty entry found after
 * that point (two consecutive NUL bytes) is treated as the end of the
 * table, which means @strtab must be followed by at least one extra NUL
 * after its last name.
 *
 * Returns the offset of the first entry equal to @str, or 0 (the ELF
 * "no name" index) when @str is not present, instead of scanning past the
 * end of the table.
 */
static int find_string(const char *strtab, const char *str)
{
    const char *p = strtab + 1;

    while (*p != '\0') {
        if (strcmp(p, str) == 0) {
            return p - strtab;
        }
        /* Skip this name and its terminating NUL. */
        p += strlen(p) + 1;
    }

    /* Ran into the end-of-table padding without a match. */
    return 0;
}
3754 
3755 static void tcg_register_jit_int(void *buf_ptr, size_t buf_size,
3756                                  const void *debug_frame,
3757                                  size_t debug_frame_size)
3758 {
3759     struct __attribute__((packed)) DebugInfo {
3760         uint32_t  len;
3761         uint16_t  version;
3762         uint32_t  abbrev;
3763         uint8_t   ptr_size;
3764         uint8_t   cu_die;
3765         uint16_t  cu_lang;
3766         uintptr_t cu_low_pc;
3767         uintptr_t cu_high_pc;
3768         uint8_t   fn_die;
3769         char      fn_name[16];
3770         uintptr_t fn_low_pc;
3771         uintptr_t fn_high_pc;
3772         uint8_t   cu_eoc;
3773     };
3774 
3775     struct ElfImage {
3776         ElfW(Ehdr) ehdr;
3777         ElfW(Phdr) phdr;
3778         ElfW(Shdr) shdr[7];
3779         ElfW(Sym)  sym[2];
3780         struct DebugInfo di;
3781         uint8_t    da[24];
3782         char       str[80];
3783     };
3784 
3785     struct ElfImage *img;
3786 
3787     static const struct ElfImage img_template = {
3788         .ehdr = {
3789             .e_ident[EI_MAG0] = ELFMAG0,
3790             .e_ident[EI_MAG1] = ELFMAG1,
3791             .e_ident[EI_MAG2] = ELFMAG2,
3792             .e_ident[EI_MAG3] = ELFMAG3,
3793             .e_ident[EI_CLASS] = ELF_CLASS,
3794             .e_ident[EI_DATA] = ELF_DATA,
3795             .e_ident[EI_VERSION] = EV_CURRENT,
3796             .e_type = ET_EXEC,
3797             .e_machine = ELF_HOST_MACHINE,
3798             .e_version = EV_CURRENT,
3799             .e_phoff = offsetof(struct ElfImage, phdr),
3800             .e_shoff = offsetof(struct ElfImage, shdr),
3801             .e_ehsize = sizeof(ElfW(Shdr)),
3802             .e_phentsize = sizeof(ElfW(Phdr)),
3803             .e_phnum = 1,
3804             .e_shentsize = sizeof(ElfW(Shdr)),
3805             .e_shnum = ARRAY_SIZE(img->shdr),
3806             .e_shstrndx = ARRAY_SIZE(img->shdr) - 1,
3807 #ifdef ELF_HOST_FLAGS
3808             .e_flags = ELF_HOST_FLAGS,
3809 #endif
3810 #ifdef ELF_OSABI
3811             .e_ident[EI_OSABI] = ELF_OSABI,
3812 #endif
3813         },
3814         .phdr = {
3815             .p_type = PT_LOAD,
3816             .p_flags = PF_X,
3817         },
3818         .shdr = {
3819             [0] = { .sh_type = SHT_NULL },
3820             /* Trick: The contents of code_gen_buffer are not present in
3821                this fake ELF file; that got allocated elsewhere.  Therefore
3822                we mark .text as SHT_NOBITS (similar to .bss) so that readers
3823                will not look for contents.  We can record any address.  */
3824             [1] = { /* .text */
3825                 .sh_type = SHT_NOBITS,
3826                 .sh_flags = SHF_EXECINSTR | SHF_ALLOC,
3827             },
3828             [2] = { /* .debug_info */
3829                 .sh_type = SHT_PROGBITS,
3830                 .sh_offset = offsetof(struct ElfImage, di),
3831                 .sh_size = sizeof(struct DebugInfo),
3832             },
3833             [3] = { /* .debug_abbrev */
3834                 .sh_type = SHT_PROGBITS,
3835                 .sh_offset = offsetof(struct ElfImage, da),
3836                 .sh_size = sizeof(img->da),
3837             },
3838             [4] = { /* .debug_frame */
3839                 .sh_type = SHT_PROGBITS,
3840                 .sh_offset = sizeof(struct ElfImage),
3841             },
3842             [5] = { /* .symtab */
3843                 .sh_type = SHT_SYMTAB,
3844                 .sh_offset = offsetof(struct ElfImage, sym),
3845                 .sh_size = sizeof(img->sym),
3846                 .sh_info = 1,
3847                 .sh_link = ARRAY_SIZE(img->shdr) - 1,
3848                 .sh_entsize = sizeof(ElfW(Sym)),
3849             },
3850             [6] = { /* .strtab */
3851                 .sh_type = SHT_STRTAB,
3852                 .sh_offset = offsetof(struct ElfImage, str),
3853                 .sh_size = sizeof(img->str),
3854             }
3855         },
3856         .sym = {
3857             [1] = { /* code_gen_buffer */
3858                 .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC),
3859                 .st_shndx = 1,
3860             }
3861         },
3862         .di = {
3863             .len = sizeof(struct DebugInfo) - 4,
3864             .version = 2,
3865             .ptr_size = sizeof(void *),
3866             .cu_die = 1,
3867             .cu_lang = 0x8001,  /* DW_LANG_Mips_Assembler */
3868             .fn_die = 2,
3869             .fn_name = "code_gen_buffer"
3870         },
3871         .da = {
3872             1,          /* abbrev number (the cu) */
3873             0x11, 1,    /* DW_TAG_compile_unit, has children */
3874             0x13, 0x5,  /* DW_AT_language, DW_FORM_data2 */
3875             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
3876             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
3877             0, 0,       /* end of abbrev */
3878             2,          /* abbrev number (the fn) */
3879             0x2e, 0,    /* DW_TAG_subprogram, no children */
3880             0x3, 0x8,   /* DW_AT_name, DW_FORM_string */
3881             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
3882             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
3883             0, 0,       /* end of abbrev */
3884             0           /* no more abbrev */
3885         },
3886         .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0"
3887                ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer",
3888     };
3889 
3890     /* We only need a single jit entry; statically allocate it.  */
3891     static struct jit_code_entry one_entry;
3892 
3893     uintptr_t buf = (uintptr_t)buf_ptr;
3894     size_t img_size = sizeof(struct ElfImage) + debug_frame_size;
3895     DebugFrameHeader *dfh;
3896 
3897     img = g_malloc(img_size);
3898     *img = img_template;
3899 
3900     img->phdr.p_vaddr = buf;
3901     img->phdr.p_paddr = buf;
3902     img->phdr.p_memsz = buf_size;
3903 
3904     img->shdr[1].sh_name = find_string(img->str, ".text");
3905     img->shdr[1].sh_addr = buf;
3906     img->shdr[1].sh_size = buf_size;
3907 
3908     img->shdr[2].sh_name = find_string(img->str, ".debug_info");
3909     img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev");
3910 
3911     img->shdr[4].sh_name = find_string(img->str, ".debug_frame");
3912     img->shdr[4].sh_size = debug_frame_size;
3913 
3914     img->shdr[5].sh_name = find_string(img->str, ".symtab");
3915     img->shdr[6].sh_name = find_string(img->str, ".strtab");
3916 
3917     img->sym[1].st_name = find_string(img->str, "code_gen_buffer");
3918     img->sym[1].st_value = buf;
3919     img->sym[1].st_size = buf_size;
3920 
3921     img->di.cu_low_pc = buf;
3922     img->di.cu_high_pc = buf + buf_size;
3923     img->di.fn_low_pc = buf;
3924     img->di.fn_high_pc = buf + buf_size;
3925 
3926     dfh = (DebugFrameHeader *)(img + 1);
3927     memcpy(dfh, debug_frame, debug_frame_size);
3928     dfh->fde.func_start = buf;
3929     dfh->fde.func_len = buf_size;
3930 
3931 #ifdef DEBUG_JIT
3932     /* Enable this block to be able to debug the ELF image file creation.
3933        One can use readelf, objdump, or other inspection utilities.  */
3934     {
3935         FILE *f = fopen("/tmp/qemu.jit", "w+b");
3936         if (f) {
3937             if (fwrite(img, img_size, 1, f) != img_size) {
3938                 /* Avoid stupid unused return value warning for fwrite.  */
3939             }
3940             fclose(f);
3941         }
3942     }
3943 #endif
3944 
3945     one_entry.symfile_addr = img;
3946     one_entry.symfile_size = img_size;
3947 
3948     __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
3949     __jit_debug_descriptor.relevant_entry = &one_entry;
3950     __jit_debug_descriptor.first_entry = &one_entry;
3951     __jit_debug_register_code();
3952 }
3953 #else
3954 /* No support for the feature.  Provide the entry point expected by exec.c,
3955    and implement the internal function we declared earlier.  */
3956 
/* Variant built when ELF_HOST_MACHINE is not defined: intentionally a
   no-op that ignores all of its arguments.  */
3957 static void tcg_register_jit_int(void *buf, size_t size,
3958                                  const void *debug_frame,
3959                                  size_t debug_frame_size)
3960 {
3961 }
3962 
/* Entry point built when ELF_HOST_MACHINE is not defined: does nothing
   with @buf and @buf_size.  */
3963 void tcg_register_jit(void *buf, size_t buf_size)
3964 {
3965 }
3966 #endif /* ELF_HOST_MACHINE */
3967 
3968 #if !TCG_TARGET_MAYBE_vec
/* Placeholder compiled only when TCG_TARGET_MAYBE_vec evaluates to zero.
   It ignores its arguments and asserts that it is never reached.  */
3969 void tcg_expand_vec_op(TCGOpcode o, TCGType t, unsigned e, TCGArg a0, ...)
3970 {
3971     g_assert_not_reached();
3972 }
3973 #endif
3974