xref: /openbmc/qemu/tcg/tcg.c (revision b7d89466)
1 /*
2  * Tiny Code Generator for QEMU
3  *
4  * Copyright (c) 2008 Fabrice Bellard
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24 
25 /* define it to use liveness analysis (better code) */
26 #define USE_TCG_OPTIMIZATIONS
27 
28 #include "qemu/osdep.h"
29 
/* Define to dump the ELF file used to communicate with GDB.  */
31 #undef DEBUG_JIT
32 
33 #include "qemu/error-report.h"
34 #include "qemu/cutils.h"
35 #include "qemu/host-utils.h"
36 #include "qemu/timer.h"
37 
38 /* Note: the long term plan is to reduce the dependencies on the QEMU
39    CPU definitions. Currently they are used for qemu_ld/st
40    instructions */
41 #define NO_CPU_IO_DEFS
42 #include "cpu.h"
43 
44 #include "exec/cpu-common.h"
45 #include "exec/exec-all.h"
46 
47 #include "tcg-op.h"
48 
/*
 * Pick the ELF class and data-encoding constants matching the host:
 * the class follows the width of uintptr_t, the data encoding follows
 * HOST_WORDS_BIGENDIAN.
 */
#if UINTPTR_MAX == UINT32_MAX
# define ELF_CLASS  ELFCLASS32
#else
# define ELF_CLASS  ELFCLASS64
#endif
#ifdef HOST_WORDS_BIGENDIAN
# define ELF_DATA   ELFDATA2MSB
#else
# define ELF_DATA   ELFDATA2LSB
#endif
59 
60 #include "elf.h"
61 #include "exec/log.h"
62 #include "sysemu/sysemu.h"
63 
/*
 * Forward declarations for functions defined in tcg-target.inc.c and
 * used here.
 */
static void tcg_target_init(TCGContext *s);
static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode);
static void tcg_target_qemu_prologue(TCGContext *s);
/* Callers in this file assert that the returned value is true. */
static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
                        intptr_t value, intptr_t addend);
71 
/* The CIE and FDE header definitions will be common to all hosts.  */
/*
 * CIE header.
 * NOTE(review): presumably mirrors the DWARF .debug_frame Common
 * Information Entry layout -- confirm against the users of this type.
 */
typedef struct {
    /* Aligned to the size of a host pointer. */
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t id;
    uint8_t version;
    char augmentation[1];
    uint8_t code_align;
    uint8_t data_align;
    uint8_t return_column;
} DebugFrameCIE;
82 
/*
 * FDE header; packed, with the leading length field aligned to the size
 * of a host pointer.  func_start/func_len are pointer-sized integers.
 */
typedef struct QEMU_PACKED {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t cie_offset;
    uintptr_t func_start;
    uintptr_t func_len;
} DebugFrameFDEHeader;
89 
/* A CIE immediately followed by an FDE header, packed into one object. */
typedef struct QEMU_PACKED {
    DebugFrameCIE cie;
    DebugFrameFDEHeader fde;
} DebugFrameHeader;
94 
/*
 * Prototype only; marked unused so that configurations which never call
 * it do not trigger an unused-function warning.
 */
static void tcg_register_jit_int(void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
    __attribute__((unused));
99 
/* Forward declarations for functions declared and used in tcg-target.inc.c. */
static const char *target_parse_constraint(TCGArgConstraint *ct,
                                           const char *ct_str, TCGType type);
static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
                       intptr_t arg2);
static void tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_movi(TCGContext *s, TCGType type,
                         TCGReg ret, tcg_target_long arg);
static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
                       const int *const_args);
/*
 * When TCG_TARGET_MAYBE_vec is zero, tcg_out_vec_op() is replaced by a
 * stub that must never be reached.
 */
#if TCG_TARGET_MAYBE_vec
static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, unsigned vecl,
                           unsigned vece, const TCGArg *args,
                           const int *const_args);
#else
static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, unsigned vecl,
                                  unsigned vece, const TCGArg *args,
                                  const int *const_args)
{
    g_assert_not_reached();
}
#endif
static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
                       intptr_t arg2);
static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
                        TCGReg base, intptr_t ofs);
static void tcg_out_call(TCGContext *s, tcg_insn_unit *target);
static int tcg_target_const_match(tcg_target_long val, TCGType type,
                                  const TCGArgConstraint *arg_ct);
/* Only declared when the backend requests load/store labels. */
#ifdef TCG_TARGET_NEED_LDST_LABELS
static bool tcg_out_ldst_finalize(TCGContext *s);
#endif
132 
/*
 * Bytes kept in reserve at the end of a code region: the high-water mark
 * of a region is placed this far below the region's end.
 */
#define TCG_HIGHWATER 1024

/* Array of registered TCG contexts and the number of entries in use. */
static TCGContext **tcg_ctxs;
static unsigned int n_tcg_ctxs;
/* Assigned in tcg_context_init() from the "env" global register temp. */
TCGv_env cpu_env = 0;
138 
/*
 * Per-region tree of translation blocks together with the mutex that
 * guards it.  One instance exists for each region.
 */
struct tcg_region_tree {
    QemuMutex lock;
    GTree *tree;
    /* padding to avoid false sharing is computed at run-time */
};
144 
/*
 * We divide code_gen_buffer into equally-sized "regions" that TCG threads
 * dynamically allocate from as demand dictates. Given appropriate region
 * sizing, this minimizes flushes even when some TCG threads generate a lot
 * more code than others.
 */
struct tcg_region_state {
    QemuMutex lock;

    /* fields set at init time */
    void *start;         /* start of the buffer (may be unaligned) */
    void *start_aligned; /* start rounded up to a page boundary */
    void *end;           /* end of the usable area of the last region */
    size_t n;            /* number of regions */
    size_t size; /* size of one region */
    size_t stride; /* .size + guard size */

    /* fields protected by the lock */
    size_t current; /* current region index */
    size_t agg_size_full; /* aggregate size of full regions */
};
166 
/* The single, file-wide description of how the code buffer is split. */
static struct tcg_region_state region;
/*
 * This is an array of struct tcg_region_tree's, with padding.
 * We use void * to simplify the computation of region_trees[i]; each
 * struct is found every tree_size bytes.
 */
static void *region_trees;
static size_t tree_size;
/* Register sets, indexed by TCG type / describing call-clobbered regs. */
static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT];
static TCGRegSet tcg_target_call_clobber_regs;
177 
#if TCG_TARGET_INSN_UNIT_SIZE == 1
/* Store one byte at the current output position and step past it. */
static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v)
{
    tcg_insn_unit *dst = s->code_ptr;

    dst[0] = v;
    s->code_ptr = dst + 1;
}

/* Overwrite the already-emitted byte at @p with @v. */
static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p,
                                                      uint8_t v)
{
    p[0] = v;
}
#endif
190 
191 #if TCG_TARGET_INSN_UNIT_SIZE <= 2
192 static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v)
193 {
194     if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
195         *s->code_ptr++ = v;
196     } else {
197         tcg_insn_unit *p = s->code_ptr;
198         memcpy(p, &v, sizeof(v));
199         s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
200     }
201 }
202 
203 static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p,
204                                                        uint16_t v)
205 {
206     if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
207         *p = v;
208     } else {
209         memcpy(p, &v, sizeof(v));
210     }
211 }
212 #endif
213 
214 #if TCG_TARGET_INSN_UNIT_SIZE <= 4
215 static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v)
216 {
217     if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
218         *s->code_ptr++ = v;
219     } else {
220         tcg_insn_unit *p = s->code_ptr;
221         memcpy(p, &v, sizeof(v));
222         s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
223     }
224 }
225 
226 static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p,
227                                                        uint32_t v)
228 {
229     if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
230         *p = v;
231     } else {
232         memcpy(p, &v, sizeof(v));
233     }
234 }
235 #endif
236 
237 #if TCG_TARGET_INSN_UNIT_SIZE <= 8
238 static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v)
239 {
240     if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
241         *s->code_ptr++ = v;
242     } else {
243         tcg_insn_unit *p = s->code_ptr;
244         memcpy(p, &v, sizeof(v));
245         s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
246     }
247 }
248 
249 static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p,
250                                                        uint64_t v)
251 {
252     if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
253         *p = v;
254     } else {
255         memcpy(p, &v, sizeof(v));
256     }
257 }
258 #endif
259 
260 /* label relocation processing */
261 
262 static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
263                           TCGLabel *l, intptr_t addend)
264 {
265     TCGRelocation *r;
266 
267     if (l->has_value) {
268         /* FIXME: This may break relocations on RISC targets that
269            modify instruction fields in place.  The caller may not have
270            written the initial value.  */
271         bool ok = patch_reloc(code_ptr, type, l->u.value, addend);
272         tcg_debug_assert(ok);
273     } else {
274         /* add a new relocation entry */
275         r = tcg_malloc(sizeof(TCGRelocation));
276         r->type = type;
277         r->ptr = code_ptr;
278         r->addend = addend;
279         r->next = l->u.first_reloc;
280         l->u.first_reloc = r;
281     }
282 }
283 
284 static void tcg_out_label(TCGContext *s, TCGLabel *l, tcg_insn_unit *ptr)
285 {
286     intptr_t value = (intptr_t)ptr;
287     TCGRelocation *r;
288 
289     tcg_debug_assert(!l->has_value);
290 
291     for (r = l->u.first_reloc; r != NULL; r = r->next) {
292         bool ok = patch_reloc(r->ptr, r->type, value, r->addend);
293         tcg_debug_assert(ok);
294     }
295 
296     l->has_value = 1;
297     l->u.value_ptr = ptr;
298 }
299 
300 TCGLabel *gen_new_label(void)
301 {
302     TCGContext *s = tcg_ctx;
303     TCGLabel *l = tcg_malloc(sizeof(TCGLabel));
304 
305     *l = (TCGLabel){
306         .id = s->nb_labels++
307     };
308 
309     return l;
310 }
311 
312 static void set_jmp_reset_offset(TCGContext *s, int which)
313 {
314     size_t off = tcg_current_code_size(s);
315     s->tb_jmp_reset_offset[which] = off;
316     /* Make sure that we didn't overflow the stored offset.  */
317     assert(s->tb_jmp_reset_offset[which] == off);
318 }
319 
320 #include "tcg-target.inc.c"
321 
322 /* compare a pointer @ptr and a tb_tc @s */
323 static int ptr_cmp_tb_tc(const void *ptr, const struct tb_tc *s)
324 {
325     if (ptr >= s->ptr + s->size) {
326         return 1;
327     } else if (ptr < s->ptr) {
328         return -1;
329     }
330     return 0;
331 }
332 
333 static gint tb_tc_cmp(gconstpointer ap, gconstpointer bp)
334 {
335     const struct tb_tc *a = ap;
336     const struct tb_tc *b = bp;
337 
338     /*
339      * When both sizes are set, we know this isn't a lookup.
340      * This is the most likely case: every TB must be inserted; lookups
341      * are a lot less frequent.
342      */
343     if (likely(a->size && b->size)) {
344         if (a->ptr > b->ptr) {
345             return 1;
346         } else if (a->ptr < b->ptr) {
347             return -1;
348         }
349         /* a->ptr == b->ptr should happen only on deletions */
350         g_assert(a->size == b->size);
351         return 0;
352     }
353     /*
354      * All lookups have either .size field set to 0.
355      * From the glib sources we see that @ap is always the lookup key. However
356      * the docs provide no guarantee, so we just mark this case as likely.
357      */
358     if (likely(a->size == 0)) {
359         return ptr_cmp_tb_tc(a->ptr, b);
360     }
361     return ptr_cmp_tb_tc(b->ptr, a);
362 }
363 
364 static void tcg_region_trees_init(void)
365 {
366     size_t i;
367 
368     tree_size = ROUND_UP(sizeof(struct tcg_region_tree), qemu_dcache_linesize);
369     region_trees = qemu_memalign(qemu_dcache_linesize, region.n * tree_size);
370     for (i = 0; i < region.n; i++) {
371         struct tcg_region_tree *rt = region_trees + i * tree_size;
372 
373         qemu_mutex_init(&rt->lock);
374         rt->tree = g_tree_new(tb_tc_cmp);
375     }
376 }
377 
378 static struct tcg_region_tree *tc_ptr_to_region_tree(void *p)
379 {
380     size_t region_idx;
381 
382     if (p < region.start_aligned) {
383         region_idx = 0;
384     } else {
385         ptrdiff_t offset = p - region.start_aligned;
386 
387         if (offset > region.stride * (region.n - 1)) {
388             region_idx = region.n - 1;
389         } else {
390             region_idx = offset / region.stride;
391         }
392     }
393     return region_trees + region_idx * tree_size;
394 }
395 
396 void tcg_tb_insert(TranslationBlock *tb)
397 {
398     struct tcg_region_tree *rt = tc_ptr_to_region_tree(tb->tc.ptr);
399 
400     qemu_mutex_lock(&rt->lock);
401     g_tree_insert(rt->tree, &tb->tc, tb);
402     qemu_mutex_unlock(&rt->lock);
403 }
404 
405 void tcg_tb_remove(TranslationBlock *tb)
406 {
407     struct tcg_region_tree *rt = tc_ptr_to_region_tree(tb->tc.ptr);
408 
409     qemu_mutex_lock(&rt->lock);
410     g_tree_remove(rt->tree, &tb->tc);
411     qemu_mutex_unlock(&rt->lock);
412 }
413 
414 /*
415  * Find the TB 'tb' such that
416  * tb->tc.ptr <= tc_ptr < tb->tc.ptr + tb->tc.size
417  * Return NULL if not found.
418  */
419 TranslationBlock *tcg_tb_lookup(uintptr_t tc_ptr)
420 {
421     struct tcg_region_tree *rt = tc_ptr_to_region_tree((void *)tc_ptr);
422     TranslationBlock *tb;
423     struct tb_tc s = { .ptr = (void *)tc_ptr };
424 
425     qemu_mutex_lock(&rt->lock);
426     tb = g_tree_lookup(rt->tree, &s);
427     qemu_mutex_unlock(&rt->lock);
428     return tb;
429 }
430 
431 static void tcg_region_tree_lock_all(void)
432 {
433     size_t i;
434 
435     for (i = 0; i < region.n; i++) {
436         struct tcg_region_tree *rt = region_trees + i * tree_size;
437 
438         qemu_mutex_lock(&rt->lock);
439     }
440 }
441 
442 static void tcg_region_tree_unlock_all(void)
443 {
444     size_t i;
445 
446     for (i = 0; i < region.n; i++) {
447         struct tcg_region_tree *rt = region_trees + i * tree_size;
448 
449         qemu_mutex_unlock(&rt->lock);
450     }
451 }
452 
453 void tcg_tb_foreach(GTraverseFunc func, gpointer user_data)
454 {
455     size_t i;
456 
457     tcg_region_tree_lock_all();
458     for (i = 0; i < region.n; i++) {
459         struct tcg_region_tree *rt = region_trees + i * tree_size;
460 
461         g_tree_foreach(rt->tree, func, user_data);
462     }
463     tcg_region_tree_unlock_all();
464 }
465 
466 size_t tcg_nb_tbs(void)
467 {
468     size_t nb_tbs = 0;
469     size_t i;
470 
471     tcg_region_tree_lock_all();
472     for (i = 0; i < region.n; i++) {
473         struct tcg_region_tree *rt = region_trees + i * tree_size;
474 
475         nb_tbs += g_tree_nnodes(rt->tree);
476     }
477     tcg_region_tree_unlock_all();
478     return nb_tbs;
479 }
480 
481 static void tcg_region_tree_reset_all(void)
482 {
483     size_t i;
484 
485     tcg_region_tree_lock_all();
486     for (i = 0; i < region.n; i++) {
487         struct tcg_region_tree *rt = region_trees + i * tree_size;
488 
489         /* Increment the refcount first so that destroy acts as a reset */
490         g_tree_ref(rt->tree);
491         g_tree_destroy(rt->tree);
492     }
493     tcg_region_tree_unlock_all();
494 }
495 
496 static void tcg_region_bounds(size_t curr_region, void **pstart, void **pend)
497 {
498     void *start, *end;
499 
500     start = region.start_aligned + curr_region * region.stride;
501     end = start + region.size;
502 
503     if (curr_region == 0) {
504         start = region.start;
505     }
506     if (curr_region == region.n - 1) {
507         end = region.end;
508     }
509 
510     *pstart = start;
511     *pend = end;
512 }
513 
514 static void tcg_region_assign(TCGContext *s, size_t curr_region)
515 {
516     void *start, *end;
517 
518     tcg_region_bounds(curr_region, &start, &end);
519 
520     s->code_gen_buffer = start;
521     s->code_gen_ptr = start;
522     s->code_gen_buffer_size = end - start;
523     s->code_gen_highwater = end - TCG_HIGHWATER;
524 }
525 
526 static bool tcg_region_alloc__locked(TCGContext *s)
527 {
528     if (region.current == region.n) {
529         return true;
530     }
531     tcg_region_assign(s, region.current);
532     region.current++;
533     return false;
534 }
535 
536 /*
537  * Request a new region once the one in use has filled up.
538  * Returns true on error.
539  */
540 static bool tcg_region_alloc(TCGContext *s)
541 {
542     bool err;
543     /* read the region size now; alloc__locked will overwrite it on success */
544     size_t size_full = s->code_gen_buffer_size;
545 
546     qemu_mutex_lock(&region.lock);
547     err = tcg_region_alloc__locked(s);
548     if (!err) {
549         region.agg_size_full += size_full - TCG_HIGHWATER;
550     }
551     qemu_mutex_unlock(&region.lock);
552     return err;
553 }
554 
555 /*
556  * Perform a context's first region allocation.
557  * This function does _not_ increment region.agg_size_full.
558  */
559 static inline bool tcg_region_initial_alloc__locked(TCGContext *s)
560 {
561     return tcg_region_alloc__locked(s);
562 }
563 
564 /* Call from a safe-work context */
565 void tcg_region_reset_all(void)
566 {
567     unsigned int n_ctxs = atomic_read(&n_tcg_ctxs);
568     unsigned int i;
569 
570     qemu_mutex_lock(&region.lock);
571     region.current = 0;
572     region.agg_size_full = 0;
573 
574     for (i = 0; i < n_ctxs; i++) {
575         TCGContext *s = atomic_read(&tcg_ctxs[i]);
576         bool err = tcg_region_initial_alloc__locked(s);
577 
578         g_assert(!err);
579     }
580     qemu_mutex_unlock(&region.lock);
581 
582     tcg_region_tree_reset_all();
583 }
584 
585 #ifdef CONFIG_USER_ONLY
586 static size_t tcg_n_regions(void)
587 {
588     return 1;
589 }
590 #else
591 /*
592  * It is likely that some vCPUs will translate more code than others, so we
593  * first try to set more regions than max_cpus, with those regions being of
594  * reasonable size. If that's not possible we make do by evenly dividing
595  * the code_gen_buffer among the vCPUs.
596  */
597 static size_t tcg_n_regions(void)
598 {
599     size_t i;
600 
601     /* Use a single region if all we have is one vCPU thread */
602     if (max_cpus == 1 || !qemu_tcg_mttcg_enabled()) {
603         return 1;
604     }
605 
606     /* Try to have more regions than max_cpus, with each region being >= 2 MB */
607     for (i = 8; i > 0; i--) {
608         size_t regions_per_thread = i;
609         size_t region_size;
610 
611         region_size = tcg_init_ctx.code_gen_buffer_size;
612         region_size /= max_cpus * regions_per_thread;
613 
614         if (region_size >= 2 * 1024u * 1024) {
615             return max_cpus * regions_per_thread;
616         }
617     }
618     /* If we can't, then just allocate one region per vCPU thread */
619     return max_cpus;
620 }
621 #endif
622 
623 /*
624  * Initializes region partitioning.
625  *
626  * Called at init time from the parent thread (i.e. the one calling
627  * tcg_context_init), after the target's TCG globals have been set.
628  *
629  * Region partitioning works by splitting code_gen_buffer into separate regions,
630  * and then assigning regions to TCG threads so that the threads can translate
631  * code in parallel without synchronization.
632  *
633  * In softmmu the number of TCG threads is bounded by max_cpus, so we use at
634  * least max_cpus regions in MTTCG. In !MTTCG we use a single region.
635  * Note that the TCG options from the command-line (i.e. -accel accel=tcg,[...])
636  * must have been parsed before calling this function, since it calls
637  * qemu_tcg_mttcg_enabled().
638  *
639  * In user-mode we use a single region.  Having multiple regions in user-mode
640  * is not supported, because the number of vCPU threads (recall that each thread
641  * spawned by the guest corresponds to a vCPU thread) is only bounded by the
642  * OS, and usually this number is huge (tens of thousands is not uncommon).
643  * Thus, given this large bound on the number of vCPU threads and the fact
644  * that code_gen_buffer is allocated at compile-time, we cannot guarantee
645  * that the availability of at least one region per vCPU thread.
646  *
647  * However, this user-mode limitation is unlikely to be a significant problem
648  * in practice. Multi-threaded guests share most if not all of their translated
649  * code, which makes parallel code generation less appealing than in softmmu.
650  */
651 void tcg_region_init(void)
652 {
653     void *buf = tcg_init_ctx.code_gen_buffer;
654     void *aligned;
655     size_t size = tcg_init_ctx.code_gen_buffer_size;
656     size_t page_size = qemu_real_host_page_size;
657     size_t region_size;
658     size_t n_regions;
659     size_t i;
660 
661     n_regions = tcg_n_regions();
662 
663     /* The first region will be 'aligned - buf' bytes larger than the others */
664     aligned = QEMU_ALIGN_PTR_UP(buf, page_size);
665     g_assert(aligned < tcg_init_ctx.code_gen_buffer + size);
666     /*
667      * Make region_size a multiple of page_size, using aligned as the start.
668      * As a result of this we might end up with a few extra pages at the end of
669      * the buffer; we will assign those to the last region.
670      */
671     region_size = (size - (aligned - buf)) / n_regions;
672     region_size = QEMU_ALIGN_DOWN(region_size, page_size);
673 
674     /* A region must have at least 2 pages; one code, one guard */
675     g_assert(region_size >= 2 * page_size);
676 
677     /* init the region struct */
678     qemu_mutex_init(&region.lock);
679     region.n = n_regions;
680     region.size = region_size - page_size;
681     region.stride = region_size;
682     region.start = buf;
683     region.start_aligned = aligned;
684     /* page-align the end, since its last page will be a guard page */
685     region.end = QEMU_ALIGN_PTR_DOWN(buf + size, page_size);
686     /* account for that last guard page */
687     region.end -= page_size;
688 
689     /* set guard pages */
690     for (i = 0; i < region.n; i++) {
691         void *start, *end;
692         int rc;
693 
694         tcg_region_bounds(i, &start, &end);
695         rc = qemu_mprotect_none(end, page_size);
696         g_assert(!rc);
697     }
698 
699     tcg_region_trees_init();
700 
701     /* In user-mode we support only one ctx, so do the initial allocation now */
702 #ifdef CONFIG_USER_ONLY
703     {
704         bool err = tcg_region_initial_alloc__locked(tcg_ctx);
705 
706         g_assert(!err);
707     }
708 #endif
709 }
710 
711 /*
712  * All TCG threads except the parent (i.e. the one that called tcg_context_init
713  * and registered the target's TCG globals) must register with this function
714  * before initiating translation.
715  *
716  * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation
717  * of tcg_region_init() for the reasoning behind this.
718  *
719  * In softmmu each caller registers its context in tcg_ctxs[]. Note that in
720  * softmmu tcg_ctxs[] does not track tcg_ctx_init, since the initial context
721  * is not used anymore for translation once this function is called.
722  *
723  * Not tracking tcg_init_ctx in tcg_ctxs[] in softmmu keeps code that iterates
724  * over the array (e.g. tcg_code_size() the same for both softmmu and user-mode.
725  */
726 #ifdef CONFIG_USER_ONLY
727 void tcg_register_thread(void)
728 {
729     tcg_ctx = &tcg_init_ctx;
730 }
731 #else
732 void tcg_register_thread(void)
733 {
734     TCGContext *s = g_malloc(sizeof(*s));
735     unsigned int i, n;
736     bool err;
737 
738     *s = tcg_init_ctx;
739 
740     /* Relink mem_base.  */
741     for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) {
742         if (tcg_init_ctx.temps[i].mem_base) {
743             ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps;
744             tcg_debug_assert(b >= 0 && b < n);
745             s->temps[i].mem_base = &s->temps[b];
746         }
747     }
748 
749     /* Claim an entry in tcg_ctxs */
750     n = atomic_fetch_inc(&n_tcg_ctxs);
751     g_assert(n < max_cpus);
752     atomic_set(&tcg_ctxs[n], s);
753 
754     tcg_ctx = s;
755     qemu_mutex_lock(&region.lock);
756     err = tcg_region_initial_alloc__locked(tcg_ctx);
757     g_assert(!err);
758     qemu_mutex_unlock(&region.lock);
759 }
760 #endif /* !CONFIG_USER_ONLY */
761 
762 /*
763  * Returns the size (in bytes) of all translated code (i.e. from all regions)
764  * currently in the cache.
765  * See also: tcg_code_capacity()
766  * Do not confuse with tcg_current_code_size(); that one applies to a single
767  * TCG context.
768  */
769 size_t tcg_code_size(void)
770 {
771     unsigned int n_ctxs = atomic_read(&n_tcg_ctxs);
772     unsigned int i;
773     size_t total;
774 
775     qemu_mutex_lock(&region.lock);
776     total = region.agg_size_full;
777     for (i = 0; i < n_ctxs; i++) {
778         const TCGContext *s = atomic_read(&tcg_ctxs[i]);
779         size_t size;
780 
781         size = atomic_read(&s->code_gen_ptr) - s->code_gen_buffer;
782         g_assert(size <= s->code_gen_buffer_size);
783         total += size;
784     }
785     qemu_mutex_unlock(&region.lock);
786     return total;
787 }
788 
789 /*
790  * Returns the code capacity (in bytes) of the entire cache, i.e. including all
791  * regions.
792  * See also: tcg_code_size()
793  */
794 size_t tcg_code_capacity(void)
795 {
796     size_t guard_size, capacity;
797 
798     /* no need for synchronization; these variables are set at init time */
799     guard_size = region.stride - region.size;
800     capacity = region.end + guard_size - region.start;
801     capacity -= region.n * (guard_size + TCG_HIGHWATER);
802     return capacity;
803 }
804 
805 size_t tcg_tb_phys_invalidate_count(void)
806 {
807     unsigned int n_ctxs = atomic_read(&n_tcg_ctxs);
808     unsigned int i;
809     size_t total = 0;
810 
811     for (i = 0; i < n_ctxs; i++) {
812         const TCGContext *s = atomic_read(&tcg_ctxs[i]);
813 
814         total += atomic_read(&s->tb_phys_invalidate_count);
815     }
816     return total;
817 }
818 
/* pool based memory allocation */

/*
 * Allocate @size bytes from the pool allocator of context @s and return
 * a pointer to them.
 *
 * Requests larger than TCG_POOL_CHUNK_SIZE get a dedicated pool that is
 * pushed onto s->pool_first_large.  Smaller requests are served from the
 * start of the next chunk in the s->pool_first list, allocating a new
 * chunk when the list is exhausted; the allocation cursor is then left
 * just past the returned bytes.
 *
 * NOTE(review): presumably the slow path behind an inline tcg_malloc()
 * fast path that is taken while the current chunk still has room --
 * confirm against the header.
 */
void *tcg_malloc_internal(TCGContext *s, int size)
{
    TCGPool *p;
    int pool_size;

    if (size > TCG_POOL_CHUNK_SIZE) {
        /* big malloc: insert a new pool (XXX: could optimize) */
        p = g_malloc(sizeof(TCGPool) + size);
        p->size = size;
        p->next = s->pool_first_large;
        s->pool_first_large = p;
        /* The current chunk and its cursor are left untouched. */
        return p->data;
    } else {
        p = s->pool_current;
        if (!p) {
            /* No current chunk: reuse the first chunk if one exists. */
            p = s->pool_first;
            if (!p)
                goto new_pool;
        } else {
            if (!p->next) {
            new_pool:
                /* End of the chunk list: append a fresh chunk. */
                pool_size = TCG_POOL_CHUNK_SIZE;
                p = g_malloc(sizeof(TCGPool) + pool_size);
                p->size = pool_size;
                p->next = NULL;
                if (s->pool_current)
                    s->pool_current->next = p;
                else
                    s->pool_first = p;
            } else {
                /* Move on to the chunk that follows the current one. */
                p = p->next;
            }
        }
    }
    /* Serve the request from the start of the selected chunk. */
    s->pool_current = p;
    s->pool_cur = p->data + size;
    s->pool_end = p->data + p->size;
    return p->data;
}
859 
860 void tcg_pool_reset(TCGContext *s)
861 {
862     TCGPool *p, *t;
863     for (p = s->pool_first_large; p; p = t) {
864         t = p->next;
865         g_free(p);
866     }
867     s->pool_first_large = NULL;
868     s->pool_cur = s->pool_end = NULL;
869     s->pool_current = NULL;
870 }
871 
/* Description of one helper function; instances populate all_helpers[]. */
typedef struct TCGHelperInfo {
    void *func;        /* helper entry point; used as the helper_table key */
    const char *name;
    unsigned flags;
    unsigned sizemask;
} TCGHelperInfo;
878 
879 #include "exec/helper-proto.h"
880 
/* Table of helpers; its entries are produced by including helper-tcg.h. */
static const TCGHelperInfo all_helpers[] = {
#include "exec/helper-tcg.h"
};
/* Maps a helper's function pointer to its all_helpers[] entry. */
static GHashTable *helper_table;

/* Filled in by tcg_context_init() from tcg_target_reg_alloc_order. */
static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)];
static void process_op_defs(TCGContext *s);
static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
                                            TCGReg reg, const char *name);
890 
/*
 * One-time initialization of the TCG context @s.
 *
 * Zeroes the context, carves per-opcode argument constraint storage out
 * of two shared arrays, registers all helpers in helper_table, runs the
 * target-specific initialization, computes indirect_reg_alloc_order,
 * makes @s the current context, sets up tcg_ctxs, and finally creates
 * the "env" global bound to TCG_AREG0.
 */
void tcg_context_init(TCGContext *s)
{
    int op, total_args, n, i;
    TCGOpDef *def;
    TCGArgConstraint *args_ct;
    int *sorted_args;
    TCGTemp *ts;

    memset(s, 0, sizeof(*s));
    s->nb_globals = 0;

    /* Count total number of arguments and allocate the corresponding
       space */
    total_args = 0;
    for(op = 0; op < NB_OPS; op++) {
        def = &tcg_op_defs[op];
        n = def->nb_iargs + def->nb_oargs;
        total_args += n;
    }

    args_ct = g_malloc(sizeof(TCGArgConstraint) * total_args);
    sorted_args = g_malloc(sizeof(int) * total_args);

    /* Give each opcode its own slice of the two arrays. */
    for(op = 0; op < NB_OPS; op++) {
        def = &tcg_op_defs[op];
        def->args_ct = args_ct;
        def->sorted_args = sorted_args;
        n = def->nb_iargs + def->nb_oargs;
        sorted_args += n;
        args_ct += n;
    }

    /* Register helpers.  */
    /* Use g_direct_hash/equal for direct pointer comparisons on func.  */
    helper_table = g_hash_table_new(NULL, NULL);

    for (i = 0; i < ARRAY_SIZE(all_helpers); ++i) {
        g_hash_table_insert(helper_table, (gpointer)all_helpers[i].func,
                            (gpointer)&all_helpers[i]);
    }

    tcg_target_init(s);
    process_op_defs(s);

    /* Reverse the order of the saved registers, assuming they're all at
       the start of tcg_target_reg_alloc_order.  */
    /* Find n, the index of the first call-clobbered register. */
    for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) {
        int r = tcg_target_reg_alloc_order[n];
        if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) {
            break;
        }
    }
    /* The first n entries are copied reversed, the rest unchanged. */
    for (i = 0; i < n; ++i) {
        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i];
    }
    for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) {
        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i];
    }

    tcg_ctx = s;
    /*
     * In user-mode we simply share the init context among threads, since we
     * use a single region. See the documentation of tcg_region_init() for
     * the reasoning behind this.
     * In softmmu we will have at most max_cpus TCG threads.
     */
#ifdef CONFIG_USER_ONLY
    tcg_ctxs = &tcg_ctx;
    n_tcg_ctxs = 1;
#else
    tcg_ctxs = g_new(TCGContext *, max_cpus);
#endif

    /* TCG_AREG0 must not be reserved before it is bound to "env". */
    tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0));
    ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env");
    cpu_env = temp_tcgv_ptr(ts);
}
968 
969 /*
970  * Allocate TBs right before their corresponding translated code, making
971  * sure that TBs and code are on different cache lines.
972  */
973 TranslationBlock *tcg_tb_alloc(TCGContext *s)
974 {
975     uintptr_t align = qemu_icache_linesize;
976     TranslationBlock *tb;
977     void *next;
978 
979  retry:
980     tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align);
981     next = (void *)ROUND_UP((uintptr_t)(tb + 1), align);
982 
983     if (unlikely(next > s->code_gen_highwater)) {
984         if (tcg_region_alloc(s)) {
985             return NULL;
986         }
987         goto retry;
988     }
989     atomic_set(&s->code_gen_ptr, next);
990     s->data_gen_ptr = NULL;
991     return tb;
992 }
993 
994 void tcg_prologue_init(TCGContext *s)
995 {
996     size_t prologue_size, total_size;
997     void *buf0, *buf1;
998 
999     /* Put the prologue at the beginning of code_gen_buffer.  */
1000     buf0 = s->code_gen_buffer;
1001     total_size = s->code_gen_buffer_size;
1002     s->code_ptr = buf0;
1003     s->code_buf = buf0;
1004     s->data_gen_ptr = NULL;
1005     s->code_gen_prologue = buf0;
1006 
1007     /* Compute a high-water mark, at which we voluntarily flush the buffer
1008        and start over.  The size here is arbitrary, significantly larger
1009        than we expect the code generation for any one opcode to require.  */
1010     s->code_gen_highwater = s->code_gen_buffer + (total_size - TCG_HIGHWATER);
1011 
1012 #ifdef TCG_TARGET_NEED_POOL_LABELS
1013     s->pool_labels = NULL;
1014 #endif
1015 
1016     /* Generate the prologue.  */
1017     tcg_target_qemu_prologue(s);
1018 
1019 #ifdef TCG_TARGET_NEED_POOL_LABELS
1020     /* Allow the prologue to put e.g. guest_base into a pool entry.  */
1021     {
1022         bool ok = tcg_out_pool_finalize(s);
1023         tcg_debug_assert(ok);
1024     }
1025 #endif
1026 
1027     buf1 = s->code_ptr;
1028     flush_icache_range((uintptr_t)buf0, (uintptr_t)buf1);
1029 
1030     /* Deduct the prologue from the buffer.  */
1031     prologue_size = tcg_current_code_size(s);
1032     s->code_gen_ptr = buf1;
1033     s->code_gen_buffer = buf1;
1034     s->code_buf = buf1;
1035     total_size -= prologue_size;
1036     s->code_gen_buffer_size = total_size;
1037 
1038     tcg_register_jit(s->code_gen_buffer, total_size);
1039 
1040 #ifdef DEBUG_DISAS
1041     if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
1042         qemu_log_lock();
1043         qemu_log("PROLOGUE: [size=%zu]\n", prologue_size);
1044         if (s->data_gen_ptr) {
1045             size_t code_size = s->data_gen_ptr - buf0;
1046             size_t data_size = prologue_size - code_size;
1047             size_t i;
1048 
1049             log_disas(buf0, code_size);
1050 
1051             for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) {
1052                 if (sizeof(tcg_target_ulong) == 8) {
1053                     qemu_log("0x%08" PRIxPTR ":  .quad  0x%016" PRIx64 "\n",
1054                              (uintptr_t)s->data_gen_ptr + i,
1055                              *(uint64_t *)(s->data_gen_ptr + i));
1056                 } else {
1057                     qemu_log("0x%08" PRIxPTR ":  .long  0x%08x\n",
1058                              (uintptr_t)s->data_gen_ptr + i,
1059                              *(uint32_t *)(s->data_gen_ptr + i));
1060                 }
1061             }
1062         } else {
1063             log_disas(buf0, prologue_size);
1064         }
1065         qemu_log("\n");
1066         qemu_log_flush();
1067         qemu_log_unlock();
1068     }
1069 #endif
1070 
1071     /* Assert that goto_ptr is implemented completely.  */
1072     if (TCG_TARGET_HAS_goto_ptr) {
1073         tcg_debug_assert(s->code_gen_epilogue != NULL);
1074     }
1075 }
1076 
1077 void tcg_func_start(TCGContext *s)
1078 {
1079     tcg_pool_reset(s);
1080     s->nb_temps = s->nb_globals;
1081 
1082     /* No temps have been previously allocated for size or locality.  */
1083     memset(s->free_temps, 0, sizeof(s->free_temps));
1084 
1085     s->nb_ops = 0;
1086     s->nb_labels = 0;
1087     s->current_frame_offset = s->frame_start;
1088 
1089 #ifdef CONFIG_DEBUG_TCG
1090     s->goto_tb_issue_mask = 0;
1091 #endif
1092 
1093     QTAILQ_INIT(&s->ops);
1094     QTAILQ_INIT(&s->free_ops);
1095 }
1096 
1097 static inline TCGTemp *tcg_temp_alloc(TCGContext *s)
1098 {
1099     int n = s->nb_temps++;
1100     tcg_debug_assert(n < TCG_MAX_TEMPS);
1101     return memset(&s->temps[n], 0, sizeof(TCGTemp));
1102 }
1103 
1104 static inline TCGTemp *tcg_global_alloc(TCGContext *s)
1105 {
1106     TCGTemp *ts;
1107 
1108     tcg_debug_assert(s->nb_globals == s->nb_temps);
1109     s->nb_globals++;
1110     ts = tcg_temp_alloc(s);
1111     ts->temp_global = 1;
1112 
1113     return ts;
1114 }
1115 
1116 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
1117                                             TCGReg reg, const char *name)
1118 {
1119     TCGTemp *ts;
1120 
1121     if (TCG_TARGET_REG_BITS == 32 && type != TCG_TYPE_I32) {
1122         tcg_abort();
1123     }
1124 
1125     ts = tcg_global_alloc(s);
1126     ts->base_type = type;
1127     ts->type = type;
1128     ts->fixed_reg = 1;
1129     ts->reg = reg;
1130     ts->name = name;
1131     tcg_regset_set_reg(s->reserved_regs, reg);
1132 
1133     return ts;
1134 }
1135 
1136 void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size)
1137 {
1138     s->frame_start = start;
1139     s->frame_end = start + size;
1140     s->frame_temp
1141         = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame");
1142 }
1143 
1144 TCGTemp *tcg_global_mem_new_internal(TCGType type, TCGv_ptr base,
1145                                      intptr_t offset, const char *name)
1146 {
1147     TCGContext *s = tcg_ctx;
1148     TCGTemp *base_ts = tcgv_ptr_temp(base);
1149     TCGTemp *ts = tcg_global_alloc(s);
1150     int indirect_reg = 0, bigendian = 0;
1151 #ifdef HOST_WORDS_BIGENDIAN
1152     bigendian = 1;
1153 #endif
1154 
1155     if (!base_ts->fixed_reg) {
1156         /* We do not support double-indirect registers.  */
1157         tcg_debug_assert(!base_ts->indirect_reg);
1158         base_ts->indirect_base = 1;
1159         s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64
1160                             ? 2 : 1);
1161         indirect_reg = 1;
1162     }
1163 
1164     if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
1165         TCGTemp *ts2 = tcg_global_alloc(s);
1166         char buf[64];
1167 
1168         ts->base_type = TCG_TYPE_I64;
1169         ts->type = TCG_TYPE_I32;
1170         ts->indirect_reg = indirect_reg;
1171         ts->mem_allocated = 1;
1172         ts->mem_base = base_ts;
1173         ts->mem_offset = offset + bigendian * 4;
1174         pstrcpy(buf, sizeof(buf), name);
1175         pstrcat(buf, sizeof(buf), "_0");
1176         ts->name = strdup(buf);
1177 
1178         tcg_debug_assert(ts2 == ts + 1);
1179         ts2->base_type = TCG_TYPE_I64;
1180         ts2->type = TCG_TYPE_I32;
1181         ts2->indirect_reg = indirect_reg;
1182         ts2->mem_allocated = 1;
1183         ts2->mem_base = base_ts;
1184         ts2->mem_offset = offset + (1 - bigendian) * 4;
1185         pstrcpy(buf, sizeof(buf), name);
1186         pstrcat(buf, sizeof(buf), "_1");
1187         ts2->name = strdup(buf);
1188     } else {
1189         ts->base_type = type;
1190         ts->type = type;
1191         ts->indirect_reg = indirect_reg;
1192         ts->mem_allocated = 1;
1193         ts->mem_base = base_ts;
1194         ts->mem_offset = offset;
1195         ts->name = name;
1196     }
1197     return ts;
1198 }
1199 
1200 TCGTemp *tcg_temp_new_internal(TCGType type, bool temp_local)
1201 {
1202     TCGContext *s = tcg_ctx;
1203     TCGTemp *ts;
1204     int idx, k;
1205 
1206     k = type + (temp_local ? TCG_TYPE_COUNT : 0);
1207     idx = find_first_bit(s->free_temps[k].l, TCG_MAX_TEMPS);
1208     if (idx < TCG_MAX_TEMPS) {
1209         /* There is already an available temp with the right type.  */
1210         clear_bit(idx, s->free_temps[k].l);
1211 
1212         ts = &s->temps[idx];
1213         ts->temp_allocated = 1;
1214         tcg_debug_assert(ts->base_type == type);
1215         tcg_debug_assert(ts->temp_local == temp_local);
1216     } else {
1217         ts = tcg_temp_alloc(s);
1218         if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
1219             TCGTemp *ts2 = tcg_temp_alloc(s);
1220 
1221             ts->base_type = type;
1222             ts->type = TCG_TYPE_I32;
1223             ts->temp_allocated = 1;
1224             ts->temp_local = temp_local;
1225 
1226             tcg_debug_assert(ts2 == ts + 1);
1227             ts2->base_type = TCG_TYPE_I64;
1228             ts2->type = TCG_TYPE_I32;
1229             ts2->temp_allocated = 1;
1230             ts2->temp_local = temp_local;
1231         } else {
1232             ts->base_type = type;
1233             ts->type = type;
1234             ts->temp_allocated = 1;
1235             ts->temp_local = temp_local;
1236         }
1237     }
1238 
1239 #if defined(CONFIG_DEBUG_TCG)
1240     s->temps_in_use++;
1241 #endif
1242     return ts;
1243 }
1244 
1245 TCGv_vec tcg_temp_new_vec(TCGType type)
1246 {
1247     TCGTemp *t;
1248 
1249 #ifdef CONFIG_DEBUG_TCG
1250     switch (type) {
1251     case TCG_TYPE_V64:
1252         assert(TCG_TARGET_HAS_v64);
1253         break;
1254     case TCG_TYPE_V128:
1255         assert(TCG_TARGET_HAS_v128);
1256         break;
1257     case TCG_TYPE_V256:
1258         assert(TCG_TARGET_HAS_v256);
1259         break;
1260     default:
1261         g_assert_not_reached();
1262     }
1263 #endif
1264 
1265     t = tcg_temp_new_internal(type, 0);
1266     return temp_tcgv_vec(t);
1267 }
1268 
1269 /* Create a new temp of the same type as an existing temp.  */
1270 TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match)
1271 {
1272     TCGTemp *t = tcgv_vec_temp(match);
1273 
1274     tcg_debug_assert(t->temp_allocated != 0);
1275 
1276     t = tcg_temp_new_internal(t->base_type, 0);
1277     return temp_tcgv_vec(t);
1278 }
1279 
1280 void tcg_temp_free_internal(TCGTemp *ts)
1281 {
1282     TCGContext *s = tcg_ctx;
1283     int k, idx;
1284 
1285 #if defined(CONFIG_DEBUG_TCG)
1286     s->temps_in_use--;
1287     if (s->temps_in_use < 0) {
1288         fprintf(stderr, "More temporaries freed than allocated!\n");
1289     }
1290 #endif
1291 
1292     tcg_debug_assert(ts->temp_global == 0);
1293     tcg_debug_assert(ts->temp_allocated != 0);
1294     ts->temp_allocated = 0;
1295 
1296     idx = temp_idx(ts);
1297     k = ts->base_type + (ts->temp_local ? TCG_TYPE_COUNT : 0);
1298     set_bit(idx, s->free_temps[k].l);
1299 }
1300 
1301 TCGv_i32 tcg_const_i32(int32_t val)
1302 {
1303     TCGv_i32 t0;
1304     t0 = tcg_temp_new_i32();
1305     tcg_gen_movi_i32(t0, val);
1306     return t0;
1307 }
1308 
1309 TCGv_i64 tcg_const_i64(int64_t val)
1310 {
1311     TCGv_i64 t0;
1312     t0 = tcg_temp_new_i64();
1313     tcg_gen_movi_i64(t0, val);
1314     return t0;
1315 }
1316 
1317 TCGv_i32 tcg_const_local_i32(int32_t val)
1318 {
1319     TCGv_i32 t0;
1320     t0 = tcg_temp_local_new_i32();
1321     tcg_gen_movi_i32(t0, val);
1322     return t0;
1323 }
1324 
1325 TCGv_i64 tcg_const_local_i64(int64_t val)
1326 {
1327     TCGv_i64 t0;
1328     t0 = tcg_temp_local_new_i64();
1329     tcg_gen_movi_i64(t0, val);
1330     return t0;
1331 }
1332 
1333 #if defined(CONFIG_DEBUG_TCG)
1334 void tcg_clear_temp_count(void)
1335 {
1336     TCGContext *s = tcg_ctx;
1337     s->temps_in_use = 0;
1338 }
1339 
1340 int tcg_check_temp_count(void)
1341 {
1342     TCGContext *s = tcg_ctx;
1343     if (s->temps_in_use) {
1344         /* Clear the count so that we don't give another
1345          * warning immediately next time around.
1346          */
1347         s->temps_in_use = 0;
1348         return 1;
1349     }
1350     return 0;
1351 }
1352 #endif
1353 
1354 /* Return true if OP may appear in the opcode stream.
1355    Test the runtime variable that controls each opcode.  */
/*
 * The switch below falls into these groups:
 *   - opcodes reported as supported unconditionally;
 *   - opcodes gated by a single TCG_TARGET_HAS_* capability;
 *   - opcodes gated by the host register width (TCG_TARGET_REG_BITS);
 *   - vector opcodes, which additionally require that at least one vector
 *     size is available (have_vec).
 * Any opcode not listed is expected to lie strictly between
 * INDEX_op_last_generic and NB_OPS (checked with tcg_debug_assert) and is
 * reported as supported.
 */
1356 bool tcg_op_supported(TCGOpcode op)
1357 {
    /* Nonzero if any of the three vector sizes is available.  */
1358     const bool have_vec
1359         = TCG_TARGET_HAS_v64 | TCG_TARGET_HAS_v128 | TCG_TARGET_HAS_v256;
1360
1361     switch (op) {
    /* Control-flow, call and guest memory ops: always reported supported.  */
1362     case INDEX_op_discard:
1363     case INDEX_op_set_label:
1364     case INDEX_op_call:
1365     case INDEX_op_br:
1366     case INDEX_op_mb:
1367     case INDEX_op_insn_start:
1368     case INDEX_op_exit_tb:
1369     case INDEX_op_goto_tb:
1370     case INDEX_op_qemu_ld_i32:
1371     case INDEX_op_qemu_st_i32:
1372     case INDEX_op_qemu_ld_i64:
1373     case INDEX_op_qemu_st_i64:
1374         return true;
1375
1376     case INDEX_op_goto_ptr:
1377         return TCG_TARGET_HAS_goto_ptr;
1378
    /* Basic 32-bit ops: always reported supported.  */
1379     case INDEX_op_mov_i32:
1380     case INDEX_op_movi_i32:
1381     case INDEX_op_setcond_i32:
1382     case INDEX_op_brcond_i32:
1383     case INDEX_op_ld8u_i32:
1384     case INDEX_op_ld8s_i32:
1385     case INDEX_op_ld16u_i32:
1386     case INDEX_op_ld16s_i32:
1387     case INDEX_op_ld_i32:
1388     case INDEX_op_st8_i32:
1389     case INDEX_op_st16_i32:
1390     case INDEX_op_st_i32:
1391     case INDEX_op_add_i32:
1392     case INDEX_op_sub_i32:
1393     case INDEX_op_mul_i32:
1394     case INDEX_op_and_i32:
1395     case INDEX_op_or_i32:
1396     case INDEX_op_xor_i32:
1397     case INDEX_op_shl_i32:
1398     case INDEX_op_shr_i32:
1399     case INDEX_op_sar_i32:
1400         return true;
1401
    /* Optional 32-bit ops: each follows its TCG_TARGET_HAS_* flag.  */
1402     case INDEX_op_movcond_i32:
1403         return TCG_TARGET_HAS_movcond_i32;
1404     case INDEX_op_div_i32:
1405     case INDEX_op_divu_i32:
1406         return TCG_TARGET_HAS_div_i32;
1407     case INDEX_op_rem_i32:
1408     case INDEX_op_remu_i32:
1409         return TCG_TARGET_HAS_rem_i32;
1410     case INDEX_op_div2_i32:
1411     case INDEX_op_divu2_i32:
1412         return TCG_TARGET_HAS_div2_i32;
1413     case INDEX_op_rotl_i32:
1414     case INDEX_op_rotr_i32:
1415         return TCG_TARGET_HAS_rot_i32;
1416     case INDEX_op_deposit_i32:
1417         return TCG_TARGET_HAS_deposit_i32;
1418     case INDEX_op_extract_i32:
1419         return TCG_TARGET_HAS_extract_i32;
1420     case INDEX_op_sextract_i32:
1421         return TCG_TARGET_HAS_sextract_i32;
1422     case INDEX_op_add2_i32:
1423         return TCG_TARGET_HAS_add2_i32;
1424     case INDEX_op_sub2_i32:
1425         return TCG_TARGET_HAS_sub2_i32;
1426     case INDEX_op_mulu2_i32:
1427         return TCG_TARGET_HAS_mulu2_i32;
1428     case INDEX_op_muls2_i32:
1429         return TCG_TARGET_HAS_muls2_i32;
1430     case INDEX_op_muluh_i32:
1431         return TCG_TARGET_HAS_muluh_i32;
1432     case INDEX_op_mulsh_i32:
1433         return TCG_TARGET_HAS_mulsh_i32;
1434     case INDEX_op_ext8s_i32:
1435         return TCG_TARGET_HAS_ext8s_i32;
1436     case INDEX_op_ext16s_i32:
1437         return TCG_TARGET_HAS_ext16s_i32;
1438     case INDEX_op_ext8u_i32:
1439         return TCG_TARGET_HAS_ext8u_i32;
1440     case INDEX_op_ext16u_i32:
1441         return TCG_TARGET_HAS_ext16u_i32;
1442     case INDEX_op_bswap16_i32:
1443         return TCG_TARGET_HAS_bswap16_i32;
1444     case INDEX_op_bswap32_i32:
1445         return TCG_TARGET_HAS_bswap32_i32;
1446     case INDEX_op_not_i32:
1447         return TCG_TARGET_HAS_not_i32;
1448     case INDEX_op_neg_i32:
1449         return TCG_TARGET_HAS_neg_i32;
1450     case INDEX_op_andc_i32:
1451         return TCG_TARGET_HAS_andc_i32;
1452     case INDEX_op_orc_i32:
1453         return TCG_TARGET_HAS_orc_i32;
1454     case INDEX_op_eqv_i32:
1455         return TCG_TARGET_HAS_eqv_i32;
1456     case INDEX_op_nand_i32:
1457         return TCG_TARGET_HAS_nand_i32;
1458     case INDEX_op_nor_i32:
1459         return TCG_TARGET_HAS_nor_i32;
1460     case INDEX_op_clz_i32:
1461         return TCG_TARGET_HAS_clz_i32;
1462     case INDEX_op_ctz_i32:
1463         return TCG_TARGET_HAS_ctz_i32;
1464     case INDEX_op_ctpop_i32:
1465         return TCG_TARGET_HAS_ctpop_i32;
1466
    /* Double-word compare ops: reported only for 32-bit hosts.  */
1467     case INDEX_op_brcond2_i32:
1468     case INDEX_op_setcond2_i32:
1469         return TCG_TARGET_REG_BITS == 32;
1470
    /* Basic 64-bit ops: reported only for 64-bit hosts.  */
1471     case INDEX_op_mov_i64:
1472     case INDEX_op_movi_i64:
1473     case INDEX_op_setcond_i64:
1474     case INDEX_op_brcond_i64:
1475     case INDEX_op_ld8u_i64:
1476     case INDEX_op_ld8s_i64:
1477     case INDEX_op_ld16u_i64:
1478     case INDEX_op_ld16s_i64:
1479     case INDEX_op_ld32u_i64:
1480     case INDEX_op_ld32s_i64:
1481     case INDEX_op_ld_i64:
1482     case INDEX_op_st8_i64:
1483     case INDEX_op_st16_i64:
1484     case INDEX_op_st32_i64:
1485     case INDEX_op_st_i64:
1486     case INDEX_op_add_i64:
1487     case INDEX_op_sub_i64:
1488     case INDEX_op_mul_i64:
1489     case INDEX_op_and_i64:
1490     case INDEX_op_or_i64:
1491     case INDEX_op_xor_i64:
1492     case INDEX_op_shl_i64:
1493     case INDEX_op_shr_i64:
1494     case INDEX_op_sar_i64:
1495     case INDEX_op_ext_i32_i64:
1496     case INDEX_op_extu_i32_i64:
1497         return TCG_TARGET_REG_BITS == 64;
1498
    /* Optional 64-bit ops: each follows its TCG_TARGET_HAS_* flag.  */
1499     case INDEX_op_movcond_i64:
1500         return TCG_TARGET_HAS_movcond_i64;
1501     case INDEX_op_div_i64:
1502     case INDEX_op_divu_i64:
1503         return TCG_TARGET_HAS_div_i64;
1504     case INDEX_op_rem_i64:
1505     case INDEX_op_remu_i64:
1506         return TCG_TARGET_HAS_rem_i64;
1507     case INDEX_op_div2_i64:
1508     case INDEX_op_divu2_i64:
1509         return TCG_TARGET_HAS_div2_i64;
1510     case INDEX_op_rotl_i64:
1511     case INDEX_op_rotr_i64:
1512         return TCG_TARGET_HAS_rot_i64;
1513     case INDEX_op_deposit_i64:
1514         return TCG_TARGET_HAS_deposit_i64;
1515     case INDEX_op_extract_i64:
1516         return TCG_TARGET_HAS_extract_i64;
1517     case INDEX_op_sextract_i64:
1518         return TCG_TARGET_HAS_sextract_i64;
1519     case INDEX_op_extrl_i64_i32:
1520         return TCG_TARGET_HAS_extrl_i64_i32;
1521     case INDEX_op_extrh_i64_i32:
1522         return TCG_TARGET_HAS_extrh_i64_i32;
1523     case INDEX_op_ext8s_i64:
1524         return TCG_TARGET_HAS_ext8s_i64;
1525     case INDEX_op_ext16s_i64:
1526         return TCG_TARGET_HAS_ext16s_i64;
1527     case INDEX_op_ext32s_i64:
1528         return TCG_TARGET_HAS_ext32s_i64;
1529     case INDEX_op_ext8u_i64:
1530         return TCG_TARGET_HAS_ext8u_i64;
1531     case INDEX_op_ext16u_i64:
1532         return TCG_TARGET_HAS_ext16u_i64;
1533     case INDEX_op_ext32u_i64:
1534         return TCG_TARGET_HAS_ext32u_i64;
1535     case INDEX_op_bswap16_i64:
1536         return TCG_TARGET_HAS_bswap16_i64;
1537     case INDEX_op_bswap32_i64:
1538         return TCG_TARGET_HAS_bswap32_i64;
1539     case INDEX_op_bswap64_i64:
1540         return TCG_TARGET_HAS_bswap64_i64;
1541     case INDEX_op_not_i64:
1542         return TCG_TARGET_HAS_not_i64;
1543     case INDEX_op_neg_i64:
1544         return TCG_TARGET_HAS_neg_i64;
1545     case INDEX_op_andc_i64:
1546         return TCG_TARGET_HAS_andc_i64;
1547     case INDEX_op_orc_i64:
1548         return TCG_TARGET_HAS_orc_i64;
1549     case INDEX_op_eqv_i64:
1550         return TCG_TARGET_HAS_eqv_i64;
1551     case INDEX_op_nand_i64:
1552         return TCG_TARGET_HAS_nand_i64;
1553     case INDEX_op_nor_i64:
1554         return TCG_TARGET_HAS_nor_i64;
1555     case INDEX_op_clz_i64:
1556         return TCG_TARGET_HAS_clz_i64;
1557     case INDEX_op_ctz_i64:
1558         return TCG_TARGET_HAS_ctz_i64;
1559     case INDEX_op_ctpop_i64:
1560         return TCG_TARGET_HAS_ctpop_i64;
1561     case INDEX_op_add2_i64:
1562         return TCG_TARGET_HAS_add2_i64;
1563     case INDEX_op_sub2_i64:
1564         return TCG_TARGET_HAS_sub2_i64;
1565     case INDEX_op_mulu2_i64:
1566         return TCG_TARGET_HAS_mulu2_i64;
1567     case INDEX_op_muls2_i64:
1568         return TCG_TARGET_HAS_muls2_i64;
1569     case INDEX_op_muluh_i64:
1570         return TCG_TARGET_HAS_muluh_i64;
1571     case INDEX_op_mulsh_i64:
1572         return TCG_TARGET_HAS_mulsh_i64;
1573
    /* Vector ops that need nothing beyond some vector support.  */
1574     case INDEX_op_mov_vec:
1575     case INDEX_op_dup_vec:
1576     case INDEX_op_dupi_vec:
1577     case INDEX_op_ld_vec:
1578     case INDEX_op_st_vec:
1579     case INDEX_op_add_vec:
1580     case INDEX_op_sub_vec:
1581     case INDEX_op_and_vec:
1582     case INDEX_op_or_vec:
1583     case INDEX_op_xor_vec:
1584     case INDEX_op_cmp_vec:
1585         return have_vec;
    /* Vector ops that also depend on register width or a capability flag.  */
1586     case INDEX_op_dup2_vec:
1587         return have_vec && TCG_TARGET_REG_BITS == 32;
1588     case INDEX_op_not_vec:
1589         return have_vec && TCG_TARGET_HAS_not_vec;
1590     case INDEX_op_neg_vec:
1591         return have_vec && TCG_TARGET_HAS_neg_vec;
1592     case INDEX_op_andc_vec:
1593         return have_vec && TCG_TARGET_HAS_andc_vec;
1594     case INDEX_op_orc_vec:
1595         return have_vec && TCG_TARGET_HAS_orc_vec;
1596     case INDEX_op_mul_vec:
1597         return have_vec && TCG_TARGET_HAS_mul_vec;
1598     case INDEX_op_shli_vec:
1599     case INDEX_op_shri_vec:
1600     case INDEX_op_sari_vec:
1601         return have_vec && TCG_TARGET_HAS_shi_vec;
1602     case INDEX_op_shls_vec:
1603     case INDEX_op_shrs_vec:
1604     case INDEX_op_sars_vec:
1605         return have_vec && TCG_TARGET_HAS_shs_vec;
1606     case INDEX_op_shlv_vec:
1607     case INDEX_op_shrv_vec:
1608     case INDEX_op_sarv_vec:
1609         return have_vec && TCG_TARGET_HAS_shv_vec;
1610
    /* Not a generic opcode: must be in (INDEX_op_last_generic, NB_OPS).  */
1611     default:
1612         tcg_debug_assert(op > INDEX_op_last_generic && op < NB_OPS);
1613         return true;
1614     }
1615 }
1616 
1617 /* Note: we convert the 64 bit args to 32 bit and do some alignment
1618    and endian swap. Maybe it would be better to do the alignment
1619    and endian swap in tcg_reg_alloc_call(). */
/*
 * Emit an INDEX_op_call op that calls the helper FUNC.
 *
 * func:  helper entry point; also the key used to look up its
 *        TCGHelperInfo in helper_table.
 * ret:   temp that receives the result, or NULL when there is none.
 * nargs: number of entries in ARGS.
 * args:  argument temps.  NOTE: in the TCG_TARGET_EXTEND_ARGS variant the
 *        entries for 32-bit arguments are overwritten in place.
 *
 * As used below, info->sizemask encodes: bit 0 set = 64-bit return value;
 * for argument i, bit (i+1)*2 set = 64-bit argument and bit (i+1)*2+1
 * set = signed argument.
 *
 * The emitted op->args[] holds the return temp(s), then the argument
 * temps (possibly with TCG_CALL_DUMMY_ARG padding), then FUNC, then the
 * helper flags.  TCGOP_CALLO/TCGOP_CALLI record the number of return and
 * argument slots.
 */
1620 void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args)
1621 {
1622     int i, real_args, nb_rets, pi;
1623     unsigned sizemask, flags;
1624     TCGHelperInfo *info;
1625     TCGOp *op;
1626
    /* NOTE(review): info is dereferenced without a NULL check, so FUNC
       must be present in helper_table.  */
1627     info = g_hash_table_lookup(helper_table, (gpointer)func);
1628     flags = info->flags;
1629     sizemask = info->sizemask;
1630
1631 #if defined(__sparc__) && !defined(__arch64__) \
1632     && !defined(CONFIG_TCG_INTERPRETER)
1633     /* We have 64-bit values in one register, but need to pass as two
1634        separate parameters.  Split them.  */
1635     int orig_sizemask = sizemask;
1636     int orig_nargs = nargs;
1637     TCGv_i64 retl, reth;
1638     TCGTemp *split_args[MAX_OPC_PARAM];
1639
1640     retl = NULL;
1641     reth = NULL;
1642     if (sizemask != 0) {
1643         for (i = real_args = 0; i < nargs; ++i) {
1644             int is_64bit = sizemask & (1 << (i+1)*2);
1645             if (is_64bit) {
                /* Pass the high half first, then the low half.  */
1646                 TCGv_i64 orig = temp_tcgv_i64(args[i]);
1647                 TCGv_i32 h = tcg_temp_new_i32();
1648                 TCGv_i32 l = tcg_temp_new_i32();
1649                 tcg_gen_extr_i64_i32(l, h, orig);
1650                 split_args[real_args++] = tcgv_i32_temp(h);
1651                 split_args[real_args++] = tcgv_i32_temp(l);
1652             } else {
1653                 split_args[real_args++] = args[i];
1654             }
1655         }
        /* From here on work on the split list.  With sizemask cleared the
           generic argument loop below treats every entry as one slot.  */
1656         nargs = real_args;
1657         args = split_args;
1658         sizemask = 0;
1659     }
1660 #elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
    /* Replace every 32-bit argument by a fresh 64-bit temp that holds its
       sign- or zero-extended value (per the "signed" bit of sizemask).  */
1661     for (i = 0; i < nargs; ++i) {
1662         int is_64bit = sizemask & (1 << (i+1)*2);
1663         int is_signed = sizemask & (2 << (i+1)*2);
1664         if (!is_64bit) {
1665             TCGv_i64 temp = tcg_temp_new_i64();
1666             TCGv_i64 orig = temp_tcgv_i64(args[i]);
1667             if (is_signed) {
1668                 tcg_gen_ext32s_i64(temp, orig);
1669             } else {
1670                 tcg_gen_ext32u_i64(temp, orig);
1671             }
1672             args[i] = tcgv_i64_temp(temp);
1673         }
1674     }
1675 #endif /* TCG_TARGET_EXTEND_ARGS */
1676
1677     op = tcg_emit_op(INDEX_op_call);
1678
    /* pi is the index of the next unused slot in op->args[].  */
1679     pi = 0;
1680     if (ret != NULL) {
1681 #if defined(__sparc__) && !defined(__arch64__) \
1682     && !defined(CONFIG_TCG_INTERPRETER)
1683         if (orig_sizemask & 1) {
1684             /* The 32-bit ABI is going to return the 64-bit value in
1685                the %o0/%o1 register pair.  Prepare for this by using
1686                two return temporaries, and reassemble below.  */
1687             retl = tcg_temp_new_i64();
1688             reth = tcg_temp_new_i64();
1689             op->args[pi++] = tcgv_i64_arg(reth);
1690             op->args[pi++] = tcgv_i64_arg(retl);
1691             nb_rets = 2;
1692         } else {
1693             op->args[pi++] = temp_arg(ret);
1694             nb_rets = 1;
1695         }
1696 #else
        /* A 64-bit result on a narrower host uses the two adjacent temps
           ret and ret + 1; their order follows host endianness.  */
1697         if (TCG_TARGET_REG_BITS < 64 && (sizemask & 1)) {
1698 #ifdef HOST_WORDS_BIGENDIAN
1699             op->args[pi++] = temp_arg(ret + 1);
1700             op->args[pi++] = temp_arg(ret);
1701 #else
1702             op->args[pi++] = temp_arg(ret);
1703             op->args[pi++] = temp_arg(ret + 1);
1704 #endif
1705             nb_rets = 2;
1706         } else {
1707             op->args[pi++] = temp_arg(ret);
1708             nb_rets = 1;
1709         }
1710 #endif
1711     } else {
1712         nb_rets = 0;
1713     }
1714     TCGOP_CALLO(op) = nb_rets;
1715
    /* real_args counts argument slots, including any dummy padding.  */
1716     real_args = 0;
1717     for (i = 0; i < nargs; i++) {
1718         int is_64bit = sizemask & (1 << (i+1)*2);
1719         if (TCG_TARGET_REG_BITS < 64 && is_64bit) {
1720 #ifdef TCG_TARGET_CALL_ALIGN_ARGS
1721             /* some targets want aligned 64 bit args */
1722             if (real_args & 1) {
1723                 op->args[pi++] = TCG_CALL_DUMMY_ARG;
1724                 real_args++;
1725             }
1726 #endif
1727            /* If stack grows up, then we will be placing successive
1728               arguments at lower addresses, which means we need to
1729               reverse the order compared to how we would normally
1730               treat either big or little-endian.  For those arguments
1731               that will wind up in registers, this still works for
1732               HPPA (the only current STACK_GROWSUP target) since the
1733               argument registers are *also* allocated in decreasing
1734               order.  If another such target is added, this logic may
1735               have to get more complicated to differentiate between
1736               stack arguments and register arguments.  */
1737 #if defined(HOST_WORDS_BIGENDIAN) != defined(TCG_TARGET_STACK_GROWSUP)
1738             op->args[pi++] = temp_arg(args[i] + 1);
1739             op->args[pi++] = temp_arg(args[i]);
1740 #else
1741             op->args[pi++] = temp_arg(args[i]);
1742             op->args[pi++] = temp_arg(args[i] + 1);
1743 #endif
1744             real_args += 2;
1745             continue;
1746         }
1747
1748         op->args[pi++] = temp_arg(args[i]);
1749         real_args++;
1750     }
    /* The helper address and its flags follow the argument temps.  */
1751     op->args[pi++] = (uintptr_t)func;
1752     op->args[pi++] = flags;
1753     TCGOP_CALLI(op) = real_args;
1754
1755     /* Make sure the fields didn't overflow.  */
1756     tcg_debug_assert(TCGOP_CALLI(op) == real_args);
1757     tcg_debug_assert(pi <= ARRAY_SIZE(op->args));
1758
1759 #if defined(__sparc__) && !defined(__arch64__) \
1760     && !defined(CONFIG_TCG_INTERPRETER)
1761     /* Free all of the parts we allocated above.  */
1762     for (i = real_args = 0; i < orig_nargs; ++i) {
1763         int is_64bit = orig_sizemask & (1 << (i+1)*2);
1764         if (is_64bit) {
1765             tcg_temp_free_internal(args[real_args++]);
1766             tcg_temp_free_internal(args[real_args++]);
1767         } else {
1768             real_args++;
1769         }
1770     }
1771     if (orig_sizemask & 1) {
1772         /* The 32-bit ABI returned two 32-bit pieces.  Re-assemble them.
1773            Note that describing these as TCGv_i64 eliminates an unnecessary
1774            zero-extension that tcg_gen_concat_i32_i64 would create.  */
1775         tcg_gen_concat32_i64(temp_tcgv_i64(ret), retl, reth);
1776         tcg_temp_free_i64(retl);
1777         tcg_temp_free_i64(reth);
1778     }
1779 #elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
    /* Release the extension temps created before the call was emitted.  */
1780     for (i = 0; i < nargs; ++i) {
1781         int is_64bit = sizemask & (1 << (i+1)*2);
1782         if (!is_64bit) {
1783             tcg_temp_free_internal(args[i]);
1784         }
1785     }
1786 #endif /* TCG_TARGET_EXTEND_ARGS */
1787 }
1788 
1789 static void tcg_reg_alloc_start(TCGContext *s)
1790 {
1791     int i, n;
1792     TCGTemp *ts;
1793 
1794     for (i = 0, n = s->nb_globals; i < n; i++) {
1795         ts = &s->temps[i];
1796         ts->val_type = (ts->fixed_reg ? TEMP_VAL_REG : TEMP_VAL_MEM);
1797     }
1798     for (n = s->nb_temps; i < n; i++) {
1799         ts = &s->temps[i];
1800         ts->val_type = (ts->temp_local ? TEMP_VAL_MEM : TEMP_VAL_DEAD);
1801         ts->mem_allocated = 0;
1802         ts->fixed_reg = 0;
1803     }
1804 
1805     memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp));
1806 }
1807 
1808 static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size,
1809                                  TCGTemp *ts)
1810 {
1811     int idx = temp_idx(ts);
1812 
1813     if (ts->temp_global) {
1814         pstrcpy(buf, buf_size, ts->name);
1815     } else if (ts->temp_local) {
1816         snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
1817     } else {
1818         snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
1819     }
1820     return buf;
1821 }
1822 
1823 static char *tcg_get_arg_str(TCGContext *s, char *buf,
1824                              int buf_size, TCGArg arg)
1825 {
1826     return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg));
1827 }
1828 
1829 /* Find helper name.  */
1830 static inline const char *tcg_find_helper(TCGContext *s, uintptr_t val)
1831 {
1832     const char *ret = NULL;
1833     if (helper_table) {
1834         TCGHelperInfo *info = g_hash_table_lookup(helper_table, (gpointer)val);
1835         if (info) {
1836             ret = info->name;
1837         }
1838     }
1839     return ret;
1840 }
1841 
/* Printable names for comparison conditions, indexed by TCG_COND_* value.  */
1842 static const char * const cond_name[] =
1843 {
1844     [TCG_COND_NEVER] = "never",
1845     [TCG_COND_ALWAYS] = "always",
1846     [TCG_COND_EQ] = "eq",
1847     [TCG_COND_NE] = "ne",
1848     [TCG_COND_LT] = "lt",
1849     [TCG_COND_GE] = "ge",
1850     [TCG_COND_LE] = "le",
1851     [TCG_COND_GT] = "gt",
1852     [TCG_COND_LTU] = "ltu",
1853     [TCG_COND_GEU] = "geu",
1854     [TCG_COND_LEU] = "leu",
1855     [TCG_COND_GTU] = "gtu"
1856 };
1857 
/*
 * Printable names for memory operation kinds, indexed by the MO_* value
 * that combines size, signedness and endianness.  Indices that have no
 * designated entry are left NULL.
 */
1858 static const char * const ldst_name[] =
1859 {
1860     [MO_UB]   = "ub",
1861     [MO_SB]   = "sb",
1862     [MO_LEUW] = "leuw",
1863     [MO_LESW] = "lesw",
1864     [MO_LEUL] = "leul",
1865     [MO_LESL] = "lesl",
1866     [MO_LEQ]  = "leq",
1867     [MO_BEUW] = "beuw",
1868     [MO_BESW] = "besw",
1869     [MO_BEUL] = "beul",
1870     [MO_BESL] = "besl",
1871     [MO_BEQ]  = "beq",
1872 };
1873 
/*
 * Printable prefixes for the alignment field of a memory operation,
 * indexed by the MO_* alignment value shifted right by MO_ASHIFT.
 * The empty string is used for MO_ALIGN when ALIGNED_ONLY is defined and
 * for MO_UNALN otherwise (presumably the default for that configuration,
 * so that it is not printed).
 */
1874 static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
1875 #ifdef ALIGNED_ONLY
1876     [MO_UNALN >> MO_ASHIFT]    = "un+",
1877     [MO_ALIGN >> MO_ASHIFT]    = "",
1878 #else
1879     [MO_UNALN >> MO_ASHIFT]    = "",
1880     [MO_ALIGN >> MO_ASHIFT]    = "al+",
1881 #endif
1882     [MO_ALIGN_2 >> MO_ASHIFT]  = "al2+",
1883     [MO_ALIGN_4 >> MO_ASHIFT]  = "al4+",
1884     [MO_ALIGN_8 >> MO_ASHIFT]  = "al8+",
1885     [MO_ALIGN_16 >> MO_ASHIFT] = "al16+",
1886     [MO_ALIGN_32 >> MO_ASHIFT] = "al32+",
1887     [MO_ALIGN_64 >> MO_ASHIFT] = "al64+",
1888 };
1889 
/* Log a textual listing of every op queued on s->ops through qemu_log.
   Three layouts are used:
     - insn_start: a "\n ----" separator followed by the start words;
     - call: op name, helper name, flags, output count, then the output
       and input arguments;
     - everything else: op name, outputs, inputs, then the constant
       arguments (condition codes, memory-op descriptors and labels are
       decoded symbolically where possible).
   When op->life is non-zero the line is padded to column 48 and the
   sync/dead argument indexes are appended.  Each op ends with a newline.  */
1890 void tcg_dump_ops(TCGContext *s)
1891 {
1892     char buf[128];
1893     TCGOp *op;
1894 
1895     QTAILQ_FOREACH(op, &s->ops, link) {
1896         int i, k, nb_oargs, nb_iargs, nb_cargs;
1897         const TCGOpDef *def;
1898         TCGOpcode c;
             /* Sum of the qemu_log return values for this op; used below to
                pad to column 48 (presumably a count of characters written).  */
1899         int col = 0;
1900 
1901         c = op->opc;
1902         def = &tcg_op_defs[c];
1903 
1904         if (c == INDEX_op_insn_start) {
1905             col += qemu_log("\n ----");
1906 
1907             for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
1908                 target_ulong a;
                     /* When a target_ulong is wider than a host register, each
                        start word occupies two consecutive args which are
                        merged with deposit64 (start bit 32, length 32).  */
1909 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
1910                 a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
1911 #else
1912                 a = op->args[i];
1913 #endif
1914                 col += qemu_log(" " TARGET_FMT_lx, a);
1915             }
1916         } else if (c == INDEX_op_call) {
1917             /* variable number of arguments */
1918             nb_oargs = TCGOP_CALLO(op);
1919             nb_iargs = TCGOP_CALLI(op);
                 /* Assigned for symmetry with the generic path; nb_cargs is
                    not read again on the call path.  */
1920             nb_cargs = def->nb_cargs;
1921 
1922             /* function name, flags, out args */
1923             col += qemu_log(" %s %s,$0x%" TCG_PRIlx ",$%d", def->name,
1924                             tcg_find_helper(s, op->args[nb_oargs + nb_iargs]),
1925                             op->args[nb_oargs + nb_iargs + 1], nb_oargs);
1926             for (i = 0; i < nb_oargs; i++) {
1927                 col += qemu_log(",%s", tcg_get_arg_str(s, buf, sizeof(buf),
1928                                                        op->args[i]));
1929             }
1930             for (i = 0; i < nb_iargs; i++) {
1931                 TCGArg arg = op->args[nb_oargs + i];
                     /* Placeholder inputs are printed as "<dummy>" instead of
                        being resolved to a temp name.  */
1932                 const char *t = "<dummy>";
1933                 if (arg != TCG_CALL_DUMMY_ARG) {
1934                     t = tcg_get_arg_str(s, buf, sizeof(buf), arg);
1935                 }
1936                 col += qemu_log(",%s", t);
1937             }
1938         } else {
1939             col += qemu_log(" %s ", def->name);
1940 
1941             nb_oargs = def->nb_oargs;
1942             nb_iargs = def->nb_iargs;
1943             nb_cargs = def->nb_cargs;
1944 
1945             if (def->flags & TCG_OPF_VECTOR) {
1946                 col += qemu_log("v%d,e%d,", 64 << TCGOP_VECL(op),
1947                                 8 << TCGOP_VECE(op));
1948             }
1949 
                 /* k walks op->args across outputs, inputs and constants; it
                    also decides whether a separating comma is needed.  */
1950             k = 0;
1951             for (i = 0; i < nb_oargs; i++) {
1952                 if (k != 0) {
1953                     col += qemu_log(",");
1954                 }
1955                 col += qemu_log("%s", tcg_get_arg_str(s, buf, sizeof(buf),
1956                                                       op->args[k++]));
1957             }
1958             for (i = 0; i < nb_iargs; i++) {
1959                 if (k != 0) {
1960                     col += qemu_log(",");
1961                 }
1962                 col += qemu_log("%s", tcg_get_arg_str(s, buf, sizeof(buf),
1963                                                       op->args[k++]));
1964             }
                 /* First constant argument: decode it symbolically for the ops
                    listed below.  On exit from this switch, i holds the number
                    of constant args already printed (0 or 1).  */
1965             switch (c) {
1966             case INDEX_op_brcond_i32:
1967             case INDEX_op_setcond_i32:
1968             case INDEX_op_movcond_i32:
1969             case INDEX_op_brcond2_i32:
1970             case INDEX_op_setcond2_i32:
1971             case INDEX_op_brcond_i64:
1972             case INDEX_op_setcond_i64:
1973             case INDEX_op_movcond_i64:
1974             case INDEX_op_cmp_vec:
                     /* Condition code: print its name if the table has one,
                        otherwise the raw value in hex.  */
1975                 if (op->args[k] < ARRAY_SIZE(cond_name)
1976                     && cond_name[op->args[k]]) {
1977                     col += qemu_log(",%s", cond_name[op->args[k++]]);
1978                 } else {
1979                     col += qemu_log(",$0x%" TCG_PRIlx, op->args[k++]);
1980                 }
1981                 i = 1;
1982                 break;
1983             case INDEX_op_qemu_ld_i32:
1984             case INDEX_op_qemu_st_i32:
1985             case INDEX_op_qemu_ld_i64:
1986             case INDEX_op_qemu_st_i64:
1987                 {
                         /* oi is read through the outer TCGOp pointer; from the
                            next declaration to the closing brace, 'op' names
                            the TCGMemOp and shadows the TCGOp pointer.  */
1988                     TCGMemOpIdx oi = op->args[k++];
1989                     TCGMemOp op = get_memop(oi);
1990                     unsigned ix = get_mmuidx(oi);
1991 
                         /* Bits outside alignment/bswap/sign/size have no name
                            in the tables above: dump the raw value instead.  */
1992                     if (op & ~(MO_AMASK | MO_BSWAP | MO_SSIZE)) {
1993                         col += qemu_log(",$0x%x,%u", op, ix);
1994                     } else {
1995                         const char *s_al, *s_op;
1996                         s_al = alignment_name[(op & MO_AMASK) >> MO_ASHIFT];
1997                         s_op = ldst_name[op & (MO_BSWAP | MO_SSIZE)];
1998                         col += qemu_log(",%s%s,%u", s_al, s_op, ix);
1999                     }
2000                     i = 1;
2001                 }
2002                 break;
2003             default:
2004                 i = 0;
2005                 break;
2006             }
                 /* Next constant argument: ops that carry a label print it as
                    $L<id>.  */
2007             switch (c) {
2008             case INDEX_op_set_label:
2009             case INDEX_op_br:
2010             case INDEX_op_brcond_i32:
2011             case INDEX_op_brcond_i64:
2012             case INDEX_op_brcond2_i32:
2013                 col += qemu_log("%s$L%d", k ? "," : "",
2014                                 arg_label(op->args[k])->id);
2015                 i++, k++;
2016                 break;
2017             default:
2018                 break;
2019             }
                 /* Any constant arguments not consumed above are printed in
                    hex.  */
2020             for (; i < nb_cargs; i++, k++) {
2021                 col += qemu_log("%s$0x%" TCG_PRIlx, k ? "," : "", op->args[k]);
2022             }
2023         }
2024         if (op->life) {
2025             unsigned life = op->life;
2026 
                 /* Pad with spaces so the liveness annotations start at
                    column 48.  */
2027             for (; col < 48; ++col) {
2028                 putc(' ', qemu_logfile);
2029             }
2030 
                 /* SYNC_ARG * 3 covers the two bits SYNC_ARG and SYNC_ARG << 1,
                    matching the two indexes tested in the loop below.  */
2031             if (life & (SYNC_ARG * 3)) {
2032                 qemu_log("  sync:");
2033                 for (i = 0; i < 2; ++i) {
2034                     if (life & (SYNC_ARG << i)) {
2035                         qemu_log(" %d", i);
2036                     }
2037                 }
2038             }
                 /* Discard everything below DEAD_ARG so that bit i of 'life'
                    corresponds to DEAD_ARG << i (assumes DEAD_ARG is a power
                    of two -- TODO confirm against its definition).  */
2039             life /= DEAD_ARG;
2040             if (life) {
2041                 qemu_log("  dead:");
2042                 for (i = 0; life; ++i, life >>= 1) {
2043                     if (life & 1) {
2044                         qemu_log(" %d", i);
2045                     }
2046                 }
2047             }
2048         }
2049         qemu_log("\n");
2050     }
2051 }
2052 
2053 /* we give more priority to constraints with fewer registers */
/* Return the sorting priority of argument k of def:
     - 0 if the constraint has neither TCG_CT_ALIAS nor TCG_CT_REG set;
     - otherwise TCG_TARGET_NB_REGS - n + 1, where n is 1 for an alias and
       the number of registers in the constraint's register set otherwise.
   A larger return value means a more restrictive constraint.  */
2054 static int get_constraint_priority(const TCGOpDef *def, int k)
2055 {
2056     const TCGArgConstraint *arg_ct;
2057 
2058     int i, n;
2059     arg_ct = &def->args_ct[k];
2060     if (arg_ct->ct & TCG_CT_ALIAS) {
2061         /* an alias is equivalent to a single register */
2062         n = 1;
2063     } else {
2064         if (!(arg_ct->ct & TCG_CT_REG))
2065             return 0;
             /* Count the registers allowed by this constraint.  */
2066         n = 0;
2067         for(i = 0; i < TCG_TARGET_NB_REGS; i++) {
2068             if (tcg_regset_test_reg(arg_ct->u.regs, i))
2069                 n++;
2070         }
2071     }
2072     return TCG_TARGET_NB_REGS - n + 1;
2073 }
2074 
2075 /* sort from highest priority to lowest */
/* Initialise def->sorted_args[start .. start + n - 1] with the argument
   indexes start .. start + n - 1, then reorder those entries so that
   get_constraint_priority() is non-increasing along the range.  Entries
   are swapped only when the earlier one has strictly lower priority.
   The priorities are recomputed for every comparison.  */
2076 static void sort_constraints(TCGOpDef *def, int start, int n)
2077 {
2078     int i, j, p1, p2, tmp;
2079 
2080     for(i = 0; i < n; i++)
2081         def->sorted_args[start + i] = start + i;
         /* Nothing to order for zero or one argument.  */
2082     if (n <= 1)
2083         return;
2084     for(i = 0; i < n - 1; i++) {
2085         for(j = i + 1; j < n; j++) {
2086             p1 = get_constraint_priority(def, def->sorted_args[start + i]);
2087             p2 = get_constraint_priority(def, def->sorted_args[start + j]);
2088             if (p1 < p2) {
2089                 tmp = def->sorted_args[start + i];
2090                 def->sorted_args[start + i] = def->sorted_args[start + j];
2091                 def->sorted_args[start + j] = tmp;
2092             }
2093         }
2094     }
2095 }
2096 
/* Fill in the argument constraints of every entry of tcg_op_defs.
   Opcodes flagged TCG_OPF_NOT_PRESENT, or with no input and no output
   arguments, are skipped.  For the others the constraint strings returned
   by tcg_target_op_def() are parsed into def->args_ct[], and the output
   and input arguments are then ordered via sort_constraints().
   The parameter 's' is not referenced in the body.  */
2097 static void process_op_defs(TCGContext *s)
2098 {
2099     TCGOpcode op;
2100 
2101     for (op = 0; op < NB_OPS; op++) {
2102         TCGOpDef *def = &tcg_op_defs[op];
2103         const TCGTargetOpDef *tdefs;
2104         TCGType type;
2105         int i, nb_args;
2106 
2107         if (def->flags & TCG_OPF_NOT_PRESENT) {
2108             continue;
2109         }
2110 
2111         nb_args = def->nb_iargs + def->nb_oargs;
2112         if (nb_args == 0) {
2113             continue;
2114         }
2115 
2116         tdefs = tcg_target_op_def(op);
2117         /* Missing TCGTargetOpDef entry. */
2118         tcg_debug_assert(tdefs != NULL);
2119 
2120         type = (def->flags & TCG_OPF_64BIT ? TCG_TYPE_I64 : TCG_TYPE_I32);
2121         for (i = 0; i < nb_args; i++) {
2122             const char *ct_str = tdefs->args_ct_str[i];
2123             /* Incomplete TCGTargetOpDef entry. */
2124             tcg_debug_assert(ct_str != NULL);
2125 
                 /* Start from an empty constraint and accumulate flags while
                    consuming the string one constraint at a time.  */
2126             def->args_ct[i].u.regs = 0;
2127             def->args_ct[i].ct = 0;
2128             while (*ct_str != '\0') {
2129                 switch(*ct_str) {
                     /* A digit aliases this argument to output argument
                        <digit>.  The asserts require the digit to be the
                        first character of the string, to name an existing
                        output, and that output to already have TCG_CT_REG.
                        ('0' ... '9' is the GNU C case-range extension.)  */
2130                 case '0' ... '9':
2131                     {
2132                         int oarg = *ct_str - '0';
2133                         tcg_debug_assert(ct_str == tdefs->args_ct_str[i]);
2134                         tcg_debug_assert(oarg < def->nb_oargs);
2135                         tcg_debug_assert(def->args_ct[oarg].ct & TCG_CT_REG);
2136                         /* TCG_CT_ALIAS is for the output arguments.
2137                            The input is tagged with TCG_CT_IALIAS. */
2138                         def->args_ct[i] = def->args_ct[oarg];
2139                         def->args_ct[oarg].ct |= TCG_CT_ALIAS;
2140                         def->args_ct[oarg].alias_index = i;
2141                         def->args_ct[i].ct |= TCG_CT_IALIAS;
2142                         def->args_ct[i].alias_index = oarg;
2143                     }
2144                     ct_str++;
2145                     break;
2146                 case '&':
2147                     def->args_ct[i].ct |= TCG_CT_NEWREG;
2148                     ct_str++;
2149                     break;
2150                 case 'i':
2151                     def->args_ct[i].ct |= TCG_CT_CONST;
2152                     ct_str++;
2153                     break;
2154                 default:
                         /* Anything else is handed to the backend parser,
                            which returns the updated string position.  */
2155                     ct_str = target_parse_constraint(&def->args_ct[i],
2156                                                      ct_str, type);
2157                     /* Typo in TCGTargetOpDef constraint. */
2158                     tcg_debug_assert(ct_str != NULL);
2159                 }
2160             }
2161         }
2162 
2163         /* TCGTargetOpDef entry with too much information? */
2164         tcg_debug_assert(i == TCG_MAX_OP_ARGS || tdefs->args_ct_str[i] == NULL);
2165 
2166         /* sort the constraints (XXX: this is just a heuristic) */
2167         sort_constraints(def, 0, def->nb_oargs);
2168         sort_constraints(def, def->nb_oargs, def->nb_iargs);
2169     }
2170 }
2171 
/* Unlink 'op' from the active op list s->ops and append it to s->free_ops
   (the list tcg_op_alloc takes ops from before allocating new ones), and
   decrement s->nb_ops.  With CONFIG_PROFILER the deleted-op counter is
   incremented as well.  */
2172 void tcg_op_remove(TCGContext *s, TCGOp *op)
2173 {
2174     QTAILQ_REMOVE(&s->ops, op, link);
2175     QTAILQ_INSERT_TAIL(&s->free_ops, op, link);
2176     s->nb_ops--;
2177 
2178 #ifdef CONFIG_PROFILER
2179     atomic_set(&s->prof.del_op_count, s->prof.del_op_count + 1);
2180 #endif
2181 }
2182 
/* Obtain a TCGOp for opcode 'opc' from the current context (tcg_ctx).
   The first entry of free_ops is reused when that list is non-empty;
   otherwise a new op is obtained with tcg_malloc.  All fields located
   before 'link' are zeroed, opc is stored, and nb_ops is incremented.
   The returned op is not linked into s->ops; the callers do that.  */
2183 static TCGOp *tcg_op_alloc(TCGOpcode opc)
2184 {
2185     TCGContext *s = tcg_ctx;
2186     TCGOp *op;
2187 
2188     if (likely(QTAILQ_EMPTY(&s->free_ops))) {
2189         op = tcg_malloc(sizeof(TCGOp));
2190     } else {
2191         op = QTAILQ_FIRST(&s->free_ops);
2192         QTAILQ_REMOVE(&s->free_ops, op, link);
2193     }
         /* Clear only the part of the structure that precedes the list
            linkage.  */
2194     memset(op, 0, offsetof(TCGOp, link));
2195     op->opc = opc;
2196     s->nb_ops++;
2197 
2198     return op;
2199 }
2200 
/* Allocate an op with opcode 'opc' and append it to the tail of
   tcg_ctx->ops.  Returns the new op so the caller can fill in its
   arguments.  */
2201 TCGOp *tcg_emit_op(TCGOpcode opc)
2202 {
2203     TCGOp *op = tcg_op_alloc(opc);
2204     QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
2205     return op;
2206 }
2207 
/* Allocate an op with opcode 'opc' and link it immediately before
   'old_op'.  Returns the new op.  The parameter 's' is not referenced in
   the body: allocation goes through tcg_op_alloc (which uses tcg_ctx) and
   QTAILQ_INSERT_BEFORE takes no list head.  */
2208 TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op, TCGOpcode opc)
2209 {
2210     TCGOp *new_op = tcg_op_alloc(opc);
2211     QTAILQ_INSERT_BEFORE(old_op, new_op, link);
2212     return new_op;
2213 }
2214 
/* Allocate an op with opcode 'opc' and link it into s->ops immediately
   after 'old_op'.  Returns the new op.  Note that the allocation itself
   goes through tcg_op_alloc, which uses tcg_ctx rather than 's'.  */
2215 TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op, TCGOpcode opc)
2216 {
2217     TCGOp *new_op = tcg_op_alloc(opc);
2218     QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link);
2219     return new_op;
2220 }
2221 
/* Bits stored in TCGTemp.state by the liveness passes below:
   TS_DEAD - the temp's value is not needed by later ops;
   TS_MEM  - the temp's value is (or must be) in its memory slot.  */
2222 #define TS_DEAD  1
2223 #define TS_MEM   2
2224 
/* Test the dead / needs-sync bit of argument n.  Both macros read a
   variable named 'arg_life' that must be in scope where they are used.  */
2225 #define IS_DEAD_ARG(n)   (arg_life & (DEAD_ARG << (n)))
2226 #define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n)))
2227 
2228 /* liveness analysis: end of function: all temps are dead, and globals
2229    should be in memory. */
/* Concretely: temps[0 .. nb_globals - 1] get TS_DEAD | TS_MEM and
   temps[nb_globals .. nb_temps - 1] get TS_DEAD.  */
2230 static void tcg_la_func_end(TCGContext *s)
2231 {
2232     int ng = s->nb_globals;
2233     int nt = s->nb_temps;
2234     int i;
2235 
2236     for (i = 0; i < ng; ++i) {
2237         s->temps[i].state = TS_DEAD | TS_MEM;
2238     }
2239     for (i = ng; i < nt; ++i) {
2240         s->temps[i].state = TS_DEAD;
2241     }
2242 }
2243 
2244 /* liveness analysis: end of basic block: all temps are dead, globals
2245    and local temps should be in memory. */
/* Differs from tcg_la_func_end only for non-global temps with temp_local
   set, which get TS_DEAD | TS_MEM instead of plain TS_DEAD.  */
2246 static void tcg_la_bb_end(TCGContext *s)
2247 {
2248     int ng = s->nb_globals;
2249     int nt = s->nb_temps;
2250     int i;
2251 
2252     for (i = 0; i < ng; ++i) {
2253         s->temps[i].state = TS_DEAD | TS_MEM;
2254     }
2255     for (i = ng; i < nt; ++i) {
2256         s->temps[i].state = (s->temps[i].temp_local
2257                              ? TS_DEAD | TS_MEM
2258                              : TS_DEAD);
2259     }
2260 }
2261 
2262 /* Liveness analysis: walk the ops from last to first and record in
2263    op->life which arguments are dead or need a sync.  Instructions
2264    updating only dead temporaries are removed.  */
/* The per-temp 'state' field (TS_DEAD / TS_MEM bits) carries the liveness
   information backwards from one op to the preceding one.  Besides plain
   removal, double-word add2/sub2/mul2 ops whose high (or, for mul2, low)
   result is dead are rewritten in place to the single-word opcode.  */
2265 static void liveness_pass_1(TCGContext *s)
2266 {
2267     int nb_globals = s->nb_globals;
2268     TCGOp *op, *op_prev;
2269 
         /* The walk starts at the last op, so begin from the end-of-function
            state.  */
2270     tcg_la_func_end(s);
2271 
         /* The _SAFE variant is needed because ops may be removed while
            iterating.  */
2272     QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, TCGOpHead, link, op_prev) {
2273         int i, nb_iargs, nb_oargs;
2274         TCGOpcode opc_new, opc_new2;
2275         bool have_opc_new2;
2276         TCGLifeData arg_life = 0;
2277         TCGTemp *arg_ts;
2278         TCGOpcode opc = op->opc;
2279         const TCGOpDef *def = &tcg_op_defs[opc];
2280 
2281         switch (opc) {
2282         case INDEX_op_call:
2283             {
2284                 int call_flags;
2285 
                     /* Calls carry their argument counts in the op itself;
                        the flags follow the function pointer argument.  */
2286                 nb_oargs = TCGOP_CALLO(op);
2287                 nb_iargs = TCGOP_CALLI(op);
2288                 call_flags = op->args[nb_oargs + nb_iargs + 1];
2289 
2290                 /* pure functions can be removed if their result is unused */
2291                 if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
2292                     for (i = 0; i < nb_oargs; i++) {
2293                         arg_ts = arg_temp(op->args[i]);
2294                         if (arg_ts->state != TS_DEAD) {
2295                             goto do_not_remove_call;
2296                         }
2297                     }
2298                     goto do_remove;
2299                 } else {
2300                 do_not_remove_call:
2301 
2302                     /* output args are dead */
2303                     for (i = 0; i < nb_oargs; i++) {
2304                         arg_ts = arg_temp(op->args[i]);
2305                         if (arg_ts->state & TS_DEAD) {
2306                             arg_life |= DEAD_ARG << i;
2307                         }
2308                         if (arg_ts->state & TS_MEM) {
2309                             arg_life |= SYNC_ARG << i;
2310                         }
2311                         arg_ts->state = TS_DEAD;
2312                     }
2313 
2314                     if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
2315                                         TCG_CALL_NO_READ_GLOBALS))) {
2316                         /* globals should go back to memory */
2317                         for (i = 0; i < nb_globals; i++) {
2318                             s->temps[i].state = TS_DEAD | TS_MEM;
2319                         }
2320                     } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
2321                         /* globals should be synced to memory */
2322                         for (i = 0; i < nb_globals; i++) {
2323                             s->temps[i].state |= TS_MEM;
2324                         }
2325                     }
2326 
                         /* arg_temp() can yield NULL for a call input
                            (presumably for TCG_CALL_DUMMY_ARG slots), hence
                            the NULL checks in the two loops below.  */
2327                     /* record arguments that die in this helper */
2328                     for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
2329                         arg_ts = arg_temp(op->args[i]);
2330                         if (arg_ts && arg_ts->state & TS_DEAD) {
2331                             arg_life |= DEAD_ARG << i;
2332                         }
2333                     }
2334                     /* input arguments are live for preceding opcodes */
2335                     for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
2336                         arg_ts = arg_temp(op->args[i]);
2337                         if (arg_ts) {
2338                             arg_ts->state &= ~TS_DEAD;
2339                         }
2340                     }
2341                 }
2342             }
2343             break;
2344         case INDEX_op_insn_start:
2345             break;
2346         case INDEX_op_discard:
2347             /* mark the temporary as dead */
2348             arg_temp(op->args[0])->state = TS_DEAD;
2349             break;
2350 
             /* add2/sub2: pick the single-word replacement opcode, then share
                the handling at do_addsub2.  */
2351         case INDEX_op_add2_i32:
2352             opc_new = INDEX_op_add_i32;
2353             goto do_addsub2;
2354         case INDEX_op_sub2_i32:
2355             opc_new = INDEX_op_sub_i32;
2356             goto do_addsub2;
2357         case INDEX_op_add2_i64:
2358             opc_new = INDEX_op_add_i64;
2359             goto do_addsub2;
2360         case INDEX_op_sub2_i64:
2361             opc_new = INDEX_op_sub_i64;
2362         do_addsub2:
2363             nb_iargs = 4;
2364             nb_oargs = 2;
2365             /* Test if the high part of the operation is dead, but not
2366                the low part.  The result can be optimized to a simple
2367                add or sub.  This happens often for x86_64 guest when the
2368                cpu mode is set to 32 bit.  */
2369             if (arg_temp(op->args[1])->state == TS_DEAD) {
2370                 if (arg_temp(op->args[0])->state == TS_DEAD) {
2371                     goto do_remove;
2372                 }
2373                 /* Replace the opcode and adjust the args in place,
2374                    leaving 3 unused args at the end.  */
2375                 op->opc = opc = opc_new;
2376                 op->args[1] = op->args[2];
2377                 op->args[2] = op->args[4];
2378                 /* Fall through and mark the single-word operation live.  */
2379                 nb_iargs = 2;
2380                 nb_oargs = 1;
2381             }
2382             goto do_not_remove;
2383 
             /* mul2: opc_new yields the low half; opc_new2 yields the high
                half and may only be used when have_opc_new2 is set.  */
2384         case INDEX_op_mulu2_i32:
2385             opc_new = INDEX_op_mul_i32;
2386             opc_new2 = INDEX_op_muluh_i32;
2387             have_opc_new2 = TCG_TARGET_HAS_muluh_i32;
2388             goto do_mul2;
2389         case INDEX_op_muls2_i32:
2390             opc_new = INDEX_op_mul_i32;
2391             opc_new2 = INDEX_op_mulsh_i32;
2392             have_opc_new2 = TCG_TARGET_HAS_mulsh_i32;
2393             goto do_mul2;
2394         case INDEX_op_mulu2_i64:
2395             opc_new = INDEX_op_mul_i64;
2396             opc_new2 = INDEX_op_muluh_i64;
2397             have_opc_new2 = TCG_TARGET_HAS_muluh_i64;
2398             goto do_mul2;
2399         case INDEX_op_muls2_i64:
2400             opc_new = INDEX_op_mul_i64;
2401             opc_new2 = INDEX_op_mulsh_i64;
2402             have_opc_new2 = TCG_TARGET_HAS_mulsh_i64;
2403             goto do_mul2;
2404         do_mul2:
2405             nb_iargs = 2;
2406             nb_oargs = 2;
2407             if (arg_temp(op->args[1])->state == TS_DEAD) {
2408                 if (arg_temp(op->args[0])->state == TS_DEAD) {
2409                     /* Both parts of the operation are dead.  */
2410                     goto do_remove;
2411                 }
2412                 /* The high part of the operation is dead; generate the low. */
2413                 op->opc = opc = opc_new;
2414                 op->args[1] = op->args[2];
2415                 op->args[2] = op->args[3];
2416             } else if (arg_temp(op->args[0])->state == TS_DEAD && have_opc_new2) {
2417                 /* The low part of the operation is dead; generate the high. */
2418                 op->opc = opc = opc_new2;
2419                 op->args[0] = op->args[1];
2420                 op->args[1] = op->args[2];
2421                 op->args[2] = op->args[3];
2422             } else {
                     /* Keep the double-word op unchanged (nb_oargs stays 2).  */
2423                 goto do_not_remove;
2424             }
2425             /* Mark the single-word operation live.  */
2426             nb_oargs = 1;
2427             goto do_not_remove;
2428 
2429         default:
2430             /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
2431             nb_iargs = def->nb_iargs;
2432             nb_oargs = def->nb_oargs;
2433 
2434             /* Test if the operation can be removed because all
2435                its outputs are dead. We assume that nb_oargs == 0
2436                implies side effects */
2437             if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) {
2438                 for (i = 0; i < nb_oargs; i++) {
2439                     if (arg_temp(op->args[i])->state != TS_DEAD) {
2440                         goto do_not_remove;
2441                     }
2442                 }
                 /* do_remove is also the target of gotos from the call, add2/sub2
                    and mul2 cases above.  The removed op is moved to
                    s->free_ops; the store to op->life after the switch still
                    executes for it.  */
2443             do_remove:
2444                 tcg_op_remove(s, op);
2445             } else {
                 /* do_not_remove is likewise shared with the add2/sub2 and mul2
                    cases, which arrive here with their own nb_iargs/nb_oargs.
                    Note that 'def' still refers to the original opcode even
                    when op->opc was rewritten above.  */
2446             do_not_remove:
2447                 /* output args are dead */
2448                 for (i = 0; i < nb_oargs; i++) {
2449                     arg_ts = arg_temp(op->args[i]);
2450                     if (arg_ts->state & TS_DEAD) {
2451                         arg_life |= DEAD_ARG << i;
2452                     }
2453                     if (arg_ts->state & TS_MEM) {
2454                         arg_life |= SYNC_ARG << i;
2455                     }
2456                     arg_ts->state = TS_DEAD;
2457                 }
2458 
2459                 /* if end of basic block, update */
2460                 if (def->flags & TCG_OPF_BB_END) {
2461                     tcg_la_bb_end(s);
2462                 } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
2463                     /* globals should be synced to memory */
2464                     for (i = 0; i < nb_globals; i++) {
2465                         s->temps[i].state |= TS_MEM;
2466                     }
2467                 }
2468 
2469                 /* record arguments that die in this opcode */
2470                 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
2471                     arg_ts = arg_temp(op->args[i]);
2472                     if (arg_ts->state & TS_DEAD) {
2473                         arg_life |= DEAD_ARG << i;
2474                     }
2475                 }
2476                 /* input arguments are live for preceding opcodes */
2477                 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
2478                     arg_temp(op->args[i])->state &= ~TS_DEAD;
2479                 }
2480             }
2481             break;
2482         }
             /* Publish the dead/sync bits computed for this op.  */
2483         op->life = arg_life;
2484     }
2485 }
2486 
2487 /* Liveness analysis: Convert indirect regs to direct temporaries.  */
/* Every global with indirect_reg set gets a freshly allocated temp (linked
   through state_ptr).  The ops are then walked forwards: arguments naming
   such a global are replaced by its direct temp, a load is inserted before
   an op that reads the global while it is TS_DEAD, and a store is inserted
   after an op whose output has the sync bit set in op->life.
   Returns true if at least one op argument was replaced.  */
2488 static bool liveness_pass_2(TCGContext *s)
2489 {
2490     int nb_globals = s->nb_globals;
2491     int nb_temps, i;
2492     bool changes = false;
2493     TCGOp *op, *op_next;
2494 
2495     /* Create a temporary for each indirect global.  */
2496     for (i = 0; i < nb_globals; ++i) {
2497         TCGTemp *its = &s->temps[i];
2498         if (its->indirect_reg) {
2499             TCGTemp *dts = tcg_temp_alloc(s);
2500             dts->type = its->type;
2501             dts->base_type = its->base_type;
2502             its->state_ptr = dts;
2503         } else {
2504             its->state_ptr = NULL;
2505         }
2506         /* All globals begin dead.  */
2507         its->state = TS_DEAD;
2508     }
         /* Remaining temps (including the ones just allocated, since
            s->nb_temps is read only now) have no direct counterpart.  */
2509     for (nb_temps = s->nb_temps; i < nb_temps; ++i) {
2510         TCGTemp *its = &s->temps[i];
2511         its->state_ptr = NULL;
2512         its->state = TS_DEAD;
2513     }
2514 
         /* The _SAFE variant is used because ops are inserted around the
            current one while iterating.  */
2515     QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
2516         TCGOpcode opc = op->opc;
2517         const TCGOpDef *def = &tcg_op_defs[opc];
             /* Read by the IS_DEAD_ARG / NEED_SYNC_ARG macros below.  */
2518         TCGLifeData arg_life = op->life;
2519         int nb_iargs, nb_oargs, call_flags;
2520         TCGTemp *arg_ts, *dir_ts;
2521 
2522         if (opc == INDEX_op_call) {
2523             nb_oargs = TCGOP_CALLO(op);
2524             nb_iargs = TCGOP_CALLI(op);
2525             call_flags = op->args[nb_oargs + nb_iargs + 1];
2526         } else {
2527             nb_iargs = def->nb_iargs;
2528             nb_oargs = def->nb_oargs;
2529 
2530             /* Set flags similar to how calls require.  */
2531             if (def->flags & TCG_OPF_BB_END) {
2532                 /* Like writing globals: save_globals */
2533                 call_flags = 0;
2534             } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
2535                 /* Like reading globals: sync_globals */
2536                 call_flags = TCG_CALL_NO_WRITE_GLOBALS;
2537             } else {
2538                 /* No effect on globals.  */
2539                 call_flags = (TCG_CALL_NO_READ_GLOBALS |
2540                               TCG_CALL_NO_WRITE_GLOBALS);
2541             }
2542         }
2543 
2544         /* Make sure that input arguments are available.  */
2545         for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
2546             arg_ts = arg_temp(op->args[i]);
2547             if (arg_ts) {
2548                 dir_ts = arg_ts->state_ptr;
                     /* Only indirect globals have a direct temp; reload it
                        from the global's memory slot if it is currently
                        dead.  */
2549                 if (dir_ts && arg_ts->state == TS_DEAD) {
2550                     TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32
2551                                       ? INDEX_op_ld_i32
2552                                       : INDEX_op_ld_i64);
2553                     TCGOp *lop = tcg_op_insert_before(s, op, lopc);
2554 
2555                     lop->args[0] = temp_arg(dir_ts);
2556                     lop->args[1] = temp_arg(arg_ts->mem_base);
2557                     lop->args[2] = arg_ts->mem_offset;
2558 
2559                     /* Loaded, but synced with memory.  */
2560                     arg_ts->state = TS_MEM;
2561                 }
2562             }
2563         }
2564 
2565         /* Perform input replacement, and mark inputs that became dead.
2566            No action is required except keeping temp_state up to date
2567            so that we reload when needed.  */
2568         for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
2569             arg_ts = arg_temp(op->args[i]);
2570             if (arg_ts) {
2571                 dir_ts = arg_ts->state_ptr;
2572                 if (dir_ts) {
2573                     op->args[i] = temp_arg(dir_ts);
2574                     changes = true;
2575                     if (IS_DEAD_ARG(i)) {
2576                         arg_ts->state = TS_DEAD;
2577                     }
2578                 }
2579             }
2580         }
2581 
2582         /* Liveness analysis should ensure that the following are
2583            all correct, for call sites and basic block end points.  */
2584         if (call_flags & TCG_CALL_NO_READ_GLOBALS) {
2585             /* Nothing to do */
2586         } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) {
2587             for (i = 0; i < nb_globals; ++i) {
2588                 /* Liveness should see that globals are synced back,
2589                    that is, either TS_DEAD or TS_MEM.  */
2590                 arg_ts = &s->temps[i];
2591                 tcg_debug_assert(arg_ts->state_ptr == 0
2592                                  || arg_ts->state != 0);
2593             }
2594         } else {
2595             for (i = 0; i < nb_globals; ++i) {
2596                 /* Liveness should see that globals are saved back,
2597                    that is, TS_DEAD, waiting to be reloaded.  */
2598                 arg_ts = &s->temps[i];
2599                 tcg_debug_assert(arg_ts->state_ptr == 0
2600                                  || arg_ts->state == TS_DEAD);
2601             }
2602         }
2603 
2604         /* Outputs become available.  */
2605         for (i = 0; i < nb_oargs; i++) {
2606             arg_ts = arg_temp(op->args[i]);
2607             dir_ts = arg_ts->state_ptr;
2608             if (!dir_ts) {
2609                 continue;
2610             }
2611             op->args[i] = temp_arg(dir_ts);
2612             changes = true;
2613 
2614             /* The output is now live and modified.  */
2615             arg_ts->state = 0;
2616 
2617             /* Sync outputs upon their last write.  */
2618             if (NEED_SYNC_ARG(i)) {
2619                 TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
2620                                   ? INDEX_op_st_i32
2621                                   : INDEX_op_st_i64);
2622                 TCGOp *sop = tcg_op_insert_after(s, op, sopc);
2623 
2624                 sop->args[0] = temp_arg(dir_ts);
2625                 sop->args[1] = temp_arg(arg_ts->mem_base);
2626                 sop->args[2] = arg_ts->mem_offset;
2627 
2628                 arg_ts->state = TS_MEM;
2629             }
2630             /* Drop outputs that are dead.  */
2631             if (IS_DEAD_ARG(i)) {
2632                 arg_ts->state = TS_DEAD;
2633             }
2634         }
2635     }
2636 
2637     return changes;
2638 }
2639 
2640 #ifdef CONFIG_DEBUG_TCG
/* Debug helper: print, via printf, the current location of every temp
   (register name, memory slot, constant value or "D" for dead), followed
   by the temp held in each host register that is currently assigned.  */
2641 static void dump_regs(TCGContext *s)
2642 {
2643     TCGTemp *ts;
2644     int i;
2645     char buf[64];
2646 
2647     for(i = 0; i < s->nb_temps; i++) {
2648         ts = &s->temps[i];
2649         printf("  %10s: ", tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts));
2650         switch(ts->val_type) {
2651         case TEMP_VAL_REG:
2652             printf("%s", tcg_target_reg_names[ts->reg]);
2653             break;
2654         case TEMP_VAL_MEM:
                 /* Printed as offset(base register).  */
2655             printf("%d(%s)", (int)ts->mem_offset,
2656                    tcg_target_reg_names[ts->mem_base->reg]);
2657             break;
2658         case TEMP_VAL_CONST:
2659             printf("$0x%" TCG_PRIlx, ts->val);
2660             break;
2661         case TEMP_VAL_DEAD:
2662             printf("D");
2663             break;
2664         default:
2665             printf("???");
2666             break;
2667         }
2668         printf("\n");
2669     }
2670 
         /* Reverse mapping: host register -> temp.  */
2671     for(i = 0; i < TCG_TARGET_NB_REGS; i++) {
2672         if (s->reg_to_temp[i] != NULL) {
2673             printf("%s: %s\n",
2674                    tcg_target_reg_names[i],
2675                    tcg_get_arg_str_ptr(s, buf, sizeof(buf), s->reg_to_temp[i]));
2676         }
2677     }
2678 }
2679 
/* Debug helper: verify that s->reg_to_temp[] and the per-temp
   val_type/reg fields agree with each other.  On the first mismatch the
   register state is dumped with dump_regs() and tcg_abort() is called.  */
2680 static void check_regs(TCGContext *s)
2681 {
2682     int reg;
2683     int k;
2684     TCGTemp *ts;
2685     char buf[64];
2686 
         /* Every temp recorded for a register must itself claim to live in
            that register.  */
2687     for (reg = 0; reg < TCG_TARGET_NB_REGS; reg++) {
2688         ts = s->reg_to_temp[reg];
2689         if (ts != NULL) {
2690             if (ts->val_type != TEMP_VAL_REG || ts->reg != reg) {
2691                 printf("Inconsistency for register %s:\n",
2692                        tcg_target_reg_names[reg]);
2693                 goto fail;
2694             }
2695         }
2696     }
         /* Conversely, every non-fixed temp that claims a register must be
            the one recorded for it.  */
2697     for (k = 0; k < s->nb_temps; k++) {
2698         ts = &s->temps[k];
2699         if (ts->val_type == TEMP_VAL_REG && !ts->fixed_reg
2700             && s->reg_to_temp[ts->reg] != ts) {
2701             printf("Inconsistency for temp %s:\n",
2702                    tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts));
             /* Shared failure exit, also reached from the first loop.  */
2703         fail:
2704             printf("reg state:\n");
2705             dump_regs(s);
2706             tcg_abort();
2707         }
2708     }
2709 }
2710 #endif
2711 
/* Assign a frame slot of sizeof(tcg_target_long) bytes to 'ts'.
   Except on 64-bit sparc hosts, s->current_frame_offset is first rounded
   up to a multiple of sizeof(tcg_target_long).  Aborts via tcg_abort() if
   the slot would extend past s->frame_end.  On success ts->mem_offset,
   ts->mem_base and ts->mem_allocated are set and the frame offset is
   advanced past the slot.  */
2712 static void temp_allocate_frame(TCGContext *s, TCGTemp *ts)
2713 {
2714 #if !(defined(__sparc__) && TCG_TARGET_REG_BITS == 64)
2715     /* Sparc64 stack is accessed with offset of 2047 */
2716     s->current_frame_offset = (s->current_frame_offset +
2717                                (tcg_target_long)sizeof(tcg_target_long) - 1) &
2718         ~(sizeof(tcg_target_long) - 1);
2719 #endif
2720     if (s->current_frame_offset + (tcg_target_long)sizeof(tcg_target_long) >
2721         s->frame_end) {
2722         tcg_abort();
2723     }
2724     ts->mem_offset = s->current_frame_offset;
2725     ts->mem_base = s->frame_temp;
2726     ts->mem_allocated = 1;
2727     s->current_frame_offset += sizeof(tcg_target_long);
2728 }
2729 
2730 static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet);
2731 
2732 /* Mark a temporary as free or dead.  If 'free_or_dead' is negative,
2733    mark it free; otherwise mark it dead.  */
2734 static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead)
2735 {
2736     if (ts->fixed_reg) {
2737         return;
2738     }
2739     if (ts->val_type == TEMP_VAL_REG) {
2740         s->reg_to_temp[ts->reg] = NULL;
2741     }
2742     ts->val_type = (free_or_dead < 0
2743                     || ts->temp_local
2744                     || ts->temp_global
2745                     ? TEMP_VAL_MEM : TEMP_VAL_DEAD);
2746 }
2747 
2748 /* Mark a temporary as dead.  */
2749 static inline void temp_dead(TCGContext *s, TCGTemp *ts)
2750 {
2751     temp_free_or_dead(s, ts, 1);
2752 }
2753 
2754 /* Sync a temporary to memory. 'allocated_regs' is used in case a temporary
2755    registers needs to be allocated to store a constant.  If 'free_or_dead'
2756    is non-zero, subsequently release the temporary; if it is positive, the
2757    temp is dead; if it is negative, the temp is free.  */
2758 static void temp_sync(TCGContext *s, TCGTemp *ts,
2759                       TCGRegSet allocated_regs, int free_or_dead)
2760 {
2761     if (ts->fixed_reg) {
2762         return;
2763     }
2764     if (!ts->mem_coherent) {
2765         if (!ts->mem_allocated) {
2766             temp_allocate_frame(s, ts);
2767         }
2768         switch (ts->val_type) {
2769         case TEMP_VAL_CONST:
2770             /* If we're going to free the temp immediately, then we won't
2771                require it later in a register, so attempt to store the
2772                constant to memory directly.  */
2773             if (free_or_dead
2774                 && tcg_out_sti(s, ts->type, ts->val,
2775                                ts->mem_base->reg, ts->mem_offset)) {
2776                 break;
2777             }
2778             temp_load(s, ts, tcg_target_available_regs[ts->type],
2779                       allocated_regs);
2780             /* fallthrough */
2781 
2782         case TEMP_VAL_REG:
2783             tcg_out_st(s, ts->type, ts->reg,
2784                        ts->mem_base->reg, ts->mem_offset);
2785             break;
2786 
2787         case TEMP_VAL_MEM:
2788             break;
2789 
2790         case TEMP_VAL_DEAD:
2791         default:
2792             tcg_abort();
2793         }
2794         ts->mem_coherent = 1;
2795     }
2796     if (free_or_dead) {
2797         temp_free_or_dead(s, ts, free_or_dead);
2798     }
2799 }
2800 
2801 /* free register 'reg' by spilling the corresponding temporary if necessary */
2802 static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs)
2803 {
2804     TCGTemp *ts = s->reg_to_temp[reg];
2805     if (ts != NULL) {
2806         temp_sync(s, ts, allocated_regs, -1);
2807     }
2808 }
2809 
2810 /* Allocate a register belonging to reg1 & ~reg2 */
2811 static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet desired_regs,
2812                             TCGRegSet allocated_regs, bool rev)
2813 {
2814     int i, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
2815     const int *order;
2816     TCGReg reg;
2817     TCGRegSet reg_ct;
2818 
2819     reg_ct = desired_regs & ~allocated_regs;
2820     order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
2821 
2822     /* first try free registers */
2823     for(i = 0; i < n; i++) {
2824         reg = order[i];
2825         if (tcg_regset_test_reg(reg_ct, reg) && s->reg_to_temp[reg] == NULL)
2826             return reg;
2827     }
2828 
2829     /* XXX: do better spill choice */
2830     for(i = 0; i < n; i++) {
2831         reg = order[i];
2832         if (tcg_regset_test_reg(reg_ct, reg)) {
2833             tcg_reg_free(s, reg, allocated_regs);
2834             return reg;
2835         }
2836     }
2837 
2838     tcg_abort();
2839 }
2840 
2841 /* Make sure the temporary is in a register.  If needed, allocate the register
2842    from DESIRED while avoiding ALLOCATED.  */
2843 static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
2844                       TCGRegSet allocated_regs)
2845 {
2846     TCGReg reg;
2847 
2848     switch (ts->val_type) {
2849     case TEMP_VAL_REG:
2850         return;
2851     case TEMP_VAL_CONST:
2852         reg = tcg_reg_alloc(s, desired_regs, allocated_regs, ts->indirect_base);
2853         tcg_out_movi(s, ts->type, reg, ts->val);
2854         ts->mem_coherent = 0;
2855         break;
2856     case TEMP_VAL_MEM:
2857         reg = tcg_reg_alloc(s, desired_regs, allocated_regs, ts->indirect_base);
2858         tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
2859         ts->mem_coherent = 1;
2860         break;
2861     case TEMP_VAL_DEAD:
2862     default:
2863         tcg_abort();
2864     }
2865     ts->reg = reg;
2866     ts->val_type = TEMP_VAL_REG;
2867     s->reg_to_temp[reg] = ts;
2868 }
2869 
2870 /* Save a temporary to memory. 'allocated_regs' is used in case a
2871    temporary registers needs to be allocated to store a constant.  */
2872 static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
2873 {
2874     /* The liveness analysis already ensures that globals are back
2875        in memory. Keep an tcg_debug_assert for safety. */
2876     tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || ts->fixed_reg);
2877 }
2878 
2879 /* save globals to their canonical location and assume they can be
2880    modified be the following code. 'allocated_regs' is used in case a
2881    temporary registers needs to be allocated to store a constant. */
2882 static void save_globals(TCGContext *s, TCGRegSet allocated_regs)
2883 {
2884     int i, n;
2885 
2886     for (i = 0, n = s->nb_globals; i < n; i++) {
2887         temp_save(s, &s->temps[i], allocated_regs);
2888     }
2889 }
2890 
2891 /* sync globals to their canonical location and assume they can be
2892    read by the following code. 'allocated_regs' is used in case a
2893    temporary registers needs to be allocated to store a constant. */
2894 static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
2895 {
2896     int i, n;
2897 
2898     for (i = 0, n = s->nb_globals; i < n; i++) {
2899         TCGTemp *ts = &s->temps[i];
2900         tcg_debug_assert(ts->val_type != TEMP_VAL_REG
2901                          || ts->fixed_reg
2902                          || ts->mem_coherent);
2903     }
2904 }
2905 
2906 /* at the end of a basic block, we assume all temporaries are dead and
2907    all globals are stored at their canonical location. */
2908 static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
2909 {
2910     int i;
2911 
2912     for (i = s->nb_globals; i < s->nb_temps; i++) {
2913         TCGTemp *ts = &s->temps[i];
2914         if (ts->temp_local) {
2915             temp_save(s, ts, allocated_regs);
2916         } else {
2917             /* The liveness analysis already ensures that temps are dead.
2918                Keep an tcg_debug_assert for safety. */
2919             tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
2920         }
2921     }
2922 
2923     save_globals(s, allocated_regs);
2924 }
2925 
2926 static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots,
2927                                   tcg_target_ulong val, TCGLifeData arg_life)
2928 {
2929     if (ots->fixed_reg) {
2930         /* For fixed registers, we do not do any constant propagation.  */
2931         tcg_out_movi(s, ots->type, ots->reg, val);
2932         return;
2933     }
2934 
2935     /* The movi is not explicitly generated here.  */
2936     if (ots->val_type == TEMP_VAL_REG) {
2937         s->reg_to_temp[ots->reg] = NULL;
2938     }
2939     ots->val_type = TEMP_VAL_CONST;
2940     ots->val = val;
2941     ots->mem_coherent = 0;
2942     if (NEED_SYNC_ARG(0)) {
2943         temp_sync(s, ots, s->reserved_regs, IS_DEAD_ARG(0));
2944     } else if (IS_DEAD_ARG(0)) {
2945         temp_dead(s, ots);
2946     }
2947 }
2948 
2949 static void tcg_reg_alloc_movi(TCGContext *s, const TCGOp *op)
2950 {
2951     TCGTemp *ots = arg_temp(op->args[0]);
2952     tcg_target_ulong val = op->args[1];
2953 
2954     tcg_reg_alloc_do_movi(s, ots, val, op->life);
2955 }
2956 
2957 static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op)
2958 {
2959     const TCGLifeData arg_life = op->life;
2960     TCGRegSet allocated_regs;
2961     TCGTemp *ts, *ots;
2962     TCGType otype, itype;
2963 
2964     allocated_regs = s->reserved_regs;
2965     ots = arg_temp(op->args[0]);
2966     ts = arg_temp(op->args[1]);
2967 
2968     /* Note that otype != itype for no-op truncation.  */
2969     otype = ots->type;
2970     itype = ts->type;
2971 
2972     if (ts->val_type == TEMP_VAL_CONST) {
2973         /* propagate constant or generate sti */
2974         tcg_target_ulong val = ts->val;
2975         if (IS_DEAD_ARG(1)) {
2976             temp_dead(s, ts);
2977         }
2978         tcg_reg_alloc_do_movi(s, ots, val, arg_life);
2979         return;
2980     }
2981 
2982     /* If the source value is in memory we're going to be forced
2983        to have it in a register in order to perform the copy.  Copy
2984        the SOURCE value into its own register first, that way we
2985        don't have to reload SOURCE the next time it is used. */
2986     if (ts->val_type == TEMP_VAL_MEM) {
2987         temp_load(s, ts, tcg_target_available_regs[itype], allocated_regs);
2988     }
2989 
2990     tcg_debug_assert(ts->val_type == TEMP_VAL_REG);
2991     if (IS_DEAD_ARG(0) && !ots->fixed_reg) {
2992         /* mov to a non-saved dead register makes no sense (even with
2993            liveness analysis disabled). */
2994         tcg_debug_assert(NEED_SYNC_ARG(0));
2995         if (!ots->mem_allocated) {
2996             temp_allocate_frame(s, ots);
2997         }
2998         tcg_out_st(s, otype, ts->reg, ots->mem_base->reg, ots->mem_offset);
2999         if (IS_DEAD_ARG(1)) {
3000             temp_dead(s, ts);
3001         }
3002         temp_dead(s, ots);
3003     } else {
3004         if (IS_DEAD_ARG(1) && !ts->fixed_reg && !ots->fixed_reg) {
3005             /* the mov can be suppressed */
3006             if (ots->val_type == TEMP_VAL_REG) {
3007                 s->reg_to_temp[ots->reg] = NULL;
3008             }
3009             ots->reg = ts->reg;
3010             temp_dead(s, ts);
3011         } else {
3012             if (ots->val_type != TEMP_VAL_REG) {
3013                 /* When allocating a new register, make sure to not spill the
3014                    input one. */
3015                 tcg_regset_set_reg(allocated_regs, ts->reg);
3016                 ots->reg = tcg_reg_alloc(s, tcg_target_available_regs[otype],
3017                                          allocated_regs, ots->indirect_base);
3018             }
3019             tcg_out_mov(s, otype, ots->reg, ts->reg);
3020         }
3021         ots->val_type = TEMP_VAL_REG;
3022         ots->mem_coherent = 0;
3023         s->reg_to_temp[ots->reg] = ots;
3024         if (NEED_SYNC_ARG(0)) {
3025             temp_sync(s, ots, allocated_regs, 0);
3026         }
3027     }
3028 }
3029 
3030 static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
3031 {
3032     const TCGLifeData arg_life = op->life;
3033     const TCGOpDef * const def = &tcg_op_defs[op->opc];
3034     TCGRegSet i_allocated_regs;
3035     TCGRegSet o_allocated_regs;
3036     int i, k, nb_iargs, nb_oargs;
3037     TCGReg reg;
3038     TCGArg arg;
3039     const TCGArgConstraint *arg_ct;
3040     TCGTemp *ts;
3041     TCGArg new_args[TCG_MAX_OP_ARGS];
3042     int const_args[TCG_MAX_OP_ARGS];
3043 
3044     nb_oargs = def->nb_oargs;
3045     nb_iargs = def->nb_iargs;
3046 
3047     /* copy constants */
3048     memcpy(new_args + nb_oargs + nb_iargs,
3049            op->args + nb_oargs + nb_iargs,
3050            sizeof(TCGArg) * def->nb_cargs);
3051 
3052     i_allocated_regs = s->reserved_regs;
3053     o_allocated_regs = s->reserved_regs;
3054 
3055     /* satisfy input constraints */
3056     for (k = 0; k < nb_iargs; k++) {
3057         i = def->sorted_args[nb_oargs + k];
3058         arg = op->args[i];
3059         arg_ct = &def->args_ct[i];
3060         ts = arg_temp(arg);
3061 
3062         if (ts->val_type == TEMP_VAL_CONST
3063             && tcg_target_const_match(ts->val, ts->type, arg_ct)) {
3064             /* constant is OK for instruction */
3065             const_args[i] = 1;
3066             new_args[i] = ts->val;
3067             goto iarg_end;
3068         }
3069 
3070         temp_load(s, ts, arg_ct->u.regs, i_allocated_regs);
3071 
3072         if (arg_ct->ct & TCG_CT_IALIAS) {
3073             if (ts->fixed_reg) {
3074                 /* if fixed register, we must allocate a new register
3075                    if the alias is not the same register */
3076                 if (arg != op->args[arg_ct->alias_index])
3077                     goto allocate_in_reg;
3078             } else {
3079                 /* if the input is aliased to an output and if it is
3080                    not dead after the instruction, we must allocate
3081                    a new register and move it */
3082                 if (!IS_DEAD_ARG(i)) {
3083                     goto allocate_in_reg;
3084                 }
3085                 /* check if the current register has already been allocated
3086                    for another input aliased to an output */
3087                 int k2, i2;
3088                 for (k2 = 0 ; k2 < k ; k2++) {
3089                     i2 = def->sorted_args[nb_oargs + k2];
3090                     if ((def->args_ct[i2].ct & TCG_CT_IALIAS) &&
3091                         (new_args[i2] == ts->reg)) {
3092                         goto allocate_in_reg;
3093                     }
3094                 }
3095             }
3096         }
3097         reg = ts->reg;
3098         if (tcg_regset_test_reg(arg_ct->u.regs, reg)) {
3099             /* nothing to do : the constraint is satisfied */
3100         } else {
3101         allocate_in_reg:
3102             /* allocate a new register matching the constraint
3103                and move the temporary register into it */
3104             reg = tcg_reg_alloc(s, arg_ct->u.regs, i_allocated_regs,
3105                                 ts->indirect_base);
3106             tcg_out_mov(s, ts->type, reg, ts->reg);
3107         }
3108         new_args[i] = reg;
3109         const_args[i] = 0;
3110         tcg_regset_set_reg(i_allocated_regs, reg);
3111     iarg_end: ;
3112     }
3113 
3114     /* mark dead temporaries and free the associated registers */
3115     for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
3116         if (IS_DEAD_ARG(i)) {
3117             temp_dead(s, arg_temp(op->args[i]));
3118         }
3119     }
3120 
3121     if (def->flags & TCG_OPF_BB_END) {
3122         tcg_reg_alloc_bb_end(s, i_allocated_regs);
3123     } else {
3124         if (def->flags & TCG_OPF_CALL_CLOBBER) {
3125             /* XXX: permit generic clobber register list ? */
3126             for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
3127                 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
3128                     tcg_reg_free(s, i, i_allocated_regs);
3129                 }
3130             }
3131         }
3132         if (def->flags & TCG_OPF_SIDE_EFFECTS) {
3133             /* sync globals if the op has side effects and might trigger
3134                an exception. */
3135             sync_globals(s, i_allocated_regs);
3136         }
3137 
3138         /* satisfy the output constraints */
3139         for(k = 0; k < nb_oargs; k++) {
3140             i = def->sorted_args[k];
3141             arg = op->args[i];
3142             arg_ct = &def->args_ct[i];
3143             ts = arg_temp(arg);
3144             if ((arg_ct->ct & TCG_CT_ALIAS)
3145                 && !const_args[arg_ct->alias_index]) {
3146                 reg = new_args[arg_ct->alias_index];
3147             } else if (arg_ct->ct & TCG_CT_NEWREG) {
3148                 reg = tcg_reg_alloc(s, arg_ct->u.regs,
3149                                     i_allocated_regs | o_allocated_regs,
3150                                     ts->indirect_base);
3151             } else {
3152                 /* if fixed register, we try to use it */
3153                 reg = ts->reg;
3154                 if (ts->fixed_reg &&
3155                     tcg_regset_test_reg(arg_ct->u.regs, reg)) {
3156                     goto oarg_end;
3157                 }
3158                 reg = tcg_reg_alloc(s, arg_ct->u.regs, o_allocated_regs,
3159                                     ts->indirect_base);
3160             }
3161             tcg_regset_set_reg(o_allocated_regs, reg);
3162             /* if a fixed register is used, then a move will be done afterwards */
3163             if (!ts->fixed_reg) {
3164                 if (ts->val_type == TEMP_VAL_REG) {
3165                     s->reg_to_temp[ts->reg] = NULL;
3166                 }
3167                 ts->val_type = TEMP_VAL_REG;
3168                 ts->reg = reg;
3169                 /* temp value is modified, so the value kept in memory is
3170                    potentially not the same */
3171                 ts->mem_coherent = 0;
3172                 s->reg_to_temp[reg] = ts;
3173             }
3174         oarg_end:
3175             new_args[i] = reg;
3176         }
3177     }
3178 
3179     /* emit instruction */
3180     if (def->flags & TCG_OPF_VECTOR) {
3181         tcg_out_vec_op(s, op->opc, TCGOP_VECL(op), TCGOP_VECE(op),
3182                        new_args, const_args);
3183     } else {
3184         tcg_out_op(s, op->opc, new_args, const_args);
3185     }
3186 
3187     /* move the outputs in the correct register if needed */
3188     for(i = 0; i < nb_oargs; i++) {
3189         ts = arg_temp(op->args[i]);
3190         reg = new_args[i];
3191         if (ts->fixed_reg && ts->reg != reg) {
3192             tcg_out_mov(s, ts->type, ts->reg, reg);
3193         }
3194         if (NEED_SYNC_ARG(i)) {
3195             temp_sync(s, ts, o_allocated_regs, IS_DEAD_ARG(i));
3196         } else if (IS_DEAD_ARG(i)) {
3197             temp_dead(s, ts);
3198         }
3199     }
3200 }
3201 
/* STACK_DIR(x) yields X unchanged, or X negated when
   TCG_TARGET_STACK_GROWSUP is defined.  */
#ifndef TCG_TARGET_STACK_GROWSUP
#define STACK_DIR(x) (x)
#else
#define STACK_DIR(x) (-(x))
#endif
3207 
3208 static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
3209 {
3210     const int nb_oargs = TCGOP_CALLO(op);
3211     const int nb_iargs = TCGOP_CALLI(op);
3212     const TCGLifeData arg_life = op->life;
3213     int flags, nb_regs, i;
3214     TCGReg reg;
3215     TCGArg arg;
3216     TCGTemp *ts;
3217     intptr_t stack_offset;
3218     size_t call_stack_size;
3219     tcg_insn_unit *func_addr;
3220     int allocate_args;
3221     TCGRegSet allocated_regs;
3222 
3223     func_addr = (tcg_insn_unit *)(intptr_t)op->args[nb_oargs + nb_iargs];
3224     flags = op->args[nb_oargs + nb_iargs + 1];
3225 
3226     nb_regs = ARRAY_SIZE(tcg_target_call_iarg_regs);
3227     if (nb_regs > nb_iargs) {
3228         nb_regs = nb_iargs;
3229     }
3230 
3231     /* assign stack slots first */
3232     call_stack_size = (nb_iargs - nb_regs) * sizeof(tcg_target_long);
3233     call_stack_size = (call_stack_size + TCG_TARGET_STACK_ALIGN - 1) &
3234         ~(TCG_TARGET_STACK_ALIGN - 1);
3235     allocate_args = (call_stack_size > TCG_STATIC_CALL_ARGS_SIZE);
3236     if (allocate_args) {
3237         /* XXX: if more than TCG_STATIC_CALL_ARGS_SIZE is needed,
3238            preallocate call stack */
3239         tcg_abort();
3240     }
3241 
3242     stack_offset = TCG_TARGET_CALL_STACK_OFFSET;
3243     for (i = nb_regs; i < nb_iargs; i++) {
3244         arg = op->args[nb_oargs + i];
3245 #ifdef TCG_TARGET_STACK_GROWSUP
3246         stack_offset -= sizeof(tcg_target_long);
3247 #endif
3248         if (arg != TCG_CALL_DUMMY_ARG) {
3249             ts = arg_temp(arg);
3250             temp_load(s, ts, tcg_target_available_regs[ts->type],
3251                       s->reserved_regs);
3252             tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK, stack_offset);
3253         }
3254 #ifndef TCG_TARGET_STACK_GROWSUP
3255         stack_offset += sizeof(tcg_target_long);
3256 #endif
3257     }
3258 
3259     /* assign input registers */
3260     allocated_regs = s->reserved_regs;
3261     for (i = 0; i < nb_regs; i++) {
3262         arg = op->args[nb_oargs + i];
3263         if (arg != TCG_CALL_DUMMY_ARG) {
3264             ts = arg_temp(arg);
3265             reg = tcg_target_call_iarg_regs[i];
3266             tcg_reg_free(s, reg, allocated_regs);
3267 
3268             if (ts->val_type == TEMP_VAL_REG) {
3269                 if (ts->reg != reg) {
3270                     tcg_out_mov(s, ts->type, reg, ts->reg);
3271                 }
3272             } else {
3273                 TCGRegSet arg_set = 0;
3274 
3275                 tcg_regset_set_reg(arg_set, reg);
3276                 temp_load(s, ts, arg_set, allocated_regs);
3277             }
3278 
3279             tcg_regset_set_reg(allocated_regs, reg);
3280         }
3281     }
3282 
3283     /* mark dead temporaries and free the associated registers */
3284     for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
3285         if (IS_DEAD_ARG(i)) {
3286             temp_dead(s, arg_temp(op->args[i]));
3287         }
3288     }
3289 
3290     /* clobber call registers */
3291     for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
3292         if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
3293             tcg_reg_free(s, i, allocated_regs);
3294         }
3295     }
3296 
3297     /* Save globals if they might be written by the helper, sync them if
3298        they might be read. */
3299     if (flags & TCG_CALL_NO_READ_GLOBALS) {
3300         /* Nothing to do */
3301     } else if (flags & TCG_CALL_NO_WRITE_GLOBALS) {
3302         sync_globals(s, allocated_regs);
3303     } else {
3304         save_globals(s, allocated_regs);
3305     }
3306 
3307     tcg_out_call(s, func_addr);
3308 
3309     /* assign output registers and emit moves if needed */
3310     for(i = 0; i < nb_oargs; i++) {
3311         arg = op->args[i];
3312         ts = arg_temp(arg);
3313         reg = tcg_target_call_oarg_regs[i];
3314         tcg_debug_assert(s->reg_to_temp[reg] == NULL);
3315 
3316         if (ts->fixed_reg) {
3317             if (ts->reg != reg) {
3318                 tcg_out_mov(s, ts->type, ts->reg, reg);
3319             }
3320         } else {
3321             if (ts->val_type == TEMP_VAL_REG) {
3322                 s->reg_to_temp[ts->reg] = NULL;
3323             }
3324             ts->val_type = TEMP_VAL_REG;
3325             ts->reg = reg;
3326             ts->mem_coherent = 0;
3327             s->reg_to_temp[reg] = ts;
3328             if (NEED_SYNC_ARG(i)) {
3329                 temp_sync(s, ts, allocated_regs, IS_DEAD_ARG(i));
3330             } else if (IS_DEAD_ARG(i)) {
3331                 temp_dead(s, ts);
3332             }
3333         }
3334     }
3335 }
3336 
3337 #ifdef CONFIG_PROFILER
3338 
3339 /* avoid copy/paste errors */
3340 #define PROF_ADD(to, from, field)                       \
3341     do {                                                \
3342         (to)->field += atomic_read(&((from)->field));   \
3343     } while (0)
3344 
3345 #define PROF_MAX(to, from, field)                                       \
3346     do {                                                                \
3347         typeof((from)->field) val__ = atomic_read(&((from)->field));    \
3348         if (val__ > (to)->field) {                                      \
3349             (to)->field = val__;                                        \
3350         }                                                               \
3351     } while (0)
3352 
3353 /* Pass in a zero'ed @prof */
3354 static inline
3355 void tcg_profile_snapshot(TCGProfile *prof, bool counters, bool table)
3356 {
3357     unsigned int n_ctxs = atomic_read(&n_tcg_ctxs);
3358     unsigned int i;
3359 
3360     for (i = 0; i < n_ctxs; i++) {
3361         TCGContext *s = atomic_read(&tcg_ctxs[i]);
3362         const TCGProfile *orig = &s->prof;
3363 
3364         if (counters) {
3365             PROF_ADD(prof, orig, cpu_exec_time);
3366             PROF_ADD(prof, orig, tb_count1);
3367             PROF_ADD(prof, orig, tb_count);
3368             PROF_ADD(prof, orig, op_count);
3369             PROF_MAX(prof, orig, op_count_max);
3370             PROF_ADD(prof, orig, temp_count);
3371             PROF_MAX(prof, orig, temp_count_max);
3372             PROF_ADD(prof, orig, del_op_count);
3373             PROF_ADD(prof, orig, code_in_len);
3374             PROF_ADD(prof, orig, code_out_len);
3375             PROF_ADD(prof, orig, search_out_len);
3376             PROF_ADD(prof, orig, interm_time);
3377             PROF_ADD(prof, orig, code_time);
3378             PROF_ADD(prof, orig, la_time);
3379             PROF_ADD(prof, orig, opt_time);
3380             PROF_ADD(prof, orig, restore_count);
3381             PROF_ADD(prof, orig, restore_time);
3382         }
3383         if (table) {
3384             int i;
3385 
3386             for (i = 0; i < NB_OPS; i++) {
3387                 PROF_ADD(prof, orig, table_op_count[i]);
3388             }
3389         }
3390     }
3391 }
3392 
3393 #undef PROF_ADD
3394 #undef PROF_MAX
3395 
3396 static void tcg_profile_snapshot_counters(TCGProfile *prof)
3397 {
3398     tcg_profile_snapshot(prof, true, false);
3399 }
3400 
3401 static void tcg_profile_snapshot_table(TCGProfile *prof)
3402 {
3403     tcg_profile_snapshot(prof, false, true);
3404 }
3405 
3406 void tcg_dump_op_count(FILE *f, fprintf_function cpu_fprintf)
3407 {
3408     TCGProfile prof = {};
3409     int i;
3410 
3411     tcg_profile_snapshot_table(&prof);
3412     for (i = 0; i < NB_OPS; i++) {
3413         cpu_fprintf(f, "%s %" PRId64 "\n", tcg_op_defs[i].name,
3414                     prof.table_op_count[i]);
3415     }
3416 }
3417 
3418 int64_t tcg_cpu_exec_time(void)
3419 {
3420     unsigned int n_ctxs = atomic_read(&n_tcg_ctxs);
3421     unsigned int i;
3422     int64_t ret = 0;
3423 
3424     for (i = 0; i < n_ctxs; i++) {
3425         const TCGContext *s = atomic_read(&tcg_ctxs[i]);
3426         const TCGProfile *prof = &s->prof;
3427 
3428         ret += atomic_read(&prof->cpu_exec_time);
3429     }
3430     return ret;
3431 }
3432 #else
3433 void tcg_dump_op_count(FILE *f, fprintf_function cpu_fprintf)
3434 {
3435     cpu_fprintf(f, "[TCG profiler not compiled]\n");
3436 }
3437 
/* Variant used when CONFIG_PROFILER is not defined: no execution time is
   recorded, so calling this reports an error and terminates the process
   with EXIT_FAILURE.  It never returns.  */
int64_t tcg_cpu_exec_time(void)
{
    error_report("%s: TCG profiler not compiled", __func__);
    exit(EXIT_FAILURE);
}
3443 #endif
3444 
3445 
3446 int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
3447 {
3448 #ifdef CONFIG_PROFILER
3449     TCGProfile *prof = &s->prof;
3450 #endif
3451     int i, num_insns;
3452     TCGOp *op;
3453 
3454 #ifdef CONFIG_PROFILER
3455     {
3456         int n = 0;
3457 
3458         QTAILQ_FOREACH(op, &s->ops, link) {
3459             n++;
3460         }
3461         atomic_set(&prof->op_count, prof->op_count + n);
3462         if (n > prof->op_count_max) {
3463             atomic_set(&prof->op_count_max, n);
3464         }
3465 
3466         n = s->nb_temps;
3467         atomic_set(&prof->temp_count, prof->temp_count + n);
3468         if (n > prof->temp_count_max) {
3469             atomic_set(&prof->temp_count_max, n);
3470         }
3471     }
3472 #endif
3473 
3474 #ifdef DEBUG_DISAS
3475     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)
3476                  && qemu_log_in_addr_range(tb->pc))) {
3477         qemu_log_lock();
3478         qemu_log("OP:\n");
3479         tcg_dump_ops(s);
3480         qemu_log("\n");
3481         qemu_log_unlock();
3482     }
3483 #endif
3484 
3485 #ifdef CONFIG_PROFILER
3486     atomic_set(&prof->opt_time, prof->opt_time - profile_getclock());
3487 #endif
3488 
3489 #ifdef USE_TCG_OPTIMIZATIONS
3490     tcg_optimize(s);
3491 #endif
3492 
3493 #ifdef CONFIG_PROFILER
3494     atomic_set(&prof->opt_time, prof->opt_time + profile_getclock());
3495     atomic_set(&prof->la_time, prof->la_time - profile_getclock());
3496 #endif
3497 
3498     liveness_pass_1(s);
3499 
3500     if (s->nb_indirects > 0) {
3501 #ifdef DEBUG_DISAS
3502         if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
3503                      && qemu_log_in_addr_range(tb->pc))) {
3504             qemu_log_lock();
3505             qemu_log("OP before indirect lowering:\n");
3506             tcg_dump_ops(s);
3507             qemu_log("\n");
3508             qemu_log_unlock();
3509         }
3510 #endif
3511         /* Replace indirect temps with direct temps.  */
3512         if (liveness_pass_2(s)) {
3513             /* If changes were made, re-run liveness.  */
3514             liveness_pass_1(s);
3515         }
3516     }
3517 
3518 #ifdef CONFIG_PROFILER
3519     atomic_set(&prof->la_time, prof->la_time + profile_getclock());
3520 #endif
3521 
3522 #ifdef DEBUG_DISAS
3523     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT)
3524                  && qemu_log_in_addr_range(tb->pc))) {
3525         qemu_log_lock();
3526         qemu_log("OP after optimization and liveness analysis:\n");
3527         tcg_dump_ops(s);
3528         qemu_log("\n");
3529         qemu_log_unlock();
3530     }
3531 #endif
3532 
3533     tcg_reg_alloc_start(s);
3534 
3535     s->code_buf = tb->tc.ptr;
3536     s->code_ptr = tb->tc.ptr;
3537 
3538 #ifdef TCG_TARGET_NEED_LDST_LABELS
3539     QSIMPLEQ_INIT(&s->ldst_labels);
3540 #endif
3541 #ifdef TCG_TARGET_NEED_POOL_LABELS
3542     s->pool_labels = NULL;
3543 #endif
3544 
3545     num_insns = -1;
3546     QTAILQ_FOREACH(op, &s->ops, link) {
3547         TCGOpcode opc = op->opc;
3548 
3549 #ifdef CONFIG_PROFILER
3550         atomic_set(&prof->table_op_count[opc], prof->table_op_count[opc] + 1);
3551 #endif
3552 
3553         switch (opc) {
3554         case INDEX_op_mov_i32:
3555         case INDEX_op_mov_i64:
3556         case INDEX_op_mov_vec:
3557             tcg_reg_alloc_mov(s, op);
3558             break;
3559         case INDEX_op_movi_i32:
3560         case INDEX_op_movi_i64:
3561         case INDEX_op_dupi_vec:
3562             tcg_reg_alloc_movi(s, op);
3563             break;
3564         case INDEX_op_insn_start:
3565             if (num_insns >= 0) {
3566                 size_t off = tcg_current_code_size(s);
3567                 s->gen_insn_end_off[num_insns] = off;
3568                 /* Assert that we do not overflow our stored offset.  */
3569                 assert(s->gen_insn_end_off[num_insns] == off);
3570             }
3571             num_insns++;
3572             for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
3573                 target_ulong a;
3574 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
3575                 a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
3576 #else
3577                 a = op->args[i];
3578 #endif
3579                 s->gen_insn_data[num_insns][i] = a;
3580             }
3581             break;
3582         case INDEX_op_discard:
3583             temp_dead(s, arg_temp(op->args[0]));
3584             break;
3585         case INDEX_op_set_label:
3586             tcg_reg_alloc_bb_end(s, s->reserved_regs);
3587             tcg_out_label(s, arg_label(op->args[0]), s->code_ptr);
3588             break;
3589         case INDEX_op_call:
3590             tcg_reg_alloc_call(s, op);
3591             break;
3592         default:
3593             /* Sanity check that we've not introduced any unhandled opcodes. */
3594             tcg_debug_assert(tcg_op_supported(opc));
3595             /* Note: in order to speed up the code, it would be much
3596                faster to have specialized register allocator functions for
3597                some common argument patterns */
3598             tcg_reg_alloc_op(s, op);
3599             break;
3600         }
3601 #ifdef CONFIG_DEBUG_TCG
3602         check_regs(s);
3603 #endif
3604         /* Test for (pending) buffer overflow.  The assumption is that any
3605            one operation beginning below the high water mark cannot overrun
3606            the buffer completely.  Thus we can test for overflow after
3607            generating code without having to check during generation.  */
3608         if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
3609             return -1;
3610         }
3611     }
3612     tcg_debug_assert(num_insns >= 0);
3613     s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);
3614 
3615     /* Generate TB finalization at the end of block */
3616 #ifdef TCG_TARGET_NEED_LDST_LABELS
3617     if (!tcg_out_ldst_finalize(s)) {
3618         return -1;
3619     }
3620 #endif
3621 #ifdef TCG_TARGET_NEED_POOL_LABELS
3622     if (!tcg_out_pool_finalize(s)) {
3623         return -1;
3624     }
3625 #endif
3626 
3627     /* flush instruction cache */
3628     flush_icache_range((uintptr_t)s->code_buf, (uintptr_t)s->code_ptr);
3629 
3630     return tcg_current_code_size(s);
3631 }
3632 
3633 #ifdef CONFIG_PROFILER
3634 void tcg_dump_info(FILE *f, fprintf_function cpu_fprintf)
3635 {
3636     TCGProfile prof = {};
3637     const TCGProfile *s;
3638     int64_t tb_count;
3639     int64_t tb_div_count;
3640     int64_t tot;
3641 
3642     tcg_profile_snapshot_counters(&prof);
3643     s = &prof;
3644     tb_count = s->tb_count;
3645     tb_div_count = tb_count ? tb_count : 1;
3646     tot = s->interm_time + s->code_time;
3647 
3648     cpu_fprintf(f, "JIT cycles          %" PRId64 " (%0.3f s at 2.4 GHz)\n",
3649                 tot, tot / 2.4e9);
3650     cpu_fprintf(f, "translated TBs      %" PRId64 " (aborted=%" PRId64 " %0.1f%%)\n",
3651                 tb_count, s->tb_count1 - tb_count,
3652                 (double)(s->tb_count1 - s->tb_count)
3653                 / (s->tb_count1 ? s->tb_count1 : 1) * 100.0);
3654     cpu_fprintf(f, "avg ops/TB          %0.1f max=%d\n",
3655                 (double)s->op_count / tb_div_count, s->op_count_max);
3656     cpu_fprintf(f, "deleted ops/TB      %0.2f\n",
3657                 (double)s->del_op_count / tb_div_count);
3658     cpu_fprintf(f, "avg temps/TB        %0.2f max=%d\n",
3659                 (double)s->temp_count / tb_div_count, s->temp_count_max);
3660     cpu_fprintf(f, "avg host code/TB    %0.1f\n",
3661                 (double)s->code_out_len / tb_div_count);
3662     cpu_fprintf(f, "avg search data/TB  %0.1f\n",
3663                 (double)s->search_out_len / tb_div_count);
3664 
3665     cpu_fprintf(f, "cycles/op           %0.1f\n",
3666                 s->op_count ? (double)tot / s->op_count : 0);
3667     cpu_fprintf(f, "cycles/in byte      %0.1f\n",
3668                 s->code_in_len ? (double)tot / s->code_in_len : 0);
3669     cpu_fprintf(f, "cycles/out byte     %0.1f\n",
3670                 s->code_out_len ? (double)tot / s->code_out_len : 0);
3671     cpu_fprintf(f, "cycles/search byte     %0.1f\n",
3672                 s->search_out_len ? (double)tot / s->search_out_len : 0);
3673     if (tot == 0) {
3674         tot = 1;
3675     }
3676     cpu_fprintf(f, "  gen_interm time   %0.1f%%\n",
3677                 (double)s->interm_time / tot * 100.0);
3678     cpu_fprintf(f, "  gen_code time     %0.1f%%\n",
3679                 (double)s->code_time / tot * 100.0);
3680     cpu_fprintf(f, "optim./code time    %0.1f%%\n",
3681                 (double)s->opt_time / (s->code_time ? s->code_time : 1)
3682                 * 100.0);
3683     cpu_fprintf(f, "liveness/code time  %0.1f%%\n",
3684                 (double)s->la_time / (s->code_time ? s->code_time : 1) * 100.0);
3685     cpu_fprintf(f, "cpu_restore count   %" PRId64 "\n",
3686                 s->restore_count);
3687     cpu_fprintf(f, "  avg cycles        %0.1f\n",
3688                 s->restore_count ? (double)s->restore_time / s->restore_count : 0);
3689 }
3690 #else
3691 void tcg_dump_info(FILE *f, fprintf_function cpu_fprintf)
3692 {
3693     cpu_fprintf(f, "[TCG profiler not compiled]\n");
3694 }
3695 #endif
3696 
3697 #ifdef ELF_HOST_MACHINE
3698 /* In order to use this feature, the backend needs to do three things:
3699 
3700    (1) Define ELF_HOST_MACHINE to indicate both what value to
3701        put into the ELF image and to indicate support for the feature.
3702 
3703    (2) Define tcg_register_jit.  This should create a buffer containing
3704        the contents of a .debug_frame section that describes the post-
3705        prologue unwind info for the tcg machine.
3706 
3707    (3) Call tcg_register_jit_int, with the constructed .debug_frame.
3708 */
3709 
3710 /* Begin GDB interface.  THE FOLLOWING MUST MATCH GDB DOCS.  */
/*
 * Action codes stored in jit_descriptor.action_flag.  The numeric values
 * are part of the GDB interface and are therefore spelled out explicitly.
 */
typedef enum {
    JIT_NOACTION      = 0,
    JIT_REGISTER_FN   = 1,
    JIT_UNREGISTER_FN = 2
} jit_actions_t;
3716 
/*
 * Describes one in-memory symbol file handed to GDB: where it lives
 * (symfile_addr) and how large it is (symfile_size).  The two link
 * pointers come first; member order and types are part of the GDB
 * interface and must not change.
 */
struct jit_code_entry {
    struct jit_code_entry *next_entry, *prev_entry;
    const void *symfile_addr;
    uint64_t symfile_size;
};
3723 
/*
 * Descriptor through which JIT symbol files are announced to GDB.
 * action_flag receives one of the jit_actions_t values; relevant_entry and
 * first_entry point at jit_code_entry records.  Member order and types are
 * part of the GDB interface and must not change.
 */
struct jit_descriptor {
    uint32_t version, action_flag;
    struct jit_code_entry *relevant_entry, *first_entry;
};
3730 
/*
 * Notification hook: called right after __jit_debug_descriptor has been
 * updated.  The function is forced out of line (noinline); the empty asm
 * statement is presumably there so the compiler cannot treat the function
 * as having no effect and discard calls to it -- see the GDB JIT
 * interface documentation.
 */
__attribute__((noinline)) void __jit_debug_register_code(void);

void __jit_debug_register_code(void)
{
    __asm__("");
}
3736 
3737 /* Must statically initialize the version, because GDB may check
3738    the version before we can set it.  */
3739 struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };
3740 
3741 /* End GDB interface.  */
3742 
/*
 * Return the byte offset of @str within the string table @strtab.
 *
 * The table is a sequence of NUL-terminated strings; the scan starts at
 * offset 1, skipping the first byte.  There is no end-of-table check, so
 * @str MUST be present in @strtab: the loop only stops on a match.
 */
static int find_string(const char *strtab, const char *str)
{
    const char *entry;

    for (entry = strtab + 1; strcmp(entry, str) != 0;
         entry += strlen(entry) + 1) {
        /* Not this one: step over the string and its terminator. */
    }
    return entry - strtab;
}
3754 
3755 static void tcg_register_jit_int(void *buf_ptr, size_t buf_size,
3756                                  const void *debug_frame,
3757                                  size_t debug_frame_size)
3758 {
3759     struct __attribute__((packed)) DebugInfo {
3760         uint32_t  len;
3761         uint16_t  version;
3762         uint32_t  abbrev;
3763         uint8_t   ptr_size;
3764         uint8_t   cu_die;
3765         uint16_t  cu_lang;
3766         uintptr_t cu_low_pc;
3767         uintptr_t cu_high_pc;
3768         uint8_t   fn_die;
3769         char      fn_name[16];
3770         uintptr_t fn_low_pc;
3771         uintptr_t fn_high_pc;
3772         uint8_t   cu_eoc;
3773     };
3774 
3775     struct ElfImage {
3776         ElfW(Ehdr) ehdr;
3777         ElfW(Phdr) phdr;
3778         ElfW(Shdr) shdr[7];
3779         ElfW(Sym)  sym[2];
3780         struct DebugInfo di;
3781         uint8_t    da[24];
3782         char       str[80];
3783     };
3784 
3785     struct ElfImage *img;
3786 
3787     static const struct ElfImage img_template = {
3788         .ehdr = {
3789             .e_ident[EI_MAG0] = ELFMAG0,
3790             .e_ident[EI_MAG1] = ELFMAG1,
3791             .e_ident[EI_MAG2] = ELFMAG2,
3792             .e_ident[EI_MAG3] = ELFMAG3,
3793             .e_ident[EI_CLASS] = ELF_CLASS,
3794             .e_ident[EI_DATA] = ELF_DATA,
3795             .e_ident[EI_VERSION] = EV_CURRENT,
3796             .e_type = ET_EXEC,
3797             .e_machine = ELF_HOST_MACHINE,
3798             .e_version = EV_CURRENT,
3799             .e_phoff = offsetof(struct ElfImage, phdr),
3800             .e_shoff = offsetof(struct ElfImage, shdr),
3801             .e_ehsize = sizeof(ElfW(Shdr)),
3802             .e_phentsize = sizeof(ElfW(Phdr)),
3803             .e_phnum = 1,
3804             .e_shentsize = sizeof(ElfW(Shdr)),
3805             .e_shnum = ARRAY_SIZE(img->shdr),
3806             .e_shstrndx = ARRAY_SIZE(img->shdr) - 1,
3807 #ifdef ELF_HOST_FLAGS
3808             .e_flags = ELF_HOST_FLAGS,
3809 #endif
3810 #ifdef ELF_OSABI
3811             .e_ident[EI_OSABI] = ELF_OSABI,
3812 #endif
3813         },
3814         .phdr = {
3815             .p_type = PT_LOAD,
3816             .p_flags = PF_X,
3817         },
3818         .shdr = {
3819             [0] = { .sh_type = SHT_NULL },
3820             /* Trick: The contents of code_gen_buffer are not present in
3821                this fake ELF file; that got allocated elsewhere.  Therefore
3822                we mark .text as SHT_NOBITS (similar to .bss) so that readers
3823                will not look for contents.  We can record any address.  */
3824             [1] = { /* .text */
3825                 .sh_type = SHT_NOBITS,
3826                 .sh_flags = SHF_EXECINSTR | SHF_ALLOC,
3827             },
3828             [2] = { /* .debug_info */
3829                 .sh_type = SHT_PROGBITS,
3830                 .sh_offset = offsetof(struct ElfImage, di),
3831                 .sh_size = sizeof(struct DebugInfo),
3832             },
3833             [3] = { /* .debug_abbrev */
3834                 .sh_type = SHT_PROGBITS,
3835                 .sh_offset = offsetof(struct ElfImage, da),
3836                 .sh_size = sizeof(img->da),
3837             },
3838             [4] = { /* .debug_frame */
3839                 .sh_type = SHT_PROGBITS,
3840                 .sh_offset = sizeof(struct ElfImage),
3841             },
3842             [5] = { /* .symtab */
3843                 .sh_type = SHT_SYMTAB,
3844                 .sh_offset = offsetof(struct ElfImage, sym),
3845                 .sh_size = sizeof(img->sym),
3846                 .sh_info = 1,
3847                 .sh_link = ARRAY_SIZE(img->shdr) - 1,
3848                 .sh_entsize = sizeof(ElfW(Sym)),
3849             },
3850             [6] = { /* .strtab */
3851                 .sh_type = SHT_STRTAB,
3852                 .sh_offset = offsetof(struct ElfImage, str),
3853                 .sh_size = sizeof(img->str),
3854             }
3855         },
3856         .sym = {
3857             [1] = { /* code_gen_buffer */
3858                 .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC),
3859                 .st_shndx = 1,
3860             }
3861         },
3862         .di = {
3863             .len = sizeof(struct DebugInfo) - 4,
3864             .version = 2,
3865             .ptr_size = sizeof(void *),
3866             .cu_die = 1,
3867             .cu_lang = 0x8001,  /* DW_LANG_Mips_Assembler */
3868             .fn_die = 2,
3869             .fn_name = "code_gen_buffer"
3870         },
3871         .da = {
3872             1,          /* abbrev number (the cu) */
3873             0x11, 1,    /* DW_TAG_compile_unit, has children */
3874             0x13, 0x5,  /* DW_AT_language, DW_FORM_data2 */
3875             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
3876             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
3877             0, 0,       /* end of abbrev */
3878             2,          /* abbrev number (the fn) */
3879             0x2e, 0,    /* DW_TAG_subprogram, no children */
3880             0x3, 0x8,   /* DW_AT_name, DW_FORM_string */
3881             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
3882             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
3883             0, 0,       /* end of abbrev */
3884             0           /* no more abbrev */
3885         },
3886         .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0"
3887                ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer",
3888     };
3889 
3890     /* We only need a single jit entry; statically allocate it.  */
3891     static struct jit_code_entry one_entry;
3892 
3893     uintptr_t buf = (uintptr_t)buf_ptr;
3894     size_t img_size = sizeof(struct ElfImage) + debug_frame_size;
3895     DebugFrameHeader *dfh;
3896 
3897     img = g_malloc(img_size);
3898     *img = img_template;
3899 
3900     img->phdr.p_vaddr = buf;
3901     img->phdr.p_paddr = buf;
3902     img->phdr.p_memsz = buf_size;
3903 
3904     img->shdr[1].sh_name = find_string(img->str, ".text");
3905     img->shdr[1].sh_addr = buf;
3906     img->shdr[1].sh_size = buf_size;
3907 
3908     img->shdr[2].sh_name = find_string(img->str, ".debug_info");
3909     img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev");
3910 
3911     img->shdr[4].sh_name = find_string(img->str, ".debug_frame");
3912     img->shdr[4].sh_size = debug_frame_size;
3913 
3914     img->shdr[5].sh_name = find_string(img->str, ".symtab");
3915     img->shdr[6].sh_name = find_string(img->str, ".strtab");
3916 
3917     img->sym[1].st_name = find_string(img->str, "code_gen_buffer");
3918     img->sym[1].st_value = buf;
3919     img->sym[1].st_size = buf_size;
3920 
3921     img->di.cu_low_pc = buf;
3922     img->di.cu_high_pc = buf + buf_size;
3923     img->di.fn_low_pc = buf;
3924     img->di.fn_high_pc = buf + buf_size;
3925 
3926     dfh = (DebugFrameHeader *)(img + 1);
3927     memcpy(dfh, debug_frame, debug_frame_size);
3928     dfh->fde.func_start = buf;
3929     dfh->fde.func_len = buf_size;
3930 
3931 #ifdef DEBUG_JIT
3932     /* Enable this block to be able to debug the ELF image file creation.
3933        One can use readelf, objdump, or other inspection utilities.  */
3934     {
3935         FILE *f = fopen("/tmp/qemu.jit", "w+b");
3936         if (f) {
3937             if (fwrite(img, img_size, 1, f) != img_size) {
3938                 /* Avoid stupid unused return value warning for fwrite.  */
3939             }
3940             fclose(f);
3941         }
3942     }
3943 #endif
3944 
3945     one_entry.symfile_addr = img;
3946     one_entry.symfile_size = img_size;
3947 
3948     __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
3949     __jit_debug_descriptor.relevant_entry = &one_entry;
3950     __jit_debug_descriptor.first_entry = &one_entry;
3951     __jit_debug_register_code();
3952 }
3953 #else
3954 /* No support for the feature.  Provide the entry point expected by exec.c,
3955    and implement the internal function we declared earlier.  */
3956 
static void tcg_register_jit_int(void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
{
    /* ELF_HOST_MACHINE is not defined for this host: registering the
       buffer with GDB is unsupported, so every argument is ignored.  */
    (void)buf;
    (void)size;
    (void)debug_frame;
    (void)debug_frame_size;
}
3962 
void tcg_register_jit(void *buf, size_t buf_size)
{
    /* ELF_HOST_MACHINE is not defined for this host: this entry point only
       exists so that callers link; it deliberately does nothing.  */
    (void)buf;
    (void)buf_size;
}
3966 #endif /* ELF_HOST_MACHINE */
3967 
3968 #if !TCG_TARGET_MAYBE_vec
3969 void tcg_expand_vec_op(TCGOpcode o, TCGType t, unsigned e, TCGArg a0, ...)
3970 {
3971     g_assert_not_reached();
3972 }
3973 #endif
3974