xref: /openbmc/qemu/tcg/tcg.c (revision 8f0a3716)
1 /*
2  * Tiny Code Generator for QEMU
3  *
4  * Copyright (c) 2008 Fabrice Bellard
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24 
25 /* define it to use liveness analysis (better code) */
26 #define USE_TCG_OPTIMIZATIONS
27 
28 #include "qemu/osdep.h"
29 
/* Define to dump the ELF file used to communicate with GDB.  */
31 #undef DEBUG_JIT
32 
33 #include "qemu/cutils.h"
34 #include "qemu/host-utils.h"
35 #include "qemu/timer.h"
36 
37 /* Note: the long term plan is to reduce the dependencies on the QEMU
38    CPU definitions. Currently they are used for qemu_ld/st
39    instructions */
40 #define NO_CPU_IO_DEFS
41 #include "cpu.h"
42 
43 #include "exec/cpu-common.h"
44 #include "exec/exec-all.h"
45 
46 #include "tcg-op.h"
47 
48 #if UINTPTR_MAX == UINT32_MAX
49 # define ELF_CLASS  ELFCLASS32
50 #else
51 # define ELF_CLASS  ELFCLASS64
52 #endif
53 #ifdef HOST_WORDS_BIGENDIAN
54 # define ELF_DATA   ELFDATA2MSB
55 #else
56 # define ELF_DATA   ELFDATA2LSB
57 #endif
58 
59 #include "elf.h"
60 #include "exec/log.h"
61 #include "sysemu/sysemu.h"
62 
63 /* Forward declarations for functions declared in tcg-target.inc.c and
64    used here. */
65 static void tcg_target_init(TCGContext *s);
66 static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode);
67 static void tcg_target_qemu_prologue(TCGContext *s);
68 static void patch_reloc(tcg_insn_unit *code_ptr, int type,
69                         intptr_t value, intptr_t addend);
70 
71 /* The CIE and FDE header definitions will be common to all hosts.  */
/*
 * Leading fields of a debug-frame CIE record.
 * NOTE(review): the field names suggest the DWARF Common Information
 * Entry layout -- confirm against the per-host users of this type.
 */
typedef struct {
    /* The first member is aligned to the host pointer size. */
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t id;
    uint8_t version;
    char augmentation[1];
    uint8_t code_align;
    uint8_t data_align;
    uint8_t return_column;
} DebugFrameCIE;
81 
/*
 * Leading fields of a debug-frame FDE record (packed).
 * NOTE(review): func_start/func_len presumably describe the address range
 * of the generated code covered by this entry -- confirm with the code
 * that fills them in.
 */
typedef struct QEMU_PACKED {
    /* The first member is aligned to the host pointer size. */
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t cie_offset;
    uintptr_t func_start;
    uintptr_t func_len;
} DebugFrameFDEHeader;
88 
/* A CIE immediately followed by an FDE header, stored as one packed unit. */
typedef struct QEMU_PACKED {
    DebugFrameCIE cie;
    DebugFrameFDEHeader fde;
} DebugFrameHeader;
93 
94 static void tcg_register_jit_int(void *buf, size_t size,
95                                  const void *debug_frame,
96                                  size_t debug_frame_size)
97     __attribute__((unused));
98 
99 /* Forward declarations for functions declared and used in tcg-target.inc.c. */
100 static const char *target_parse_constraint(TCGArgConstraint *ct,
101                                            const char *ct_str, TCGType type);
102 static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
103                        intptr_t arg2);
104 static void tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
105 static void tcg_out_movi(TCGContext *s, TCGType type,
106                          TCGReg ret, tcg_target_long arg);
107 static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
108                        const int *const_args);
109 static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
110                        intptr_t arg2);
111 static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
112                         TCGReg base, intptr_t ofs);
113 static void tcg_out_call(TCGContext *s, tcg_insn_unit *target);
114 static int tcg_target_const_match(tcg_target_long val, TCGType type,
115                                   const TCGArgConstraint *arg_ct);
116 #ifdef TCG_TARGET_NEED_LDST_LABELS
117 static bool tcg_out_ldst_finalize(TCGContext *s);
118 #endif
119 
/*
 * Distance, in bytes, between the end of a code region and its high-water
 * mark (code_gen_highwater is set to 'end - TCG_HIGHWATER').
 */
#define TCG_HIGHWATER 1024

/* Registered TCG contexts and how many entries of the array are claimed. */
static TCGContext **tcg_ctxs;
static unsigned int n_tcg_ctxs;
/* Set by tcg_context_init() to the "env" global held in TCG_AREG0. */
TCGv_env cpu_env = 0;
125 
126 /*
127  * We divide code_gen_buffer into equally-sized "regions" that TCG threads
128  * dynamically allocate from as demand dictates. Given appropriate region
129  * sizing, this minimizes flushes even when some TCG threads generate a lot
130  * more code than others.
131  */
struct tcg_region_state {
    QemuMutex lock;

    /* fields set at init time */
    void *start;         /* start of code_gen_buffer (possibly unaligned) */
    void *start_aligned; /* 'start' rounded up to the host page size */
    void *end;           /* end of the last region, before its guard page */
    size_t n;            /* number of regions */
    size_t size; /* size of one region */
    size_t stride; /* .size + guard size */

    /* fields protected by the lock */
    size_t current; /* current region index */
    size_t agg_size_full; /* aggregate size of full regions */
};
147 
/* The single, file-wide region bookkeeping state. */
static struct tcg_region_state region;

/*
 * Host register sets.
 * NOTE(review): presumably filled in by tcg_target_init(); the two-entry
 * array looks like it is indexed by TCGType -- confirm in the target code.
 */
static TCGRegSet tcg_target_available_regs[2];
static TCGRegSet tcg_target_call_clobber_regs;
152 
#if TCG_TARGET_INSN_UNIT_SIZE == 1
/* Store one byte at the current output position and step past it. */
static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v)
{
    tcg_insn_unit *dst = s->code_ptr;

    *dst = v;
    s->code_ptr = dst + 1;
}

/* Overwrite the already-emitted byte at @p with @v. */
static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p,
                                                      uint8_t v)
{
    p[0] = v;
}
#endif
165 
166 #if TCG_TARGET_INSN_UNIT_SIZE <= 2
167 static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v)
168 {
169     if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
170         *s->code_ptr++ = v;
171     } else {
172         tcg_insn_unit *p = s->code_ptr;
173         memcpy(p, &v, sizeof(v));
174         s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
175     }
176 }
177 
178 static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p,
179                                                        uint16_t v)
180 {
181     if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
182         *p = v;
183     } else {
184         memcpy(p, &v, sizeof(v));
185     }
186 }
187 #endif
188 
189 #if TCG_TARGET_INSN_UNIT_SIZE <= 4
190 static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v)
191 {
192     if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
193         *s->code_ptr++ = v;
194     } else {
195         tcg_insn_unit *p = s->code_ptr;
196         memcpy(p, &v, sizeof(v));
197         s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
198     }
199 }
200 
201 static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p,
202                                                        uint32_t v)
203 {
204     if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
205         *p = v;
206     } else {
207         memcpy(p, &v, sizeof(v));
208     }
209 }
210 #endif
211 
212 #if TCG_TARGET_INSN_UNIT_SIZE <= 8
213 static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v)
214 {
215     if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
216         *s->code_ptr++ = v;
217     } else {
218         tcg_insn_unit *p = s->code_ptr;
219         memcpy(p, &v, sizeof(v));
220         s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
221     }
222 }
223 
224 static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p,
225                                                        uint64_t v)
226 {
227     if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
228         *p = v;
229     } else {
230         memcpy(p, &v, sizeof(v));
231     }
232 }
233 #endif
234 
235 /* label relocation processing */
236 
237 static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
238                           TCGLabel *l, intptr_t addend)
239 {
240     TCGRelocation *r;
241 
242     if (l->has_value) {
243         /* FIXME: This may break relocations on RISC targets that
244            modify instruction fields in place.  The caller may not have
245            written the initial value.  */
246         patch_reloc(code_ptr, type, l->u.value, addend);
247     } else {
248         /* add a new relocation entry */
249         r = tcg_malloc(sizeof(TCGRelocation));
250         r->type = type;
251         r->ptr = code_ptr;
252         r->addend = addend;
253         r->next = l->u.first_reloc;
254         l->u.first_reloc = r;
255     }
256 }
257 
258 static void tcg_out_label(TCGContext *s, TCGLabel *l, tcg_insn_unit *ptr)
259 {
260     intptr_t value = (intptr_t)ptr;
261     TCGRelocation *r;
262 
263     tcg_debug_assert(!l->has_value);
264 
265     for (r = l->u.first_reloc; r != NULL; r = r->next) {
266         patch_reloc(r->ptr, r->type, value, r->addend);
267     }
268 
269     l->has_value = 1;
270     l->u.value_ptr = ptr;
271 }
272 
273 TCGLabel *gen_new_label(void)
274 {
275     TCGContext *s = tcg_ctx;
276     TCGLabel *l = tcg_malloc(sizeof(TCGLabel));
277 
278     *l = (TCGLabel){
279         .id = s->nb_labels++
280     };
281 
282     return l;
283 }
284 
285 #include "tcg-target.inc.c"
286 
287 static void tcg_region_bounds(size_t curr_region, void **pstart, void **pend)
288 {
289     void *start, *end;
290 
291     start = region.start_aligned + curr_region * region.stride;
292     end = start + region.size;
293 
294     if (curr_region == 0) {
295         start = region.start;
296     }
297     if (curr_region == region.n - 1) {
298         end = region.end;
299     }
300 
301     *pstart = start;
302     *pend = end;
303 }
304 
305 static void tcg_region_assign(TCGContext *s, size_t curr_region)
306 {
307     void *start, *end;
308 
309     tcg_region_bounds(curr_region, &start, &end);
310 
311     s->code_gen_buffer = start;
312     s->code_gen_ptr = start;
313     s->code_gen_buffer_size = end - start;
314     s->code_gen_highwater = end - TCG_HIGHWATER;
315 }
316 
317 static bool tcg_region_alloc__locked(TCGContext *s)
318 {
319     if (region.current == region.n) {
320         return true;
321     }
322     tcg_region_assign(s, region.current);
323     region.current++;
324     return false;
325 }
326 
327 /*
328  * Request a new region once the one in use has filled up.
329  * Returns true on error.
330  */
331 static bool tcg_region_alloc(TCGContext *s)
332 {
333     bool err;
334     /* read the region size now; alloc__locked will overwrite it on success */
335     size_t size_full = s->code_gen_buffer_size;
336 
337     qemu_mutex_lock(&region.lock);
338     err = tcg_region_alloc__locked(s);
339     if (!err) {
340         region.agg_size_full += size_full - TCG_HIGHWATER;
341     }
342     qemu_mutex_unlock(&region.lock);
343     return err;
344 }
345 
346 /*
347  * Perform a context's first region allocation.
348  * This function does _not_ increment region.agg_size_full.
349  */
350 static inline bool tcg_region_initial_alloc__locked(TCGContext *s)
351 {
352     return tcg_region_alloc__locked(s);
353 }
354 
355 /* Call from a safe-work context */
356 void tcg_region_reset_all(void)
357 {
358     unsigned int n_ctxs = atomic_read(&n_tcg_ctxs);
359     unsigned int i;
360 
361     qemu_mutex_lock(&region.lock);
362     region.current = 0;
363     region.agg_size_full = 0;
364 
365     for (i = 0; i < n_ctxs; i++) {
366         TCGContext *s = atomic_read(&tcg_ctxs[i]);
367         bool err = tcg_region_initial_alloc__locked(s);
368 
369         g_assert(!err);
370     }
371     qemu_mutex_unlock(&region.lock);
372 }
373 
374 #ifdef CONFIG_USER_ONLY
375 static size_t tcg_n_regions(void)
376 {
377     return 1;
378 }
379 #else
380 /*
381  * It is likely that some vCPUs will translate more code than others, so we
382  * first try to set more regions than max_cpus, with those regions being of
383  * reasonable size. If that's not possible we make do by evenly dividing
384  * the code_gen_buffer among the vCPUs.
385  */
386 static size_t tcg_n_regions(void)
387 {
388     size_t i;
389 
390     /* Use a single region if all we have is one vCPU thread */
391     if (max_cpus == 1 || !qemu_tcg_mttcg_enabled()) {
392         return 1;
393     }
394 
395     /* Try to have more regions than max_cpus, with each region being >= 2 MB */
396     for (i = 8; i > 0; i--) {
397         size_t regions_per_thread = i;
398         size_t region_size;
399 
400         region_size = tcg_init_ctx.code_gen_buffer_size;
401         region_size /= max_cpus * regions_per_thread;
402 
403         if (region_size >= 2 * 1024u * 1024) {
404             return max_cpus * regions_per_thread;
405         }
406     }
407     /* If we can't, then just allocate one region per vCPU thread */
408     return max_cpus;
409 }
410 #endif
411 
412 /*
413  * Initializes region partitioning.
414  *
415  * Called at init time from the parent thread (i.e. the one calling
416  * tcg_context_init), after the target's TCG globals have been set.
417  *
418  * Region partitioning works by splitting code_gen_buffer into separate regions,
419  * and then assigning regions to TCG threads so that the threads can translate
420  * code in parallel without synchronization.
421  *
422  * In softmmu the number of TCG threads is bounded by max_cpus, so we use at
423  * least max_cpus regions in MTTCG. In !MTTCG we use a single region.
424  * Note that the TCG options from the command-line (i.e. -accel accel=tcg,[...])
425  * must have been parsed before calling this function, since it calls
426  * qemu_tcg_mttcg_enabled().
427  *
428  * In user-mode we use a single region.  Having multiple regions in user-mode
429  * is not supported, because the number of vCPU threads (recall that each thread
430  * spawned by the guest corresponds to a vCPU thread) is only bounded by the
431  * OS, and usually this number is huge (tens of thousands is not uncommon).
432  * Thus, given this large bound on the number of vCPU threads and the fact
433  * that code_gen_buffer is allocated at compile-time, we cannot guarantee
 * the availability of at least one region per vCPU thread.
435  *
436  * However, this user-mode limitation is unlikely to be a significant problem
437  * in practice. Multi-threaded guests share most if not all of their translated
438  * code, which makes parallel code generation less appealing than in softmmu.
439  */
440 void tcg_region_init(void)
441 {
442     void *buf = tcg_init_ctx.code_gen_buffer;
443     void *aligned;
444     size_t size = tcg_init_ctx.code_gen_buffer_size;
445     size_t page_size = qemu_real_host_page_size;
446     size_t region_size;
447     size_t n_regions;
448     size_t i;
449 
450     n_regions = tcg_n_regions();
451 
452     /* The first region will be 'aligned - buf' bytes larger than the others */
453     aligned = QEMU_ALIGN_PTR_UP(buf, page_size);
454     g_assert(aligned < tcg_init_ctx.code_gen_buffer + size);
455     /*
456      * Make region_size a multiple of page_size, using aligned as the start.
457      * As a result of this we might end up with a few extra pages at the end of
458      * the buffer; we will assign those to the last region.
459      */
460     region_size = (size - (aligned - buf)) / n_regions;
461     region_size = QEMU_ALIGN_DOWN(region_size, page_size);
462 
463     /* A region must have at least 2 pages; one code, one guard */
464     g_assert(region_size >= 2 * page_size);
465 
466     /* init the region struct */
467     qemu_mutex_init(&region.lock);
468     region.n = n_regions;
469     region.size = region_size - page_size;
470     region.stride = region_size;
471     region.start = buf;
472     region.start_aligned = aligned;
473     /* page-align the end, since its last page will be a guard page */
474     region.end = QEMU_ALIGN_PTR_DOWN(buf + size, page_size);
475     /* account for that last guard page */
476     region.end -= page_size;
477 
478     /* set guard pages */
479     for (i = 0; i < region.n; i++) {
480         void *start, *end;
481         int rc;
482 
483         tcg_region_bounds(i, &start, &end);
484         rc = qemu_mprotect_none(end, page_size);
485         g_assert(!rc);
486     }
487 
488     /* In user-mode we support only one ctx, so do the initial allocation now */
489 #ifdef CONFIG_USER_ONLY
490     {
491         bool err = tcg_region_initial_alloc__locked(tcg_ctx);
492 
493         g_assert(!err);
494     }
495 #endif
496 }
497 
498 /*
499  * All TCG threads except the parent (i.e. the one that called tcg_context_init
500  * and registered the target's TCG globals) must register with this function
501  * before initiating translation.
502  *
503  * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation
504  * of tcg_region_init() for the reasoning behind this.
505  *
 * In softmmu each caller registers its context in tcg_ctxs[]. Note that in
 * softmmu tcg_ctxs[] does not track tcg_init_ctx, since the initial context
 * is not used anymore for translation once this function is called.
 *
 * Not tracking tcg_init_ctx in tcg_ctxs[] in softmmu keeps code that iterates
 * over the array (e.g. tcg_code_size()) the same for both softmmu and
 * user-mode.
512  */
513 #ifdef CONFIG_USER_ONLY
514 void tcg_register_thread(void)
515 {
516     tcg_ctx = &tcg_init_ctx;
517 }
518 #else
519 void tcg_register_thread(void)
520 {
521     TCGContext *s = g_malloc(sizeof(*s));
522     unsigned int i, n;
523     bool err;
524 
525     *s = tcg_init_ctx;
526 
527     /* Relink mem_base.  */
528     for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) {
529         if (tcg_init_ctx.temps[i].mem_base) {
530             ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps;
531             tcg_debug_assert(b >= 0 && b < n);
532             s->temps[i].mem_base = &s->temps[b];
533         }
534     }
535 
536     /* Claim an entry in tcg_ctxs */
537     n = atomic_fetch_inc(&n_tcg_ctxs);
538     g_assert(n < max_cpus);
539     atomic_set(&tcg_ctxs[n], s);
540 
541     tcg_ctx = s;
542     qemu_mutex_lock(&region.lock);
543     err = tcg_region_initial_alloc__locked(tcg_ctx);
544     g_assert(!err);
545     qemu_mutex_unlock(&region.lock);
546 }
547 #endif /* !CONFIG_USER_ONLY */
548 
549 /*
550  * Returns the size (in bytes) of all translated code (i.e. from all regions)
551  * currently in the cache.
552  * See also: tcg_code_capacity()
553  * Do not confuse with tcg_current_code_size(); that one applies to a single
554  * TCG context.
555  */
556 size_t tcg_code_size(void)
557 {
558     unsigned int n_ctxs = atomic_read(&n_tcg_ctxs);
559     unsigned int i;
560     size_t total;
561 
562     qemu_mutex_lock(&region.lock);
563     total = region.agg_size_full;
564     for (i = 0; i < n_ctxs; i++) {
565         const TCGContext *s = atomic_read(&tcg_ctxs[i]);
566         size_t size;
567 
568         size = atomic_read(&s->code_gen_ptr) - s->code_gen_buffer;
569         g_assert(size <= s->code_gen_buffer_size);
570         total += size;
571     }
572     qemu_mutex_unlock(&region.lock);
573     return total;
574 }
575 
576 /*
577  * Returns the code capacity (in bytes) of the entire cache, i.e. including all
578  * regions.
579  * See also: tcg_code_size()
580  */
581 size_t tcg_code_capacity(void)
582 {
583     size_t guard_size, capacity;
584 
585     /* no need for synchronization; these variables are set at init time */
586     guard_size = region.stride - region.size;
587     capacity = region.end + guard_size - region.start;
588     capacity -= region.n * (guard_size + TCG_HIGHWATER);
589     return capacity;
590 }
591 
592 /* pool based memory allocation */
593 void *tcg_malloc_internal(TCGContext *s, int size)
594 {
595     TCGPool *p;
596     int pool_size;
597 
598     if (size > TCG_POOL_CHUNK_SIZE) {
599         /* big malloc: insert a new pool (XXX: could optimize) */
600         p = g_malloc(sizeof(TCGPool) + size);
601         p->size = size;
602         p->next = s->pool_first_large;
603         s->pool_first_large = p;
604         return p->data;
605     } else {
606         p = s->pool_current;
607         if (!p) {
608             p = s->pool_first;
609             if (!p)
610                 goto new_pool;
611         } else {
612             if (!p->next) {
613             new_pool:
614                 pool_size = TCG_POOL_CHUNK_SIZE;
615                 p = g_malloc(sizeof(TCGPool) + pool_size);
616                 p->size = pool_size;
617                 p->next = NULL;
618                 if (s->pool_current)
619                     s->pool_current->next = p;
620                 else
621                     s->pool_first = p;
622             } else {
623                 p = p->next;
624             }
625         }
626     }
627     s->pool_current = p;
628     s->pool_cur = p->data + size;
629     s->pool_end = p->data + p->size;
630     return p->data;
631 }
632 
633 void tcg_pool_reset(TCGContext *s)
634 {
635     TCGPool *p, *t;
636     for (p = s->pool_first_large; p; p = t) {
637         t = p->next;
638         g_free(p);
639     }
640     s->pool_first_large = NULL;
641     s->pool_cur = s->pool_end = NULL;
642     s->pool_current = NULL;
643 }
644 
/* Description of one helper function; entries live in all_helpers[]. */
typedef struct TCGHelperInfo {
    void *func;        /* helper entry point; used as the key in helper_table */
    const char *name;
    unsigned flags;
    unsigned sizemask; /* NOTE(review): encoding not visible here -- see users */
} TCGHelperInfo;
651 
652 #include "exec/helper-proto.h"
653 
/* Table of all helpers; its entries come from the included header. */
static const TCGHelperInfo all_helpers[] = {
#include "exec/helper-tcg.h"
};
/* Maps a helper's function pointer to its TCGHelperInfo entry. */
static GHashTable *helper_table;

/*
 * Copy of tcg_target_reg_alloc_order in which the leading run of registers
 * that are not call-clobbered is reversed; built by tcg_context_init().
 */
static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)];
660 static void process_op_defs(TCGContext *s);
661 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
662                                             TCGReg reg, const char *name);
663 
664 void tcg_context_init(TCGContext *s)
665 {
666     int op, total_args, n, i;
667     TCGOpDef *def;
668     TCGArgConstraint *args_ct;
669     int *sorted_args;
670     TCGTemp *ts;
671 
672     memset(s, 0, sizeof(*s));
673     s->nb_globals = 0;
674 
675     /* Count total number of arguments and allocate the corresponding
676        space */
677     total_args = 0;
678     for(op = 0; op < NB_OPS; op++) {
679         def = &tcg_op_defs[op];
680         n = def->nb_iargs + def->nb_oargs;
681         total_args += n;
682     }
683 
684     args_ct = g_malloc(sizeof(TCGArgConstraint) * total_args);
685     sorted_args = g_malloc(sizeof(int) * total_args);
686 
687     for(op = 0; op < NB_OPS; op++) {
688         def = &tcg_op_defs[op];
689         def->args_ct = args_ct;
690         def->sorted_args = sorted_args;
691         n = def->nb_iargs + def->nb_oargs;
692         sorted_args += n;
693         args_ct += n;
694     }
695 
696     /* Register helpers.  */
697     /* Use g_direct_hash/equal for direct pointer comparisons on func.  */
698     helper_table = g_hash_table_new(NULL, NULL);
699 
700     for (i = 0; i < ARRAY_SIZE(all_helpers); ++i) {
701         g_hash_table_insert(helper_table, (gpointer)all_helpers[i].func,
702                             (gpointer)&all_helpers[i]);
703     }
704 
705     tcg_target_init(s);
706     process_op_defs(s);
707 
708     /* Reverse the order of the saved registers, assuming they're all at
709        the start of tcg_target_reg_alloc_order.  */
710     for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) {
711         int r = tcg_target_reg_alloc_order[n];
712         if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) {
713             break;
714         }
715     }
716     for (i = 0; i < n; ++i) {
717         indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i];
718     }
719     for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) {
720         indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i];
721     }
722 
723     tcg_ctx = s;
724     /*
725      * In user-mode we simply share the init context among threads, since we
726      * use a single region. See the documentation tcg_region_init() for the
727      * reasoning behind this.
728      * In softmmu we will have at most max_cpus TCG threads.
729      */
730 #ifdef CONFIG_USER_ONLY
731     tcg_ctxs = &tcg_ctx;
732     n_tcg_ctxs = 1;
733 #else
734     tcg_ctxs = g_new(TCGContext *, max_cpus);
735 #endif
736 
737     tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0));
738     ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env");
739     cpu_env = temp_tcgv_ptr(ts);
740 }
741 
742 /*
743  * Allocate TBs right before their corresponding translated code, making
744  * sure that TBs and code are on different cache lines.
745  */
746 TranslationBlock *tcg_tb_alloc(TCGContext *s)
747 {
748     uintptr_t align = qemu_icache_linesize;
749     TranslationBlock *tb;
750     void *next;
751 
752  retry:
753     tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align);
754     next = (void *)ROUND_UP((uintptr_t)(tb + 1), align);
755 
756     if (unlikely(next > s->code_gen_highwater)) {
757         if (tcg_region_alloc(s)) {
758             return NULL;
759         }
760         goto retry;
761     }
762     atomic_set(&s->code_gen_ptr, next);
763     s->data_gen_ptr = NULL;
764     return tb;
765 }
766 
767 void tcg_prologue_init(TCGContext *s)
768 {
769     size_t prologue_size, total_size;
770     void *buf0, *buf1;
771 
772     /* Put the prologue at the beginning of code_gen_buffer.  */
773     buf0 = s->code_gen_buffer;
774     total_size = s->code_gen_buffer_size;
775     s->code_ptr = buf0;
776     s->code_buf = buf0;
777     s->data_gen_ptr = NULL;
778     s->code_gen_prologue = buf0;
779 
780     /* Compute a high-water mark, at which we voluntarily flush the buffer
781        and start over.  The size here is arbitrary, significantly larger
782        than we expect the code generation for any one opcode to require.  */
783     s->code_gen_highwater = s->code_gen_buffer + (total_size - TCG_HIGHWATER);
784 
785 #ifdef TCG_TARGET_NEED_POOL_LABELS
786     s->pool_labels = NULL;
787 #endif
788 
789     /* Generate the prologue.  */
790     tcg_target_qemu_prologue(s);
791 
792 #ifdef TCG_TARGET_NEED_POOL_LABELS
793     /* Allow the prologue to put e.g. guest_base into a pool entry.  */
794     {
795         bool ok = tcg_out_pool_finalize(s);
796         tcg_debug_assert(ok);
797     }
798 #endif
799 
800     buf1 = s->code_ptr;
801     flush_icache_range((uintptr_t)buf0, (uintptr_t)buf1);
802 
803     /* Deduct the prologue from the buffer.  */
804     prologue_size = tcg_current_code_size(s);
805     s->code_gen_ptr = buf1;
806     s->code_gen_buffer = buf1;
807     s->code_buf = buf1;
808     total_size -= prologue_size;
809     s->code_gen_buffer_size = total_size;
810 
811     tcg_register_jit(s->code_gen_buffer, total_size);
812 
813 #ifdef DEBUG_DISAS
814     if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
815         qemu_log_lock();
816         qemu_log("PROLOGUE: [size=%zu]\n", prologue_size);
817         if (s->data_gen_ptr) {
818             size_t code_size = s->data_gen_ptr - buf0;
819             size_t data_size = prologue_size - code_size;
820             size_t i;
821 
822             log_disas(buf0, code_size);
823 
824             for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) {
825                 if (sizeof(tcg_target_ulong) == 8) {
826                     qemu_log("0x%08" PRIxPTR ":  .quad  0x%016" PRIx64 "\n",
827                              (uintptr_t)s->data_gen_ptr + i,
828                              *(uint64_t *)(s->data_gen_ptr + i));
829                 } else {
830                     qemu_log("0x%08" PRIxPTR ":  .long  0x%08x\n",
831                              (uintptr_t)s->data_gen_ptr + i,
832                              *(uint32_t *)(s->data_gen_ptr + i));
833                 }
834             }
835         } else {
836             log_disas(buf0, prologue_size);
837         }
838         qemu_log("\n");
839         qemu_log_flush();
840         qemu_log_unlock();
841     }
842 #endif
843 
844     /* Assert that goto_ptr is implemented completely.  */
845     if (TCG_TARGET_HAS_goto_ptr) {
846         tcg_debug_assert(s->code_gen_epilogue != NULL);
847     }
848 }
849 
850 void tcg_func_start(TCGContext *s)
851 {
852     tcg_pool_reset(s);
853     s->nb_temps = s->nb_globals;
854 
855     /* No temps have been previously allocated for size or locality.  */
856     memset(s->free_temps, 0, sizeof(s->free_temps));
857 
858     s->nb_labels = 0;
859     s->current_frame_offset = s->frame_start;
860 
861 #ifdef CONFIG_DEBUG_TCG
862     s->goto_tb_issue_mask = 0;
863 #endif
864 
865     QTAILQ_INIT(&s->ops);
866     QTAILQ_INIT(&s->free_ops);
867 }
868 
869 static inline TCGTemp *tcg_temp_alloc(TCGContext *s)
870 {
871     int n = s->nb_temps++;
872     tcg_debug_assert(n < TCG_MAX_TEMPS);
873     return memset(&s->temps[n], 0, sizeof(TCGTemp));
874 }
875 
876 static inline TCGTemp *tcg_global_alloc(TCGContext *s)
877 {
878     TCGTemp *ts;
879 
880     tcg_debug_assert(s->nb_globals == s->nb_temps);
881     s->nb_globals++;
882     ts = tcg_temp_alloc(s);
883     ts->temp_global = 1;
884 
885     return ts;
886 }
887 
888 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
889                                             TCGReg reg, const char *name)
890 {
891     TCGTemp *ts;
892 
893     if (TCG_TARGET_REG_BITS == 32 && type != TCG_TYPE_I32) {
894         tcg_abort();
895     }
896 
897     ts = tcg_global_alloc(s);
898     ts->base_type = type;
899     ts->type = type;
900     ts->fixed_reg = 1;
901     ts->reg = reg;
902     ts->name = name;
903     tcg_regset_set_reg(s->reserved_regs, reg);
904 
905     return ts;
906 }
907 
908 void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size)
909 {
910     s->frame_start = start;
911     s->frame_end = start + size;
912     s->frame_temp
913         = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame");
914 }
915 
916 TCGTemp *tcg_global_mem_new_internal(TCGType type, TCGv_ptr base,
917                                      intptr_t offset, const char *name)
918 {
919     TCGContext *s = tcg_ctx;
920     TCGTemp *base_ts = tcgv_ptr_temp(base);
921     TCGTemp *ts = tcg_global_alloc(s);
922     int indirect_reg = 0, bigendian = 0;
923 #ifdef HOST_WORDS_BIGENDIAN
924     bigendian = 1;
925 #endif
926 
927     if (!base_ts->fixed_reg) {
928         /* We do not support double-indirect registers.  */
929         tcg_debug_assert(!base_ts->indirect_reg);
930         base_ts->indirect_base = 1;
931         s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64
932                             ? 2 : 1);
933         indirect_reg = 1;
934     }
935 
936     if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
937         TCGTemp *ts2 = tcg_global_alloc(s);
938         char buf[64];
939 
940         ts->base_type = TCG_TYPE_I64;
941         ts->type = TCG_TYPE_I32;
942         ts->indirect_reg = indirect_reg;
943         ts->mem_allocated = 1;
944         ts->mem_base = base_ts;
945         ts->mem_offset = offset + bigendian * 4;
946         pstrcpy(buf, sizeof(buf), name);
947         pstrcat(buf, sizeof(buf), "_0");
948         ts->name = strdup(buf);
949 
950         tcg_debug_assert(ts2 == ts + 1);
951         ts2->base_type = TCG_TYPE_I64;
952         ts2->type = TCG_TYPE_I32;
953         ts2->indirect_reg = indirect_reg;
954         ts2->mem_allocated = 1;
955         ts2->mem_base = base_ts;
956         ts2->mem_offset = offset + (1 - bigendian) * 4;
957         pstrcpy(buf, sizeof(buf), name);
958         pstrcat(buf, sizeof(buf), "_1");
959         ts2->name = strdup(buf);
960     } else {
961         ts->base_type = type;
962         ts->type = type;
963         ts->indirect_reg = indirect_reg;
964         ts->mem_allocated = 1;
965         ts->mem_base = base_ts;
966         ts->mem_offset = offset;
967         ts->name = name;
968     }
969     return ts;
970 }
971 
972 static TCGTemp *tcg_temp_new_internal(TCGType type, int temp_local)
973 {
974     TCGContext *s = tcg_ctx;
975     TCGTemp *ts;
976     int idx, k;
977 
978     k = type + (temp_local ? TCG_TYPE_COUNT : 0);
979     idx = find_first_bit(s->free_temps[k].l, TCG_MAX_TEMPS);
980     if (idx < TCG_MAX_TEMPS) {
981         /* There is already an available temp with the right type.  */
982         clear_bit(idx, s->free_temps[k].l);
983 
984         ts = &s->temps[idx];
985         ts->temp_allocated = 1;
986         tcg_debug_assert(ts->base_type == type);
987         tcg_debug_assert(ts->temp_local == temp_local);
988     } else {
989         ts = tcg_temp_alloc(s);
990         if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
991             TCGTemp *ts2 = tcg_temp_alloc(s);
992 
993             ts->base_type = type;
994             ts->type = TCG_TYPE_I32;
995             ts->temp_allocated = 1;
996             ts->temp_local = temp_local;
997 
998             tcg_debug_assert(ts2 == ts + 1);
999             ts2->base_type = TCG_TYPE_I64;
1000             ts2->type = TCG_TYPE_I32;
1001             ts2->temp_allocated = 1;
1002             ts2->temp_local = temp_local;
1003         } else {
1004             ts->base_type = type;
1005             ts->type = type;
1006             ts->temp_allocated = 1;
1007             ts->temp_local = temp_local;
1008         }
1009     }
1010 
1011 #if defined(CONFIG_DEBUG_TCG)
1012     s->temps_in_use++;
1013 #endif
1014     return ts;
1015 }
1016 
1017 TCGv_i32 tcg_temp_new_internal_i32(int temp_local)
1018 {
1019     TCGTemp *t = tcg_temp_new_internal(TCG_TYPE_I32, temp_local);
1020     return temp_tcgv_i32(t);
1021 }
1022 
1023 TCGv_i64 tcg_temp_new_internal_i64(int temp_local)
1024 {
1025     TCGTemp *t = tcg_temp_new_internal(TCG_TYPE_I64, temp_local);
1026     return temp_tcgv_i64(t);
1027 }
1028 
1029 static void tcg_temp_free_internal(TCGTemp *ts)
1030 {
1031     TCGContext *s = tcg_ctx;
1032     int k, idx;
1033 
1034 #if defined(CONFIG_DEBUG_TCG)
1035     s->temps_in_use--;
1036     if (s->temps_in_use < 0) {
1037         fprintf(stderr, "More temporaries freed than allocated!\n");
1038     }
1039 #endif
1040 
1041     tcg_debug_assert(ts->temp_global == 0);
1042     tcg_debug_assert(ts->temp_allocated != 0);
1043     ts->temp_allocated = 0;
1044 
1045     idx = temp_idx(ts);
1046     k = ts->base_type + (ts->temp_local ? TCG_TYPE_COUNT : 0);
1047     set_bit(idx, s->free_temps[k].l);
1048 }
1049 
1050 void tcg_temp_free_i32(TCGv_i32 arg)
1051 {
1052     tcg_temp_free_internal(tcgv_i32_temp(arg));
1053 }
1054 
1055 void tcg_temp_free_i64(TCGv_i64 arg)
1056 {
1057     tcg_temp_free_internal(tcgv_i64_temp(arg));
1058 }
1059 
1060 TCGv_i32 tcg_const_i32(int32_t val)
1061 {
1062     TCGv_i32 t0;
1063     t0 = tcg_temp_new_i32();
1064     tcg_gen_movi_i32(t0, val);
1065     return t0;
1066 }
1067 
1068 TCGv_i64 tcg_const_i64(int64_t val)
1069 {
1070     TCGv_i64 t0;
1071     t0 = tcg_temp_new_i64();
1072     tcg_gen_movi_i64(t0, val);
1073     return t0;
1074 }
1075 
1076 TCGv_i32 tcg_const_local_i32(int32_t val)
1077 {
1078     TCGv_i32 t0;
1079     t0 = tcg_temp_local_new_i32();
1080     tcg_gen_movi_i32(t0, val);
1081     return t0;
1082 }
1083 
1084 TCGv_i64 tcg_const_local_i64(int64_t val)
1085 {
1086     TCGv_i64 t0;
1087     t0 = tcg_temp_local_new_i64();
1088     tcg_gen_movi_i64(t0, val);
1089     return t0;
1090 }
1091 
1092 #if defined(CONFIG_DEBUG_TCG)
1093 void tcg_clear_temp_count(void)
1094 {
1095     TCGContext *s = tcg_ctx;
1096     s->temps_in_use = 0;
1097 }
1098 
1099 int tcg_check_temp_count(void)
1100 {
1101     TCGContext *s = tcg_ctx;
1102     if (s->temps_in_use) {
1103         /* Clear the count so that we don't give another
1104          * warning immediately next time around.
1105          */
1106         s->temps_in_use = 0;
1107         return 1;
1108     }
1109     return 0;
1110 }
1111 #endif
1112 
1113 /* Return true if OP may appear in the opcode stream.
1114    Test the runtime variable that controls each opcode.  */
1115 bool tcg_op_supported(TCGOpcode op)
1116 {
1117     switch (op) {
1118     case INDEX_op_discard:
1119     case INDEX_op_set_label:
1120     case INDEX_op_call:
1121     case INDEX_op_br:
1122     case INDEX_op_mb:
1123     case INDEX_op_insn_start:
1124     case INDEX_op_exit_tb:
1125     case INDEX_op_goto_tb:
1126     case INDEX_op_qemu_ld_i32:
1127     case INDEX_op_qemu_st_i32:
1128     case INDEX_op_qemu_ld_i64:
1129     case INDEX_op_qemu_st_i64:
1130         return true;
1131 
1132     case INDEX_op_goto_ptr:
1133         return TCG_TARGET_HAS_goto_ptr;
1134 
1135     case INDEX_op_mov_i32:
1136     case INDEX_op_movi_i32:
1137     case INDEX_op_setcond_i32:
1138     case INDEX_op_brcond_i32:
1139     case INDEX_op_ld8u_i32:
1140     case INDEX_op_ld8s_i32:
1141     case INDEX_op_ld16u_i32:
1142     case INDEX_op_ld16s_i32:
1143     case INDEX_op_ld_i32:
1144     case INDEX_op_st8_i32:
1145     case INDEX_op_st16_i32:
1146     case INDEX_op_st_i32:
1147     case INDEX_op_add_i32:
1148     case INDEX_op_sub_i32:
1149     case INDEX_op_mul_i32:
1150     case INDEX_op_and_i32:
1151     case INDEX_op_or_i32:
1152     case INDEX_op_xor_i32:
1153     case INDEX_op_shl_i32:
1154     case INDEX_op_shr_i32:
1155     case INDEX_op_sar_i32:
1156         return true;
1157 
1158     case INDEX_op_movcond_i32:
1159         return TCG_TARGET_HAS_movcond_i32;
1160     case INDEX_op_div_i32:
1161     case INDEX_op_divu_i32:
1162         return TCG_TARGET_HAS_div_i32;
1163     case INDEX_op_rem_i32:
1164     case INDEX_op_remu_i32:
1165         return TCG_TARGET_HAS_rem_i32;
1166     case INDEX_op_div2_i32:
1167     case INDEX_op_divu2_i32:
1168         return TCG_TARGET_HAS_div2_i32;
1169     case INDEX_op_rotl_i32:
1170     case INDEX_op_rotr_i32:
1171         return TCG_TARGET_HAS_rot_i32;
1172     case INDEX_op_deposit_i32:
1173         return TCG_TARGET_HAS_deposit_i32;
1174     case INDEX_op_extract_i32:
1175         return TCG_TARGET_HAS_extract_i32;
1176     case INDEX_op_sextract_i32:
1177         return TCG_TARGET_HAS_sextract_i32;
1178     case INDEX_op_add2_i32:
1179         return TCG_TARGET_HAS_add2_i32;
1180     case INDEX_op_sub2_i32:
1181         return TCG_TARGET_HAS_sub2_i32;
1182     case INDEX_op_mulu2_i32:
1183         return TCG_TARGET_HAS_mulu2_i32;
1184     case INDEX_op_muls2_i32:
1185         return TCG_TARGET_HAS_muls2_i32;
1186     case INDEX_op_muluh_i32:
1187         return TCG_TARGET_HAS_muluh_i32;
1188     case INDEX_op_mulsh_i32:
1189         return TCG_TARGET_HAS_mulsh_i32;
1190     case INDEX_op_ext8s_i32:
1191         return TCG_TARGET_HAS_ext8s_i32;
1192     case INDEX_op_ext16s_i32:
1193         return TCG_TARGET_HAS_ext16s_i32;
1194     case INDEX_op_ext8u_i32:
1195         return TCG_TARGET_HAS_ext8u_i32;
1196     case INDEX_op_ext16u_i32:
1197         return TCG_TARGET_HAS_ext16u_i32;
1198     case INDEX_op_bswap16_i32:
1199         return TCG_TARGET_HAS_bswap16_i32;
1200     case INDEX_op_bswap32_i32:
1201         return TCG_TARGET_HAS_bswap32_i32;
1202     case INDEX_op_not_i32:
1203         return TCG_TARGET_HAS_not_i32;
1204     case INDEX_op_neg_i32:
1205         return TCG_TARGET_HAS_neg_i32;
1206     case INDEX_op_andc_i32:
1207         return TCG_TARGET_HAS_andc_i32;
1208     case INDEX_op_orc_i32:
1209         return TCG_TARGET_HAS_orc_i32;
1210     case INDEX_op_eqv_i32:
1211         return TCG_TARGET_HAS_eqv_i32;
1212     case INDEX_op_nand_i32:
1213         return TCG_TARGET_HAS_nand_i32;
1214     case INDEX_op_nor_i32:
1215         return TCG_TARGET_HAS_nor_i32;
1216     case INDEX_op_clz_i32:
1217         return TCG_TARGET_HAS_clz_i32;
1218     case INDEX_op_ctz_i32:
1219         return TCG_TARGET_HAS_ctz_i32;
1220     case INDEX_op_ctpop_i32:
1221         return TCG_TARGET_HAS_ctpop_i32;
1222 
1223     case INDEX_op_brcond2_i32:
1224     case INDEX_op_setcond2_i32:
1225         return TCG_TARGET_REG_BITS == 32;
1226 
1227     case INDEX_op_mov_i64:
1228     case INDEX_op_movi_i64:
1229     case INDEX_op_setcond_i64:
1230     case INDEX_op_brcond_i64:
1231     case INDEX_op_ld8u_i64:
1232     case INDEX_op_ld8s_i64:
1233     case INDEX_op_ld16u_i64:
1234     case INDEX_op_ld16s_i64:
1235     case INDEX_op_ld32u_i64:
1236     case INDEX_op_ld32s_i64:
1237     case INDEX_op_ld_i64:
1238     case INDEX_op_st8_i64:
1239     case INDEX_op_st16_i64:
1240     case INDEX_op_st32_i64:
1241     case INDEX_op_st_i64:
1242     case INDEX_op_add_i64:
1243     case INDEX_op_sub_i64:
1244     case INDEX_op_mul_i64:
1245     case INDEX_op_and_i64:
1246     case INDEX_op_or_i64:
1247     case INDEX_op_xor_i64:
1248     case INDEX_op_shl_i64:
1249     case INDEX_op_shr_i64:
1250     case INDEX_op_sar_i64:
1251     case INDEX_op_ext_i32_i64:
1252     case INDEX_op_extu_i32_i64:
1253         return TCG_TARGET_REG_BITS == 64;
1254 
1255     case INDEX_op_movcond_i64:
1256         return TCG_TARGET_HAS_movcond_i64;
1257     case INDEX_op_div_i64:
1258     case INDEX_op_divu_i64:
1259         return TCG_TARGET_HAS_div_i64;
1260     case INDEX_op_rem_i64:
1261     case INDEX_op_remu_i64:
1262         return TCG_TARGET_HAS_rem_i64;
1263     case INDEX_op_div2_i64:
1264     case INDEX_op_divu2_i64:
1265         return TCG_TARGET_HAS_div2_i64;
1266     case INDEX_op_rotl_i64:
1267     case INDEX_op_rotr_i64:
1268         return TCG_TARGET_HAS_rot_i64;
1269     case INDEX_op_deposit_i64:
1270         return TCG_TARGET_HAS_deposit_i64;
1271     case INDEX_op_extract_i64:
1272         return TCG_TARGET_HAS_extract_i64;
1273     case INDEX_op_sextract_i64:
1274         return TCG_TARGET_HAS_sextract_i64;
1275     case INDEX_op_extrl_i64_i32:
1276         return TCG_TARGET_HAS_extrl_i64_i32;
1277     case INDEX_op_extrh_i64_i32:
1278         return TCG_TARGET_HAS_extrh_i64_i32;
1279     case INDEX_op_ext8s_i64:
1280         return TCG_TARGET_HAS_ext8s_i64;
1281     case INDEX_op_ext16s_i64:
1282         return TCG_TARGET_HAS_ext16s_i64;
1283     case INDEX_op_ext32s_i64:
1284         return TCG_TARGET_HAS_ext32s_i64;
1285     case INDEX_op_ext8u_i64:
1286         return TCG_TARGET_HAS_ext8u_i64;
1287     case INDEX_op_ext16u_i64:
1288         return TCG_TARGET_HAS_ext16u_i64;
1289     case INDEX_op_ext32u_i64:
1290         return TCG_TARGET_HAS_ext32u_i64;
1291     case INDEX_op_bswap16_i64:
1292         return TCG_TARGET_HAS_bswap16_i64;
1293     case INDEX_op_bswap32_i64:
1294         return TCG_TARGET_HAS_bswap32_i64;
1295     case INDEX_op_bswap64_i64:
1296         return TCG_TARGET_HAS_bswap64_i64;
1297     case INDEX_op_not_i64:
1298         return TCG_TARGET_HAS_not_i64;
1299     case INDEX_op_neg_i64:
1300         return TCG_TARGET_HAS_neg_i64;
1301     case INDEX_op_andc_i64:
1302         return TCG_TARGET_HAS_andc_i64;
1303     case INDEX_op_orc_i64:
1304         return TCG_TARGET_HAS_orc_i64;
1305     case INDEX_op_eqv_i64:
1306         return TCG_TARGET_HAS_eqv_i64;
1307     case INDEX_op_nand_i64:
1308         return TCG_TARGET_HAS_nand_i64;
1309     case INDEX_op_nor_i64:
1310         return TCG_TARGET_HAS_nor_i64;
1311     case INDEX_op_clz_i64:
1312         return TCG_TARGET_HAS_clz_i64;
1313     case INDEX_op_ctz_i64:
1314         return TCG_TARGET_HAS_ctz_i64;
1315     case INDEX_op_ctpop_i64:
1316         return TCG_TARGET_HAS_ctpop_i64;
1317     case INDEX_op_add2_i64:
1318         return TCG_TARGET_HAS_add2_i64;
1319     case INDEX_op_sub2_i64:
1320         return TCG_TARGET_HAS_sub2_i64;
1321     case INDEX_op_mulu2_i64:
1322         return TCG_TARGET_HAS_mulu2_i64;
1323     case INDEX_op_muls2_i64:
1324         return TCG_TARGET_HAS_muls2_i64;
1325     case INDEX_op_muluh_i64:
1326         return TCG_TARGET_HAS_muluh_i64;
1327     case INDEX_op_mulsh_i64:
1328         return TCG_TARGET_HAS_mulsh_i64;
1329 
1330     case NB_OPS:
1331         break;
1332     }
1333     g_assert_not_reached();
1334 }
1335 
/*
 * Emit an INDEX_op_call op that calls the helper FUNC.
 *
 * RET is the temp receiving the result, or NULL when there is none.
 * ARGS holds NARGS input temps.  The helper's flags and sizemask are
 * looked up in helper_table using FUNC as the key.
 *
 * As used below, bit 0 of sizemask marks a 64-bit return value, and for
 * argument i bit (i+1)*2 marks a 64-bit argument while the next bit up
 * marks a signed one.
 *
 * The op's argument array is filled with the output temps, then the
 * input temps, then FUNC and the helper flags.
 *
 * Note: we convert the 64 bit args to 32 bit and do some alignment
 * and endian swap. Maybe it would be better to do the alignment
 * and endian swap in tcg_reg_alloc_call().
 */
void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args)
{
    int i, real_args, nb_rets, pi;
    unsigned sizemask, flags;
    TCGHelperInfo *info;
    TCGOp *op;

    /* NOTE(review): a FUNC missing from helper_table is not checked for
       here and would be dereferenced as NULL.  */
    info = g_hash_table_lookup(helper_table, (gpointer)func);
    flags = info->flags;
    sizemask = info->sizemask;

#if defined(__sparc__) && !defined(__arch64__) \
    && !defined(CONFIG_TCG_INTERPRETER)
    /* We have 64-bit values in one register, but need to pass as two
       separate parameters.  Split them.  */
    int orig_sizemask = sizemask;
    int orig_nargs = nargs;
    TCGv_i64 retl, reth;
    TCGTemp *split_args[MAX_OPC_PARAM];

    retl = NULL;
    reth = NULL;
    if (sizemask != 0) {
        for (i = real_args = 0; i < nargs; ++i) {
            int is_64bit = sizemask & (1 << (i+1)*2);
            if (is_64bit) {
                /* Replace the 64-bit argument by two fresh 32-bit temps,
                   high half first.  */
                TCGv_i64 orig = temp_tcgv_i64(args[i]);
                TCGv_i32 h = tcg_temp_new_i32();
                TCGv_i32 l = tcg_temp_new_i32();
                tcg_gen_extr_i64_i32(l, h, orig);
                split_args[real_args++] = tcgv_i32_temp(h);
                split_args[real_args++] = tcgv_i32_temp(l);
            } else {
                split_args[real_args++] = args[i];
            }
        }
        /* From here on work with the split list; clearing sizemask makes
           the code below treat every entry as a single-slot argument.  */
        nargs = real_args;
        args = split_args;
        sizemask = 0;
    }
#elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
    /* Replace every non-64-bit argument by a fresh 64-bit temp holding
       its sign- or zero-extension.  This overwrites entries of the
       caller's ARGS array.  */
    for (i = 0; i < nargs; ++i) {
        int is_64bit = sizemask & (1 << (i+1)*2);
        int is_signed = sizemask & (2 << (i+1)*2);
        if (!is_64bit) {
            TCGv_i64 temp = tcg_temp_new_i64();
            TCGv_i64 orig = temp_tcgv_i64(args[i]);
            if (is_signed) {
                tcg_gen_ext32s_i64(temp, orig);
            } else {
                tcg_gen_ext32u_i64(temp, orig);
            }
            args[i] = tcgv_i64_temp(temp);
        }
    }
#endif /* TCG_TARGET_EXTEND_ARGS */

    op = tcg_emit_op(INDEX_op_call);

    /* pi is the next free slot in op->args; outputs are written first.  */
    pi = 0;
    if (ret != NULL) {
#if defined(__sparc__) && !defined(__arch64__) \
    && !defined(CONFIG_TCG_INTERPRETER)
        if (orig_sizemask & 1) {
            /* The 32-bit ABI is going to return the 64-bit value in
               the %o0/%o1 register pair.  Prepare for this by using
               two return temporaries, and reassemble below.  */
            retl = tcg_temp_new_i64();
            reth = tcg_temp_new_i64();
            op->args[pi++] = tcgv_i64_arg(reth);
            op->args[pi++] = tcgv_i64_arg(retl);
            nb_rets = 2;
        } else {
            op->args[pi++] = temp_arg(ret);
            nb_rets = 1;
        }
#else
        if (TCG_TARGET_REG_BITS < 64 && (sizemask & 1)) {
            /* A 64-bit result on a 32-bit host uses RET and the temp
               after it; host endianness decides which is listed first.  */
#ifdef HOST_WORDS_BIGENDIAN
            op->args[pi++] = temp_arg(ret + 1);
            op->args[pi++] = temp_arg(ret);
#else
            op->args[pi++] = temp_arg(ret);
            op->args[pi++] = temp_arg(ret + 1);
#endif
            nb_rets = 2;
        } else {
            op->args[pi++] = temp_arg(ret);
            nb_rets = 1;
        }
#endif
    } else {
        nb_rets = 0;
    }
    TCGOP_CALLO(op) = nb_rets;

    /* real_args counts the input slots written, including dummy padding
       slots and both halves of split 64-bit arguments.  */
    real_args = 0;
    for (i = 0; i < nargs; i++) {
        int is_64bit = sizemask & (1 << (i+1)*2);
        if (TCG_TARGET_REG_BITS < 64 && is_64bit) {
#ifdef TCG_TARGET_CALL_ALIGN_ARGS
            /* some targets want aligned 64 bit args */
            if (real_args & 1) {
                op->args[pi++] = TCG_CALL_DUMMY_ARG;
                real_args++;
            }
#endif
            /* If stack grows up, then we will be placing successive
               arguments at lower addresses, which means we need to
               reverse the order compared to how we would normally
               treat either big or little-endian.  For those arguments
               that will wind up in registers, this still works for
               HPPA (the only current STACK_GROWSUP target) since the
               argument registers are *also* allocated in decreasing
               order.  If another such target is added, this logic may
               have to get more complicated to differentiate between
               stack arguments and register arguments.  */
#if defined(HOST_WORDS_BIGENDIAN) != defined(TCG_TARGET_STACK_GROWSUP)
            op->args[pi++] = temp_arg(args[i] + 1);
            op->args[pi++] = temp_arg(args[i]);
#else
            op->args[pi++] = temp_arg(args[i]);
            op->args[pi++] = temp_arg(args[i] + 1);
#endif
            real_args += 2;
            continue;
        }

        op->args[pi++] = temp_arg(args[i]);
        real_args++;
    }
    /* The helper address and its flags follow the input arguments.  */
    op->args[pi++] = (uintptr_t)func;
    op->args[pi++] = flags;
    TCGOP_CALLI(op) = real_args;

    /* Make sure the fields didn't overflow.  */
    tcg_debug_assert(TCGOP_CALLI(op) == real_args);
    tcg_debug_assert(pi <= ARRAY_SIZE(op->args));

#if defined(__sparc__) && !defined(__arch64__) \
    && !defined(CONFIG_TCG_INTERPRETER)
    /* Free all of the parts we allocated above.  */
    for (i = real_args = 0; i < orig_nargs; ++i) {
        int is_64bit = orig_sizemask & (1 << (i+1)*2);
        if (is_64bit) {
            /* Two 32-bit halves were allocated for this argument.  */
            tcg_temp_free_internal(args[real_args++]);
            tcg_temp_free_internal(args[real_args++]);
        } else {
            real_args++;
        }
    }
    if (orig_sizemask & 1) {
        /* The 32-bit ABI returned two 32-bit pieces.  Re-assemble them.
           Note that describing these as TCGv_i64 eliminates an unnecessary
           zero-extension that tcg_gen_concat_i32_i64 would create.  */
        tcg_gen_concat32_i64(temp_tcgv_i64(ret), retl, reth);
        tcg_temp_free_i64(retl);
        tcg_temp_free_i64(reth);
    }
#elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
    /* Free the extension temps created before the call was emitted.  */
    for (i = 0; i < nargs; ++i) {
        int is_64bit = sizemask & (1 << (i+1)*2);
        if (!is_64bit) {
            tcg_temp_free_internal(args[i]);
        }
    }
#endif /* TCG_TARGET_EXTEND_ARGS */
}
1507 
1508 static void tcg_reg_alloc_start(TCGContext *s)
1509 {
1510     int i, n;
1511     TCGTemp *ts;
1512 
1513     for (i = 0, n = s->nb_globals; i < n; i++) {
1514         ts = &s->temps[i];
1515         ts->val_type = (ts->fixed_reg ? TEMP_VAL_REG : TEMP_VAL_MEM);
1516     }
1517     for (n = s->nb_temps; i < n; i++) {
1518         ts = &s->temps[i];
1519         ts->val_type = (ts->temp_local ? TEMP_VAL_MEM : TEMP_VAL_DEAD);
1520         ts->mem_allocated = 0;
1521         ts->fixed_reg = 0;
1522     }
1523 
1524     memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp));
1525 }
1526 
1527 static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size,
1528                                  TCGTemp *ts)
1529 {
1530     int idx = temp_idx(ts);
1531 
1532     if (ts->temp_global) {
1533         pstrcpy(buf, buf_size, ts->name);
1534     } else if (ts->temp_local) {
1535         snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
1536     } else {
1537         snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
1538     }
1539     return buf;
1540 }
1541 
1542 static char *tcg_get_arg_str(TCGContext *s, char *buf,
1543                              int buf_size, TCGArg arg)
1544 {
1545     return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg));
1546 }
1547 
1548 /* Find helper name.  */
1549 static inline const char *tcg_find_helper(TCGContext *s, uintptr_t val)
1550 {
1551     const char *ret = NULL;
1552     if (helper_table) {
1553         TCGHelperInfo *info = g_hash_table_lookup(helper_table, (gpointer)val);
1554         if (info) {
1555             ret = info->name;
1556         }
1557     }
1558     return ret;
1559 }
1560 
/*
 * Printable names of the comparison conditions, indexed by TCG_COND_*
 * value.  Used by tcg_dump_ops(), which prints the raw value in hex for
 * any index that is out of range or has no entry here.
 */
static const char * const cond_name[] =
{
    [TCG_COND_NEVER] = "never",
    [TCG_COND_ALWAYS] = "always",
    [TCG_COND_EQ] = "eq",
    [TCG_COND_NE] = "ne",
    [TCG_COND_LT] = "lt",
    [TCG_COND_GE] = "ge",
    [TCG_COND_LE] = "le",
    [TCG_COND_GT] = "gt",
    [TCG_COND_LTU] = "ltu",
    [TCG_COND_GEU] = "geu",
    [TCG_COND_LEU] = "leu",
    [TCG_COND_GTU] = "gtu"
};
1576 
1577 static const char * const ldst_name[] =
1578 {
1579     [MO_UB]   = "ub",
1580     [MO_SB]   = "sb",
1581     [MO_LEUW] = "leuw",
1582     [MO_LESW] = "lesw",
1583     [MO_LEUL] = "leul",
1584     [MO_LESL] = "lesl",
1585     [MO_LEQ]  = "leq",
1586     [MO_BEUW] = "beuw",
1587     [MO_BESW] = "besw",
1588     [MO_BEUL] = "beul",
1589     [MO_BESL] = "besl",
1590     [MO_BEQ]  = "beq",
1591 };
1592 
/*
 * Prefix printed for the alignment field of a memory operation, indexed
 * by (memop & MO_AMASK) >> MO_ASHIFT.  The build's default alignment
 * (MO_ALIGN when ALIGNED_ONLY is defined, MO_UNALN otherwise) prints as
 * an empty string; every other setting gets an explicit "...+" prefix.
 */
static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
#ifdef ALIGNED_ONLY
    [MO_UNALN >> MO_ASHIFT]    = "un+",
    [MO_ALIGN >> MO_ASHIFT]    = "",
#else
    [MO_UNALN >> MO_ASHIFT]    = "",
    [MO_ALIGN >> MO_ASHIFT]    = "al+",
#endif
    [MO_ALIGN_2 >> MO_ASHIFT]  = "al2+",
    [MO_ALIGN_4 >> MO_ASHIFT]  = "al4+",
    [MO_ALIGN_8 >> MO_ASHIFT]  = "al8+",
    [MO_ALIGN_16 >> MO_ASHIFT] = "al16+",
    [MO_ALIGN_32 >> MO_ASHIFT] = "al32+",
    [MO_ALIGN_64 >> MO_ASHIFT] = "al64+",
};
1608 
1609 void tcg_dump_ops(TCGContext *s)
1610 {
1611     char buf[128];
1612     TCGOp *op;
1613 
1614     QTAILQ_FOREACH(op, &s->ops, link) {
1615         int i, k, nb_oargs, nb_iargs, nb_cargs;
1616         const TCGOpDef *def;
1617         TCGOpcode c;
1618         int col = 0;
1619 
1620         c = op->opc;
1621         def = &tcg_op_defs[c];
1622 
1623         if (c == INDEX_op_insn_start) {
1624             col += qemu_log("\n ----");
1625 
1626             for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
1627                 target_ulong a;
1628 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
1629                 a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
1630 #else
1631                 a = op->args[i];
1632 #endif
1633                 col += qemu_log(" " TARGET_FMT_lx, a);
1634             }
1635         } else if (c == INDEX_op_call) {
1636             /* variable number of arguments */
1637             nb_oargs = TCGOP_CALLO(op);
1638             nb_iargs = TCGOP_CALLI(op);
1639             nb_cargs = def->nb_cargs;
1640 
1641             /* function name, flags, out args */
1642             col += qemu_log(" %s %s,$0x%" TCG_PRIlx ",$%d", def->name,
1643                             tcg_find_helper(s, op->args[nb_oargs + nb_iargs]),
1644                             op->args[nb_oargs + nb_iargs + 1], nb_oargs);
1645             for (i = 0; i < nb_oargs; i++) {
1646                 col += qemu_log(",%s", tcg_get_arg_str(s, buf, sizeof(buf),
1647                                                        op->args[i]));
1648             }
1649             for (i = 0; i < nb_iargs; i++) {
1650                 TCGArg arg = op->args[nb_oargs + i];
1651                 const char *t = "<dummy>";
1652                 if (arg != TCG_CALL_DUMMY_ARG) {
1653                     t = tcg_get_arg_str(s, buf, sizeof(buf), arg);
1654                 }
1655                 col += qemu_log(",%s", t);
1656             }
1657         } else {
1658             col += qemu_log(" %s ", def->name);
1659 
1660             nb_oargs = def->nb_oargs;
1661             nb_iargs = def->nb_iargs;
1662             nb_cargs = def->nb_cargs;
1663 
1664             k = 0;
1665             for (i = 0; i < nb_oargs; i++) {
1666                 if (k != 0) {
1667                     col += qemu_log(",");
1668                 }
1669                 col += qemu_log("%s", tcg_get_arg_str(s, buf, sizeof(buf),
1670                                                       op->args[k++]));
1671             }
1672             for (i = 0; i < nb_iargs; i++) {
1673                 if (k != 0) {
1674                     col += qemu_log(",");
1675                 }
1676                 col += qemu_log("%s", tcg_get_arg_str(s, buf, sizeof(buf),
1677                                                       op->args[k++]));
1678             }
1679             switch (c) {
1680             case INDEX_op_brcond_i32:
1681             case INDEX_op_setcond_i32:
1682             case INDEX_op_movcond_i32:
1683             case INDEX_op_brcond2_i32:
1684             case INDEX_op_setcond2_i32:
1685             case INDEX_op_brcond_i64:
1686             case INDEX_op_setcond_i64:
1687             case INDEX_op_movcond_i64:
1688                 if (op->args[k] < ARRAY_SIZE(cond_name)
1689                     && cond_name[op->args[k]]) {
1690                     col += qemu_log(",%s", cond_name[op->args[k++]]);
1691                 } else {
1692                     col += qemu_log(",$0x%" TCG_PRIlx, op->args[k++]);
1693                 }
1694                 i = 1;
1695                 break;
1696             case INDEX_op_qemu_ld_i32:
1697             case INDEX_op_qemu_st_i32:
1698             case INDEX_op_qemu_ld_i64:
1699             case INDEX_op_qemu_st_i64:
1700                 {
1701                     TCGMemOpIdx oi = op->args[k++];
1702                     TCGMemOp op = get_memop(oi);
1703                     unsigned ix = get_mmuidx(oi);
1704 
1705                     if (op & ~(MO_AMASK | MO_BSWAP | MO_SSIZE)) {
1706                         col += qemu_log(",$0x%x,%u", op, ix);
1707                     } else {
1708                         const char *s_al, *s_op;
1709                         s_al = alignment_name[(op & MO_AMASK) >> MO_ASHIFT];
1710                         s_op = ldst_name[op & (MO_BSWAP | MO_SSIZE)];
1711                         col += qemu_log(",%s%s,%u", s_al, s_op, ix);
1712                     }
1713                     i = 1;
1714                 }
1715                 break;
1716             default:
1717                 i = 0;
1718                 break;
1719             }
1720             switch (c) {
1721             case INDEX_op_set_label:
1722             case INDEX_op_br:
1723             case INDEX_op_brcond_i32:
1724             case INDEX_op_brcond_i64:
1725             case INDEX_op_brcond2_i32:
1726                 col += qemu_log("%s$L%d", k ? "," : "",
1727                                 arg_label(op->args[k])->id);
1728                 i++, k++;
1729                 break;
1730             default:
1731                 break;
1732             }
1733             for (; i < nb_cargs; i++, k++) {
1734                 col += qemu_log("%s$0x%" TCG_PRIlx, k ? "," : "", op->args[k]);
1735             }
1736         }
1737         if (op->life) {
1738             unsigned life = op->life;
1739 
1740             for (; col < 48; ++col) {
1741                 putc(' ', qemu_logfile);
1742             }
1743 
1744             if (life & (SYNC_ARG * 3)) {
1745                 qemu_log("  sync:");
1746                 for (i = 0; i < 2; ++i) {
1747                     if (life & (SYNC_ARG << i)) {
1748                         qemu_log(" %d", i);
1749                     }
1750                 }
1751             }
1752             life /= DEAD_ARG;
1753             if (life) {
1754                 qemu_log("  dead:");
1755                 for (i = 0; life; ++i, life >>= 1) {
1756                     if (life & 1) {
1757                         qemu_log(" %d", i);
1758                     }
1759                 }
1760             }
1761         }
1762         qemu_log("\n");
1763     }
1764 }
1765 
1766 /* we give more priority to constraints with less registers */
1767 static int get_constraint_priority(const TCGOpDef *def, int k)
1768 {
1769     const TCGArgConstraint *arg_ct;
1770 
1771     int i, n;
1772     arg_ct = &def->args_ct[k];
1773     if (arg_ct->ct & TCG_CT_ALIAS) {
1774         /* an alias is equivalent to a single register */
1775         n = 1;
1776     } else {
1777         if (!(arg_ct->ct & TCG_CT_REG))
1778             return 0;
1779         n = 0;
1780         for(i = 0; i < TCG_TARGET_NB_REGS; i++) {
1781             if (tcg_regset_test_reg(arg_ct->u.regs, i))
1782                 n++;
1783         }
1784     }
1785     return TCG_TARGET_NB_REGS - n + 1;
1786 }
1787 
1788 /* sort from highest priority to lowest */
1789 static void sort_constraints(TCGOpDef *def, int start, int n)
1790 {
1791     int i, j, p1, p2, tmp;
1792 
1793     for(i = 0; i < n; i++)
1794         def->sorted_args[start + i] = start + i;
1795     if (n <= 1)
1796         return;
1797     for(i = 0; i < n - 1; i++) {
1798         for(j = i + 1; j < n; j++) {
1799             p1 = get_constraint_priority(def, def->sorted_args[start + i]);
1800             p2 = get_constraint_priority(def, def->sorted_args[start + j]);
1801             if (p1 < p2) {
1802                 tmp = def->sorted_args[start + i];
1803                 def->sorted_args[start + i] = def->sorted_args[start + j];
1804                 def->sorted_args[start + j] = tmp;
1805             }
1806         }
1807     }
1808 }
1809 
/*
 * Build the argument constraints of every opcode in tcg_op_defs from the
 * constraint strings supplied by the target backend, then sort them.
 *
 * Opcodes flagged TCG_OPF_NOT_PRESENT and opcodes without input/output
 * arguments are skipped.  Each argument's constraint string is parsed
 * character by character:
 *   '0'..'9'  alias this argument to the given output argument,
 *   '&'       set TCG_CT_NEWREG,
 *   'i'       set TCG_CT_CONST,
 *   other     handed to target_parse_constraint().
 */
static void process_op_defs(TCGContext *s)
{
    TCGOpcode op;

    for (op = 0; op < NB_OPS; op++) {
        TCGOpDef *def = &tcg_op_defs[op];
        const TCGTargetOpDef *tdefs;
        TCGType type;
        int i, nb_args;

        if (def->flags & TCG_OPF_NOT_PRESENT) {
            continue;
        }

        /* Only input and output arguments carry constraints.  */
        nb_args = def->nb_iargs + def->nb_oargs;
        if (nb_args == 0) {
            continue;
        }

        tdefs = tcg_target_op_def(op);
        /* Missing TCGTargetOpDef entry. */
        tcg_debug_assert(tdefs != NULL);

        type = (def->flags & TCG_OPF_64BIT ? TCG_TYPE_I64 : TCG_TYPE_I32);
        for (i = 0; i < nb_args; i++) {
            const char *ct_str = tdefs->args_ct_str[i];
            /* Incomplete TCGTargetOpDef entry. */
            tcg_debug_assert(ct_str != NULL);

            /* Start from an empty constraint and accumulate flags.  */
            def->args_ct[i].u.regs = 0;
            def->args_ct[i].ct = 0;
            while (*ct_str != '\0') {
                switch(*ct_str) {
                case '0' ... '9':
                    {
                        int oarg = *ct_str - '0';
                        /* The digit must be the first character, and must
                           name an output that has a register constraint.  */
                        tcg_debug_assert(ct_str == tdefs->args_ct_str[i]);
                        tcg_debug_assert(oarg < def->nb_oargs);
                        tcg_debug_assert(def->args_ct[oarg].ct & TCG_CT_REG);
                        /* TCG_CT_ALIAS is for the output arguments.
                           The input is tagged with TCG_CT_IALIAS. */
                        /* Copy the output's constraint before it is tagged,
                           so the input does not inherit TCG_CT_ALIAS.  */
                        def->args_ct[i] = def->args_ct[oarg];
                        def->args_ct[oarg].ct |= TCG_CT_ALIAS;
                        def->args_ct[oarg].alias_index = i;
                        def->args_ct[i].ct |= TCG_CT_IALIAS;
                        def->args_ct[i].alias_index = oarg;
                    }
                    ct_str++;
                    break;
                case '&':
                    def->args_ct[i].ct |= TCG_CT_NEWREG;
                    ct_str++;
                    break;
                case 'i':
                    def->args_ct[i].ct |= TCG_CT_CONST;
                    ct_str++;
                    break;
                default:
                    /* The target parser returns the position to continue
                       from, or NULL for an unknown constraint letter.  */
                    ct_str = target_parse_constraint(&def->args_ct[i],
                                                     ct_str, type);
                    /* Typo in TCGTargetOpDef constraint. */
                    tcg_debug_assert(ct_str != NULL);
                }
            }
        }

        /* TCGTargetOpDef entry with too much information? */
        tcg_debug_assert(i == TCG_MAX_OP_ARGS || tdefs->args_ct_str[i] == NULL);

        /* sort the constraints (XXX: this is just a heuristic) */
        /* Outputs and inputs are sorted as two separate ranges.  */
        sort_constraints(def, 0, def->nb_oargs);
        sort_constraints(def, def->nb_oargs, def->nb_iargs);
    }
}
1884 
1885 void tcg_op_remove(TCGContext *s, TCGOp *op)
1886 {
1887     QTAILQ_REMOVE(&s->ops, op, link);
1888     QTAILQ_INSERT_TAIL(&s->free_ops, op, link);
1889 
1890 #ifdef CONFIG_PROFILER
1891     atomic_set(&s->prof.del_op_count, s->prof.del_op_count + 1);
1892 #endif
1893 }
1894 
1895 static TCGOp *tcg_op_alloc(TCGOpcode opc)
1896 {
1897     TCGContext *s = tcg_ctx;
1898     TCGOp *op;
1899 
1900     if (likely(QTAILQ_EMPTY(&s->free_ops))) {
1901         op = tcg_malloc(sizeof(TCGOp));
1902     } else {
1903         op = QTAILQ_FIRST(&s->free_ops);
1904         QTAILQ_REMOVE(&s->free_ops, op, link);
1905     }
1906     memset(op, 0, offsetof(TCGOp, link));
1907     op->opc = opc;
1908 
1909     return op;
1910 }
1911 
1912 TCGOp *tcg_emit_op(TCGOpcode opc)
1913 {
1914     TCGOp *op = tcg_op_alloc(opc);
1915     QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
1916     return op;
1917 }
1918 
1919 TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op,
1920                             TCGOpcode opc, int nargs)
1921 {
1922     TCGOp *new_op = tcg_op_alloc(opc);
1923     QTAILQ_INSERT_BEFORE(old_op, new_op, link);
1924     return new_op;
1925 }
1926 
1927 TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op,
1928                            TCGOpcode opc, int nargs)
1929 {
1930     TCGOp *new_op = tcg_op_alloc(opc);
1931     QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link);
1932     return new_op;
1933 }
1934 
/* Bits stored in the 'state' field of a temp by the liveness passes. */
#define TS_DEAD  (1 << 0)
#define TS_MEM   (1 << 1)

/* Test the DEAD_ARG / SYNC_ARG bit of argument N.  Both macros read a
   variable named 'arg_life' from the scope in which they are used. */
#define IS_DEAD_ARG(n)   (arg_life & (DEAD_ARG << (n)))
#define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n)))
1940 
1941 /* liveness analysis: end of function: all temps are dead, and globals
1942    should be in memory. */
1943 static void tcg_la_func_end(TCGContext *s)
1944 {
1945     int ng = s->nb_globals;
1946     int nt = s->nb_temps;
1947     int i;
1948 
1949     for (i = 0; i < ng; ++i) {
1950         s->temps[i].state = TS_DEAD | TS_MEM;
1951     }
1952     for (i = ng; i < nt; ++i) {
1953         s->temps[i].state = TS_DEAD;
1954     }
1955 }
1956 
1957 /* liveness analysis: end of basic block: all temps are dead, globals
1958    and local temps should be in memory. */
1959 static void tcg_la_bb_end(TCGContext *s)
1960 {
1961     int ng = s->nb_globals;
1962     int nt = s->nb_temps;
1963     int i;
1964 
1965     for (i = 0; i < ng; ++i) {
1966         s->temps[i].state = TS_DEAD | TS_MEM;
1967     }
1968     for (i = ng; i < nt; ++i) {
1969         s->temps[i].state = (s->temps[i].temp_local
1970                              ? TS_DEAD | TS_MEM
1971                              : TS_DEAD);
1972     }
1973 }
1974 
1975 /* Liveness analysis, pass 1.

        Walk the op list of S from the last op to the first, starting from
        the state established by tcg_la_func_end(), and track in the
        'state' field of each temp whether it is dead (TS_DEAD) and/or has
        to be in memory (TS_MEM) at that point.  The DEAD_ARG and SYNC_ARG
        bits computed for the arguments of an op are stored in op->life.

        Ops without side effects whose outputs are all dead are removed,
        and the double-word add2/sub2/mulu2/muls2 ops are replaced by a
        single-word op when one half of their result is dead. */
1978 static void liveness_pass_1(TCGContext *s)
1979 {
1980     int nb_globals = s->nb_globals;
1981     TCGOp *op, *op_prev;
1982 
1983     tcg_la_func_end(s);
1984 
         /* Reverse walk; the _SAFE variant is used because the current op
            may be unlinked by tcg_op_remove() inside the loop. */
1985     QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, TCGOpHead, link, op_prev) {
1986         int i, nb_iargs, nb_oargs;
1987         TCGOpcode opc_new, opc_new2;
1988         bool have_opc_new2;
             /* Liveness bits for this op; read by IS_DEAD_ARG/NEED_SYNC_ARG
                users later through op->life. */
1989         TCGLifeData arg_life = 0;
1990         TCGTemp *arg_ts;
1991         TCGOpcode opc = op->opc;
             /* NOTE: 'def' is looked up once here.  When an opcode is
                replaced further down, 'def' keeps describing the original
                opcode, so the flag tests under do_not_remove use the
                original opcode's flags. */
1992         const TCGOpDef *def = &tcg_op_defs[opc];
1993 
1994         switch (opc) {
1995         case INDEX_op_call:
1996             {
1997                 int call_flags;
1998 
                     /* For calls the argument counts come from the op
                        itself rather than from 'def'. */
1999                 nb_oargs = TCGOP_CALLO(op);
2000                 nb_iargs = TCGOP_CALLI(op);
2001                 call_flags = op->args[nb_oargs + nb_iargs + 1];
2002 
2003                 /* pure functions can be removed if their result is unused */
2004                 if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
2005                     for (i = 0; i < nb_oargs; i++) {
2006                         arg_ts = arg_temp(op->args[i]);
2007                         if (arg_ts->state != TS_DEAD) {
2008                             goto do_not_remove_call;
2009                         }
2010                     }
                         /* The do_remove label is in the default case. */
2011                     goto do_remove;
2012                 } else {
2013                 do_not_remove_call:
2014 
2015                     /* output args are dead */
2016                     for (i = 0; i < nb_oargs; i++) {
2017                         arg_ts = arg_temp(op->args[i]);
2018                         if (arg_ts->state & TS_DEAD) {
2019                             arg_life |= DEAD_ARG << i;
2020                         }
2021                         if (arg_ts->state & TS_MEM) {
2022                             arg_life |= SYNC_ARG << i;
2023                         }
2024                         arg_ts->state = TS_DEAD;
2025                     }
2026 
2027                     if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
2028                                         TCG_CALL_NO_READ_GLOBALS))) {
2029                         /* globals should go back to memory */
2030                         for (i = 0; i < nb_globals; i++) {
2031                             s->temps[i].state = TS_DEAD | TS_MEM;
2032                         }
2033                     } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
2034                         /* globals should be synced to memory */
2035                         for (i = 0; i < nb_globals; i++) {
2036                             s->temps[i].state |= TS_MEM;
2037                         }
2038                     }
2039 
2040                     /* record arguments that die in this helper */
                         /* arg_temp() may yield NULL for a call input,
                            hence the extra test in the two loops below. */
2041                     for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
2042                         arg_ts = arg_temp(op->args[i]);
2043                         if (arg_ts && arg_ts->state & TS_DEAD) {
2044                             arg_life |= DEAD_ARG << i;
2045                         }
2046                     }
2047                     /* input arguments are live for preceding opcodes */
2048                     for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
2049                         arg_ts = arg_temp(op->args[i]);
2050                         if (arg_ts) {
2051                             arg_ts->state &= ~TS_DEAD;
2052                         }
2053                     }
2054                 }
2055             }
2056             break;
2057         case INDEX_op_insn_start:
                 /* No temp state changes; op->life is left at 0. */
2058             break;
2059         case INDEX_op_discard:
2060             /* mark the temporary as dead */
2061             arg_temp(op->args[0])->state = TS_DEAD;
2062             break;
2063 
2064         case INDEX_op_add2_i32:
2065             opc_new = INDEX_op_add_i32;
2066             goto do_addsub2;
2067         case INDEX_op_sub2_i32:
2068             opc_new = INDEX_op_sub_i32;
2069             goto do_addsub2;
2070         case INDEX_op_add2_i64:
2071             opc_new = INDEX_op_add_i64;
2072             goto do_addsub2;
2073         case INDEX_op_sub2_i64:
2074             opc_new = INDEX_op_sub_i64;
2075         do_addsub2:
2076             nb_iargs = 4;
2077             nb_oargs = 2;
2078             /* Test if the high part of the operation is dead, but not
2079                the low part.  The result can be optimized to a simple
2080                add or sub.  This happens often for x86_64 guest when the
2081                cpu mode is set to 32 bit.  */
2082             if (arg_temp(op->args[1])->state == TS_DEAD) {
2083                 if (arg_temp(op->args[0])->state == TS_DEAD) {
2084                     goto do_remove;
2085                 }
2086                 /* Replace the opcode and adjust the args in place,
2087                    leaving 3 unused args at the end.  */
                     /* The single-word op keeps args[0] as its output and
                        takes the former args[2] and args[4] as inputs. */
2088                 op->opc = opc = opc_new;
2089                 op->args[1] = op->args[2];
2090                 op->args[2] = op->args[4];
2091                 /* Fall through and mark the single-word operation live.  */
2092                 nb_iargs = 2;
2093                 nb_oargs = 1;
2094             }
2095             goto do_not_remove;
2096 
2097         case INDEX_op_mulu2_i32:
2098             opc_new = INDEX_op_mul_i32;
2099             opc_new2 = INDEX_op_muluh_i32;
2100             have_opc_new2 = TCG_TARGET_HAS_muluh_i32;
2101             goto do_mul2;
2102         case INDEX_op_muls2_i32:
2103             opc_new = INDEX_op_mul_i32;
2104             opc_new2 = INDEX_op_mulsh_i32;
2105             have_opc_new2 = TCG_TARGET_HAS_mulsh_i32;
2106             goto do_mul2;
2107         case INDEX_op_mulu2_i64:
2108             opc_new = INDEX_op_mul_i64;
2109             opc_new2 = INDEX_op_muluh_i64;
2110             have_opc_new2 = TCG_TARGET_HAS_muluh_i64;
2111             goto do_mul2;
2112         case INDEX_op_muls2_i64:
2113             opc_new = INDEX_op_mul_i64;
2114             opc_new2 = INDEX_op_mulsh_i64;
2115             have_opc_new2 = TCG_TARGET_HAS_mulsh_i64;
2116             goto do_mul2;
2117         do_mul2:
2118             nb_iargs = 2;
2119             nb_oargs = 2;
2120             if (arg_temp(op->args[1])->state == TS_DEAD) {
2121                 if (arg_temp(op->args[0])->state == TS_DEAD) {
2122                     /* Both parts of the operation are dead.  */
2123                     goto do_remove;
2124                 }
2125                 /* The high part of the operation is dead; generate the low. */
2126                 op->opc = opc = opc_new;
2127                 op->args[1] = op->args[2];
2128                 op->args[2] = op->args[3];
2129             } else if (arg_temp(op->args[0])->state == TS_DEAD && have_opc_new2) {
2130                 /* The low part of the operation is dead; generate the high. */
2131                 op->opc = opc = opc_new2;
2132                 op->args[0] = op->args[1];
2133                 op->args[1] = op->args[2];
2134                 op->args[2] = op->args[3];
2135             } else {
                     /* Neither replacement applies: keep the double-word
                        op with both of its outputs. */
2136                 goto do_not_remove;
2137             }
2138             /* Mark the single-word operation live.  */
2139             nb_oargs = 1;
2140             goto do_not_remove;
2141 
2142         default:
2143             /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
2144             nb_iargs = def->nb_iargs;
2145             nb_oargs = def->nb_oargs;
2146 
2147             /* Test if the operation can be removed because all
2148                its outputs are dead. We assume that nb_oargs == 0
2149                implies side effects */
2150             if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) {
2151                 for (i = 0; i < nb_oargs; i++) {
2152                     if (arg_temp(op->args[i])->state != TS_DEAD) {
2153                         goto do_not_remove;
2154                     }
2155                 }
                 /* Also the target of the gotos in the call, add2/sub2 and
                    mul2 cases above. */
2156             do_remove:
2157                 tcg_op_remove(s, op);
2158             } else {
                 /* Entered with nb_iargs/nb_oargs already set by the
                    jumping case. */
2159             do_not_remove:
2160                 /* output args are dead */
2161                 for (i = 0; i < nb_oargs; i++) {
2162                     arg_ts = arg_temp(op->args[i]);
2163                     if (arg_ts->state & TS_DEAD) {
2164                         arg_life |= DEAD_ARG << i;
2165                     }
2166                     if (arg_ts->state & TS_MEM) {
2167                         arg_life |= SYNC_ARG << i;
2168                     }
2169                     arg_ts->state = TS_DEAD;
2170                 }
2171 
2172                 /* if end of basic block, update */
2173                 if (def->flags & TCG_OPF_BB_END) {
2174                     tcg_la_bb_end(s);
2175                 } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
2176                     /* globals should be synced to memory */
2177                     for (i = 0; i < nb_globals; i++) {
2178                         s->temps[i].state |= TS_MEM;
2179                     }
2180                 }
2181 
2182                 /* record arguments that die in this opcode */
2183                 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
2184                     arg_ts = arg_temp(op->args[i]);
2185                     if (arg_ts->state & TS_DEAD) {
2186                         arg_life |= DEAD_ARG << i;
2187                     }
2188                 }
2189                 /* input arguments are live for preceding opcodes */
2190                 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
2191                     arg_temp(op->args[i])->state &= ~TS_DEAD;
2192                 }
2193             }
2194             break;
2195         }
             /* Also reached after do_remove; arg_life is still 0 then. */
2196         op->life = arg_life;
2197     }
2198 }
2199 
2200 /* Liveness analysis: Convert indirect regs to direct temporaries.  */
2201 static bool liveness_pass_2(TCGContext *s)
2202 {
2203     int nb_globals = s->nb_globals;
2204     int nb_temps, i;
2205     bool changes = false;
2206     TCGOp *op, *op_next;
2207 
2208     /* Create a temporary for each indirect global.  */
2209     for (i = 0; i < nb_globals; ++i) {
2210         TCGTemp *its = &s->temps[i];
2211         if (its->indirect_reg) {
2212             TCGTemp *dts = tcg_temp_alloc(s);
2213             dts->type = its->type;
2214             dts->base_type = its->base_type;
2215             its->state_ptr = dts;
2216         } else {
2217             its->state_ptr = NULL;
2218         }
2219         /* All globals begin dead.  */
2220         its->state = TS_DEAD;
2221     }
2222     for (nb_temps = s->nb_temps; i < nb_temps; ++i) {
2223         TCGTemp *its = &s->temps[i];
2224         its->state_ptr = NULL;
2225         its->state = TS_DEAD;
2226     }
2227 
2228     QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
2229         TCGOpcode opc = op->opc;
2230         const TCGOpDef *def = &tcg_op_defs[opc];
2231         TCGLifeData arg_life = op->life;
2232         int nb_iargs, nb_oargs, call_flags;
2233         TCGTemp *arg_ts, *dir_ts;
2234 
2235         if (opc == INDEX_op_call) {
2236             nb_oargs = TCGOP_CALLO(op);
2237             nb_iargs = TCGOP_CALLI(op);
2238             call_flags = op->args[nb_oargs + nb_iargs + 1];
2239         } else {
2240             nb_iargs = def->nb_iargs;
2241             nb_oargs = def->nb_oargs;
2242 
2243             /* Set flags similar to how calls require.  */
2244             if (def->flags & TCG_OPF_BB_END) {
2245                 /* Like writing globals: save_globals */
2246                 call_flags = 0;
2247             } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
2248                 /* Like reading globals: sync_globals */
2249                 call_flags = TCG_CALL_NO_WRITE_GLOBALS;
2250             } else {
2251                 /* No effect on globals.  */
2252                 call_flags = (TCG_CALL_NO_READ_GLOBALS |
2253                               TCG_CALL_NO_WRITE_GLOBALS);
2254             }
2255         }
2256 
2257         /* Make sure that input arguments are available.  */
2258         for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
2259             arg_ts = arg_temp(op->args[i]);
2260             if (arg_ts) {
2261                 dir_ts = arg_ts->state_ptr;
2262                 if (dir_ts && arg_ts->state == TS_DEAD) {
2263                     TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32
2264                                       ? INDEX_op_ld_i32
2265                                       : INDEX_op_ld_i64);
2266                     TCGOp *lop = tcg_op_insert_before(s, op, lopc, 3);
2267 
2268                     lop->args[0] = temp_arg(dir_ts);
2269                     lop->args[1] = temp_arg(arg_ts->mem_base);
2270                     lop->args[2] = arg_ts->mem_offset;
2271 
2272                     /* Loaded, but synced with memory.  */
2273                     arg_ts->state = TS_MEM;
2274                 }
2275             }
2276         }
2277 
2278         /* Perform input replacement, and mark inputs that became dead.
2279            No action is required except keeping temp_state up to date
2280            so that we reload when needed.  */
2281         for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
2282             arg_ts = arg_temp(op->args[i]);
2283             if (arg_ts) {
2284                 dir_ts = arg_ts->state_ptr;
2285                 if (dir_ts) {
2286                     op->args[i] = temp_arg(dir_ts);
2287                     changes = true;
2288                     if (IS_DEAD_ARG(i)) {
2289                         arg_ts->state = TS_DEAD;
2290                     }
2291                 }
2292             }
2293         }
2294 
2295         /* Liveness analysis should ensure that the following are
2296            all correct, for call sites and basic block end points.  */
2297         if (call_flags & TCG_CALL_NO_READ_GLOBALS) {
2298             /* Nothing to do */
2299         } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) {
2300             for (i = 0; i < nb_globals; ++i) {
2301                 /* Liveness should see that globals are synced back,
2302                    that is, either TS_DEAD or TS_MEM.  */
2303                 arg_ts = &s->temps[i];
2304                 tcg_debug_assert(arg_ts->state_ptr == 0
2305                                  || arg_ts->state != 0);
2306             }
2307         } else {
2308             for (i = 0; i < nb_globals; ++i) {
2309                 /* Liveness should see that globals are saved back,
2310                    that is, TS_DEAD, waiting to be reloaded.  */
2311                 arg_ts = &s->temps[i];
2312                 tcg_debug_assert(arg_ts->state_ptr == 0
2313                                  || arg_ts->state == TS_DEAD);
2314             }
2315         }
2316 
2317         /* Outputs become available.  */
2318         for (i = 0; i < nb_oargs; i++) {
2319             arg_ts = arg_temp(op->args[i]);
2320             dir_ts = arg_ts->state_ptr;
2321             if (!dir_ts) {
2322                 continue;
2323             }
2324             op->args[i] = temp_arg(dir_ts);
2325             changes = true;
2326 
2327             /* The output is now live and modified.  */
2328             arg_ts->state = 0;
2329 
2330             /* Sync outputs upon their last write.  */
2331             if (NEED_SYNC_ARG(i)) {
2332                 TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
2333                                   ? INDEX_op_st_i32
2334                                   : INDEX_op_st_i64);
2335                 TCGOp *sop = tcg_op_insert_after(s, op, sopc, 3);
2336 
2337                 sop->args[0] = temp_arg(dir_ts);
2338                 sop->args[1] = temp_arg(arg_ts->mem_base);
2339                 sop->args[2] = arg_ts->mem_offset;
2340 
2341                 arg_ts->state = TS_MEM;
2342             }
2343             /* Drop outputs that are dead.  */
2344             if (IS_DEAD_ARG(i)) {
2345                 arg_ts->state = TS_DEAD;
2346             }
2347         }
2348     }
2349 
2350     return changes;
2351 }
2352 
2353 #ifdef CONFIG_DEBUG_TCG
2354 static void dump_regs(TCGContext *s)
2355 {
2356     TCGTemp *ts;
2357     int i;
2358     char buf[64];
2359 
2360     for(i = 0; i < s->nb_temps; i++) {
2361         ts = &s->temps[i];
2362         printf("  %10s: ", tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts));
2363         switch(ts->val_type) {
2364         case TEMP_VAL_REG:
2365             printf("%s", tcg_target_reg_names[ts->reg]);
2366             break;
2367         case TEMP_VAL_MEM:
2368             printf("%d(%s)", (int)ts->mem_offset,
2369                    tcg_target_reg_names[ts->mem_base->reg]);
2370             break;
2371         case TEMP_VAL_CONST:
2372             printf("$0x%" TCG_PRIlx, ts->val);
2373             break;
2374         case TEMP_VAL_DEAD:
2375             printf("D");
2376             break;
2377         default:
2378             printf("???");
2379             break;
2380         }
2381         printf("\n");
2382     }
2383 
2384     for(i = 0; i < TCG_TARGET_NB_REGS; i++) {
2385         if (s->reg_to_temp[i] != NULL) {
2386             printf("%s: %s\n",
2387                    tcg_target_reg_names[i],
2388                    tcg_get_arg_str_ptr(s, buf, sizeof(buf), s->reg_to_temp[i]));
2389         }
2390     }
2391 }
2392 
2393 static void check_regs(TCGContext *s)
2394 {
2395     int reg;
2396     int k;
2397     TCGTemp *ts;
2398     char buf[64];
2399 
2400     for (reg = 0; reg < TCG_TARGET_NB_REGS; reg++) {
2401         ts = s->reg_to_temp[reg];
2402         if (ts != NULL) {
2403             if (ts->val_type != TEMP_VAL_REG || ts->reg != reg) {
2404                 printf("Inconsistency for register %s:\n",
2405                        tcg_target_reg_names[reg]);
2406                 goto fail;
2407             }
2408         }
2409     }
2410     for (k = 0; k < s->nb_temps; k++) {
2411         ts = &s->temps[k];
2412         if (ts->val_type == TEMP_VAL_REG && !ts->fixed_reg
2413             && s->reg_to_temp[ts->reg] != ts) {
2414             printf("Inconsistency for temp %s:\n",
2415                    tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts));
2416         fail:
2417             printf("reg state:\n");
2418             dump_regs(s);
2419             tcg_abort();
2420         }
2421     }
2422 }
2423 #endif
2424 
2425 static void temp_allocate_frame(TCGContext *s, TCGTemp *ts)
2426 {
2427 #if !(defined(__sparc__) && TCG_TARGET_REG_BITS == 64)
2428     /* Sparc64 stack is accessed with offset of 2047 */
2429     s->current_frame_offset = (s->current_frame_offset +
2430                                (tcg_target_long)sizeof(tcg_target_long) - 1) &
2431         ~(sizeof(tcg_target_long) - 1);
2432 #endif
2433     if (s->current_frame_offset + (tcg_target_long)sizeof(tcg_target_long) >
2434         s->frame_end) {
2435         tcg_abort();
2436     }
2437     ts->mem_offset = s->current_frame_offset;
2438     ts->mem_base = s->frame_temp;
2439     ts->mem_allocated = 1;
2440     s->current_frame_offset += sizeof(tcg_target_long);
2441 }
2442 
2443 static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet);
2444 
2445 /* Mark a temporary as free or dead.  If 'free_or_dead' is negative,
2446    mark it free; otherwise mark it dead.  */
2447 static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead)
2448 {
2449     if (ts->fixed_reg) {
2450         return;
2451     }
2452     if (ts->val_type == TEMP_VAL_REG) {
2453         s->reg_to_temp[ts->reg] = NULL;
2454     }
2455     ts->val_type = (free_or_dead < 0
2456                     || ts->temp_local
2457                     || ts->temp_global
2458                     ? TEMP_VAL_MEM : TEMP_VAL_DEAD);
2459 }
2460 
2461 /* Mark a temporary as dead.  */
2462 static inline void temp_dead(TCGContext *s, TCGTemp *ts)
2463 {
2464     temp_free_or_dead(s, ts, 1);
2465 }
2466 
2467 /* Sync a temporary to memory. 'allocated_regs' is used in case a temporary
2468    registers needs to be allocated to store a constant.  If 'free_or_dead'
2469    is non-zero, subsequently release the temporary; if it is positive, the
2470    temp is dead; if it is negative, the temp is free.  */
2471 static void temp_sync(TCGContext *s, TCGTemp *ts,
2472                       TCGRegSet allocated_regs, int free_or_dead)
2473 {
2474     if (ts->fixed_reg) {
2475         return;
2476     }
2477     if (!ts->mem_coherent) {
2478         if (!ts->mem_allocated) {
2479             temp_allocate_frame(s, ts);
2480         }
2481         switch (ts->val_type) {
2482         case TEMP_VAL_CONST:
2483             /* If we're going to free the temp immediately, then we won't
2484                require it later in a register, so attempt to store the
2485                constant to memory directly.  */
2486             if (free_or_dead
2487                 && tcg_out_sti(s, ts->type, ts->val,
2488                                ts->mem_base->reg, ts->mem_offset)) {
2489                 break;
2490             }
2491             temp_load(s, ts, tcg_target_available_regs[ts->type],
2492                       allocated_regs);
2493             /* fallthrough */
2494 
2495         case TEMP_VAL_REG:
2496             tcg_out_st(s, ts->type, ts->reg,
2497                        ts->mem_base->reg, ts->mem_offset);
2498             break;
2499 
2500         case TEMP_VAL_MEM:
2501             break;
2502 
2503         case TEMP_VAL_DEAD:
2504         default:
2505             tcg_abort();
2506         }
2507         ts->mem_coherent = 1;
2508     }
2509     if (free_or_dead) {
2510         temp_free_or_dead(s, ts, free_or_dead);
2511     }
2512 }
2513 
2514 /* free register 'reg' by spilling the corresponding temporary if necessary */
2515 static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs)
2516 {
2517     TCGTemp *ts = s->reg_to_temp[reg];
2518     if (ts != NULL) {
2519         temp_sync(s, ts, allocated_regs, -1);
2520     }
2521 }
2522 
2523 /* Allocate a register belonging to reg1 & ~reg2 */
2524 static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet desired_regs,
2525                             TCGRegSet allocated_regs, bool rev)
2526 {
2527     int i, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
2528     const int *order;
2529     TCGReg reg;
2530     TCGRegSet reg_ct;
2531 
2532     reg_ct = desired_regs & ~allocated_regs;
2533     order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
2534 
2535     /* first try free registers */
2536     for(i = 0; i < n; i++) {
2537         reg = order[i];
2538         if (tcg_regset_test_reg(reg_ct, reg) && s->reg_to_temp[reg] == NULL)
2539             return reg;
2540     }
2541 
2542     /* XXX: do better spill choice */
2543     for(i = 0; i < n; i++) {
2544         reg = order[i];
2545         if (tcg_regset_test_reg(reg_ct, reg)) {
2546             tcg_reg_free(s, reg, allocated_regs);
2547             return reg;
2548         }
2549     }
2550 
2551     tcg_abort();
2552 }
2553 
2554 /* Make sure the temporary is in a register.  If needed, allocate the register
2555    from DESIRED while avoiding ALLOCATED.  */
2556 static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
2557                       TCGRegSet allocated_regs)
2558 {
2559     TCGReg reg;
2560 
2561     switch (ts->val_type) {
2562     case TEMP_VAL_REG:
2563         return;
2564     case TEMP_VAL_CONST:
2565         reg = tcg_reg_alloc(s, desired_regs, allocated_regs, ts->indirect_base);
2566         tcg_out_movi(s, ts->type, reg, ts->val);
2567         ts->mem_coherent = 0;
2568         break;
2569     case TEMP_VAL_MEM:
2570         reg = tcg_reg_alloc(s, desired_regs, allocated_regs, ts->indirect_base);
2571         tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
2572         ts->mem_coherent = 1;
2573         break;
2574     case TEMP_VAL_DEAD:
2575     default:
2576         tcg_abort();
2577     }
2578     ts->reg = reg;
2579     ts->val_type = TEMP_VAL_REG;
2580     s->reg_to_temp[reg] = ts;
2581 }
2582 
2583 /* Save a temporary to memory. 'allocated_regs' is used in case a
2584    temporary registers needs to be allocated to store a constant.  */
2585 static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
2586 {
2587     /* The liveness analysis already ensures that globals are back
2588        in memory. Keep an tcg_debug_assert for safety. */
2589     tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || ts->fixed_reg);
2590 }
2591 
2592 /* save globals to their canonical location and assume they can be
2593    modified be the following code. 'allocated_regs' is used in case a
2594    temporary registers needs to be allocated to store a constant. */
2595 static void save_globals(TCGContext *s, TCGRegSet allocated_regs)
2596 {
2597     int i, n;
2598 
2599     for (i = 0, n = s->nb_globals; i < n; i++) {
2600         temp_save(s, &s->temps[i], allocated_regs);
2601     }
2602 }
2603 
2604 /* sync globals to their canonical location and assume they can be
2605    read by the following code. 'allocated_regs' is used in case a
2606    temporary registers needs to be allocated to store a constant. */
2607 static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
2608 {
2609     int i, n;
2610 
2611     for (i = 0, n = s->nb_globals; i < n; i++) {
2612         TCGTemp *ts = &s->temps[i];
2613         tcg_debug_assert(ts->val_type != TEMP_VAL_REG
2614                          || ts->fixed_reg
2615                          || ts->mem_coherent);
2616     }
2617 }
2618 
2619 /* at the end of a basic block, we assume all temporaries are dead and
2620    all globals are stored at their canonical location. */
2621 static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
2622 {
2623     int i;
2624 
2625     for (i = s->nb_globals; i < s->nb_temps; i++) {
2626         TCGTemp *ts = &s->temps[i];
2627         if (ts->temp_local) {
2628             temp_save(s, ts, allocated_regs);
2629         } else {
2630             /* The liveness analysis already ensures that temps are dead.
2631                Keep an tcg_debug_assert for safety. */
2632             tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
2633         }
2634     }
2635 
2636     save_globals(s, allocated_regs);
2637 }
2638 
2639 static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots,
2640                                   tcg_target_ulong val, TCGLifeData arg_life)
2641 {
2642     if (ots->fixed_reg) {
2643         /* For fixed registers, we do not do any constant propagation.  */
2644         tcg_out_movi(s, ots->type, ots->reg, val);
2645         return;
2646     }
2647 
2648     /* The movi is not explicitly generated here.  */
2649     if (ots->val_type == TEMP_VAL_REG) {
2650         s->reg_to_temp[ots->reg] = NULL;
2651     }
2652     ots->val_type = TEMP_VAL_CONST;
2653     ots->val = val;
2654     ots->mem_coherent = 0;
2655     if (NEED_SYNC_ARG(0)) {
2656         temp_sync(s, ots, s->reserved_regs, IS_DEAD_ARG(0));
2657     } else if (IS_DEAD_ARG(0)) {
2658         temp_dead(s, ots);
2659     }
2660 }
2661 
2662 static void tcg_reg_alloc_movi(TCGContext *s, const TCGOp *op)
2663 {
2664     TCGTemp *ots = arg_temp(op->args[0]);
2665     tcg_target_ulong val = op->args[1];
2666 
2667     tcg_reg_alloc_do_movi(s, ots, val, op->life);
2668 }
2669 
2670 static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op)
2671 {
2672     const TCGLifeData arg_life = op->life;
2673     TCGRegSet allocated_regs;
2674     TCGTemp *ts, *ots;
2675     TCGType otype, itype;
2676 
2677     allocated_regs = s->reserved_regs;
2678     ots = arg_temp(op->args[0]);
2679     ts = arg_temp(op->args[1]);
2680 
2681     /* Note that otype != itype for no-op truncation.  */
2682     otype = ots->type;
2683     itype = ts->type;
2684 
2685     if (ts->val_type == TEMP_VAL_CONST) {
2686         /* propagate constant or generate sti */
2687         tcg_target_ulong val = ts->val;
2688         if (IS_DEAD_ARG(1)) {
2689             temp_dead(s, ts);
2690         }
2691         tcg_reg_alloc_do_movi(s, ots, val, arg_life);
2692         return;
2693     }
2694 
2695     /* If the source value is in memory we're going to be forced
2696        to have it in a register in order to perform the copy.  Copy
2697        the SOURCE value into its own register first, that way we
2698        don't have to reload SOURCE the next time it is used. */
2699     if (ts->val_type == TEMP_VAL_MEM) {
2700         temp_load(s, ts, tcg_target_available_regs[itype], allocated_regs);
2701     }
2702 
2703     tcg_debug_assert(ts->val_type == TEMP_VAL_REG);
2704     if (IS_DEAD_ARG(0) && !ots->fixed_reg) {
2705         /* mov to a non-saved dead register makes no sense (even with
2706            liveness analysis disabled). */
2707         tcg_debug_assert(NEED_SYNC_ARG(0));
2708         if (!ots->mem_allocated) {
2709             temp_allocate_frame(s, ots);
2710         }
2711         tcg_out_st(s, otype, ts->reg, ots->mem_base->reg, ots->mem_offset);
2712         if (IS_DEAD_ARG(1)) {
2713             temp_dead(s, ts);
2714         }
2715         temp_dead(s, ots);
2716     } else {
2717         if (IS_DEAD_ARG(1) && !ts->fixed_reg && !ots->fixed_reg) {
2718             /* the mov can be suppressed */
2719             if (ots->val_type == TEMP_VAL_REG) {
2720                 s->reg_to_temp[ots->reg] = NULL;
2721             }
2722             ots->reg = ts->reg;
2723             temp_dead(s, ts);
2724         } else {
2725             if (ots->val_type != TEMP_VAL_REG) {
2726                 /* When allocating a new register, make sure to not spill the
2727                    input one. */
2728                 tcg_regset_set_reg(allocated_regs, ts->reg);
2729                 ots->reg = tcg_reg_alloc(s, tcg_target_available_regs[otype],
2730                                          allocated_regs, ots->indirect_base);
2731             }
2732             tcg_out_mov(s, otype, ots->reg, ts->reg);
2733         }
2734         ots->val_type = TEMP_VAL_REG;
2735         ots->mem_coherent = 0;
2736         s->reg_to_temp[ots->reg] = ots;
2737         if (NEED_SYNC_ARG(0)) {
2738             temp_sync(s, ots, allocated_regs, 0);
2739         }
2740     }
2741 }
2742 
/*
 * Register-allocate and emit one generic op (tcg_gen_code() routes every
 * opcode without a dedicated allocator here).
 *
 * new_args[] receives, per argument, either the host register chosen for
 * it or, when the matching const_args[] entry is 1, the constant value
 * itself; both arrays are handed to tcg_out_op().  The op's trailing
 * constant arguments are copied through unchanged.
 *
 * Order of work: satisfy the input constraints, release dead inputs, then
 * either finish the basic block (TCG_OPF_BB_END) or handle call clobbers,
 * global syncing and output constraints; emit the op; finally move, sync
 * or kill the outputs.
 */
2743 static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
2744 {
     /* NOTE(review): arg_life is presumably consumed implicitly by the
        IS_DEAD_ARG()/NEED_SYNC_ARG() macros defined outside this chunk. */
2745     const TCGLifeData arg_life = op->life;
2746     const TCGOpDef * const def = &tcg_op_defs[op->opc];
2747     TCGRegSet i_allocated_regs;
2748     TCGRegSet o_allocated_regs;
2749     int i, k, nb_iargs, nb_oargs;
2750     TCGReg reg;
2751     TCGArg arg;
2752     const TCGArgConstraint *arg_ct;
2753     TCGTemp *ts;
2754     TCGArg new_args[TCG_MAX_OP_ARGS];
2755     int const_args[TCG_MAX_OP_ARGS];
2756
2757     nb_oargs = def->nb_oargs;
2758     nb_iargs = def->nb_iargs;
2759
2760     /* copy constants */
2761     memcpy(new_args + nb_oargs + nb_iargs,
2762            op->args + nb_oargs + nb_iargs,
2763            sizeof(TCGArg) * def->nb_cargs);
2764
2765     i_allocated_regs = s->reserved_regs;
2766     o_allocated_regs = s->reserved_regs;
2767
2768     /* satisfy input constraints */
     /* Inputs are visited in the order given by def->sorted_args, not in
        argument order. */
2769     for (k = 0; k < nb_iargs; k++) {
2770         i = def->sorted_args[nb_oargs + k];
2771         arg = op->args[i];
2772         arg_ct = &def->args_ct[i];
2773         ts = arg_temp(arg);
2774
2775         if (ts->val_type == TEMP_VAL_CONST
2776             && tcg_target_const_match(ts->val, ts->type, arg_ct)) {
2777             /* constant is OK for instruction */
2778             const_args[i] = 1;
2779             new_args[i] = ts->val;
2780             goto iarg_end;
2781         }
2782
2783         temp_load(s, ts, arg_ct->u.regs, i_allocated_regs);
2784
2785         if (arg_ct->ct & TCG_CT_IALIAS) {
2786             if (ts->fixed_reg) {
2787                 /* if fixed register, we must allocate a new register
2788                    if the alias is not the same register */
2789                 if (arg != op->args[arg_ct->alias_index])
2790                     goto allocate_in_reg;
2791             } else {
2792                 /* if the input is aliased to an output and if it is
2793                    not dead after the instruction, we must allocate
2794                    a new register and move it */
2795                 if (!IS_DEAD_ARG(i)) {
2796                     goto allocate_in_reg;
2797                 }
2798                 /* check if the current register has already been allocated
2799                    for another input aliased to an output */
2800                 int k2, i2;
2801                 for (k2 = 0 ; k2 < k ; k2++) {
2802                     i2 = def->sorted_args[nb_oargs + k2];
2803                     if ((def->args_ct[i2].ct & TCG_CT_IALIAS) &&
2804                         (new_args[i2] == ts->reg)) {
2805                         goto allocate_in_reg;
2806                     }
2807                 }
2808             }
2809         }
2810         reg = ts->reg;
2811         if (tcg_regset_test_reg(arg_ct->u.regs, reg)) {
2812             /* nothing to do : the constraint is satisfied */
2813         } else {
             /* Also reached by goto from the alias checks above. */
2814         allocate_in_reg:
2815             /* allocate a new register matching the constraint
2816                and move the temporary register into it */
2817             reg = tcg_reg_alloc(s, arg_ct->u.regs, i_allocated_regs,
2818                                 ts->indirect_base);
2819             tcg_out_mov(s, ts->type, reg, ts->reg);
2820         }
2821         new_args[i] = reg;
2822         const_args[i] = 0;
2823         tcg_regset_set_reg(i_allocated_regs, reg);
2824     iarg_end: ;
2825     }
2826
2827     /* mark dead temporaries and free the associated registers */
2828     for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
2829         if (IS_DEAD_ARG(i)) {
2830             temp_dead(s, arg_temp(op->args[i]));
2831         }
2832     }
2833
2834     if (def->flags & TCG_OPF_BB_END) {
2835         tcg_reg_alloc_bb_end(s, i_allocated_regs);
2836     } else {
2837         if (def->flags & TCG_OPF_CALL_CLOBBER) {
2838             /* XXX: permit generic clobber register list ? */
2839             for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
2840                 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
2841                     tcg_reg_free(s, i, i_allocated_regs);
2842                 }
2843             }
2844         }
2845         if (def->flags & TCG_OPF_SIDE_EFFECTS) {
2846             /* sync globals if the op has side effects and might trigger
2847                an exception. */
2848             sync_globals(s, i_allocated_regs);
2849         }
2850
2851         /* satisfy the output constraints */
2852         for(k = 0; k < nb_oargs; k++) {
2853             i = def->sorted_args[k];
2854             arg = op->args[i];
2855             arg_ct = &def->args_ct[i];
2856             ts = arg_temp(arg);
             /* An output aliased to a non-constant input reuses that
                input's register. */
2857             if ((arg_ct->ct & TCG_CT_ALIAS)
2858                 && !const_args[arg_ct->alias_index]) {
2859                 reg = new_args[arg_ct->alias_index];
2860             } else if (arg_ct->ct & TCG_CT_NEWREG) {
                 /* NEWREG: the register must be distinct from every
                    input register as well as from other outputs. */
2861                 reg = tcg_reg_alloc(s, arg_ct->u.regs,
2862                                     i_allocated_regs | o_allocated_regs,
2863                                     ts->indirect_base);
2864             } else {
2865                 /* if fixed register, we try to use it */
2866                 reg = ts->reg;
2867                 if (ts->fixed_reg &&
2868                     tcg_regset_test_reg(arg_ct->u.regs, reg)) {
2869                     goto oarg_end;
2870                 }
2871                 reg = tcg_reg_alloc(s, arg_ct->u.regs, o_allocated_regs,
2872                                     ts->indirect_base);
2873             }
2874             tcg_regset_set_reg(o_allocated_regs, reg);
2875             /* if a fixed register is used, then a move will be done afterwards */
2876             if (!ts->fixed_reg) {
2877                 if (ts->val_type == TEMP_VAL_REG) {
2878                     s->reg_to_temp[ts->reg] = NULL;
2879                 }
2880                 ts->val_type = TEMP_VAL_REG;
2881                 ts->reg = reg;
2882                 /* temp value is modified, so the value kept in memory is
2883                    potentially not the same */
2884                 ts->mem_coherent = 0;
2885                 s->reg_to_temp[reg] = ts;
2886             }
2887         oarg_end:
2888             new_args[i] = reg;
2889         }
2890     }
2891
2892     /* emit instruction */
2893     tcg_out_op(s, op->opc, new_args, const_args);
2894
2895     /* move the outputs in the correct register if needed */
2896     for(i = 0; i < nb_oargs; i++) {
2897         ts = arg_temp(op->args[i]);
2898         reg = new_args[i];
2899         if (ts->fixed_reg && ts->reg != reg) {
2900             tcg_out_mov(s, ts->type, ts->reg, reg);
2901         }
2902         if (NEED_SYNC_ARG(i)) {
2903             temp_sync(s, ts, o_allocated_regs, IS_DEAD_ARG(i));
2904         } else if (IS_DEAD_ARG(i)) {
2905             temp_dead(s, ts);
2906         }
2907     }
2908 }
2909 
/* STACK_DIR(x) expands to -(x) when TCG_TARGET_STACK_GROWSUP is defined and
   to x unchanged otherwise.
   NOTE(review): presumably intended for sign-adjusting stack offsets on
   hosts whose stack grows upwards; no use is visible in this part of the
   file -- confirm before relying on it. */
2910 #ifdef TCG_TARGET_STACK_GROWSUP
2911 #define STACK_DIR(x) (-(x))
2912 #else
2913 #define STACK_DIR(x) (x)
2914 #endif
2915 
/*
 * Register-allocate and emit a helper call op.
 *
 * Layout read from op->args: TCGOP_CALLO(op) outputs, then TCGOP_CALLI(op)
 * inputs, then the function address, then the call flags.
 *
 * Steps, in order:
 *  - inputs that do not fit in tcg_target_call_iarg_regs[] are stored to
 *    stack slots relative to TCG_REG_CALL_STACK (tcg_abort() if they need
 *    more than TCG_STATIC_CALL_ARGS_SIZE bytes);
 *  - the remaining inputs are placed in tcg_target_call_iarg_regs[];
 *  - dead inputs are released and call-clobbered registers are freed;
 *  - globals are synced or saved according to the TCG_CALL_NO_*_GLOBALS
 *    flags;
 *  - the call is emitted and each output is bound to
 *    tcg_target_call_oarg_regs[].
 *
 * Inputs equal to TCG_CALL_DUMMY_ARG occupy a slot/register position but
 * nothing is loaded or stored for them.
 */
2916 static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
2917 {
2918     const int nb_oargs = TCGOP_CALLO(op);
2919     const int nb_iargs = TCGOP_CALLI(op);
     /* NOTE(review): arg_life is presumably consumed implicitly by the
        IS_DEAD_ARG()/NEED_SYNC_ARG() macros defined outside this chunk. */
2920     const TCGLifeData arg_life = op->life;
2921     int flags, nb_regs, i;
2922     TCGReg reg;
2923     TCGArg arg;
2924     TCGTemp *ts;
2925     intptr_t stack_offset;
2926     size_t call_stack_size;
2927     tcg_insn_unit *func_addr;
2928     int allocate_args;
2929     TCGRegSet allocated_regs;
2930
2931     func_addr = (tcg_insn_unit *)(intptr_t)op->args[nb_oargs + nb_iargs];
2932     flags = op->args[nb_oargs + nb_iargs + 1];
2933
     /* nb_regs = number of inputs passed in registers. */
2934     nb_regs = ARRAY_SIZE(tcg_target_call_iarg_regs);
2935     if (nb_regs > nb_iargs) {
2936         nb_regs = nb_iargs;
2937     }
2938
2939     /* assign stack slots first */
2940     call_stack_size = (nb_iargs - nb_regs) * sizeof(tcg_target_long);
     /* Round up to a multiple of TCG_TARGET_STACK_ALIGN. */
2941     call_stack_size = (call_stack_size + TCG_TARGET_STACK_ALIGN - 1) &
2942         ~(TCG_TARGET_STACK_ALIGN - 1);
2943     allocate_args = (call_stack_size > TCG_STATIC_CALL_ARGS_SIZE);
2944     if (allocate_args) {
2945         /* XXX: if more than TCG_STATIC_CALL_ARGS_SIZE is needed,
2946            preallocate call stack */
2947         tcg_abort();
2948     }
2949
2950     stack_offset = TCG_TARGET_CALL_STACK_OFFSET;
2951     for (i = nb_regs; i < nb_iargs; i++) {
2952         arg = op->args[nb_oargs + i];
2953 #ifdef TCG_TARGET_STACK_GROWSUP
2954         stack_offset -= sizeof(tcg_target_long);
2955 #endif
2956         if (arg != TCG_CALL_DUMMY_ARG) {
2957             ts = arg_temp(arg);
2958             temp_load(s, ts, tcg_target_available_regs[ts->type],
2959                       s->reserved_regs);
2960             tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK, stack_offset);
2961         }
2962 #ifndef TCG_TARGET_STACK_GROWSUP
2963         stack_offset += sizeof(tcg_target_long);
2964 #endif
2965     }
2966
2967     /* assign input registers */
2968     allocated_regs = s->reserved_regs;
2969     for (i = 0; i < nb_regs; i++) {
2970         arg = op->args[nb_oargs + i];
2971         if (arg != TCG_CALL_DUMMY_ARG) {
2972             ts = arg_temp(arg);
2973             reg = tcg_target_call_iarg_regs[i];
2974             tcg_reg_free(s, reg, allocated_regs);
2975
2976             if (ts->val_type == TEMP_VAL_REG) {
2977                 if (ts->reg != reg) {
2978                     tcg_out_mov(s, ts->type, reg, ts->reg);
2979                 }
2980             } else {
                 /* Restrict the load to exactly the argument register. */
2981                 TCGRegSet arg_set = 0;
2982
2983                 tcg_regset_set_reg(arg_set, reg);
2984                 temp_load(s, ts, arg_set, allocated_regs);
2985             }
2986
2987             tcg_regset_set_reg(allocated_regs, reg);
2988         }
2989     }
2990
2991     /* mark dead temporaries and free the associated registers */
2992     for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
2993         if (IS_DEAD_ARG(i)) {
2994             temp_dead(s, arg_temp(op->args[i]));
2995         }
2996     }
2997
2998     /* clobber call registers */
2999     for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
3000         if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
3001             tcg_reg_free(s, i, allocated_regs);
3002         }
3003     }
3004
3005     /* Save globals if they might be written by the helper, sync them if
3006        they might be read. */
3007     if (flags & TCG_CALL_NO_READ_GLOBALS) {
3008         /* Nothing to do */
3009     } else if (flags & TCG_CALL_NO_WRITE_GLOBALS) {
3010         sync_globals(s, allocated_regs);
3011     } else {
3012         save_globals(s, allocated_regs);
3013     }
3014
3015     tcg_out_call(s, func_addr);
3016
3017     /* assign output registers and emit moves if needed */
3018     for(i = 0; i < nb_oargs; i++) {
3019         arg = op->args[i];
3020         ts = arg_temp(arg);
3021         reg = tcg_target_call_oarg_regs[i];
3022         tcg_debug_assert(s->reg_to_temp[reg] == NULL);
3023
3024         if (ts->fixed_reg) {
3025             if (ts->reg != reg) {
3026                 tcg_out_mov(s, ts->type, ts->reg, reg);
3027             }
3028         } else {
             /* Rebind the temp to the return register; the memory copy
                is marked as not coherent with the new value. */
3029             if (ts->val_type == TEMP_VAL_REG) {
3030                 s->reg_to_temp[ts->reg] = NULL;
3031             }
3032             ts->val_type = TEMP_VAL_REG;
3033             ts->reg = reg;
3034             ts->mem_coherent = 0;
3035             s->reg_to_temp[reg] = ts;
3036             if (NEED_SYNC_ARG(i)) {
3037                 temp_sync(s, ts, allocated_regs, IS_DEAD_ARG(i));
3038             } else if (IS_DEAD_ARG(i)) {
3039                 temp_dead(s, ts);
3040             }
3041         }
3042     }
3043 }
3044 
3045 #ifdef CONFIG_PROFILER
3046 
3047 /* avoid copy/paste errors */
/* PROF_ADD(to, from, field): read (from)->field with atomic_read() and add
   it to (to)->field.  The update of (to)->field itself is a plain +=. */
3048 #define PROF_ADD(to, from, field)                       \
3049     do {                                                \
3050         (to)->field += atomic_read(&((from)->field));   \
3051     } while (0)
3052
/* PROF_MAX(to, from, field): read (from)->field once with atomic_read() and
   store it in (to)->field if it is larger than the value already there.
   The temporary takes the field's own type via typeof. */
3053 #define PROF_MAX(to, from, field)                                       \
3054     do {                                                                \
3055         typeof((from)->field) val__ = atomic_read(&((from)->field));    \
3056         if (val__ > (to)->field) {                                      \
3057             (to)->field = val__;                                        \
3058         }                                                               \
3059     } while (0)
3060 
3061 /* Pass in a zero'ed @prof */
3062 static inline
3063 void tcg_profile_snapshot(TCGProfile *prof, bool counters, bool table)
3064 {
3065     unsigned int n_ctxs = atomic_read(&n_tcg_ctxs);
3066     unsigned int i;
3067 
3068     for (i = 0; i < n_ctxs; i++) {
3069         TCGContext *s = atomic_read(&tcg_ctxs[i]);
3070         const TCGProfile *orig = &s->prof;
3071 
3072         if (counters) {
3073             PROF_ADD(prof, orig, tb_count1);
3074             PROF_ADD(prof, orig, tb_count);
3075             PROF_ADD(prof, orig, op_count);
3076             PROF_MAX(prof, orig, op_count_max);
3077             PROF_ADD(prof, orig, temp_count);
3078             PROF_MAX(prof, orig, temp_count_max);
3079             PROF_ADD(prof, orig, del_op_count);
3080             PROF_ADD(prof, orig, code_in_len);
3081             PROF_ADD(prof, orig, code_out_len);
3082             PROF_ADD(prof, orig, search_out_len);
3083             PROF_ADD(prof, orig, interm_time);
3084             PROF_ADD(prof, orig, code_time);
3085             PROF_ADD(prof, orig, la_time);
3086             PROF_ADD(prof, orig, opt_time);
3087             PROF_ADD(prof, orig, restore_count);
3088             PROF_ADD(prof, orig, restore_time);
3089         }
3090         if (table) {
3091             int i;
3092 
3093             for (i = 0; i < NB_OPS; i++) {
3094                 PROF_ADD(prof, orig, table_op_count[i]);
3095             }
3096         }
3097     }
3098 }
3099 
3100 #undef PROF_ADD
3101 #undef PROF_MAX
3102 
/* Snapshot only the scalar counters (counters=true, table=false) into @prof.
   @prof must be zeroed by the caller, as for tcg_profile_snapshot(). */
3103 static void tcg_profile_snapshot_counters(TCGProfile *prof)
3104 {
3105     tcg_profile_snapshot(prof, true, false);
3106 }
3107 
/* Snapshot only the per-opcode table (counters=false, table=true) into @prof.
   @prof must be zeroed by the caller, as for tcg_profile_snapshot(). */
3108 static void tcg_profile_snapshot_table(TCGProfile *prof)
3109 {
3110     tcg_profile_snapshot(prof, false, true);
3111 }
3112 
3113 void tcg_dump_op_count(FILE *f, fprintf_function cpu_fprintf)
3114 {
3115     TCGProfile prof = {};
3116     int i;
3117 
3118     tcg_profile_snapshot_table(&prof);
3119     for (i = 0; i < NB_OPS; i++) {
3120         cpu_fprintf(f, "%s %" PRId64 "\n", tcg_op_defs[i].name,
3121                     prof.table_op_count[i]);
3122     }
3123 }
3124 #else
/* Variant used when CONFIG_PROFILER is not defined: only reports that the
   profiler was not compiled in. */
3125 void tcg_dump_op_count(FILE *f, fprintf_function cpu_fprintf)
3126 {
3127     cpu_fprintf(f, "[TCG profiler not compiled]\n");
3128 }
3129 #endif
3130 
3131 
3132 int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
3133 {
3134 #ifdef CONFIG_PROFILER
3135     TCGProfile *prof = &s->prof;
3136 #endif
3137     int i, num_insns;
3138     TCGOp *op;
3139 
3140 #ifdef CONFIG_PROFILER
3141     {
3142         int n;
3143 
3144         QTAILQ_FOREACH(op, &s->ops, link) {
3145             n++;
3146         }
3147         atomic_set(&prof->op_count, prof->op_count + n);
3148         if (n > prof->op_count_max) {
3149             atomic_set(&prof->op_count_max, n);
3150         }
3151 
3152         n = s->nb_temps;
3153         atomic_set(&prof->temp_count, prof->temp_count + n);
3154         if (n > prof->temp_count_max) {
3155             atomic_set(&prof->temp_count_max, n);
3156         }
3157     }
3158 #endif
3159 
3160 #ifdef DEBUG_DISAS
3161     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)
3162                  && qemu_log_in_addr_range(tb->pc))) {
3163         qemu_log_lock();
3164         qemu_log("OP:\n");
3165         tcg_dump_ops(s);
3166         qemu_log("\n");
3167         qemu_log_unlock();
3168     }
3169 #endif
3170 
3171 #ifdef CONFIG_PROFILER
3172     atomic_set(&prof->opt_time, prof->opt_time - profile_getclock());
3173 #endif
3174 
3175 #ifdef USE_TCG_OPTIMIZATIONS
3176     tcg_optimize(s);
3177 #endif
3178 
3179 #ifdef CONFIG_PROFILER
3180     atomic_set(&prof->opt_time, prof->opt_time + profile_getclock());
3181     atomic_set(&prof->la_time, prof->la_time - profile_getclock());
3182 #endif
3183 
3184     liveness_pass_1(s);
3185 
3186     if (s->nb_indirects > 0) {
3187 #ifdef DEBUG_DISAS
3188         if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
3189                      && qemu_log_in_addr_range(tb->pc))) {
3190             qemu_log_lock();
3191             qemu_log("OP before indirect lowering:\n");
3192             tcg_dump_ops(s);
3193             qemu_log("\n");
3194             qemu_log_unlock();
3195         }
3196 #endif
3197         /* Replace indirect temps with direct temps.  */
3198         if (liveness_pass_2(s)) {
3199             /* If changes were made, re-run liveness.  */
3200             liveness_pass_1(s);
3201         }
3202     }
3203 
3204 #ifdef CONFIG_PROFILER
3205     atomic_set(&prof->la_time, prof->la_time + profile_getclock());
3206 #endif
3207 
3208 #ifdef DEBUG_DISAS
3209     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT)
3210                  && qemu_log_in_addr_range(tb->pc))) {
3211         qemu_log_lock();
3212         qemu_log("OP after optimization and liveness analysis:\n");
3213         tcg_dump_ops(s);
3214         qemu_log("\n");
3215         qemu_log_unlock();
3216     }
3217 #endif
3218 
3219     tcg_reg_alloc_start(s);
3220 
3221     s->code_buf = tb->tc.ptr;
3222     s->code_ptr = tb->tc.ptr;
3223 
3224 #ifdef TCG_TARGET_NEED_LDST_LABELS
3225     s->ldst_labels = NULL;
3226 #endif
3227 #ifdef TCG_TARGET_NEED_POOL_LABELS
3228     s->pool_labels = NULL;
3229 #endif
3230 
3231     num_insns = -1;
3232     QTAILQ_FOREACH(op, &s->ops, link) {
3233         TCGOpcode opc = op->opc;
3234 
3235 #ifdef CONFIG_PROFILER
3236         atomic_set(&prof->table_op_count[opc], prof->table_op_count[opc] + 1);
3237 #endif
3238 
3239         switch (opc) {
3240         case INDEX_op_mov_i32:
3241         case INDEX_op_mov_i64:
3242             tcg_reg_alloc_mov(s, op);
3243             break;
3244         case INDEX_op_movi_i32:
3245         case INDEX_op_movi_i64:
3246             tcg_reg_alloc_movi(s, op);
3247             break;
3248         case INDEX_op_insn_start:
3249             if (num_insns >= 0) {
3250                 s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);
3251             }
3252             num_insns++;
3253             for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
3254                 target_ulong a;
3255 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
3256                 a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
3257 #else
3258                 a = op->args[i];
3259 #endif
3260                 s->gen_insn_data[num_insns][i] = a;
3261             }
3262             break;
3263         case INDEX_op_discard:
3264             temp_dead(s, arg_temp(op->args[0]));
3265             break;
3266         case INDEX_op_set_label:
3267             tcg_reg_alloc_bb_end(s, s->reserved_regs);
3268             tcg_out_label(s, arg_label(op->args[0]), s->code_ptr);
3269             break;
3270         case INDEX_op_call:
3271             tcg_reg_alloc_call(s, op);
3272             break;
3273         default:
3274             /* Sanity check that we've not introduced any unhandled opcodes. */
3275             tcg_debug_assert(tcg_op_supported(opc));
3276             /* Note: in order to speed up the code, it would be much
3277                faster to have specialized register allocator functions for
3278                some common argument patterns */
3279             tcg_reg_alloc_op(s, op);
3280             break;
3281         }
3282 #ifdef CONFIG_DEBUG_TCG
3283         check_regs(s);
3284 #endif
3285         /* Test for (pending) buffer overflow.  The assumption is that any
3286            one operation beginning below the high water mark cannot overrun
3287            the buffer completely.  Thus we can test for overflow after
3288            generating code without having to check during generation.  */
3289         if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
3290             return -1;
3291         }
3292     }
3293     tcg_debug_assert(num_insns >= 0);
3294     s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);
3295 
3296     /* Generate TB finalization at the end of block */
3297 #ifdef TCG_TARGET_NEED_LDST_LABELS
3298     if (!tcg_out_ldst_finalize(s)) {
3299         return -1;
3300     }
3301 #endif
3302 #ifdef TCG_TARGET_NEED_POOL_LABELS
3303     if (!tcg_out_pool_finalize(s)) {
3304         return -1;
3305     }
3306 #endif
3307 
3308     /* flush instruction cache */
3309     flush_icache_range((uintptr_t)s->code_buf, (uintptr_t)s->code_ptr);
3310 
3311     return tcg_current_code_size(s);
3312 }
3313 
3314 #ifdef CONFIG_PROFILER
3315 void tcg_dump_info(FILE *f, fprintf_function cpu_fprintf)
3316 {
3317     TCGProfile prof = {};
3318     const TCGProfile *s;
3319     int64_t tb_count;
3320     int64_t tb_div_count;
3321     int64_t tot;
3322 
3323     tcg_profile_snapshot_counters(&prof);
3324     s = &prof;
3325     tb_count = s->tb_count;
3326     tb_div_count = tb_count ? tb_count : 1;
3327     tot = s->interm_time + s->code_time;
3328 
3329     cpu_fprintf(f, "JIT cycles          %" PRId64 " (%0.3f s at 2.4 GHz)\n",
3330                 tot, tot / 2.4e9);
3331     cpu_fprintf(f, "translated TBs      %" PRId64 " (aborted=%" PRId64 " %0.1f%%)\n",
3332                 tb_count, s->tb_count1 - tb_count,
3333                 (double)(s->tb_count1 - s->tb_count)
3334                 / (s->tb_count1 ? s->tb_count1 : 1) * 100.0);
3335     cpu_fprintf(f, "avg ops/TB          %0.1f max=%d\n",
3336                 (double)s->op_count / tb_div_count, s->op_count_max);
3337     cpu_fprintf(f, "deleted ops/TB      %0.2f\n",
3338                 (double)s->del_op_count / tb_div_count);
3339     cpu_fprintf(f, "avg temps/TB        %0.2f max=%d\n",
3340                 (double)s->temp_count / tb_div_count, s->temp_count_max);
3341     cpu_fprintf(f, "avg host code/TB    %0.1f\n",
3342                 (double)s->code_out_len / tb_div_count);
3343     cpu_fprintf(f, "avg search data/TB  %0.1f\n",
3344                 (double)s->search_out_len / tb_div_count);
3345 
3346     cpu_fprintf(f, "cycles/op           %0.1f\n",
3347                 s->op_count ? (double)tot / s->op_count : 0);
3348     cpu_fprintf(f, "cycles/in byte      %0.1f\n",
3349                 s->code_in_len ? (double)tot / s->code_in_len : 0);
3350     cpu_fprintf(f, "cycles/out byte     %0.1f\n",
3351                 s->code_out_len ? (double)tot / s->code_out_len : 0);
3352     cpu_fprintf(f, "cycles/search byte     %0.1f\n",
3353                 s->search_out_len ? (double)tot / s->search_out_len : 0);
3354     if (tot == 0) {
3355         tot = 1;
3356     }
3357     cpu_fprintf(f, "  gen_interm time   %0.1f%%\n",
3358                 (double)s->interm_time / tot * 100.0);
3359     cpu_fprintf(f, "  gen_code time     %0.1f%%\n",
3360                 (double)s->code_time / tot * 100.0);
3361     cpu_fprintf(f, "optim./code time    %0.1f%%\n",
3362                 (double)s->opt_time / (s->code_time ? s->code_time : 1)
3363                 * 100.0);
3364     cpu_fprintf(f, "liveness/code time  %0.1f%%\n",
3365                 (double)s->la_time / (s->code_time ? s->code_time : 1) * 100.0);
3366     cpu_fprintf(f, "cpu_restore count   %" PRId64 "\n",
3367                 s->restore_count);
3368     cpu_fprintf(f, "  avg cycles        %0.1f\n",
3369                 s->restore_count ? (double)s->restore_time / s->restore_count : 0);
3370 }
3371 #else
/* Variant used when CONFIG_PROFILER is not defined: only reports that the
   profiler was not compiled in. */
3372 void tcg_dump_info(FILE *f, fprintf_function cpu_fprintf)
3373 {
3374     cpu_fprintf(f, "[TCG profiler not compiled]\n");
3375 }
3376 #endif
3377 
3378 #ifdef ELF_HOST_MACHINE
3379 /* In order to use this feature, the backend needs to do three things:
3380 
3381    (1) Define ELF_HOST_MACHINE to indicate both what value to
3382        put into the ELF image and to indicate support for the feature.
3383 
3384    (2) Define tcg_register_jit.  This should create a buffer containing
3385        the contents of a .debug_frame section that describes the post-
3386        prologue unwind info for the tcg machine.
3387 
3388    (3) Call tcg_register_jit_int, with the constructed .debug_frame.
3389 */
3390 
3391 /* Begin GDB interface.  THE FOLLOWING MUST MATCH GDB DOCS.  */
/* Action codes of the JIT interface.  The enumerator values (0, 1, 2) are
   part of the interface and must not be reordered.
   NOTE(review): presumably these are the values stored in
   jit_descriptor.action_flag -- confirm against the GDB documentation. */
3392 typedef enum {
3393     JIT_NOACTION = 0,
3394     JIT_REGISTER_FN,
3395     JIT_UNREGISTER_FN
3396 } jit_actions_t;
3397 
/* One node of a doubly linked list of registered symbol files: links to
   the neighbouring entries plus the address and size of the in-memory
   symbol file.  The layout is dictated by the GDB interface. */
3398 struct jit_code_entry {
3399     struct jit_code_entry *next_entry;
3400     struct jit_code_entry *prev_entry;
3401     const void *symfile_addr;
3402     uint64_t symfile_size;
3403 };
3404 
/* Descriptor through which the debugger finds the registered entries:
   an interface version, an action flag, the entry the action refers to
   and the head of the entry list.  The layout is dictated by the GDB
   interface. */
3405 struct jit_descriptor {
3406     uint32_t version;
3407     uint32_t action_flag;
3408     struct jit_code_entry *relevant_entry;
3409     struct jit_code_entry *first_entry;
3410 };
3411 
/* Deliberately empty hook function.  It is declared noinline and contains
   an empty asm statement so that the compiler keeps it as a real,
   callable function.
   NOTE(review): per the GDB JIT interface the debugger is expected to
   place a breakpoint here -- confirm against the GDB documentation. */
3412 void __jit_debug_register_code(void) __attribute__((noinline));
3413 void __jit_debug_register_code(void)
3414 {
3415     asm("");
3416 }
3417 
3418 /* Must statically initialize the version, because GDB may check
3419    the version before we can set it.  */
/* Initializer order follows struct jit_descriptor: version = 1,
   action_flag = 0 (JIT_NOACTION), relevant_entry = NULL, first_entry = NULL. */
3420 struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };
3421 
3422 /* End GDB interface.  */
3423 
/*
 * Return the byte offset of STR inside the NUL-separated string table
 * STRTAB, or 0 if STR is not present.
 *
 * The search starts at offset 1, skipping the table's leading empty
 * string.  An empty entry is treated as the end of the table: the table
 * used in this file is a fixed-size char array, so any unused tail is
 * zero-filled and shows up as empty entries.  Stopping there keeps a
 * missing name from walking past the end of the buffer, which the former
 * unbounded loop did.  Offset 0 refers to the table's own leading empty
 * string, i.e. "no name".
 *
 * The table must therefore end with at least one spare NUL byte after
 * the terminator of its last string.
 */
static int find_string(const char *strtab, const char *str)
{
    const char *p;

    for (p = strtab + 1; ; p += strlen(p) + 1) {
        /* Compare first so that looking up "" still returns the offset
           of the first empty entry, exactly as before. */
        if (strcmp(p, str) == 0) {
            return p - strtab;
        }
        if (*p == '\0') {
            /* Reached the zero padding: STR is not in the table.  */
            return 0;
        }
    }
}
3435 
3436 static void tcg_register_jit_int(void *buf_ptr, size_t buf_size,
3437                                  const void *debug_frame,
3438                                  size_t debug_frame_size)
3439 {
3440     struct __attribute__((packed)) DebugInfo {
3441         uint32_t  len;
3442         uint16_t  version;
3443         uint32_t  abbrev;
3444         uint8_t   ptr_size;
3445         uint8_t   cu_die;
3446         uint16_t  cu_lang;
3447         uintptr_t cu_low_pc;
3448         uintptr_t cu_high_pc;
3449         uint8_t   fn_die;
3450         char      fn_name[16];
3451         uintptr_t fn_low_pc;
3452         uintptr_t fn_high_pc;
3453         uint8_t   cu_eoc;
3454     };
3455 
3456     struct ElfImage {
3457         ElfW(Ehdr) ehdr;
3458         ElfW(Phdr) phdr;
3459         ElfW(Shdr) shdr[7];
3460         ElfW(Sym)  sym[2];
3461         struct DebugInfo di;
3462         uint8_t    da[24];
3463         char       str[80];
3464     };
3465 
3466     struct ElfImage *img;
3467 
3468     static const struct ElfImage img_template = {
3469         .ehdr = {
3470             .e_ident[EI_MAG0] = ELFMAG0,
3471             .e_ident[EI_MAG1] = ELFMAG1,
3472             .e_ident[EI_MAG2] = ELFMAG2,
3473             .e_ident[EI_MAG3] = ELFMAG3,
3474             .e_ident[EI_CLASS] = ELF_CLASS,
3475             .e_ident[EI_DATA] = ELF_DATA,
3476             .e_ident[EI_VERSION] = EV_CURRENT,
3477             .e_type = ET_EXEC,
3478             .e_machine = ELF_HOST_MACHINE,
3479             .e_version = EV_CURRENT,
3480             .e_phoff = offsetof(struct ElfImage, phdr),
3481             .e_shoff = offsetof(struct ElfImage, shdr),
3482             .e_ehsize = sizeof(ElfW(Shdr)),
3483             .e_phentsize = sizeof(ElfW(Phdr)),
3484             .e_phnum = 1,
3485             .e_shentsize = sizeof(ElfW(Shdr)),
3486             .e_shnum = ARRAY_SIZE(img->shdr),
3487             .e_shstrndx = ARRAY_SIZE(img->shdr) - 1,
3488 #ifdef ELF_HOST_FLAGS
3489             .e_flags = ELF_HOST_FLAGS,
3490 #endif
3491 #ifdef ELF_OSABI
3492             .e_ident[EI_OSABI] = ELF_OSABI,
3493 #endif
3494         },
3495         .phdr = {
3496             .p_type = PT_LOAD,
3497             .p_flags = PF_X,
3498         },
3499         .shdr = {
3500             [0] = { .sh_type = SHT_NULL },
3501             /* Trick: The contents of code_gen_buffer are not present in
3502                this fake ELF file; that got allocated elsewhere.  Therefore
3503                we mark .text as SHT_NOBITS (similar to .bss) so that readers
3504                will not look for contents.  We can record any address.  */
3505             [1] = { /* .text */
3506                 .sh_type = SHT_NOBITS,
3507                 .sh_flags = SHF_EXECINSTR | SHF_ALLOC,
3508             },
3509             [2] = { /* .debug_info */
3510                 .sh_type = SHT_PROGBITS,
3511                 .sh_offset = offsetof(struct ElfImage, di),
3512                 .sh_size = sizeof(struct DebugInfo),
3513             },
3514             [3] = { /* .debug_abbrev */
3515                 .sh_type = SHT_PROGBITS,
3516                 .sh_offset = offsetof(struct ElfImage, da),
3517                 .sh_size = sizeof(img->da),
3518             },
3519             [4] = { /* .debug_frame */
3520                 .sh_type = SHT_PROGBITS,
3521                 .sh_offset = sizeof(struct ElfImage),
3522             },
3523             [5] = { /* .symtab */
3524                 .sh_type = SHT_SYMTAB,
3525                 .sh_offset = offsetof(struct ElfImage, sym),
3526                 .sh_size = sizeof(img->sym),
3527                 .sh_info = 1,
3528                 .sh_link = ARRAY_SIZE(img->shdr) - 1,
3529                 .sh_entsize = sizeof(ElfW(Sym)),
3530             },
3531             [6] = { /* .strtab */
3532                 .sh_type = SHT_STRTAB,
3533                 .sh_offset = offsetof(struct ElfImage, str),
3534                 .sh_size = sizeof(img->str),
3535             }
3536         },
3537         .sym = {
3538             [1] = { /* code_gen_buffer */
3539                 .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC),
3540                 .st_shndx = 1,
3541             }
3542         },
3543         .di = {
3544             .len = sizeof(struct DebugInfo) - 4,
3545             .version = 2,
3546             .ptr_size = sizeof(void *),
3547             .cu_die = 1,
3548             .cu_lang = 0x8001,  /* DW_LANG_Mips_Assembler */
3549             .fn_die = 2,
3550             .fn_name = "code_gen_buffer"
3551         },
3552         .da = {
3553             1,          /* abbrev number (the cu) */
3554             0x11, 1,    /* DW_TAG_compile_unit, has children */
3555             0x13, 0x5,  /* DW_AT_language, DW_FORM_data2 */
3556             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
3557             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
3558             0, 0,       /* end of abbrev */
3559             2,          /* abbrev number (the fn) */
3560             0x2e, 0,    /* DW_TAG_subprogram, no children */
3561             0x3, 0x8,   /* DW_AT_name, DW_FORM_string */
3562             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
3563             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
3564             0, 0,       /* end of abbrev */
3565             0           /* no more abbrev */
3566         },
3567         .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0"
3568                ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer",
3569     };
3570 
3571     /* We only need a single jit entry; statically allocate it.  */
3572     static struct jit_code_entry one_entry;
3573 
3574     uintptr_t buf = (uintptr_t)buf_ptr;
3575     size_t img_size = sizeof(struct ElfImage) + debug_frame_size;
3576     DebugFrameHeader *dfh;
3577 
3578     img = g_malloc(img_size);
3579     *img = img_template;
3580 
3581     img->phdr.p_vaddr = buf;
3582     img->phdr.p_paddr = buf;
3583     img->phdr.p_memsz = buf_size;
3584 
3585     img->shdr[1].sh_name = find_string(img->str, ".text");
3586     img->shdr[1].sh_addr = buf;
3587     img->shdr[1].sh_size = buf_size;
3588 
3589     img->shdr[2].sh_name = find_string(img->str, ".debug_info");
3590     img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev");
3591 
3592     img->shdr[4].sh_name = find_string(img->str, ".debug_frame");
3593     img->shdr[4].sh_size = debug_frame_size;
3594 
3595     img->shdr[5].sh_name = find_string(img->str, ".symtab");
3596     img->shdr[6].sh_name = find_string(img->str, ".strtab");
3597 
3598     img->sym[1].st_name = find_string(img->str, "code_gen_buffer");
3599     img->sym[1].st_value = buf;
3600     img->sym[1].st_size = buf_size;
3601 
3602     img->di.cu_low_pc = buf;
3603     img->di.cu_high_pc = buf + buf_size;
3604     img->di.fn_low_pc = buf;
3605     img->di.fn_high_pc = buf + buf_size;
3606 
3607     dfh = (DebugFrameHeader *)(img + 1);
3608     memcpy(dfh, debug_frame, debug_frame_size);
3609     dfh->fde.func_start = buf;
3610     dfh->fde.func_len = buf_size;
3611 
3612 #ifdef DEBUG_JIT
3613     /* Enable this block to be able to debug the ELF image file creation.
3614        One can use readelf, objdump, or other inspection utilities.  */
3615     {
3616         FILE *f = fopen("/tmp/qemu.jit", "w+b");
3617         if (f) {
3618             if (fwrite(img, img_size, 1, f) != img_size) {
3619                 /* Avoid stupid unused return value warning for fwrite.  */
3620             }
3621             fclose(f);
3622         }
3623     }
3624 #endif
3625 
3626     one_entry.symfile_addr = img;
3627     one_entry.symfile_size = img_size;
3628 
3629     __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
3630     __jit_debug_descriptor.relevant_entry = &one_entry;
3631     __jit_debug_descriptor.first_entry = &one_entry;
3632     __jit_debug_register_code();
3633 }
3634 #else
3635 /* No support for the feature.  Provide the entry point expected by exec.c,
3636    and implement the internal function we declared earlier.  */
3637 
/*
 * Fallback for the #else arm of the ELF_HOST_MACHINE conditional, where the
 * JIT debug-registration feature is not supported.  The function exists only
 * so the earlier declaration has a definition; it reads nothing, writes
 * nothing, and returns immediately.
 *
 * buf, size:                    ignored.
 * debug_frame, debug_frame_size: ignored.
 */
static void tcg_register_jit_int(void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
{
    /* Intentionally a no-op: spell out that every argument is unused.  */
    (void)buf;
    (void)size;
    (void)debug_frame;
    (void)debug_frame_size;
    return;
}
3643 
/*
 * Public entry point for builds that take the #else arm of the
 * ELF_HOST_MACHINE conditional (no support for the feature).  Callers may
 * invoke it unconditionally; it does nothing with the buffer description
 * it is handed.
 *
 * buf, buf_size: ignored.
 */
void tcg_register_jit(void *buf, size_t buf_size)
{
    /* Intentionally a no-op: nothing is registered in this configuration.  */
    (void)buf;
    (void)buf_size;
    return;
}
3647 #endif /* ELF_HOST_MACHINE */
3648