xref: /openbmc/qemu/tcg/tcg.c (revision dd873966)
1 /*
2  * Tiny Code Generator for QEMU
3  *
4  * Copyright (c) 2008 Fabrice Bellard
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24 
25 /* define it to use liveness analysis (better code) */
26 #define USE_TCG_OPTIMIZATIONS
27 
28 #include "qemu/osdep.h"
29 
30 /* Define to dump the ELF file used to communicate with GDB.  */
31 #undef DEBUG_JIT
32 
33 #include "qemu/cutils.h"
34 #include "qemu/host-utils.h"
35 #include "qemu/timer.h"
36 
37 /* Note: the long term plan is to reduce the dependencies on the QEMU
38    CPU definitions. Currently they are used for qemu_ld/st
39    instructions */
40 #define NO_CPU_IO_DEFS
41 #include "cpu.h"
42 
43 #include "exec/cpu-common.h"
44 #include "exec/exec-all.h"
45 
46 #include "tcg-op.h"
47 
48 #if UINTPTR_MAX == UINT32_MAX
49 # define ELF_CLASS  ELFCLASS32
50 #else
51 # define ELF_CLASS  ELFCLASS64
52 #endif
53 #ifdef HOST_WORDS_BIGENDIAN
54 # define ELF_DATA   ELFDATA2MSB
55 #else
56 # define ELF_DATA   ELFDATA2LSB
57 #endif
58 
59 #include "elf.h"
60 #include "exec/log.h"
61 #include "sysemu/sysemu.h"
62 
63 /* Forward declarations for functions declared in tcg-target.inc.c and
64    used here. */
65 static void tcg_target_init(TCGContext *s);
66 static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode);
67 static void tcg_target_qemu_prologue(TCGContext *s);
68 static void patch_reloc(tcg_insn_unit *code_ptr, int type,
69                         intptr_t value, intptr_t addend);
70 
71 /* The CIE and FDE header definitions will be common to all hosts.  */
/* DWARF Common Information Entry header, registered with the debugger via
   tcg_register_jit_int() so it can unwind through generated code.  'len'
   is pointer-aligned so the struct can be emitted in place.  */
72 typedef struct {
73     uint32_t len __attribute__((aligned((sizeof(void *)))));
74     uint32_t id;
75     uint8_t version;
    /* NUL-terminated augmentation string; [1] presumably holds just the
       terminator — the hosts' debug-frame data defines the rest.  */
76     char augmentation[1];
77     uint8_t code_align;
78     uint8_t data_align;
79     uint8_t return_column;
80 } DebugFrameCIE;
81 
/* DWARF Frame Description Entry header for one range of generated code.
   Packed: func_start/func_len follow a 32-bit cie_offset and must not be
   separated by padding when written out.  */
82 typedef struct QEMU_PACKED {
83     uint32_t len __attribute__((aligned((sizeof(void *)))));
84     uint32_t cie_offset;
    /* Host address and byte length of the code covered by this FDE.  */
85     uintptr_t func_start;
86     uintptr_t func_len;
87 } DebugFrameFDEHeader;
88 
/* A CIE immediately followed by an FDE header: the fixed prefix of the
   debug-frame image handed to tcg_register_jit().  */
89 typedef struct QEMU_PACKED {
90     DebugFrameCIE cie;
91     DebugFrameFDEHeader fde;
92 } DebugFrameHeader;
93 
94 static void tcg_register_jit_int(void *buf, size_t size,
95                                  const void *debug_frame,
96                                  size_t debug_frame_size)
97     __attribute__((unused));
98 
99 /* Forward declarations for functions declared and used in tcg-target.inc.c. */
100 static const char *target_parse_constraint(TCGArgConstraint *ct,
101                                            const char *ct_str, TCGType type);
102 static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
103                        intptr_t arg2);
104 static void tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
105 static void tcg_out_movi(TCGContext *s, TCGType type,
106                          TCGReg ret, tcg_target_long arg);
107 static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
108                        const int *const_args);
109 static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
110                        intptr_t arg2);
111 static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
112                         TCGReg base, intptr_t ofs);
113 static void tcg_out_call(TCGContext *s, tcg_insn_unit *target);
114 static int tcg_target_const_match(tcg_target_long val, TCGType type,
115                                   const TCGArgConstraint *arg_ct);
116 #ifdef TCG_TARGET_NEED_LDST_LABELS
117 static bool tcg_out_ldst_finalize(TCGContext *s);
118 #endif
119 
120 #define TCG_HIGHWATER 1024
121 
122 static TCGContext **tcg_ctxs;
123 static unsigned int n_tcg_ctxs;
124 TCGv_env cpu_env = 0;
125 
126 /*
127  * We divide code_gen_buffer into equally-sized "regions" that TCG threads
128  * dynamically allocate from as demand dictates. Given appropriate region
129  * sizing, this minimizes flushes even when some TCG threads generate a lot
130  * more code than others.
131  */
132 struct tcg_region_state {
133     QemuMutex lock;
134 
135     /* fields set at init time */
    /* start of the whole buffer; the first region absorbs the (possibly
       unaligned) bytes between start and start_aligned */
136     void *start;
    /* page-aligned base; regions are laid out every 'stride' bytes from here */
137     void *start_aligned;
    /* end of the last region's usable space (its guard page excluded) */
138     void *end;
    /* number of regions */
139     size_t n;
140     size_t size; /* size of one region */
141     size_t stride; /* .size + guard size */
142 
143     /* fields protected by the lock */
144     size_t current; /* current region index */
145     size_t agg_size_full; /* aggregate size of full regions */
146 };
147 
/* Global region bookkeeping shared by all TCG threads.  */
148 static struct tcg_region_state region;
149 
/* Filled in by tcg_target_init(): registers usable per register class, and
   registers clobbered across calls.  */
150 static TCGRegSet tcg_target_available_regs[2];
151 static TCGRegSet tcg_target_call_clobber_regs;
152 
153 #if TCG_TARGET_INSN_UNIT_SIZE == 1
/* Emit one byte into the code stream; only compiled when a tcg_insn_unit
   is a single byte.  */
154 static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v)
155 {
156     *s->code_ptr++ = v;
157 }
158 
/* Overwrite one already-emitted byte at @p (back-patching).  */
159 static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p,
160                                                       uint8_t v)
161 {
162     *p = v;
163 }
164 #endif
165 
166 #if TCG_TARGET_INSN_UNIT_SIZE <= 2
/* Emit a 16-bit value: a single unit store when units are 16 bits wide,
   otherwise a byte-wise copy advancing code_ptr by 2 bytes' worth of units.  */
167 static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v)
168 {
169     if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
170         *s->code_ptr++ = v;
171     } else {
172         tcg_insn_unit *p = s->code_ptr;
173         memcpy(p, &v, sizeof(v));
174         s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
175     }
176 }
177 
/* Overwrite 16 bits of already-emitted code at @p (back-patching).  */
178 static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p,
179                                                        uint16_t v)
180 {
181     if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
182         *p = v;
183     } else {
184         memcpy(p, &v, sizeof(v));
185     }
186 }
187 #endif
188 
189 #if TCG_TARGET_INSN_UNIT_SIZE <= 4
/* Emit a 32-bit value: a single unit store when units are 32 bits wide,
   otherwise a byte-wise copy advancing code_ptr by 4 bytes' worth of units.  */
190 static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v)
191 {
192     if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
193         *s->code_ptr++ = v;
194     } else {
195         tcg_insn_unit *p = s->code_ptr;
196         memcpy(p, &v, sizeof(v));
197         s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
198     }
199 }
200 
/* Overwrite 32 bits of already-emitted code at @p (back-patching).  */
201 static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p,
202                                                        uint32_t v)
203 {
204     if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
205         *p = v;
206     } else {
207         memcpy(p, &v, sizeof(v));
208     }
209 }
210 #endif
211 
212 #if TCG_TARGET_INSN_UNIT_SIZE <= 8
/* Emit a 64-bit value: a single unit store when units are 64 bits wide,
   otherwise a byte-wise copy advancing code_ptr by 8 bytes' worth of units.  */
213 static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v)
214 {
215     if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
216         *s->code_ptr++ = v;
217     } else {
218         tcg_insn_unit *p = s->code_ptr;
219         memcpy(p, &v, sizeof(v));
220         s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
221     }
222 }
223 
/* Overwrite 64 bits of already-emitted code at @p (back-patching).  */
224 static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p,
225                                                        uint64_t v)
226 {
227     if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
228         *p = v;
229     } else {
230         memcpy(p, &v, sizeof(v));
231     }
232 }
233 #endif
234 
235 /* label relocation processing */
236 
237 static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
238                           TCGLabel *l, intptr_t addend)
239 {
240     TCGRelocation *r;
241 
242     if (l->has_value) {
243         /* FIXME: This may break relocations on RISC targets that
244            modify instruction fields in place.  The caller may not have
245            written the initial value.  */
246         patch_reloc(code_ptr, type, l->u.value, addend);
247     } else {
248         /* add a new relocation entry */
249         r = tcg_malloc(sizeof(TCGRelocation));
250         r->type = type;
251         r->ptr = code_ptr;
252         r->addend = addend;
253         r->next = l->u.first_reloc;
254         l->u.first_reloc = r;
255     }
256 }
257 
258 static void tcg_out_label(TCGContext *s, TCGLabel *l, tcg_insn_unit *ptr)
259 {
260     intptr_t value = (intptr_t)ptr;
261     TCGRelocation *r;
262 
263     tcg_debug_assert(!l->has_value);
264 
265     for (r = l->u.first_reloc; r != NULL; r = r->next) {
266         patch_reloc(r->ptr, r->type, value, r->addend);
267     }
268 
269     l->has_value = 1;
270     l->u.value_ptr = ptr;
271 }
272 
273 TCGLabel *gen_new_label(void)
274 {
275     TCGContext *s = tcg_ctx;
276     TCGLabel *l = tcg_malloc(sizeof(TCGLabel));
277 
278     *l = (TCGLabel){
279         .id = s->nb_labels++
280     };
281 
282     return l;
283 }
284 
285 #include "tcg-target.inc.c"
286 
287 static void tcg_region_bounds(size_t curr_region, void **pstart, void **pend)
288 {
289     void *start, *end;
290 
291     start = region.start_aligned + curr_region * region.stride;
292     end = start + region.size;
293 
294     if (curr_region == 0) {
295         start = region.start;
296     }
297     if (curr_region == region.n - 1) {
298         end = region.end;
299     }
300 
301     *pstart = start;
302     *pend = end;
303 }
304 
305 static void tcg_region_assign(TCGContext *s, size_t curr_region)
306 {
307     void *start, *end;
308 
309     tcg_region_bounds(curr_region, &start, &end);
310 
311     s->code_gen_buffer = start;
312     s->code_gen_ptr = start;
313     s->code_gen_buffer_size = end - start;
314     s->code_gen_highwater = end - TCG_HIGHWATER;
315 }
316 
317 static bool tcg_region_alloc__locked(TCGContext *s)
318 {
319     if (region.current == region.n) {
320         return true;
321     }
322     tcg_region_assign(s, region.current);
323     region.current++;
324     return false;
325 }
326 
327 /*
328  * Request a new region once the one in use has filled up.
329  * Returns true on error.
330  */
331 static bool tcg_region_alloc(TCGContext *s)
332 {
333     bool err;
334     /* read the region size now; alloc__locked will overwrite it on success */
335     size_t size_full = s->code_gen_buffer_size;
336 
337     qemu_mutex_lock(&region.lock);
338     err = tcg_region_alloc__locked(s);
339     if (!err) {
340         region.agg_size_full += size_full - TCG_HIGHWATER;
341     }
342     qemu_mutex_unlock(&region.lock);
343     return err;
344 }
345 
346 /*
347  * Perform a context's first region allocation.
348  * This function does _not_ increment region.agg_size_full.
349  */
/* Caller must hold region.lock; since this is the context's first region,
   there is no previous (full) region to account for.  */
350 static inline bool tcg_region_initial_alloc__locked(TCGContext *s)
351 {
352     return tcg_region_alloc__locked(s);
353 }
354 
355 /* Call from a safe-work context */
356 void tcg_region_reset_all(void)
357 {
358     unsigned int n_ctxs = atomic_read(&n_tcg_ctxs);
359     unsigned int i;
360 
361     qemu_mutex_lock(&region.lock);
362     region.current = 0;
363     region.agg_size_full = 0;
364 
365     for (i = 0; i < n_ctxs; i++) {
366         TCGContext *s = atomic_read(&tcg_ctxs[i]);
367         bool err = tcg_region_initial_alloc__locked(s);
368 
369         g_assert(!err);
370     }
371     qemu_mutex_unlock(&region.lock);
372 }
373 
374 #ifdef CONFIG_USER_ONLY
/* User-mode has a single shared TCG context, hence a single region.  */
375 static size_t tcg_n_regions(void)
376 {
377     return 1;
378 }
379 #else
380 /*
381  * It is likely that some vCPUs will translate more code than others, so we
382  * first try to set more regions than max_cpus, with those regions being of
383  * reasonable size. If that's not possible we make do by evenly dividing
384  * the code_gen_buffer among the vCPUs.
385  */
386 static size_t tcg_n_regions(void)
387 {
388     size_t i;
389 
390     /* Use a single region if all we have is one vCPU thread */
391     if (max_cpus == 1 || !qemu_tcg_mttcg_enabled()) {
392         return 1;
393     }
394 
395     /* Try to have more regions than max_cpus, with each region being >= 2 MB */
    /* Count down from 8 regions per vCPU thread toward 1, stopping at the
       first split that keeps each region at least 2 MB.  */
396     for (i = 8; i > 0; i--) {
397         size_t regions_per_thread = i;
398         size_t region_size;
399 
400         region_size = tcg_init_ctx.code_gen_buffer_size;
401         region_size /= max_cpus * regions_per_thread;
402 
403         if (region_size >= 2 * 1024u * 1024) {
404             return max_cpus * regions_per_thread;
405         }
406     }
407     /* If we can't, then just allocate one region per vCPU thread */
408     return max_cpus;
409 }
410 #endif
411 
412 /*
413  * Initializes region partitioning.
414  *
415  * Called at init time from the parent thread (i.e. the one calling
416  * tcg_context_init), after the target's TCG globals have been set.
417  *
418  * Region partitioning works by splitting code_gen_buffer into separate regions,
419  * and then assigning regions to TCG threads so that the threads can translate
420  * code in parallel without synchronization.
421  *
422  * In softmmu the number of TCG threads is bounded by max_cpus, so we use at
423  * least max_cpus regions in MTTCG. In !MTTCG we use a single region.
424  * Note that the TCG options from the command-line (i.e. -accel accel=tcg,[...])
425  * must have been parsed before calling this function, since it calls
426  * qemu_tcg_mttcg_enabled().
427  *
428  * In user-mode we use a single region.  Having multiple regions in user-mode
429  * is not supported, because the number of vCPU threads (recall that each thread
430  * spawned by the guest corresponds to a vCPU thread) is only bounded by the
431  * OS, and usually this number is huge (tens of thousands is not uncommon).
432  * Thus, given this large bound on the number of vCPU threads and the fact
433  * that code_gen_buffer is allocated at compile-time, we cannot guarantee
434  * the availability of at least one region per vCPU thread.
435  *
436  * However, this user-mode limitation is unlikely to be a significant problem
437  * in practice. Multi-threaded guests share most if not all of their translated
438  * code, which makes parallel code generation less appealing than in softmmu.
439  */
440 void tcg_region_init(void)
441 {
442     void *buf = tcg_init_ctx.code_gen_buffer;
443     void *aligned;
444     size_t size = tcg_init_ctx.code_gen_buffer_size;
445     size_t page_size = qemu_real_host_page_size;
446     size_t region_size;
447     size_t n_regions;
448     size_t i;
449 
450     n_regions = tcg_n_regions();
451 
452     /* The first region will be 'aligned - buf' bytes larger than the others */
453     aligned = QEMU_ALIGN_PTR_UP(buf, page_size);
454     g_assert(aligned < tcg_init_ctx.code_gen_buffer + size);
455     /*
456      * Make region_size a multiple of page_size, using aligned as the start.
457      * As a result of this we might end up with a few extra pages at the end of
458      * the buffer; we will assign those to the last region.
459      */
460     region_size = (size - (aligned - buf)) / n_regions;
461     region_size = QEMU_ALIGN_DOWN(region_size, page_size);
462 
463     /* A region must have at least 2 pages; one code, one guard */
464     g_assert(region_size >= 2 * page_size);
465 
466     /* init the region struct */
467     qemu_mutex_init(&region.lock);
468     region.n = n_regions;
    /* usable size excludes each region's trailing guard page */
469     region.size = region_size - page_size;
470     region.stride = region_size;
471     region.start = buf;
472     region.start_aligned = aligned;
473     /* page-align the end, since its last page will be a guard page */
474     region.end = QEMU_ALIGN_PTR_DOWN(buf + size, page_size);
475     /* account for that last guard page */
476     region.end -= page_size;
477 
478     /* set guard pages */
    /* Make each region's last page inaccessible so that a stray write past
       the high-water mark faults instead of corrupting the next region.  */
479     for (i = 0; i < region.n; i++) {
480         void *start, *end;
481         int rc;
482 
483         tcg_region_bounds(i, &start, &end);
484         rc = qemu_mprotect_none(end, page_size);
485         g_assert(!rc);
486     }
487 
488     /* In user-mode we support only one ctx, so do the initial allocation now */
489 #ifdef CONFIG_USER_ONLY
490     {
491         bool err = tcg_region_initial_alloc__locked(tcg_ctx);
492 
493         g_assert(!err);
494     }
495 #endif
496 }
497 
498 /*
499  * All TCG threads except the parent (i.e. the one that called tcg_context_init
500  * and registered the target's TCG globals) must register with this function
501  * before initiating translation.
502  *
503  * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation
504  * of tcg_region_init() for the reasoning behind this.
505  *
506  * In softmmu each caller registers its context in tcg_ctxs[]. Note that in
507  * softmmu tcg_ctxs[] does not track tcg_init_ctx, since the initial context
508  * is not used anymore for translation once this function is called.
509  *
510  * Not tracking tcg_init_ctx in tcg_ctxs[] in softmmu keeps code that iterates
511  * over the array (e.g. tcg_code_size()) the same for both softmmu and user-mode.
512  */
513 #ifdef CONFIG_USER_ONLY
/* User-mode: every thread shares the lone init context.  */
514 void tcg_register_thread(void)
515 {
516     tcg_ctx = &tcg_init_ctx;
517 }
518 #else
/* Softmmu: clone the init context for this thread, publish it in
   tcg_ctxs[], and hand it its first region.  */
519 void tcg_register_thread(void)
520 {
521     TCGContext *s = g_malloc(sizeof(*s));
522     unsigned int i, n;
523     bool err;
524 
525     *s = tcg_init_ctx;
526 
527     /* Relink mem_base.  */
    /* The struct copy left mem_base pointers aimed at tcg_init_ctx.temps;
       rebase each one onto this context's own temps array.  */
528     for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) {
529         if (tcg_init_ctx.temps[i].mem_base) {
530             ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps;
531             tcg_debug_assert(b >= 0 && b < n);
532             s->temps[i].mem_base = &s->temps[b];
533         }
534     }
535 
536     /* Claim an entry in tcg_ctxs */
537     n = atomic_fetch_inc(&n_tcg_ctxs);
538     g_assert(n < max_cpus);
539     atomic_set(&tcg_ctxs[n], s);
540 
541     tcg_ctx = s;
542     qemu_mutex_lock(&region.lock);
543     err = tcg_region_initial_alloc__locked(tcg_ctx);
544     g_assert(!err);
545     qemu_mutex_unlock(&region.lock);
546 }
547 #endif /* !CONFIG_USER_ONLY */
548 
549 /*
550  * Returns the size (in bytes) of all translated code (i.e. from all regions)
551  * currently in the cache.
552  * See also: tcg_code_capacity()
553  * Do not confuse with tcg_current_code_size(); that one applies to a single
554  * TCG context.
555  */
556 size_t tcg_code_size(void)
557 {
558     unsigned int n_ctxs = atomic_read(&n_tcg_ctxs);
559     unsigned int i;
560     size_t total;
561 
562     qemu_mutex_lock(&region.lock);
563     total = region.agg_size_full;
564     for (i = 0; i < n_ctxs; i++) {
565         const TCGContext *s = atomic_read(&tcg_ctxs[i]);
566         size_t size;
567 
568         size = atomic_read(&s->code_gen_ptr) - s->code_gen_buffer;
569         g_assert(size <= s->code_gen_buffer_size);
570         total += size;
571     }
572     qemu_mutex_unlock(&region.lock);
573     return total;
574 }
575 
576 /*
577  * Returns the code capacity (in bytes) of the entire cache, i.e. including all
578  * regions.
579  * See also: tcg_code_size()
580  */
581 size_t tcg_code_capacity(void)
582 {
583     size_t guard_size, capacity;
584 
585     /* no need for synchronization; these variables are set at init time */
586     guard_size = region.stride - region.size;
587     capacity = region.end + guard_size - region.start;
588     capacity -= region.n * (guard_size + TCG_HIGHWATER);
589     return capacity;
590 }
591 
592 /* pool based memory allocation */
/* Allocate @size bytes that live until the next tcg_pool_reset().
   Oversized requests get a dedicated pool on the 'large' list; normal
   requests are served from a chain of fixed-size chunks that is extended
   on demand and reused across resets.  */
593 void *tcg_malloc_internal(TCGContext *s, int size)
594 {
595     TCGPool *p;
596     int pool_size;
597     
598     if (size > TCG_POOL_CHUNK_SIZE) {
599         /* big malloc: insert a new pool (XXX: could optimize) */
600         p = g_malloc(sizeof(TCGPool) + size);
601         p->size = size;
602         p->next = s->pool_first_large;
603         s->pool_first_large = p;
604         return p->data;
605     } else {
606         p = s->pool_current;
607         if (!p) {
608             p = s->pool_first;
609             if (!p)
                /* no chunk has ever been allocated: fall into the
                   chunk-creation code below */
610                 goto new_pool;
611         } else {
612             if (!p->next) {
            /* NB: also reached via the goto above, from outside this else */
613             new_pool:
614                 pool_size = TCG_POOL_CHUNK_SIZE;
615                 p = g_malloc(sizeof(TCGPool) + pool_size);
616                 p->size = pool_size;
617                 p->next = NULL;
618                 if (s->pool_current)
619                     s->pool_current->next = p;
620                 else
621                     s->pool_first = p;
622             } else {
                /* reuse the next chunk left over from a previous reset */
623                 p = p->next;
624             }
625         }
626     }
    /* caller's block is at the start of the chunk; cur/end bound the rest */
627     s->pool_current = p;
628     s->pool_cur = p->data + size;
629     s->pool_end = p->data + p->size;
630     return p->data;
631 }
632 
633 void tcg_pool_reset(TCGContext *s)
634 {
635     TCGPool *p, *t;
636     for (p = s->pool_first_large; p; p = t) {
637         t = p->next;
638         g_free(p);
639     }
640     s->pool_first_large = NULL;
641     s->pool_cur = s->pool_end = NULL;
642     s->pool_current = NULL;
643 }
644 
/* Per-helper metadata, keyed by function pointer in helper_table (see
   tcg_context_init).  One entry per helper from exec/helper-tcg.h.  */
645 typedef struct TCGHelperInfo {
    /* host address of the helper; the hash-table key */
646     void *func;
647     const char *name;
    /* call flags and argument/return size bits — presumably the TCG_CALL_*
       encodings from the helper headers; confirm in exec/helper-tcg.h */
648     unsigned flags;
649     unsigned sizemask;
650 } TCGHelperInfo;
651 
652 #include "exec/helper-proto.h"
653 
654 static const TCGHelperInfo all_helpers[] = {
655 #include "exec/helper-tcg.h"
656 };
657 static GHashTable *helper_table;
658 
659 static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)];
660 static void process_op_defs(TCGContext *s);
661 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
662                                             TCGReg reg, const char *name);
663 
/* One-time initialization of the (parent) TCG context: op constraint
   storage, the helper lookup table, target backend init, register
   allocation order, the tcg_ctxs[] registry, and the 'env' global.  */
664 void tcg_context_init(TCGContext *s)
665 {
666     int op, total_args, n, i;
667     TCGOpDef *def;
668     TCGArgConstraint *args_ct;
669     int *sorted_args;
670     TCGTemp *ts;
671 
672     memset(s, 0, sizeof(*s));
673     s->nb_globals = 0;
674 
675     /* Count total number of arguments and allocate the corresponding
676        space */
677     total_args = 0;
678     for(op = 0; op < NB_OPS; op++) {
679         def = &tcg_op_defs[op];
680         n = def->nb_iargs + def->nb_oargs;
681         total_args += n;
682     }
683 
684     args_ct = g_malloc(sizeof(TCGArgConstraint) * total_args);
685     sorted_args = g_malloc(sizeof(int) * total_args);
686 
    /* Carve the two flat allocations into per-op slices.  */
687     for(op = 0; op < NB_OPS; op++) {
688         def = &tcg_op_defs[op];
689         def->args_ct = args_ct;
690         def->sorted_args = sorted_args;
691         n = def->nb_iargs + def->nb_oargs;
692         sorted_args += n;
693         args_ct += n;
694     }
695 
696     /* Register helpers.  */
697     /* Use g_direct_hash/equal for direct pointer comparisons on func.  */
698     helper_table = g_hash_table_new(NULL, NULL);
699 
700     for (i = 0; i < ARRAY_SIZE(all_helpers); ++i) {
701         g_hash_table_insert(helper_table, (gpointer)all_helpers[i].func,
702                             (gpointer)&all_helpers[i]);
703     }
704 
705     tcg_target_init(s);
706     process_op_defs(s);
707 
708     /* Reverse the order of the saved registers, assuming they're all at
709        the start of tcg_target_reg_alloc_order.  */
    /* Find n = index of the first call-clobbered register...  */
710     for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) {
711         int r = tcg_target_reg_alloc_order[n];
712         if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) {
713             break;
714         }
715     }
    /* ...reverse the call-saved prefix, keep the rest in order.  */
716     for (i = 0; i < n; ++i) {
717         indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i];
718     }
719     for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) {
720         indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i];
721     }
722 
723     tcg_ctx = s;
724     /*
725      * In user-mode we simply share the init context among threads, since we
726      * use a single region. See the documentation tcg_region_init() for the
727      * reasoning behind this.
728      * In softmmu we will have at most max_cpus TCG threads.
729      */
730 #ifdef CONFIG_USER_ONLY
731     tcg_ctxs = &tcg_ctx;
732     n_tcg_ctxs = 1;
733 #else
734     tcg_ctxs = g_new(TCGContext *, max_cpus);
735 #endif
736 
    /* Create the distinguished 'env' global in TCG_AREG0.  */
737     tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0));
738     ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env");
739     cpu_env = temp_tcgv_ptr(ts);
740 }
741 
742 /*
743  * Allocate TBs right before their corresponding translated code, making
744  * sure that TBs and code are on different cache lines.
745  */
746 TranslationBlock *tcg_tb_alloc(TCGContext *s)
747 {
748     uintptr_t align = qemu_icache_linesize;
749     TranslationBlock *tb;
750     void *next;
751 
752  retry:
753     tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align);
754     next = (void *)ROUND_UP((uintptr_t)(tb + 1), align);
755 
756     if (unlikely(next > s->code_gen_highwater)) {
757         if (tcg_region_alloc(s)) {
758             return NULL;
759         }
760         goto retry;
761     }
762     atomic_set(&s->code_gen_ptr, next);
763     s->data_gen_ptr = NULL;
764     return tb;
765 }
766 
/* Emit the target's prologue at the start of code_gen_buffer, then shrink
   the buffer so translation begins right after it.  Also registers the
   generated code range with the debugger via tcg_register_jit().  */
767 void tcg_prologue_init(TCGContext *s)
768 {
769     size_t prologue_size, total_size;
770     void *buf0, *buf1;
771 
772     /* Put the prologue at the beginning of code_gen_buffer.  */
773     buf0 = s->code_gen_buffer;
774     total_size = s->code_gen_buffer_size;
775     s->code_ptr = buf0;
776     s->code_buf = buf0;
777     s->data_gen_ptr = NULL;
778     s->code_gen_prologue = buf0;
779 
780     /* Compute a high-water mark, at which we voluntarily flush the buffer
781        and start over.  The size here is arbitrary, significantly larger
782        than we expect the code generation for any one opcode to require.  */
783     s->code_gen_highwater = s->code_gen_buffer + (total_size - TCG_HIGHWATER);
784 
785 #ifdef TCG_TARGET_NEED_POOL_LABELS
786     s->pool_labels = NULL;
787 #endif
788 
789     /* Generate the prologue.  */
790     tcg_target_qemu_prologue(s);
791 
792 #ifdef TCG_TARGET_NEED_POOL_LABELS
793     /* Allow the prologue to put e.g. guest_base into a pool entry.  */
794     {
795         bool ok = tcg_out_pool_finalize(s);
796         tcg_debug_assert(ok);
797     }
798 #endif
799 
800     buf1 = s->code_ptr;
801     flush_icache_range((uintptr_t)buf0, (uintptr_t)buf1);
802 
803     /* Deduct the prologue from the buffer.  */
804     prologue_size = tcg_current_code_size(s);
805     s->code_gen_ptr = buf1;
806     s->code_gen_buffer = buf1;
807     s->code_buf = buf1;
808     total_size -= prologue_size;
809     s->code_gen_buffer_size = total_size;
810 
811     tcg_register_jit(s->code_gen_buffer, total_size);
812 
813 #ifdef DEBUG_DISAS
814     if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
815         qemu_log_lock();
816         qemu_log("PROLOGUE: [size=%zu]\n", prologue_size);
        /* If the backend emitted a constant pool, disassemble only the code
           part and dump the pool words as raw data.  */
817         if (s->data_gen_ptr) {
818             size_t code_size = s->data_gen_ptr - buf0;
819             size_t data_size = prologue_size - code_size;
820             size_t i;
821 
822             log_disas(buf0, code_size);
823 
824             for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) {
825                 if (sizeof(tcg_target_ulong) == 8) {
826                     qemu_log("0x%08" PRIxPTR ":  .quad  0x%016" PRIx64 "\n",
827                              (uintptr_t)s->data_gen_ptr + i,
828                              *(uint64_t *)(s->data_gen_ptr + i));
829                 } else {
830                     qemu_log("0x%08" PRIxPTR ":  .long  0x%08x\n",
831                              (uintptr_t)s->data_gen_ptr + i,
832                              *(uint32_t *)(s->data_gen_ptr + i));
833                 }
834             }
835         } else {
836             log_disas(buf0, prologue_size);
837         }
838         qemu_log("\n");
839         qemu_log_flush();
840         qemu_log_unlock();
841     }
842 #endif
843 
844     /* Assert that goto_ptr is implemented completely.  */
845     if (TCG_TARGET_HAS_goto_ptr) {
846         tcg_debug_assert(s->code_gen_epilogue != NULL);
847     }
848 }
849 
/* Reset per-translation state of @s before generating a new TB: drop all
   non-global temps, labels, frame allocations, and the op buffer.  */
850 void tcg_func_start(TCGContext *s)
851 {
852     tcg_pool_reset(s);
    /* forget all temps except the globals */
853     s->nb_temps = s->nb_globals;
854 
855     /* No temps have been previously allocated for size or locality.  */
856     memset(s->free_temps, 0, sizeof(s->free_temps));
857 
858     s->nb_labels = 0;
859     s->current_frame_offset = s->frame_start;
860 
861 #ifdef CONFIG_DEBUG_TCG
862     s->goto_tb_issue_mask = 0;
863 #endif
864 
    /* entry 0 is the list sentinel; real ops start at index 1 */
865     s->gen_op_buf[0].next = 1;
866     s->gen_op_buf[0].prev = 0;
867     s->gen_next_op_idx = 1;
868 }
869 
870 static inline TCGTemp *tcg_temp_alloc(TCGContext *s)
871 {
872     int n = s->nb_temps++;
873     tcg_debug_assert(n < TCG_MAX_TEMPS);
874     return memset(&s->temps[n], 0, sizeof(TCGTemp));
875 }
876 
877 static inline TCGTemp *tcg_global_alloc(TCGContext *s)
878 {
879     TCGTemp *ts;
880 
881     tcg_debug_assert(s->nb_globals == s->nb_temps);
882     s->nb_globals++;
883     ts = tcg_temp_alloc(s);
884     ts->temp_global = 1;
885 
886     return ts;
887 }
888 
889 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
890                                             TCGReg reg, const char *name)
891 {
892     TCGTemp *ts;
893 
894     if (TCG_TARGET_REG_BITS == 32 && type != TCG_TYPE_I32) {
895         tcg_abort();
896     }
897 
898     ts = tcg_global_alloc(s);
899     ts->base_type = type;
900     ts->type = type;
901     ts->fixed_reg = 1;
902     ts->reg = reg;
903     ts->name = name;
904     tcg_regset_set_reg(s->reserved_regs, reg);
905 
906     return ts;
907 }
908 
/* Record the TCG spill area: @size bytes at @start off register @reg, and
   create the fixed "_frame" global pointing at it.  */
909 void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size)
910 {
911     s->frame_start = start;
912     s->frame_end = start + size;
913     s->frame_temp
914         = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame");
915 }
916 
/* Create a global temp of @type backed by memory at @base + @offset.
   On 32-bit hosts a 64-bit global is split into two adjacent 32-bit
   halves named "<name>_0"/"<name>_1", low half first in guest order.  */
917 TCGTemp *tcg_global_mem_new_internal(TCGType type, TCGv_ptr base,
918                                      intptr_t offset, const char *name)
919 {
920     TCGContext *s = tcg_ctx;
921     TCGTemp *base_ts = tcgv_ptr_temp(base);
922     TCGTemp *ts = tcg_global_alloc(s);
923     int indirect_reg = 0, bigendian = 0;
924 #ifdef HOST_WORDS_BIGENDIAN
925     bigendian = 1;
926 #endif
927 
    /* If the base is itself memory-backed, this global must be loaded
       through it: mark both sides of the indirection.  */
928     if (!base_ts->fixed_reg) {
929         /* We do not support double-indirect registers.  */
930         tcg_debug_assert(!base_ts->indirect_reg);
931         base_ts->indirect_base = 1;
932         s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64
933                             ? 2 : 1);
934         indirect_reg = 1;
935     }
936 
937     if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
938         TCGTemp *ts2 = tcg_global_alloc(s);
939         char buf[64];
940 
        /* low half: host-endian offset selects which 4 bytes */
941         ts->base_type = TCG_TYPE_I64;
942         ts->type = TCG_TYPE_I32;
943         ts->indirect_reg = indirect_reg;
944         ts->mem_allocated = 1;
945         ts->mem_base = base_ts;
946         ts->mem_offset = offset + bigendian * 4;
947         pstrcpy(buf, sizeof(buf), name);
948         pstrcat(buf, sizeof(buf), "_0");
949         ts->name = strdup(buf);
950 
        /* high half: must sit in the very next temp slot */
951         tcg_debug_assert(ts2 == ts + 1);
952         ts2->base_type = TCG_TYPE_I64;
953         ts2->type = TCG_TYPE_I32;
954         ts2->indirect_reg = indirect_reg;
955         ts2->mem_allocated = 1;
956         ts2->mem_base = base_ts;
957         ts2->mem_offset = offset + (1 - bigendian) * 4;
958         pstrcpy(buf, sizeof(buf), name);
959         pstrcat(buf, sizeof(buf), "_1");
960         ts2->name = strdup(buf);
961     } else {
962         ts->base_type = type;
963         ts->type = type;
964         ts->indirect_reg = indirect_reg;
965         ts->mem_allocated = 1;
966         ts->mem_base = base_ts;
967         ts->mem_offset = offset;
968         ts->name = name;
969     }
970     return ts;
971 }
972 
/* Allocate a translation-time temporary of TYPE; TEMP_LOCAL selects a
 * "local" temp (preserved across basic blocks).  Reuses a previously
 * freed temp of the same kind when one is available, otherwise grows
 * the temps array.  On a 32-bit host a 64-bit temp occupies two
 * adjacent 32-bit entries.
 */
static TCGTemp *tcg_temp_new_internal(TCGType type, int temp_local)
{
    TCGContext *s = tcg_ctx;
    TCGTemp *ts;
    int idx, k;

    /* Free temps are bucketed by base type, with local temps in a
       second bank of TCG_TYPE_COUNT buckets.  */
    k = type + (temp_local ? TCG_TYPE_COUNT : 0);
    idx = find_first_bit(s->free_temps[k].l, TCG_MAX_TEMPS);
    if (idx < TCG_MAX_TEMPS) {
        /* There is already an available temp with the right type.  */
        clear_bit(idx, s->free_temps[k].l);

        ts = &s->temps[idx];
        ts->temp_allocated = 1;
        tcg_debug_assert(ts->base_type == type);
        tcg_debug_assert(ts->temp_local == temp_local);
    } else {
        ts = tcg_temp_alloc(s);
        if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
            TCGTemp *ts2 = tcg_temp_alloc(s);

            /* Low/high halves: base_type records the logical 64-bit
               type, type the 32-bit storage of each half.  */
            ts->base_type = type;
            ts->type = TCG_TYPE_I32;
            ts->temp_allocated = 1;
            ts->temp_local = temp_local;

            /* The pair must be adjacent in temps[].  */
            tcg_debug_assert(ts2 == ts + 1);
            ts2->base_type = TCG_TYPE_I64;
            ts2->type = TCG_TYPE_I32;
            ts2->temp_allocated = 1;
            ts2->temp_local = temp_local;
        } else {
            ts->base_type = type;
            ts->type = type;
            ts->temp_allocated = 1;
            ts->temp_local = temp_local;
        }
    }

#if defined(CONFIG_DEBUG_TCG)
    s->temps_in_use++;
#endif
    return ts;
}
1017 
1018 TCGv_i32 tcg_temp_new_internal_i32(int temp_local)
1019 {
1020     TCGTemp *t = tcg_temp_new_internal(TCG_TYPE_I32, temp_local);
1021     return temp_tcgv_i32(t);
1022 }
1023 
1024 TCGv_i64 tcg_temp_new_internal_i64(int temp_local)
1025 {
1026     TCGTemp *t = tcg_temp_new_internal(TCG_TYPE_I64, temp_local);
1027     return temp_tcgv_i64(t);
1028 }
1029 
1030 static void tcg_temp_free_internal(TCGTemp *ts)
1031 {
1032     TCGContext *s = tcg_ctx;
1033     int k, idx;
1034 
1035 #if defined(CONFIG_DEBUG_TCG)
1036     s->temps_in_use--;
1037     if (s->temps_in_use < 0) {
1038         fprintf(stderr, "More temporaries freed than allocated!\n");
1039     }
1040 #endif
1041 
1042     tcg_debug_assert(ts->temp_global == 0);
1043     tcg_debug_assert(ts->temp_allocated != 0);
1044     ts->temp_allocated = 0;
1045 
1046     idx = temp_idx(ts);
1047     k = ts->base_type + (ts->temp_local ? TCG_TYPE_COUNT : 0);
1048     set_bit(idx, s->free_temps[k].l);
1049 }
1050 
1051 void tcg_temp_free_i32(TCGv_i32 arg)
1052 {
1053     tcg_temp_free_internal(tcgv_i32_temp(arg));
1054 }
1055 
1056 void tcg_temp_free_i64(TCGv_i64 arg)
1057 {
1058     tcg_temp_free_internal(tcgv_i64_temp(arg));
1059 }
1060 
1061 TCGv_i32 tcg_const_i32(int32_t val)
1062 {
1063     TCGv_i32 t0;
1064     t0 = tcg_temp_new_i32();
1065     tcg_gen_movi_i32(t0, val);
1066     return t0;
1067 }
1068 
1069 TCGv_i64 tcg_const_i64(int64_t val)
1070 {
1071     TCGv_i64 t0;
1072     t0 = tcg_temp_new_i64();
1073     tcg_gen_movi_i64(t0, val);
1074     return t0;
1075 }
1076 
1077 TCGv_i32 tcg_const_local_i32(int32_t val)
1078 {
1079     TCGv_i32 t0;
1080     t0 = tcg_temp_local_new_i32();
1081     tcg_gen_movi_i32(t0, val);
1082     return t0;
1083 }
1084 
1085 TCGv_i64 tcg_const_local_i64(int64_t val)
1086 {
1087     TCGv_i64 t0;
1088     t0 = tcg_temp_local_new_i64();
1089     tcg_gen_movi_i64(t0, val);
1090     return t0;
1091 }
1092 
1093 #if defined(CONFIG_DEBUG_TCG)
1094 void tcg_clear_temp_count(void)
1095 {
1096     TCGContext *s = tcg_ctx;
1097     s->temps_in_use = 0;
1098 }
1099 
1100 int tcg_check_temp_count(void)
1101 {
1102     TCGContext *s = tcg_ctx;
1103     if (s->temps_in_use) {
1104         /* Clear the count so that we don't give another
1105          * warning immediately next time around.
1106          */
1107         s->temps_in_use = 0;
1108         return 1;
1109     }
1110     return 0;
1111 }
1112 #endif
1113 
/* Return true if OP may appear in the opcode stream.
   Test the runtime variable that controls each opcode.  */
bool tcg_op_supported(TCGOpcode op)
{
    switch (op) {
    /* Pseudo-ops and memory ops every backend must provide.  */
    case INDEX_op_discard:
    case INDEX_op_set_label:
    case INDEX_op_call:
    case INDEX_op_br:
    case INDEX_op_mb:
    case INDEX_op_insn_start:
    case INDEX_op_exit_tb:
    case INDEX_op_goto_tb:
    case INDEX_op_qemu_ld_i32:
    case INDEX_op_qemu_st_i32:
    case INDEX_op_qemu_ld_i64:
    case INDEX_op_qemu_st_i64:
        return true;

    case INDEX_op_goto_ptr:
        return TCG_TARGET_HAS_goto_ptr;

    /* Mandatory 32-bit operations, available on all backends.  */
    case INDEX_op_mov_i32:
    case INDEX_op_movi_i32:
    case INDEX_op_setcond_i32:
    case INDEX_op_brcond_i32:
    case INDEX_op_ld8u_i32:
    case INDEX_op_ld8s_i32:
    case INDEX_op_ld16u_i32:
    case INDEX_op_ld16s_i32:
    case INDEX_op_ld_i32:
    case INDEX_op_st8_i32:
    case INDEX_op_st16_i32:
    case INDEX_op_st_i32:
    case INDEX_op_add_i32:
    case INDEX_op_sub_i32:
    case INDEX_op_mul_i32:
    case INDEX_op_and_i32:
    case INDEX_op_or_i32:
    case INDEX_op_xor_i32:
    case INDEX_op_shl_i32:
    case INDEX_op_shr_i32:
    case INDEX_op_sar_i32:
        return true;

    /* Optional 32-bit operations, gated on per-backend HAS macros.  */
    case INDEX_op_movcond_i32:
        return TCG_TARGET_HAS_movcond_i32;
    case INDEX_op_div_i32:
    case INDEX_op_divu_i32:
        return TCG_TARGET_HAS_div_i32;
    case INDEX_op_rem_i32:
    case INDEX_op_remu_i32:
        return TCG_TARGET_HAS_rem_i32;
    case INDEX_op_div2_i32:
    case INDEX_op_divu2_i32:
        return TCG_TARGET_HAS_div2_i32;
    case INDEX_op_rotl_i32:
    case INDEX_op_rotr_i32:
        return TCG_TARGET_HAS_rot_i32;
    case INDEX_op_deposit_i32:
        return TCG_TARGET_HAS_deposit_i32;
    case INDEX_op_extract_i32:
        return TCG_TARGET_HAS_extract_i32;
    case INDEX_op_sextract_i32:
        return TCG_TARGET_HAS_sextract_i32;
    case INDEX_op_add2_i32:
        return TCG_TARGET_HAS_add2_i32;
    case INDEX_op_sub2_i32:
        return TCG_TARGET_HAS_sub2_i32;
    case INDEX_op_mulu2_i32:
        return TCG_TARGET_HAS_mulu2_i32;
    case INDEX_op_muls2_i32:
        return TCG_TARGET_HAS_muls2_i32;
    case INDEX_op_muluh_i32:
        return TCG_TARGET_HAS_muluh_i32;
    case INDEX_op_mulsh_i32:
        return TCG_TARGET_HAS_mulsh_i32;
    case INDEX_op_ext8s_i32:
        return TCG_TARGET_HAS_ext8s_i32;
    case INDEX_op_ext16s_i32:
        return TCG_TARGET_HAS_ext16s_i32;
    case INDEX_op_ext8u_i32:
        return TCG_TARGET_HAS_ext8u_i32;
    case INDEX_op_ext16u_i32:
        return TCG_TARGET_HAS_ext16u_i32;
    case INDEX_op_bswap16_i32:
        return TCG_TARGET_HAS_bswap16_i32;
    case INDEX_op_bswap32_i32:
        return TCG_TARGET_HAS_bswap32_i32;
    case INDEX_op_not_i32:
        return TCG_TARGET_HAS_not_i32;
    case INDEX_op_neg_i32:
        return TCG_TARGET_HAS_neg_i32;
    case INDEX_op_andc_i32:
        return TCG_TARGET_HAS_andc_i32;
    case INDEX_op_orc_i32:
        return TCG_TARGET_HAS_orc_i32;
    case INDEX_op_eqv_i32:
        return TCG_TARGET_HAS_eqv_i32;
    case INDEX_op_nand_i32:
        return TCG_TARGET_HAS_nand_i32;
    case INDEX_op_nor_i32:
        return TCG_TARGET_HAS_nor_i32;
    case INDEX_op_clz_i32:
        return TCG_TARGET_HAS_clz_i32;
    case INDEX_op_ctz_i32:
        return TCG_TARGET_HAS_ctz_i32;
    case INDEX_op_ctpop_i32:
        return TCG_TARGET_HAS_ctpop_i32;

    /* Double-word compares exist only when registers are 32-bit.  */
    case INDEX_op_brcond2_i32:
    case INDEX_op_setcond2_i32:
        return TCG_TARGET_REG_BITS == 32;

    /* Mandatory 64-bit operations, present on all 64-bit backends.  */
    case INDEX_op_mov_i64:
    case INDEX_op_movi_i64:
    case INDEX_op_setcond_i64:
    case INDEX_op_brcond_i64:
    case INDEX_op_ld8u_i64:
    case INDEX_op_ld8s_i64:
    case INDEX_op_ld16u_i64:
    case INDEX_op_ld16s_i64:
    case INDEX_op_ld32u_i64:
    case INDEX_op_ld32s_i64:
    case INDEX_op_ld_i64:
    case INDEX_op_st8_i64:
    case INDEX_op_st16_i64:
    case INDEX_op_st32_i64:
    case INDEX_op_st_i64:
    case INDEX_op_add_i64:
    case INDEX_op_sub_i64:
    case INDEX_op_mul_i64:
    case INDEX_op_and_i64:
    case INDEX_op_or_i64:
    case INDEX_op_xor_i64:
    case INDEX_op_shl_i64:
    case INDEX_op_shr_i64:
    case INDEX_op_sar_i64:
    case INDEX_op_ext_i32_i64:
    case INDEX_op_extu_i32_i64:
        return TCG_TARGET_REG_BITS == 64;

    /* Optional 64-bit operations.  The HAS macros are defined to 0 on
       32-bit hosts, so these also reject 64-bit ops there.  */
    case INDEX_op_movcond_i64:
        return TCG_TARGET_HAS_movcond_i64;
    case INDEX_op_div_i64:
    case INDEX_op_divu_i64:
        return TCG_TARGET_HAS_div_i64;
    case INDEX_op_rem_i64:
    case INDEX_op_remu_i64:
        return TCG_TARGET_HAS_rem_i64;
    case INDEX_op_div2_i64:
    case INDEX_op_divu2_i64:
        return TCG_TARGET_HAS_div2_i64;
    case INDEX_op_rotl_i64:
    case INDEX_op_rotr_i64:
        return TCG_TARGET_HAS_rot_i64;
    case INDEX_op_deposit_i64:
        return TCG_TARGET_HAS_deposit_i64;
    case INDEX_op_extract_i64:
        return TCG_TARGET_HAS_extract_i64;
    case INDEX_op_sextract_i64:
        return TCG_TARGET_HAS_sextract_i64;
    case INDEX_op_extrl_i64_i32:
        return TCG_TARGET_HAS_extrl_i64_i32;
    case INDEX_op_extrh_i64_i32:
        return TCG_TARGET_HAS_extrh_i64_i32;
    case INDEX_op_ext8s_i64:
        return TCG_TARGET_HAS_ext8s_i64;
    case INDEX_op_ext16s_i64:
        return TCG_TARGET_HAS_ext16s_i64;
    case INDEX_op_ext32s_i64:
        return TCG_TARGET_HAS_ext32s_i64;
    case INDEX_op_ext8u_i64:
        return TCG_TARGET_HAS_ext8u_i64;
    case INDEX_op_ext16u_i64:
        return TCG_TARGET_HAS_ext16u_i64;
    case INDEX_op_ext32u_i64:
        return TCG_TARGET_HAS_ext32u_i64;
    case INDEX_op_bswap16_i64:
        return TCG_TARGET_HAS_bswap16_i64;
    case INDEX_op_bswap32_i64:
        return TCG_TARGET_HAS_bswap32_i64;
    case INDEX_op_bswap64_i64:
        return TCG_TARGET_HAS_bswap64_i64;
    case INDEX_op_not_i64:
        return TCG_TARGET_HAS_not_i64;
    case INDEX_op_neg_i64:
        return TCG_TARGET_HAS_neg_i64;
    case INDEX_op_andc_i64:
        return TCG_TARGET_HAS_andc_i64;
    case INDEX_op_orc_i64:
        return TCG_TARGET_HAS_orc_i64;
    case INDEX_op_eqv_i64:
        return TCG_TARGET_HAS_eqv_i64;
    case INDEX_op_nand_i64:
        return TCG_TARGET_HAS_nand_i64;
    case INDEX_op_nor_i64:
        return TCG_TARGET_HAS_nor_i64;
    case INDEX_op_clz_i64:
        return TCG_TARGET_HAS_clz_i64;
    case INDEX_op_ctz_i64:
        return TCG_TARGET_HAS_ctz_i64;
    case INDEX_op_ctpop_i64:
        return TCG_TARGET_HAS_ctpop_i64;
    case INDEX_op_add2_i64:
        return TCG_TARGET_HAS_add2_i64;
    case INDEX_op_sub2_i64:
        return TCG_TARGET_HAS_sub2_i64;
    case INDEX_op_mulu2_i64:
        return TCG_TARGET_HAS_mulu2_i64;
    case INDEX_op_muls2_i64:
        return TCG_TARGET_HAS_muls2_i64;
    case INDEX_op_muluh_i64:
        return TCG_TARGET_HAS_muluh_i64;
    case INDEX_op_mulsh_i64:
        return TCG_TARGET_HAS_mulsh_i64;

    case NB_OPS:
        break;
    }
    /* No default case above: the compiler warns if an opcode is added
       to the enum without being handled here.  */
    g_assert_not_reached();
}
1336 
/* Note: we convert the 64 bit args to 32 bit and do some alignment
   and endian swap. Maybe it would be better to do the alignment
   and endian swap in tcg_reg_alloc_call(). */
/* Emit an INDEX_op_call op invoking helper FUNC with NARGS arguments
 * from ARGS, optionally writing the result to RET (NULL for void
 * helpers).  FUNC must have been registered in helper_table so its
 * flags and sizemask (64-bit/signedness info per argument) can be
 * looked up.
 */
void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args)
{
    TCGContext *s = tcg_ctx;
    int i, real_args, nb_rets, pi;
    unsigned sizemask, flags;
    TCGHelperInfo *info;
    TCGOp *op;

    info = g_hash_table_lookup(helper_table, (gpointer)func);
    flags = info->flags;
    sizemask = info->sizemask;

#if defined(__sparc__) && !defined(__arch64__) \
    && !defined(CONFIG_TCG_INTERPRETER)
    /* We have 64-bit values in one register, but need to pass as two
       separate parameters.  Split them.  */
    int orig_sizemask = sizemask;
    int orig_nargs = nargs;
    TCGv_i64 retl, reth;
    TCGTemp *split_args[MAX_OPC_PARAM];

    TCGV_UNUSED_I64(retl);
    TCGV_UNUSED_I64(reth);
    if (sizemask != 0) {
        for (i = real_args = 0; i < nargs; ++i) {
            /* Bit (i+1)*2 of sizemask is set for 64-bit argument i.  */
            int is_64bit = sizemask & (1 << (i+1)*2);
            if (is_64bit) {
                TCGv_i64 orig = temp_tcgv_i64(args[i]);
                TCGv_i32 h = tcg_temp_new_i32();
                TCGv_i32 l = tcg_temp_new_i32();
                tcg_gen_extr_i64_i32(l, h, orig);
                split_args[real_args++] = tcgv_i32_temp(h);
                split_args[real_args++] = tcgv_i32_temp(l);
            } else {
                split_args[real_args++] = args[i];
            }
        }
        nargs = real_args;
        args = split_args;
        sizemask = 0;
    }
#elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
    /* The host ABI requires 32-bit arguments to be extended to 64 bits;
       do the extension here, honoring the signedness bit of sizemask.  */
    for (i = 0; i < nargs; ++i) {
        int is_64bit = sizemask & (1 << (i+1)*2);
        int is_signed = sizemask & (2 << (i+1)*2);
        if (!is_64bit) {
            TCGv_i64 temp = tcg_temp_new_i64();
            TCGv_i64 orig = temp_tcgv_i64(args[i]);
            if (is_signed) {
                tcg_gen_ext32s_i64(temp, orig);
            } else {
                tcg_gen_ext32u_i64(temp, orig);
            }
            args[i] = tcgv_i64_temp(temp);
        }
    }
#endif /* TCG_TARGET_EXTEND_ARGS */

    /* Append a fresh op to the end of the op buffer.  */
    i = s->gen_next_op_idx;
    tcg_debug_assert(i < OPC_BUF_SIZE);
    s->gen_op_buf[0].prev = i;
    s->gen_next_op_idx = i + 1;
    op = &s->gen_op_buf[i];

    /* Set links for sequential allocation during translation.  */
    memset(op, 0, offsetof(TCGOp, args));
    op->opc = INDEX_op_call;
    op->prev = i - 1;
    op->next = i + 1;

    /* Fill in op->args: outputs first, then inputs, then func + flags.  */
    pi = 0;
    if (ret != NULL) {
#if defined(__sparc__) && !defined(__arch64__) \
    && !defined(CONFIG_TCG_INTERPRETER)
        if (orig_sizemask & 1) {
            /* The 32-bit ABI is going to return the 64-bit value in
               the %o0/%o1 register pair.  Prepare for this by using
               two return temporaries, and reassemble below.  */
            retl = tcg_temp_new_i64();
            reth = tcg_temp_new_i64();
            op->args[pi++] = tcgv_i64_arg(reth);
            op->args[pi++] = tcgv_i64_arg(retl);
            nb_rets = 2;
        } else {
            op->args[pi++] = temp_arg(ret);
            nb_rets = 1;
        }
#else
        if (TCG_TARGET_REG_BITS < 64 && (sizemask & 1)) {
            /* A 64-bit return on a 32-bit host uses the two adjacent
               temps ret/ret+1, ordered to match host endianness.  */
#ifdef HOST_WORDS_BIGENDIAN
            op->args[pi++] = temp_arg(ret + 1);
            op->args[pi++] = temp_arg(ret);
#else
            op->args[pi++] = temp_arg(ret);
            op->args[pi++] = temp_arg(ret + 1);
#endif
            nb_rets = 2;
        } else {
            op->args[pi++] = temp_arg(ret);
            nb_rets = 1;
        }
#endif
    } else {
        nb_rets = 0;
    }
    op->callo = nb_rets;

    real_args = 0;
    for (i = 0; i < nargs; i++) {
        int is_64bit = sizemask & (1 << (i+1)*2);
        if (TCG_TARGET_REG_BITS < 64 && is_64bit) {
#ifdef TCG_TARGET_CALL_ALIGN_ARGS
            /* some targets want aligned 64 bit args */
            if (real_args & 1) {
                op->args[pi++] = TCG_CALL_DUMMY_ARG;
                real_args++;
            }
#endif
           /* If stack grows up, then we will be placing successive
              arguments at lower addresses, which means we need to
              reverse the order compared to how we would normally
              treat either big or little-endian.  For those arguments
              that will wind up in registers, this still works for
              HPPA (the only current STACK_GROWSUP target) since the
              argument registers are *also* allocated in decreasing
              order.  If another such target is added, this logic may
              have to get more complicated to differentiate between
              stack arguments and register arguments.  */
#if defined(HOST_WORDS_BIGENDIAN) != defined(TCG_TARGET_STACK_GROWSUP)
            op->args[pi++] = temp_arg(args[i] + 1);
            op->args[pi++] = temp_arg(args[i]);
#else
            op->args[pi++] = temp_arg(args[i]);
            op->args[pi++] = temp_arg(args[i] + 1);
#endif
            real_args += 2;
            continue;
        }

        op->args[pi++] = temp_arg(args[i]);
        real_args++;
    }
    op->args[pi++] = (uintptr_t)func;
    op->args[pi++] = flags;
    op->calli = real_args;

    /* Make sure the fields didn't overflow.  */
    tcg_debug_assert(op->calli == real_args);
    tcg_debug_assert(pi <= ARRAY_SIZE(op->args));

#if defined(__sparc__) && !defined(__arch64__) \
    && !defined(CONFIG_TCG_INTERPRETER)
    /* Free all of the parts we allocated above.  */
    for (i = real_args = 0; i < orig_nargs; ++i) {
        int is_64bit = orig_sizemask & (1 << (i+1)*2);
        if (is_64bit) {
            tcg_temp_free_internal(args[real_args++]);
            tcg_temp_free_internal(args[real_args++]);
        } else {
            real_args++;
        }
    }
    if (orig_sizemask & 1) {
        /* The 32-bit ABI returned two 32-bit pieces.  Re-assemble them.
           Note that describing these as TCGv_i64 eliminates an unnecessary
           zero-extension that tcg_gen_concat_i32_i64 would create.  */
        tcg_gen_concat32_i64(temp_tcgv_i64(ret), retl, reth);
        tcg_temp_free_i64(retl);
        tcg_temp_free_i64(reth);
    }
#elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
    /* Free the extension temps allocated above.  */
    for (i = 0; i < nargs; ++i) {
        int is_64bit = sizemask & (1 << (i+1)*2);
        if (!is_64bit) {
            tcg_temp_free_internal(args[i]);
        }
    }
#endif /* TCG_TARGET_EXTEND_ARGS */
}
1519 
1520 static void tcg_reg_alloc_start(TCGContext *s)
1521 {
1522     int i, n;
1523     TCGTemp *ts;
1524 
1525     for (i = 0, n = s->nb_globals; i < n; i++) {
1526         ts = &s->temps[i];
1527         ts->val_type = (ts->fixed_reg ? TEMP_VAL_REG : TEMP_VAL_MEM);
1528     }
1529     for (n = s->nb_temps; i < n; i++) {
1530         ts = &s->temps[i];
1531         ts->val_type = (ts->temp_local ? TEMP_VAL_MEM : TEMP_VAL_DEAD);
1532         ts->mem_allocated = 0;
1533         ts->fixed_reg = 0;
1534     }
1535 
1536     memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp));
1537 }
1538 
1539 static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size,
1540                                  TCGTemp *ts)
1541 {
1542     int idx = temp_idx(ts);
1543 
1544     if (ts->temp_global) {
1545         pstrcpy(buf, buf_size, ts->name);
1546     } else if (ts->temp_local) {
1547         snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
1548     } else {
1549         snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
1550     }
1551     return buf;
1552 }
1553 
1554 static char *tcg_get_arg_str(TCGContext *s, char *buf,
1555                              int buf_size, TCGArg arg)
1556 {
1557     return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg));
1558 }
1559 
1560 /* Find helper name.  */
1561 static inline const char *tcg_find_helper(TCGContext *s, uintptr_t val)
1562 {
1563     const char *ret = NULL;
1564     if (helper_table) {
1565         TCGHelperInfo *info = g_hash_table_lookup(helper_table, (gpointer)val);
1566         if (info) {
1567             ret = info->name;
1568         }
1569     }
1570     return ret;
1571 }
1572 
/* Printable names for TCGCond values, used by tcg_dump_ops.  */
static const char * const cond_name[] =
{
    [TCG_COND_NEVER] = "never",
    [TCG_COND_ALWAYS] = "always",
    [TCG_COND_EQ] = "eq",
    [TCG_COND_NE] = "ne",
    [TCG_COND_LT] = "lt",
    [TCG_COND_GE] = "ge",
    [TCG_COND_LE] = "le",
    [TCG_COND_GT] = "gt",
    [TCG_COND_LTU] = "ltu",
    [TCG_COND_GEU] = "geu",
    [TCG_COND_LEU] = "leu",
    [TCG_COND_GTU] = "gtu"
};
1588 
/* Printable names for the size/sign/endianness part of a TCGMemOp,
   indexed by (memop & (MO_BSWAP | MO_SSIZE)); used by tcg_dump_ops.  */
static const char * const ldst_name[] =
{
    [MO_UB]   = "ub",
    [MO_SB]   = "sb",
    [MO_LEUW] = "leuw",
    [MO_LESW] = "lesw",
    [MO_LEUL] = "leul",
    [MO_LESL] = "lesl",
    [MO_LEQ]  = "leq",
    [MO_BEUW] = "beuw",
    [MO_BESW] = "besw",
    [MO_BEUL] = "beul",
    [MO_BESL] = "besl",
    [MO_BEQ]  = "beq",
};
1604 
/* Printable prefixes for the alignment part of a TCGMemOp.  The label
   for the default case is empty, so only deviations from the default
   (which depends on ALIGNED_ONLY) are shown by tcg_dump_ops.  */
static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
#ifdef ALIGNED_ONLY
    [MO_UNALN >> MO_ASHIFT]    = "un+",
    [MO_ALIGN >> MO_ASHIFT]    = "",
#else
    [MO_UNALN >> MO_ASHIFT]    = "",
    [MO_ALIGN >> MO_ASHIFT]    = "al+",
#endif
    [MO_ALIGN_2 >> MO_ASHIFT]  = "al2+",
    [MO_ALIGN_4 >> MO_ASHIFT]  = "al4+",
    [MO_ALIGN_8 >> MO_ASHIFT]  = "al8+",
    [MO_ALIGN_16 >> MO_ASHIFT] = "al16+",
    [MO_ALIGN_32 >> MO_ASHIFT] = "al32+",
    [MO_ALIGN_64 >> MO_ASHIFT] = "al64+",
};
1620 
/* Dump the current op stream to the qemu log, one op per line, with
 * per-op liveness information appended when available.
 */
void tcg_dump_ops(TCGContext *s)
{
    char buf[128];
    TCGOp *op;
    int oi;

    /* Walk the doubly-linked op list; index 0 is the list header.  */
    for (oi = s->gen_op_buf[0].next; oi != 0; oi = op->next) {
        int i, k, nb_oargs, nb_iargs, nb_cargs;
        const TCGOpDef *def;
        TCGOpcode c;
        int col = 0;       /* output column, for aligning liveness info */

        op = &s->gen_op_buf[oi];
        c = op->opc;
        def = &tcg_op_defs[c];

        if (c == INDEX_op_insn_start) {
            /* Guest instruction boundary: print the start PC words.  */
            col += qemu_log("%s ----", oi != s->gen_op_buf[0].next ? "\n" : "");

            for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
                target_ulong a;
#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
                /* Each word is stored as two host-sized halves.  */
                a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
#else
                a = op->args[i];
#endif
                col += qemu_log(" " TARGET_FMT_lx, a);
            }
        } else if (c == INDEX_op_call) {
            /* variable number of arguments */
            nb_oargs = op->callo;
            nb_iargs = op->calli;
            nb_cargs = def->nb_cargs;

            /* function name, flags, out args */
            col += qemu_log(" %s %s,$0x%" TCG_PRIlx ",$%d", def->name,
                            tcg_find_helper(s, op->args[nb_oargs + nb_iargs]),
                            op->args[nb_oargs + nb_iargs + 1], nb_oargs);
            for (i = 0; i < nb_oargs; i++) {
                col += qemu_log(",%s", tcg_get_arg_str(s, buf, sizeof(buf),
                                                       op->args[i]));
            }
            for (i = 0; i < nb_iargs; i++) {
                TCGArg arg = op->args[nb_oargs + i];
                const char *t = "<dummy>";
                if (arg != TCG_CALL_DUMMY_ARG) {
                    t = tcg_get_arg_str(s, buf, sizeof(buf), arg);
                }
                col += qemu_log(",%s", t);
            }
        } else {
            col += qemu_log(" %s ", def->name);

            nb_oargs = def->nb_oargs;
            nb_iargs = def->nb_iargs;
            nb_cargs = def->nb_cargs;

            /* k indexes op->args across outputs, inputs, constants.  */
            k = 0;
            for (i = 0; i < nb_oargs; i++) {
                if (k != 0) {
                    col += qemu_log(",");
                }
                col += qemu_log("%s", tcg_get_arg_str(s, buf, sizeof(buf),
                                                      op->args[k++]));
            }
            for (i = 0; i < nb_iargs; i++) {
                if (k != 0) {
                    col += qemu_log(",");
                }
                col += qemu_log("%s", tcg_get_arg_str(s, buf, sizeof(buf),
                                                      op->args[k++]));
            }
            /* Decode the first constant arg symbolically when we can;
               i counts how many constant args have been printed.  */
            switch (c) {
            case INDEX_op_brcond_i32:
            case INDEX_op_setcond_i32:
            case INDEX_op_movcond_i32:
            case INDEX_op_brcond2_i32:
            case INDEX_op_setcond2_i32:
            case INDEX_op_brcond_i64:
            case INDEX_op_setcond_i64:
            case INDEX_op_movcond_i64:
                if (op->args[k] < ARRAY_SIZE(cond_name)
                    && cond_name[op->args[k]]) {
                    col += qemu_log(",%s", cond_name[op->args[k++]]);
                } else {
                    col += qemu_log(",$0x%" TCG_PRIlx, op->args[k++]);
                }
                i = 1;
                break;
            case INDEX_op_qemu_ld_i32:
            case INDEX_op_qemu_st_i32:
            case INDEX_op_qemu_ld_i64:
            case INDEX_op_qemu_st_i64:
                {
                    /* NB: these locals shadow the outer op/oi.  */
                    TCGMemOpIdx oi = op->args[k++];
                    TCGMemOp op = get_memop(oi);
                    unsigned ix = get_mmuidx(oi);

                    if (op & ~(MO_AMASK | MO_BSWAP | MO_SSIZE)) {
                        /* Unrecognized bits: fall back to raw hex.  */
                        col += qemu_log(",$0x%x,%u", op, ix);
                    } else {
                        const char *s_al, *s_op;
                        s_al = alignment_name[(op & MO_AMASK) >> MO_ASHIFT];
                        s_op = ldst_name[op & (MO_BSWAP | MO_SSIZE)];
                        col += qemu_log(",%s%s,%u", s_al, s_op, ix);
                    }
                    i = 1;
                }
                break;
            default:
                i = 0;
                break;
            }
            /* Branch targets print as label ids rather than raw values.  */
            switch (c) {
            case INDEX_op_set_label:
            case INDEX_op_br:
            case INDEX_op_brcond_i32:
            case INDEX_op_brcond_i64:
            case INDEX_op_brcond2_i32:
                col += qemu_log("%s$L%d", k ? "," : "",
                                arg_label(op->args[k])->id);
                i++, k++;
                break;
            default:
                break;
            }
            /* Remaining constant args print as hex.  */
            for (; i < nb_cargs; i++, k++) {
                col += qemu_log("%s$0x%" TCG_PRIlx, k ? "," : "", op->args[k]);
            }
        }
        if (op->life) {
            unsigned life = op->life;

            /* Pad to a fixed column before the liveness annotations.  */
            for (; col < 48; ++col) {
                putc(' ', qemu_logfile);
            }

            if (life & (SYNC_ARG * 3)) {
                qemu_log("  sync:");
                for (i = 0; i < 2; ++i) {
                    if (life & (SYNC_ARG << i)) {
                        qemu_log(" %d", i);
                    }
                }
            }
            life /= DEAD_ARG;
            if (life) {
                qemu_log("  dead:");
                for (i = 0; life; ++i, life >>= 1) {
                    if (life & 1) {
                        qemu_log(" %d", i);
                    }
                }
            }
        }
        qemu_log("\n");
    }
}
1779 
1780 /* we give more priority to constraints with less registers */
1781 static int get_constraint_priority(const TCGOpDef *def, int k)
1782 {
1783     const TCGArgConstraint *arg_ct;
1784 
1785     int i, n;
1786     arg_ct = &def->args_ct[k];
1787     if (arg_ct->ct & TCG_CT_ALIAS) {
1788         /* an alias is equivalent to a single register */
1789         n = 1;
1790     } else {
1791         if (!(arg_ct->ct & TCG_CT_REG))
1792             return 0;
1793         n = 0;
1794         for(i = 0; i < TCG_TARGET_NB_REGS; i++) {
1795             if (tcg_regset_test_reg(arg_ct->u.regs, i))
1796                 n++;
1797         }
1798     }
1799     return TCG_TARGET_NB_REGS - n + 1;
1800 }
1801 
1802 /* sort from highest priority to lowest */
1803 static void sort_constraints(TCGOpDef *def, int start, int n)
1804 {
1805     int i, j, p1, p2, tmp;
1806 
1807     for(i = 0; i < n; i++)
1808         def->sorted_args[start + i] = start + i;
1809     if (n <= 1)
1810         return;
1811     for(i = 0; i < n - 1; i++) {
1812         for(j = i + 1; j < n; j++) {
1813             p1 = get_constraint_priority(def, def->sorted_args[start + i]);
1814             p2 = get_constraint_priority(def, def->sorted_args[start + j]);
1815             if (p1 < p2) {
1816                 tmp = def->sorted_args[start + i];
1817                 def->sorted_args[start + i] = def->sorted_args[start + j];
1818                 def->sorted_args[start + j] = tmp;
1819             }
1820         }
1821     }
1822 }
1823 
/* Parse every opcode's target-provided constraint strings into the
 * args_ct descriptors and sort the arguments by constraint priority.
 * Called once at startup; asserts on malformed TCGTargetOpDef entries.
 */
static void process_op_defs(TCGContext *s)
{
    TCGOpcode op;

    for (op = 0; op < NB_OPS; op++) {
        TCGOpDef *def = &tcg_op_defs[op];
        const TCGTargetOpDef *tdefs;
        TCGType type;
        int i, nb_args;

        if (def->flags & TCG_OPF_NOT_PRESENT) {
            continue;
        }

        nb_args = def->nb_iargs + def->nb_oargs;
        if (nb_args == 0) {
            continue;
        }

        tdefs = tcg_target_op_def(op);
        /* Missing TCGTargetOpDef entry. */
        tcg_debug_assert(tdefs != NULL);

        type = (def->flags & TCG_OPF_64BIT ? TCG_TYPE_I64 : TCG_TYPE_I32);
        for (i = 0; i < nb_args; i++) {
            const char *ct_str = tdefs->args_ct_str[i];
            /* Incomplete TCGTargetOpDef entry. */
            tcg_debug_assert(ct_str != NULL);

            def->args_ct[i].u.regs = 0;
            def->args_ct[i].ct = 0;
            /* Each character of the string adds one constraint.  */
            while (*ct_str != '\0') {
                switch(*ct_str) {
                case '0' ... '9':
                    {
                        /* Digit: input aliased to output arg OARG.
                           Must be the sole constraint character.  */
                        int oarg = *ct_str - '0';
                        tcg_debug_assert(ct_str == tdefs->args_ct_str[i]);
                        tcg_debug_assert(oarg < def->nb_oargs);
                        tcg_debug_assert(def->args_ct[oarg].ct & TCG_CT_REG);
                        /* TCG_CT_ALIAS is for the output arguments.
                           The input is tagged with TCG_CT_IALIAS. */
                        def->args_ct[i] = def->args_ct[oarg];
                        def->args_ct[oarg].ct |= TCG_CT_ALIAS;
                        def->args_ct[oarg].alias_index = i;
                        def->args_ct[i].ct |= TCG_CT_IALIAS;
                        def->args_ct[i].alias_index = oarg;
                    }
                    ct_str++;
                    break;
                case '&':
                    /* Output must not overlap any input register.  */
                    def->args_ct[i].ct |= TCG_CT_NEWREG;
                    ct_str++;
                    break;
                case 'i':
                    /* Immediate constant allowed.  */
                    def->args_ct[i].ct |= TCG_CT_CONST;
                    ct_str++;
                    break;
                default:
                    /* Backend-specific letter; the backend consumes
                       one or more characters and returns the rest.  */
                    ct_str = target_parse_constraint(&def->args_ct[i],
                                                     ct_str, type);
                    /* Typo in TCGTargetOpDef constraint. */
                    tcg_debug_assert(ct_str != NULL);
                }
            }
        }

        /* TCGTargetOpDef entry with too much information? */
        tcg_debug_assert(i == TCG_MAX_OP_ARGS || tdefs->args_ct_str[i] == NULL);

        /* sort the constraints (XXX: this is just an heuristic) */
        sort_constraints(def, 0, def->nb_oargs);
        sort_constraints(def, def->nb_oargs, def->nb_iargs);
    }
}
1898 
1899 void tcg_op_remove(TCGContext *s, TCGOp *op)
1900 {
1901     int next = op->next;
1902     int prev = op->prev;
1903 
1904     /* We should never attempt to remove the list terminator.  */
1905     tcg_debug_assert(op != &s->gen_op_buf[0]);
1906 
1907     s->gen_op_buf[next].prev = prev;
1908     s->gen_op_buf[prev].next = next;
1909 
1910     memset(op, 0, sizeof(*op));
1911 
1912 #ifdef CONFIG_PROFILER
1913     atomic_set(&s->prof.del_op_count, s->prof.del_op_count + 1);
1914 #endif
1915 }
1916 
1917 TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op,
1918                             TCGOpcode opc, int nargs)
1919 {
1920     int oi = s->gen_next_op_idx;
1921     int prev = old_op->prev;
1922     int next = old_op - s->gen_op_buf;
1923     TCGOp *new_op;
1924 
1925     tcg_debug_assert(oi < OPC_BUF_SIZE);
1926     s->gen_next_op_idx = oi + 1;
1927 
1928     new_op = &s->gen_op_buf[oi];
1929     *new_op = (TCGOp){
1930         .opc = opc,
1931         .prev = prev,
1932         .next = next
1933     };
1934     s->gen_op_buf[prev].next = oi;
1935     old_op->prev = oi;
1936 
1937     return new_op;
1938 }
1939 
1940 TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op,
1941                            TCGOpcode opc, int nargs)
1942 {
1943     int oi = s->gen_next_op_idx;
1944     int prev = old_op - s->gen_op_buf;
1945     int next = old_op->next;
1946     TCGOp *new_op;
1947 
1948     tcg_debug_assert(oi < OPC_BUF_SIZE);
1949     s->gen_next_op_idx = oi + 1;
1950 
1951     new_op = &s->gen_op_buf[oi];
1952     *new_op = (TCGOp){
1953         .opc = opc,
1954         .prev = prev,
1955         .next = next
1956     };
1957     s->gen_op_buf[next].prev = oi;
1958     old_op->next = oi;
1959 
1960     return new_op;
1961 }
1962 
1963 #define TS_DEAD  1
1964 #define TS_MEM   2
1965 
1966 #define IS_DEAD_ARG(n)   (arg_life & (DEAD_ARG << (n)))
1967 #define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n)))
1968 
1969 /* liveness analysis: end of function: all temps are dead, and globals
1970    should be in memory. */
1971 static void tcg_la_func_end(TCGContext *s)
1972 {
1973     int ng = s->nb_globals;
1974     int nt = s->nb_temps;
1975     int i;
1976 
1977     for (i = 0; i < ng; ++i) {
1978         s->temps[i].state = TS_DEAD | TS_MEM;
1979     }
1980     for (i = ng; i < nt; ++i) {
1981         s->temps[i].state = TS_DEAD;
1982     }
1983 }
1984 
1985 /* liveness analysis: end of basic block: all temps are dead, globals
1986    and local temps should be in memory. */
1987 static void tcg_la_bb_end(TCGContext *s)
1988 {
1989     int ng = s->nb_globals;
1990     int nt = s->nb_temps;
1991     int i;
1992 
1993     for (i = 0; i < ng; ++i) {
1994         s->temps[i].state = TS_DEAD | TS_MEM;
1995     }
1996     for (i = ng; i < nt; ++i) {
1997         s->temps[i].state = (s->temps[i].temp_local
1998                              ? TS_DEAD | TS_MEM
1999                              : TS_DEAD);
2000     }
2001 }
2002 
/* Liveness analysis : update the opc_arg_life array to tell if a
   given input arguments is dead. Instructions updating dead
   temporaries are removed.
   Walks the op list backwards so that a later use is seen before the
   defining op; per-temp state is kept in TCGTemp.state as a mask of
   TS_DEAD / TS_MEM.  */
static void liveness_pass_1(TCGContext *s)
{
    int nb_globals = s->nb_globals;
    int oi, oi_prev;

    /* Start from the function-end state: all dead, globals in memory.  */
    tcg_la_func_end(s);

    for (oi = s->gen_op_buf[0].prev; oi != 0; oi = oi_prev) {
        int i, nb_iargs, nb_oargs;
        TCGOpcode opc_new, opc_new2;
        bool have_opc_new2;
        TCGLifeData arg_life = 0;
        TCGTemp *arg_ts;

        TCGOp * const op = &s->gen_op_buf[oi];
        TCGOpcode opc = op->opc;
        const TCGOpDef *def = &tcg_op_defs[opc];

        /* Record the predecessor now: the op may be removed below.  */
        oi_prev = op->prev;

        switch (opc) {
        case INDEX_op_call:
            {
                int call_flags;

                nb_oargs = op->callo;
                nb_iargs = op->calli;
                call_flags = op->args[nb_oargs + nb_iargs + 1];

                /* pure functions can be removed if their result is unused */
                if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
                    for (i = 0; i < nb_oargs; i++) {
                        arg_ts = arg_temp(op->args[i]);
                        if (arg_ts->state != TS_DEAD) {
                            goto do_not_remove_call;
                        }
                    }
                    goto do_remove;
                } else {
                do_not_remove_call:

                    /* output args are dead */
                    for (i = 0; i < nb_oargs; i++) {
                        arg_ts = arg_temp(op->args[i]);
                        if (arg_ts->state & TS_DEAD) {
                            arg_life |= DEAD_ARG << i;
                        }
                        if (arg_ts->state & TS_MEM) {
                            arg_life |= SYNC_ARG << i;
                        }
                        arg_ts->state = TS_DEAD;
                    }

                    if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
                                        TCG_CALL_NO_READ_GLOBALS))) {
                        /* globals should go back to memory */
                        for (i = 0; i < nb_globals; i++) {
                            s->temps[i].state = TS_DEAD | TS_MEM;
                        }
                    } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
                        /* globals should be synced to memory */
                        for (i = 0; i < nb_globals; i++) {
                            s->temps[i].state |= TS_MEM;
                        }
                    }

                    /* record arguments that die in this helper */
                    for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
                        arg_ts = arg_temp(op->args[i]);
                        if (arg_ts && arg_ts->state & TS_DEAD) {
                            arg_life |= DEAD_ARG << i;
                        }
                    }
                    /* input arguments are live for preceding opcodes */
                    for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
                        arg_ts = arg_temp(op->args[i]);
                        if (arg_ts) {
                            arg_ts->state &= ~TS_DEAD;
                        }
                    }
                }
            }
            break;
        case INDEX_op_insn_start:
            break;
        case INDEX_op_discard:
            /* mark the temporary as dead */
            arg_temp(op->args[0])->state = TS_DEAD;
            break;

        case INDEX_op_add2_i32:
            opc_new = INDEX_op_add_i32;
            goto do_addsub2;
        case INDEX_op_sub2_i32:
            opc_new = INDEX_op_sub_i32;
            goto do_addsub2;
        case INDEX_op_add2_i64:
            opc_new = INDEX_op_add_i64;
            goto do_addsub2;
        case INDEX_op_sub2_i64:
            opc_new = INDEX_op_sub_i64;
        do_addsub2:
            nb_iargs = 4;
            nb_oargs = 2;
            /* Test if the high part of the operation is dead, but not
               the low part.  The result can be optimized to a simple
               add or sub.  This happens often for x86_64 guest when the
               cpu mode is set to 32 bit.  */
            if (arg_temp(op->args[1])->state == TS_DEAD) {
                if (arg_temp(op->args[0])->state == TS_DEAD) {
                    goto do_remove;
                }
                /* Replace the opcode and adjust the args in place,
                   leaving 3 unused args at the end.  */
                op->opc = opc = opc_new;
                op->args[1] = op->args[2];
                op->args[2] = op->args[4];
                /* Fall through and mark the single-word operation live.  */
                nb_iargs = 2;
                nb_oargs = 1;
            }
            goto do_not_remove;

        case INDEX_op_mulu2_i32:
            opc_new = INDEX_op_mul_i32;
            opc_new2 = INDEX_op_muluh_i32;
            have_opc_new2 = TCG_TARGET_HAS_muluh_i32;
            goto do_mul2;
        case INDEX_op_muls2_i32:
            opc_new = INDEX_op_mul_i32;
            opc_new2 = INDEX_op_mulsh_i32;
            have_opc_new2 = TCG_TARGET_HAS_mulsh_i32;
            goto do_mul2;
        case INDEX_op_mulu2_i64:
            opc_new = INDEX_op_mul_i64;
            opc_new2 = INDEX_op_muluh_i64;
            have_opc_new2 = TCG_TARGET_HAS_muluh_i64;
            goto do_mul2;
        case INDEX_op_muls2_i64:
            opc_new = INDEX_op_mul_i64;
            opc_new2 = INDEX_op_mulsh_i64;
            have_opc_new2 = TCG_TARGET_HAS_mulsh_i64;
            goto do_mul2;
        do_mul2:
            nb_iargs = 2;
            nb_oargs = 2;
            if (arg_temp(op->args[1])->state == TS_DEAD) {
                if (arg_temp(op->args[0])->state == TS_DEAD) {
                    /* Both parts of the operation are dead.  */
                    goto do_remove;
                }
                /* The high part of the operation is dead; generate the low. */
                op->opc = opc = opc_new;
                op->args[1] = op->args[2];
                op->args[2] = op->args[3];
            } else if (arg_temp(op->args[0])->state == TS_DEAD && have_opc_new2) {
                /* The low part of the operation is dead; generate the high. */
                op->opc = opc = opc_new2;
                op->args[0] = op->args[1];
                op->args[1] = op->args[2];
                op->args[2] = op->args[3];
            } else {
                goto do_not_remove;
            }
            /* Mark the single-word operation live.  */
            nb_oargs = 1;
            goto do_not_remove;

        default:
            /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
            nb_iargs = def->nb_iargs;
            nb_oargs = def->nb_oargs;

            /* Test if the operation can be removed because all
               its outputs are dead. We assume that nb_oargs == 0
               implies side effects */
            if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) {
                for (i = 0; i < nb_oargs; i++) {
                    if (arg_temp(op->args[i])->state != TS_DEAD) {
                        goto do_not_remove;
                    }
                }
            do_remove:
                tcg_op_remove(s, op);
            } else {
            do_not_remove:
                /* output args are dead */
                for (i = 0; i < nb_oargs; i++) {
                    arg_ts = arg_temp(op->args[i]);
                    if (arg_ts->state & TS_DEAD) {
                        arg_life |= DEAD_ARG << i;
                    }
                    if (arg_ts->state & TS_MEM) {
                        arg_life |= SYNC_ARG << i;
                    }
                    arg_ts->state = TS_DEAD;
                }

                /* if end of basic block, update */
                if (def->flags & TCG_OPF_BB_END) {
                    tcg_la_bb_end(s);
                } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
                    /* globals should be synced to memory */
                    for (i = 0; i < nb_globals; i++) {
                        s->temps[i].state |= TS_MEM;
                    }
                }

                /* record arguments that die in this opcode */
                for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
                    arg_ts = arg_temp(op->args[i]);
                    if (arg_ts->state & TS_DEAD) {
                        arg_life |= DEAD_ARG << i;
                    }
                }
                /* input arguments are live for preceding opcodes */
                for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
                    arg_temp(op->args[i])->state &= ~TS_DEAD;
                }
            }
            break;
        }
        /* Note: a removed op's life is never read; writing it is harmless
           because tcg_op_remove only clears the removed slot.  */
        op->life = arg_life;
    }
}
2231 
/* Liveness analysis: Convert indirect regs to direct temporaries.
   For each indirect global a shadow direct temp is created; explicit
   ld/st ops are inserted around uses so later passes only see direct
   temps.  Returns true if any op argument was rewritten, in which case
   the caller should re-run liveness on the modified list.  */
static bool liveness_pass_2(TCGContext *s)
{
    int nb_globals = s->nb_globals;
    int nb_temps, i, oi, oi_next;
    bool changes = false;

    /* Create a temporary for each indirect global.  */
    for (i = 0; i < nb_globals; ++i) {
        TCGTemp *its = &s->temps[i];
        if (its->indirect_reg) {
            TCGTemp *dts = tcg_temp_alloc(s);
            dts->type = its->type;
            dts->base_type = its->base_type;
            its->state_ptr = dts;
        } else {
            its->state_ptr = NULL;
        }
        /* All globals begin dead.  */
        its->state = TS_DEAD;
    }
    for (nb_temps = s->nb_temps; i < nb_temps; ++i) {
        TCGTemp *its = &s->temps[i];
        its->state_ptr = NULL;
        its->state = TS_DEAD;
    }

    /* Forward walk over the op list (insertions below must not be
       re-visited relative to the current op).  */
    for (oi = s->gen_op_buf[0].next; oi != 0; oi = oi_next) {
        TCGOp *op = &s->gen_op_buf[oi];
        TCGOpcode opc = op->opc;
        const TCGOpDef *def = &tcg_op_defs[opc];
        TCGLifeData arg_life = op->life;
        int nb_iargs, nb_oargs, call_flags;
        TCGTemp *arg_ts, *dir_ts;

        oi_next = op->next;

        if (opc == INDEX_op_call) {
            nb_oargs = op->callo;
            nb_iargs = op->calli;
            call_flags = op->args[nb_oargs + nb_iargs + 1];
        } else {
            nb_iargs = def->nb_iargs;
            nb_oargs = def->nb_oargs;

            /* Set flags similar to how calls require.  */
            if (def->flags & TCG_OPF_BB_END) {
                /* Like writing globals: save_globals */
                call_flags = 0;
            } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
                /* Like reading globals: sync_globals */
                call_flags = TCG_CALL_NO_WRITE_GLOBALS;
            } else {
                /* No effect on globals.  */
                call_flags = (TCG_CALL_NO_READ_GLOBALS |
                              TCG_CALL_NO_WRITE_GLOBALS);
            }
        }

        /* Make sure that input arguments are available.  */
        for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
            arg_ts = arg_temp(op->args[i]);
            if (arg_ts) {
                dir_ts = arg_ts->state_ptr;
                if (dir_ts && arg_ts->state == TS_DEAD) {
                    /* Insert a load of the indirect global into its
                       shadow direct temp before this op.  */
                    TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32
                                      ? INDEX_op_ld_i32
                                      : INDEX_op_ld_i64);
                    TCGOp *lop = tcg_op_insert_before(s, op, lopc, 3);

                    lop->args[0] = temp_arg(dir_ts);
                    lop->args[1] = temp_arg(arg_ts->mem_base);
                    lop->args[2] = arg_ts->mem_offset;

                    /* Loaded, but synced with memory.  */
                    arg_ts->state = TS_MEM;
                }
            }
        }

        /* Perform input replacement, and mark inputs that became dead.
           No action is required except keeping temp_state up to date
           so that we reload when needed.  */
        for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
            arg_ts = arg_temp(op->args[i]);
            if (arg_ts) {
                dir_ts = arg_ts->state_ptr;
                if (dir_ts) {
                    op->args[i] = temp_arg(dir_ts);
                    changes = true;
                    if (IS_DEAD_ARG(i)) {
                        arg_ts->state = TS_DEAD;
                    }
                }
            }
        }

        /* Liveness analysis should ensure that the following are
           all correct, for call sites and basic block end points.  */
        if (call_flags & TCG_CALL_NO_READ_GLOBALS) {
            /* Nothing to do */
        } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) {
            for (i = 0; i < nb_globals; ++i) {
                /* Liveness should see that globals are synced back,
                   that is, either TS_DEAD or TS_MEM.  */
                arg_ts = &s->temps[i];
                tcg_debug_assert(arg_ts->state_ptr == 0
                                 || arg_ts->state != 0);
            }
        } else {
            for (i = 0; i < nb_globals; ++i) {
                /* Liveness should see that globals are saved back,
                   that is, TS_DEAD, waiting to be reloaded.  */
                arg_ts = &s->temps[i];
                tcg_debug_assert(arg_ts->state_ptr == 0
                                 || arg_ts->state == TS_DEAD);
            }
        }

        /* Outputs become available.  */
        for (i = 0; i < nb_oargs; i++) {
            arg_ts = arg_temp(op->args[i]);
            dir_ts = arg_ts->state_ptr;
            if (!dir_ts) {
                continue;
            }
            op->args[i] = temp_arg(dir_ts);
            changes = true;

            /* The output is now live and modified.  */
            arg_ts->state = 0;

            /* Sync outputs upon their last write.  */
            if (NEED_SYNC_ARG(i)) {
                TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
                                  ? INDEX_op_st_i32
                                  : INDEX_op_st_i64);
                TCGOp *sop = tcg_op_insert_after(s, op, sopc, 3);

                sop->args[0] = temp_arg(dir_ts);
                sop->args[1] = temp_arg(arg_ts->mem_base);
                sop->args[2] = arg_ts->mem_offset;

                arg_ts->state = TS_MEM;
            }
            /* Drop outputs that are dead.  */
            if (IS_DEAD_ARG(i)) {
                arg_ts->state = TS_DEAD;
            }
        }
    }

    return changes;
}
2386 
2387 #ifdef CONFIG_DEBUG_TCG
2388 static void dump_regs(TCGContext *s)
2389 {
2390     TCGTemp *ts;
2391     int i;
2392     char buf[64];
2393 
2394     for(i = 0; i < s->nb_temps; i++) {
2395         ts = &s->temps[i];
2396         printf("  %10s: ", tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts));
2397         switch(ts->val_type) {
2398         case TEMP_VAL_REG:
2399             printf("%s", tcg_target_reg_names[ts->reg]);
2400             break;
2401         case TEMP_VAL_MEM:
2402             printf("%d(%s)", (int)ts->mem_offset,
2403                    tcg_target_reg_names[ts->mem_base->reg]);
2404             break;
2405         case TEMP_VAL_CONST:
2406             printf("$0x%" TCG_PRIlx, ts->val);
2407             break;
2408         case TEMP_VAL_DEAD:
2409             printf("D");
2410             break;
2411         default:
2412             printf("???");
2413             break;
2414         }
2415         printf("\n");
2416     }
2417 
2418     for(i = 0; i < TCG_TARGET_NB_REGS; i++) {
2419         if (s->reg_to_temp[i] != NULL) {
2420             printf("%s: %s\n",
2421                    tcg_target_reg_names[i],
2422                    tcg_get_arg_str_ptr(s, buf, sizeof(buf), s->reg_to_temp[i]));
2423         }
2424     }
2425 }
2426 
/* Debug helper: verify the two-way consistency between reg_to_temp[]
   and each temp's (val_type, reg).  Dumps state and aborts on any
   mismatch.  Note the 'fail' label in the second loop is also the
   target of the goto in the first loop.  */
static void check_regs(TCGContext *s)
{
    int reg;
    int k;
    TCGTemp *ts;
    char buf[64];

    /* Every register claimed by reg_to_temp must point to a temp that
       believes it lives in that register.  */
    for (reg = 0; reg < TCG_TARGET_NB_REGS; reg++) {
        ts = s->reg_to_temp[reg];
        if (ts != NULL) {
            if (ts->val_type != TEMP_VAL_REG || ts->reg != reg) {
                printf("Inconsistency for register %s:\n",
                       tcg_target_reg_names[reg]);
                goto fail;
            }
        }
    }
    /* Every register-resident, non-fixed temp must be recorded in
       reg_to_temp.  */
    for (k = 0; k < s->nb_temps; k++) {
        ts = &s->temps[k];
        if (ts->val_type == TEMP_VAL_REG && !ts->fixed_reg
            && s->reg_to_temp[ts->reg] != ts) {
            printf("Inconsistency for temp %s:\n",
                   tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts));
        fail:
            printf("reg state:\n");
            dump_regs(s);
            tcg_abort();
        }
    }
}
2457 #endif
2458 
/* Assign TS a slot in the TCG stack frame (mem_base/mem_offset) and
   mark it allocated.  Aborts when the frame is exhausted.  */
static void temp_allocate_frame(TCGContext *s, TCGTemp *ts)
{
#if !(defined(__sparc__) && TCG_TARGET_REG_BITS == 64)
    /* Sparc64 stack is accessed with offset of 2047 */
    /* Round the frame offset up to the next tcg_target_long boundary.  */
    s->current_frame_offset = (s->current_frame_offset +
                               (tcg_target_long)sizeof(tcg_target_long) - 1) &
        ~(sizeof(tcg_target_long) - 1);
#endif
    if (s->current_frame_offset + (tcg_target_long)sizeof(tcg_target_long) >
        s->frame_end) {
        /* Frame space exhausted.  */
        tcg_abort();
    }
    ts->mem_offset = s->current_frame_offset;
    ts->mem_base = s->frame_temp;
    ts->mem_allocated = 1;
    s->current_frame_offset += sizeof(tcg_target_long);
}
2476 
2477 static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet);
2478 
2479 /* Mark a temporary as free or dead.  If 'free_or_dead' is negative,
2480    mark it free; otherwise mark it dead.  */
2481 static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead)
2482 {
2483     if (ts->fixed_reg) {
2484         return;
2485     }
2486     if (ts->val_type == TEMP_VAL_REG) {
2487         s->reg_to_temp[ts->reg] = NULL;
2488     }
2489     ts->val_type = (free_or_dead < 0
2490                     || ts->temp_local
2491                     || ts->temp_global
2492                     ? TEMP_VAL_MEM : TEMP_VAL_DEAD);
2493 }
2494 
/* Mark a temporary as dead.  Thin wrapper over temp_free_or_dead with
   a positive argument (dead rather than free).  */
static inline void temp_dead(TCGContext *s, TCGTemp *ts)
{
    temp_free_or_dead(s, ts, 1);
}
2500 
/* Sync a temporary to memory. 'allocated_regs' is used in case a temporary
   registers needs to be allocated to store a constant.  If 'free_or_dead'
   is non-zero, subsequently release the temporary; if it is positive, the
   temp is dead; if it is negative, the temp is free.  */
static void temp_sync(TCGContext *s, TCGTemp *ts,
                      TCGRegSet allocated_regs, int free_or_dead)
{
    /* Fixed-register temps are never synced.  */
    if (ts->fixed_reg) {
        return;
    }
    if (!ts->mem_coherent) {
        if (!ts->mem_allocated) {
            temp_allocate_frame(s, ts);
        }
        switch (ts->val_type) {
        case TEMP_VAL_CONST:
            /* If we're going to free the temp immediately, then we won't
               require it later in a register, so attempt to store the
               constant to memory directly.  */
            if (free_or_dead
                && tcg_out_sti(s, ts->type, ts->val,
                               ts->mem_base->reg, ts->mem_offset)) {
                break;
            }
            /* Otherwise materialize the constant into a register and
               fall through to the register store below.  */
            temp_load(s, ts, tcg_target_available_regs[ts->type],
                      allocated_regs);
            /* fallthrough */

        case TEMP_VAL_REG:
            tcg_out_st(s, ts->type, ts->reg,
                       ts->mem_base->reg, ts->mem_offset);
            break;

        case TEMP_VAL_MEM:
            break;

        case TEMP_VAL_DEAD:
        default:
            tcg_abort();
        }
        ts->mem_coherent = 1;
    }
    if (free_or_dead) {
        temp_free_or_dead(s, ts, free_or_dead);
    }
}
2547 
2548 /* free register 'reg' by spilling the corresponding temporary if necessary */
2549 static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs)
2550 {
2551     TCGTemp *ts = s->reg_to_temp[reg];
2552     if (ts != NULL) {
2553         temp_sync(s, ts, allocated_regs, -1);
2554     }
2555 }
2556 
2557 /* Allocate a register belonging to reg1 & ~reg2 */
2558 static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet desired_regs,
2559                             TCGRegSet allocated_regs, bool rev)
2560 {
2561     int i, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
2562     const int *order;
2563     TCGReg reg;
2564     TCGRegSet reg_ct;
2565 
2566     reg_ct = desired_regs & ~allocated_regs;
2567     order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
2568 
2569     /* first try free registers */
2570     for(i = 0; i < n; i++) {
2571         reg = order[i];
2572         if (tcg_regset_test_reg(reg_ct, reg) && s->reg_to_temp[reg] == NULL)
2573             return reg;
2574     }
2575 
2576     /* XXX: do better spill choice */
2577     for(i = 0; i < n; i++) {
2578         reg = order[i];
2579         if (tcg_regset_test_reg(reg_ct, reg)) {
2580             tcg_reg_free(s, reg, allocated_regs);
2581             return reg;
2582         }
2583     }
2584 
2585     tcg_abort();
2586 }
2587 
2588 /* Make sure the temporary is in a register.  If needed, allocate the register
2589    from DESIRED while avoiding ALLOCATED.  */
2590 static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
2591                       TCGRegSet allocated_regs)
2592 {
2593     TCGReg reg;
2594 
2595     switch (ts->val_type) {
2596     case TEMP_VAL_REG:
2597         return;
2598     case TEMP_VAL_CONST:
2599         reg = tcg_reg_alloc(s, desired_regs, allocated_regs, ts->indirect_base);
2600         tcg_out_movi(s, ts->type, reg, ts->val);
2601         ts->mem_coherent = 0;
2602         break;
2603     case TEMP_VAL_MEM:
2604         reg = tcg_reg_alloc(s, desired_regs, allocated_regs, ts->indirect_base);
2605         tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
2606         ts->mem_coherent = 1;
2607         break;
2608     case TEMP_VAL_DEAD:
2609     default:
2610         tcg_abort();
2611     }
2612     ts->reg = reg;
2613     ts->val_type = TEMP_VAL_REG;
2614     s->reg_to_temp[reg] = ts;
2615 }
2616 
/* Save a temporary to memory. 'allocated_regs' is used in case a
   temporary registers needs to be allocated to store a constant.
   NOTE: the body is currently assert-only, so 'allocated_regs' is
   unused; the parameter is kept for interface symmetry with callers.  */
static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
{
    /* The liveness analysis already ensures that globals are back
       in memory. Keep an tcg_debug_assert for safety. */
    tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || ts->fixed_reg);
}
2625 
2626 /* save globals to their canonical location and assume they can be
2627    modified be the following code. 'allocated_regs' is used in case a
2628    temporary registers needs to be allocated to store a constant. */
2629 static void save_globals(TCGContext *s, TCGRegSet allocated_regs)
2630 {
2631     int i, n;
2632 
2633     for (i = 0, n = s->nb_globals; i < n; i++) {
2634         temp_save(s, &s->temps[i], allocated_regs);
2635     }
2636 }
2637 
2638 /* sync globals to their canonical location and assume they can be
2639    read by the following code. 'allocated_regs' is used in case a
2640    temporary registers needs to be allocated to store a constant. */
2641 static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
2642 {
2643     int i, n;
2644 
2645     for (i = 0, n = s->nb_globals; i < n; i++) {
2646         TCGTemp *ts = &s->temps[i];
2647         tcg_debug_assert(ts->val_type != TEMP_VAL_REG
2648                          || ts->fixed_reg
2649                          || ts->mem_coherent);
2650     }
2651 }
2652 
2653 /* at the end of a basic block, we assume all temporaries are dead and
2654    all globals are stored at their canonical location. */
2655 static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
2656 {
2657     int i;
2658 
2659     for (i = s->nb_globals; i < s->nb_temps; i++) {
2660         TCGTemp *ts = &s->temps[i];
2661         if (ts->temp_local) {
2662             temp_save(s, ts, allocated_regs);
2663         } else {
2664             /* The liveness analysis already ensures that temps are dead.
2665                Keep an tcg_debug_assert for safety. */
2666             tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
2667         }
2668     }
2669 
2670     save_globals(s, allocated_regs);
2671 }
2672 
/* Record constant VAL as the value of OTS without emitting code unless
   forced: fixed-register temps get an immediate move; otherwise the
   constant is only propagated into the temp's state, to be materialized
   lazily.  NOTE: the NEED_SYNC_ARG/IS_DEAD_ARG macros expand to reads
   of the local named 'arg_life'.  */
static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots,
                                  tcg_target_ulong val, TCGLifeData arg_life)
{
    if (ots->fixed_reg) {
        /* For fixed registers, we do not do any constant propagation.  */
        tcg_out_movi(s, ots->type, ots->reg, val);
        return;
    }

    /* The movi is not explicitly generated here.  */
    if (ots->val_type == TEMP_VAL_REG) {
        s->reg_to_temp[ots->reg] = NULL;
    }
    ots->val_type = TEMP_VAL_CONST;
    ots->val = val;
    ots->mem_coherent = 0;
    if (NEED_SYNC_ARG(0)) {
        temp_sync(s, ots, s->reserved_regs, IS_DEAD_ARG(0));
    } else if (IS_DEAD_ARG(0)) {
        temp_dead(s, ots);
    }
}
2695 
2696 static void tcg_reg_alloc_movi(TCGContext *s, const TCGOp *op)
2697 {
2698     TCGTemp *ots = arg_temp(op->args[0]);
2699     tcg_target_ulong val = op->args[1];
2700 
2701     tcg_reg_alloc_do_movi(s, ots, val, op->life);
2702 }
2703 
/* Handle a mov opcode: propagate a constant source, suppress the mov
   entirely when the source dies here, or emit a register-register move
   (or a direct store when the output is dead but needs syncing). */
static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op)
{
    const TCGLifeData arg_life = op->life;
    TCGRegSet allocated_regs;
    TCGTemp *ts, *ots;
    TCGType otype, itype;

    allocated_regs = s->reserved_regs;
    ots = arg_temp(op->args[0]);
    ts = arg_temp(op->args[1]);

    /* Note that otype != itype for no-op truncation.  */
    otype = ots->type;
    itype = ts->type;

    if (ts->val_type == TEMP_VAL_CONST) {
        /* propagate constant or generate sti */
        tcg_target_ulong val = ts->val;
        if (IS_DEAD_ARG(1)) {
            temp_dead(s, ts);
        }
        tcg_reg_alloc_do_movi(s, ots, val, arg_life);
        return;
    }

    /* If the source value is in memory we're going to be forced
       to have it in a register in order to perform the copy.  Copy
       the SOURCE value into its own register first, that way we
       don't have to reload SOURCE the next time it is used. */
    if (ts->val_type == TEMP_VAL_MEM) {
        temp_load(s, ts, tcg_target_available_regs[itype], allocated_regs);
    }

    tcg_debug_assert(ts->val_type == TEMP_VAL_REG);
    if (IS_DEAD_ARG(0) && !ots->fixed_reg) {
        /* mov to a non-saved dead register makes no sense (even with
           liveness analysis disabled). */
        tcg_debug_assert(NEED_SYNC_ARG(0));
        if (!ots->mem_allocated) {
            temp_allocate_frame(s, ots);
        }
        /* Store straight to the output's memory slot; no output
           register is needed since the output dies here.  */
        tcg_out_st(s, otype, ts->reg, ots->mem_base->reg, ots->mem_offset);
        if (IS_DEAD_ARG(1)) {
            temp_dead(s, ts);
        }
        temp_dead(s, ots);
    } else {
        if (IS_DEAD_ARG(1) && !ts->fixed_reg && !ots->fixed_reg) {
            /* the mov can be suppressed */
            if (ots->val_type == TEMP_VAL_REG) {
                s->reg_to_temp[ots->reg] = NULL;
            }
            /* The output simply takes over the source's register.  */
            ots->reg = ts->reg;
            temp_dead(s, ts);
        } else {
            if (ots->val_type != TEMP_VAL_REG) {
                /* When allocating a new register, make sure to not spill the
                   input one. */
                tcg_regset_set_reg(allocated_regs, ts->reg);
                ots->reg = tcg_reg_alloc(s, tcg_target_available_regs[otype],
                                         allocated_regs, ots->indirect_base);
            }
            tcg_out_mov(s, otype, ots->reg, ts->reg);
        }
        /* The output now lives in a register and shadows any memory copy.  */
        ots->val_type = TEMP_VAL_REG;
        ots->mem_coherent = 0;
        s->reg_to_temp[ots->reg] = ots;
        if (NEED_SYNC_ARG(0)) {
            temp_sync(s, ots, allocated_regs, 0);
        }
    }
}
2776 
/* Allocate registers and emit code for one generic op, i.e. any op not
   handled specially (mov/movi/call/label).  Inputs are placed per the
   backend's constraint table, outputs are assigned registers, and the
   backend's tcg_out_op() emits the instruction. */
static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
{
    const TCGLifeData arg_life = op->life;
    const TCGOpDef * const def = &tcg_op_defs[op->opc];
    TCGRegSet i_allocated_regs;
    TCGRegSet o_allocated_regs;
    int i, k, nb_iargs, nb_oargs;
    TCGReg reg;
    TCGArg arg;
    const TCGArgConstraint *arg_ct;
    TCGTemp *ts;
    TCGArg new_args[TCG_MAX_OP_ARGS];
    int const_args[TCG_MAX_OP_ARGS];

    nb_oargs = def->nb_oargs;
    nb_iargs = def->nb_iargs;

    /* copy constants */
    memcpy(new_args + nb_oargs + nb_iargs,
           op->args + nb_oargs + nb_iargs,
           sizeof(TCGArg) * def->nb_cargs);

    i_allocated_regs = s->reserved_regs;
    o_allocated_regs = s->reserved_regs;

    /* satisfy input constraints */
    for (k = 0; k < nb_iargs; k++) {
        /* sorted_args gives the order in which to process arguments.  */
        i = def->sorted_args[nb_oargs + k];
        arg = op->args[i];
        arg_ct = &def->args_ct[i];
        ts = arg_temp(arg);

        if (ts->val_type == TEMP_VAL_CONST
            && tcg_target_const_match(ts->val, ts->type, arg_ct)) {
            /* constant is OK for instruction */
            const_args[i] = 1;
            new_args[i] = ts->val;
            goto iarg_end;
        }

        temp_load(s, ts, arg_ct->u.regs, i_allocated_regs);

        if (arg_ct->ct & TCG_CT_IALIAS) {
            if (ts->fixed_reg) {
                /* if fixed register, we must allocate a new register
                   if the alias is not the same register */
                if (arg != op->args[arg_ct->alias_index])
                    goto allocate_in_reg;
            } else {
                /* if the input is aliased to an output and if it is
                   not dead after the instruction, we must allocate
                   a new register and move it */
                if (!IS_DEAD_ARG(i)) {
                    goto allocate_in_reg;
                }
                /* check if the current register has already been allocated
                   for another input aliased to an output */
                int k2, i2;
                for (k2 = 0 ; k2 < k ; k2++) {
                    i2 = def->sorted_args[nb_oargs + k2];
                    if ((def->args_ct[i2].ct & TCG_CT_IALIAS) &&
                        (new_args[i2] == ts->reg)) {
                        goto allocate_in_reg;
                    }
                }
            }
        }
        reg = ts->reg;
        if (tcg_regset_test_reg(arg_ct->u.regs, reg)) {
            /* nothing to do : the constraint is satisfied */
        } else {
        allocate_in_reg:
            /* allocate a new register matching the constraint
               and move the temporary register into it */
            reg = tcg_reg_alloc(s, arg_ct->u.regs, i_allocated_regs,
                                ts->indirect_base);
            tcg_out_mov(s, ts->type, reg, ts->reg);
        }
        new_args[i] = reg;
        const_args[i] = 0;
        tcg_regset_set_reg(i_allocated_regs, reg);
    iarg_end: ;
    }

    /* mark dead temporaries and free the associated registers */
    for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
        if (IS_DEAD_ARG(i)) {
            temp_dead(s, arg_temp(op->args[i]));
        }
    }

    if (def->flags & TCG_OPF_BB_END) {
        /* The op ends the basic block: spill locals, kill temps,
           store globals.  */
        tcg_reg_alloc_bb_end(s, i_allocated_regs);
    } else {
        if (def->flags & TCG_OPF_CALL_CLOBBER) {
            /* XXX: permit generic clobber register list ? */
            for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
                if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
                    tcg_reg_free(s, i, i_allocated_regs);
                }
            }
        }
        if (def->flags & TCG_OPF_SIDE_EFFECTS) {
            /* sync globals if the op has side effects and might trigger
               an exception. */
            sync_globals(s, i_allocated_regs);
        }

        /* satisfy the output constraints */
        for(k = 0; k < nb_oargs; k++) {
            i = def->sorted_args[k];
            arg = op->args[i];
            arg_ct = &def->args_ct[i];
            ts = arg_temp(arg);
            if ((arg_ct->ct & TCG_CT_ALIAS)
                && !const_args[arg_ct->alias_index]) {
                /* The output reuses the register chosen for its
                   aliased input.  */
                reg = new_args[arg_ct->alias_index];
            } else if (arg_ct->ct & TCG_CT_NEWREG) {
                /* The output must not overlap any input register,
                   hence the union of both allocated sets.  */
                reg = tcg_reg_alloc(s, arg_ct->u.regs,
                                    i_allocated_regs | o_allocated_regs,
                                    ts->indirect_base);
            } else {
                /* if fixed register, we try to use it */
                reg = ts->reg;
                if (ts->fixed_reg &&
                    tcg_regset_test_reg(arg_ct->u.regs, reg)) {
                    goto oarg_end;
                }
                reg = tcg_reg_alloc(s, arg_ct->u.regs, o_allocated_regs,
                                    ts->indirect_base);
            }
            tcg_regset_set_reg(o_allocated_regs, reg);
            /* if a fixed register is used, then a move will be done afterwards */
            if (!ts->fixed_reg) {
                if (ts->val_type == TEMP_VAL_REG) {
                    s->reg_to_temp[ts->reg] = NULL;
                }
                ts->val_type = TEMP_VAL_REG;
                ts->reg = reg;
                /* temp value is modified, so the value kept in memory is
                   potentially not the same */
                ts->mem_coherent = 0;
                s->reg_to_temp[reg] = ts;
            }
        oarg_end:
            new_args[i] = reg;
        }
    }

    /* emit instruction */
    tcg_out_op(s, op->opc, new_args, const_args);

    /* move the outputs in the correct register if needed */
    for(i = 0; i < nb_oargs; i++) {
        ts = arg_temp(op->args[i]);
        reg = new_args[i];
        if (ts->fixed_reg && ts->reg != reg) {
            tcg_out_mov(s, ts->type, ts->reg, reg);
        }
        if (NEED_SYNC_ARG(i)) {
            temp_sync(s, ts, o_allocated_regs, IS_DEAD_ARG(i));
        } else if (IS_DEAD_ARG(i)) {
            temp_dead(s, ts);
        }
    }
}
2943 
/* Adjust a stack displacement for the direction the host call stack
   grows.  NOTE(review): not referenced in the code visible here;
   tcg_reg_alloc_call() handles the direction with #ifdefs directly --
   confirm it is used elsewhere before removing. */
#ifdef TCG_TARGET_STACK_GROWSUP
#define STACK_DIR(x) (-(x))
#else
#define STACK_DIR(x) (x)
#endif
2949 
/* Allocate registers and emit code for a call op: marshal the excess
   arguments to stack slots, load the first arguments into the ABI
   argument registers, spill/sync globals as the call flags require,
   emit the call, and bind the outputs to the ABI return registers. */
static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
{
    const int nb_oargs = op->callo;
    const int nb_iargs = op->calli;
    const TCGLifeData arg_life = op->life;
    int flags, nb_regs, i;
    TCGReg reg;
    TCGArg arg;
    TCGTemp *ts;
    intptr_t stack_offset;
    size_t call_stack_size;
    tcg_insn_unit *func_addr;
    int allocate_args;
    TCGRegSet allocated_regs;

    /* The function pointer and flags are stored after the in/out args.  */
    func_addr = (tcg_insn_unit *)(intptr_t)op->args[nb_oargs + nb_iargs];
    flags = op->args[nb_oargs + nb_iargs + 1];

    /* nb_regs = number of inputs passed in registers, the rest go on
       the stack.  */
    nb_regs = ARRAY_SIZE(tcg_target_call_iarg_regs);
    if (nb_regs > nb_iargs) {
        nb_regs = nb_iargs;
    }

    /* assign stack slots first */
    call_stack_size = (nb_iargs - nb_regs) * sizeof(tcg_target_long);
    call_stack_size = (call_stack_size + TCG_TARGET_STACK_ALIGN - 1) &
        ~(TCG_TARGET_STACK_ALIGN - 1);
    allocate_args = (call_stack_size > TCG_STATIC_CALL_ARGS_SIZE);
    if (allocate_args) {
        /* XXX: if more than TCG_STATIC_CALL_ARGS_SIZE is needed,
           preallocate call stack */
        tcg_abort();
    }

    stack_offset = TCG_TARGET_CALL_STACK_OFFSET;
    for (i = nb_regs; i < nb_iargs; i++) {
        arg = op->args[nb_oargs + i];
#ifdef TCG_TARGET_STACK_GROWSUP
        stack_offset -= sizeof(tcg_target_long);
#endif
        /* A dummy arg still consumes a stack slot, but nothing is
           stored into it.  */
        if (arg != TCG_CALL_DUMMY_ARG) {
            ts = arg_temp(arg);
            temp_load(s, ts, tcg_target_available_regs[ts->type],
                      s->reserved_regs);
            tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK, stack_offset);
        }
#ifndef TCG_TARGET_STACK_GROWSUP
        stack_offset += sizeof(tcg_target_long);
#endif
    }

    /* assign input registers */
    allocated_regs = s->reserved_regs;
    for (i = 0; i < nb_regs; i++) {
        arg = op->args[nb_oargs + i];
        if (arg != TCG_CALL_DUMMY_ARG) {
            ts = arg_temp(arg);
            reg = tcg_target_call_iarg_regs[i];
            /* Evict whatever currently occupies the argument register.  */
            tcg_reg_free(s, reg, allocated_regs);

            if (ts->val_type == TEMP_VAL_REG) {
                if (ts->reg != reg) {
                    tcg_out_mov(s, ts->type, reg, ts->reg);
                }
            } else {
                /* Force the load directly into the argument register.  */
                TCGRegSet arg_set = 0;

                tcg_regset_set_reg(arg_set, reg);
                temp_load(s, ts, arg_set, allocated_regs);
            }

            tcg_regset_set_reg(allocated_regs, reg);
        }
    }

    /* mark dead temporaries and free the associated registers */
    for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
        if (IS_DEAD_ARG(i)) {
            temp_dead(s, arg_temp(op->args[i]));
        }
    }

    /* clobber call registers */
    for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
        if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
            tcg_reg_free(s, i, allocated_regs);
        }
    }

    /* Save globals if they might be written by the helper, sync them if
       they might be read. */
    if (flags & TCG_CALL_NO_READ_GLOBALS) {
        /* Nothing to do */
    } else if (flags & TCG_CALL_NO_WRITE_GLOBALS) {
        sync_globals(s, allocated_regs);
    } else {
        save_globals(s, allocated_regs);
    }

    tcg_out_call(s, func_addr);

    /* assign output registers and emit moves if needed */
    for(i = 0; i < nb_oargs; i++) {
        arg = op->args[i];
        ts = arg_temp(arg);
        reg = tcg_target_call_oarg_regs[i];
        /* The clobber loop above must have freed the return register.  */
        tcg_debug_assert(s->reg_to_temp[reg] == NULL);

        if (ts->fixed_reg) {
            if (ts->reg != reg) {
                tcg_out_mov(s, ts->type, ts->reg, reg);
            }
        } else {
            if (ts->val_type == TEMP_VAL_REG) {
                s->reg_to_temp[ts->reg] = NULL;
            }
            ts->val_type = TEMP_VAL_REG;
            ts->reg = reg;
            ts->mem_coherent = 0;
            s->reg_to_temp[reg] = ts;
            if (NEED_SYNC_ARG(i)) {
                temp_sync(s, ts, allocated_regs, IS_DEAD_ARG(i));
            } else if (IS_DEAD_ARG(i)) {
                temp_dead(s, ts);
            }
        }
    }
}
3078 
3079 #ifdef CONFIG_PROFILER
3080 
/* avoid copy/paste errors */
/* Accumulate one counter field from @from into the snapshot @to,
   reading the source atomically. */
#define PROF_ADD(to, from, field)                       \
    do {                                                \
        (to)->field += atomic_read(&((from)->field));   \
    } while (0)

/* Like PROF_ADD, but keep the maximum value seen rather than the sum. */
#define PROF_MAX(to, from, field)                                       \
    do {                                                                \
        typeof((from)->field) val__ = atomic_read(&((from)->field));    \
        if (val__ > (to)->field) {                                      \
            (to)->field = val__;                                        \
        }                                                               \
    } while (0)
3094 
3095 /* Pass in a zero'ed @prof */
3096 static inline
3097 void tcg_profile_snapshot(TCGProfile *prof, bool counters, bool table)
3098 {
3099     unsigned int n_ctxs = atomic_read(&n_tcg_ctxs);
3100     unsigned int i;
3101 
3102     for (i = 0; i < n_ctxs; i++) {
3103         TCGContext *s = atomic_read(&tcg_ctxs[i]);
3104         const TCGProfile *orig = &s->prof;
3105 
3106         if (counters) {
3107             PROF_ADD(prof, orig, tb_count1);
3108             PROF_ADD(prof, orig, tb_count);
3109             PROF_ADD(prof, orig, op_count);
3110             PROF_MAX(prof, orig, op_count_max);
3111             PROF_ADD(prof, orig, temp_count);
3112             PROF_MAX(prof, orig, temp_count_max);
3113             PROF_ADD(prof, orig, del_op_count);
3114             PROF_ADD(prof, orig, code_in_len);
3115             PROF_ADD(prof, orig, code_out_len);
3116             PROF_ADD(prof, orig, search_out_len);
3117             PROF_ADD(prof, orig, interm_time);
3118             PROF_ADD(prof, orig, code_time);
3119             PROF_ADD(prof, orig, la_time);
3120             PROF_ADD(prof, orig, opt_time);
3121             PROF_ADD(prof, orig, restore_count);
3122             PROF_ADD(prof, orig, restore_time);
3123         }
3124         if (table) {
3125             int i;
3126 
3127             for (i = 0; i < NB_OPS; i++) {
3128                 PROF_ADD(prof, orig, table_op_count[i]);
3129             }
3130         }
3131     }
3132 }
3133 
3134 #undef PROF_ADD
3135 #undef PROF_MAX
3136 
/* Snapshot only the scalar counters (not the per-opcode table). */
static void tcg_profile_snapshot_counters(TCGProfile *prof)
{
    tcg_profile_snapshot(prof, true, false);
}
3141 
/* Snapshot only the per-opcode table (not the scalar counters). */
static void tcg_profile_snapshot_table(TCGProfile *prof)
{
    tcg_profile_snapshot(prof, false, true);
}
3146 
3147 void tcg_dump_op_count(FILE *f, fprintf_function cpu_fprintf)
3148 {
3149     TCGProfile prof = {};
3150     int i;
3151 
3152     tcg_profile_snapshot_table(&prof);
3153     for (i = 0; i < NB_OPS; i++) {
3154         cpu_fprintf(f, "%s %" PRId64 "\n", tcg_op_defs[i].name,
3155                     prof.table_op_count[i]);
3156     }
3157 }
3158 #else
/* Stub used when the profiler is disabled at configure time. */
void tcg_dump_op_count(FILE *f, fprintf_function cpu_fprintf)
{
    cpu_fprintf(f, "[TCG profiler not compiled]\n");
}
3163 #endif
3164 
3165 
/* Translate the buffered TCG ops of @tb into host machine code.
   Runs the optimizer and liveness passes, then performs register
   allocation and emits code op by op.  Returns the size in bytes of
   the generated code, or -1 on (pending) code-buffer overflow or a
   failed backend finalization step. */
int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
{
#ifdef CONFIG_PROFILER
    TCGProfile *prof = &s->prof;
#endif
    int i, oi, oi_next, num_insns;

#ifdef CONFIG_PROFILER
    {
        int n;

        /* Record op/temp totals and their high-water marks.  */
        n = s->gen_op_buf[0].prev + 1;
        atomic_set(&prof->op_count, prof->op_count + n);
        if (n > prof->op_count_max) {
            atomic_set(&prof->op_count_max, n);
        }

        n = s->nb_temps;
        atomic_set(&prof->temp_count, prof->temp_count + n);
        if (n > prof->temp_count_max) {
            atomic_set(&prof->temp_count_max, n);
        }
    }
#endif

#ifdef DEBUG_DISAS
    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)
                 && qemu_log_in_addr_range(tb->pc))) {
        qemu_log_lock();
        qemu_log("OP:\n");
        tcg_dump_ops(s);
        qemu_log("\n");
        qemu_log_unlock();
    }
#endif

#ifdef CONFIG_PROFILER
    atomic_set(&prof->opt_time, prof->opt_time - profile_getclock());
#endif

#ifdef USE_TCG_OPTIMIZATIONS
    tcg_optimize(s);
#endif

#ifdef CONFIG_PROFILER
    atomic_set(&prof->opt_time, prof->opt_time + profile_getclock());
    atomic_set(&prof->la_time, prof->la_time - profile_getclock());
#endif

    liveness_pass_1(s);

    if (s->nb_indirects > 0) {
#ifdef DEBUG_DISAS
        if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
                     && qemu_log_in_addr_range(tb->pc))) {
            qemu_log_lock();
            qemu_log("OP before indirect lowering:\n");
            tcg_dump_ops(s);
            qemu_log("\n");
            qemu_log_unlock();
        }
#endif
        /* Replace indirect temps with direct temps.  */
        if (liveness_pass_2(s)) {
            /* If changes were made, re-run liveness.  */
            liveness_pass_1(s);
        }
    }

#ifdef CONFIG_PROFILER
    atomic_set(&prof->la_time, prof->la_time + profile_getclock());
#endif

#ifdef DEBUG_DISAS
    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT)
                 && qemu_log_in_addr_range(tb->pc))) {
        qemu_log_lock();
        qemu_log("OP after optimization and liveness analysis:\n");
        tcg_dump_ops(s);
        qemu_log("\n");
        qemu_log_unlock();
    }
#endif

    tcg_reg_alloc_start(s);

    /* Emit directly at the TB's slot in the code buffer.  */
    s->code_buf = tb->tc.ptr;
    s->code_ptr = tb->tc.ptr;

#ifdef TCG_TARGET_NEED_LDST_LABELS
    s->ldst_labels = NULL;
#endif
#ifdef TCG_TARGET_NEED_POOL_LABELS
    s->pool_labels = NULL;
#endif

    num_insns = -1;
    for (oi = s->gen_op_buf[0].next; oi != 0; oi = oi_next) {
        TCGOp * const op = &s->gen_op_buf[oi];
        TCGOpcode opc = op->opc;

        /* Fetch the link now; the op list is not modified below.  */
        oi_next = op->next;
#ifdef CONFIG_PROFILER
        atomic_set(&prof->table_op_count[opc], prof->table_op_count[opc] + 1);
#endif

        switch (opc) {
        case INDEX_op_mov_i32:
        case INDEX_op_mov_i64:
            tcg_reg_alloc_mov(s, op);
            break;
        case INDEX_op_movi_i32:
        case INDEX_op_movi_i64:
            tcg_reg_alloc_movi(s, op);
            break;
        case INDEX_op_insn_start:
            /* Close the previous guest insn's code range and record
               this insn's start-of-insn data words.  */
            if (num_insns >= 0) {
                s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);
            }
            num_insns++;
            for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
                target_ulong a;
#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
                /* Each data word is split across two 32-bit args.  */
                a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
#else
                a = op->args[i];
#endif
                s->gen_insn_data[num_insns][i] = a;
            }
            break;
        case INDEX_op_discard:
            temp_dead(s, arg_temp(op->args[0]));
            break;
        case INDEX_op_set_label:
            /* A label is a jump target, so end the basic block first.  */
            tcg_reg_alloc_bb_end(s, s->reserved_regs);
            tcg_out_label(s, arg_label(op->args[0]), s->code_ptr);
            break;
        case INDEX_op_call:
            tcg_reg_alloc_call(s, op);
            break;
        default:
            /* Sanity check that we've not introduced any unhandled opcodes. */
            tcg_debug_assert(tcg_op_supported(opc));
            /* Note: in order to speed up the code, it would be much
               faster to have specialized register allocator functions for
               some common argument patterns */
            tcg_reg_alloc_op(s, op);
            break;
        }
#ifdef CONFIG_DEBUG_TCG
        check_regs(s);
#endif
        /* Test for (pending) buffer overflow.  The assumption is that any
           one operation beginning below the high water mark cannot overrun
           the buffer completely.  Thus we can test for overflow after
           generating code without having to check during generation.  */
        if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
            return -1;
        }
    }
    tcg_debug_assert(num_insns >= 0);
    s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);

    /* Generate TB finalization at the end of block */
#ifdef TCG_TARGET_NEED_LDST_LABELS
    if (!tcg_out_ldst_finalize(s)) {
        return -1;
    }
#endif
#ifdef TCG_TARGET_NEED_POOL_LABELS
    if (!tcg_out_pool_finalize(s)) {
        return -1;
    }
#endif

    /* flush instruction cache */
    flush_icache_range((uintptr_t)s->code_buf, (uintptr_t)s->code_ptr);

    return tcg_current_code_size(s);
}
3346 
3347 #ifdef CONFIG_PROFILER
/* Print the accumulated TCG profiling statistics to @f, snapshotting
   the counters across all TCG contexts first. */
void tcg_dump_info(FILE *f, fprintf_function cpu_fprintf)
{
    TCGProfile prof = {};
    const TCGProfile *s;
    int64_t tb_count;
    int64_t tb_div_count;
    int64_t tot;

    tcg_profile_snapshot_counters(&prof);
    s = &prof;
    tb_count = s->tb_count;
    /* Guard divisors against zero for the per-TB averages below.  */
    tb_div_count = tb_count ? tb_count : 1;
    tot = s->interm_time + s->code_time;

    cpu_fprintf(f, "JIT cycles          %" PRId64 " (%0.3f s at 2.4 GHz)\n",
                tot, tot / 2.4e9);
    cpu_fprintf(f, "translated TBs      %" PRId64 " (aborted=%" PRId64 " %0.1f%%)\n",
                tb_count, s->tb_count1 - tb_count,
                (double)(s->tb_count1 - s->tb_count)
                / (s->tb_count1 ? s->tb_count1 : 1) * 100.0);
    cpu_fprintf(f, "avg ops/TB          %0.1f max=%d\n",
                (double)s->op_count / tb_div_count, s->op_count_max);
    cpu_fprintf(f, "deleted ops/TB      %0.2f\n",
                (double)s->del_op_count / tb_div_count);
    cpu_fprintf(f, "avg temps/TB        %0.2f max=%d\n",
                (double)s->temp_count / tb_div_count, s->temp_count_max);
    cpu_fprintf(f, "avg host code/TB    %0.1f\n",
                (double)s->code_out_len / tb_div_count);
    cpu_fprintf(f, "avg search data/TB  %0.1f\n",
                (double)s->search_out_len / tb_div_count);

    cpu_fprintf(f, "cycles/op           %0.1f\n",
                s->op_count ? (double)tot / s->op_count : 0);
    cpu_fprintf(f, "cycles/in byte      %0.1f\n",
                s->code_in_len ? (double)tot / s->code_in_len : 0);
    cpu_fprintf(f, "cycles/out byte     %0.1f\n",
                s->code_out_len ? (double)tot / s->code_out_len : 0);
    cpu_fprintf(f, "cycles/search byte     %0.1f\n",
                s->search_out_len ? (double)tot / s->search_out_len : 0);
    if (tot == 0) {
        tot = 1;
    }
    cpu_fprintf(f, "  gen_interm time   %0.1f%%\n",
                (double)s->interm_time / tot * 100.0);
    cpu_fprintf(f, "  gen_code time     %0.1f%%\n",
                (double)s->code_time / tot * 100.0);
    cpu_fprintf(f, "optim./code time    %0.1f%%\n",
                (double)s->opt_time / (s->code_time ? s->code_time : 1)
                * 100.0);
    cpu_fprintf(f, "liveness/code time  %0.1f%%\n",
                (double)s->la_time / (s->code_time ? s->code_time : 1) * 100.0);
    cpu_fprintf(f, "cpu_restore count   %" PRId64 "\n",
                s->restore_count);
    cpu_fprintf(f, "  avg cycles        %0.1f\n",
                s->restore_count ? (double)s->restore_time / s->restore_count : 0);
}
3404 #else
/* Stub used when the profiler is disabled at configure time. */
void tcg_dump_info(FILE *f, fprintf_function cpu_fprintf)
{
    cpu_fprintf(f, "[TCG profiler not compiled]\n");
}
3409 #endif
3410 
3411 #ifdef ELF_HOST_MACHINE
3412 /* In order to use this feature, the backend needs to do three things:
3413 
3414    (1) Define ELF_HOST_MACHINE to indicate both what value to
3415        put into the ELF image and to indicate support for the feature.
3416 
3417    (2) Define tcg_register_jit.  This should create a buffer containing
3418        the contents of a .debug_frame section that describes the post-
3419        prologue unwind info for the tcg machine.
3420 
3421    (3) Call tcg_register_jit_int, with the constructed .debug_frame.
3422 */
3423 
3424 /* Begin GDB interface.  THE FOLLOWING MUST MATCH GDB DOCS.  */
/* Values for jit_descriptor.action_flag, as defined by the GDB JIT
   compilation interface. */
typedef enum {
    JIT_NOACTION = 0,
    JIT_REGISTER_FN,
    JIT_UNREGISTER_FN
} jit_actions_t;
3430 
/* One node in the doubly-linked list of registered symbol files that
   GDB walks; symfile_addr/symfile_size describe the in-memory image. */
struct jit_code_entry {
    struct jit_code_entry *next_entry;
    struct jit_code_entry *prev_entry;
    const void *symfile_addr;
    uint64_t symfile_size;
};
3437 
/* Root descriptor read by GDB: interface version, the action being
   performed (a jit_actions_t), the entry being acted upon, and the
   head of the entry list. */
struct jit_descriptor {
    uint32_t version;
    uint32_t action_flag;
    struct jit_code_entry *relevant_entry;
    struct jit_code_entry *first_entry;
};
3444 
/* Well-known function on which GDB sets a breakpoint; it is called
   after __jit_debug_descriptor has been updated to notify the
   debugger.  The empty asm keeps the body from being optimized away,
   and noinline keeps the symbol callable. */
void __jit_debug_register_code(void) __attribute__((noinline));
void __jit_debug_register_code(void)
{
    asm("");
}
3450 
3451 /* Must statically initialize the version, because GDB may check
3452    the version before we can set it.  */
3453 struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };
3454 
3455 /* End GDB interface.  */
3456 
/* Return the offset of @str inside the NUL-separated string table
   @strtab.  The table starts with an empty string (offset 0), so the
   scan begins at offset 1.  The caller must guarantee that @str is
   present; there is no terminating bounds check. */
static int find_string(const char *strtab, const char *str)
{
    const char *p;

    for (p = strtab + 1; strcmp(p, str) != 0; p += strlen(p) + 1) {
        continue;
    }
    return p - strtab;
}
3468 
3469 static void tcg_register_jit_int(void *buf_ptr, size_t buf_size,
3470                                  const void *debug_frame,
3471                                  size_t debug_frame_size)
3472 {
3473     struct __attribute__((packed)) DebugInfo {
3474         uint32_t  len;
3475         uint16_t  version;
3476         uint32_t  abbrev;
3477         uint8_t   ptr_size;
3478         uint8_t   cu_die;
3479         uint16_t  cu_lang;
3480         uintptr_t cu_low_pc;
3481         uintptr_t cu_high_pc;
3482         uint8_t   fn_die;
3483         char      fn_name[16];
3484         uintptr_t fn_low_pc;
3485         uintptr_t fn_high_pc;
3486         uint8_t   cu_eoc;
3487     };
3488 
3489     struct ElfImage {
3490         ElfW(Ehdr) ehdr;
3491         ElfW(Phdr) phdr;
3492         ElfW(Shdr) shdr[7];
3493         ElfW(Sym)  sym[2];
3494         struct DebugInfo di;
3495         uint8_t    da[24];
3496         char       str[80];
3497     };
3498 
3499     struct ElfImage *img;
3500 
3501     static const struct ElfImage img_template = {
3502         .ehdr = {
3503             .e_ident[EI_MAG0] = ELFMAG0,
3504             .e_ident[EI_MAG1] = ELFMAG1,
3505             .e_ident[EI_MAG2] = ELFMAG2,
3506             .e_ident[EI_MAG3] = ELFMAG3,
3507             .e_ident[EI_CLASS] = ELF_CLASS,
3508             .e_ident[EI_DATA] = ELF_DATA,
3509             .e_ident[EI_VERSION] = EV_CURRENT,
3510             .e_type = ET_EXEC,
3511             .e_machine = ELF_HOST_MACHINE,
3512             .e_version = EV_CURRENT,
3513             .e_phoff = offsetof(struct ElfImage, phdr),
3514             .e_shoff = offsetof(struct ElfImage, shdr),
3515             .e_ehsize = sizeof(ElfW(Shdr)),
3516             .e_phentsize = sizeof(ElfW(Phdr)),
3517             .e_phnum = 1,
3518             .e_shentsize = sizeof(ElfW(Shdr)),
3519             .e_shnum = ARRAY_SIZE(img->shdr),
3520             .e_shstrndx = ARRAY_SIZE(img->shdr) - 1,
3521 #ifdef ELF_HOST_FLAGS
3522             .e_flags = ELF_HOST_FLAGS,
3523 #endif
3524 #ifdef ELF_OSABI
3525             .e_ident[EI_OSABI] = ELF_OSABI,
3526 #endif
3527         },
3528         .phdr = {
3529             .p_type = PT_LOAD,
3530             .p_flags = PF_X,
3531         },
3532         .shdr = {
3533             [0] = { .sh_type = SHT_NULL },
3534             /* Trick: The contents of code_gen_buffer are not present in
3535                this fake ELF file; that got allocated elsewhere.  Therefore
3536                we mark .text as SHT_NOBITS (similar to .bss) so that readers
3537                will not look for contents.  We can record any address.  */
3538             [1] = { /* .text */
3539                 .sh_type = SHT_NOBITS,
3540                 .sh_flags = SHF_EXECINSTR | SHF_ALLOC,
3541             },
3542             [2] = { /* .debug_info */
3543                 .sh_type = SHT_PROGBITS,
3544                 .sh_offset = offsetof(struct ElfImage, di),
3545                 .sh_size = sizeof(struct DebugInfo),
3546             },
3547             [3] = { /* .debug_abbrev */
3548                 .sh_type = SHT_PROGBITS,
3549                 .sh_offset = offsetof(struct ElfImage, da),
3550                 .sh_size = sizeof(img->da),
3551             },
3552             [4] = { /* .debug_frame */
3553                 .sh_type = SHT_PROGBITS,
3554                 .sh_offset = sizeof(struct ElfImage),
3555             },
3556             [5] = { /* .symtab */
3557                 .sh_type = SHT_SYMTAB,
3558                 .sh_offset = offsetof(struct ElfImage, sym),
3559                 .sh_size = sizeof(img->sym),
3560                 .sh_info = 1,
3561                 .sh_link = ARRAY_SIZE(img->shdr) - 1,
3562                 .sh_entsize = sizeof(ElfW(Sym)),
3563             },
3564             [6] = { /* .strtab */
3565                 .sh_type = SHT_STRTAB,
3566                 .sh_offset = offsetof(struct ElfImage, str),
3567                 .sh_size = sizeof(img->str),
3568             }
3569         },
3570         .sym = {
3571             [1] = { /* code_gen_buffer */
3572                 .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC),
3573                 .st_shndx = 1,
3574             }
3575         },
3576         .di = {
3577             .len = sizeof(struct DebugInfo) - 4,
3578             .version = 2,
3579             .ptr_size = sizeof(void *),
3580             .cu_die = 1,
3581             .cu_lang = 0x8001,  /* DW_LANG_Mips_Assembler */
3582             .fn_die = 2,
3583             .fn_name = "code_gen_buffer"
3584         },
3585         .da = {
3586             1,          /* abbrev number (the cu) */
3587             0x11, 1,    /* DW_TAG_compile_unit, has children */
3588             0x13, 0x5,  /* DW_AT_language, DW_FORM_data2 */
3589             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
3590             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
3591             0, 0,       /* end of abbrev */
3592             2,          /* abbrev number (the fn) */
3593             0x2e, 0,    /* DW_TAG_subprogram, no children */
3594             0x3, 0x8,   /* DW_AT_name, DW_FORM_string */
3595             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
3596             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
3597             0, 0,       /* end of abbrev */
3598             0           /* no more abbrev */
3599         },
3600         .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0"
3601                ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer",
3602     };
3603 
3604     /* We only need a single jit entry; statically allocate it.  */
3605     static struct jit_code_entry one_entry;
3606 
3607     uintptr_t buf = (uintptr_t)buf_ptr;
3608     size_t img_size = sizeof(struct ElfImage) + debug_frame_size;
3609     DebugFrameHeader *dfh;
3610 
3611     img = g_malloc(img_size);
3612     *img = img_template;
3613 
3614     img->phdr.p_vaddr = buf;
3615     img->phdr.p_paddr = buf;
3616     img->phdr.p_memsz = buf_size;
3617 
3618     img->shdr[1].sh_name = find_string(img->str, ".text");
3619     img->shdr[1].sh_addr = buf;
3620     img->shdr[1].sh_size = buf_size;
3621 
3622     img->shdr[2].sh_name = find_string(img->str, ".debug_info");
3623     img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev");
3624 
3625     img->shdr[4].sh_name = find_string(img->str, ".debug_frame");
3626     img->shdr[4].sh_size = debug_frame_size;
3627 
3628     img->shdr[5].sh_name = find_string(img->str, ".symtab");
3629     img->shdr[6].sh_name = find_string(img->str, ".strtab");
3630 
3631     img->sym[1].st_name = find_string(img->str, "code_gen_buffer");
3632     img->sym[1].st_value = buf;
3633     img->sym[1].st_size = buf_size;
3634 
3635     img->di.cu_low_pc = buf;
3636     img->di.cu_high_pc = buf + buf_size;
3637     img->di.fn_low_pc = buf;
3638     img->di.fn_high_pc = buf + buf_size;
3639 
3640     dfh = (DebugFrameHeader *)(img + 1);
3641     memcpy(dfh, debug_frame, debug_frame_size);
3642     dfh->fde.func_start = buf;
3643     dfh->fde.func_len = buf_size;
3644 
3645 #ifdef DEBUG_JIT
3646     /* Enable this block to be able to debug the ELF image file creation.
3647        One can use readelf, objdump, or other inspection utilities.  */
3648     {
3649         FILE *f = fopen("/tmp/qemu.jit", "w+b");
3650         if (f) {
3651             if (fwrite(img, img_size, 1, f) != img_size) {
3652                 /* Avoid stupid unused return value warning for fwrite.  */
3653             }
3654             fclose(f);
3655         }
3656     }
3657 #endif
3658 
3659     one_entry.symfile_addr = img;
3660     one_entry.symfile_size = img_size;
3661 
3662     __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
3663     __jit_debug_descriptor.relevant_entry = &one_entry;
3664     __jit_debug_descriptor.first_entry = &one_entry;
3665     __jit_debug_register_code();
3666 }
3667 #else
3668 /* No support for the feature.  Provide the entry point expected by exec.c,
3669    and implement the internal function we declared earlier.  */
3670 
static void tcg_register_jit_int(void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
{
    /* Intentionally empty: without ELF_HOST_MACHINE defined we cannot
       build the in-memory ELF image that the GDB JIT interface needs,
       so debug registration of generated code is silently skipped.  */
}
3676 
void tcg_register_jit(void *buf, size_t buf_size)
{
    /* Intentionally empty stub for hosts without ELF_HOST_MACHINE:
       callers (e.g. exec.c) may invoke this unconditionally, so provide
       the entry point but perform no GDB JIT registration.  */
}
3680 #endif /* ELF_HOST_MACHINE */
3681