xref: /openbmc/qemu/tcg/tcg.c (revision 1c2adb95)
/*
 * Tiny Code Generator for QEMU
 *
 * Copyright (c) 2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

/* define it to use liveness analysis (better code) */
#define USE_TCG_OPTIMIZATIONS

#include "qemu/osdep.h"

/* Define to dump the ELF file used to communicate with GDB.  */
#undef DEBUG_JIT

#include "qemu/cutils.h"
#include "qemu/host-utils.h"
#include "qemu/timer.h"

/* Note: the long-term plan is to reduce the dependencies on the QEMU
   CPU definitions. Currently they are used for qemu_ld/st
   instructions. */
#define NO_CPU_IO_DEFS
#include "cpu.h"

#include "exec/cpu-common.h"
#include "exec/exec-all.h"

#include "tcg-op.h"

#if UINTPTR_MAX == UINT32_MAX
# define ELF_CLASS  ELFCLASS32
#else
# define ELF_CLASS  ELFCLASS64
#endif
#ifdef HOST_WORDS_BIGENDIAN
# define ELF_DATA   ELFDATA2MSB
#else
# define ELF_DATA   ELFDATA2LSB
#endif

#include "elf.h"
#include "exec/log.h"
#include "sysemu/sysemu.h"

/* Forward declarations for functions declared in tcg-target.inc.c and
   used here. */
static void tcg_target_init(TCGContext *s);
static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode);
static void tcg_target_qemu_prologue(TCGContext *s);
static void patch_reloc(tcg_insn_unit *code_ptr, int type,
                        intptr_t value, intptr_t addend);

/* The CIE and FDE header definitions will be common to all hosts.  */
typedef struct {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t id;
    uint8_t version;
    char augmentation[1];
    uint8_t code_align;
    uint8_t data_align;
    uint8_t return_column;
} DebugFrameCIE;

typedef struct QEMU_PACKED {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t cie_offset;
    uintptr_t func_start;
    uintptr_t func_len;
} DebugFrameFDEHeader;

typedef struct QEMU_PACKED {
    DebugFrameCIE cie;
    DebugFrameFDEHeader fde;
} DebugFrameHeader;

static void tcg_register_jit_int(void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
    __attribute__((unused));

/* Forward declarations for functions declared and used in tcg-target.inc.c. */
static const char *target_parse_constraint(TCGArgConstraint *ct,
                                           const char *ct_str, TCGType type);
static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
                       intptr_t arg2);
static void tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_movi(TCGContext *s, TCGType type,
                         TCGReg ret, tcg_target_long arg);
static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
                       const int *const_args);
static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
                       intptr_t arg2);
static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
                        TCGReg base, intptr_t ofs);
static void tcg_out_call(TCGContext *s, tcg_insn_unit *target);
static int tcg_target_const_match(tcg_target_long val, TCGType type,
                                  const TCGArgConstraint *arg_ct);
#ifdef TCG_TARGET_NEED_LDST_LABELS
static bool tcg_out_ldst_finalize(TCGContext *s);
#endif

#define TCG_HIGHWATER 1024

static TCGContext **tcg_ctxs;
static unsigned int n_tcg_ctxs;
TCGv_env cpu_env = 0;

/*
 * We divide code_gen_buffer into equally-sized "regions" that TCG threads
 * dynamically allocate from as demand dictates. Given appropriate region
 * sizing, this minimizes flushes even when some TCG threads generate a lot
 * more code than others.
 */
struct tcg_region_state {
    QemuMutex lock;

    /* fields set at init time */
    void *start;
    void *start_aligned;
    void *end;
    size_t n;
    size_t size; /* size of one region */
    size_t stride; /* .size + guard size */

    /* fields protected by the lock */
    size_t current; /* current region index */
    size_t agg_size_full; /* aggregate size of full regions */
};

static struct tcg_region_state region;
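
/*
 * A rough sketch of the layout this produces (not to scale), assuming a
 * buffer whose aligned part divides evenly into n == 3 regions:
 *
 *   start                                               end
 *     |-- region 0 --|G|-- region 1 --|G|-- region 2 --|G|
 *
 * Each G is one guard page, so stride == size + page size, and a TCG
 * thread owns at most one region at a time.
 */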

static TCGRegSet tcg_target_available_regs[2];
static TCGRegSet tcg_target_call_clobber_regs;

#if TCG_TARGET_INSN_UNIT_SIZE == 1
static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v)
{
    *s->code_ptr++ = v;
}

static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p,
                                                      uint8_t v)
{
    *p = v;
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 2
static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p,
                                                       uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 4
static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p,
                                                       uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 8
static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p,
                                                       uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif
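
/*
 * For illustration: on a host whose insn unit is one byte (e.g. i386),
 * tcg_out32() takes the memcpy path and advances code_ptr by four units,
 * while on a four-byte-unit host (e.g. MIPS) it stores a single unit
 * directly.  A backend in tcg-target.inc.c emits the same thing either
 * way:
 *
 *     tcg_out8(s, 0x90);      // only compiled where units are bytes
 *     tcg_out32(s, insn);     // portable across unit sizes
 */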

/* label relocation processing */

static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
                          TCGLabel *l, intptr_t addend)
{
    TCGRelocation *r;

    if (l->has_value) {
        /* FIXME: This may break relocations on RISC targets that
           modify instruction fields in place.  The caller may not have
           written the initial value.  */
        patch_reloc(code_ptr, type, l->u.value, addend);
    } else {
        /* add a new relocation entry */
        r = tcg_malloc(sizeof(TCGRelocation));
        r->type = type;
        r->ptr = code_ptr;
        r->addend = addend;
        r->next = l->u.first_reloc;
        l->u.first_reloc = r;
    }
}

static void tcg_out_label(TCGContext *s, TCGLabel *l, tcg_insn_unit *ptr)
{
    intptr_t value = (intptr_t)ptr;
    TCGRelocation *r;

    tcg_debug_assert(!l->has_value);

    for (r = l->u.first_reloc; r != NULL; r = r->next) {
        patch_reloc(r->ptr, r->type, value, r->addend);
    }

    l->has_value = 1;
    l->u.value_ptr = ptr;
}
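
/*
 * Minimal sketch of the lifecycle: a backend emitting a forward branch to
 * a label that has no value yet records a relocation, and a later
 * tcg_out_label() call back-patches every recorded site via patch_reloc().
 * With a hypothetical relocation type R_EXAMPLE and branch emitter:
 *
 *     tcg_out_reloc(s, s->code_ptr, R_EXAMPLE, l, 0);
 *     tcg_out_branch_insn(s);                // placeholder emitted here
 *     ...
 *     tcg_out_label(s, l, s->code_ptr);      // resolves l, patches sites
 */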

TCGLabel *gen_new_label(void)
{
    TCGContext *s = tcg_ctx;
    TCGLabel *l = tcg_malloc(sizeof(TCGLabel));

    *l = (TCGLabel){
        .id = s->nb_labels++
    };

    return l;
}

#include "tcg-target.inc.c"

static void tcg_region_bounds(size_t curr_region, void **pstart, void **pend)
{
    void *start, *end;

    start = region.start_aligned + curr_region * region.stride;
    end = start + region.size;

    if (curr_region == 0) {
        start = region.start;
    }
    if (curr_region == region.n - 1) {
        end = region.end;
    }

    *pstart = start;
    *pend = end;
}

static void tcg_region_assign(TCGContext *s, size_t curr_region)
{
    void *start, *end;

    tcg_region_bounds(curr_region, &start, &end);

    s->code_gen_buffer = start;
    s->code_gen_ptr = start;
    s->code_gen_buffer_size = end - start;
    s->code_gen_highwater = end - TCG_HIGHWATER;
}

static bool tcg_region_alloc__locked(TCGContext *s)
{
    if (region.current == region.n) {
        return true;
    }
    tcg_region_assign(s, region.current);
    region.current++;
    return false;
}

/*
 * Request a new region once the one in use has filled up.
 * Returns true on error.
 */
static bool tcg_region_alloc(TCGContext *s)
{
    bool err;
    /* read the region size now; alloc__locked will overwrite it on success */
    size_t size_full = s->code_gen_buffer_size;

    qemu_mutex_lock(&region.lock);
    err = tcg_region_alloc__locked(s);
    if (!err) {
        region.agg_size_full += size_full - TCG_HIGHWATER;
    }
    qemu_mutex_unlock(&region.lock);
    return err;
}

/*
 * Perform a context's first region allocation.
 * This function does _not_ increment region.agg_size_full.
 */
static inline bool tcg_region_initial_alloc__locked(TCGContext *s)
{
    return tcg_region_alloc__locked(s);
}

/* Call from a safe-work context */
void tcg_region_reset_all(void)
{
    unsigned int n_ctxs = atomic_read(&n_tcg_ctxs);
    unsigned int i;

    qemu_mutex_lock(&region.lock);
    region.current = 0;
    region.agg_size_full = 0;

    for (i = 0; i < n_ctxs; i++) {
        TCGContext *s = atomic_read(&tcg_ctxs[i]);
        bool err = tcg_region_initial_alloc__locked(s);

        g_assert(!err);
    }
    qemu_mutex_unlock(&region.lock);
}

#ifdef CONFIG_USER_ONLY
static size_t tcg_n_regions(void)
{
    return 1;
}
#else
/*
 * It is likely that some vCPUs will translate more code than others, so we
 * first try to set more regions than max_cpus, with those regions being of
 * reasonable size. If that's not possible we make do by evenly dividing
 * the code_gen_buffer among the vCPUs.
 */
static size_t tcg_n_regions(void)
{
    size_t i;

    /* Use a single region if all we have is one vCPU thread */
    if (max_cpus == 1 || !qemu_tcg_mttcg_enabled()) {
        return 1;
    }

    /* Try to have more regions than max_cpus, with each region being >= 2 MB */
    for (i = 8; i > 0; i--) {
        size_t regions_per_thread = i;
        size_t region_size;

        region_size = tcg_init_ctx.code_gen_buffer_size;
        region_size /= max_cpus * regions_per_thread;

        if (region_size >= 2 * 1024u * 1024) {
            return max_cpus * regions_per_thread;
        }
    }
    /* If we can't, then just allocate one region per vCPU thread */
    return max_cpus;
}
#endif
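
/*
 * Worked example (softmmu, MTTCG): with max_cpus == 8 and a 256 MB
 * code_gen_buffer, the very first iteration (i == 8, i.e. 64 regions)
 * already yields 4 MB per region, which satisfies the 2 MB minimum, so
 * tcg_n_regions() returns 64.
 */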

/*
 * Initializes region partitioning.
 *
 * Called at init time from the parent thread (i.e. the one calling
 * tcg_context_init), after the target's TCG globals have been set.
 *
 * Region partitioning works by splitting code_gen_buffer into separate regions,
 * and then assigning regions to TCG threads so that the threads can translate
 * code in parallel without synchronization.
 *
 * In softmmu the number of TCG threads is bounded by max_cpus, so we use at
 * least max_cpus regions in MTTCG. In !MTTCG we use a single region.
 * Note that the TCG options from the command-line (i.e. -accel accel=tcg,[...])
 * must have been parsed before calling this function, since it calls
 * qemu_tcg_mttcg_enabled().
 *
 * In user-mode we use a single region.  Having multiple regions in user-mode
 * is not supported, because the number of vCPU threads (recall that each thread
 * spawned by the guest corresponds to a vCPU thread) is only bounded by the
 * OS, and usually this number is huge (tens of thousands is not uncommon).
 * Thus, given this large bound on the number of vCPU threads and the fact
434  * that the availability of at least one region per vCPU thread.
435  *
 *
 * However, this user-mode limitation is unlikely to be a significant problem
 * in practice. Multi-threaded guests share most if not all of their translated
 * code, which makes parallel code generation less appealing than in softmmu.
 */
void tcg_region_init(void)
{
    void *buf = tcg_init_ctx.code_gen_buffer;
    void *aligned;
    size_t size = tcg_init_ctx.code_gen_buffer_size;
    size_t page_size = qemu_real_host_page_size;
    size_t region_size;
    size_t n_regions;
    size_t i;

    n_regions = tcg_n_regions();

    /* The first region will be 'aligned - buf' bytes larger than the others */
    aligned = QEMU_ALIGN_PTR_UP(buf, page_size);
    g_assert(aligned < tcg_init_ctx.code_gen_buffer + size);
    /*
     * Make region_size a multiple of page_size, using aligned as the start.
     * As a result of this we might end up with a few extra pages at the end of
     * the buffer; we will assign those to the last region.
     */
    region_size = (size - (aligned - buf)) / n_regions;
    region_size = QEMU_ALIGN_DOWN(region_size, page_size);

    /* A region must have at least 2 pages; one code, one guard */
    g_assert(region_size >= 2 * page_size);

    /* init the region struct */
    qemu_mutex_init(&region.lock);
    region.n = n_regions;
    region.size = region_size - page_size;
    region.stride = region_size;
    region.start = buf;
    region.start_aligned = aligned;
    /* page-align the end, since its last page will be a guard page */
    region.end = QEMU_ALIGN_PTR_DOWN(buf + size, page_size);
    /* account for that last guard page */
    region.end -= page_size;

    /* set guard pages */
    for (i = 0; i < region.n; i++) {
        void *start, *end;
        int rc;

        tcg_region_bounds(i, &start, &end);
        rc = qemu_mprotect_none(end, page_size);
        g_assert(!rc);
    }

    /* In user-mode we support only one ctx, so do the initial allocation now */
#ifdef CONFIG_USER_ONLY
    {
        bool err = tcg_region_initial_alloc__locked(tcg_ctx);

        g_assert(!err);
    }
#endif
}

/*
 * All TCG threads except the parent (i.e. the one that called tcg_context_init
 * and registered the target's TCG globals) must register with this function
 * before initiating translation.
 *
 * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation
 * of tcg_region_init() for the reasoning behind this.
 *
507  * softmmu tcg_ctxs[] does not track tcg_ctx_init, since the initial context
508  * is not used anymore for translation once this function is called.
509  *
510  * Not tracking tcg_init_ctx in tcg_ctxs[] in softmmu keeps code that iterates
511  * over the array (e.g. tcg_code_size() the same for both softmmu and user-mode.
512  */
#ifdef CONFIG_USER_ONLY
void tcg_register_thread(void)
{
    tcg_ctx = &tcg_init_ctx;
}
#else
void tcg_register_thread(void)
{
    TCGContext *s = g_malloc(sizeof(*s));
    unsigned int i, n;
    bool err;

    *s = tcg_init_ctx;

    /* Relink mem_base.  */
    for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) {
        if (tcg_init_ctx.temps[i].mem_base) {
            ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps;
            tcg_debug_assert(b >= 0 && b < n);
            s->temps[i].mem_base = &s->temps[b];
        }
    }

    /* Claim an entry in tcg_ctxs */
    n = atomic_fetch_inc(&n_tcg_ctxs);
    g_assert(n < max_cpus);
    atomic_set(&tcg_ctxs[n], s);

    tcg_ctx = s;
    qemu_mutex_lock(&region.lock);
    err = tcg_region_initial_alloc__locked(tcg_ctx);
    g_assert(!err);
    qemu_mutex_unlock(&region.lock);
}
#endif /* !CONFIG_USER_ONLY */

/*
 * Returns the size (in bytes) of all translated code (i.e. from all regions)
 * currently in the cache.
 * See also: tcg_code_capacity()
 * Do not confuse with tcg_current_code_size(); that one applies to a single
 * TCG context.
 */
size_t tcg_code_size(void)
{
    unsigned int n_ctxs = atomic_read(&n_tcg_ctxs);
    unsigned int i;
    size_t total;

    qemu_mutex_lock(&region.lock);
    total = region.agg_size_full;
    for (i = 0; i < n_ctxs; i++) {
        const TCGContext *s = atomic_read(&tcg_ctxs[i]);
        size_t size;

        size = atomic_read(&s->code_gen_ptr) - s->code_gen_buffer;
        g_assert(size <= s->code_gen_buffer_size);
        total += size;
    }
    qemu_mutex_unlock(&region.lock);
    return total;
}

/*
 * Returns the code capacity (in bytes) of the entire cache, i.e. including all
 * regions.
 * See also: tcg_code_size()
 */
size_t tcg_code_capacity(void)
{
    size_t guard_size, capacity;

    /* no need for synchronization; these variables are set at init time */
    guard_size = region.stride - region.size;
    capacity = region.end + guard_size - region.start;
    capacity -= region.n * (guard_size + TCG_HIGHWATER);
    return capacity;
}
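
/*
 * In other words, for n regions over a span of S bytes (from region.start
 * to the end of the last guard page), capacity == S - n * (page_size +
 * TCG_HIGHWATER): guard pages never hold code, and the highwater slack at
 * the end of each region is deliberately left unused.
 */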

/* pool based memory allocation */
void *tcg_malloc_internal(TCGContext *s, int size)
{
    TCGPool *p;
    int pool_size;

    if (size > TCG_POOL_CHUNK_SIZE) {
        /* big malloc: insert a new pool (XXX: could optimize) */
        p = g_malloc(sizeof(TCGPool) + size);
        p->size = size;
        p->next = s->pool_first_large;
        s->pool_first_large = p;
        return p->data;
    } else {
        p = s->pool_current;
        if (!p) {
            p = s->pool_first;
            if (!p)
                goto new_pool;
        } else {
            if (!p->next) {
            new_pool:
                pool_size = TCG_POOL_CHUNK_SIZE;
                p = g_malloc(sizeof(TCGPool) + pool_size);
                p->size = pool_size;
                p->next = NULL;
                if (s->pool_current)
                    s->pool_current->next = p;
                else
                    s->pool_first = p;
            } else {
                p = p->next;
            }
        }
    }
    s->pool_current = p;
    s->pool_cur = p->data + size;
    s->pool_end = p->data + p->size;
    return p->data;
}

void tcg_pool_reset(TCGContext *s)
{
    TCGPool *p, *t;
    for (p = s->pool_first_large; p; p = t) {
        t = p->next;
        g_free(p);
    }
    s->pool_first_large = NULL;
    s->pool_cur = s->pool_end = NULL;
    s->pool_current = NULL;
}
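
/*
 * tcg_malloc() (a static inline in tcg.h) implements the fast path by just
 * bumping pool_cur, and only drops into tcg_malloc_internal() when the
 * current chunk is exhausted or the request is large.  A typical caller:
 *
 *     TCGRelocation *r = tcg_malloc(sizeof(TCGRelocation));
 *
 * There is no per-object free: tcg_pool_reset() recycles the small chunks
 * and releases the large ones at the start of the next translation.
 */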

typedef struct TCGHelperInfo {
    void *func;
    const char *name;
    unsigned flags;
    unsigned sizemask;
} TCGHelperInfo;

#include "exec/helper-proto.h"

static const TCGHelperInfo all_helpers[] = {
#include "exec/helper-tcg.h"
};
static GHashTable *helper_table;
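
/*
 * all_helpers[] is expanded from the DEF_HELPER_* declarations pulled in
 * through exec/helper-tcg.h; tcg_context_init() below keys each entry by
 * its function pointer, so tcg_gen_callN() can recover flags and sizemask
 * from nothing but the raw pointer:
 *
 *     info = g_hash_table_lookup(helper_table, (gpointer)func);
 */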

static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)];
static void process_op_defs(TCGContext *s);
static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
                                            TCGReg reg, const char *name);

void tcg_context_init(TCGContext *s)
{
    int op, total_args, n, i;
    TCGOpDef *def;
    TCGArgConstraint *args_ct;
    int *sorted_args;
    TCGTemp *ts;

    memset(s, 0, sizeof(*s));
    s->nb_globals = 0;

    /* Count total number of arguments and allocate the corresponding
       space */
    total_args = 0;
    for(op = 0; op < NB_OPS; op++) {
        def = &tcg_op_defs[op];
        n = def->nb_iargs + def->nb_oargs;
        total_args += n;
    }

    args_ct = g_malloc(sizeof(TCGArgConstraint) * total_args);
    sorted_args = g_malloc(sizeof(int) * total_args);

    for(op = 0; op < NB_OPS; op++) {
        def = &tcg_op_defs[op];
        def->args_ct = args_ct;
        def->sorted_args = sorted_args;
        n = def->nb_iargs + def->nb_oargs;
        sorted_args += n;
        args_ct += n;
    }

    /* Register helpers.  */
    /* Use g_direct_hash/equal for direct pointer comparisons on func.  */
    helper_table = g_hash_table_new(NULL, NULL);

    for (i = 0; i < ARRAY_SIZE(all_helpers); ++i) {
        g_hash_table_insert(helper_table, (gpointer)all_helpers[i].func,
                            (gpointer)&all_helpers[i]);
    }

    tcg_target_init(s);
    process_op_defs(s);

    /* Reverse the order of the saved registers, assuming they're all at
       the start of tcg_target_reg_alloc_order.  */
    for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) {
        int r = tcg_target_reg_alloc_order[n];
        if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) {
            break;
        }
    }
    for (i = 0; i < n; ++i) {
        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i];
    }
    for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) {
        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i];
    }

    tcg_ctx = s;
    /*
     * In user-mode we simply share the init context among threads, since we
     * use a single region. See the documentation of tcg_region_init() for the
     * reasoning behind this.
     * In softmmu we will have at most max_cpus TCG threads.
     */
#ifdef CONFIG_USER_ONLY
    tcg_ctxs = &tcg_ctx;
    n_tcg_ctxs = 1;
#else
    tcg_ctxs = g_new(TCGContext *, max_cpus);
#endif

    tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0));
    ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env");
    cpu_env = temp_tcgv_ptr(ts);
}

/*
 * Allocate TBs right before their corresponding translated code, making
 * sure that TBs and code are on different cache lines.
 */
TranslationBlock *tcg_tb_alloc(TCGContext *s)
{
    uintptr_t align = qemu_icache_linesize;
    TranslationBlock *tb;
    void *next;

 retry:
    tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align);
    next = (void *)ROUND_UP((uintptr_t)(tb + 1), align);

    if (unlikely(next > s->code_gen_highwater)) {
        if (tcg_region_alloc(s)) {
            return NULL;
        }
        goto retry;
    }
    atomic_set(&s->code_gen_ptr, next);
    s->data_gen_ptr = NULL;
    return tb;
}
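
/*
 * Example with a 64-byte icache line: if code_gen_ptr is 0x1010, the TB
 * descriptor is placed at 0x1040 and the translated code starts at the
 * first 64-byte boundary past the descriptor, so the two never share a
 * cache line.
 */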

void tcg_prologue_init(TCGContext *s)
{
    size_t prologue_size, total_size;
    void *buf0, *buf1;

    /* Put the prologue at the beginning of code_gen_buffer.  */
    buf0 = s->code_gen_buffer;
    s->code_ptr = buf0;
    s->code_buf = buf0;
    s->code_gen_prologue = buf0;

    /* Generate the prologue.  */
    tcg_target_qemu_prologue(s);
    buf1 = s->code_ptr;
    flush_icache_range((uintptr_t)buf0, (uintptr_t)buf1);

    /* Deduct the prologue from the buffer.  */
    prologue_size = tcg_current_code_size(s);
    s->code_gen_ptr = buf1;
    s->code_gen_buffer = buf1;
    s->code_buf = buf1;
    total_size = s->code_gen_buffer_size - prologue_size;
    s->code_gen_buffer_size = total_size;

    /* Compute a high-water mark, at which we voluntarily flush the buffer
       and start over.  The size here is arbitrary, significantly larger
       than we expect the code generation for any one opcode to require.  */
    s->code_gen_highwater = s->code_gen_buffer + (total_size - TCG_HIGHWATER);

    tcg_register_jit(s->code_gen_buffer, total_size);

#ifdef DEBUG_DISAS
    if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
        qemu_log_lock();
        qemu_log("PROLOGUE: [size=%zu]\n", prologue_size);
        log_disas(buf0, prologue_size);
        qemu_log("\n");
        qemu_log_flush();
        qemu_log_unlock();
    }
#endif

    /* Assert that goto_ptr is implemented completely.  */
    if (TCG_TARGET_HAS_goto_ptr) {
        tcg_debug_assert(s->code_gen_epilogue != NULL);
    }
}

void tcg_func_start(TCGContext *s)
{
    tcg_pool_reset(s);
    s->nb_temps = s->nb_globals;

    /* No temps have been previously allocated for size or locality.  */
    memset(s->free_temps, 0, sizeof(s->free_temps));

    s->nb_labels = 0;
    s->current_frame_offset = s->frame_start;

#ifdef CONFIG_DEBUG_TCG
    s->goto_tb_issue_mask = 0;
#endif

    s->gen_op_buf[0].next = 1;
    s->gen_op_buf[0].prev = 0;
    s->gen_next_op_idx = 1;
}

static inline TCGTemp *tcg_temp_alloc(TCGContext *s)
{
    int n = s->nb_temps++;
    tcg_debug_assert(n < TCG_MAX_TEMPS);
    return memset(&s->temps[n], 0, sizeof(TCGTemp));
}

static inline TCGTemp *tcg_global_alloc(TCGContext *s)
{
    TCGTemp *ts;

    tcg_debug_assert(s->nb_globals == s->nb_temps);
    s->nb_globals++;
    ts = tcg_temp_alloc(s);
    ts->temp_global = 1;

    return ts;
}

static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
                                            TCGReg reg, const char *name)
{
    TCGTemp *ts;

    if (TCG_TARGET_REG_BITS == 32 && type != TCG_TYPE_I32) {
        tcg_abort();
    }

    ts = tcg_global_alloc(s);
    ts->base_type = type;
    ts->type = type;
    ts->fixed_reg = 1;
    ts->reg = reg;
    ts->name = name;
    tcg_regset_set_reg(s->reserved_regs, reg);

    return ts;
}

void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size)
{
    s->frame_start = start;
    s->frame_end = start + size;
    s->frame_temp
        = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame");
}

TCGTemp *tcg_global_mem_new_internal(TCGType type, TCGv_ptr base,
                                     intptr_t offset, const char *name)
{
    TCGContext *s = tcg_ctx;
    TCGTemp *base_ts = tcgv_ptr_temp(base);
    TCGTemp *ts = tcg_global_alloc(s);
    int indirect_reg = 0, bigendian = 0;
#ifdef HOST_WORDS_BIGENDIAN
    bigendian = 1;
#endif

    if (!base_ts->fixed_reg) {
        /* We do not support double-indirect registers.  */
        tcg_debug_assert(!base_ts->indirect_reg);
        base_ts->indirect_base = 1;
        s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64
                            ? 2 : 1);
        indirect_reg = 1;
    }

    if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
        TCGTemp *ts2 = tcg_global_alloc(s);
        char buf[64];

        ts->base_type = TCG_TYPE_I64;
        ts->type = TCG_TYPE_I32;
        ts->indirect_reg = indirect_reg;
        ts->mem_allocated = 1;
        ts->mem_base = base_ts;
        ts->mem_offset = offset + bigendian * 4;
        pstrcpy(buf, sizeof(buf), name);
        pstrcat(buf, sizeof(buf), "_0");
        ts->name = strdup(buf);

        tcg_debug_assert(ts2 == ts + 1);
        ts2->base_type = TCG_TYPE_I64;
        ts2->type = TCG_TYPE_I32;
        ts2->indirect_reg = indirect_reg;
        ts2->mem_allocated = 1;
        ts2->mem_base = base_ts;
        ts2->mem_offset = offset + (1 - bigendian) * 4;
        pstrcpy(buf, sizeof(buf), name);
        pstrcat(buf, sizeof(buf), "_1");
        ts2->name = strdup(buf);
    } else {
        ts->base_type = type;
        ts->type = type;
        ts->indirect_reg = indirect_reg;
        ts->mem_allocated = 1;
        ts->mem_base = base_ts;
        ts->mem_offset = offset;
        ts->name = name;
    }
    return ts;
}
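
/*
 * On a 32-bit host a 64-bit global, say one named "pc", thus becomes two
 * consecutive I32 temps "pc_0" and "pc_1", with mem_offset arranged so
 * that _0 is always the low half whatever the host endianness (that is
 * what the "bigendian * 4" adjustment above achieves).
 */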

static TCGTemp *tcg_temp_new_internal(TCGType type, int temp_local)
{
    TCGContext *s = tcg_ctx;
    TCGTemp *ts;
    int idx, k;

    k = type + (temp_local ? TCG_TYPE_COUNT : 0);
    idx = find_first_bit(s->free_temps[k].l, TCG_MAX_TEMPS);
    if (idx < TCG_MAX_TEMPS) {
        /* There is already an available temp with the right type.  */
        clear_bit(idx, s->free_temps[k].l);

        ts = &s->temps[idx];
        ts->temp_allocated = 1;
        tcg_debug_assert(ts->base_type == type);
        tcg_debug_assert(ts->temp_local == temp_local);
    } else {
        ts = tcg_temp_alloc(s);
        if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
            TCGTemp *ts2 = tcg_temp_alloc(s);

            ts->base_type = type;
            ts->type = TCG_TYPE_I32;
            ts->temp_allocated = 1;
            ts->temp_local = temp_local;

            tcg_debug_assert(ts2 == ts + 1);
            ts2->base_type = TCG_TYPE_I64;
            ts2->type = TCG_TYPE_I32;
            ts2->temp_allocated = 1;
            ts2->temp_local = temp_local;
        } else {
            ts->base_type = type;
            ts->type = type;
            ts->temp_allocated = 1;
            ts->temp_local = temp_local;
        }
    }

#if defined(CONFIG_DEBUG_TCG)
    s->temps_in_use++;
#endif
    return ts;
}

TCGv_i32 tcg_temp_new_internal_i32(int temp_local)
{
    TCGTemp *t = tcg_temp_new_internal(TCG_TYPE_I32, temp_local);
    return temp_tcgv_i32(t);
}

TCGv_i64 tcg_temp_new_internal_i64(int temp_local)
{
    TCGTemp *t = tcg_temp_new_internal(TCG_TYPE_I64, temp_local);
    return temp_tcgv_i64(t);
}

static void tcg_temp_free_internal(TCGTemp *ts)
{
    TCGContext *s = tcg_ctx;
    int k, idx;

#if defined(CONFIG_DEBUG_TCG)
    s->temps_in_use--;
    if (s->temps_in_use < 0) {
        fprintf(stderr, "More temporaries freed than allocated!\n");
    }
#endif

    tcg_debug_assert(ts->temp_global == 0);
    tcg_debug_assert(ts->temp_allocated != 0);
    ts->temp_allocated = 0;

    idx = temp_idx(ts);
    k = ts->base_type + (ts->temp_local ? TCG_TYPE_COUNT : 0);
    set_bit(idx, s->free_temps[k].l);
}
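
/*
 * free_temps is a set of bitmaps indexed by kind (base type, with locals
 * offset by TCG_TYPE_COUNT), so freeing just marks a bit and allocation
 * can reuse a compatible temp via find_first_bit() above instead of
 * growing nb_temps again.
 */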

void tcg_temp_free_i32(TCGv_i32 arg)
{
    tcg_temp_free_internal(tcgv_i32_temp(arg));
}

void tcg_temp_free_i64(TCGv_i64 arg)
{
    tcg_temp_free_internal(tcgv_i64_temp(arg));
}

TCGv_i32 tcg_const_i32(int32_t val)
{
    TCGv_i32 t0;
    t0 = tcg_temp_new_i32();
    tcg_gen_movi_i32(t0, val);
    return t0;
}

TCGv_i64 tcg_const_i64(int64_t val)
{
    TCGv_i64 t0;
    t0 = tcg_temp_new_i64();
    tcg_gen_movi_i64(t0, val);
    return t0;
}

TCGv_i32 tcg_const_local_i32(int32_t val)
{
    TCGv_i32 t0;
    t0 = tcg_temp_local_new_i32();
    tcg_gen_movi_i32(t0, val);
    return t0;
}

TCGv_i64 tcg_const_local_i64(int64_t val)
{
    TCGv_i64 t0;
    t0 = tcg_temp_local_new_i64();
    tcg_gen_movi_i64(t0, val);
    return t0;
}

#if defined(CONFIG_DEBUG_TCG)
void tcg_clear_temp_count(void)
{
    TCGContext *s = tcg_ctx;
    s->temps_in_use = 0;
}

int tcg_check_temp_count(void)
{
    TCGContext *s = tcg_ctx;
    if (s->temps_in_use) {
        /* Clear the count so that we don't give another
         * warning immediately next time around.
         */
        s->temps_in_use = 0;
        return 1;
    }
    return 0;
}
#endif

/* Return true if OP may appear in the opcode stream.
   Test the runtime variable that controls each opcode.  */
bool tcg_op_supported(TCGOpcode op)
{
    switch (op) {
    case INDEX_op_discard:
    case INDEX_op_set_label:
    case INDEX_op_call:
    case INDEX_op_br:
    case INDEX_op_mb:
    case INDEX_op_insn_start:
    case INDEX_op_exit_tb:
    case INDEX_op_goto_tb:
    case INDEX_op_qemu_ld_i32:
    case INDEX_op_qemu_st_i32:
    case INDEX_op_qemu_ld_i64:
    case INDEX_op_qemu_st_i64:
        return true;

    case INDEX_op_goto_ptr:
        return TCG_TARGET_HAS_goto_ptr;

    case INDEX_op_mov_i32:
    case INDEX_op_movi_i32:
    case INDEX_op_setcond_i32:
    case INDEX_op_brcond_i32:
    case INDEX_op_ld8u_i32:
    case INDEX_op_ld8s_i32:
    case INDEX_op_ld16u_i32:
    case INDEX_op_ld16s_i32:
    case INDEX_op_ld_i32:
    case INDEX_op_st8_i32:
    case INDEX_op_st16_i32:
    case INDEX_op_st_i32:
    case INDEX_op_add_i32:
    case INDEX_op_sub_i32:
    case INDEX_op_mul_i32:
    case INDEX_op_and_i32:
    case INDEX_op_or_i32:
    case INDEX_op_xor_i32:
    case INDEX_op_shl_i32:
    case INDEX_op_shr_i32:
    case INDEX_op_sar_i32:
        return true;

    case INDEX_op_movcond_i32:
        return TCG_TARGET_HAS_movcond_i32;
    case INDEX_op_div_i32:
    case INDEX_op_divu_i32:
        return TCG_TARGET_HAS_div_i32;
    case INDEX_op_rem_i32:
    case INDEX_op_remu_i32:
        return TCG_TARGET_HAS_rem_i32;
    case INDEX_op_div2_i32:
    case INDEX_op_divu2_i32:
        return TCG_TARGET_HAS_div2_i32;
    case INDEX_op_rotl_i32:
    case INDEX_op_rotr_i32:
        return TCG_TARGET_HAS_rot_i32;
    case INDEX_op_deposit_i32:
        return TCG_TARGET_HAS_deposit_i32;
    case INDEX_op_extract_i32:
        return TCG_TARGET_HAS_extract_i32;
    case INDEX_op_sextract_i32:
        return TCG_TARGET_HAS_sextract_i32;
    case INDEX_op_add2_i32:
        return TCG_TARGET_HAS_add2_i32;
    case INDEX_op_sub2_i32:
        return TCG_TARGET_HAS_sub2_i32;
    case INDEX_op_mulu2_i32:
        return TCG_TARGET_HAS_mulu2_i32;
    case INDEX_op_muls2_i32:
        return TCG_TARGET_HAS_muls2_i32;
    case INDEX_op_muluh_i32:
        return TCG_TARGET_HAS_muluh_i32;
    case INDEX_op_mulsh_i32:
        return TCG_TARGET_HAS_mulsh_i32;
    case INDEX_op_ext8s_i32:
        return TCG_TARGET_HAS_ext8s_i32;
    case INDEX_op_ext16s_i32:
        return TCG_TARGET_HAS_ext16s_i32;
    case INDEX_op_ext8u_i32:
        return TCG_TARGET_HAS_ext8u_i32;
    case INDEX_op_ext16u_i32:
        return TCG_TARGET_HAS_ext16u_i32;
    case INDEX_op_bswap16_i32:
        return TCG_TARGET_HAS_bswap16_i32;
    case INDEX_op_bswap32_i32:
        return TCG_TARGET_HAS_bswap32_i32;
    case INDEX_op_not_i32:
        return TCG_TARGET_HAS_not_i32;
    case INDEX_op_neg_i32:
        return TCG_TARGET_HAS_neg_i32;
    case INDEX_op_andc_i32:
        return TCG_TARGET_HAS_andc_i32;
    case INDEX_op_orc_i32:
        return TCG_TARGET_HAS_orc_i32;
    case INDEX_op_eqv_i32:
        return TCG_TARGET_HAS_eqv_i32;
    case INDEX_op_nand_i32:
        return TCG_TARGET_HAS_nand_i32;
    case INDEX_op_nor_i32:
        return TCG_TARGET_HAS_nor_i32;
    case INDEX_op_clz_i32:
        return TCG_TARGET_HAS_clz_i32;
    case INDEX_op_ctz_i32:
        return TCG_TARGET_HAS_ctz_i32;
    case INDEX_op_ctpop_i32:
        return TCG_TARGET_HAS_ctpop_i32;

    case INDEX_op_brcond2_i32:
    case INDEX_op_setcond2_i32:
        return TCG_TARGET_REG_BITS == 32;

    case INDEX_op_mov_i64:
    case INDEX_op_movi_i64:
    case INDEX_op_setcond_i64:
    case INDEX_op_brcond_i64:
    case INDEX_op_ld8u_i64:
    case INDEX_op_ld8s_i64:
    case INDEX_op_ld16u_i64:
    case INDEX_op_ld16s_i64:
    case INDEX_op_ld32u_i64:
    case INDEX_op_ld32s_i64:
    case INDEX_op_ld_i64:
    case INDEX_op_st8_i64:
    case INDEX_op_st16_i64:
    case INDEX_op_st32_i64:
    case INDEX_op_st_i64:
    case INDEX_op_add_i64:
    case INDEX_op_sub_i64:
    case INDEX_op_mul_i64:
    case INDEX_op_and_i64:
    case INDEX_op_or_i64:
    case INDEX_op_xor_i64:
    case INDEX_op_shl_i64:
    case INDEX_op_shr_i64:
    case INDEX_op_sar_i64:
    case INDEX_op_ext_i32_i64:
    case INDEX_op_extu_i32_i64:
        return TCG_TARGET_REG_BITS == 64;

    case INDEX_op_movcond_i64:
        return TCG_TARGET_HAS_movcond_i64;
    case INDEX_op_div_i64:
    case INDEX_op_divu_i64:
        return TCG_TARGET_HAS_div_i64;
    case INDEX_op_rem_i64:
    case INDEX_op_remu_i64:
        return TCG_TARGET_HAS_rem_i64;
    case INDEX_op_div2_i64:
    case INDEX_op_divu2_i64:
        return TCG_TARGET_HAS_div2_i64;
    case INDEX_op_rotl_i64:
    case INDEX_op_rotr_i64:
        return TCG_TARGET_HAS_rot_i64;
    case INDEX_op_deposit_i64:
        return TCG_TARGET_HAS_deposit_i64;
    case INDEX_op_extract_i64:
        return TCG_TARGET_HAS_extract_i64;
    case INDEX_op_sextract_i64:
        return TCG_TARGET_HAS_sextract_i64;
    case INDEX_op_extrl_i64_i32:
        return TCG_TARGET_HAS_extrl_i64_i32;
    case INDEX_op_extrh_i64_i32:
        return TCG_TARGET_HAS_extrh_i64_i32;
    case INDEX_op_ext8s_i64:
        return TCG_TARGET_HAS_ext8s_i64;
    case INDEX_op_ext16s_i64:
        return TCG_TARGET_HAS_ext16s_i64;
    case INDEX_op_ext32s_i64:
        return TCG_TARGET_HAS_ext32s_i64;
    case INDEX_op_ext8u_i64:
        return TCG_TARGET_HAS_ext8u_i64;
    case INDEX_op_ext16u_i64:
        return TCG_TARGET_HAS_ext16u_i64;
    case INDEX_op_ext32u_i64:
        return TCG_TARGET_HAS_ext32u_i64;
    case INDEX_op_bswap16_i64:
        return TCG_TARGET_HAS_bswap16_i64;
    case INDEX_op_bswap32_i64:
        return TCG_TARGET_HAS_bswap32_i64;
    case INDEX_op_bswap64_i64:
        return TCG_TARGET_HAS_bswap64_i64;
    case INDEX_op_not_i64:
        return TCG_TARGET_HAS_not_i64;
    case INDEX_op_neg_i64:
        return TCG_TARGET_HAS_neg_i64;
    case INDEX_op_andc_i64:
        return TCG_TARGET_HAS_andc_i64;
    case INDEX_op_orc_i64:
        return TCG_TARGET_HAS_orc_i64;
    case INDEX_op_eqv_i64:
        return TCG_TARGET_HAS_eqv_i64;
    case INDEX_op_nand_i64:
        return TCG_TARGET_HAS_nand_i64;
    case INDEX_op_nor_i64:
        return TCG_TARGET_HAS_nor_i64;
    case INDEX_op_clz_i64:
        return TCG_TARGET_HAS_clz_i64;
    case INDEX_op_ctz_i64:
        return TCG_TARGET_HAS_ctz_i64;
    case INDEX_op_ctpop_i64:
        return TCG_TARGET_HAS_ctpop_i64;
    case INDEX_op_add2_i64:
        return TCG_TARGET_HAS_add2_i64;
    case INDEX_op_sub2_i64:
        return TCG_TARGET_HAS_sub2_i64;
    case INDEX_op_mulu2_i64:
        return TCG_TARGET_HAS_mulu2_i64;
    case INDEX_op_muls2_i64:
        return TCG_TARGET_HAS_muls2_i64;
    case INDEX_op_muluh_i64:
        return TCG_TARGET_HAS_muluh_i64;
    case INDEX_op_mulsh_i64:
        return TCG_TARGET_HAS_mulsh_i64;

    case NB_OPS:
        break;
    }
    g_assert_not_reached();
}

/* Note: we convert the 64 bit args to 32 bit and do some alignment
   and endian swap. Maybe it would be better to do the alignment
   and endian swap in tcg_reg_alloc_call(). */
void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args)
{
    TCGContext *s = tcg_ctx;
    int i, real_args, nb_rets, pi;
    unsigned sizemask, flags;
    TCGHelperInfo *info;
    TCGOp *op;

    info = g_hash_table_lookup(helper_table, (gpointer)func);
    flags = info->flags;
    sizemask = info->sizemask;
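
    /*
     * sizemask packs two bits per value: bit 0 is set when the return
     * value is 64-bit, and for argument i, bit (i+1)*2 flags a 64-bit
     * argument while bit (i+1)*2 + 1 flags a signed one.  The
     * (1 << (i+1)*2) and (2 << (i+1)*2) tests below decode exactly that.
     */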

#if defined(__sparc__) && !defined(__arch64__) \
    && !defined(CONFIG_TCG_INTERPRETER)
    /* We have 64-bit values in one register, but need to pass as two
       separate parameters.  Split them.  */
    int orig_sizemask = sizemask;
    int orig_nargs = nargs;
    TCGv_i64 retl, reth;
    TCGTemp *split_args[MAX_OPC_PARAM];

    TCGV_UNUSED_I64(retl);
    TCGV_UNUSED_I64(reth);
    if (sizemask != 0) {
        for (i = real_args = 0; i < nargs; ++i) {
            int is_64bit = sizemask & (1 << (i+1)*2);
            if (is_64bit) {
                TCGv_i64 orig = temp_tcgv_i64(args[i]);
                TCGv_i32 h = tcg_temp_new_i32();
                TCGv_i32 l = tcg_temp_new_i32();
                tcg_gen_extr_i64_i32(l, h, orig);
                split_args[real_args++] = tcgv_i32_temp(h);
                split_args[real_args++] = tcgv_i32_temp(l);
            } else {
                split_args[real_args++] = args[i];
            }
        }
        nargs = real_args;
        args = split_args;
        sizemask = 0;
    }
#elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
    for (i = 0; i < nargs; ++i) {
        int is_64bit = sizemask & (1 << (i+1)*2);
        int is_signed = sizemask & (2 << (i+1)*2);
        if (!is_64bit) {
            TCGv_i64 temp = tcg_temp_new_i64();
            TCGv_i64 orig = temp_tcgv_i64(args[i]);
            if (is_signed) {
                tcg_gen_ext32s_i64(temp, orig);
            } else {
                tcg_gen_ext32u_i64(temp, orig);
            }
            args[i] = tcgv_i64_temp(temp);
        }
    }
#endif /* TCG_TARGET_EXTEND_ARGS */

    i = s->gen_next_op_idx;
    tcg_debug_assert(i < OPC_BUF_SIZE);
    s->gen_op_buf[0].prev = i;
    s->gen_next_op_idx = i + 1;
    op = &s->gen_op_buf[i];

    /* Set links for sequential allocation during translation.  */
    memset(op, 0, offsetof(TCGOp, args));
    op->opc = INDEX_op_call;
    op->prev = i - 1;
    op->next = i + 1;

    pi = 0;
    if (ret != NULL) {
#if defined(__sparc__) && !defined(__arch64__) \
    && !defined(CONFIG_TCG_INTERPRETER)
        if (orig_sizemask & 1) {
            /* The 32-bit ABI is going to return the 64-bit value in
               the %o0/%o1 register pair.  Prepare for this by using
               two return temporaries, and reassemble below.  */
            retl = tcg_temp_new_i64();
            reth = tcg_temp_new_i64();
            op->args[pi++] = tcgv_i64_arg(reth);
            op->args[pi++] = tcgv_i64_arg(retl);
            nb_rets = 2;
        } else {
            op->args[pi++] = temp_arg(ret);
            nb_rets = 1;
        }
#else
        if (TCG_TARGET_REG_BITS < 64 && (sizemask & 1)) {
#ifdef HOST_WORDS_BIGENDIAN
            op->args[pi++] = temp_arg(ret + 1);
            op->args[pi++] = temp_arg(ret);
#else
            op->args[pi++] = temp_arg(ret);
            op->args[pi++] = temp_arg(ret + 1);
#endif
            nb_rets = 2;
        } else {
            op->args[pi++] = temp_arg(ret);
            nb_rets = 1;
        }
#endif
    } else {
        nb_rets = 0;
    }
    op->callo = nb_rets;

    real_args = 0;
    for (i = 0; i < nargs; i++) {
        int is_64bit = sizemask & (1 << (i+1)*2);
        if (TCG_TARGET_REG_BITS < 64 && is_64bit) {
#ifdef TCG_TARGET_CALL_ALIGN_ARGS
            /* some targets want aligned 64 bit args */
            if (real_args & 1) {
                op->args[pi++] = TCG_CALL_DUMMY_ARG;
                real_args++;
            }
#endif
           /* If stack grows up, then we will be placing successive
              arguments at lower addresses, which means we need to
              reverse the order compared to how we would normally
              treat either big or little-endian.  For those arguments
              that will wind up in registers, this still works for
              HPPA (the only current STACK_GROWSUP target) since the
              argument registers are *also* allocated in decreasing
              order.  If another such target is added, this logic may
              have to get more complicated to differentiate between
              stack arguments and register arguments.  */
#if defined(HOST_WORDS_BIGENDIAN) != defined(TCG_TARGET_STACK_GROWSUP)
            op->args[pi++] = temp_arg(args[i] + 1);
            op->args[pi++] = temp_arg(args[i]);
#else
            op->args[pi++] = temp_arg(args[i]);
            op->args[pi++] = temp_arg(args[i] + 1);
#endif
            real_args += 2;
            continue;
        }

        op->args[pi++] = temp_arg(args[i]);
        real_args++;
    }
    op->args[pi++] = (uintptr_t)func;
    op->args[pi++] = flags;
    op->calli = real_args;

    /* Make sure the fields didn't overflow.  */
    tcg_debug_assert(op->calli == real_args);
    tcg_debug_assert(pi <= ARRAY_SIZE(op->args));

#if defined(__sparc__) && !defined(__arch64__) \
    && !defined(CONFIG_TCG_INTERPRETER)
    /* Free all of the parts we allocated above.  */
    for (i = real_args = 0; i < orig_nargs; ++i) {
        int is_64bit = orig_sizemask & (1 << (i+1)*2);
        if (is_64bit) {
            tcg_temp_free_internal(args[real_args++]);
            tcg_temp_free_internal(args[real_args++]);
        } else {
            real_args++;
        }
    }
    if (orig_sizemask & 1) {
        /* The 32-bit ABI returned two 32-bit pieces.  Re-assemble them.
           Note that describing these as TCGv_i64 eliminates an unnecessary
           zero-extension that tcg_gen_concat_i32_i64 would create.  */
        tcg_gen_concat32_i64(temp_tcgv_i64(ret), retl, reth);
        tcg_temp_free_i64(retl);
        tcg_temp_free_i64(reth);
    }
#elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
    for (i = 0; i < nargs; ++i) {
        int is_64bit = sizemask & (1 << (i+1)*2);
        if (!is_64bit) {
            tcg_temp_free_internal(args[i]);
        }
    }
#endif /* TCG_TARGET_EXTEND_ARGS */
}

static void tcg_reg_alloc_start(TCGContext *s)
{
    int i, n;
    TCGTemp *ts;

    for (i = 0, n = s->nb_globals; i < n; i++) {
        ts = &s->temps[i];
        ts->val_type = (ts->fixed_reg ? TEMP_VAL_REG : TEMP_VAL_MEM);
    }
    for (n = s->nb_temps; i < n; i++) {
        ts = &s->temps[i];
        ts->val_type = (ts->temp_local ? TEMP_VAL_MEM : TEMP_VAL_DEAD);
        ts->mem_allocated = 0;
        ts->fixed_reg = 0;
    }

    memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp));
}

static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size,
                                 TCGTemp *ts)
{
    int idx = temp_idx(ts);

    if (ts->temp_global) {
        pstrcpy(buf, buf_size, ts->name);
    } else if (ts->temp_local) {
        snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
    } else {
        snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
    }
    return buf;
}

static char *tcg_get_arg_str(TCGContext *s, char *buf,
                             int buf_size, TCGArg arg)
{
    return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg));
}

/* Find helper name.  */
static inline const char *tcg_find_helper(TCGContext *s, uintptr_t val)
{
    const char *ret = NULL;
    if (helper_table) {
        TCGHelperInfo *info = g_hash_table_lookup(helper_table, (gpointer)val);
        if (info) {
            ret = info->name;
        }
    }
    return ret;
}

static const char * const cond_name[] =
{
    [TCG_COND_NEVER] = "never",
    [TCG_COND_ALWAYS] = "always",
    [TCG_COND_EQ] = "eq",
    [TCG_COND_NE] = "ne",
    [TCG_COND_LT] = "lt",
    [TCG_COND_GE] = "ge",
    [TCG_COND_LE] = "le",
    [TCG_COND_GT] = "gt",
    [TCG_COND_LTU] = "ltu",
    [TCG_COND_GEU] = "geu",
    [TCG_COND_LEU] = "leu",
    [TCG_COND_GTU] = "gtu"
};

static const char * const ldst_name[] =
{
    [MO_UB]   = "ub",
    [MO_SB]   = "sb",
    [MO_LEUW] = "leuw",
    [MO_LESW] = "lesw",
    [MO_LEUL] = "leul",
    [MO_LESL] = "lesl",
    [MO_LEQ]  = "leq",
    [MO_BEUW] = "beuw",
    [MO_BESW] = "besw",
    [MO_BEUL] = "beul",
    [MO_BESL] = "besl",
    [MO_BEQ]  = "beq",
};

static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
#ifdef ALIGNED_ONLY
    [MO_UNALN >> MO_ASHIFT]    = "un+",
    [MO_ALIGN >> MO_ASHIFT]    = "",
#else
    [MO_UNALN >> MO_ASHIFT]    = "",
    [MO_ALIGN >> MO_ASHIFT]    = "al+",
#endif
    [MO_ALIGN_2 >> MO_ASHIFT]  = "al2+",
    [MO_ALIGN_4 >> MO_ASHIFT]  = "al4+",
    [MO_ALIGN_8 >> MO_ASHIFT]  = "al8+",
    [MO_ALIGN_16 >> MO_ASHIFT] = "al16+",
    [MO_ALIGN_32 >> MO_ASHIFT] = "al32+",
    [MO_ALIGN_64 >> MO_ASHIFT] = "al64+",
};
1585 
1586 void tcg_dump_ops(TCGContext *s)
1587 {
1588     char buf[128];
1589     TCGOp *op;
1590     int oi;
1591 
1592     for (oi = s->gen_op_buf[0].next; oi != 0; oi = op->next) {
1593         int i, k, nb_oargs, nb_iargs, nb_cargs;
1594         const TCGOpDef *def;
1595         TCGOpcode c;
1596         int col = 0;
1597 
1598         op = &s->gen_op_buf[oi];
1599         c = op->opc;
1600         def = &tcg_op_defs[c];
1601 
1602         if (c == INDEX_op_insn_start) {
1603             col += qemu_log("%s ----", oi != s->gen_op_buf[0].next ? "\n" : "");
1604 
1605             for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
1606                 target_ulong a;
1607 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
1608                 a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
1609 #else
1610                 a = op->args[i];
1611 #endif
1612                 col += qemu_log(" " TARGET_FMT_lx, a);
1613             }
1614         } else if (c == INDEX_op_call) {
1615             /* variable number of arguments */
1616             nb_oargs = op->callo;
1617             nb_iargs = op->calli;
1618             nb_cargs = def->nb_cargs;
1619 
1620             /* function name, flags, out args */
1621             col += qemu_log(" %s %s,$0x%" TCG_PRIlx ",$%d", def->name,
1622                             tcg_find_helper(s, op->args[nb_oargs + nb_iargs]),
1623                             op->args[nb_oargs + nb_iargs + 1], nb_oargs);
1624             for (i = 0; i < nb_oargs; i++) {
1625                 col += qemu_log(",%s", tcg_get_arg_str(s, buf, sizeof(buf),
1626                                                        op->args[i]));
1627             }
1628             for (i = 0; i < nb_iargs; i++) {
1629                 TCGArg arg = op->args[nb_oargs + i];
1630                 const char *t = "<dummy>";
1631                 if (arg != TCG_CALL_DUMMY_ARG) {
1632                     t = tcg_get_arg_str(s, buf, sizeof(buf), arg);
1633                 }
1634                 col += qemu_log(",%s", t);
1635             }
1636         } else {
1637             col += qemu_log(" %s ", def->name);
1638 
1639             nb_oargs = def->nb_oargs;
1640             nb_iargs = def->nb_iargs;
1641             nb_cargs = def->nb_cargs;
1642 
1643             k = 0;
1644             for (i = 0; i < nb_oargs; i++) {
1645                 if (k != 0) {
1646                     col += qemu_log(",");
1647                 }
1648                 col += qemu_log("%s", tcg_get_arg_str(s, buf, sizeof(buf),
1649                                                       op->args[k++]));
1650             }
1651             for (i = 0; i < nb_iargs; i++) {
1652                 if (k != 0) {
1653                     col += qemu_log(",");
1654                 }
1655                 col += qemu_log("%s", tcg_get_arg_str(s, buf, sizeof(buf),
1656                                                       op->args[k++]));
1657             }
1658             switch (c) {
1659             case INDEX_op_brcond_i32:
1660             case INDEX_op_setcond_i32:
1661             case INDEX_op_movcond_i32:
1662             case INDEX_op_brcond2_i32:
1663             case INDEX_op_setcond2_i32:
1664             case INDEX_op_brcond_i64:
1665             case INDEX_op_setcond_i64:
1666             case INDEX_op_movcond_i64:
1667                 if (op->args[k] < ARRAY_SIZE(cond_name)
1668                     && cond_name[op->args[k]]) {
1669                     col += qemu_log(",%s", cond_name[op->args[k++]]);
1670                 } else {
1671                     col += qemu_log(",$0x%" TCG_PRIlx, op->args[k++]);
1672                 }
1673                 i = 1;
1674                 break;
1675             case INDEX_op_qemu_ld_i32:
1676             case INDEX_op_qemu_st_i32:
1677             case INDEX_op_qemu_ld_i64:
1678             case INDEX_op_qemu_st_i64:
1679                 {
1680                     TCGMemOpIdx moi = op->args[k++];
1681                     TCGMemOp mop = get_memop(moi);
1682                     unsigned ix = get_mmuidx(moi);
1683 
1684                     if (mop & ~(MO_AMASK | MO_BSWAP | MO_SSIZE)) {
1685                         col += qemu_log(",$0x%x,%u", mop, ix);
1686                     } else {
1687                         const char *s_al, *s_op;
1688                         s_al = alignment_name[(mop & MO_AMASK) >> MO_ASHIFT];
1689                         s_op = ldst_name[mop & (MO_BSWAP | MO_SSIZE)];
1690                         col += qemu_log(",%s%s,%u", s_al, s_op, ix);
1691                     }
1692                     i = 1;
1693                 }
1694                 break;
1695             default:
1696                 i = 0;
1697                 break;
1698             }
1699             switch (c) {
1700             case INDEX_op_set_label:
1701             case INDEX_op_br:
1702             case INDEX_op_brcond_i32:
1703             case INDEX_op_brcond_i64:
1704             case INDEX_op_brcond2_i32:
1705                 col += qemu_log("%s$L%d", k ? "," : "",
1706                                 arg_label(op->args[k])->id);
1707                 i++, k++;
1708                 break;
1709             default:
1710                 break;
1711             }
1712             for (; i < nb_cargs; i++, k++) {
1713                 col += qemu_log("%s$0x%" TCG_PRIlx, k ? "," : "", op->args[k]);
1714             }
1715         }
1716         if (op->life) {
1717             unsigned life = op->life;
1718 
1719             for (; col < 48; ++col) {
1720                 putc(' ', qemu_logfile);
1721             }
1722 
1723             if (life & (SYNC_ARG * 3)) {
1724                 qemu_log("  sync:");
1725                 for (i = 0; i < 2; ++i) {
1726                     if (life & (SYNC_ARG << i)) {
1727                         qemu_log(" %d", i);
1728                     }
1729                 }
1730             }
1731             life /= DEAD_ARG;
1732             if (life) {
1733                 qemu_log("  dead:");
1734                 for (i = 0; life; ++i, life >>= 1) {
1735                     if (life & 1) {
1736                         qemu_log(" %d", i);
1737                     }
1738                 }
1739             }
1740         }
1741         qemu_log("\n");
1742     }
1743 }
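
/* Purely illustrative: a dump produced by tcg_dump_ops() looks roughly like

        ---- 0000000040001000
         mov_i32 tmp0,var_a                       dead: 1
         add_i32 tmp0,tmp0,var_b                  sync: 0  dead: 1 2
         brcond_i32 tmp0,tmp1,lt,$L1              dead: 0 1

   where the trailing "sync:"/"dead:" columns appear only once liveness has
   filled in op->life; temp names and exact spacing depend on the target and
   the frontend.  */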
1744 
1745 /* we give more priority to constraints with fewer registers */
1746 static int get_constraint_priority(const TCGOpDef *def, int k)
1747 {
1748     const TCGArgConstraint *arg_ct;
1749 
1750     int i, n;
1751     arg_ct = &def->args_ct[k];
1752     if (arg_ct->ct & TCG_CT_ALIAS) {
1753         /* an alias is equivalent to a single register */
1754         n = 1;
1755     } else {
1756         if (!(arg_ct->ct & TCG_CT_REG))
1757             return 0;
1758         n = 0;
1759         for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
1760             if (tcg_regset_test_reg(arg_ct->u.regs, i))
1761                 n++;
1762         }
1763     }
1764     return TCG_TARGET_NB_REGS - n + 1;
1765 }
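
/* For instance (illustrative numbers): with TCG_TARGET_NB_REGS == 16, an
   argument tied to a single register, or aliased, yields n == 1 and thus
   priority 16; a constraint accepting all 16 registers yields priority 1;
   a non-register constraint returns 0 and therefore sorts last.  */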
1766 
1767 /* sort from highest priority to lowest */
1768 static void sort_constraints(TCGOpDef *def, int start, int n)
1769 {
1770     int i, j, p1, p2, tmp;
1771 
1772     for (i = 0; i < n; i++)
1773         def->sorted_args[start + i] = start + i;
1774     if (n <= 1)
1775         return;
1776     for (i = 0; i < n - 1; i++) {
1777         for (j = i + 1; j < n; j++) {
1778             p1 = get_constraint_priority(def, def->sorted_args[start + i]);
1779             p2 = get_constraint_priority(def, def->sorted_args[start + j]);
1780             if (p1 < p2) {
1781                 tmp = def->sorted_args[start + i];
1782                 def->sorted_args[start + i] = def->sorted_args[start + j];
1783                 def->sorted_args[start + j] = tmp;
1784             }
1785         }
1786     }
1787 }
1788 
1789 static void process_op_defs(TCGContext *s)
1790 {
1791     TCGOpcode op;
1792 
1793     for (op = 0; op < NB_OPS; op++) {
1794         TCGOpDef *def = &tcg_op_defs[op];
1795         const TCGTargetOpDef *tdefs;
1796         TCGType type;
1797         int i, nb_args;
1798 
1799         if (def->flags & TCG_OPF_NOT_PRESENT) {
1800             continue;
1801         }
1802 
1803         nb_args = def->nb_iargs + def->nb_oargs;
1804         if (nb_args == 0) {
1805             continue;
1806         }
1807 
1808         tdefs = tcg_target_op_def(op);
1809         /* Missing TCGTargetOpDef entry. */
1810         tcg_debug_assert(tdefs != NULL);
1811 
1812         type = (def->flags & TCG_OPF_64BIT ? TCG_TYPE_I64 : TCG_TYPE_I32);
1813         for (i = 0; i < nb_args; i++) {
1814             const char *ct_str = tdefs->args_ct_str[i];
1815             /* Incomplete TCGTargetOpDef entry. */
1816             tcg_debug_assert(ct_str != NULL);
1817 
1818             def->args_ct[i].u.regs = 0;
1819             def->args_ct[i].ct = 0;
1820             while (*ct_str != '\0') {
1821                 switch (*ct_str) {
1822                 case '0' ... '9':
1823                     {
1824                         int oarg = *ct_str - '0';
1825                         tcg_debug_assert(ct_str == tdefs->args_ct_str[i]);
1826                         tcg_debug_assert(oarg < def->nb_oargs);
1827                         tcg_debug_assert(def->args_ct[oarg].ct & TCG_CT_REG);
1828                         /* TCG_CT_ALIAS is for the output arguments.
1829                            The input is tagged with TCG_CT_IALIAS. */
1830                         def->args_ct[i] = def->args_ct[oarg];
1831                         def->args_ct[oarg].ct |= TCG_CT_ALIAS;
1832                         def->args_ct[oarg].alias_index = i;
1833                         def->args_ct[i].ct |= TCG_CT_IALIAS;
1834                         def->args_ct[i].alias_index = oarg;
1835                     }
1836                     ct_str++;
1837                     break;
1838                 case '&':
1839                     def->args_ct[i].ct |= TCG_CT_NEWREG;
1840                     ct_str++;
1841                     break;
1842                 case 'i':
1843                     def->args_ct[i].ct |= TCG_CT_CONST;
1844                     ct_str++;
1845                     break;
1846                 default:
1847                     ct_str = target_parse_constraint(&def->args_ct[i],
1848                                                      ct_str, type);
1849                     /* Typo in TCGTargetOpDef constraint. */
1850                     tcg_debug_assert(ct_str != NULL);
1851                 }
1852             }
1853         }
1854 
1855         /* TCGTargetOpDef entry with too much information? */
1856         tcg_debug_assert(i == TCG_MAX_OP_ARGS || tdefs->args_ct_str[i] == NULL);
1857 
1858         /* sort the constraints (XXX: this is just a heuristic) */
1859         sort_constraints(def, 0, def->nb_oargs);
1860         sort_constraints(def, def->nb_oargs, def->nb_iargs);
1861     }
1862 }
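
/* A sketch of the constraint strings parsed above, modelled on typical
   tcg_target_op_def() entries (the actual sets are target-specific):

       static const TCGTargetOpDef add
           = { .args_ct_str = { "r", "0", "ri" } };

   where the output takes any register ("r"), the first input ("0") is
   aliased to output 0 via TCG_CT_ALIAS/TCG_CT_IALIAS, and the second
   input ("ri") may be a register or an immediate constant.  */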
1863 
1864 void tcg_op_remove(TCGContext *s, TCGOp *op)
1865 {
1866     int next = op->next;
1867     int prev = op->prev;
1868 
1869     /* We should never attempt to remove the list terminator.  */
1870     tcg_debug_assert(op != &s->gen_op_buf[0]);
1871 
1872     s->gen_op_buf[next].prev = prev;
1873     s->gen_op_buf[prev].next = next;
1874 
1875     memset(op, 0, sizeof(*op));
1876 
1877 #ifdef CONFIG_PROFILER
1878     atomic_set(&s->prof.del_op_count, s->prof.del_op_count + 1);
1879 #endif
1880 }
1881 
1882 TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op,
1883                             TCGOpcode opc, int nargs)
1884 {
1885     int oi = s->gen_next_op_idx;
1886     int prev = old_op->prev;
1887     int next = old_op - s->gen_op_buf;
1888     TCGOp *new_op;
1889 
1890     tcg_debug_assert(oi < OPC_BUF_SIZE);
1891     s->gen_next_op_idx = oi + 1;
1892 
1893     new_op = &s->gen_op_buf[oi];
1894     *new_op = (TCGOp){
1895         .opc = opc,
1896         .prev = prev,
1897         .next = next
1898     };
1899     s->gen_op_buf[prev].next = oi;
1900     old_op->prev = oi;
1901 
1902     return new_op;
1903 }
1904 
1905 TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op,
1906                            TCGOpcode opc, int nargs)
1907 {
1908     int oi = s->gen_next_op_idx;
1909     int prev = old_op - s->gen_op_buf;
1910     int next = old_op->next;
1911     TCGOp *new_op;
1912 
1913     tcg_debug_assert(oi < OPC_BUF_SIZE);
1914     s->gen_next_op_idx = oi + 1;
1915 
1916     new_op = &s->gen_op_buf[oi];
1917     *new_op = (TCGOp){
1918         .opc = opc,
1919         .prev = prev,
1920         .next = next
1921     };
1922     s->gen_op_buf[next].prev = oi;
1923     old_op->next = oi;
1924 
1925     return new_op;
1926 }
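
/* Illustrative view of the list handling above: ops live in s->gen_op_buf[]
   and are chained by buffer indices rather than pointers, with
   gen_op_buf[0] serving as both head and tail sentinel.  Inserting X
   before B, where A was B's predecessor, rewires

       A.next: B -> X     X.prev = A, X.next = B     B.prev: A -> X

   and tcg_op_remove() is the inverse, stitching prev/next around the
   victim before clearing it.  */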
1927 
1928 #define TS_DEAD  1
1929 #define TS_MEM   2
1930 
1931 #define IS_DEAD_ARG(n)   (arg_life & (DEAD_ARG << (n)))
1932 #define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n)))
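
/* A sketch of the op->life encoding these macros consume, assuming the
   SYNC_ARG/DEAD_ARG definitions in tcg.h (two sync bits below the dead
   bits): bit SYNC_ARG << n asks for output n (n < 2) to be synced back to
   memory, and bit DEAD_ARG << n marks argument n, counted over outputs
   then inputs, as dying at this op.  E.g. for an op with one output and
   two inputs, (SYNC_ARG << 0) | (DEAD_ARG << 2) means "sync output 0;
   the second input dies here".  */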
1933 
1934 /* liveness analysis: end of function: all temps are dead, and globals
1935    should be in memory. */
1936 static void tcg_la_func_end(TCGContext *s)
1937 {
1938     int ng = s->nb_globals;
1939     int nt = s->nb_temps;
1940     int i;
1941 
1942     for (i = 0; i < ng; ++i) {
1943         s->temps[i].state = TS_DEAD | TS_MEM;
1944     }
1945     for (i = ng; i < nt; ++i) {
1946         s->temps[i].state = TS_DEAD;
1947     }
1948 }
1949 
1950 /* liveness analysis: end of basic block: all temps are dead, globals
1951    and local temps should be in memory. */
1952 static void tcg_la_bb_end(TCGContext *s)
1953 {
1954     int ng = s->nb_globals;
1955     int nt = s->nb_temps;
1956     int i;
1957 
1958     for (i = 0; i < ng; ++i) {
1959         s->temps[i].state = TS_DEAD | TS_MEM;
1960     }
1961     for (i = ng; i < nt; ++i) {
1962         s->temps[i].state = (s->temps[i].temp_local
1963                              ? TS_DEAD | TS_MEM
1964                              : TS_DEAD);
1965     }
1966 }
1967 
1968 /* Liveness analysis: update the opc_arg_life array to tell if a
1969    given input argument is dead. Instructions updating dead
1970    temporaries are removed. */
1971 static void liveness_pass_1(TCGContext *s)
1972 {
1973     int nb_globals = s->nb_globals;
1974     int oi, oi_prev;
1975 
1976     tcg_la_func_end(s);
1977 
1978     for (oi = s->gen_op_buf[0].prev; oi != 0; oi = oi_prev) {
1979         int i, nb_iargs, nb_oargs;
1980         TCGOpcode opc_new, opc_new2;
1981         bool have_opc_new2;
1982         TCGLifeData arg_life = 0;
1983         TCGTemp *arg_ts;
1984 
1985         TCGOp * const op = &s->gen_op_buf[oi];
1986         TCGOpcode opc = op->opc;
1987         const TCGOpDef *def = &tcg_op_defs[opc];
1988 
1989         oi_prev = op->prev;
1990 
1991         switch (opc) {
1992         case INDEX_op_call:
1993             {
1994                 int call_flags;
1995 
1996                 nb_oargs = op->callo;
1997                 nb_iargs = op->calli;
1998                 call_flags = op->args[nb_oargs + nb_iargs + 1];
1999 
2000                 /* pure functions can be removed if their result is unused */
2001                 if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
2002                     for (i = 0; i < nb_oargs; i++) {
2003                         arg_ts = arg_temp(op->args[i]);
2004                         if (arg_ts->state != TS_DEAD) {
2005                             goto do_not_remove_call;
2006                         }
2007                     }
2008                     goto do_remove;
2009                 } else {
2010                 do_not_remove_call:
2011 
2012                     /* output args are dead */
2013                     for (i = 0; i < nb_oargs; i++) {
2014                         arg_ts = arg_temp(op->args[i]);
2015                         if (arg_ts->state & TS_DEAD) {
2016                             arg_life |= DEAD_ARG << i;
2017                         }
2018                         if (arg_ts->state & TS_MEM) {
2019                             arg_life |= SYNC_ARG << i;
2020                         }
2021                         arg_ts->state = TS_DEAD;
2022                     }
2023 
2024                     if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
2025                                         TCG_CALL_NO_READ_GLOBALS))) {
2026                         /* globals should go back to memory */
2027                         for (i = 0; i < nb_globals; i++) {
2028                             s->temps[i].state = TS_DEAD | TS_MEM;
2029                         }
2030                     } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
2031                         /* globals should be synced to memory */
2032                         for (i = 0; i < nb_globals; i++) {
2033                             s->temps[i].state |= TS_MEM;
2034                         }
2035                     }
2036 
2037                     /* record arguments that die in this helper */
2038                     for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
2039                         arg_ts = arg_temp(op->args[i]);
2040                         if (arg_ts && arg_ts->state & TS_DEAD) {
2041                             arg_life |= DEAD_ARG << i;
2042                         }
2043                     }
2044                     /* input arguments are live for preceding opcodes */
2045                     for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
2046                         arg_ts = arg_temp(op->args[i]);
2047                         if (arg_ts) {
2048                             arg_ts->state &= ~TS_DEAD;
2049                         }
2050                     }
2051                 }
2052             }
2053             break;
2054         case INDEX_op_insn_start:
2055             break;
2056         case INDEX_op_discard:
2057             /* mark the temporary as dead */
2058             arg_temp(op->args[0])->state = TS_DEAD;
2059             break;
2060 
2061         case INDEX_op_add2_i32:
2062             opc_new = INDEX_op_add_i32;
2063             goto do_addsub2;
2064         case INDEX_op_sub2_i32:
2065             opc_new = INDEX_op_sub_i32;
2066             goto do_addsub2;
2067         case INDEX_op_add2_i64:
2068             opc_new = INDEX_op_add_i64;
2069             goto do_addsub2;
2070         case INDEX_op_sub2_i64:
2071             opc_new = INDEX_op_sub_i64;
2072         do_addsub2:
2073             nb_iargs = 4;
2074             nb_oargs = 2;
2075             /* Test if the high part of the operation is dead, but not
2076                the low part.  The result can be optimized to a simple
2077                add or sub.  This often happens for x86_64 guests when
2078                the cpu mode is set to 32 bit.  */
2079             if (arg_temp(op->args[1])->state == TS_DEAD) {
2080                 if (arg_temp(op->args[0])->state == TS_DEAD) {
2081                     goto do_remove;
2082                 }
2083                 /* Replace the opcode and adjust the args in place,
2084                    leaving 3 unused args at the end.  */
2085                 op->opc = opc = opc_new;
2086                 op->args[1] = op->args[2];
2087                 op->args[2] = op->args[4];
2088                 /* Fall through and mark the single-word operation live.  */
2089                 nb_iargs = 2;
2090                 nb_oargs = 1;
2091             }
2092             goto do_not_remove;
2093 
2094         case INDEX_op_mulu2_i32:
2095             opc_new = INDEX_op_mul_i32;
2096             opc_new2 = INDEX_op_muluh_i32;
2097             have_opc_new2 = TCG_TARGET_HAS_muluh_i32;
2098             goto do_mul2;
2099         case INDEX_op_muls2_i32:
2100             opc_new = INDEX_op_mul_i32;
2101             opc_new2 = INDEX_op_mulsh_i32;
2102             have_opc_new2 = TCG_TARGET_HAS_mulsh_i32;
2103             goto do_mul2;
2104         case INDEX_op_mulu2_i64:
2105             opc_new = INDEX_op_mul_i64;
2106             opc_new2 = INDEX_op_muluh_i64;
2107             have_opc_new2 = TCG_TARGET_HAS_muluh_i64;
2108             goto do_mul2;
2109         case INDEX_op_muls2_i64:
2110             opc_new = INDEX_op_mul_i64;
2111             opc_new2 = INDEX_op_mulsh_i64;
2112             have_opc_new2 = TCG_TARGET_HAS_mulsh_i64;
2113             goto do_mul2;
2114         do_mul2:
2115             nb_iargs = 2;
2116             nb_oargs = 2;
2117             if (arg_temp(op->args[1])->state == TS_DEAD) {
2118                 if (arg_temp(op->args[0])->state == TS_DEAD) {
2119                     /* Both parts of the operation are dead.  */
2120                     goto do_remove;
2121                 }
2122                 /* The high part of the operation is dead; generate the low. */
2123                 op->opc = opc = opc_new;
2124                 op->args[1] = op->args[2];
2125                 op->args[2] = op->args[3];
2126             } else if (arg_temp(op->args[0])->state == TS_DEAD && have_opc_new2) {
2127                 /* The low part of the operation is dead; generate the high. */
2128                 op->opc = opc = opc_new2;
2129                 op->args[0] = op->args[1];
2130                 op->args[1] = op->args[2];
2131                 op->args[2] = op->args[3];
2132             } else {
2133                 goto do_not_remove;
2134             }
2135             /* Mark the single-word operation live.  */
2136             nb_oargs = 1;
2137             goto do_not_remove;
2138 
2139         default:
2140             /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
2141             nb_iargs = def->nb_iargs;
2142             nb_oargs = def->nb_oargs;
2143 
2144             /* Test if the operation can be removed because all
2145                its outputs are dead. We assume that nb_oargs == 0
2146                implies side effects */
2147             if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) {
2148                 for (i = 0; i < nb_oargs; i++) {
2149                     if (arg_temp(op->args[i])->state != TS_DEAD) {
2150                         goto do_not_remove;
2151                     }
2152                 }
2153             do_remove:
2154                 tcg_op_remove(s, op);
2155             } else {
2156             do_not_remove:
2157                 /* output args are dead */
2158                 for (i = 0; i < nb_oargs; i++) {
2159                     arg_ts = arg_temp(op->args[i]);
2160                     if (arg_ts->state & TS_DEAD) {
2161                         arg_life |= DEAD_ARG << i;
2162                     }
2163                     if (arg_ts->state & TS_MEM) {
2164                         arg_life |= SYNC_ARG << i;
2165                     }
2166                     arg_ts->state = TS_DEAD;
2167                 }
2168 
2169                 /* if end of basic block, update */
2170                 if (def->flags & TCG_OPF_BB_END) {
2171                     tcg_la_bb_end(s);
2172                 } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
2173                     /* globals should be synced to memory */
2174                     for (i = 0; i < nb_globals; i++) {
2175                         s->temps[i].state |= TS_MEM;
2176                     }
2177                 }
2178 
2179                 /* record arguments that die in this opcode */
2180                 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
2181                     arg_ts = arg_temp(op->args[i]);
2182                     if (arg_ts->state & TS_DEAD) {
2183                         arg_life |= DEAD_ARG << i;
2184                     }
2185                 }
2186                 /* input arguments are live for preceding opcodes */
2187                 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
2188                     arg_temp(op->args[i])->state &= ~TS_DEAD;
2189                 }
2190             }
2191             break;
2192         }
2193         op->life = arg_life;
2194     }
2195 }
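
/* As an illustration of the do_addsub2 lowering above: when liveness finds
   the high half dead in

       add2_i32 lo, hi, al, ah, bl, bh

   the op is rewritten in place to the single-word

       add_i32 lo, al, bl

   by shifting args[2] and args[4] down, leaving three trailing args
   unused.  */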
2196 
2197 /* Liveness analysis: Convert indirect regs to direct temporaries.  */
2198 static bool liveness_pass_2(TCGContext *s)
2199 {
2200     int nb_globals = s->nb_globals;
2201     int nb_temps, i, oi, oi_next;
2202     bool changes = false;
2203 
2204     /* Create a temporary for each indirect global.  */
2205     for (i = 0; i < nb_globals; ++i) {
2206         TCGTemp *its = &s->temps[i];
2207         if (its->indirect_reg) {
2208             TCGTemp *dts = tcg_temp_alloc(s);
2209             dts->type = its->type;
2210             dts->base_type = its->base_type;
2211             its->state_ptr = dts;
2212         } else {
2213             its->state_ptr = NULL;
2214         }
2215         /* All globals begin dead.  */
2216         its->state = TS_DEAD;
2217     }
2218     for (nb_temps = s->nb_temps; i < nb_temps; ++i) {
2219         TCGTemp *its = &s->temps[i];
2220         its->state_ptr = NULL;
2221         its->state = TS_DEAD;
2222     }
2223 
2224     for (oi = s->gen_op_buf[0].next; oi != 0; oi = oi_next) {
2225         TCGOp *op = &s->gen_op_buf[oi];
2226         TCGOpcode opc = op->opc;
2227         const TCGOpDef *def = &tcg_op_defs[opc];
2228         TCGLifeData arg_life = op->life;
2229         int nb_iargs, nb_oargs, call_flags;
2230         TCGTemp *arg_ts, *dir_ts;
2231 
2232         oi_next = op->next;
2233 
2234         if (opc == INDEX_op_call) {
2235             nb_oargs = op->callo;
2236             nb_iargs = op->calli;
2237             call_flags = op->args[nb_oargs + nb_iargs + 1];
2238         } else {
2239             nb_iargs = def->nb_iargs;
2240             nb_oargs = def->nb_oargs;
2241 
2242             /* Set flags similar to those required for calls.  */
2243             if (def->flags & TCG_OPF_BB_END) {
2244                 /* Like writing globals: save_globals */
2245                 call_flags = 0;
2246             } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
2247                 /* Like reading globals: sync_globals */
2248                 call_flags = TCG_CALL_NO_WRITE_GLOBALS;
2249             } else {
2250                 /* No effect on globals.  */
2251                 call_flags = (TCG_CALL_NO_READ_GLOBALS |
2252                               TCG_CALL_NO_WRITE_GLOBALS);
2253             }
2254         }
2255 
2256         /* Make sure that input arguments are available.  */
2257         for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
2258             arg_ts = arg_temp(op->args[i]);
2259             if (arg_ts) {
2260                 dir_ts = arg_ts->state_ptr;
2261                 if (dir_ts && arg_ts->state == TS_DEAD) {
2262                     TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32
2263                                       ? INDEX_op_ld_i32
2264                                       : INDEX_op_ld_i64);
2265                     TCGOp *lop = tcg_op_insert_before(s, op, lopc, 3);
2266 
2267                     lop->args[0] = temp_arg(dir_ts);
2268                     lop->args[1] = temp_arg(arg_ts->mem_base);
2269                     lop->args[2] = arg_ts->mem_offset;
2270 
2271                     /* Loaded, but synced with memory.  */
2272                     arg_ts->state = TS_MEM;
2273                 }
2274             }
2275         }
2276 
2277         /* Perform input replacement, and mark inputs that became dead.
2278            No action is required except keeping the temp state up to date
2279            so that we reload when needed.  */
2280         for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
2281             arg_ts = arg_temp(op->args[i]);
2282             if (arg_ts) {
2283                 dir_ts = arg_ts->state_ptr;
2284                 if (dir_ts) {
2285                     op->args[i] = temp_arg(dir_ts);
2286                     changes = true;
2287                     if (IS_DEAD_ARG(i)) {
2288                         arg_ts->state = TS_DEAD;
2289                     }
2290                 }
2291             }
2292         }
2293 
2294         /* Liveness analysis should ensure that the following are
2295            all correct, for call sites and basic block end points.  */
2296         if (call_flags & TCG_CALL_NO_READ_GLOBALS) {
2297             /* Nothing to do */
2298         } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) {
2299             for (i = 0; i < nb_globals; ++i) {
2300                 /* Liveness should see that globals are synced back,
2301                    that is, either TS_DEAD or TS_MEM.  */
2302                 arg_ts = &s->temps[i];
2303                 tcg_debug_assert(arg_ts->state_ptr == 0
2304                                  || arg_ts->state != 0);
2305             }
2306         } else {
2307             for (i = 0; i < nb_globals; ++i) {
2308                 /* Liveness should see that globals are saved back,
2309                    that is, TS_DEAD, waiting to be reloaded.  */
2310                 arg_ts = &s->temps[i];
2311                 tcg_debug_assert(arg_ts->state_ptr == 0
2312                                  || arg_ts->state == TS_DEAD);
2313             }
2314         }
2315 
2316         /* Outputs become available.  */
2317         for (i = 0; i < nb_oargs; i++) {
2318             arg_ts = arg_temp(op->args[i]);
2319             dir_ts = arg_ts->state_ptr;
2320             if (!dir_ts) {
2321                 continue;
2322             }
2323             op->args[i] = temp_arg(dir_ts);
2324             changes = true;
2325 
2326             /* The output is now live and modified.  */
2327             arg_ts->state = 0;
2328 
2329             /* Sync outputs upon their last write.  */
2330             if (NEED_SYNC_ARG(i)) {
2331                 TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
2332                                   ? INDEX_op_st_i32
2333                                   : INDEX_op_st_i64);
2334                 TCGOp *sop = tcg_op_insert_after(s, op, sopc, 3);
2335 
2336                 sop->args[0] = temp_arg(dir_ts);
2337                 sop->args[1] = temp_arg(arg_ts->mem_base);
2338                 sop->args[2] = arg_ts->mem_offset;
2339 
2340                 arg_ts->state = TS_MEM;
2341             }
2342             /* Drop outputs that are dead.  */
2343             if (IS_DEAD_ARG(i)) {
2344                 arg_ts->state = TS_DEAD;
2345             }
2346         }
2347     }
2348 
2349     return changes;
2350 }
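
/* Sketch of the rewrite performed above for an indirect global G whose
   direct shadow temp is D (names illustrative, base typically env):

       before:   add_i32 G, G, t0
       after:    ld_i32  D, env, $0x10        (inserted if G was TS_DEAD)
                 add_i32 D, D, t0
                 st_i32  D, env, $0x10        (inserted if the result must
                                               be synced back)

   so between load and sync the value lives in a plain temp that the
   register allocator is free to keep in a host register.  */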
2351 
2352 #ifdef CONFIG_DEBUG_TCG
2353 static void dump_regs(TCGContext *s)
2354 {
2355     TCGTemp *ts;
2356     int i;
2357     char buf[64];
2358 
2359     for (i = 0; i < s->nb_temps; i++) {
2360         ts = &s->temps[i];
2361         printf("  %10s: ", tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts));
2362         switch (ts->val_type) {
2363         case TEMP_VAL_REG:
2364             printf("%s", tcg_target_reg_names[ts->reg]);
2365             break;
2366         case TEMP_VAL_MEM:
2367             printf("%d(%s)", (int)ts->mem_offset,
2368                    tcg_target_reg_names[ts->mem_base->reg]);
2369             break;
2370         case TEMP_VAL_CONST:
2371             printf("$0x%" TCG_PRIlx, ts->val);
2372             break;
2373         case TEMP_VAL_DEAD:
2374             printf("D");
2375             break;
2376         default:
2377             printf("???");
2378             break;
2379         }
2380         printf("\n");
2381     }
2382 
2383     for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
2384         if (s->reg_to_temp[i] != NULL) {
2385             printf("%s: %s\n",
2386                    tcg_target_reg_names[i],
2387                    tcg_get_arg_str_ptr(s, buf, sizeof(buf), s->reg_to_temp[i]));
2388         }
2389     }
2390 }
2391 
2392 static void check_regs(TCGContext *s)
2393 {
2394     int reg;
2395     int k;
2396     TCGTemp *ts;
2397     char buf[64];
2398 
2399     for (reg = 0; reg < TCG_TARGET_NB_REGS; reg++) {
2400         ts = s->reg_to_temp[reg];
2401         if (ts != NULL) {
2402             if (ts->val_type != TEMP_VAL_REG || ts->reg != reg) {
2403                 printf("Inconsistency for register %s:\n",
2404                        tcg_target_reg_names[reg]);
2405                 goto fail;
2406             }
2407         }
2408     }
2409     for (k = 0; k < s->nb_temps; k++) {
2410         ts = &s->temps[k];
2411         if (ts->val_type == TEMP_VAL_REG && !ts->fixed_reg
2412             && s->reg_to_temp[ts->reg] != ts) {
2413             printf("Inconsistency for temp %s:\n",
2414                    tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts));
2415         fail:
2416             printf("reg state:\n");
2417             dump_regs(s);
2418             tcg_abort();
2419         }
2420     }
2421 }
2422 #endif
2423 
2424 static void temp_allocate_frame(TCGContext *s, TCGTemp *ts)
2425 {
2426 #if !(defined(__sparc__) && TCG_TARGET_REG_BITS == 64)
2427     /* Sparc64 stack is accessed with offset of 2047 */
2428     s->current_frame_offset = (s->current_frame_offset +
2429                                (tcg_target_long)sizeof(tcg_target_long) - 1) &
2430         ~(sizeof(tcg_target_long) - 1);
2431 #endif
2432     if (s->current_frame_offset + (tcg_target_long)sizeof(tcg_target_long) >
2433         s->frame_end) {
2434         tcg_abort();
2435     }
2436     ts->mem_offset = s->current_frame_offset;
2437     ts->mem_base = s->frame_temp;
2438     ts->mem_allocated = 1;
2439     s->current_frame_offset += sizeof(tcg_target_long);
2440 }
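
/* Worked example for the rounding above on a 64-bit host, where
   sizeof(tcg_target_long) == 8: a current_frame_offset of 13 becomes
   (13 + 7) & ~7 == 16 before the 8-byte slot is carved out, keeping every
   spill slot naturally aligned.  */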
2441 
2442 static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet);
2443 
2444 /* Mark a temporary as free or dead.  If 'free_or_dead' is negative,
2445    mark it free; otherwise mark it dead.  */
2446 static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead)
2447 {
2448     if (ts->fixed_reg) {
2449         return;
2450     }
2451     if (ts->val_type == TEMP_VAL_REG) {
2452         s->reg_to_temp[ts->reg] = NULL;
2453     }
2454     ts->val_type = (free_or_dead < 0
2455                     || ts->temp_local
2456                     || ts->temp_global
2457                     ? TEMP_VAL_MEM : TEMP_VAL_DEAD);
2458 }
2459 
2460 /* Mark a temporary as dead.  */
2461 static inline void temp_dead(TCGContext *s, TCGTemp *ts)
2462 {
2463     temp_free_or_dead(s, ts, 1);
2464 }
2465 
2466 /* Sync a temporary to memory. 'allocated_regs' is used in case a temporary
2467    register needs to be allocated to store a constant.  If 'free_or_dead'
2468    is non-zero, subsequently release the temporary; if it is positive, the
2469    temp is dead; if it is negative, the temp is free.  */
2470 static void temp_sync(TCGContext *s, TCGTemp *ts,
2471                       TCGRegSet allocated_regs, int free_or_dead)
2472 {
2473     if (ts->fixed_reg) {
2474         return;
2475     }
2476     if (!ts->mem_coherent) {
2477         if (!ts->mem_allocated) {
2478             temp_allocate_frame(s, ts);
2479         }
2480         switch (ts->val_type) {
2481         case TEMP_VAL_CONST:
2482             /* If we're going to free the temp immediately, then we won't
2483                require it later in a register, so attempt to store the
2484                constant to memory directly.  */
2485             if (free_or_dead
2486                 && tcg_out_sti(s, ts->type, ts->val,
2487                                ts->mem_base->reg, ts->mem_offset)) {
2488                 break;
2489             }
2490             temp_load(s, ts, tcg_target_available_regs[ts->type],
2491                       allocated_regs);
2492             /* fallthrough */
2493 
2494         case TEMP_VAL_REG:
2495             tcg_out_st(s, ts->type, ts->reg,
2496                        ts->mem_base->reg, ts->mem_offset);
2497             break;
2498 
2499         case TEMP_VAL_MEM:
2500             break;
2501 
2502         case TEMP_VAL_DEAD:
2503         default:
2504             tcg_abort();
2505         }
2506         ts->mem_coherent = 1;
2507     }
2508     if (free_or_dead) {
2509         temp_free_or_dead(s, ts, free_or_dead);
2510     }
2511 }
2512 
2513 /* free register 'reg' by spilling the corresponding temporary if necessary */
2514 static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs)
2515 {
2516     TCGTemp *ts = s->reg_to_temp[reg];
2517     if (ts != NULL) {
2518         temp_sync(s, ts, allocated_regs, -1);
2519     }
2520 }
2521 
2522 /* Allocate a register belonging to desired_regs & ~allocated_regs */
2523 static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet desired_regs,
2524                             TCGRegSet allocated_regs, bool rev)
2525 {
2526     int i, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
2527     const int *order;
2528     TCGReg reg;
2529     TCGRegSet reg_ct;
2530 
2531     reg_ct = desired_regs & ~allocated_regs;
2532     order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
2533 
2534     /* first try free registers */
2535     for (i = 0; i < n; i++) {
2536         reg = order[i];
2537         if (tcg_regset_test_reg(reg_ct, reg) && s->reg_to_temp[reg] == NULL)
2538             return reg;
2539     }
2540 
2541     /* XXX: do better spill choice */
2542     for (i = 0; i < n; i++) {
2543         reg = order[i];
2544         if (tcg_regset_test_reg(reg_ct, reg)) {
2545             tcg_reg_free(s, reg, allocated_regs);
2546             return reg;
2547         }
2548     }
2549 
2550     tcg_abort();
2551 }
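
/* Note on the two scans above: the first returns a register that is both
   acceptable and currently unbound; only if none exists does the second
   spill an acceptable register via tcg_reg_free().  As the XXX notes, the
   spill victim is simply the first match in allocation order, not a
   smarter choice such as furthest next use.  */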
2552 
2553 /* Make sure the temporary is in a register.  If needed, allocate the register
2554    from DESIRED while avoiding ALLOCATED.  */
2555 static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
2556                       TCGRegSet allocated_regs)
2557 {
2558     TCGReg reg;
2559 
2560     switch (ts->val_type) {
2561     case TEMP_VAL_REG:
2562         return;
2563     case TEMP_VAL_CONST:
2564         reg = tcg_reg_alloc(s, desired_regs, allocated_regs, ts->indirect_base);
2565         tcg_out_movi(s, ts->type, reg, ts->val);
2566         ts->mem_coherent = 0;
2567         break;
2568     case TEMP_VAL_MEM:
2569         reg = tcg_reg_alloc(s, desired_regs, allocated_regs, ts->indirect_base);
2570         tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
2571         ts->mem_coherent = 1;
2572         break;
2573     case TEMP_VAL_DEAD:
2574     default:
2575         tcg_abort();
2576     }
2577     ts->reg = reg;
2578     ts->val_type = TEMP_VAL_REG;
2579     s->reg_to_temp[reg] = ts;
2580 }
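
/* Summary of the TEMP_VAL_* transitions performed by temp_load():
   CONST -> REG materialises the constant with tcg_out_movi and marks the
   memory copy stale; MEM -> REG reloads it with tcg_out_ld and leaves the
   memory copy coherent; REG is a no-op; DEAD aborts, as a dead temp has
   no value to load.  */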
2581 
2582 /* Save a temporary to memory. 'allocated_regs' is used in case a
2583    temporary register needs to be allocated to store a constant.  */
2584 static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
2585 {
2586     /* The liveness analysis already ensures that globals are back
2587        in memory. Keep a tcg_debug_assert for safety. */
2588     tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || ts->fixed_reg);
2589 }
2590 
2591 /* save globals to their canonical location and assume they can be
2592    modified by the following code. 'allocated_regs' is used in case a
2593    temporary register needs to be allocated to store a constant. */
2594 static void save_globals(TCGContext *s, TCGRegSet allocated_regs)
2595 {
2596     int i, n;
2597 
2598     for (i = 0, n = s->nb_globals; i < n; i++) {
2599         temp_save(s, &s->temps[i], allocated_regs);
2600     }
2601 }
2602 
2603 /* sync globals to their canonical location and assume they can be
2604    read by the following code. 'allocated_regs' is used in case a
2605    temporary register needs to be allocated to store a constant. */
2606 static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
2607 {
2608     int i, n;
2609 
2610     for (i = 0, n = s->nb_globals; i < n; i++) {
2611         TCGTemp *ts = &s->temps[i];
2612         tcg_debug_assert(ts->val_type != TEMP_VAL_REG
2613                          || ts->fixed_reg
2614                          || ts->mem_coherent);
2615     }
2616 }
2617 
2618 /* at the end of a basic block, we assume all temporaries are dead and
2619    all globals are stored at their canonical location. */
2620 static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
2621 {
2622     int i;
2623 
2624     for (i = s->nb_globals; i < s->nb_temps; i++) {
2625         TCGTemp *ts = &s->temps[i];
2626         if (ts->temp_local) {
2627             temp_save(s, ts, allocated_regs);
2628         } else {
2629             /* The liveness analysis already ensures that temps are dead.
2630                Keep a tcg_debug_assert for safety. */
2631             tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
2632         }
2633     }
2634 
2635     save_globals(s, allocated_regs);
2636 }
2637 
2638 static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots,
2639                                   tcg_target_ulong val, TCGLifeData arg_life)
2640 {
2641     if (ots->fixed_reg) {
2642         /* For fixed registers, we do not do any constant propagation.  */
2643         tcg_out_movi(s, ots->type, ots->reg, val);
2644         return;
2645     }
2646 
2647     /* The movi is not explicitly generated here.  */
2648     if (ots->val_type == TEMP_VAL_REG) {
2649         s->reg_to_temp[ots->reg] = NULL;
2650     }
2651     ots->val_type = TEMP_VAL_CONST;
2652     ots->val = val;
2653     ots->mem_coherent = 0;
2654     if (NEED_SYNC_ARG(0)) {
2655         temp_sync(s, ots, s->reserved_regs, IS_DEAD_ARG(0));
2656     } else if (IS_DEAD_ARG(0)) {
2657         temp_dead(s, ots);
2658     }
2659 }
2660 
2661 static void tcg_reg_alloc_movi(TCGContext *s, const TCGOp *op)
2662 {
2663     TCGTemp *ots = arg_temp(op->args[0]);
2664     tcg_target_ulong val = op->args[1];
2665 
2666     tcg_reg_alloc_do_movi(s, ots, val, op->life);
2667 }
2668 
2669 static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op)
2670 {
2671     const TCGLifeData arg_life = op->life;
2672     TCGRegSet allocated_regs;
2673     TCGTemp *ts, *ots;
2674     TCGType otype, itype;
2675 
2676     allocated_regs = s->reserved_regs;
2677     ots = arg_temp(op->args[0]);
2678     ts = arg_temp(op->args[1]);
2679 
2680     /* Note that otype != itype for no-op truncation.  */
2681     otype = ots->type;
2682     itype = ts->type;
2683 
2684     if (ts->val_type == TEMP_VAL_CONST) {
2685         /* propagate constant or generate sti */
2686         tcg_target_ulong val = ts->val;
2687         if (IS_DEAD_ARG(1)) {
2688             temp_dead(s, ts);
2689         }
2690         tcg_reg_alloc_do_movi(s, ots, val, arg_life);
2691         return;
2692     }
2693 
2694     /* If the source value is in memory we're going to be forced
2695        to have it in a register in order to perform the copy.  Copy
2696        the SOURCE value into its own register first, that way we
2697        don't have to reload SOURCE the next time it is used. */
2698     if (ts->val_type == TEMP_VAL_MEM) {
2699         temp_load(s, ts, tcg_target_available_regs[itype], allocated_regs);
2700     }
2701 
2702     tcg_debug_assert(ts->val_type == TEMP_VAL_REG);
2703     if (IS_DEAD_ARG(0) && !ots->fixed_reg) {
2704         /* mov to a non-saved dead register makes no sense (even with
2705            liveness analysis disabled). */
2706         tcg_debug_assert(NEED_SYNC_ARG(0));
2707         if (!ots->mem_allocated) {
2708             temp_allocate_frame(s, ots);
2709         }
2710         tcg_out_st(s, otype, ts->reg, ots->mem_base->reg, ots->mem_offset);
2711         if (IS_DEAD_ARG(1)) {
2712             temp_dead(s, ts);
2713         }
2714         temp_dead(s, ots);
2715     } else {
2716         if (IS_DEAD_ARG(1) && !ts->fixed_reg && !ots->fixed_reg) {
2717             /* the mov can be suppressed */
2718             if (ots->val_type == TEMP_VAL_REG) {
2719                 s->reg_to_temp[ots->reg] = NULL;
2720             }
2721             ots->reg = ts->reg;
2722             temp_dead(s, ts);
2723         } else {
2724             if (ots->val_type != TEMP_VAL_REG) {
2725                 /* When allocating a new register, make sure to not spill the
2726                    input one. */
2727                 tcg_regset_set_reg(allocated_regs, ts->reg);
2728                 ots->reg = tcg_reg_alloc(s, tcg_target_available_regs[otype],
2729                                          allocated_regs, ots->indirect_base);
2730             }
2731             tcg_out_mov(s, otype, ots->reg, ts->reg);
2732         }
2733         ots->val_type = TEMP_VAL_REG;
2734         ots->mem_coherent = 0;
2735         s->reg_to_temp[ots->reg] = ots;
2736         if (NEED_SYNC_ARG(0)) {
2737             temp_sync(s, ots, allocated_regs, 0);
2738         }
2739     }
2740 }
2741 
2742 static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
2743 {
2744     const TCGLifeData arg_life = op->life;
2745     const TCGOpDef * const def = &tcg_op_defs[op->opc];
2746     TCGRegSet i_allocated_regs;
2747     TCGRegSet o_allocated_regs;
2748     int i, k, nb_iargs, nb_oargs;
2749     TCGReg reg;
2750     TCGArg arg;
2751     const TCGArgConstraint *arg_ct;
2752     TCGTemp *ts;
2753     TCGArg new_args[TCG_MAX_OP_ARGS];
2754     int const_args[TCG_MAX_OP_ARGS];
2755 
2756     nb_oargs = def->nb_oargs;
2757     nb_iargs = def->nb_iargs;
2758 
2759     /* copy constants */
2760     memcpy(new_args + nb_oargs + nb_iargs,
2761            op->args + nb_oargs + nb_iargs,
2762            sizeof(TCGArg) * def->nb_cargs);
2763 
2764     i_allocated_regs = s->reserved_regs;
2765     o_allocated_regs = s->reserved_regs;
2766 
2767     /* satisfy input constraints */
2768     for (k = 0; k < nb_iargs; k++) {
2769         i = def->sorted_args[nb_oargs + k];
2770         arg = op->args[i];
2771         arg_ct = &def->args_ct[i];
2772         ts = arg_temp(arg);
2773 
2774         if (ts->val_type == TEMP_VAL_CONST
2775             && tcg_target_const_match(ts->val, ts->type, arg_ct)) {
2776             /* constant is OK for instruction */
2777             const_args[i] = 1;
2778             new_args[i] = ts->val;
2779             goto iarg_end;
2780         }
2781 
2782         temp_load(s, ts, arg_ct->u.regs, i_allocated_regs);
2783 
2784         if (arg_ct->ct & TCG_CT_IALIAS) {
2785             if (ts->fixed_reg) {
2786                 /* if fixed register, we must allocate a new register
2787                    if the alias is not the same register */
2788                 if (arg != op->args[arg_ct->alias_index])
2789                     goto allocate_in_reg;
2790             } else {
2791                 /* if the input is aliased to an output and if it is
2792                    not dead after the instruction, we must allocate
2793                    a new register and move it */
2794                 if (!IS_DEAD_ARG(i)) {
2795                     goto allocate_in_reg;
2796                 }
2797                 /* check if the current register has already been allocated
2798                    for another input aliased to an output */
2799                 int k2, i2;
2800                 for (k2 = 0 ; k2 < k ; k2++) {
2801                     i2 = def->sorted_args[nb_oargs + k2];
2802                     if ((def->args_ct[i2].ct & TCG_CT_IALIAS) &&
2803                         (new_args[i2] == ts->reg)) {
2804                         goto allocate_in_reg;
2805                     }
2806                 }
2807             }
2808         }
2809         reg = ts->reg;
2810         if (tcg_regset_test_reg(arg_ct->u.regs, reg)) {
2811             /* nothing to do : the constraint is satisfied */
2812         } else {
2813         allocate_in_reg:
2814             /* allocate a new register matching the constraint
2815                and move the temporary register into it */
2816             reg = tcg_reg_alloc(s, arg_ct->u.regs, i_allocated_regs,
2817                                 ts->indirect_base);
2818             tcg_out_mov(s, ts->type, reg, ts->reg);
2819         }
2820         new_args[i] = reg;
2821         const_args[i] = 0;
2822         tcg_regset_set_reg(i_allocated_regs, reg);
2823     iarg_end: ;
2824     }
2825 
2826     /* mark dead temporaries and free the associated registers */
2827     for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
2828         if (IS_DEAD_ARG(i)) {
2829             temp_dead(s, arg_temp(op->args[i]));
2830         }
2831     }
2832 
2833     if (def->flags & TCG_OPF_BB_END) {
2834         tcg_reg_alloc_bb_end(s, i_allocated_regs);
2835     } else {
2836         if (def->flags & TCG_OPF_CALL_CLOBBER) {
2837             /* XXX: permit a generic clobber register list? */
2838             for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
2839                 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
2840                     tcg_reg_free(s, i, i_allocated_regs);
2841                 }
2842             }
2843         }
2844         if (def->flags & TCG_OPF_SIDE_EFFECTS) {
2845             /* sync globals if the op has side effects and might trigger
2846                an exception. */
2847             sync_globals(s, i_allocated_regs);
2848         }
2849 
2850         /* satisfy the output constraints */
2851         for (k = 0; k < nb_oargs; k++) {
2852             i = def->sorted_args[k];
2853             arg = op->args[i];
2854             arg_ct = &def->args_ct[i];
2855             ts = arg_temp(arg);
2856             if ((arg_ct->ct & TCG_CT_ALIAS)
2857                 && !const_args[arg_ct->alias_index]) {
2858                 reg = new_args[arg_ct->alias_index];
2859             } else if (arg_ct->ct & TCG_CT_NEWREG) {
2860                 reg = tcg_reg_alloc(s, arg_ct->u.regs,
2861                                     i_allocated_regs | o_allocated_regs,
2862                                     ts->indirect_base);
2863             } else {
2864                 /* if fixed register, we try to use it */
2865                 reg = ts->reg;
2866                 if (ts->fixed_reg &&
2867                     tcg_regset_test_reg(arg_ct->u.regs, reg)) {
2868                     goto oarg_end;
2869                 }
2870                 reg = tcg_reg_alloc(s, arg_ct->u.regs, o_allocated_regs,
2871                                     ts->indirect_base);
2872             }
2873             tcg_regset_set_reg(o_allocated_regs, reg);
2874             /* if a fixed register is used, then a move will be done afterwards */
2875             if (!ts->fixed_reg) {
2876                 if (ts->val_type == TEMP_VAL_REG) {
2877                     s->reg_to_temp[ts->reg] = NULL;
2878                 }
2879                 ts->val_type = TEMP_VAL_REG;
2880                 ts->reg = reg;
2881                 /* temp value is modified, so the value kept in memory is
2882                    potentially not the same */
2883                 ts->mem_coherent = 0;
2884                 s->reg_to_temp[reg] = ts;
2885             }
2886         oarg_end:
2887             new_args[i] = reg;
2888         }
2889     }
2890 
2891     /* emit instruction */
2892     tcg_out_op(s, op->opc, new_args, const_args);
2893 
2894     /* move the outputs in the correct register if needed */
2895     for (i = 0; i < nb_oargs; i++) {
2896         ts = arg_temp(op->args[i]);
2897         reg = new_args[i];
2898         if (ts->fixed_reg && ts->reg != reg) {
2899             tcg_out_mov(s, ts->type, ts->reg, reg);
2900         }
2901         if (NEED_SYNC_ARG(i)) {
2902             temp_sync(s, ts, o_allocated_regs, IS_DEAD_ARG(i));
2903         } else if (IS_DEAD_ARG(i)) {
2904             temp_dead(s, ts);
2905         }
2906     }
2907 }
2908 
2909 #ifdef TCG_TARGET_STACK_GROWSUP
2910 #define STACK_DIR(x) (-(x))
2911 #else
2912 #define STACK_DIR(x) (x)
2913 #endif
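
/* STACK_DIR() yields an offset step that follows the host stack's growth
   direction: negated on TCG_TARGET_STACK_GROWSUP hosts, identity
   otherwise, so frame arithmetic can be written once for both layouts.  */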
2914 
2915 static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
2916 {
2917     const int nb_oargs = op->callo;
2918     const int nb_iargs = op->calli;
2919     const TCGLifeData arg_life = op->life;
2920     int flags, nb_regs, i;
2921     TCGReg reg;
2922     TCGArg arg;
2923     TCGTemp *ts;
2924     intptr_t stack_offset;
2925     size_t call_stack_size;
2926     tcg_insn_unit *func_addr;
2927     int allocate_args;
2928     TCGRegSet allocated_regs;
2929 
2930     func_addr = (tcg_insn_unit *)(intptr_t)op->args[nb_oargs + nb_iargs];
2931     flags = op->args[nb_oargs + nb_iargs + 1];
2932 
2933     nb_regs = ARRAY_SIZE(tcg_target_call_iarg_regs);
2934     if (nb_regs > nb_iargs) {
2935         nb_regs = nb_iargs;
2936     }
2937 
2938     /* assign stack slots first */
2939     call_stack_size = (nb_iargs - nb_regs) * sizeof(tcg_target_long);
2940     call_stack_size = (call_stack_size + TCG_TARGET_STACK_ALIGN - 1) &
2941         ~(TCG_TARGET_STACK_ALIGN - 1);
2942     allocate_args = (call_stack_size > TCG_STATIC_CALL_ARGS_SIZE);
2943     if (allocate_args) {
2944         /* XXX: if more than TCG_STATIC_CALL_ARGS_SIZE is needed,
2945            preallocate call stack */
2946         tcg_abort();
2947     }
2948 
2949     stack_offset = TCG_TARGET_CALL_STACK_OFFSET;
2950     for (i = nb_regs; i < nb_iargs; i++) {
2951         arg = op->args[nb_oargs + i];
2952 #ifdef TCG_TARGET_STACK_GROWSUP
2953         stack_offset -= sizeof(tcg_target_long);
2954 #endif
2955         if (arg != TCG_CALL_DUMMY_ARG) {
2956             ts = arg_temp(arg);
2957             temp_load(s, ts, tcg_target_available_regs[ts->type],
2958                       s->reserved_regs);
2959             tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK, stack_offset);
2960         }
2961 #ifndef TCG_TARGET_STACK_GROWSUP
2962         stack_offset += sizeof(tcg_target_long);
2963 #endif
2964     }
2965 
2966     /* assign input registers */
2967     allocated_regs = s->reserved_regs;
2968     for (i = 0; i < nb_regs; i++) {
2969         arg = op->args[nb_oargs + i];
2970         if (arg != TCG_CALL_DUMMY_ARG) {
2971             ts = arg_temp(arg);
2972             reg = tcg_target_call_iarg_regs[i];
2973             tcg_reg_free(s, reg, allocated_regs);
2974 
2975             if (ts->val_type == TEMP_VAL_REG) {
2976                 if (ts->reg != reg) {
2977                     tcg_out_mov(s, ts->type, reg, ts->reg);
2978                 }
2979             } else {
2980                 TCGRegSet arg_set = 0;
2981 
2982                 tcg_regset_set_reg(arg_set, reg);
2983                 temp_load(s, ts, arg_set, allocated_regs);
2984             }
2985 
2986             tcg_regset_set_reg(allocated_regs, reg);
2987         }
2988     }
2989 
2990     /* mark dead temporaries and free the associated registers */
2991     for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
2992         if (IS_DEAD_ARG(i)) {
2993             temp_dead(s, arg_temp(op->args[i]));
2994         }
2995     }
2996 
2997     /* clobber call registers */
2998     for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
2999         if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
3000             tcg_reg_free(s, i, allocated_regs);
3001         }
3002     }
3003 
3004     /* Save globals if they might be written by the helper, sync them if
3005        they might be read. */
3006     if (flags & TCG_CALL_NO_READ_GLOBALS) {
3007         /* Nothing to do */
3008     } else if (flags & TCG_CALL_NO_WRITE_GLOBALS) {
3009         sync_globals(s, allocated_regs);
3010     } else {
3011         save_globals(s, allocated_regs);
3012     }
3013 
3014     tcg_out_call(s, func_addr);
3015 
3016     /* assign output registers and emit moves if needed */
3017     for (i = 0; i < nb_oargs; i++) {
3018         arg = op->args[i];
3019         ts = arg_temp(arg);
3020         reg = tcg_target_call_oarg_regs[i];
3021         tcg_debug_assert(s->reg_to_temp[reg] == NULL);
3022 
3023         if (ts->fixed_reg) {
3024             if (ts->reg != reg) {
3025                 tcg_out_mov(s, ts->type, ts->reg, reg);
3026             }
3027         } else {
3028             if (ts->val_type == TEMP_VAL_REG) {
3029                 s->reg_to_temp[ts->reg] = NULL;
3030             }
3031             ts->val_type = TEMP_VAL_REG;
3032             ts->reg = reg;
3033             ts->mem_coherent = 0;
3034             s->reg_to_temp[reg] = ts;
3035             if (NEED_SYNC_ARG(i)) {
3036                 temp_sync(s, ts, allocated_regs, IS_DEAD_ARG(i));
3037             } else if (IS_DEAD_ARG(i)) {
3038                 temp_dead(s, ts);
3039             }
3040         }
3041     }
3042 }
3043 
3044 #ifdef CONFIG_PROFILER
3045 
3046 /* avoid copy/paste errors */
3047 #define PROF_ADD(to, from, field)                       \
3048     do {                                                \
3049         (to)->field += atomic_read(&((from)->field));   \
3050     } while (0)
3051 
3052 #define PROF_MAX(to, from, field)                                       \
3053     do {                                                                \
3054         typeof((from)->field) val__ = atomic_read(&((from)->field));    \
3055         if (val__ > (to)->field) {                                      \
3056             (to)->field = val__;                                        \
3057         }                                                               \
3058     } while (0)
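
/* For instance, PROF_ADD(prof, orig, tb_count) expands to

       (prof)->tb_count += atomic_read(&((orig)->tb_count));

   while PROF_MAX keeps a running maximum instead of a sum, which is what
   the op_count_max and temp_count_max fields below require.  */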
3059 
3060 /* Pass in a zeroed @prof */
3061 static inline
3062 void tcg_profile_snapshot(TCGProfile *prof, bool counters, bool table)
3063 {
3064     unsigned int n_ctxs = atomic_read(&n_tcg_ctxs);
3065     unsigned int i;
3066 
3067     for (i = 0; i < n_ctxs; i++) {
3068         TCGContext *s = atomic_read(&tcg_ctxs[i]);
3069         const TCGProfile *orig = &s->prof;
3070 
3071         if (counters) {
3072             PROF_ADD(prof, orig, tb_count1);
3073             PROF_ADD(prof, orig, tb_count);
3074             PROF_ADD(prof, orig, op_count);
3075             PROF_MAX(prof, orig, op_count_max);
3076             PROF_ADD(prof, orig, temp_count);
3077             PROF_MAX(prof, orig, temp_count_max);
3078             PROF_ADD(prof, orig, del_op_count);
3079             PROF_ADD(prof, orig, code_in_len);
3080             PROF_ADD(prof, orig, code_out_len);
3081             PROF_ADD(prof, orig, search_out_len);
3082             PROF_ADD(prof, orig, interm_time);
3083             PROF_ADD(prof, orig, code_time);
3084             PROF_ADD(prof, orig, la_time);
3085             PROF_ADD(prof, orig, opt_time);
3086             PROF_ADD(prof, orig, restore_count);
3087             PROF_ADD(prof, orig, restore_time);
3088         }
3089         if (table) {
3090             int i;
3091 
3092             for (i = 0; i < NB_OPS; i++) {
3093                 PROF_ADD(prof, orig, table_op_count[i]);
3094             }
3095         }
3096     }
3097 }
3098 
3099 #undef PROF_ADD
3100 #undef PROF_MAX
3101 
3102 static void tcg_profile_snapshot_counters(TCGProfile *prof)
3103 {
3104     tcg_profile_snapshot(prof, true, false);
3105 }
3106 
3107 static void tcg_profile_snapshot_table(TCGProfile *prof)
3108 {
3109     tcg_profile_snapshot(prof, false, true);
3110 }
3111 
3112 void tcg_dump_op_count(FILE *f, fprintf_function cpu_fprintf)
3113 {
3114     TCGProfile prof = {};
3115     int i;
3116 
3117     tcg_profile_snapshot_table(&prof);
3118     for (i = 0; i < NB_OPS; i++) {
3119         cpu_fprintf(f, "%s %" PRId64 "\n", tcg_op_defs[i].name,
3120                     prof.table_op_count[i]);
3121     }
3122 }
3123 #else
3124 void tcg_dump_op_count(FILE *f, fprintf_function cpu_fprintf)
3125 {
3126     cpu_fprintf(f, "[TCG profiler not compiled]\n");
3127 }
3128 #endif
3129 
3130 
3131 int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
3132 {
3133 #ifdef CONFIG_PROFILER
3134     TCGProfile *prof = &s->prof;
3135 #endif
3136     int i, oi, oi_next, num_insns;
3137 
3138 #ifdef CONFIG_PROFILER
3139     {
3140         int n;
3141 
3142         n = s->gen_op_buf[0].prev + 1;
3143         atomic_set(&prof->op_count, prof->op_count + n);
3144         if (n > prof->op_count_max) {
3145             atomic_set(&prof->op_count_max, n);
3146         }
3147 
3148         n = s->nb_temps;
3149         atomic_set(&prof->temp_count, prof->temp_count + n);
3150         if (n > prof->temp_count_max) {
3151             atomic_set(&prof->temp_count_max, n);
3152         }
3153     }
3154 #endif
3155 
3156 #ifdef DEBUG_DISAS
3157     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)
3158                  && qemu_log_in_addr_range(tb->pc))) {
3159         qemu_log_lock();
3160         qemu_log("OP:\n");
3161         tcg_dump_ops(s);
3162         qemu_log("\n");
3163         qemu_log_unlock();
3164     }
3165 #endif
3166 
3167 #ifdef CONFIG_PROFILER
3168     atomic_set(&prof->opt_time, prof->opt_time - profile_getclock());
3169 #endif
3170 
3171 #ifdef USE_TCG_OPTIMIZATIONS
3172     tcg_optimize(s);
3173 #endif
3174 
3175 #ifdef CONFIG_PROFILER
3176     atomic_set(&prof->opt_time, prof->opt_time + profile_getclock());
3177     atomic_set(&prof->la_time, prof->la_time - profile_getclock());
3178 #endif
3179 
3180     liveness_pass_1(s);
3181 
3182     if (s->nb_indirects > 0) {
3183 #ifdef DEBUG_DISAS
3184         if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
3185                      && qemu_log_in_addr_range(tb->pc))) {
3186             qemu_log_lock();
3187             qemu_log("OP before indirect lowering:\n");
3188             tcg_dump_ops(s);
3189             qemu_log("\n");
3190             qemu_log_unlock();
3191         }
3192 #endif
3193         /* Replace indirect temps with direct temps.  */
3194         if (liveness_pass_2(s)) {
3195             /* If changes were made, re-run liveness.  */
3196             liveness_pass_1(s);
3197         }
3198     }
3199 
3200 #ifdef CONFIG_PROFILER
3201     atomic_set(&prof->la_time, prof->la_time + profile_getclock());
3202 #endif
3203 
3204 #ifdef DEBUG_DISAS
3205     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT)
3206                  && qemu_log_in_addr_range(tb->pc))) {
3207         qemu_log_lock();
3208         qemu_log("OP after optimization and liveness analysis:\n");
3209         tcg_dump_ops(s);
3210         qemu_log("\n");
3211         qemu_log_unlock();
3212     }
3213 #endif
3214 
3215     tcg_reg_alloc_start(s);
3216 
3217     s->code_buf = tb->tc.ptr;
3218     s->code_ptr = tb->tc.ptr;
3219 
3220 #ifdef TCG_TARGET_NEED_LDST_LABELS
3221     s->ldst_labels = NULL;
3222 #endif
3223 #ifdef TCG_TARGET_NEED_POOL_LABELS
3224     s->pool_labels = NULL;
3225 #endif
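    /* Backends that emit out-of-line qemu_ld/st slow paths or use a
       constant pool collect label records in these lists during code
       generation; they are resolved by the *_finalize() calls at the
       end of this function.  */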
3226 
3227     num_insns = -1;
3228     for (oi = s->gen_op_buf[0].next; oi != 0; oi = oi_next) {
3229         TCGOp * const op = &s->gen_op_buf[oi];
3230         TCGOpcode opc = op->opc;
3231 
3232         oi_next = op->next;
3233 #ifdef CONFIG_PROFILER
3234         atomic_set(&prof->table_op_count[opc], prof->table_op_count[opc] + 1);
3235 #endif
3236 
3237         switch (opc) {
3238         case INDEX_op_mov_i32:
3239         case INDEX_op_mov_i64:
3240             tcg_reg_alloc_mov(s, op);
3241             break;
3242         case INDEX_op_movi_i32:
3243         case INDEX_op_movi_i64:
3244             tcg_reg_alloc_movi(s, op);
3245             break;
3246         case INDEX_op_insn_start:
3247             if (num_insns >= 0) {
3248                 s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);
3249             }
3250             num_insns++;
3251             for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
3252                 target_ulong a;
3253 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
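                /* A 64-bit guest on a 32-bit host records each insn_start
                   word as two 32-bit halves; reassemble them here.  */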
3254                 a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
3255 #else
3256                 a = op->args[i];
3257 #endif
3258                 s->gen_insn_data[num_insns][i] = a;
3259             }
3260             break;
3261         case INDEX_op_discard:
3262             temp_dead(s, arg_temp(op->args[0]));
3263             break;
3264         case INDEX_op_set_label:
3265             tcg_reg_alloc_bb_end(s, s->reserved_regs);
3266             tcg_out_label(s, arg_label(op->args[0]), s->code_ptr);
3267             break;
3268         case INDEX_op_call:
3269             tcg_reg_alloc_call(s, op);
3270             break;
3271         default:
3272             /* Sanity check that we've not introduced any unhandled opcodes. */
3273             tcg_debug_assert(tcg_op_supported(opc));
3274             /* Note: it would be much faster to have specialized
3275                register allocator functions for some common argument
3276                patterns.  */
3277             tcg_reg_alloc_op(s, op);
3278             break;
3279         }
3280 #ifdef CONFIG_DEBUG_TCG
3281         check_regs(s);
3282 #endif
3283         /* Test for (pending) buffer overflow.  The assumption is that any
3284            one operation beginning below the high water mark cannot overrun
3285            the buffer completely.  Thus we can test for overflow after
3286            generating code without having to check during generation.  */
3287         if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
3288             return -1;
3289         }
3290     }
3291     tcg_debug_assert(num_insns >= 0);
3292     s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);
3293 
3294     /* Generate TB finalization at the end of block */
3295 #ifdef TCG_TARGET_NEED_LDST_LABELS
3296     if (!tcg_out_ldst_finalize(s)) {
3297         return -1;
3298     }
3299 #endif
3300 #ifdef TCG_TARGET_NEED_POOL_LABELS
3301     if (!tcg_out_pool_finalize(s)) {
3302         return -1;
3303     }
3304 #endif
3305 
3306     /* flush instruction cache */
3307     flush_icache_range((uintptr_t)s->code_buf, (uintptr_t)s->code_ptr);
3308 
3309     return tcg_current_code_size(s);
3310 }
3311 
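/* A minimal sketch of how a caller is expected to react to the -1
 * returns above (hypothetical local names; in QEMU proper this logic
 * lives in tb_gen_code() in accel/tcg/translate-all.c): on overflow the
 * partial TB is dropped, the whole code buffer is flushed, and
 * translation is retried with fresh space.
 *
 *     gen_code_size = tcg_gen_code(s, tb);
 *     if (unlikely(gen_code_size < 0)) {
 *         goto buffer_overflow;    // flush code_gen_buffer and retry
 *     }
 */
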
3312 #ifdef CONFIG_PROFILER
3313 void tcg_dump_info(FILE *f, fprintf_function cpu_fprintf)
3314 {
3315     TCGProfile prof = {};
3316     const TCGProfile *s;
3317     int64_t tb_count;
3318     int64_t tb_div_count;
3319     int64_t tot;
3320 
3321     tcg_profile_snapshot_counters(&prof);
3322     s = &prof;
3323     tb_count = s->tb_count;
3324     tb_div_count = tb_count ? tb_count : 1;
3325     tot = s->interm_time + s->code_time;
3326 
3327     cpu_fprintf(f, "JIT cycles          %" PRId64 " (%0.3f s at 2.4 GHz)\n",
3328                 tot, tot / 2.4e9);
3329     cpu_fprintf(f, "translated TBs      %" PRId64 " (aborted=%" PRId64 " %0.1f%%)\n",
3330                 tb_count, s->tb_count1 - tb_count,
3331                 (double)(s->tb_count1 - tb_count)
3332                 / (s->tb_count1 ? s->tb_count1 : 1) * 100.0);
3333     cpu_fprintf(f, "avg ops/TB          %0.1f max=%d\n",
3334                 (double)s->op_count / tb_div_count, s->op_count_max);
3335     cpu_fprintf(f, "deleted ops/TB      %0.2f\n",
3336                 (double)s->del_op_count / tb_div_count);
3337     cpu_fprintf(f, "avg temps/TB        %0.2f max=%d\n",
3338                 (double)s->temp_count / tb_div_count, s->temp_count_max);
3339     cpu_fprintf(f, "avg host code/TB    %0.1f\n",
3340                 (double)s->code_out_len / tb_div_count);
3341     cpu_fprintf(f, "avg search data/TB  %0.1f\n",
3342                 (double)s->search_out_len / tb_div_count);
3343 
3344     cpu_fprintf(f, "cycles/op           %0.1f\n",
3345                 s->op_count ? (double)tot / s->op_count : 0);
3346     cpu_fprintf(f, "cycles/in byte      %0.1f\n",
3347                 s->code_in_len ? (double)tot / s->code_in_len : 0);
3348     cpu_fprintf(f, "cycles/out byte     %0.1f\n",
3349                 s->code_out_len ? (double)tot / s->code_out_len : 0);
3350     cpu_fprintf(f, "cycles/search byte  %0.1f\n",
3351                 s->search_out_len ? (double)tot / s->search_out_len : 0);
3352     if (tot == 0) {
3353         tot = 1;
3354     }
3355     cpu_fprintf(f, "  gen_interm time   %0.1f%%\n",
3356                 (double)s->interm_time / tot * 100.0);
3357     cpu_fprintf(f, "  gen_code time     %0.1f%%\n",
3358                 (double)s->code_time / tot * 100.0);
3359     cpu_fprintf(f, "optim./code time    %0.1f%%\n",
3360                 (double)s->opt_time / (s->code_time ? s->code_time : 1)
3361                 * 100.0);
3362     cpu_fprintf(f, "liveness/code time  %0.1f%%\n",
3363                 (double)s->la_time / (s->code_time ? s->code_time : 1) * 100.0);
3364     cpu_fprintf(f, "cpu_restore count   %" PRId64 "\n",
3365                 s->restore_count);
3366     cpu_fprintf(f, "  avg cycles        %0.1f\n",
3367                 s->restore_count ? (double)s->restore_time / s->restore_count : 0);
3368 }
3369 #else
3370 void tcg_dump_info(FILE *f, fprintf_function cpu_fprintf)
3371 {
3372     cpu_fprintf(f, "[TCG profiler not compiled]\n");
3373 }
3374 #endif
3375 
3376 #ifdef ELF_HOST_MACHINE
3377 /* In order to use this feature, the backend needs to do three things:
3378 
3379    (1) Define ELF_HOST_MACHINE to indicate both what value to
3380        put into the ELF image and to indicate support for the feature.
3381 
3382    (2) Define tcg_register_jit.  This should create a buffer containing
3383        the contents of a .debug_frame section that describes the post-
3384        prologue unwind info for the tcg machine.
3385 
3386    (3) Call tcg_register_jit_int, with the constructed .debug_frame.
3387 */
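
/* As a hedged sketch of step (2), the backends follow this pattern in
 * their tcg-target.inc.c (the DebugFrame type is the backend's own,
 * typically the common CIE/FDE header above plus its register rules):
 *
 *     void tcg_register_jit(void *buf, size_t buf_size)
 *     {
 *         tcg_register_jit_int(buf, buf_size,
 *                              &debug_frame, sizeof(debug_frame));
 *     }
 */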
3388 
3389 /* Begin GDB interface.  THE FOLLOWING MUST MATCH GDB DOCS.  */
3390 typedef enum {
3391     JIT_NOACTION = 0,
3392     JIT_REGISTER_FN,
3393     JIT_UNREGISTER_FN
3394 } jit_actions_t;
3395 
3396 struct jit_code_entry {
3397     struct jit_code_entry *next_entry;
3398     struct jit_code_entry *prev_entry;
3399     const void *symfile_addr;
3400     uint64_t symfile_size;
3401 };
3402 
3403 struct jit_descriptor {
3404     uint32_t version;
3405     uint32_t action_flag;
3406     struct jit_code_entry *relevant_entry;
3407     struct jit_code_entry *first_entry;
3408 };
3409 
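/* GDB plants a breakpoint inside this function; the JIT calls it after
   updating __jit_debug_descriptor.  The empty asm keeps the compiler
   from eliding the otherwise-empty function or the call to it.  */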
3410 void __jit_debug_register_code(void) __attribute__((noinline));
3411 void __jit_debug_register_code(void)
3412 {
3413     asm("");
3414 }
3415 
3416 /* Must statically initialize the version, because GDB may check
3417    the version before we can set it.  */
3418 struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };
3419 
3420 /* End GDB interface.  */
3421 
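/* Return the offset of @str within the string table @strtab.  Every
   lookup below is against the static .str template, where the string is
   known to be present, so there is deliberately no end-of-table check.  */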
3422 static int find_string(const char *strtab, const char *str)
3423 {
3424     const char *p = strtab + 1;
3425 
3426     while (1) {
3427         if (strcmp(p, str) == 0) {
3428             return p - strtab;
3429         }
3430         p += strlen(p) + 1;
3431     }
3432 }
3433 
3434 static void tcg_register_jit_int(void *buf_ptr, size_t buf_size,
3435                                  const void *debug_frame,
3436                                  size_t debug_frame_size)
3437 {
3438     struct __attribute__((packed)) DebugInfo {
3439         uint32_t  len;
3440         uint16_t  version;
3441         uint32_t  abbrev;
3442         uint8_t   ptr_size;
3443         uint8_t   cu_die;
3444         uint16_t  cu_lang;
3445         uintptr_t cu_low_pc;
3446         uintptr_t cu_high_pc;
3447         uint8_t   fn_die;
3448         char      fn_name[16];
3449         uintptr_t fn_low_pc;
3450         uintptr_t fn_high_pc;
3451         uint8_t   cu_eoc;
3452     };
3453 
3454     struct ElfImage {
3455         ElfW(Ehdr) ehdr;
3456         ElfW(Phdr) phdr;
3457         ElfW(Shdr) shdr[7];
3458         ElfW(Sym)  sym[2];
3459         struct DebugInfo di;
3460         uint8_t    da[24];
3461         char       str[80];
3462     };
3463 
3464     struct ElfImage *img;
3465 
3466     static const struct ElfImage img_template = {
3467         .ehdr = {
3468             .e_ident[EI_MAG0] = ELFMAG0,
3469             .e_ident[EI_MAG1] = ELFMAG1,
3470             .e_ident[EI_MAG2] = ELFMAG2,
3471             .e_ident[EI_MAG3] = ELFMAG3,
3472             .e_ident[EI_CLASS] = ELF_CLASS,
3473             .e_ident[EI_DATA] = ELF_DATA,
3474             .e_ident[EI_VERSION] = EV_CURRENT,
3475             .e_type = ET_EXEC,
3476             .e_machine = ELF_HOST_MACHINE,
3477             .e_version = EV_CURRENT,
3478             .e_phoff = offsetof(struct ElfImage, phdr),
3479             .e_shoff = offsetof(struct ElfImage, shdr),
3480             .e_ehsize = sizeof(ElfW(Ehdr)),
3481             .e_phentsize = sizeof(ElfW(Phdr)),
3482             .e_phnum = 1,
3483             .e_shentsize = sizeof(ElfW(Shdr)),
3484             .e_shnum = ARRAY_SIZE(img->shdr),
3485             .e_shstrndx = ARRAY_SIZE(img->shdr) - 1,
3486 #ifdef ELF_HOST_FLAGS
3487             .e_flags = ELF_HOST_FLAGS,
3488 #endif
3489 #ifdef ELF_OSABI
3490             .e_ident[EI_OSABI] = ELF_OSABI,
3491 #endif
3492         },
3493         .phdr = {
3494             .p_type = PT_LOAD,
3495             .p_flags = PF_X,
3496         },
3497         .shdr = {
3498             [0] = { .sh_type = SHT_NULL },
3499             /* Trick: The contents of code_gen_buffer are not present in
3500                this fake ELF file; that got allocated elsewhere.  Therefore
3501                we mark .text as SHT_NOBITS (similar to .bss) so that readers
3502                will not look for contents.  We can record any address.  */
3503             [1] = { /* .text */
3504                 .sh_type = SHT_NOBITS,
3505                 .sh_flags = SHF_EXECINSTR | SHF_ALLOC,
3506             },
3507             [2] = { /* .debug_info */
3508                 .sh_type = SHT_PROGBITS,
3509                 .sh_offset = offsetof(struct ElfImage, di),
3510                 .sh_size = sizeof(struct DebugInfo),
3511             },
3512             [3] = { /* .debug_abbrev */
3513                 .sh_type = SHT_PROGBITS,
3514                 .sh_offset = offsetof(struct ElfImage, da),
3515                 .sh_size = sizeof(img->da),
3516             },
3517             [4] = { /* .debug_frame */
3518                 .sh_type = SHT_PROGBITS,
3519                 .sh_offset = sizeof(struct ElfImage),
3520             },
3521             [5] = { /* .symtab */
3522                 .sh_type = SHT_SYMTAB,
3523                 .sh_offset = offsetof(struct ElfImage, sym),
3524                 .sh_size = sizeof(img->sym),
3525                 .sh_info = 1,
3526                 .sh_link = ARRAY_SIZE(img->shdr) - 1,
3527                 .sh_entsize = sizeof(ElfW(Sym)),
3528             },
3529             [6] = { /* .strtab */
3530                 .sh_type = SHT_STRTAB,
3531                 .sh_offset = offsetof(struct ElfImage, str),
3532                 .sh_size = sizeof(img->str),
3533             }
3534         },
3535         .sym = {
3536             [1] = { /* code_gen_buffer */
3537                 .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC),
3538                 .st_shndx = 1,
3539             }
3540         },
3541         .di = {
3542             .len = sizeof(struct DebugInfo) - 4,
3543             .version = 2,
3544             .ptr_size = sizeof(void *),
3545             .cu_die = 1,
3546             .cu_lang = 0x8001,  /* DW_LANG_Mips_Assembler */
3547             .fn_die = 2,
3548             .fn_name = "code_gen_buffer"
3549         },
3550         .da = {
3551             1,          /* abbrev number (the cu) */
3552             0x11, 1,    /* DW_TAG_compile_unit, has children */
3553             0x13, 0x5,  /* DW_AT_language, DW_FORM_data2 */
3554             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
3555             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
3556             0, 0,       /* end of abbrev */
3557             2,          /* abbrev number (the fn) */
3558             0x2e, 0,    /* DW_TAG_subprogram, no children */
3559             0x3, 0x8,   /* DW_AT_name, DW_FORM_string */
3560             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
3561             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
3562             0, 0,       /* end of abbrev */
3563             0           /* no more abbrev */
3564         },
3565         .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0"
3566                ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer",
3567     };
3568 
3569     /* We only need a single jit entry; statically allocate it.  */
3570     static struct jit_code_entry one_entry;
3571 
3572     uintptr_t buf = (uintptr_t)buf_ptr;
3573     size_t img_size = sizeof(struct ElfImage) + debug_frame_size;
3574     DebugFrameHeader *dfh;
3575 
3576     img = g_malloc(img_size);
3577     *img = img_template;
3578 
3579     img->phdr.p_vaddr = buf;
3580     img->phdr.p_paddr = buf;
3581     img->phdr.p_memsz = buf_size;
3582 
3583     img->shdr[1].sh_name = find_string(img->str, ".text");
3584     img->shdr[1].sh_addr = buf;
3585     img->shdr[1].sh_size = buf_size;
3586 
3587     img->shdr[2].sh_name = find_string(img->str, ".debug_info");
3588     img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev");
3589 
3590     img->shdr[4].sh_name = find_string(img->str, ".debug_frame");
3591     img->shdr[4].sh_size = debug_frame_size;
3592 
3593     img->shdr[5].sh_name = find_string(img->str, ".symtab");
3594     img->shdr[6].sh_name = find_string(img->str, ".strtab");
3595 
3596     img->sym[1].st_name = find_string(img->str, "code_gen_buffer");
3597     img->sym[1].st_value = buf;
3598     img->sym[1].st_size = buf_size;
3599 
3600     img->di.cu_low_pc = buf;
3601     img->di.cu_high_pc = buf + buf_size;
3602     img->di.fn_low_pc = buf;
3603     img->di.fn_high_pc = buf + buf_size;
3604 
3605     dfh = (DebugFrameHeader *)(img + 1);
3606     memcpy(dfh, debug_frame, debug_frame_size);
3607     dfh->fde.func_start = buf;
3608     dfh->fde.func_len = buf_size;
3609 
3610 #ifdef DEBUG_JIT
3611     /* Define DEBUG_JIT at the top of this file to dump the generated
3612        ELF image for inspection with readelf, objdump, or similar tools.  */
3613     {
3614         FILE *f = fopen("/tmp/qemu.jit", "w+b");
3615         if (f) {
3616             if (fwrite(img, img_size, 1, f) != 1) {
3617                 /* Ignore errors; this only silences warn_unused_result.  */
3618             }
3619             fclose(f);
3620         }
3621     }
3622 #endif
3623 
3624     one_entry.symfile_addr = img;
3625     one_entry.symfile_size = img_size;
3626 
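    /* Per the GDB JIT interface, the descriptor must fully describe the
       new entry before __jit_debug_register_code() is called: GDB reads
       it from within the breakpoint it placed on that function.  */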
3627     __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
3628     __jit_debug_descriptor.relevant_entry = &one_entry;
3629     __jit_debug_descriptor.first_entry = &one_entry;
3630     __jit_debug_register_code();
3631 }
3632 #else
3633 /* No support for the feature.  Provide the entry point expected by exec.c,
3634    and implement the internal function we declared earlier.  */
3635 
3636 static void tcg_register_jit_int(void *buf, size_t size,
3637                                  const void *debug_frame,
3638                                  size_t debug_frame_size)
3639 {
3640 }
3641 
3642 void tcg_register_jit(void *buf, size_t buf_size)
3643 {
3644 }
3645 #endif /* ELF_HOST_MACHINE */
3646