xref: /openbmc/qemu/tcg/tcg.c (revision 52f91c37)
1 /*
2  * Tiny Code Generator for QEMU
3  *
4  * Copyright (c) 2008 Fabrice Bellard
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24 
25 /* define it to use liveness analysis (better code) */
26 #define USE_LIVENESS_ANALYSIS
27 #define USE_TCG_OPTIMIZATIONS
28 
29 #include "config.h"
30 
31 /* Define to dump the ELF file used to communicate with GDB.  */
32 #undef DEBUG_JIT
33 
34 #if !defined(CONFIG_DEBUG_TCG) && !defined(NDEBUG)
35 /* define it to suppress various consistency checks (faster) */
36 #define NDEBUG
37 #endif
38 
39 #include "qemu-common.h"
40 #include "qemu/cache-utils.h"
41 #include "qemu/host-utils.h"
42 #include "qemu/timer.h"
43 
44 /* Note: the long-term plan is to reduce the dependencies on the QEMU
45    CPU definitions.  Currently they are used for the qemu_ld/st
46    instructions.  */
47 #define NO_CPU_IO_DEFS
48 #include "cpu.h"
49 
50 #include "tcg-op.h"
51 
52 #if UINTPTR_MAX == UINT32_MAX
53 # define ELF_CLASS  ELFCLASS32
54 #else
55 # define ELF_CLASS  ELFCLASS64
56 #endif
57 #ifdef HOST_WORDS_BIGENDIAN
58 # define ELF_DATA   ELFDATA2MSB
59 #else
60 # define ELF_DATA   ELFDATA2LSB
61 #endif
62 
63 #include "elf.h"
64 
65 /* Forward declarations for functions declared in tcg-target.c and used here. */
66 static void tcg_target_init(TCGContext *s);
67 static void tcg_target_qemu_prologue(TCGContext *s);
68 static void patch_reloc(uint8_t *code_ptr, int type,
69                         intptr_t value, intptr_t addend);
70 
71 /* The CIE and FDE header definitions will be common to all hosts.  */
72 typedef struct {
73     uint32_t len __attribute__((aligned((sizeof(void *)))));
74     uint32_t id;
75     uint8_t version;
76     char augmentation[1];
77     uint8_t code_align;
78     uint8_t data_align;
79     uint8_t return_column;
80 } DebugFrameCIE;
81 
82 typedef struct QEMU_PACKED {
83     uint32_t len __attribute__((aligned((sizeof(void *)))));
84     uint32_t cie_offset;
85     uintptr_t func_start;
86     uintptr_t func_len;
87 } DebugFrameFDEHeader;
88 
89 static void tcg_register_jit_int(void *buf, size_t size,
90                                  void *debug_frame, size_t debug_frame_size)
91     __attribute__((unused));
92 
93 /* Forward declarations for functions declared and used in tcg-target.c. */
94 static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str);
95 static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
96                        intptr_t arg2);
97 static void tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
98 static void tcg_out_movi(TCGContext *s, TCGType type,
99                          TCGReg ret, tcg_target_long arg);
100 static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
101                        const int *const_args);
102 static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
103                        intptr_t arg2);
104 static int tcg_target_const_match(tcg_target_long val, TCGType type,
105                                   const TCGArgConstraint *arg_ct);
106 static void tcg_out_tb_init(TCGContext *s);
107 static void tcg_out_tb_finalize(TCGContext *s);
108 
109 
110 TCGOpDef tcg_op_defs[] = {
111 #define DEF(s, oargs, iargs, cargs, flags) { #s, oargs, iargs, cargs, iargs + oargs + cargs, flags },
112 #include "tcg-opc.h"
113 #undef DEF
114 };
115 const size_t tcg_op_defs_max = ARRAY_SIZE(tcg_op_defs);
116 
117 static TCGRegSet tcg_target_available_regs[2];
118 static TCGRegSet tcg_target_call_clobber_regs;
119 
120 static inline void tcg_out8(TCGContext *s, uint8_t v)
121 {
122     *s->code_ptr++ = v;
123 }
124 
125 static inline void tcg_out16(TCGContext *s, uint16_t v)
126 {
127     uint8_t *p = s->code_ptr;
128     *(uint16_t *)p = v;
129     s->code_ptr = p + 2;
130 }
131 
132 static inline void tcg_out32(TCGContext *s, uint32_t v)
133 {
134     uint8_t *p = s->code_ptr;
135     *(uint32_t *)p = v;
136     s->code_ptr = p + 4;
137 }
138 
139 static inline void tcg_out64(TCGContext *s, uint64_t v)
140 {
141     uint8_t *p = s->code_ptr;
142     *(uint64_t *)p = v;
143     s->code_ptr = p + 8;
144 }
145 
146 /* label relocation processing */
147 
148 static void tcg_out_reloc(TCGContext *s, uint8_t *code_ptr, int type,
149                           int label_index, intptr_t addend)
150 {
151     TCGLabel *l;
152     TCGRelocation *r;
153 
154     l = &s->labels[label_index];
155     if (l->has_value) {
156         /* FIXME: This may break relocations on RISC targets that
157            modify instruction fields in place.  The caller may not have
158            written the initial value.  */
159         patch_reloc(code_ptr, type, l->u.value, addend);
160     } else {
161         /* add a new relocation entry */
162         r = tcg_malloc(sizeof(TCGRelocation));
163         r->type = type;
164         r->ptr = code_ptr;
165         r->addend = addend;
166         r->next = l->u.first_reloc;
167         l->u.first_reloc = r;
168     }
169 }
170 
171 static void tcg_out_label(TCGContext *s, int label_index, void *ptr)
172 {
173     TCGLabel *l;
174     TCGRelocation *r;
175     intptr_t value = (intptr_t)ptr;
176 
177     l = &s->labels[label_index];
178     if (l->has_value) {
179         tcg_abort();
180     }
181     r = l->u.first_reloc;
182     while (r != NULL) {
183         patch_reloc(r->ptr, r->type, value, r->addend);
184         r = r->next;
185     }
186     l->has_value = 1;
187     l->u.value = value;
188 }
189 
190 int gen_new_label(void)
191 {
192     TCGContext *s = &tcg_ctx;
193     int idx;
194     TCGLabel *l;
195 
196     if (s->nb_labels >= TCG_MAX_LABELS)
197         tcg_abort();
198     idx = s->nb_labels++;
199     l = &s->labels[idx];
200     l->has_value = 0;
201     l->u.first_reloc = NULL;
202     return idx;
203 }
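/* Usage sketch for the label machinery above.  tcg_out_branch_placeholder,
   branch_field_ptr and R_EXAMPLE are hypothetical stand-ins for a
   host-specific branch emitter, the address of its target field and its
   relocation type; they are not defined in this tree:

       int lbl = gen_new_label();
       tcg_out_branch_placeholder(s);            // branch, target unknown yet
       tcg_out_reloc(s, branch_field_ptr, R_EXAMPLE, lbl, 0);
       ...                                       // code the branch skips over
       tcg_out_label(s, lbl, s->code_ptr);       // patch all queued relocs

   For a backward branch the label already has a value, so tcg_out_reloc
   patches the instruction immediately instead of queuing a TCGRelocation.  */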
204 
205 #include "tcg-target.c"
206 
207 /* pool based memory allocation */
208 void *tcg_malloc_internal(TCGContext *s, int size)
209 {
210     TCGPool *p;
211     int pool_size;
212 
213     if (size > TCG_POOL_CHUNK_SIZE) {
214         /* big malloc: insert a new pool (XXX: could optimize) */
215         p = g_malloc(sizeof(TCGPool) + size);
216         p->size = size;
217         p->next = s->pool_first_large;
218         s->pool_first_large = p;
219         return p->data;
220     } else {
221         p = s->pool_current;
222         if (!p) {
223             p = s->pool_first;
224             if (!p)
225                 goto new_pool;
226         } else {
227             if (!p->next) {
228             new_pool:
229                 pool_size = TCG_POOL_CHUNK_SIZE;
230                 p = g_malloc(sizeof(TCGPool) + pool_size);
231                 p->size = pool_size;
232                 p->next = NULL;
233                 if (s->pool_current)
234                     s->pool_current->next = p;
235                 else
236                     s->pool_first = p;
237             } else {
238                 p = p->next;
239             }
240         }
241     }
242     s->pool_current = p;
243     s->pool_cur = p->data + size;
244     s->pool_end = p->data + p->size;
245     return p->data;
246 }
247 
248 void tcg_pool_reset(TCGContext *s)
249 {
250     TCGPool *p, *t;
251     for (p = s->pool_first_large; p; p = t) {
252         t = p->next;
253         g_free(p);
254     }
255     s->pool_first_large = NULL;
256     s->pool_cur = s->pool_end = NULL;
257     s->pool_current = NULL;
258 }
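/* Note on the pool allocator above: tcg_malloc() (declared in tcg.h) carves
   small allocations out of the current chunk and only falls back to
   tcg_malloc_internal() when the chunk is exhausted or the request exceeds
   TCG_POOL_CHUNK_SIZE.  There is no per-object free: everything lives until
   the next tcg_pool_reset(), issued once per translation from
   tcg_func_start() below, which releases the large one-off pools and rewinds
   the chunk list for reuse.  */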
259 
260 #include "helper.h"
261 
262 typedef struct TCGHelperInfo {
263     void *func;
264     const char *name;
265 } TCGHelperInfo;
266 
267 static const TCGHelperInfo all_helpers[] = {
268 #define GEN_HELPER 2
269 #include "helper.h"
270 
271     /* Include tcg-runtime.c functions.  */
272     { tcg_helper_div_i32, "div_i32" },
273     { tcg_helper_rem_i32, "rem_i32" },
274     { tcg_helper_divu_i32, "divu_i32" },
275     { tcg_helper_remu_i32, "remu_i32" },
276 
277     { tcg_helper_shl_i64, "shl_i64" },
278     { tcg_helper_shr_i64, "shr_i64" },
279     { tcg_helper_sar_i64, "sar_i64" },
280     { tcg_helper_div_i64, "div_i64" },
281     { tcg_helper_rem_i64, "rem_i64" },
282     { tcg_helper_divu_i64, "divu_i64" },
283     { tcg_helper_remu_i64, "remu_i64" },
284     { tcg_helper_mulsh_i64, "mulsh_i64" },
285     { tcg_helper_muluh_i64, "muluh_i64" },
286 };
287 
288 void tcg_context_init(TCGContext *s)
289 {
290     int op, total_args, n, i;
291     TCGOpDef *def;
292     TCGArgConstraint *args_ct;
293     int *sorted_args;
294     GHashTable *helper_table;
295 
296     memset(s, 0, sizeof(*s));
297     s->nb_globals = 0;
298 
299     /* Count total number of arguments and allocate the corresponding
300        space */
301     total_args = 0;
302     for(op = 0; op < NB_OPS; op++) {
303         def = &tcg_op_defs[op];
304         n = def->nb_iargs + def->nb_oargs;
305         total_args += n;
306     }
307 
308     args_ct = g_malloc(sizeof(TCGArgConstraint) * total_args);
309     sorted_args = g_malloc(sizeof(int) * total_args);
310 
311     for(op = 0; op < NB_OPS; op++) {
312         def = &tcg_op_defs[op];
313         def->args_ct = args_ct;
314         def->sorted_args = sorted_args;
315         n = def->nb_iargs + def->nb_oargs;
316         sorted_args += n;
317         args_ct += n;
318     }
319 
320     /* Register helpers.  */
321     /* Use g_direct_hash/equal for direct pointer comparisons on func.  */
322     s->helpers = helper_table = g_hash_table_new(NULL, NULL);
323 
324     for (i = 0; i < ARRAY_SIZE(all_helpers); ++i) {
325         g_hash_table_insert(helper_table, (gpointer)all_helpers[i].func,
326                             (gpointer)all_helpers[i].name);
327     }
328 
329     tcg_target_init(s);
330 }
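/* The helpers table built above maps helper function pointers to their
   names.  It is consulted by tcg_find_helper() further down so that
   tcg_dump_ops() can print e.g. "movi_i64 tmp3,$div_i64" instead of a raw
   address when a helper pointer is loaded as an immediate.  */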
331 
332 void tcg_prologue_init(TCGContext *s)
333 {
334     /* init global prologue and epilogue */
335     s->code_buf = s->code_gen_prologue;
336     s->code_ptr = s->code_buf;
337     tcg_target_qemu_prologue(s);
338     flush_icache_range((uintptr_t)s->code_buf, (uintptr_t)s->code_ptr);
339 
340 #ifdef DEBUG_DISAS
341     if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
342         size_t size = s->code_ptr - s->code_buf;
343         qemu_log("PROLOGUE: [size=%zu]\n", size);
344         log_disas(s->code_buf, size);
345         qemu_log("\n");
346         qemu_log_flush();
347     }
348 #endif
349 }
350 
351 void tcg_set_frame(TCGContext *s, int reg, intptr_t start, intptr_t size)
352 {
353     s->frame_start = start;
354     s->frame_end = start + size;
355     s->frame_reg = reg;
356 }
357 
358 void tcg_func_start(TCGContext *s)
359 {
360     tcg_pool_reset(s);
361     s->nb_temps = s->nb_globals;
362 
363     /* No temps have been previously allocated for size or locality.  */
364     memset(s->free_temps, 0, sizeof(s->free_temps));
365 
366     s->labels = tcg_malloc(sizeof(TCGLabel) * TCG_MAX_LABELS);
367     s->nb_labels = 0;
368     s->current_frame_offset = s->frame_start;
369 
370 #ifdef CONFIG_DEBUG_TCG
371     s->goto_tb_issue_mask = 0;
372 #endif
373 
374     s->gen_opc_ptr = s->gen_opc_buf;
375     s->gen_opparam_ptr = s->gen_opparam_buf;
376 
377     s->be = tcg_malloc(sizeof(TCGBackendData));
378 }
379 
380 static inline void tcg_temp_alloc(TCGContext *s, int n)
381 {
382     if (n > TCG_MAX_TEMPS)
383         tcg_abort();
384 }
385 
386 static inline int tcg_global_reg_new_internal(TCGType type, int reg,
387                                               const char *name)
388 {
389     TCGContext *s = &tcg_ctx;
390     TCGTemp *ts;
391     int idx;
392 
393 #if TCG_TARGET_REG_BITS == 32
394     if (type != TCG_TYPE_I32)
395         tcg_abort();
396 #endif
397     if (tcg_regset_test_reg(s->reserved_regs, reg))
398         tcg_abort();
399     idx = s->nb_globals;
400     tcg_temp_alloc(s, s->nb_globals + 1);
401     ts = &s->temps[s->nb_globals];
402     ts->base_type = type;
403     ts->type = type;
404     ts->fixed_reg = 1;
405     ts->reg = reg;
406     ts->name = name;
407     s->nb_globals++;
408     tcg_regset_set_reg(s->reserved_regs, reg);
409     return idx;
410 }
411 
412 TCGv_i32 tcg_global_reg_new_i32(int reg, const char *name)
413 {
414     int idx;
415 
416     idx = tcg_global_reg_new_internal(TCG_TYPE_I32, reg, name);
417     return MAKE_TCGV_I32(idx);
418 }
419 
420 TCGv_i64 tcg_global_reg_new_i64(int reg, const char *name)
421 {
422     int idx;
423 
424     idx = tcg_global_reg_new_internal(TCG_TYPE_I64, reg, name);
425     return MAKE_TCGV_I64(idx);
426 }
427 
428 static inline int tcg_global_mem_new_internal(TCGType type, int reg,
429                                               intptr_t offset,
430                                               const char *name)
431 {
432     TCGContext *s = &tcg_ctx;
433     TCGTemp *ts;
434     int idx;
435 
436     idx = s->nb_globals;
437 #if TCG_TARGET_REG_BITS == 32
438     if (type == TCG_TYPE_I64) {
439         char buf[64];
440         tcg_temp_alloc(s, s->nb_globals + 2);
441         ts = &s->temps[s->nb_globals];
442         ts->base_type = type;
443         ts->type = TCG_TYPE_I32;
444         ts->fixed_reg = 0;
445         ts->mem_allocated = 1;
446         ts->mem_reg = reg;
447 #ifdef HOST_WORDS_BIGENDIAN
448         ts->mem_offset = offset + 4;
449 #else
450         ts->mem_offset = offset;
451 #endif
452         pstrcpy(buf, sizeof(buf), name);
453         pstrcat(buf, sizeof(buf), "_0");
454         ts->name = strdup(buf);
455         ts++;
456 
457         ts->base_type = type;
458         ts->type = TCG_TYPE_I32;
459         ts->fixed_reg = 0;
460         ts->mem_allocated = 1;
461         ts->mem_reg = reg;
462 #ifdef HOST_WORDS_BIGENDIAN
463         ts->mem_offset = offset;
464 #else
465         ts->mem_offset = offset + 4;
466 #endif
467         pstrcpy(buf, sizeof(buf), name);
468         pstrcat(buf, sizeof(buf), "_1");
469         ts->name = strdup(buf);
470 
471         s->nb_globals += 2;
472     } else
473 #endif
474     {
475         tcg_temp_alloc(s, s->nb_globals + 1);
476         ts = &s->temps[s->nb_globals];
477         ts->base_type = type;
478         ts->type = type;
479         ts->fixed_reg = 0;
480         ts->mem_allocated = 1;
481         ts->mem_reg = reg;
482         ts->mem_offset = offset;
483         ts->name = name;
484         s->nb_globals++;
485     }
486     return idx;
487 }
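/* Example of the 32-bit host path above: a 64-bit global registered as
   "foo" at a (hypothetical) offset 0x10 becomes two TCG_TYPE_I32 halves
   named "foo_0" (low word) and "foo_1" (high word); on a big-endian host
   the low word lives at offset 0x14 and the high word at 0x10, matching
   the in-memory layout of the 64-bit field.  */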
488 
489 TCGv_i32 tcg_global_mem_new_i32(int reg, intptr_t offset, const char *name)
490 {
491     int idx = tcg_global_mem_new_internal(TCG_TYPE_I32, reg, offset, name);
492     return MAKE_TCGV_I32(idx);
493 }
494 
495 TCGv_i64 tcg_global_mem_new_i64(int reg, intptr_t offset, const char *name)
496 {
497     int idx = tcg_global_mem_new_internal(TCG_TYPE_I64, reg, offset, name);
498     return MAKE_TCGV_I64(idx);
499 }
500 
501 static inline int tcg_temp_new_internal(TCGType type, int temp_local)
502 {
503     TCGContext *s = &tcg_ctx;
504     TCGTemp *ts;
505     int idx, k;
506 
507     k = type + (temp_local ? TCG_TYPE_COUNT : 0);
508     idx = find_first_bit(s->free_temps[k].l, TCG_MAX_TEMPS);
509     if (idx < TCG_MAX_TEMPS) {
510         /* There is already an available temp with the right type.  */
511         clear_bit(idx, s->free_temps[k].l);
512 
513         ts = &s->temps[idx];
514         ts->temp_allocated = 1;
515         assert(ts->base_type == type);
516         assert(ts->temp_local == temp_local);
517     } else {
518         idx = s->nb_temps;
519 #if TCG_TARGET_REG_BITS == 32
520         if (type == TCG_TYPE_I64) {
521             tcg_temp_alloc(s, s->nb_temps + 2);
522             ts = &s->temps[s->nb_temps];
523             ts->base_type = type;
524             ts->type = TCG_TYPE_I32;
525             ts->temp_allocated = 1;
526             ts->temp_local = temp_local;
527             ts->name = NULL;
528             ts++;
529             ts->base_type = type;
530             ts->type = TCG_TYPE_I32;
531             ts->temp_allocated = 1;
532             ts->temp_local = temp_local;
533             ts->name = NULL;
534             s->nb_temps += 2;
535         } else
536 #endif
537         {
538             tcg_temp_alloc(s, s->nb_temps + 1);
539             ts = &s->temps[s->nb_temps];
540             ts->base_type = type;
541             ts->type = type;
542             ts->temp_allocated = 1;
543             ts->temp_local = temp_local;
544             ts->name = NULL;
545             s->nb_temps++;
546         }
547     }
548 
549 #if defined(CONFIG_DEBUG_TCG)
550     s->temps_in_use++;
551 #endif
552     return idx;
553 }
554 
555 TCGv_i32 tcg_temp_new_internal_i32(int temp_local)
556 {
557     int idx;
558 
559     idx = tcg_temp_new_internal(TCG_TYPE_I32, temp_local);
560     return MAKE_TCGV_I32(idx);
561 }
562 
563 TCGv_i64 tcg_temp_new_internal_i64(int temp_local)
564 {
565     int idx;
566 
567     idx = tcg_temp_new_internal(TCG_TYPE_I64, temp_local);
568     return MAKE_TCGV_I64(idx);
569 }
570 
571 static void tcg_temp_free_internal(int idx)
572 {
573     TCGContext *s = &tcg_ctx;
574     TCGTemp *ts;
575     int k;
576 
577 #if defined(CONFIG_DEBUG_TCG)
578     s->temps_in_use--;
579     if (s->temps_in_use < 0) {
580         fprintf(stderr, "More temporaries freed than allocated!\n");
581     }
582 #endif
583 
584     assert(idx >= s->nb_globals && idx < s->nb_temps);
585     ts = &s->temps[idx];
586     assert(ts->temp_allocated != 0);
587     ts->temp_allocated = 0;
588 
589     k = ts->base_type + (ts->temp_local ? TCG_TYPE_COUNT : 0);
590     set_bit(idx, s->free_temps[k].l);
591 }
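/* Temporaries are recycled rather than destroyed: tcg_temp_free_internal()
   sets a bit in the free_temps bitmap selected by (base_type, temp_local),
   and the next tcg_temp_new_internal() call with the same type and locality
   reclaims that index via find_first_bit() instead of growing nb_temps.  */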
592 
593 void tcg_temp_free_i32(TCGv_i32 arg)
594 {
595     tcg_temp_free_internal(GET_TCGV_I32(arg));
596 }
597 
598 void tcg_temp_free_i64(TCGv_i64 arg)
599 {
600     tcg_temp_free_internal(GET_TCGV_I64(arg));
601 }
602 
603 TCGv_i32 tcg_const_i32(int32_t val)
604 {
605     TCGv_i32 t0;
606     t0 = tcg_temp_new_i32();
607     tcg_gen_movi_i32(t0, val);
608     return t0;
609 }
610 
611 TCGv_i64 tcg_const_i64(int64_t val)
612 {
613     TCGv_i64 t0;
614     t0 = tcg_temp_new_i64();
615     tcg_gen_movi_i64(t0, val);
616     return t0;
617 }
618 
619 TCGv_i32 tcg_const_local_i32(int32_t val)
620 {
621     TCGv_i32 t0;
622     t0 = tcg_temp_local_new_i32();
623     tcg_gen_movi_i32(t0, val);
624     return t0;
625 }
626 
627 TCGv_i64 tcg_const_local_i64(int64_t val)
628 {
629     TCGv_i64 t0;
630     t0 = tcg_temp_local_new_i64();
631     tcg_gen_movi_i64(t0, val);
632     return t0;
633 }
634 
635 #if defined(CONFIG_DEBUG_TCG)
636 void tcg_clear_temp_count(void)
637 {
638     TCGContext *s = &tcg_ctx;
639     s->temps_in_use = 0;
640 }
641 
642 int tcg_check_temp_count(void)
643 {
644     TCGContext *s = &tcg_ctx;
645     if (s->temps_in_use) {
646         /* Clear the count so that we don't give another
647          * warning immediately next time around.
648          */
649         s->temps_in_use = 0;
650         return 1;
651     }
652     return 0;
653 }
654 #endif
655 
656 /* Note: we convert the 64-bit args to 32-bit and do some alignment
657    and endian swapping.  Maybe it would be better to do the alignment
658    and endian swapping in tcg_reg_alloc_call(). */
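/* Layout of "sizemask", as decoded below: two bits per value, the return
   value in bits 1:0 and argument i in bits (i+1)*2+1:(i+1)*2.  In each
   pair, bit 0 set means "64-bit value" and bit 1 set means "sign-extended".
   E.g. a helper returning i64 and taking (i32, i64) arguments would pass
   sizemask = 1 | (1 << 4).  */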
659 void tcg_gen_callN(TCGContext *s, TCGv_ptr func, unsigned int flags,
660                    int sizemask, TCGArg ret, int nargs, TCGArg *args)
661 {
662     int i;
663     int real_args;
664     int nb_rets;
665     TCGArg *nparam;
666 
667 #if defined(__sparc__) && !defined(__arch64__) \
668     && !defined(CONFIG_TCG_INTERPRETER)
669     /* We have 64-bit values in one register, but need to pass as two
670        separate parameters.  Split them.  */
671     int orig_sizemask = sizemask;
672     int orig_nargs = nargs;
673     TCGv_i64 retl, reth;
674 
675     TCGV_UNUSED_I64(retl);
676     TCGV_UNUSED_I64(reth);
677     if (sizemask != 0) {
678         TCGArg *split_args = __builtin_alloca(sizeof(TCGArg) * nargs * 2);
679         for (i = real_args = 0; i < nargs; ++i) {
680             int is_64bit = sizemask & (1 << (i+1)*2);
681             if (is_64bit) {
682                 TCGv_i64 orig = MAKE_TCGV_I64(args[i]);
683                 TCGv_i32 h = tcg_temp_new_i32();
684                 TCGv_i32 l = tcg_temp_new_i32();
685                 tcg_gen_extr_i64_i32(l, h, orig);
686                 split_args[real_args++] = GET_TCGV_I32(h);
687                 split_args[real_args++] = GET_TCGV_I32(l);
688             } else {
689                 split_args[real_args++] = args[i];
690             }
691         }
692         nargs = real_args;
693         args = split_args;
694         sizemask = 0;
695     }
696 #elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
697     for (i = 0; i < nargs; ++i) {
698         int is_64bit = sizemask & (1 << (i+1)*2);
699         int is_signed = sizemask & (2 << (i+1)*2);
700         if (!is_64bit) {
701             TCGv_i64 temp = tcg_temp_new_i64();
702             TCGv_i64 orig = MAKE_TCGV_I64(args[i]);
703             if (is_signed) {
704                 tcg_gen_ext32s_i64(temp, orig);
705             } else {
706                 tcg_gen_ext32u_i64(temp, orig);
707             }
708             args[i] = GET_TCGV_I64(temp);
709         }
710     }
711 #endif /* TCG_TARGET_EXTEND_ARGS */
712 
713     *s->gen_opc_ptr++ = INDEX_op_call;
714     nparam = s->gen_opparam_ptr++;
715     if (ret != TCG_CALL_DUMMY_ARG) {
716 #if defined(__sparc__) && !defined(__arch64__) \
717     && !defined(CONFIG_TCG_INTERPRETER)
718         if (orig_sizemask & 1) {
719             /* The 32-bit ABI is going to return the 64-bit value in
720                the %o0/%o1 register pair.  Prepare for this by using
721                two return temporaries, and reassemble below.  */
722             retl = tcg_temp_new_i64();
723             reth = tcg_temp_new_i64();
724             *s->gen_opparam_ptr++ = GET_TCGV_I64(reth);
725             *s->gen_opparam_ptr++ = GET_TCGV_I64(retl);
726             nb_rets = 2;
727         } else {
728             *s->gen_opparam_ptr++ = ret;
729             nb_rets = 1;
730         }
731 #else
732         if (TCG_TARGET_REG_BITS < 64 && (sizemask & 1)) {
733 #ifdef HOST_WORDS_BIGENDIAN
734             *s->gen_opparam_ptr++ = ret + 1;
735             *s->gen_opparam_ptr++ = ret;
736 #else
737             *s->gen_opparam_ptr++ = ret;
738             *s->gen_opparam_ptr++ = ret + 1;
739 #endif
740             nb_rets = 2;
741         } else {
742             *s->gen_opparam_ptr++ = ret;
743             nb_rets = 1;
744         }
745 #endif
746     } else {
747         nb_rets = 0;
748     }
749     real_args = 0;
750     for (i = 0; i < nargs; i++) {
751 #if TCG_TARGET_REG_BITS < 64
752         int is_64bit = sizemask & (1 << (i+1)*2);
753         if (is_64bit) {
754 #ifdef TCG_TARGET_CALL_ALIGN_ARGS
755             /* some targets want aligned 64 bit args */
756             if (real_args & 1) {
757                 *s->gen_opparam_ptr++ = TCG_CALL_DUMMY_ARG;
758                 real_args++;
759             }
760 #endif
761             /* If stack grows up, then we will be placing successive
762                arguments at lower addresses, which means we need to
763                reverse the order compared to how we would normally
764                treat either big or little-endian.  For those arguments
765                that will wind up in registers, this still works for
766                HPPA (the only current STACK_GROWSUP target) since the
767                argument registers are *also* allocated in decreasing
768                order.  If another such target is added, this logic may
769                have to get more complicated to differentiate between
770                stack arguments and register arguments.  */
771 #if defined(HOST_WORDS_BIGENDIAN) != defined(TCG_TARGET_STACK_GROWSUP)
772             *s->gen_opparam_ptr++ = args[i] + 1;
773             *s->gen_opparam_ptr++ = args[i];
774 #else
775             *s->gen_opparam_ptr++ = args[i];
776             *s->gen_opparam_ptr++ = args[i] + 1;
777 #endif
778             real_args += 2;
779             continue;
780         }
781 #endif /* TCG_TARGET_REG_BITS < 64 */
782 
783         *s->gen_opparam_ptr++ = args[i];
784         real_args++;
785     }
786     *s->gen_opparam_ptr++ = GET_TCGV_PTR(func);
787 
788     *s->gen_opparam_ptr++ = flags;
789 
790     *nparam = (nb_rets << 16) | (real_args + 1);
791 
792     /* total parameters, needed to go backward in the instruction stream */
793     *s->gen_opparam_ptr++ = 1 + nb_rets + real_args + 3;
794 
795 #if defined(__sparc__) && !defined(__arch64__) \
796     && !defined(CONFIG_TCG_INTERPRETER)
797     /* Free all of the parts we allocated above.  */
798     for (i = real_args = 0; i < orig_nargs; ++i) {
799         int is_64bit = orig_sizemask & (1 << (i+1)*2);
800         if (is_64bit) {
801             TCGv_i32 h = MAKE_TCGV_I32(args[real_args++]);
802             TCGv_i32 l = MAKE_TCGV_I32(args[real_args++]);
803             tcg_temp_free_i32(h);
804             tcg_temp_free_i32(l);
805         } else {
806             real_args++;
807         }
808     }
809     if (orig_sizemask & 1) {
810         /* The 32-bit ABI returned two 32-bit pieces.  Re-assemble them.
811            Note that describing these as TCGv_i64 eliminates an unnecessary
812            zero-extension that tcg_gen_concat_i32_i64 would create.  */
813         tcg_gen_concat32_i64(MAKE_TCGV_I64(ret), retl, reth);
814         tcg_temp_free_i64(retl);
815         tcg_temp_free_i64(reth);
816     }
817 #elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
818     for (i = 0; i < nargs; ++i) {
819         int is_64bit = sizemask & (1 << (i+1)*2);
820         if (!is_64bit) {
821             TCGv_i64 temp = MAKE_TCGV_I64(args[i]);
822             tcg_temp_free_i64(temp);
823         }
824     }
825 #endif /* TCG_TARGET_EXTEND_ARGS */
826 }
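/* Resulting opparam layout for INDEX_op_call, read back by tcg_dump_ops()
   and tcg_liveness_analysis() below:

       word 0              (nb_rets << 16) | nb_iargs, where nb_iargs
                           includes the function pointer
       next nb_rets words  return temporaries
       next words          real input arguments (with possible dummy padding)
       then                function pointer, then the call flags
       last word           total word count, so backward passes can skip
                           over the whole call record  */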
827 
828 #if TCG_TARGET_REG_BITS == 32
829 void tcg_gen_shifti_i64(TCGv_i64 ret, TCGv_i64 arg1,
830                         int c, int right, int arith)
831 {
832     if (c == 0) {
833         tcg_gen_mov_i32(TCGV_LOW(ret), TCGV_LOW(arg1));
834         tcg_gen_mov_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1));
835     } else if (c >= 32) {
836         c -= 32;
837         if (right) {
838             if (arith) {
839                 tcg_gen_sari_i32(TCGV_LOW(ret), TCGV_HIGH(arg1), c);
840                 tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), 31);
841             } else {
842                 tcg_gen_shri_i32(TCGV_LOW(ret), TCGV_HIGH(arg1), c);
843                 tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
844             }
845         } else {
846             tcg_gen_shli_i32(TCGV_HIGH(ret), TCGV_LOW(arg1), c);
847             tcg_gen_movi_i32(TCGV_LOW(ret), 0);
848         }
849     } else {
850         TCGv_i32 t0, t1;
851 
852         t0 = tcg_temp_new_i32();
853         t1 = tcg_temp_new_i32();
854         if (right) {
855             tcg_gen_shli_i32(t0, TCGV_HIGH(arg1), 32 - c);
856             if (arith)
857                 tcg_gen_sari_i32(t1, TCGV_HIGH(arg1), c);
858             else
859                 tcg_gen_shri_i32(t1, TCGV_HIGH(arg1), c);
860             tcg_gen_shri_i32(TCGV_LOW(ret), TCGV_LOW(arg1), c);
861             tcg_gen_or_i32(TCGV_LOW(ret), TCGV_LOW(ret), t0);
862             tcg_gen_mov_i32(TCGV_HIGH(ret), t1);
863         } else {
864             tcg_gen_shri_i32(t0, TCGV_LOW(arg1), 32 - c);
865             /* Note: ret can be the same as arg1, so we use t1 */
866             tcg_gen_shli_i32(t1, TCGV_LOW(arg1), c);
867             tcg_gen_shli_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), c);
868             tcg_gen_or_i32(TCGV_HIGH(ret), TCGV_HIGH(ret), t0);
869             tcg_gen_mov_i32(TCGV_LOW(ret), t1);
870         }
871         tcg_temp_free_i32(t0);
872         tcg_temp_free_i32(t1);
873     }
874 }
875 #endif
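/* For 0 < c < 32 the function above composes each 64-bit shift from two
   32-bit shifts; e.g. for a right shift:
       low(ret)  = (low(arg1) >> c) | (high(arg1) << (32 - c))
       high(ret) = arith ? (int32_t)high(arg1) >> c : high(arg1) >> c
   The temporaries t0/t1 allow ret to alias arg1.  */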
876 
877 static inline TCGMemOp tcg_canonicalize_memop(TCGMemOp op, bool is64, bool st)
878 {
879     switch (op & MO_SIZE) {
880     case MO_8:
881         op &= ~MO_BSWAP;
882         break;
883     case MO_16:
884         break;
885     case MO_32:
886         if (!is64) {
887             op &= ~MO_SIGN;
888         }
889         break;
890     case MO_64:
891         if (!is64) {
892             tcg_abort();
893         }
894         break;
895     }
896     if (st) {
897         op &= ~MO_SIGN;
898     }
899     return op;
900 }
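/* Examples of the canonicalization above: byte accesses drop MO_BSWAP
   (a single byte has no endianness), a 32-bit value loaded into a 32-bit
   destination drops MO_SIGN (there is nothing to extend into), and stores
   always drop MO_SIGN.  */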
901 
902 static const TCGOpcode old_ld_opc[8] = {
903     [MO_UB] = INDEX_op_qemu_ld8u,
904     [MO_SB] = INDEX_op_qemu_ld8s,
905     [MO_UW] = INDEX_op_qemu_ld16u,
906     [MO_SW] = INDEX_op_qemu_ld16s,
907 #if TCG_TARGET_REG_BITS == 32
908     [MO_UL] = INDEX_op_qemu_ld32,
909     [MO_SL] = INDEX_op_qemu_ld32,
910 #else
911     [MO_UL] = INDEX_op_qemu_ld32u,
912     [MO_SL] = INDEX_op_qemu_ld32s,
913 #endif
914     [MO_Q]  = INDEX_op_qemu_ld64,
915 };
916 
917 static const TCGOpcode old_st_opc[4] = {
918     [MO_UB] = INDEX_op_qemu_st8,
919     [MO_UW] = INDEX_op_qemu_st16,
920     [MO_UL] = INDEX_op_qemu_st32,
921     [MO_Q]  = INDEX_op_qemu_st64,
922 };
923 
924 void tcg_gen_qemu_ld_i32(TCGv_i32 val, TCGv addr, TCGArg idx, TCGMemOp memop)
925 {
926     memop = tcg_canonicalize_memop(memop, 0, 0);
927 
928     if (TCG_TARGET_HAS_new_ldst) {
929         *tcg_ctx.gen_opc_ptr++ = INDEX_op_qemu_ld_i32;
930         tcg_add_param_i32(val);
931         tcg_add_param_tl(addr);
932         *tcg_ctx.gen_opparam_ptr++ = memop;
933         *tcg_ctx.gen_opparam_ptr++ = idx;
934         return;
935     }
936 
937     /* The old opcodes only support target-endian memory operations.  */
938     assert((memop & MO_BSWAP) == MO_TE || (memop & MO_SIZE) == MO_8);
939     assert(old_ld_opc[memop & MO_SSIZE] != 0);
940 
941     if (TCG_TARGET_REG_BITS == 32) {
942         *tcg_ctx.gen_opc_ptr++ = old_ld_opc[memop & MO_SSIZE];
943         tcg_add_param_i32(val);
944         tcg_add_param_tl(addr);
945         *tcg_ctx.gen_opparam_ptr++ = idx;
946     } else {
947         TCGv_i64 val64 = tcg_temp_new_i64();
948 
949         *tcg_ctx.gen_opc_ptr++ = old_ld_opc[memop & MO_SSIZE];
950         tcg_add_param_i64(val64);
951         tcg_add_param_tl(addr);
952         *tcg_ctx.gen_opparam_ptr++ = idx;
953 
954         tcg_gen_trunc_i64_i32(val, val64);
955         tcg_temp_free_i64(val64);
956     }
957 }
958 
959 void tcg_gen_qemu_st_i32(TCGv_i32 val, TCGv addr, TCGArg idx, TCGMemOp memop)
960 {
961     memop = tcg_canonicalize_memop(memop, 0, 1);
962 
963     if (TCG_TARGET_HAS_new_ldst) {
964         *tcg_ctx.gen_opc_ptr++ = INDEX_op_qemu_st_i32;
965         tcg_add_param_i32(val);
966         tcg_add_param_tl(addr);
967         *tcg_ctx.gen_opparam_ptr++ = memop;
968         *tcg_ctx.gen_opparam_ptr++ = idx;
969         return;
970     }
971 
972     /* The old opcodes only support target-endian memory operations.  */
973     assert((memop & MO_BSWAP) == MO_TE || (memop & MO_SIZE) == MO_8);
974     assert(old_st_opc[memop & MO_SIZE] != 0);
975 
976     if (TCG_TARGET_REG_BITS == 32) {
977         *tcg_ctx.gen_opc_ptr++ = old_st_opc[memop & MO_SIZE];
978         tcg_add_param_i32(val);
979         tcg_add_param_tl(addr);
980         *tcg_ctx.gen_opparam_ptr++ = idx;
981     } else {
982         TCGv_i64 val64 = tcg_temp_new_i64();
983 
984         tcg_gen_extu_i32_i64(val64, val);
985 
986         *tcg_ctx.gen_opc_ptr++ = old_st_opc[memop & MO_SIZE];
987         tcg_add_param_i64(val64);
988         tcg_add_param_tl(addr);
989         *tcg_ctx.gen_opparam_ptr++ = idx;
990 
991         tcg_temp_free_i64(val64);
992     }
993 }
994 
995 void tcg_gen_qemu_ld_i64(TCGv_i64 val, TCGv addr, TCGArg idx, TCGMemOp memop)
996 {
997     memop = tcg_canonicalize_memop(memop, 1, 0);
998 
999 #if TCG_TARGET_REG_BITS == 32
1000     if ((memop & MO_SIZE) < MO_64) {
1001         tcg_gen_qemu_ld_i32(TCGV_LOW(val), addr, idx, memop);
1002         if (memop & MO_SIGN) {
1003             tcg_gen_sari_i32(TCGV_HIGH(val), TCGV_LOW(val), 31);
1004         } else {
1005             tcg_gen_movi_i32(TCGV_HIGH(val), 0);
1006         }
1007         return;
1008     }
1009 #endif
1010 
1011     if (TCG_TARGET_HAS_new_ldst) {
1012         *tcg_ctx.gen_opc_ptr++ = INDEX_op_qemu_ld_i64;
1013         tcg_add_param_i64(val);
1014         tcg_add_param_tl(addr);
1015         *tcg_ctx.gen_opparam_ptr++ = memop;
1016         *tcg_ctx.gen_opparam_ptr++ = idx;
1017         return;
1018     }
1019 
1020     /* The old opcodes only support target-endian memory operations.  */
1021     assert((memop & MO_BSWAP) == MO_TE || (memop & MO_SIZE) == MO_8);
1022     assert(old_ld_opc[memop & MO_SSIZE] != 0);
1023 
1024     *tcg_ctx.gen_opc_ptr++ = old_ld_opc[memop & MO_SSIZE];
1025     tcg_add_param_i64(val);
1026     tcg_add_param_tl(addr);
1027     *tcg_ctx.gen_opparam_ptr++ = idx;
1028 }
1029 
1030 void tcg_gen_qemu_st_i64(TCGv_i64 val, TCGv addr, TCGArg idx, TCGMemOp memop)
1031 {
1032     memop = tcg_canonicalize_memop(memop, 1, 1);
1033 
1034 #if TCG_TARGET_REG_BITS == 32
1035     if ((memop & MO_SIZE) < MO_64) {
1036         tcg_gen_qemu_st_i32(TCGV_LOW(val), addr, idx, memop);
1037         return;
1038     }
1039 #endif
1040 
1041     if (TCG_TARGET_HAS_new_ldst) {
1042         *tcg_ctx.gen_opc_ptr++ = INDEX_op_qemu_st_i64;
1043         tcg_add_param_i64(val);
1044         tcg_add_param_tl(addr);
1045         *tcg_ctx.gen_opparam_ptr++ = memop;
1046         *tcg_ctx.gen_opparam_ptr++ = idx;
1047         return;
1048     }
1049 
1050     /* The old opcodes only support target-endian memory operations.  */
1051     assert((memop & MO_BSWAP) == MO_TE || (memop & MO_SIZE) == MO_8);
1052     assert(old_st_opc[memop & MO_SIZE] != 0);
1053 
1054     *tcg_ctx.gen_opc_ptr++ = old_st_opc[memop & MO_SIZE];
1055     tcg_add_param_i64(val);
1056     tcg_add_param_tl(addr);
1057     *tcg_ctx.gen_opparam_ptr++ = idx;
1058 }
1059 
1060 static void tcg_reg_alloc_start(TCGContext *s)
1061 {
1062     int i;
1063     TCGTemp *ts;
1064     for(i = 0; i < s->nb_globals; i++) {
1065         ts = &s->temps[i];
1066         if (ts->fixed_reg) {
1067             ts->val_type = TEMP_VAL_REG;
1068         } else {
1069             ts->val_type = TEMP_VAL_MEM;
1070         }
1071     }
1072     for(i = s->nb_globals; i < s->nb_temps; i++) {
1073         ts = &s->temps[i];
1074         if (ts->temp_local) {
1075             ts->val_type = TEMP_VAL_MEM;
1076         } else {
1077             ts->val_type = TEMP_VAL_DEAD;
1078         }
1079         ts->mem_allocated = 0;
1080         ts->fixed_reg = 0;
1081     }
1082     for(i = 0; i < TCG_TARGET_NB_REGS; i++) {
1083         s->reg_to_temp[i] = -1;
1084     }
1085 }
1086 
1087 static char *tcg_get_arg_str_idx(TCGContext *s, char *buf, int buf_size,
1088                                  int idx)
1089 {
1090     TCGTemp *ts;
1091 
1092     assert(idx >= 0 && idx < s->nb_temps);
1093     ts = &s->temps[idx];
1094     if (idx < s->nb_globals) {
1095         pstrcpy(buf, buf_size, ts->name);
1096     } else {
1097         if (ts->temp_local)
1098             snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
1099         else
1100             snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
1101     }
1102     return buf;
1103 }
1104 
1105 char *tcg_get_arg_str_i32(TCGContext *s, char *buf, int buf_size, TCGv_i32 arg)
1106 {
1107     return tcg_get_arg_str_idx(s, buf, buf_size, GET_TCGV_I32(arg));
1108 }
1109 
1110 char *tcg_get_arg_str_i64(TCGContext *s, char *buf, int buf_size, TCGv_i64 arg)
1111 {
1112     return tcg_get_arg_str_idx(s, buf, buf_size, GET_TCGV_I64(arg));
1113 }
1114 
1115 /* Find helper name.  */
1116 static inline const char *tcg_find_helper(TCGContext *s, uintptr_t val)
1117 {
1118     const char *ret = NULL;
1119     if (s->helpers) {
1120         ret = g_hash_table_lookup(s->helpers, (gpointer)val);
1121     }
1122     return ret;
1123 }
1124 
1125 static const char * const cond_name[] =
1126 {
1127     [TCG_COND_NEVER] = "never",
1128     [TCG_COND_ALWAYS] = "always",
1129     [TCG_COND_EQ] = "eq",
1130     [TCG_COND_NE] = "ne",
1131     [TCG_COND_LT] = "lt",
1132     [TCG_COND_GE] = "ge",
1133     [TCG_COND_LE] = "le",
1134     [TCG_COND_GT] = "gt",
1135     [TCG_COND_LTU] = "ltu",
1136     [TCG_COND_GEU] = "geu",
1137     [TCG_COND_LEU] = "leu",
1138     [TCG_COND_GTU] = "gtu"
1139 };
1140 
1141 static const char * const ldst_name[] =
1142 {
1143     [MO_UB]   = "ub",
1144     [MO_SB]   = "sb",
1145     [MO_LEUW] = "leuw",
1146     [MO_LESW] = "lesw",
1147     [MO_LEUL] = "leul",
1148     [MO_LESL] = "lesl",
1149     [MO_LEQ]  = "leq",
1150     [MO_BEUW] = "beuw",
1151     [MO_BESW] = "besw",
1152     [MO_BEUL] = "beul",
1153     [MO_BESL] = "besl",
1154     [MO_BEQ]  = "beq",
1155 };
1156 
1157 void tcg_dump_ops(TCGContext *s)
1158 {
1159     const uint16_t *opc_ptr;
1160     const TCGArg *args;
1161     TCGArg arg;
1162     TCGOpcode c;
1163     int i, k, nb_oargs, nb_iargs, nb_cargs, first_insn;
1164     const TCGOpDef *def;
1165     char buf[128];
1166 
1167     first_insn = 1;
1168     opc_ptr = s->gen_opc_buf;
1169     args = s->gen_opparam_buf;
1170     while (opc_ptr < s->gen_opc_ptr) {
1171         c = *opc_ptr++;
1172         def = &tcg_op_defs[c];
1173         if (c == INDEX_op_debug_insn_start) {
1174             uint64_t pc;
1175 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
1176             pc = ((uint64_t)args[1] << 32) | args[0];
1177 #else
1178             pc = args[0];
1179 #endif
1180             if (!first_insn) {
1181                 qemu_log("\n");
1182             }
1183             qemu_log(" ---- 0x%" PRIx64, pc);
1184             first_insn = 0;
1185             nb_oargs = def->nb_oargs;
1186             nb_iargs = def->nb_iargs;
1187             nb_cargs = def->nb_cargs;
1188         } else if (c == INDEX_op_call) {
1189             TCGArg arg;
1190 
1191             /* variable number of arguments */
1192             arg = *args++;
1193             nb_oargs = arg >> 16;
1194             nb_iargs = arg & 0xffff;
1195             nb_cargs = def->nb_cargs;
1196 
1197             qemu_log(" %s ", def->name);
1198 
1199             /* function name */
1200             qemu_log("%s",
1201                      tcg_get_arg_str_idx(s, buf, sizeof(buf),
1202                                          args[nb_oargs + nb_iargs - 1]));
1203             /* flags */
1204             qemu_log(",$0x%" TCG_PRIlx, args[nb_oargs + nb_iargs]);
1205             /* nb out args */
1206             qemu_log(",$%d", nb_oargs);
1207             for(i = 0; i < nb_oargs; i++) {
1208                 qemu_log(",");
1209                 qemu_log("%s", tcg_get_arg_str_idx(s, buf, sizeof(buf),
1210                                                    args[i]));
1211             }
1212             for(i = 0; i < (nb_iargs - 1); i++) {
1213                 qemu_log(",");
1214                 if (args[nb_oargs + i] == TCG_CALL_DUMMY_ARG) {
1215                     qemu_log("<dummy>");
1216                 } else {
1217                     qemu_log("%s", tcg_get_arg_str_idx(s, buf, sizeof(buf),
1218                                                        args[nb_oargs + i]));
1219                 }
1220             }
1221         } else if (c == INDEX_op_movi_i32 || c == INDEX_op_movi_i64) {
1222             tcg_target_ulong val;
1223             const char *name;
1224 
1225             nb_oargs = def->nb_oargs;
1226             nb_iargs = def->nb_iargs;
1227             nb_cargs = def->nb_cargs;
1228             qemu_log(" %s %s,$", def->name,
1229                      tcg_get_arg_str_idx(s, buf, sizeof(buf), args[0]));
1230             val = args[1];
1231             name = tcg_find_helper(s, val);
1232             if (name) {
1233                 qemu_log("%s", name);
1234             } else {
1235                 if (c == INDEX_op_movi_i32) {
1236                     qemu_log("0x%x", (uint32_t)val);
1237                 } else {
1238                     qemu_log("0x%" PRIx64 , (uint64_t)val);
1239                 }
1240             }
1241         } else {
1242             qemu_log(" %s ", def->name);
1243             if (c == INDEX_op_nopn) {
1244                 /* variable number of arguments */
1245                 nb_cargs = *args;
1246                 nb_oargs = 0;
1247                 nb_iargs = 0;
1248             } else {
1249                 nb_oargs = def->nb_oargs;
1250                 nb_iargs = def->nb_iargs;
1251                 nb_cargs = def->nb_cargs;
1252             }
1253 
1254             k = 0;
1255             for(i = 0; i < nb_oargs; i++) {
1256                 if (k != 0) {
1257                     qemu_log(",");
1258                 }
1259                 qemu_log("%s", tcg_get_arg_str_idx(s, buf, sizeof(buf),
1260                                                    args[k++]));
1261             }
1262             for(i = 0; i < nb_iargs; i++) {
1263                 if (k != 0) {
1264                     qemu_log(",");
1265                 }
1266                 qemu_log("%s", tcg_get_arg_str_idx(s, buf, sizeof(buf),
1267                                                    args[k++]));
1268             }
1269             switch (c) {
1270             case INDEX_op_brcond_i32:
1271             case INDEX_op_setcond_i32:
1272             case INDEX_op_movcond_i32:
1273             case INDEX_op_brcond2_i32:
1274             case INDEX_op_setcond2_i32:
1275             case INDEX_op_brcond_i64:
1276             case INDEX_op_setcond_i64:
1277             case INDEX_op_movcond_i64:
1278                 if (args[k] < ARRAY_SIZE(cond_name) && cond_name[args[k]]) {
1279                     qemu_log(",%s", cond_name[args[k++]]);
1280                 } else {
1281                     qemu_log(",$0x%" TCG_PRIlx, args[k++]);
1282                 }
1283                 i = 1;
1284                 break;
1285             case INDEX_op_qemu_ld_i32:
1286             case INDEX_op_qemu_st_i32:
1287             case INDEX_op_qemu_ld_i64:
1288             case INDEX_op_qemu_st_i64:
1289                 if (args[k] < ARRAY_SIZE(ldst_name) && ldst_name[args[k]]) {
1290                     qemu_log(",%s", ldst_name[args[k++]]);
1291                 } else {
1292                     qemu_log(",$0x%" TCG_PRIlx, args[k++]);
1293                 }
1294                 i = 1;
1295                 break;
1296             default:
1297                 i = 0;
1298                 break;
1299             }
1300             for(; i < nb_cargs; i++) {
1301                 if (k != 0) {
1302                     qemu_log(",");
1303                 }
1304                 arg = args[k++];
1305                 qemu_log("$0x%" TCG_PRIlx, arg);
1306             }
1307         }
1308         qemu_log("\n");
1309         args += nb_iargs + nb_oargs + nb_cargs;
1310     }
1311 }
1312 
1313 /* we give more priority to constraints with fewer registers */
1314 static int get_constraint_priority(const TCGOpDef *def, int k)
1315 {
1316     const TCGArgConstraint *arg_ct;
1317 
1318     int i, n;
1319     arg_ct = &def->args_ct[k];
1320     if (arg_ct->ct & TCG_CT_ALIAS) {
1321         /* an alias is equivalent to a single register */
1322         n = 1;
1323     } else {
1324         if (!(arg_ct->ct & TCG_CT_REG))
1325             return 0;
1326         n = 0;
1327         for(i = 0; i < TCG_TARGET_NB_REGS; i++) {
1328             if (tcg_regset_test_reg(arg_ct->u.regs, i))
1329                 n++;
1330         }
1331     }
1332     return TCG_TARGET_NB_REGS - n + 1;
1333 }
1334 
1335 /* sort from highest priority to lowest */
1336 static void sort_constraints(TCGOpDef *def, int start, int n)
1337 {
1338     int i, j, p1, p2, tmp;
1339 
1340     for(i = 0; i < n; i++)
1341         def->sorted_args[start + i] = start + i;
1342     if (n <= 1)
1343         return;
1344     for(i = 0; i < n - 1; i++) {
1345         for(j = i + 1; j < n; j++) {
1346             p1 = get_constraint_priority(def, def->sorted_args[start + i]);
1347             p2 = get_constraint_priority(def, def->sorted_args[start + j]);
1348             if (p1 < p2) {
1349                 tmp = def->sorted_args[start + i];
1350                 def->sorted_args[start + i] = def->sorted_args[start + j];
1351                 def->sorted_args[start + j] = tmp;
1352             }
1353         }
1354     }
1355 }
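/* Example: an operand constrained to one specific register yields priority
   TCG_TARGET_NB_REGS, while one accepting any register yields priority 1,
   so the most constrained operands come first in sorted_args.  */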
1356 
1357 void tcg_add_target_add_op_defs(const TCGTargetOpDef *tdefs)
1358 {
1359     TCGOpcode op;
1360     TCGOpDef *def;
1361     const char *ct_str;
1362     int i, nb_args;
1363 
1364     for(;;) {
1365         if (tdefs->op == (TCGOpcode)-1)
1366             break;
1367         op = tdefs->op;
1368         assert((unsigned)op < NB_OPS);
1369         def = &tcg_op_defs[op];
1370 #if defined(CONFIG_DEBUG_TCG)
1371         /* Duplicate entry in op definitions? */
1372         assert(!def->used);
1373         def->used = 1;
1374 #endif
1375         nb_args = def->nb_iargs + def->nb_oargs;
1376         for(i = 0; i < nb_args; i++) {
1377             ct_str = tdefs->args_ct_str[i];
1378             /* Incomplete TCGTargetOpDef entry? */
1379             assert(ct_str != NULL);
1380             tcg_regset_clear(def->args_ct[i].u.regs);
1381             def->args_ct[i].ct = 0;
1382             if (ct_str[0] >= '0' && ct_str[0] <= '9') {
1383                 int oarg;
1384                 oarg = ct_str[0] - '0';
1385                 assert(oarg < def->nb_oargs);
1386                 assert(def->args_ct[oarg].ct & TCG_CT_REG);
1387                 /* TCG_CT_ALIAS is for the output arguments. The input
1388                    argument is tagged with TCG_CT_IALIAS. */
1389                 def->args_ct[i] = def->args_ct[oarg];
1390                 def->args_ct[oarg].ct = TCG_CT_ALIAS;
1391                 def->args_ct[oarg].alias_index = i;
1392                 def->args_ct[i].ct |= TCG_CT_IALIAS;
1393                 def->args_ct[i].alias_index = oarg;
1394             } else {
1395                 for(;;) {
1396                     if (*ct_str == '\0')
1397                         break;
1398                     switch(*ct_str) {
1399                     case 'i':
1400                         def->args_ct[i].ct |= TCG_CT_CONST;
1401                         ct_str++;
1402                         break;
1403                     default:
1404                         if (target_parse_constraint(&def->args_ct[i], &ct_str) < 0) {
1405                             fprintf(stderr, "Invalid constraint '%s' for arg %d of operation '%s'\n",
1406                                     ct_str, i, def->name);
1407                             exit(1);
1408                         }
1409                     }
1410                 }
1411             }
1412         }
1413 
1414         /* TCGTargetOpDef entry with too much information? */
1415         assert(i == TCG_MAX_OP_ARGS || tdefs->args_ct_str[i] == NULL);
1416 
1417         /* sort the constraints (XXX: this is just a heuristic) */
1418         sort_constraints(def, 0, def->nb_oargs);
1419         sort_constraints(def, def->nb_oargs, def->nb_iargs);
1420 
1421 #if 0
1422         {
1423             int i;
1424 
1425             printf("%s: sorted=", def->name);
1426             for(i = 0; i < def->nb_oargs + def->nb_iargs; i++)
1427                 printf(" %d", def->sorted_args[i]);
1428             printf("\n");
1429         }
1430 #endif
1431         tdefs++;
1432     }
1433 
1434 #if defined(CONFIG_DEBUG_TCG)
1435     i = 0;
1436     for (op = 0; op < ARRAY_SIZE(tcg_op_defs); op++) {
1437         const TCGOpDef *def = &tcg_op_defs[op];
1438         if (def->flags & TCG_OPF_NOT_PRESENT) {
1439             /* Wrong entry in op definitions? */
1440             if (def->used) {
1441                 fprintf(stderr, "Invalid op definition for %s\n", def->name);
1442                 i = 1;
1443             }
1444         } else {
1445             /* Missing entry in op definitions? */
1446             if (!def->used) {
1447                 fprintf(stderr, "Missing op definition for %s\n", def->name);
1448                 i = 1;
1449             }
1450         }
1451     }
1452     if (i == 1) {
1453         tcg_abort();
1454     }
1455 #endif
1456 }
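/* A typical constraint table entry consumed by the parser above looks like
   (illustrative only, not copied from any particular tcg-target.c):

       { INDEX_op_add_i32, { "r", "r", "ri" } }

   where "r" is a target-defined register class handled by
   target_parse_constraint(), "i" allows a constant, and a leading digit
   such as "0" aliases an input operand to the same-numbered output.  */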
1457 
1458 #ifdef USE_LIVENESS_ANALYSIS
1459 
1460 /* set a nop for an operation using 'nb_args' */
1461 static inline void tcg_set_nop(TCGContext *s, uint16_t *opc_ptr,
1462                                TCGArg *args, int nb_args)
1463 {
1464     if (nb_args == 0) {
1465         *opc_ptr = INDEX_op_nop;
1466     } else {
1467         *opc_ptr = INDEX_op_nopn;
1468         args[0] = nb_args;
1469         args[nb_args - 1] = nb_args;
1470     }
1471 }
1472 
1473 /* liveness analysis: end of function: all temps are dead, and globals
1474    should be in memory. */
1475 static inline void tcg_la_func_end(TCGContext *s, uint8_t *dead_temps,
1476                                    uint8_t *mem_temps)
1477 {
1478     memset(dead_temps, 1, s->nb_temps);
1479     memset(mem_temps, 1, s->nb_globals);
1480     memset(mem_temps + s->nb_globals, 0, s->nb_temps - s->nb_globals);
1481 }
1482 
1483 /* liveness analysis: end of basic block: all temps are dead, globals
1484    and local temps should be in memory. */
1485 static inline void tcg_la_bb_end(TCGContext *s, uint8_t *dead_temps,
1486                                  uint8_t *mem_temps)
1487 {
1488     int i;
1489 
1490     memset(dead_temps, 1, s->nb_temps);
1491     memset(mem_temps, 1, s->nb_globals);
1492     for(i = s->nb_globals; i < s->nb_temps; i++) {
1493         mem_temps[i] = s->temps[i].temp_local;
1494     }
1495 }
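/* In the backward scan below, dead_temps[i] != 0 means the value of temp i
   is not needed by any later op, and mem_temps[i] != 0 means temp i must
   still be synced to its memory slot.  The two helpers above seed these
   arrays: at function end everything is dead and all globals need syncing;
   at a basic-block end local temps additionally keep their memory
   requirement, while plain temps simply die.  */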
1496 
1497 /* Liveness analysis: update the op_dead_args array to tell whether a
1498    given input argument is dead.  Instructions updating dead
1499    temporaries are removed. */
1500 static void tcg_liveness_analysis(TCGContext *s)
1501 {
1502     int i, op_index, nb_args, nb_iargs, nb_oargs, arg, nb_ops;
1503     TCGOpcode op, op_new, op_new2;
1504     TCGArg *args;
1505     const TCGOpDef *def;
1506     uint8_t *dead_temps, *mem_temps;
1507     uint16_t dead_args;
1508     uint8_t sync_args;
1509     bool have_op_new2;
1510 
1511     s->gen_opc_ptr++; /* skip end */
1512 
1513     nb_ops = s->gen_opc_ptr - s->gen_opc_buf;
1514 
1515     s->op_dead_args = tcg_malloc(nb_ops * sizeof(uint16_t));
1516     s->op_sync_args = tcg_malloc(nb_ops * sizeof(uint8_t));
1517 
1518     dead_temps = tcg_malloc(s->nb_temps);
1519     mem_temps = tcg_malloc(s->nb_temps);
1520     tcg_la_func_end(s, dead_temps, mem_temps);
1521 
1522     args = s->gen_opparam_ptr;
1523     op_index = nb_ops - 1;
1524     while (op_index >= 0) {
1525         op = s->gen_opc_buf[op_index];
1526         def = &tcg_op_defs[op];
1527         switch(op) {
1528         case INDEX_op_call:
1529             {
1530                 int call_flags;
1531 
1532                 nb_args = args[-1];
1533                 args -= nb_args;
1534                 nb_iargs = args[0] & 0xffff;
1535                 nb_oargs = args[0] >> 16;
1536                 args++;
1537                 call_flags = args[nb_oargs + nb_iargs];
1538 
1539                 /* pure functions can be removed if their result is not
1540                    used */
1541                 if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
1542                     for(i = 0; i < nb_oargs; i++) {
1543                         arg = args[i];
1544                         if (!dead_temps[arg] || mem_temps[arg]) {
1545                             goto do_not_remove_call;
1546                         }
1547                     }
1548                     tcg_set_nop(s, s->gen_opc_buf + op_index,
1549                                 args - 1, nb_args);
1550                 } else {
1551                 do_not_remove_call:
1552 
1553                     /* output args are dead */
1554                     dead_args = 0;
1555                     sync_args = 0;
1556                     for(i = 0; i < nb_oargs; i++) {
1557                         arg = args[i];
1558                         if (dead_temps[arg]) {
1559                             dead_args |= (1 << i);
1560                         }
1561                         if (mem_temps[arg]) {
1562                             sync_args |= (1 << i);
1563                         }
1564                         dead_temps[arg] = 1;
1565                         mem_temps[arg] = 0;
1566                     }
1567 
1568                     if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
1569                         /* globals should be synced to memory */
1570                         memset(mem_temps, 1, s->nb_globals);
1571                     }
1572                     if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
1573                                         TCG_CALL_NO_READ_GLOBALS))) {
1574                         /* globals should go back to memory */
1575                         memset(dead_temps, 1, s->nb_globals);
1576                     }
1577 
1578                     /* input args are live */
1579                     for(i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
1580                         arg = args[i];
1581                         if (arg != TCG_CALL_DUMMY_ARG) {
1582                             if (dead_temps[arg]) {
1583                                 dead_args |= (1 << i);
1584                             }
1585                             dead_temps[arg] = 0;
1586                         }
1587                     }
1588                     s->op_dead_args[op_index] = dead_args;
1589                     s->op_sync_args[op_index] = sync_args;
1590                 }
1591                 args--;
1592             }
1593             break;
1594         case INDEX_op_debug_insn_start:
1595             args -= def->nb_args;
1596             break;
1597         case INDEX_op_nopn:
1598             nb_args = args[-1];
1599             args -= nb_args;
1600             break;
1601         case INDEX_op_discard:
1602             args--;
1603             /* mark the temporary as dead */
1604             dead_temps[args[0]] = 1;
1605             mem_temps[args[0]] = 0;
1606             break;
1607         case INDEX_op_end:
1608             break;
1609 
1610         case INDEX_op_add2_i32:
1611             op_new = INDEX_op_add_i32;
1612             goto do_addsub2;
1613         case INDEX_op_sub2_i32:
1614             op_new = INDEX_op_sub_i32;
1615             goto do_addsub2;
1616         case INDEX_op_add2_i64:
1617             op_new = INDEX_op_add_i64;
1618             goto do_addsub2;
1619         case INDEX_op_sub2_i64:
1620             op_new = INDEX_op_sub_i64;
1621         do_addsub2:
1622             args -= 6;
1623             nb_iargs = 4;
1624             nb_oargs = 2;
1625             /* Test if the high part of the operation is dead, but not
1626                the low part.  The result can be optimized to a simple
1627                add or sub.  This often happens for x86_64 guests when the
1628                CPU mode is set to 32 bit.  */
1629             if (dead_temps[args[1]] && !mem_temps[args[1]]) {
1630                 if (dead_temps[args[0]] && !mem_temps[args[0]]) {
1631                     goto do_remove;
1632                 }
1633                 /* Create the single operation plus nop.  */
1634                 s->gen_opc_buf[op_index] = op = op_new;
1635                 args[1] = args[2];
1636                 args[2] = args[4];
1637                 assert(s->gen_opc_buf[op_index + 1] == INDEX_op_nop);
1638                 tcg_set_nop(s, s->gen_opc_buf + op_index + 1, args + 3, 3);
1639                 /* Fall through and mark the single-word operation live.  */
1640                 nb_iargs = 2;
1641                 nb_oargs = 1;
1642             }
1643             goto do_not_remove;
1644 
1645         case INDEX_op_mulu2_i32:
1646             op_new = INDEX_op_mul_i32;
1647             op_new2 = INDEX_op_muluh_i32;
1648             have_op_new2 = TCG_TARGET_HAS_muluh_i32;
1649             goto do_mul2;
1650         case INDEX_op_muls2_i32:
1651             op_new = INDEX_op_mul_i32;
1652             op_new2 = INDEX_op_mulsh_i32;
1653             have_op_new2 = TCG_TARGET_HAS_mulsh_i32;
1654             goto do_mul2;
1655         case INDEX_op_mulu2_i64:
1656             op_new = INDEX_op_mul_i64;
1657             op_new2 = INDEX_op_muluh_i64;
1658             have_op_new2 = TCG_TARGET_HAS_muluh_i64;
1659             goto do_mul2;
1660         case INDEX_op_muls2_i64:
1661             op_new = INDEX_op_mul_i64;
1662             op_new2 = INDEX_op_mulsh_i64;
1663             have_op_new2 = TCG_TARGET_HAS_mulsh_i64;
1664             goto do_mul2;
1665         do_mul2:
1666             args -= 4;
1667             nb_iargs = 2;
1668             nb_oargs = 2;
1669             if (dead_temps[args[1]] && !mem_temps[args[1]]) {
1670                 if (dead_temps[args[0]] && !mem_temps[args[0]]) {
1671                     /* Both parts of the operation are dead.  */
1672                     goto do_remove;
1673                 }
1674                 /* The high part of the operation is dead; generate the low. */
1675                 s->gen_opc_buf[op_index] = op = op_new;
1676                 args[1] = args[2];
1677                 args[2] = args[3];
1678             } else if (have_op_new2 && dead_temps[args[0]]
1679                        && !mem_temps[args[0]]) {
1680                 /* The low part of the operation is dead; generate the high.  */
1681                 s->gen_opc_buf[op_index] = op = op_new2;
1682                 args[0] = args[1];
1683                 args[1] = args[2];
1684                 args[2] = args[3];
1685             } else {
1686                 goto do_not_remove;
1687             }
1688             assert(s->gen_opc_buf[op_index + 1] == INDEX_op_nop);
1689             tcg_set_nop(s, s->gen_opc_buf + op_index + 1, args + 3, 1);
1690             /* Mark the single-word operation live.  */
1691             nb_oargs = 1;
1692             goto do_not_remove;
1693 
1694         default:
1695             /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
1696             args -= def->nb_args;
1697             nb_iargs = def->nb_iargs;
1698             nb_oargs = def->nb_oargs;
1699 
1700             /* Test if the operation can be removed because all
1701                its outputs are dead. We assume that nb_oargs == 0
1702                implies side effects */
1703             if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) {
1704                 for(i = 0; i < nb_oargs; i++) {
1705                     arg = args[i];
1706                     if (!dead_temps[arg] || mem_temps[arg]) {
1707                         goto do_not_remove;
1708                     }
1709                 }
1710             do_remove:
1711                 tcg_set_nop(s, s->gen_opc_buf + op_index, args, def->nb_args);
1712 #ifdef CONFIG_PROFILER
1713                 s->del_op_count++;
1714 #endif
1715             } else {
1716             do_not_remove:
1717 
1718                 /* output args are dead */
1719                 dead_args = 0;
1720                 sync_args = 0;
1721                 for(i = 0; i < nb_oargs; i++) {
1722                     arg = args[i];
1723                     if (dead_temps[arg]) {
1724                         dead_args |= (1 << i);
1725                     }
1726                     if (mem_temps[arg]) {
1727                         sync_args |= (1 << i);
1728                     }
1729                     dead_temps[arg] = 1;
1730                     mem_temps[arg] = 0;
1731                 }
1732 
1733                 /* if end of basic block, update */
1734                 if (def->flags & TCG_OPF_BB_END) {
1735                     tcg_la_bb_end(s, dead_temps, mem_temps);
1736                 } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
1737                     /* globals should be synced to memory */
1738                     memset(mem_temps, 1, s->nb_globals);
1739                 }
1740 
1741                 /* input args are live */
1742                 for(i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
1743                     arg = args[i];
1744                     if (dead_temps[arg]) {
1745                         dead_args |= (1 << i);
1746                     }
1747                     dead_temps[arg] = 0;
1748                 }
1749                 s->op_dead_args[op_index] = dead_args;
1750                 s->op_sync_args[op_index] = sync_args;
1751             }
1752             break;
1753         }
1754         op_index--;
1755     }
1756 
1757     if (args != s->gen_opparam_buf) {
1758         tcg_abort();
1759     }
1760 }
1761 #else
1762 /* dummy liveness analysis */
1763 static void tcg_liveness_analysis(TCGContext *s)
1764 {
1765     int nb_ops;
1766     nb_ops = s->gen_opc_ptr - s->gen_opc_buf;
1767 
1768     s->op_dead_args = tcg_malloc(nb_ops * sizeof(uint16_t));
1769     memset(s->op_dead_args, 0, nb_ops * sizeof(uint16_t));
1770     s->op_sync_args = tcg_malloc(nb_ops * sizeof(uint8_t));
1771     memset(s->op_sync_args, 0, nb_ops * sizeof(uint8_t));
1772 }
1773 #endif
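     /* For each op, the analysis above records two bitmasks indexed by
        argument position: op_dead_args, where bit i set means the temporary
        used as argument i is dead after the op (for an input, this is its
        last use), and op_sync_args, where bit i set means output argument i
        must also be written back to its memory slot.  The register
        allocator below consumes them via IS_DEAD_ARG() and
        NEED_SYNC_ARG().  */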
1774 
1775 #ifndef NDEBUG
1776 static void dump_regs(TCGContext *s)
1777 {
1778     TCGTemp *ts;
1779     int i;
1780     char buf[64];
1781 
1782     for(i = 0; i < s->nb_temps; i++) {
1783         ts = &s->temps[i];
1784         printf("  %10s: ", tcg_get_arg_str_idx(s, buf, sizeof(buf), i));
1785         switch(ts->val_type) {
1786         case TEMP_VAL_REG:
1787             printf("%s", tcg_target_reg_names[ts->reg]);
1788             break;
1789         case TEMP_VAL_MEM:
1790             printf("%d(%s)", (int)ts->mem_offset, tcg_target_reg_names[ts->mem_reg]);
1791             break;
1792         case TEMP_VAL_CONST:
1793             printf("$0x%" TCG_PRIlx, ts->val);
1794             break;
1795         case TEMP_VAL_DEAD:
1796             printf("D");
1797             break;
1798         default:
1799             printf("???");
1800             break;
1801         }
1802         printf("\n");
1803     }
1804 
1805     for(i = 0; i < TCG_TARGET_NB_REGS; i++) {
1806         if (s->reg_to_temp[i] >= 0) {
1807             printf("%s: %s\n",
1808                    tcg_target_reg_names[i],
1809                    tcg_get_arg_str_idx(s, buf, sizeof(buf), s->reg_to_temp[i]));
1810         }
1811     }
1812 }
1813 
1814 static void check_regs(TCGContext *s)
1815 {
1816     int reg, k;
1817     TCGTemp *ts;
1818     char buf[64];
1819 
1820     for(reg = 0; reg < TCG_TARGET_NB_REGS; reg++) {
1821         k = s->reg_to_temp[reg];
1822         if (k >= 0) {
1823             ts = &s->temps[k];
1824             if (ts->val_type != TEMP_VAL_REG ||
1825                 ts->reg != reg) {
1826                 printf("Inconsistency for register %s:\n",
1827                        tcg_target_reg_names[reg]);
1828                 goto fail;
1829             }
1830         }
1831     }
1832     for(k = 0; k < s->nb_temps; k++) {
1833         ts = &s->temps[k];
1834         if (ts->val_type == TEMP_VAL_REG &&
1835             !ts->fixed_reg &&
1836             s->reg_to_temp[ts->reg] != k) {
1837                 printf("Inconsistency for temp %s:\n",
1838                        tcg_get_arg_str_idx(s, buf, sizeof(buf), k));
1839         fail:
1840                 printf("reg state:\n");
1841                 dump_regs(s);
1842                 tcg_abort();
1843         }
1844     }
1845 }
1846 #endif
1847 
1848 static void temp_allocate_frame(TCGContext *s, int temp)
1849 {
1850     TCGTemp *ts;
1851     ts = &s->temps[temp];
1852 #if !(defined(__sparc__) && TCG_TARGET_REG_BITS == 64)
1853     /* Sparc64 stack is accessed with an offset of 2047 */
1854     s->current_frame_offset = (s->current_frame_offset +
1855                                (tcg_target_long)sizeof(tcg_target_long) - 1) &
1856         ~(sizeof(tcg_target_long) - 1);
1857 #endif
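         /* e.g. with an 8-byte tcg_target_long, a frame offset of 12 is
            rounded up to 16 before the slot is assigned */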
1858     if (s->current_frame_offset + (tcg_target_long)sizeof(tcg_target_long) >
1859         s->frame_end) {
1860         tcg_abort();
1861     }
1862     ts->mem_offset = s->current_frame_offset;
1863     ts->mem_reg = s->frame_reg;
1864     ts->mem_allocated = 1;
1865     s->current_frame_offset += sizeof(tcg_target_long);
1866 }
1867 
1868 /* sync register 'reg' by saving it to the corresponding temporary */
1869 static inline void tcg_reg_sync(TCGContext *s, int reg)
1870 {
1871     TCGTemp *ts;
1872     int temp;
1873 
1874     temp = s->reg_to_temp[reg];
1875     ts = &s->temps[temp];
1876     assert(ts->val_type == TEMP_VAL_REG);
1877     if (!ts->mem_coherent && !ts->fixed_reg) {
1878         if (!ts->mem_allocated) {
1879             temp_allocate_frame(s, temp);
1880         }
1881         tcg_out_st(s, ts->type, reg, ts->mem_reg, ts->mem_offset);
1882     }
1883     ts->mem_coherent = 1;
1884 }
1885 
1886 /* free register 'reg' by spilling the corresponding temporary if necessary */
1887 static void tcg_reg_free(TCGContext *s, int reg)
1888 {
1889     int temp;
1890 
1891     temp = s->reg_to_temp[reg];
1892     if (temp != -1) {
1893         tcg_reg_sync(s, reg);
1894         s->temps[temp].val_type = TEMP_VAL_MEM;
1895         s->reg_to_temp[reg] = -1;
1896     }
1897 }
1898 
1899 /* Allocate a register belonging to reg1 & ~reg2 */
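     /* A free register from tcg_target_reg_alloc_order is preferred; if
        none is available, the temporary occupying one of the allowed
        registers is spilled back to memory with tcg_reg_free().  */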
1900 static int tcg_reg_alloc(TCGContext *s, TCGRegSet reg1, TCGRegSet reg2)
1901 {
1902     int i, reg;
1903     TCGRegSet reg_ct;
1904 
1905     tcg_regset_andnot(reg_ct, reg1, reg2);
1906 
1907     /* first try free registers */
1908     for(i = 0; i < ARRAY_SIZE(tcg_target_reg_alloc_order); i++) {
1909         reg = tcg_target_reg_alloc_order[i];
1910         if (tcg_regset_test_reg(reg_ct, reg) && s->reg_to_temp[reg] == -1)
1911             return reg;
1912     }
1913 
1914     /* XXX: do better spill choice */
1915     for(i = 0; i < ARRAY_SIZE(tcg_target_reg_alloc_order); i++) {
1916         reg = tcg_target_reg_alloc_order[i];
1917         if (tcg_regset_test_reg(reg_ct, reg)) {
1918             tcg_reg_free(s, reg);
1919             return reg;
1920         }
1921     }
1922 
1923     tcg_abort();
1924 }
1925 
1926 /* mark a temporary as dead. */
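     /* Globals and local temps fall back to their canonical memory
        location (TEMP_VAL_MEM); other temps become TEMP_VAL_DEAD.  Fixed
        registers are left untouched.  */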
1927 static inline void temp_dead(TCGContext *s, int temp)
1928 {
1929     TCGTemp *ts;
1930 
1931     ts = &s->temps[temp];
1932     if (!ts->fixed_reg) {
1933         if (ts->val_type == TEMP_VAL_REG) {
1934             s->reg_to_temp[ts->reg] = -1;
1935         }
1936         if (temp < s->nb_globals || ts->temp_local) {
1937             ts->val_type = TEMP_VAL_MEM;
1938         } else {
1939             ts->val_type = TEMP_VAL_DEAD;
1940         }
1941     }
1942 }
1943 
1944 /* sync a temporary to memory. 'allocated_regs' is used in case a
1945    temporary register needs to be allocated to store a constant. */
1946 static inline void temp_sync(TCGContext *s, int temp, TCGRegSet allocated_regs)
1947 {
1948     TCGTemp *ts;
1949 
1950     ts = &s->temps[temp];
1951     if (!ts->fixed_reg) {
1952         switch(ts->val_type) {
1953         case TEMP_VAL_CONST:
1954             ts->reg = tcg_reg_alloc(s, tcg_target_available_regs[ts->type],
1955                                     allocated_regs);
1956             ts->val_type = TEMP_VAL_REG;
1957             s->reg_to_temp[ts->reg] = temp;
1958             ts->mem_coherent = 0;
1959             tcg_out_movi(s, ts->type, ts->reg, ts->val);
1960             /* fallthrough */
1961         case TEMP_VAL_REG:
1962             tcg_reg_sync(s, ts->reg);
1963             break;
1964         case TEMP_VAL_DEAD:
1965         case TEMP_VAL_MEM:
1966             break;
1967         default:
1968             tcg_abort();
1969         }
1970     }
1971 }
1972 
1973 /* save a temporary to memory. 'allocated_regs' is used in case a
1974    temporary register needs to be allocated to store a constant. */
1975 static inline void temp_save(TCGContext *s, int temp, TCGRegSet allocated_regs)
1976 {
1977 #ifdef USE_LIVENESS_ANALYSIS
1978     /* The liveness analysis already ensures that globals are back
1979        in memory. Keep an assert for safety. */
1980     assert(s->temps[temp].val_type == TEMP_VAL_MEM || s->temps[temp].fixed_reg);
1981 #else
1982     temp_sync(s, temp, allocated_regs);
1983     temp_dead(s, temp);
1984 #endif
1985 }
1986 
1987 /* save globals to their canonical location and assume they can be
1988    modified by the following code. 'allocated_regs' is used in case a
1989    temporary register needs to be allocated to store a constant. */
1990 static void save_globals(TCGContext *s, TCGRegSet allocated_regs)
1991 {
1992     int i;
1993 
1994     for(i = 0; i < s->nb_globals; i++) {
1995         temp_save(s, i, allocated_regs);
1996     }
1997 }
1998 
1999 /* sync globals to their canonical location and assume they can be
2000    read by the following code. 'allocated_regs' is used in case a
2001    temporary register needs to be allocated to store a constant. */
2002 static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
2003 {
2004     int i;
2005 
2006     for (i = 0; i < s->nb_globals; i++) {
2007 #ifdef USE_LIVENESS_ANALYSIS
2008         assert(s->temps[i].val_type != TEMP_VAL_REG || s->temps[i].fixed_reg ||
2009                s->temps[i].mem_coherent);
2010 #else
2011         temp_sync(s, i, allocated_regs);
2012 #endif
2013     }
2014 }
2015 
2016 /* at the end of a basic block, we assume all temporaries are dead and
2017    all globals are stored at their canonical location. */
2018 static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
2019 {
2020     TCGTemp *ts;
2021     int i;
2022 
2023     for(i = s->nb_globals; i < s->nb_temps; i++) {
2024         ts = &s->temps[i];
2025         if (ts->temp_local) {
2026             temp_save(s, i, allocated_regs);
2027         } else {
2028 #ifdef USE_LIVENESS_ANALYSIS
2029             /* The liveness analysis already ensures that temps are dead.
2030                Keep an assert for safety. */
2031             assert(ts->val_type == TEMP_VAL_DEAD);
2032 #else
2033             temp_dead(s, i);
2034 #endif
2035         }
2036     }
2037 
2038     save_globals(s, allocated_regs);
2039 }
2040 
2041 #define IS_DEAD_ARG(n) ((dead_args >> (n)) & 1)
2042 #define NEED_SYNC_ARG(n) ((sync_args >> (n)) & 1)
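     /* e.g. for an op with one output (arg 0) and two inputs (args 1, 2),
        dead_args == 0x6 means both inputs hold their last use here, and
        NEED_SYNC_ARG(0) tests whether the output must additionally be
        written back to memory.  */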
2043 
2044 static void tcg_reg_alloc_movi(TCGContext *s, const TCGArg *args,
2045                                uint16_t dead_args, uint8_t sync_args)
2046 {
2047     TCGTemp *ots;
2048     tcg_target_ulong val;
2049 
2050     ots = &s->temps[args[0]];
2051     val = args[1];
2052 
2053     if (ots->fixed_reg) {
2054         /* for fixed registers, we do not do any constant
2055            propagation */
2056         tcg_out_movi(s, ots->type, ots->reg, val);
2057     } else {
2058         /* The movi is not explicitly generated here */
2059         if (ots->val_type == TEMP_VAL_REG)
2060             s->reg_to_temp[ots->reg] = -1;
2061         ots->val_type = TEMP_VAL_CONST;
2062         ots->val = val;
2063     }
2064     if (NEED_SYNC_ARG(0)) {
2065         temp_sync(s, args[0], s->reserved_regs);
2066     }
2067     if (IS_DEAD_ARG(0)) {
2068         temp_dead(s, args[0]);
2069     }
2070 }
2071 
2072 static void tcg_reg_alloc_mov(TCGContext *s, const TCGOpDef *def,
2073                               const TCGArg *args, uint16_t dead_args,
2074                               uint8_t sync_args)
2075 {
2076     TCGRegSet allocated_regs;
2077     TCGTemp *ts, *ots;
2078     const TCGArgConstraint *arg_ct, *oarg_ct;
2079 
2080     tcg_regset_set(allocated_regs, s->reserved_regs);
2081     ots = &s->temps[args[0]];
2082     ts = &s->temps[args[1]];
2083     oarg_ct = &def->args_ct[0];
2084     arg_ct = &def->args_ct[1];
2085 
2086     /* If the source value is not in a register, and we're going to be
2087        forced to have it in a register in order to perform the copy,
2088        then copy the SOURCE value into its own register first.  That way
2089        we don't have to reload SOURCE the next time it is used. */
2090     if (((NEED_SYNC_ARG(0) || ots->fixed_reg) && ts->val_type != TEMP_VAL_REG)
2091         || ts->val_type == TEMP_VAL_MEM) {
2092         ts->reg = tcg_reg_alloc(s, arg_ct->u.regs, allocated_regs);
2093         if (ts->val_type == TEMP_VAL_MEM) {
2094             tcg_out_ld(s, ts->type, ts->reg, ts->mem_reg, ts->mem_offset);
2095             ts->mem_coherent = 1;
2096         } else if (ts->val_type == TEMP_VAL_CONST) {
2097             tcg_out_movi(s, ts->type, ts->reg, ts->val);
2098         }
2099         s->reg_to_temp[ts->reg] = args[1];
2100         ts->val_type = TEMP_VAL_REG;
2101     }
2102 
2103     if (IS_DEAD_ARG(0) && !ots->fixed_reg) {
2104         /* mov to a non-saved dead register makes no sense (even with
2105            liveness analysis disabled). */
2106         assert(NEED_SYNC_ARG(0));
2107         /* The code above should have moved the temp to a register. */
2108         assert(ts->val_type == TEMP_VAL_REG);
2109         if (!ots->mem_allocated) {
2110             temp_allocate_frame(s, args[0]);
2111         }
2112         tcg_out_st(s, ots->type, ts->reg, ots->mem_reg, ots->mem_offset);
2113         if (IS_DEAD_ARG(1)) {
2114             temp_dead(s, args[1]);
2115         }
2116         temp_dead(s, args[0]);
2117     } else if (ts->val_type == TEMP_VAL_CONST) {
2118         /* propagate constant */
2119         if (ots->val_type == TEMP_VAL_REG) {
2120             s->reg_to_temp[ots->reg] = -1;
2121         }
2122         ots->val_type = TEMP_VAL_CONST;
2123         ots->val = ts->val;
2124     } else {
2125         /* The code in the first if block should have moved the
2126            temp to a register. */
2127         assert(ts->val_type == TEMP_VAL_REG);
2128         if (IS_DEAD_ARG(1) && !ts->fixed_reg && !ots->fixed_reg) {
2129             /* the mov can be suppressed */
2130             if (ots->val_type == TEMP_VAL_REG) {
2131                 s->reg_to_temp[ots->reg] = -1;
2132             }
2133             ots->reg = ts->reg;
2134             temp_dead(s, args[1]);
2135         } else {
2136             if (ots->val_type != TEMP_VAL_REG) {
2137                 /* When allocating a new register, make sure to not spill the
2138                    input one. */
2139                 tcg_regset_set_reg(allocated_regs, ts->reg);
2140                 ots->reg = tcg_reg_alloc(s, oarg_ct->u.regs, allocated_regs);
2141             }
2142             tcg_out_mov(s, ots->type, ots->reg, ts->reg);
2143         }
2144         ots->val_type = TEMP_VAL_REG;
2145         ots->mem_coherent = 0;
2146         s->reg_to_temp[ots->reg] = args[0];
2147         if (NEED_SYNC_ARG(0)) {
2148             tcg_reg_sync(s, ots->reg);
2149         }
2150     }
2151 }
2152 
2153 static void tcg_reg_alloc_op(TCGContext *s,
2154                              const TCGOpDef *def, TCGOpcode opc,
2155                              const TCGArg *args, uint16_t dead_args,
2156                              uint8_t sync_args)
2157 {
2158     TCGRegSet allocated_regs;
2159     int i, k, nb_iargs, nb_oargs, reg;
2160     TCGArg arg;
2161     const TCGArgConstraint *arg_ct;
2162     TCGTemp *ts;
2163     TCGArg new_args[TCG_MAX_OP_ARGS];
2164     int const_args[TCG_MAX_OP_ARGS];
2165 
2166     nb_oargs = def->nb_oargs;
2167     nb_iargs = def->nb_iargs;
2168 
2169     /* copy constants */
2170     memcpy(new_args + nb_oargs + nb_iargs,
2171            args + nb_oargs + nb_iargs,
2172            sizeof(TCGArg) * def->nb_cargs);
2173 
2174     /* satisfy input constraints */
2175     tcg_regset_set(allocated_regs, s->reserved_regs);
2176     for(k = 0; k < nb_iargs; k++) {
2177         i = def->sorted_args[nb_oargs + k];
2178         arg = args[i];
2179         arg_ct = &def->args_ct[i];
2180         ts = &s->temps[arg];
2181         if (ts->val_type == TEMP_VAL_MEM) {
2182             reg = tcg_reg_alloc(s, arg_ct->u.regs, allocated_regs);
2183             tcg_out_ld(s, ts->type, reg, ts->mem_reg, ts->mem_offset);
2184             ts->val_type = TEMP_VAL_REG;
2185             ts->reg = reg;
2186             ts->mem_coherent = 1;
2187             s->reg_to_temp[reg] = arg;
2188         } else if (ts->val_type == TEMP_VAL_CONST) {
2189             if (tcg_target_const_match(ts->val, ts->type, arg_ct)) {
2190                 /* constant is OK for instruction */
2191                 const_args[i] = 1;
2192                 new_args[i] = ts->val;
2193                 goto iarg_end;
2194             } else {
2195                 /* need to move to a register */
2196                 reg = tcg_reg_alloc(s, arg_ct->u.regs, allocated_regs);
2197                 tcg_out_movi(s, ts->type, reg, ts->val);
2198                 ts->val_type = TEMP_VAL_REG;
2199                 ts->reg = reg;
2200                 ts->mem_coherent = 0;
2201                 s->reg_to_temp[reg] = arg;
2202             }
2203         }
2204         assert(ts->val_type == TEMP_VAL_REG);
2205         if (arg_ct->ct & TCG_CT_IALIAS) {
2206             if (ts->fixed_reg) {
2207                 /* if fixed register, we must allocate a new register
2208                    if the alias is not the same register */
2209                 if (arg != args[arg_ct->alias_index])
2210                     goto allocate_in_reg;
2211             } else {
2212                 /* if the input is aliased to an output and if it is
2213                    not dead after the instruction, we must allocate
2214                    a new register and move it */
2215                 if (!IS_DEAD_ARG(i)) {
2216                     goto allocate_in_reg;
2217                 }
2218             }
2219         }
2220         reg = ts->reg;
2221         if (tcg_regset_test_reg(arg_ct->u.regs, reg)) {
2222             /* nothing to do: the constraint is satisfied */
2223         } else {
2224         allocate_in_reg:
2225             /* allocate a new register matching the constraint
2226                and move the temporary register into it */
2227             reg = tcg_reg_alloc(s, arg_ct->u.regs, allocated_regs);
2228             tcg_out_mov(s, ts->type, reg, ts->reg);
2229         }
2230         new_args[i] = reg;
2231         const_args[i] = 0;
2232         tcg_regset_set_reg(allocated_regs, reg);
2233     iarg_end: ;
2234     }
2235 
2236     /* mark dead temporaries and free the associated registers */
2237     for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
2238         if (IS_DEAD_ARG(i)) {
2239             temp_dead(s, args[i]);
2240         }
2241     }
2242 
2243     if (def->flags & TCG_OPF_BB_END) {
2244         tcg_reg_alloc_bb_end(s, allocated_regs);
2245     } else {
2246         if (def->flags & TCG_OPF_CALL_CLOBBER) {
2247             /* XXX: permit generic clobber register list? */
2248             for(reg = 0; reg < TCG_TARGET_NB_REGS; reg++) {
2249                 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, reg)) {
2250                     tcg_reg_free(s, reg);
2251                 }
2252             }
2253         }
2254         if (def->flags & TCG_OPF_SIDE_EFFECTS) {
2255             /* sync globals if the op has side effects and might trigger
2256                an exception. */
2257             sync_globals(s, allocated_regs);
2258         }
2259 
2260         /* satisfy the output constraints */
2261         tcg_regset_set(allocated_regs, s->reserved_regs);
2262         for(k = 0; k < nb_oargs; k++) {
2263             i = def->sorted_args[k];
2264             arg = args[i];
2265             arg_ct = &def->args_ct[i];
2266             ts = &s->temps[arg];
2267             if (arg_ct->ct & TCG_CT_ALIAS) {
2268                 reg = new_args[arg_ct->alias_index];
2269             } else {
2270                 /* if fixed register, we try to use it */
2271                 reg = ts->reg;
2272                 if (ts->fixed_reg &&
2273                     tcg_regset_test_reg(arg_ct->u.regs, reg)) {
2274                     goto oarg_end;
2275                 }
2276                 reg = tcg_reg_alloc(s, arg_ct->u.regs, allocated_regs);
2277             }
2278             tcg_regset_set_reg(allocated_regs, reg);
2279             /* if a fixed register is used, then a move will be done afterwards */
2280             if (!ts->fixed_reg) {
2281                 if (ts->val_type == TEMP_VAL_REG) {
2282                     s->reg_to_temp[ts->reg] = -1;
2283                 }
2284                 ts->val_type = TEMP_VAL_REG;
2285                 ts->reg = reg;
2286                 /* temp value is modified, so the value kept in memory is
2287                    potentially not the same */
2288                 ts->mem_coherent = 0;
2289                 s->reg_to_temp[reg] = arg;
2290             }
2291         oarg_end:
2292             new_args[i] = reg;
2293         }
2294     }
2295 
2296     /* emit instruction */
2297     tcg_out_op(s, opc, new_args, const_args);
2298 
2299     /* move the outputs in the correct register if needed */
2300     for(i = 0; i < nb_oargs; i++) {
2301         ts = &s->temps[args[i]];
2302         reg = new_args[i];
2303         if (ts->fixed_reg && ts->reg != reg) {
2304             tcg_out_mov(s, ts->type, ts->reg, reg);
2305         }
2306         if (NEED_SYNC_ARG(i)) {
2307             tcg_reg_sync(s, reg);
2308         }
2309         if (IS_DEAD_ARG(i)) {
2310             temp_dead(s, args[i]);
2311         }
2312     }
2313 }
2314 
2315 #ifdef TCG_TARGET_STACK_GROWSUP
2316 #define STACK_DIR(x) (-(x))
2317 #else
2318 #define STACK_DIR(x) (x)
2319 #endif
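     /* STACK_DIR() negates a stack offset adjustment on hosts whose stack
        grows upward, so the same arithmetic can be used in either
        direction.  */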
2320 
2321 static int tcg_reg_alloc_call(TCGContext *s, const TCGOpDef *def,
2322                               TCGOpcode opc, const TCGArg *args,
2323                               uint16_t dead_args, uint8_t sync_args)
2324 {
2325     int nb_iargs, nb_oargs, flags, nb_regs, i, reg, nb_params;
2326     TCGArg arg, func_arg;
2327     TCGTemp *ts;
2328     intptr_t stack_offset;
2329     size_t call_stack_size;
2330     uintptr_t func_addr;
2331     int const_func_arg, allocate_args;
2332     TCGRegSet allocated_regs;
2333     const TCGArgConstraint *arg_ct;
2334 
2335     arg = *args++;
2336 
2337     nb_oargs = arg >> 16;
2338     nb_iargs = arg & 0xffff;
2339     nb_params = nb_iargs - 1;
2340 
2341     flags = args[nb_oargs + nb_iargs];
2342 
2343     nb_regs = ARRAY_SIZE(tcg_target_call_iarg_regs);
2344     if (nb_regs > nb_params)
2345         nb_regs = nb_params;
2346 
2347     /* assign stack slots first */
2348     call_stack_size = (nb_params - nb_regs) * sizeof(tcg_target_long);
2349     call_stack_size = (call_stack_size + TCG_TARGET_STACK_ALIGN - 1) &
2350         ~(TCG_TARGET_STACK_ALIGN - 1);
2351     allocate_args = (call_stack_size > TCG_STATIC_CALL_ARGS_SIZE);
2352     if (allocate_args) {
2353         /* XXX: if more than TCG_STATIC_CALL_ARGS_SIZE is needed,
2354            preallocate call stack */
2355         tcg_abort();
2356     }
2357 
2358     stack_offset = TCG_TARGET_CALL_STACK_OFFSET;
2359     for(i = nb_regs; i < nb_params; i++) {
2360         arg = args[nb_oargs + i];
2361 #ifdef TCG_TARGET_STACK_GROWSUP
2362         stack_offset -= sizeof(tcg_target_long);
2363 #endif
2364         if (arg != TCG_CALL_DUMMY_ARG) {
2365             ts = &s->temps[arg];
2366             if (ts->val_type == TEMP_VAL_REG) {
2367                 tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK, stack_offset);
2368             } else if (ts->val_type == TEMP_VAL_MEM) {
2369                 reg = tcg_reg_alloc(s, tcg_target_available_regs[ts->type],
2370                                     s->reserved_regs);
2371                 /* XXX: not correct if reading values from the stack */
2372                 tcg_out_ld(s, ts->type, reg, ts->mem_reg, ts->mem_offset);
2373                 tcg_out_st(s, ts->type, reg, TCG_REG_CALL_STACK, stack_offset);
2374             } else if (ts->val_type == TEMP_VAL_CONST) {
2375                 reg = tcg_reg_alloc(s, tcg_target_available_regs[ts->type],
2376                                     s->reserved_regs);
2377                 /* XXX: sign extend may be needed on some targets */
2378                 tcg_out_movi(s, ts->type, reg, ts->val);
2379                 tcg_out_st(s, ts->type, reg, TCG_REG_CALL_STACK, stack_offset);
2380             } else {
2381                 tcg_abort();
2382             }
2383         }
2384 #ifndef TCG_TARGET_STACK_GROWSUP
2385         stack_offset += sizeof(tcg_target_long);
2386 #endif
2387     }
2388 
2389     /* assign input registers */
2390     tcg_regset_set(allocated_regs, s->reserved_regs);
2391     for(i = 0; i < nb_regs; i++) {
2392         arg = args[nb_oargs + i];
2393         if (arg != TCG_CALL_DUMMY_ARG) {
2394             ts = &s->temps[arg];
2395             reg = tcg_target_call_iarg_regs[i];
2396             tcg_reg_free(s, reg);
2397             if (ts->val_type == TEMP_VAL_REG) {
2398                 if (ts->reg != reg) {
2399                     tcg_out_mov(s, ts->type, reg, ts->reg);
2400                 }
2401             } else if (ts->val_type == TEMP_VAL_MEM) {
2402                 tcg_out_ld(s, ts->type, reg, ts->mem_reg, ts->mem_offset);
2403             } else if (ts->val_type == TEMP_VAL_CONST) {
2404                 /* XXX: sign extend? */
2405                 tcg_out_movi(s, ts->type, reg, ts->val);
2406             } else {
2407                 tcg_abort();
2408             }
2409             tcg_regset_set_reg(allocated_regs, reg);
2410         }
2411     }
2412 
2413     /* assign function address */
2414     func_arg = args[nb_oargs + nb_iargs - 1];
2415     arg_ct = &def->args_ct[0];
2416     ts = &s->temps[func_arg];
2417     func_addr = ts->val;
2418     const_func_arg = 0;
2419     if (ts->val_type == TEMP_VAL_MEM) {
2420         reg = tcg_reg_alloc(s, arg_ct->u.regs, allocated_regs);
2421         tcg_out_ld(s, ts->type, reg, ts->mem_reg, ts->mem_offset);
2422         func_arg = reg;
2423         tcg_regset_set_reg(allocated_regs, reg);
2424     } else if (ts->val_type == TEMP_VAL_REG) {
2425         reg = ts->reg;
2426         if (!tcg_regset_test_reg(arg_ct->u.regs, reg)) {
2427             reg = tcg_reg_alloc(s, arg_ct->u.regs, allocated_regs);
2428             tcg_out_mov(s, ts->type, reg, ts->reg);
2429         }
2430         func_arg = reg;
2431         tcg_regset_set_reg(allocated_regs, reg);
2432     } else if (ts->val_type == TEMP_VAL_CONST) {
2433         if (tcg_target_const_match(func_addr, ts->type, arg_ct)) {
2434             const_func_arg = 1;
2435             func_arg = func_addr;
2436         } else {
2437             reg = tcg_reg_alloc(s, arg_ct->u.regs, allocated_regs);
2438             tcg_out_movi(s, ts->type, reg, func_addr);
2439             func_arg = reg;
2440             tcg_regset_set_reg(allocated_regs, reg);
2441         }
2442     } else {
2443         tcg_abort();
2444     }
2445 
2446 
2447     /* mark dead temporaries and free the associated registers */
2448     for(i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
2449         if (IS_DEAD_ARG(i)) {
2450             temp_dead(s, args[i]);
2451         }
2452     }
2453 
2454     /* clobber call registers */
2455     for(reg = 0; reg < TCG_TARGET_NB_REGS; reg++) {
2456         if (tcg_regset_test_reg(tcg_target_call_clobber_regs, reg)) {
2457             tcg_reg_free(s, reg);
2458         }
2459     }
2460 
2461     /* Save globals if they might be written by the helper, sync them if
2462        they might be read. */
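         /* Note: TCG_CALL_NO_READ_GLOBALS is the stronger flag; it implies
            that the helper neither reads nor writes globals.  */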
2463     if (flags & TCG_CALL_NO_READ_GLOBALS) {
2464         /* Nothing to do */
2465     } else if (flags & TCG_CALL_NO_WRITE_GLOBALS) {
2466         sync_globals(s, allocated_regs);
2467     } else {
2468         save_globals(s, allocated_regs);
2469     }
2470 
2471     tcg_out_op(s, opc, &func_arg, &const_func_arg);
2472 
2473     /* assign output registers and emit moves if needed */
2474     for(i = 0; i < nb_oargs; i++) {
2475         arg = args[i];
2476         ts = &s->temps[arg];
2477         reg = tcg_target_call_oarg_regs[i];
2478         assert(s->reg_to_temp[reg] == -1);
2479 
2480         if (ts->fixed_reg) {
2481             if (ts->reg != reg) {
2482                 tcg_out_mov(s, ts->type, ts->reg, reg);
2483             }
2484         } else {
2485             if (ts->val_type == TEMP_VAL_REG) {
2486                 s->reg_to_temp[ts->reg] = -1;
2487             }
2488             ts->val_type = TEMP_VAL_REG;
2489             ts->reg = reg;
2490             ts->mem_coherent = 0;
2491             s->reg_to_temp[reg] = arg;
2492             if (NEED_SYNC_ARG(i)) {
2493                 tcg_reg_sync(s, reg);
2494             }
2495             if (IS_DEAD_ARG(i)) {
2496                 temp_dead(s, args[i]);
2497             }
2498         }
2499     }
2500 
2501     return nb_iargs + nb_oargs + def->nb_cargs + 1;
2502 }
2503 
2504 #ifdef CONFIG_PROFILER
2505 
2506 static int64_t tcg_table_op_count[NB_OPS];
2507 
2508 static void dump_op_count(void)
2509 {
2510     int i;
2511     FILE *f;
2512     f = fopen("/tmp/op.log", "w");
2513     for(i = INDEX_op_end; i < NB_OPS; i++) {
2514         fprintf(f, "%s %" PRId64 "\n", tcg_op_defs[i].name, tcg_table_op_count[i]);
2515     }
2516     fclose(f);
2517 }
2518 #endif
2519 
2520 
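     /* Common code for tcg_gen_code() and tcg_gen_code_search_pc(): run
        the optional optimizer, perform liveness analysis, then allocate
        registers and emit host code op by op.  If search_pc >= 0, code
        generation stops as soon as the emitted code extends past that
        offset and the index of the op just generated is returned;
        otherwise -1 is returned once the whole block has been emitted.  */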
2521 static inline int tcg_gen_code_common(TCGContext *s, uint8_t *gen_code_buf,
2522                                       long search_pc)
2523 {
2524     TCGOpcode opc;
2525     int op_index;
2526     const TCGOpDef *def;
2527     const TCGArg *args;
2528 
2529 #ifdef DEBUG_DISAS
2530     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP))) {
2531         qemu_log("OP:\n");
2532         tcg_dump_ops(s);
2533         qemu_log("\n");
2534     }
2535 #endif
2536 
2537 #ifdef CONFIG_PROFILER
2538     s->opt_time -= profile_getclock();
2539 #endif
2540 
2541 #ifdef USE_TCG_OPTIMIZATIONS
2542     s->gen_opparam_ptr =
2543         tcg_optimize(s, s->gen_opc_ptr, s->gen_opparam_buf, tcg_op_defs);
2544 #endif
2545 
2546 #ifdef CONFIG_PROFILER
2547     s->opt_time += profile_getclock();
2548     s->la_time -= profile_getclock();
2549 #endif
2550 
2551     tcg_liveness_analysis(s);
2552 
2553 #ifdef CONFIG_PROFILER
2554     s->la_time += profile_getclock();
2555 #endif
2556 
2557 #ifdef DEBUG_DISAS
2558     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT))) {
2559         qemu_log("OP after optimization and liveness analysis:\n");
2560         tcg_dump_ops(s);
2561         qemu_log("\n");
2562     }
2563 #endif
2564 
2565     tcg_reg_alloc_start(s);
2566 
2567     s->code_buf = gen_code_buf;
2568     s->code_ptr = gen_code_buf;
2569 
2570     tcg_out_tb_init(s);
2571 
2572     args = s->gen_opparam_buf;
2573     op_index = 0;
2574 
2575     for(;;) {
2576         opc = s->gen_opc_buf[op_index];
2577 #ifdef CONFIG_PROFILER
2578         tcg_table_op_count[opc]++;
2579 #endif
2580         def = &tcg_op_defs[opc];
2581 #if 0
2582         printf("%s: %d %d %d\n", def->name,
2583                def->nb_oargs, def->nb_iargs, def->nb_cargs);
2584         //        dump_regs(s);
2585 #endif
2586         switch(opc) {
2587         case INDEX_op_mov_i32:
2588         case INDEX_op_mov_i64:
2589             tcg_reg_alloc_mov(s, def, args, s->op_dead_args[op_index],
2590                               s->op_sync_args[op_index]);
2591             break;
2592         case INDEX_op_movi_i32:
2593         case INDEX_op_movi_i64:
2594             tcg_reg_alloc_movi(s, args, s->op_dead_args[op_index],
2595                                s->op_sync_args[op_index]);
2596             break;
2597         case INDEX_op_debug_insn_start:
2598             /* debug instruction */
2599             break;
2600         case INDEX_op_nop:
2601         case INDEX_op_nop1:
2602         case INDEX_op_nop2:
2603         case INDEX_op_nop3:
2604             break;
2605         case INDEX_op_nopn:
2606             args += args[0];
2607             goto next;
2608         case INDEX_op_discard:
2609             temp_dead(s, args[0]);
2610             break;
2611         case INDEX_op_set_label:
2612             tcg_reg_alloc_bb_end(s, s->reserved_regs);
2613             tcg_out_label(s, args[0], s->code_ptr);
2614             break;
2615         case INDEX_op_call:
2616             args += tcg_reg_alloc_call(s, def, opc, args,
2617                                        s->op_dead_args[op_index],
2618                                        s->op_sync_args[op_index]);
2619             goto next;
2620         case INDEX_op_end:
2621             goto the_end;
2622         default:
2623             /* Sanity check that we've not introduced any unhandled opcodes. */
2624             if (def->flags & TCG_OPF_NOT_PRESENT) {
2625                 tcg_abort();
2626             }
2627             /* Note: it would be much faster to have specialized
2628                register allocator functions for some common argument
2629                patterns */
2630             tcg_reg_alloc_op(s, def, opc, args, s->op_dead_args[op_index],
2631                              s->op_sync_args[op_index]);
2632             break;
2633         }
2634         args += def->nb_args;
2635     next:
2636         if (search_pc >= 0 && search_pc < s->code_ptr - gen_code_buf) {
2637             return op_index;
2638         }
2639         op_index++;
2640 #ifndef NDEBUG
2641         check_regs(s);
2642 #endif
2643     }
2644  the_end:
2645     /* Generate TB finalization at the end of block */
2646     tcg_out_tb_finalize(s);
2647     return -1;
2648 }
2649 
2650 int tcg_gen_code(TCGContext *s, uint8_t *gen_code_buf)
2651 {
2652 #ifdef CONFIG_PROFILER
2653     {
2654         int n;
2655         n = (s->gen_opc_ptr - s->gen_opc_buf);
2656         s->op_count += n;
2657         if (n > s->op_count_max)
2658             s->op_count_max = n;
2659 
2660         s->temp_count += s->nb_temps;
2661         if (s->nb_temps > s->temp_count_max)
2662             s->temp_count_max = s->nb_temps;
2663     }
2664 #endif
2665 
2666     tcg_gen_code_common(s, gen_code_buf, -1);
2667 
2668     /* flush instruction cache */
2669     flush_icache_range((uintptr_t)gen_code_buf, (uintptr_t)s->code_ptr);
2670 
2671     return s->code_ptr -  gen_code_buf;
2672 }
2673 
2674 /* Return the index of the micro operation such that the pc after it is <
2675    offset bytes from the start of the TB.  The contents of gen_code_buf must
2676    not be changed, though writing the same values is ok.
2677    Return -1 if not found. */
2678 int tcg_gen_code_search_pc(TCGContext *s, uint8_t *gen_code_buf, long offset)
2679 {
2680     return tcg_gen_code_common(s, gen_code_buf, offset);
2681 }
2682 
2683 #ifdef CONFIG_PROFILER
2684 void tcg_dump_info(FILE *f, fprintf_function cpu_fprintf)
2685 {
2686     TCGContext *s = &tcg_ctx;
2687     int64_t tot;
2688 
2689     tot = s->interm_time + s->code_time;
2690     cpu_fprintf(f, "JIT cycles          %" PRId64 " (%0.3f s at 2.4 GHz)\n",
2691                 tot, tot / 2.4e9);
2692     cpu_fprintf(f, "translated TBs      %" PRId64 " (aborted=%" PRId64 " %0.1f%%)\n",
2693                 s->tb_count,
2694                 s->tb_count1 - s->tb_count,
2695                 s->tb_count1 ? (double)(s->tb_count1 - s->tb_count) / s->tb_count1 * 100.0 : 0);
2696     cpu_fprintf(f, "avg ops/TB          %0.1f max=%d\n",
2697                 s->tb_count ? (double)s->op_count / s->tb_count : 0, s->op_count_max);
2698     cpu_fprintf(f, "deleted ops/TB      %0.2f\n",
2699                 s->tb_count ?
2700                 (double)s->del_op_count / s->tb_count : 0);
2701     cpu_fprintf(f, "avg temps/TB        %0.2f max=%d\n",
2702                 s->tb_count ?
2703                 (double)s->temp_count / s->tb_count : 0,
2704                 s->temp_count_max);
2705 
2706     cpu_fprintf(f, "cycles/op           %0.1f\n",
2707                 s->op_count ? (double)tot / s->op_count : 0);
2708     cpu_fprintf(f, "cycles/in byte      %0.1f\n",
2709                 s->code_in_len ? (double)tot / s->code_in_len : 0);
2710     cpu_fprintf(f, "cycles/out byte     %0.1f\n",
2711                 s->code_out_len ? (double)tot / s->code_out_len : 0);
2712     if (tot == 0)
2713         tot = 1;
2714     cpu_fprintf(f, "  gen_interm time   %0.1f%%\n",
2715                 (double)s->interm_time / tot * 100.0);
2716     cpu_fprintf(f, "  gen_code time     %0.1f%%\n",
2717                 (double)s->code_time / tot * 100.0);
2718     cpu_fprintf(f, "optim./code time    %0.1f%%\n",
2719                 (double)s->opt_time / (s->code_time ? s->code_time : 1)
2720                 * 100.0);
2721     cpu_fprintf(f, "liveness/code time  %0.1f%%\n",
2722                 (double)s->la_time / (s->code_time ? s->code_time : 1) * 100.0);
2723     cpu_fprintf(f, "cpu_restore count   %" PRId64 "\n",
2724                 s->restore_count);
2725     cpu_fprintf(f, "  avg cycles        %0.1f\n",
2726                 s->restore_count ? (double)s->restore_time / s->restore_count : 0);
2727 
2728     dump_op_count();
2729 }
2730 #else
2731 void tcg_dump_info(FILE *f, fprintf_function cpu_fprintf)
2732 {
2733     cpu_fprintf(f, "[TCG profiler not compiled]\n");
2734 }
2735 #endif
2736 
2737 #ifdef ELF_HOST_MACHINE
2738 /* In order to use this feature, the backend needs to do three things:
2739 
2740    (1) Define ELF_HOST_MACHINE to indicate both what value to
2741        put into the ELF image and to indicate support for the feature.
2742 
2743    (2) Define tcg_register_jit.  This should create a buffer containing
2744        the contents of a .debug_frame section that describes the post-
2745        prologue unwind info for the tcg machine.
2746 
2747    (3) Call tcg_register_jit_int, with the constructed .debug_frame.
2748 */
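     /* As a rough sketch only (the DebugFrame type and its CFI contents
        are host specific and left entirely to each tcg-target.c), steps
        (2) and (3) typically combine into something like:

            void tcg_register_jit(void *buf, size_t buf_size)
            {
                static DebugFrame debug_frame = { ... host CFI ... };

                ... record buf/buf_size in the frame data ...
                tcg_register_jit_int(buf, buf_size,
                                     &debug_frame, sizeof(debug_frame));
            }
     */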
2749 
2750 /* Begin GDB interface.  THE FOLLOWING MUST MATCH GDB DOCS.  */
2751 typedef enum {
2752     JIT_NOACTION = 0,
2753     JIT_REGISTER_FN,
2754     JIT_UNREGISTER_FN
2755 } jit_actions_t;
2756 
2757 struct jit_code_entry {
2758     struct jit_code_entry *next_entry;
2759     struct jit_code_entry *prev_entry;
2760     const void *symfile_addr;
2761     uint64_t symfile_size;
2762 };
2763 
2764 struct jit_descriptor {
2765     uint32_t version;
2766     uint32_t action_flag;
2767     struct jit_code_entry *relevant_entry;
2768     struct jit_code_entry *first_entry;
2769 };
2770 
2771 void __jit_debug_register_code(void) __attribute__((noinline));
2772 void __jit_debug_register_code(void)
2773 {
2774     asm("");
2775 }
2776 
2777 /* Must statically initialize the version, because GDB may check
2778    the version before we can set it.  */
2779 struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };
2780 
2781 /* End GDB interface.  */
2782 
2783 static int find_string(const char *strtab, const char *str)
2784 {
2785     const char *p = strtab + 1;
2786 
2787     while (1) {
2788         if (strcmp(p, str) == 0) {
2789             return p - strtab;
2790         }
2791         p += strlen(p) + 1;
2792     }
2793 }
2794 
2795 static void tcg_register_jit_int(void *buf_ptr, size_t buf_size,
2796                                  void *debug_frame, size_t debug_frame_size)
2797 {
2798     struct __attribute__((packed)) DebugInfo {
2799         uint32_t  len;
2800         uint16_t  version;
2801         uint32_t  abbrev;
2802         uint8_t   ptr_size;
2803         uint8_t   cu_die;
2804         uint16_t  cu_lang;
2805         uintptr_t cu_low_pc;
2806         uintptr_t cu_high_pc;
2807         uint8_t   fn_die;
2808         char      fn_name[16];
2809         uintptr_t fn_low_pc;
2810         uintptr_t fn_high_pc;
2811         uint8_t   cu_eoc;
2812     };
2813 
2814     struct ElfImage {
2815         ElfW(Ehdr) ehdr;
2816         ElfW(Phdr) phdr;
2817         ElfW(Shdr) shdr[7];
2818         ElfW(Sym)  sym[2];
2819         struct DebugInfo di;
2820         uint8_t    da[24];
2821         char       str[80];
2822     };
2823 
2824     struct ElfImage *img;
2825 
2826     static const struct ElfImage img_template = {
2827         .ehdr = {
2828             .e_ident[EI_MAG0] = ELFMAG0,
2829             .e_ident[EI_MAG1] = ELFMAG1,
2830             .e_ident[EI_MAG2] = ELFMAG2,
2831             .e_ident[EI_MAG3] = ELFMAG3,
2832             .e_ident[EI_CLASS] = ELF_CLASS,
2833             .e_ident[EI_DATA] = ELF_DATA,
2834             .e_ident[EI_VERSION] = EV_CURRENT,
2835             .e_type = ET_EXEC,
2836             .e_machine = ELF_HOST_MACHINE,
2837             .e_version = EV_CURRENT,
2838             .e_phoff = offsetof(struct ElfImage, phdr),
2839             .e_shoff = offsetof(struct ElfImage, shdr),
2840             .e_ehsize = sizeof(ElfW(Ehdr)),
2841             .e_phentsize = sizeof(ElfW(Phdr)),
2842             .e_phnum = 1,
2843             .e_shentsize = sizeof(ElfW(Shdr)),
2844             .e_shnum = ARRAY_SIZE(img->shdr),
2845             .e_shstrndx = ARRAY_SIZE(img->shdr) - 1,
2846 #ifdef ELF_HOST_FLAGS
2847             .e_flags = ELF_HOST_FLAGS,
2848 #endif
2849 #ifdef ELF_OSABI
2850             .e_ident[EI_OSABI] = ELF_OSABI,
2851 #endif
2852         },
2853         .phdr = {
2854             .p_type = PT_LOAD,
2855             .p_flags = PF_X,
2856         },
2857         .shdr = {
2858             [0] = { .sh_type = SHT_NULL },
2859             /* Trick: The contents of code_gen_buffer are not present in
2860                this fake ELF file; that got allocated elsewhere.  Therefore
2861                we mark .text as SHT_NOBITS (similar to .bss) so that readers
2862                will not look for contents.  We can record any address.  */
2863             [1] = { /* .text */
2864                 .sh_type = SHT_NOBITS,
2865                 .sh_flags = SHF_EXECINSTR | SHF_ALLOC,
2866             },
2867             [2] = { /* .debug_info */
2868                 .sh_type = SHT_PROGBITS,
2869                 .sh_offset = offsetof(struct ElfImage, di),
2870                 .sh_size = sizeof(struct DebugInfo),
2871             },
2872             [3] = { /* .debug_abbrev */
2873                 .sh_type = SHT_PROGBITS,
2874                 .sh_offset = offsetof(struct ElfImage, da),
2875                 .sh_size = sizeof(img->da),
2876             },
2877             [4] = { /* .debug_frame */
2878                 .sh_type = SHT_PROGBITS,
2879                 .sh_offset = sizeof(struct ElfImage),
2880             },
2881             [5] = { /* .symtab */
2882                 .sh_type = SHT_SYMTAB,
2883                 .sh_offset = offsetof(struct ElfImage, sym),
2884                 .sh_size = sizeof(img->sym),
2885                 .sh_info = 1,
2886                 .sh_link = ARRAY_SIZE(img->shdr) - 1,
2887                 .sh_entsize = sizeof(ElfW(Sym)),
2888             },
2889             [6] = { /* .strtab */
2890                 .sh_type = SHT_STRTAB,
2891                 .sh_offset = offsetof(struct ElfImage, str),
2892                 .sh_size = sizeof(img->str),
2893             }
2894         },
2895         .sym = {
2896             [1] = { /* code_gen_buffer */
2897                 .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC),
2898                 .st_shndx = 1,
2899             }
2900         },
2901         .di = {
2902             .len = sizeof(struct DebugInfo) - 4,
2903             .version = 2,
2904             .ptr_size = sizeof(void *),
2905             .cu_die = 1,
2906             .cu_lang = 0x8001,  /* DW_LANG_Mips_Assembler */
2907             .fn_die = 2,
2908             .fn_name = "code_gen_buffer"
2909         },
2910         .da = {
2911             1,          /* abbrev number (the cu) */
2912             0x11, 1,    /* DW_TAG_compile_unit, has children */
2913             0x13, 0x5,  /* DW_AT_language, DW_FORM_data2 */
2914             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
2915             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
2916             0, 0,       /* end of abbrev */
2917             2,          /* abbrev number (the fn) */
2918             0x2e, 0,    /* DW_TAG_subprogram, no children */
2919             0x3, 0x8,   /* DW_AT_name, DW_FORM_string */
2920             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
2921             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
2922             0, 0,       /* end of abbrev */
2923             0           /* no more abbrev */
2924         },
2925         .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0"
2926                ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer",
2927     };
2928 
2929     /* We only need a single jit entry; statically allocate it.  */
2930     static struct jit_code_entry one_entry;
2931 
2932     uintptr_t buf = (uintptr_t)buf_ptr;
2933     size_t img_size = sizeof(struct ElfImage) + debug_frame_size;
2934 
2935     img = g_malloc(img_size);
2936     *img = img_template;
2937     memcpy(img + 1, debug_frame, debug_frame_size);
2938 
2939     img->phdr.p_vaddr = buf;
2940     img->phdr.p_paddr = buf;
2941     img->phdr.p_memsz = buf_size;
2942 
2943     img->shdr[1].sh_name = find_string(img->str, ".text");
2944     img->shdr[1].sh_addr = buf;
2945     img->shdr[1].sh_size = buf_size;
2946 
2947     img->shdr[2].sh_name = find_string(img->str, ".debug_info");
2948     img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev");
2949 
2950     img->shdr[4].sh_name = find_string(img->str, ".debug_frame");
2951     img->shdr[4].sh_size = debug_frame_size;
2952 
2953     img->shdr[5].sh_name = find_string(img->str, ".symtab");
2954     img->shdr[6].sh_name = find_string(img->str, ".strtab");
2955 
2956     img->sym[1].st_name = find_string(img->str, "code_gen_buffer");
2957     img->sym[1].st_value = buf;
2958     img->sym[1].st_size = buf_size;
2959 
2960     img->di.cu_low_pc = buf;
2961     img->di.cu_high_pc = buf + buf_size;
2962     img->di.fn_low_pc = buf;
2963     img->di.fn_high_pc = buf + buf_size;
2964 
2965 #ifdef DEBUG_JIT
2966     /* Enable this block to be able to debug the ELF image file creation.
2967        One can use readelf, objdump, or other inspection utilities.  */
2968     {
2969         FILE *f = fopen("/tmp/qemu.jit", "w+b");
2970         if (f) {
2971             if (fwrite(img, img_size, 1, f) != 1) {
2972                 /* Avoid stupid unused return value warning for fwrite.  */
2973             }
2974             fclose(f);
2975         }
2976     }
2977 #endif
2978 
2979     one_entry.symfile_addr = img;
2980     one_entry.symfile_size = img_size;
2981 
2982     __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
2983     __jit_debug_descriptor.relevant_entry = &one_entry;
2984     __jit_debug_descriptor.first_entry = &one_entry;
2985     __jit_debug_register_code();
2986 }
2987 #else
2988 /* No support for the feature.  Provide the entry point expected by exec.c,
2989    and implement the internal function we declared earlier.  */
2990 
2991 static void tcg_register_jit_int(void *buf, size_t size,
2992                                  void *debug_frame, size_t debug_frame_size)
2993 {
2994 }
2995 
2996 void tcg_register_jit(void *buf, size_t buf_size)
2997 {
2998 }
2999 #endif /* ELF_HOST_MACHINE */
3000