xref: /openbmc/qemu/tcg/tcg.c (revision ddf0676f)
1 /*
2  * Tiny Code Generator for QEMU
3  *
4  * Copyright (c) 2008 Fabrice Bellard
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24 
25 /* define it to use liveness analysis (better code) */
26 #define USE_TCG_OPTIMIZATIONS
27 
28 #include "qemu/osdep.h"
29 
30 /* Define to jump the ELF file used to communicate with GDB.  */
31 #undef DEBUG_JIT
32 
33 #include "qemu/error-report.h"
34 #include "qemu/cutils.h"
35 #include "qemu/host-utils.h"
36 #include "qemu/qemu-print.h"
37 #include "qemu/cacheflush.h"
38 #include "qemu/cacheinfo.h"
39 
40 /* Note: the long term plan is to reduce the dependencies on the QEMU
41    CPU definitions. Currently they are used for qemu_ld/st
42    instructions */
43 #define NO_CPU_IO_DEFS
44 
45 #include "exec/exec-all.h"
46 #include "tcg/tcg-op.h"
47 
48 #if UINTPTR_MAX == UINT32_MAX
49 # define ELF_CLASS  ELFCLASS32
50 #else
51 # define ELF_CLASS  ELFCLASS64
52 #endif
53 #if HOST_BIG_ENDIAN
54 # define ELF_DATA   ELFDATA2MSB
55 #else
56 # define ELF_DATA   ELFDATA2LSB
57 #endif
58 
59 #include "elf.h"
60 #include "exec/log.h"
61 #include "tcg/tcg-ldst.h"
62 #include "tcg-internal.h"
63 #include "accel/tcg/perf.h"
64 
65 /* Forward declarations for functions declared in tcg-target.c.inc and
66    used here. */
67 static void tcg_target_init(TCGContext *s);
68 static void tcg_target_qemu_prologue(TCGContext *s);
69 static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
70                         intptr_t value, intptr_t addend);
71 
/* The CIE and FDE header definitions will be common to all hosts.  */
typedef struct {
    /*
     * DWARF Common Information Entry header, emitted per-host and
     * handed to GDB for JIT unwinding.  Layout must match the DWARF
     * .debug_frame encoding; do not reorder fields.
     */
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t id;
    uint8_t version;
    char augmentation[1];       /* NUL-terminated augmentation string */
    uint8_t code_align;         /* ULEB128 code alignment factor */
    uint8_t data_align;         /* SLEB128 data alignment factor */
    uint8_t return_column;
} DebugFrameCIE;
82 
/*
 * DWARF Frame Description Entry header.  Packed because func_start /
 * func_len follow the two 32-bit words with no padding in the
 * .debug_frame encoding.
 */
typedef struct QEMU_PACKED {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t cie_offset;        /* offset back to the owning CIE */
    uintptr_t func_start;       /* address of the generated code */
    uintptr_t func_len;         /* size of the generated code */
} DebugFrameFDEHeader;
89 
/* A CIE immediately followed by its FDE, as emitted by the backends. */
typedef struct QEMU_PACKED {
    DebugFrameCIE cie;
    DebugFrameFDEHeader fde;
} DebugFrameHeader;
94 
95 static void tcg_register_jit_int(const void *buf, size_t size,
96                                  const void *debug_frame,
97                                  size_t debug_frame_size)
98     __attribute__((unused));
99 
100 /* Forward declarations for functions declared and used in tcg-target.c.inc. */
101 static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
102                        intptr_t arg2);
103 static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
104 static void tcg_out_movi(TCGContext *s, TCGType type,
105                          TCGReg ret, tcg_target_long arg);
106 static void tcg_out_addi_ptr(TCGContext *s, TCGReg, TCGReg, tcg_target_long);
107 static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg);
108 static void tcg_out_goto_tb(TCGContext *s, int which);
109 static void tcg_out_op(TCGContext *s, TCGOpcode opc,
110                        const TCGArg args[TCG_MAX_OP_ARGS],
111                        const int const_args[TCG_MAX_OP_ARGS]);
#if TCG_TARGET_MAYBE_vec
/* Vector-capable backends implement these in tcg-target.c.inc. */
static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                            TCGReg dst, TCGReg src);
static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg dst, TCGReg base, intptr_t offset);
static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg dst, int64_t arg);
static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                           unsigned vecl, unsigned vece,
                           const TCGArg args[TCG_MAX_OP_ARGS],
                           const int const_args[TCG_MAX_OP_ARGS]);
#else
/*
 * Stubs for backends without vector support: no vector opcode should
 * ever be generated there, so reaching any of these is a bug.
 */
static inline bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                                   TCGReg dst, TCGReg src)
{
    g_assert_not_reached();
}
static inline bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, TCGReg base, intptr_t offset)
{
    g_assert_not_reached();
}
static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, int64_t arg)
{
    g_assert_not_reached();
}
static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                                  unsigned vecl, unsigned vece,
                                  const TCGArg args[TCG_MAX_OP_ARGS],
                                  const int const_args[TCG_MAX_OP_ARGS])
{
    g_assert_not_reached();
}
#endif
147 static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
148                        intptr_t arg2);
149 static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
150                         TCGReg base, intptr_t ofs);
151 static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target,
152                          const TCGHelperInfo *info);
153 static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot);
154 static bool tcg_target_const_match(int64_t val, TCGType type, int ct);
155 #ifdef TCG_TARGET_NEED_LDST_LABELS
156 static int tcg_out_ldst_finalize(TCGContext *s);
157 #endif
158 
TCGContext tcg_init_ctx;            /* parent context, set up by tcg_context_init */
__thread TCGContext *tcg_ctx;       /* per-thread translation context */

TCGContext **tcg_ctxs;              /* registered contexts (see tcg_register_thread) */
unsigned int tcg_cur_ctxs;          /* number of entries claimed in tcg_ctxs[] */
unsigned int tcg_max_ctxs;          /* capacity of tcg_ctxs[] */
TCGv_env cpu_env = 0;
const void *tcg_code_gen_epilogue;
uintptr_t tcg_splitwx_diff;         /* offset between RW and RX code mappings */

#ifndef CONFIG_TCG_INTERPRETER
tcg_prologue_fn *tcg_qemu_tb_exec;
#endif

static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT];
static TCGRegSet tcg_target_call_clobber_regs;
175 
#if TCG_TARGET_INSN_UNIT_SIZE == 1
/* Emit one byte into the code stream and advance code_ptr. */
static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v)
{
    *s->code_ptr++ = v;
}

/* Overwrite a byte at a previously emitted location. */
static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p,
                                                      uint8_t v)
{
    *p = v;
}
#endif
188 
#if TCG_TARGET_INSN_UNIT_SIZE <= 2
/*
 * Emit a 16-bit value into the code stream.  When the insn unit is
 * exactly 16 bits wide, store directly; otherwise copy the bytes in
 * host order and advance code_ptr by the equivalent unit count.
 */
static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

/* Overwrite a 16-bit value at a previously emitted location. */
static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p,
                                                       uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif
211 
#if TCG_TARGET_INSN_UNIT_SIZE <= 4
/*
 * Emit a 32-bit value into the code stream.  Direct store when the
 * insn unit is 32 bits; otherwise a host-order byte copy, advancing
 * code_ptr by the equivalent number of smaller units.
 */
static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

/* Overwrite a 32-bit value at a previously emitted location. */
static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p,
                                                       uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif
234 
#if TCG_TARGET_INSN_UNIT_SIZE <= 8
/*
 * Emit a 64-bit value into the code stream.  Direct store when the
 * insn unit is 64 bits; otherwise a host-order byte copy, advancing
 * code_ptr by the equivalent number of smaller units.
 */
static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

/* Overwrite a 64-bit value at a previously emitted location. */
static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p,
                                                       uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif
257 
258 /* label relocation processing */
259 
260 static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
261                           TCGLabel *l, intptr_t addend)
262 {
263     TCGRelocation *r = tcg_malloc(sizeof(TCGRelocation));
264 
265     r->type = type;
266     r->ptr = code_ptr;
267     r->addend = addend;
268     QSIMPLEQ_INSERT_TAIL(&l->relocs, r, next);
269 }
270 
/*
 * Bind label @l to the current output position.  The stored value is
 * the read-execute alias of code_ptr, since that is the address the
 * relocated branches will actually target.
 */
static void tcg_out_label(TCGContext *s, TCGLabel *l)
{
    tcg_debug_assert(!l->has_value);    /* a label may be bound only once */
    l->has_value = 1;
    l->u.value_ptr = tcg_splitwx_to_rx(s->code_ptr);
}
277 
278 TCGLabel *gen_new_label(void)
279 {
280     TCGContext *s = tcg_ctx;
281     TCGLabel *l = tcg_malloc(sizeof(TCGLabel));
282 
283     memset(l, 0, sizeof(TCGLabel));
284     l->id = s->nb_labels++;
285     QSIMPLEQ_INIT(&l->relocs);
286 
287     QSIMPLEQ_INSERT_TAIL(&s->labels, l, next);
288 
289     return l;
290 }
291 
292 static bool tcg_resolve_relocs(TCGContext *s)
293 {
294     TCGLabel *l;
295 
296     QSIMPLEQ_FOREACH(l, &s->labels, next) {
297         TCGRelocation *r;
298         uintptr_t value = l->u.value;
299 
300         QSIMPLEQ_FOREACH(r, &l->relocs, next) {
301             if (!patch_reloc(r->ptr, r->type, value, r->addend)) {
302                 return false;
303             }
304         }
305     }
306     return true;
307 }
308 
/* Record the current code offset as the TB's jump-reset point @which. */
static void set_jmp_reset_offset(TCGContext *s, int which)
{
    /*
     * We will check for overflow at the end of the opcode loop in
     * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
     */
    s->gen_tb->jmp_reset_offset[which] = tcg_current_code_size(s);
}
317 
/*
 * Record the current code offset as the location of goto_tb jump
 * insn @which, for later retranslation/patching.  Unused by backends
 * that patch via jmp_target_addr instead.
 */
static void G_GNUC_UNUSED set_jmp_insn_offset(TCGContext *s, int which)
{
    /*
     * We will check for overflow at the end of the opcode loop in
     * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
     */
    s->gen_tb->jmp_insn_offset[which] = tcg_current_code_size(s);
}
326 
/* Address of the indirect jump-target slot @which of the current TB. */
static uintptr_t G_GNUC_UNUSED get_jmp_target_addr(TCGContext *s, int which)
{
    /*
     * Return the read-execute version of the pointer, for the benefit
     * of any pc-relative addressing mode.
     */
    return (uintptr_t)tcg_splitwx_to_rx(&s->gen_tb->jmp_target_addr[which]);
}
335 
/* Signal overflow, starting over with fewer guest insns. */
static G_NORETURN
void tcg_raise_tb_overflow(TCGContext *s)
{
    /*
     * Non-local exit back to the sigsetjmp in the translation loop;
     * -2 distinguishes buffer overflow from other restart reasons.
     */
    siglongjmp(s->jmp_trans, -2);
}
342 
/*
 * Paste a prefix together with 1..6 constraint component names,
 * e.g. C_PFX3(c_o1_i2_, r, r, ri) -> c_o1_i2_r_r_ri.  Used below to
 * expand tcg-target-con-set.h three different ways.
 */
#define C_PFX1(P, A)                    P##A
#define C_PFX2(P, A, B)                 P##A##_##B
#define C_PFX3(P, A, B, C)              P##A##_##B##_##C
#define C_PFX4(P, A, B, C, D)           P##A##_##B##_##C##_##D
#define C_PFX5(P, A, B, C, D, E)        P##A##_##B##_##C##_##D##_##E
#define C_PFX6(P, A, B, C, D, E, F)     P##A##_##B##_##C##_##D##_##E##_##F

/* Define an enumeration for the various combinations. */

#define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1),
#define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2),
#define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3),
#define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4),

#define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1),
#define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2),
#define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3),
#define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4),

#define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2),

#define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1),
#define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2),
#define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3),
#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4),

typedef enum {
#include "tcg-target-con-set.h"
} TCGConstraintSetIndex;

/* Implemented in tcg-target.c.inc; maps opcode -> constraint set. */
static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode);
374 
/* Undefine so tcg-target-con-set.h can be re-included with new expansions. */
#undef C_O0_I1
#undef C_O0_I2
#undef C_O0_I3
#undef C_O0_I4
#undef C_O1_I1
#undef C_O1_I2
#undef C_O1_I3
#undef C_O1_I4
#undef C_N1_I2
#undef C_O2_I1
#undef C_O2_I2
#undef C_O2_I3
#undef C_O2_I4

/* Put all of the constraint sets into an array, indexed by the enum. */

#define C_O0_I1(I1)                     { .args_ct_str = { #I1 } },
#define C_O0_I2(I1, I2)                 { .args_ct_str = { #I1, #I2 } },
#define C_O0_I3(I1, I2, I3)             { .args_ct_str = { #I1, #I2, #I3 } },
#define C_O0_I4(I1, I2, I3, I4)         { .args_ct_str = { #I1, #I2, #I3, #I4 } },

#define C_O1_I1(O1, I1)                 { .args_ct_str = { #O1, #I1 } },
#define C_O1_I2(O1, I1, I2)             { .args_ct_str = { #O1, #I1, #I2 } },
#define C_O1_I3(O1, I1, I2, I3)         { .args_ct_str = { #O1, #I1, #I2, #I3 } },
#define C_O1_I4(O1, I1, I2, I3, I4)     { .args_ct_str = { #O1, #I1, #I2, #I3, #I4 } },

/* N1 marks the first output as early-clobber ("&"). */
#define C_N1_I2(O1, I1, I2)             { .args_ct_str = { "&" #O1, #I1, #I2 } },

#define C_O2_I1(O1, O2, I1)             { .args_ct_str = { #O1, #O2, #I1 } },
#define C_O2_I2(O1, O2, I1, I2)         { .args_ct_str = { #O1, #O2, #I1, #I2 } },
#define C_O2_I3(O1, O2, I1, I2, I3)     { .args_ct_str = { #O1, #O2, #I1, #I2, #I3 } },
#define C_O2_I4(O1, O2, I1, I2, I3, I4) { .args_ct_str = { #O1, #O2, #I1, #I2, #I3, #I4 } },

static const TCGTargetOpDef constraint_sets[] = {
#include "tcg-target-con-set.h"
};
411 
412 
/* Undefine again for the final expansion used inside the backend. */
#undef C_O0_I1
#undef C_O0_I2
#undef C_O0_I3
#undef C_O0_I4
#undef C_O1_I1
#undef C_O1_I2
#undef C_O1_I3
#undef C_O1_I4
#undef C_N1_I2
#undef C_O2_I1
#undef C_O2_I2
#undef C_O2_I3
#undef C_O2_I4

/* Expand the enumerator to be returned from tcg_target_op_def(). */

#define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1)
#define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2)
#define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3)
#define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4)

#define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1)
#define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2)
#define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3)
#define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4)

#define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2)

#define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1)
#define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2)
#define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3)
#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4)

#include "tcg-target.c.inc"
447 
/*
 * Allocate per-context TCG-plugin state (a TB descriptor and its
 * instruction array).  No-op when plugins are compiled out.
 */
static void alloc_tcg_plugin_context(TCGContext *s)
{
#ifdef CONFIG_PLUGIN
    s->plugin_tb = g_new0(struct qemu_plugin_tb, 1);
    s->plugin_tb->insns =
        g_ptr_array_new_with_free_func(qemu_plugin_insn_cleanup_fn);
#endif
}
456 
457 /*
458  * All TCG threads except the parent (i.e. the one that called tcg_context_init
459  * and registered the target's TCG globals) must register with this function
460  * before initiating translation.
461  *
462  * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation
463  * of tcg_region_init() for the reasoning behind this.
464  *
 * In softmmu each caller registers its context in tcg_ctxs[]. Note that in
 * softmmu tcg_ctxs[] does not track tcg_init_ctx, since the initial context
 * is not used anymore for translation once this function is called.
 *
 * Not tracking tcg_init_ctx in tcg_ctxs[] in softmmu keeps code that iterates
 * over the array (e.g. tcg_code_size()) the same for both softmmu and
 * user-mode.
471  */
#ifdef CONFIG_USER_ONLY
void tcg_register_thread(void)
{
    /* User-mode: every thread shares the single initial context. */
    tcg_ctx = &tcg_init_ctx;
}
#else
void tcg_register_thread(void)
{
    TCGContext *s = g_malloc(sizeof(*s));
    unsigned int i, n;

    /* Start from a copy of the fully initialized parent context. */
    *s = tcg_init_ctx;

    /* Relink mem_base.  */
    for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) {
        if (tcg_init_ctx.temps[i].mem_base) {
            /* Translate the parent's intra-array pointer to our copy. */
            ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps;
            tcg_debug_assert(b >= 0 && b < n);
            s->temps[i].mem_base = &s->temps[b];
        }
    }

    /* Claim an entry in tcg_ctxs */
    n = qatomic_fetch_inc(&tcg_cur_ctxs);
    g_assert(n < tcg_max_ctxs);
    qatomic_set(&tcg_ctxs[n], s);

    if (n > 0) {
        /* Secondary contexts get their own plugin state and region. */
        alloc_tcg_plugin_context(s);
        tcg_region_initial_alloc(s);
    }

    tcg_ctx = s;
}
#endif /* !CONFIG_USER_ONLY */
507 
508 /* pool based memory allocation */
509 void *tcg_malloc_internal(TCGContext *s, int size)
510 {
511     TCGPool *p;
512     int pool_size;
513 
514     if (size > TCG_POOL_CHUNK_SIZE) {
515         /* big malloc: insert a new pool (XXX: could optimize) */
516         p = g_malloc(sizeof(TCGPool) + size);
517         p->size = size;
518         p->next = s->pool_first_large;
519         s->pool_first_large = p;
520         return p->data;
521     } else {
522         p = s->pool_current;
523         if (!p) {
524             p = s->pool_first;
525             if (!p)
526                 goto new_pool;
527         } else {
528             if (!p->next) {
529             new_pool:
530                 pool_size = TCG_POOL_CHUNK_SIZE;
531                 p = g_malloc(sizeof(TCGPool) + pool_size);
532                 p->size = pool_size;
533                 p->next = NULL;
534                 if (s->pool_current) {
535                     s->pool_current->next = p;
536                 } else {
537                     s->pool_first = p;
538                 }
539             } else {
540                 p = p->next;
541             }
542         }
543     }
544     s->pool_current = p;
545     s->pool_cur = p->data + size;
546     s->pool_end = p->data + p->size;
547     return p->data;
548 }
549 
550 void tcg_pool_reset(TCGContext *s)
551 {
552     TCGPool *p, *t;
553     for (p = s->pool_first_large; p; p = t) {
554         t = p->next;
555         g_free(p);
556     }
557     s->pool_first_large = NULL;
558     s->pool_cur = s->pool_end = NULL;
559     s->pool_current = NULL;
560 }
561 
562 #include "exec/helper-proto.h"
563 
/* One descriptor per declared helper, generated from the DEF_HELPER list. */
static TCGHelperInfo all_helpers[] = {
#include "exec/helper-tcg.h"
};
/* Maps helper function pointer -> its TCGHelperInfo entry. */
static GHashTable *helper_table;
568 
569 #ifdef CONFIG_TCG_INTERPRETER
/*
 * Map one 3-bit dh_typecode value to the corresponding libffi type
 * descriptor.  Aborts on an unknown typecode.
 */
static ffi_type *typecode_to_ffi(int argmask)
{
    /*
     * libffi does not support __int128_t, so we have forced Int128
     * to use the structure definition instead of the builtin type.
     */
    static ffi_type *ffi_type_i128_elements[3] = {
        &ffi_type_uint64,
        &ffi_type_uint64,
        NULL                    /* element list is NULL-terminated */
    };
    static ffi_type ffi_type_i128 = {
        .size = 16,
        .alignment = __alignof__(Int128),
        .type = FFI_TYPE_STRUCT,
        .elements = ffi_type_i128_elements,
    };

    switch (argmask) {
    case dh_typecode_void:
        return &ffi_type_void;
    case dh_typecode_i32:
        return &ffi_type_uint32;
    case dh_typecode_s32:
        return &ffi_type_sint32;
    case dh_typecode_i64:
        return &ffi_type_uint64;
    case dh_typecode_s64:
        return &ffi_type_sint64;
    case dh_typecode_ptr:
        return &ffi_type_pointer;
    case dh_typecode_i128:
        return &ffi_type_i128;
    }
    g_assert_not_reached();
}
606 
/*
 * Build an ffi_cif call descriptor for every helper, deduplicating by
 * typemask: helpers with the same signature share one cif.  The
 * temporary hash table only maps typemask -> cif during this setup;
 * the cif allocations themselves are kept alive forever via info->cif.
 */
static void init_ffi_layouts(void)
{
    /* g_direct_hash/equal for direct comparisons on uint32_t.  */
    GHashTable *ffi_table = g_hash_table_new(NULL, NULL);

    for (int i = 0; i < ARRAY_SIZE(all_helpers); ++i) {
        TCGHelperInfo *info = &all_helpers[i];
        unsigned typemask = info->typemask;
        gpointer hash = (gpointer)(uintptr_t)typemask;
        /* cif and its argument-type array in a single allocation */
        struct {
            ffi_cif cif;
            ffi_type *args[];
        } *ca;
        ffi_status status;
        int nargs;
        ffi_cif *cif;

        /* Reuse a previously built cif for an identical signature. */
        cif = g_hash_table_lookup(ffi_table, hash);
        if (cif) {
            info->cif = cif;
            continue;
        }

        /* Ignoring the return type, find the last non-zero field. */
        nargs = 32 - clz32(typemask >> 3);
        nargs = DIV_ROUND_UP(nargs, 3);     /* 3 bits per argument */
        assert(nargs <= MAX_CALL_IARGS);

        ca = g_malloc0(sizeof(*ca) + nargs * sizeof(ffi_type *));
        ca->cif.rtype = typecode_to_ffi(typemask & 7);
        ca->cif.nargs = nargs;

        if (nargs != 0) {
            ca->cif.arg_types = ca->args;
            for (int j = 0; j < nargs; ++j) {
                int typecode = extract32(typemask, (j + 1) * 3, 3);
                ca->args[j] = typecode_to_ffi(typecode);
            }
        }

        status = ffi_prep_cif(&ca->cif, FFI_DEFAULT_ABI, nargs,
                              ca->cif.rtype, ca->cif.arg_types);
        assert(status == FFI_OK);

        cif = &ca->cif;
        info->cif = cif;
        g_hash_table_insert(ffi_table, hash, (gpointer)cif);
    }

    /* Drop the lookup table; the cifs remain referenced by info->cif. */
    g_hash_table_destroy(ffi_table);
}
658 #endif /* CONFIG_TCG_INTERPRETER */
659 
/* Running state threaded through the layout_arg_* helpers below. */
typedef struct TCGCumulativeArgs {
    int arg_idx;                /* tcg_gen_callN args[] */
    int info_in_idx;            /* TCGHelperInfo in[] */
    int arg_slot;               /* regs+stack slot */
    int ref_slot;               /* stack slots for references */
} TCGCumulativeArgs;
666 
667 static void layout_arg_even(TCGCumulativeArgs *cum)
668 {
669     cum->arg_slot += cum->arg_slot & 1;
670 }
671 
/*
 * Assign one single-slot argument of kind @kind: record it in the
 * next info->in[] entry and consume one register/stack slot.
 * Fields not named in the initializer (e.g. tmp_subindex) are zeroed.
 */
static void layout_arg_1(TCGCumulativeArgs *cum, TCGHelperInfo *info,
                         TCGCallArgumentKind kind)
{
    TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];

    *loc = (TCGCallArgumentLoc){
        .kind = kind,
        .arg_idx = cum->arg_idx,
        .arg_slot = cum->arg_slot,
    };
    cum->info_in_idx++;
    cum->arg_slot++;
}
685 
/*
 * Assign one argument that occupies @n consecutive slots (e.g. a
 * 64-bit value on a 32-bit host, or i128): all pieces share the same
 * arg_idx, distinguished by tmp_subindex.
 */
static void layout_arg_normal_n(TCGCumulativeArgs *cum,
                                TCGHelperInfo *info, int n)
{
    TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];

    for (int i = 0; i < n; ++i) {
        /* Layout all using the same arg_idx, adjusting the subindex. */
        loc[i] = (TCGCallArgumentLoc){
            .kind = TCG_CALL_ARG_NORMAL,
            .arg_idx = cum->arg_idx,
            .tmp_subindex = i,
            .arg_slot = cum->arg_slot + i,
        };
    }
    cum->info_in_idx += n;
    cum->arg_slot += n;
}
703 
/*
 * Assign one 128-bit argument passed by reference: one normal slot
 * carries the pointer, while the value itself is copied into
 * "ref_slot" stack space (relocated after all arguments are placed,
 * see init_call_layout).
 */
static void layout_arg_by_ref(TCGCumulativeArgs *cum, TCGHelperInfo *info)
{
    TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];
    int n = 128 / TCG_TARGET_REG_BITS;

    /* The first subindex carries the pointer. */
    layout_arg_1(cum, info, TCG_CALL_ARG_BY_REF);

    /*
     * The callee is allowed to clobber memory associated with
     * structure pass by-reference.  Therefore we must make copies.
     * Allocate space from "ref_slot", which will be adjusted to
     * follow the parameters on the stack.
     */
    loc[0].ref_slot = cum->ref_slot;

    /*
     * Subsequent words also go into the reference slot, but
     * do not accumulate into the regular arguments.
     */
    for (int i = 1; i < n; ++i) {
        loc[i] = (TCGCallArgumentLoc){
            .kind = TCG_CALL_ARG_BY_REF_N,
            .arg_idx = cum->arg_idx,
            .tmp_subindex = i,
            .ref_slot = cum->ref_slot + i,
        };
    }
    cum->info_in_idx += n;
    cum->ref_slot += n;
}
735 
/*
 * Compute the complete call layout for one helper.  info->typemask
 * packs 3-bit dh_typecode values: the low 3 bits describe the return
 * value, each subsequent group one argument in order.  On return,
 * info->nr_out/out_kind describe the result and info->in[0..nr_in)
 * hold a slot assignment for every argument piece, following the
 * backend's TCG_TARGET_CALL_* conventions.
 */
static void init_call_layout(TCGHelperInfo *info)
{
    int max_reg_slots = ARRAY_SIZE(tcg_target_call_iarg_regs);
    int max_stk_slots = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long);
    unsigned typemask = info->typemask;
    unsigned typecode;
    TCGCumulativeArgs cum = { };

    /*
     * Parse and place any function return value.
     */
    typecode = typemask & 7;
    switch (typecode) {
    case dh_typecode_void:
        info->nr_out = 0;
        break;
    case dh_typecode_i32:
    case dh_typecode_s32:
    case dh_typecode_ptr:
        info->nr_out = 1;
        info->out_kind = TCG_CALL_RET_NORMAL;
        break;
    case dh_typecode_i64:
    case dh_typecode_s64:
        /* One register on 64-bit hosts, a pair on 32-bit hosts. */
        info->nr_out = 64 / TCG_TARGET_REG_BITS;
        info->out_kind = TCG_CALL_RET_NORMAL;
        /* Query the last register now to trigger any assert early. */
        tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1);
        break;
    case dh_typecode_i128:
        info->nr_out = 128 / TCG_TARGET_REG_BITS;
        info->out_kind = TCG_TARGET_CALL_RET_I128;
        switch (TCG_TARGET_CALL_RET_I128) {
        case TCG_CALL_RET_NORMAL:
            /* Query the last register now to trigger any assert early. */
            tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1);
            break;
        case TCG_CALL_RET_BY_VEC:
            /* Query the single register now to trigger any assert early. */
            tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0);
            break;
        case TCG_CALL_RET_BY_REF:
            /*
             * Allocate the first argument to the output.
             * We don't need to store this anywhere, just make it
             * unavailable for use in the input loop below.
             */
            cum.arg_slot = 1;
            break;
        default:
            qemu_build_not_reached();
        }
        break;
    default:
        g_assert_not_reached();
    }

    /*
     * Parse and place function arguments.
     */
    for (typemask >>= 3; typemask; typemask >>= 3, cum.arg_idx++) {
        TCGCallArgumentKind kind;
        TCGType type;

        typecode = typemask & 7;
        switch (typecode) {
        case dh_typecode_i32:
        case dh_typecode_s32:
            type = TCG_TYPE_I32;
            break;
        case dh_typecode_i64:
        case dh_typecode_s64:
            type = TCG_TYPE_I64;
            break;
        case dh_typecode_ptr:
            type = TCG_TYPE_PTR;
            break;
        case dh_typecode_i128:
            type = TCG_TYPE_I128;
            break;
        default:
            g_assert_not_reached();
        }

        switch (type) {
        case TCG_TYPE_I32:
            switch (TCG_TARGET_CALL_ARG_I32) {
            case TCG_CALL_ARG_EVEN:
                layout_arg_even(&cum);
                /* fall through */
            case TCG_CALL_ARG_NORMAL:
                layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL);
                break;
            case TCG_CALL_ARG_EXTEND:
                /* Odd typecodes (s32) are the signed variants. */
                kind = TCG_CALL_ARG_EXTEND_U + (typecode & 1);
                layout_arg_1(&cum, info, kind);
                break;
            default:
                qemu_build_not_reached();
            }
            break;

        case TCG_TYPE_I64:
            switch (TCG_TARGET_CALL_ARG_I64) {
            case TCG_CALL_ARG_EVEN:
                layout_arg_even(&cum);
                /* fall through */
            case TCG_CALL_ARG_NORMAL:
                if (TCG_TARGET_REG_BITS == 32) {
                    layout_arg_normal_n(&cum, info, 2);
                } else {
                    layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL);
                }
                break;
            default:
                qemu_build_not_reached();
            }
            break;

        case TCG_TYPE_I128:
            switch (TCG_TARGET_CALL_ARG_I128) {
            case TCG_CALL_ARG_EVEN:
                layout_arg_even(&cum);
                /* fall through */
            case TCG_CALL_ARG_NORMAL:
                layout_arg_normal_n(&cum, info, 128 / TCG_TARGET_REG_BITS);
                break;
            case TCG_CALL_ARG_BY_REF:
                layout_arg_by_ref(&cum, info);
                break;
            default:
                qemu_build_not_reached();
            }
            break;

        default:
            g_assert_not_reached();
        }
    }
    info->nr_in = cum.info_in_idx;

    /* Validate that we didn't overrun the input array. */
    assert(cum.info_in_idx <= ARRAY_SIZE(info->in));
    /* Validate the backend has enough argument space. */
    assert(cum.arg_slot <= max_reg_slots + max_stk_slots);

    /*
     * Relocate the "ref_slot" area to the end of the parameters.
     * Minimizing this stack offset helps code size for x86,
     * which has a signed 8-bit offset encoding.
     */
    if (cum.ref_slot != 0) {
        int ref_base = 0;

        if (cum.arg_slot > max_reg_slots) {
            /* Arguments spilled to the stack; place copies after them. */
            int align = __alignof(Int128) / sizeof(tcg_target_long);

            ref_base = cum.arg_slot - max_reg_slots;
            if (align > 1) {
                ref_base = ROUND_UP(ref_base, align);
            }
        }
        assert(ref_base + cum.ref_slot <= max_stk_slots);

        if (ref_base != 0) {
            /* Shift every by-reference copy slot by the final base. */
            for (int i = cum.info_in_idx - 1; i >= 0; --i) {
                TCGCallArgumentLoc *loc = &info->in[i];
                switch (loc->kind) {
                case TCG_CALL_ARG_BY_REF:
                case TCG_CALL_ARG_BY_REF_N:
                    loc->ref_slot += ref_base;
                    break;
                default:
                    break;
                }
            }
        }
    }
}
915 
916 static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)];
917 static void process_op_defs(TCGContext *s);
918 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
919                                             TCGReg reg, const char *name);
920 
/*
 * One-time construction of the initial TCG context (tcg_init_ctx):
 * slice out per-opcode argument-constraint storage, register all
 * helpers, initialize the backend, and create the fixed "env" global.
 * Called exactly once, from tcg_init().
 */
static void tcg_context_init(unsigned max_cpus)
{
    TCGContext *s = &tcg_init_ctx;
    int op, total_args, n, i;
    TCGOpDef *def;
    TCGArgConstraint *args_ct;
    TCGTemp *ts;

    memset(s, 0, sizeof(*s));
    s->nb_globals = 0;

    /* Count total number of arguments and allocate the corresponding
       space */
    total_args = 0;
    for(op = 0; op < NB_OPS; op++) {
        def = &tcg_op_defs[op];
        n = def->nb_iargs + def->nb_oargs;
        total_args += n;
    }

    /* One contiguous allocation, partitioned among all opcodes below. */
    args_ct = g_new0(TCGArgConstraint, total_args);

    for(op = 0; op < NB_OPS; op++) {
        def = &tcg_op_defs[op];
        def->args_ct = args_ct;
        n = def->nb_iargs + def->nb_oargs;
        args_ct += n;
    }

    /* Register helpers.  */
    /* Use g_direct_hash/equal for direct pointer comparisons on func.  */
    helper_table = g_hash_table_new(NULL, NULL);

    for (i = 0; i < ARRAY_SIZE(all_helpers); ++i) {
        init_call_layout(&all_helpers[i]);
        g_hash_table_insert(helper_table, (gpointer)all_helpers[i].func,
                            (gpointer)&all_helpers[i]);
    }

#ifdef CONFIG_TCG_INTERPRETER
    init_ffi_layouts();
#endif

    tcg_target_init(s);
    process_op_defs(s);

    /* Reverse the order of the saved registers, assuming they're all at
       the start of tcg_target_reg_alloc_order.  */
    for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) {
        int r = tcg_target_reg_alloc_order[n];
        if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) {
            break;
        }
    }
    /* n now counts the leading call-saved registers; reverse just those. */
    for (i = 0; i < n; ++i) {
        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i];
    }
    for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) {
        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i];
    }

    alloc_tcg_plugin_context(s);

    tcg_ctx = s;
    /*
     * In user-mode we simply share the init context among threads, since we
     * use a single region. See the documentation tcg_region_init() for the
     * reasoning behind this.
     * In softmmu we will have at most max_cpus TCG threads.
     */
#ifdef CONFIG_USER_ONLY
    tcg_ctxs = &tcg_ctx;
    tcg_cur_ctxs = 1;
    tcg_max_ctxs = 1;
#else
    tcg_max_ctxs = max_cpus;
    tcg_ctxs = g_new0(TCGContext *, max_cpus);
#endif

    /* AREG0 must still be free; it becomes the fixed "env" global. */
    tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0));
    ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env");
    cpu_env = temp_tcgv_ptr(ts);
}
1004 
/*
 * Initialize the TCG subsystem: build the global context first, then
 * set up the code-generation regions (the two calls must stay in this
 * order).  max_cpus bounds the number of TCG threads; tb_size and
 * splitwx are forwarded to tcg_region_init().
 */
void tcg_init(size_t tb_size, int splitwx, unsigned max_cpus)
{
    tcg_context_init(max_cpus);
    tcg_region_init(tb_size, splitwx, max_cpus);
}
1010 
1011 /*
1012  * Allocate TBs right before their corresponding translated code, making
1013  * sure that TBs and code are on different cache lines.
1014  */
1015 TranslationBlock *tcg_tb_alloc(TCGContext *s)
1016 {
1017     uintptr_t align = qemu_icache_linesize;
1018     TranslationBlock *tb;
1019     void *next;
1020 
1021  retry:
1022     tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align);
1023     next = (void *)ROUND_UP((uintptr_t)(tb + 1), align);
1024 
1025     if (unlikely(next > s->code_gen_highwater)) {
1026         if (tcg_region_alloc(s)) {
1027             return NULL;
1028         }
1029         goto retry;
1030     }
1031     qatomic_set(&s->code_gen_ptr, next);
1032     s->data_gen_ptr = NULL;
1033     return tb;
1034 }
1035 
/*
 * Emit the backend's prologue/epilogue at the start of the code buffer,
 * flush the instruction cache over it, optionally disassemble it to the
 * log, and hand the consumed space over to the region allocator.
 */
void tcg_prologue_init(TCGContext *s)
{
    size_t prologue_size;

    s->code_ptr = s->code_gen_ptr;
    s->code_buf = s->code_gen_ptr;
    s->data_gen_ptr = NULL;

#ifndef CONFIG_TCG_INTERPRETER
    /* The prologue's entry point is the function we call to enter TBs. */
    tcg_qemu_tb_exec = (tcg_prologue_fn *)tcg_splitwx_to_rx(s->code_ptr);
#endif

#ifdef TCG_TARGET_NEED_POOL_LABELS
    s->pool_labels = NULL;
#endif

    qemu_thread_jit_write();
    /* Generate the prologue.  */
    tcg_target_qemu_prologue(s);

#ifdef TCG_TARGET_NEED_POOL_LABELS
    /* Allow the prologue to put e.g. guest_base into a pool entry.  */
    {
        int result = tcg_out_pool_finalize(s);
        tcg_debug_assert(result == 0);
    }
#endif

    prologue_size = tcg_current_code_size(s);
    perf_report_prologue(s->code_gen_ptr, prologue_size);

#ifndef CONFIG_TCG_INTERPRETER
    /* Make the freshly written prologue visible to instruction fetch. */
    flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
                        (uintptr_t)s->code_buf, prologue_size);
#endif

#ifdef DEBUG_DISAS
    if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
        FILE *logfile = qemu_log_trylock();
        if (logfile) {
            fprintf(logfile, "PROLOGUE: [size=%zu]\n", prologue_size);
            if (s->data_gen_ptr) {
                /* Code is followed by a constant-pool area; dump both. */
                size_t code_size = s->data_gen_ptr - s->code_gen_ptr;
                size_t data_size = prologue_size - code_size;
                size_t i;

                disas(logfile, s->code_gen_ptr, code_size);

                for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) {
                    if (sizeof(tcg_target_ulong) == 8) {
                        fprintf(logfile,
                                "0x%08" PRIxPTR ":  .quad  0x%016" PRIx64 "\n",
                                (uintptr_t)s->data_gen_ptr + i,
                                *(uint64_t *)(s->data_gen_ptr + i));
                    } else {
                        fprintf(logfile,
                                "0x%08" PRIxPTR ":  .long  0x%08x\n",
                                (uintptr_t)s->data_gen_ptr + i,
                                *(uint32_t *)(s->data_gen_ptr + i));
                    }
                }
            } else {
                disas(logfile, s->code_gen_ptr, prologue_size);
            }
            fprintf(logfile, "\n");
            qemu_log_unlock(logfile);
        }
    }
#endif

#ifndef CONFIG_TCG_INTERPRETER
    /*
     * Assert that goto_ptr is implemented completely, setting an epilogue.
     * For tci, we use NULL as the signal to return from the interpreter,
     * so skip this check.
     */
    tcg_debug_assert(tcg_code_gen_epilogue != NULL);
#endif

    tcg_region_prologue_set(s);
}
1117 
1118 void tcg_func_start(TCGContext *s)
1119 {
1120     tcg_pool_reset(s);
1121     s->nb_temps = s->nb_globals;
1122 
1123     /* No temps have been previously allocated for size or locality.  */
1124     memset(s->free_temps, 0, sizeof(s->free_temps));
1125 
1126     /* No constant temps have been previously allocated. */
1127     for (int i = 0; i < TCG_TYPE_COUNT; ++i) {
1128         if (s->const_table[i]) {
1129             g_hash_table_remove_all(s->const_table[i]);
1130         }
1131     }
1132 
1133     s->nb_ops = 0;
1134     s->nb_labels = 0;
1135     s->current_frame_offset = s->frame_start;
1136 
1137 #ifdef CONFIG_DEBUG_TCG
1138     s->goto_tb_issue_mask = 0;
1139 #endif
1140 
1141     QTAILQ_INIT(&s->ops);
1142     QTAILQ_INIT(&s->free_ops);
1143     QSIMPLEQ_INIT(&s->labels);
1144 }
1145 
1146 static TCGTemp *tcg_temp_alloc(TCGContext *s)
1147 {
1148     int n = s->nb_temps++;
1149 
1150     if (n >= TCG_MAX_TEMPS) {
1151         tcg_raise_tb_overflow(s);
1152     }
1153     return memset(&s->temps[n], 0, sizeof(TCGTemp));
1154 }
1155 
1156 static TCGTemp *tcg_global_alloc(TCGContext *s)
1157 {
1158     TCGTemp *ts;
1159 
1160     tcg_debug_assert(s->nb_globals == s->nb_temps);
1161     tcg_debug_assert(s->nb_globals < TCG_MAX_TEMPS);
1162     s->nb_globals++;
1163     ts = tcg_temp_alloc(s);
1164     ts->kind = TEMP_GLOBAL;
1165 
1166     return ts;
1167 }
1168 
1169 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
1170                                             TCGReg reg, const char *name)
1171 {
1172     TCGTemp *ts;
1173 
1174     if (TCG_TARGET_REG_BITS == 32 && type != TCG_TYPE_I32) {
1175         tcg_abort();
1176     }
1177 
1178     ts = tcg_global_alloc(s);
1179     ts->base_type = type;
1180     ts->type = type;
1181     ts->kind = TEMP_FIXED;
1182     ts->reg = reg;
1183     ts->name = name;
1184     tcg_regset_set_reg(s->reserved_regs, reg);
1185 
1186     return ts;
1187 }
1188 
/*
 * Define the TB frame area: bytes [start, start + size) addressed
 * relative to host register 'reg', which becomes the fixed global
 * "_frame".  tcg_func_start() resets the frame offset to frame_start.
 */
void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size)
{
    s->frame_start = start;
    s->frame_end = start + size;
    s->frame_temp
        = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame");
}
1196 
/*
 * Create a global temp backed by memory at base + offset.  'base' is
 * normally the fixed "env" register; it may itself be a memory-backed
 * global, in which case the new temp is marked indirect (one level of
 * indirection only).  On 32-bit hosts a TCG_TYPE_I64 global becomes a
 * pair of adjacent I32 temps named "<name>_0"/"<name>_1".
 */
TCGTemp *tcg_global_mem_new_internal(TCGType type, TCGv_ptr base,
                                     intptr_t offset, const char *name)
{
    TCGContext *s = tcg_ctx;
    TCGTemp *base_ts = tcgv_ptr_temp(base);
    TCGTemp *ts = tcg_global_alloc(s);
    int indirect_reg = 0;

    switch (base_ts->kind) {
    case TEMP_FIXED:
        break;
    case TEMP_GLOBAL:
        /* We do not support double-indirect registers.  */
        tcg_debug_assert(!base_ts->indirect_reg);
        base_ts->indirect_base = 1;
        /* A 64-bit global on a 32-bit host counts as two indirects. */
        s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64
                            ? 2 : 1);
        indirect_reg = 1;
        break;
    default:
        g_assert_not_reached();
    }

    if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
        /* Split into a low/high pair of I32 temps at adjacent slots. */
        TCGTemp *ts2 = tcg_global_alloc(s);
        char buf[64];

        ts->base_type = TCG_TYPE_I64;
        ts->type = TCG_TYPE_I32;
        ts->indirect_reg = indirect_reg;
        ts->mem_allocated = 1;
        ts->mem_base = base_ts;
        ts->mem_offset = offset;
        pstrcpy(buf, sizeof(buf), name);
        pstrcat(buf, sizeof(buf), "_0");
        ts->name = strdup(buf);

        /* The pair must be contiguous in the temp array. */
        tcg_debug_assert(ts2 == ts + 1);
        ts2->base_type = TCG_TYPE_I64;
        ts2->type = TCG_TYPE_I32;
        ts2->indirect_reg = indirect_reg;
        ts2->mem_allocated = 1;
        ts2->mem_base = base_ts;
        ts2->mem_offset = offset + 4;
        ts2->temp_subindex = 1;
        pstrcpy(buf, sizeof(buf), name);
        pstrcat(buf, sizeof(buf), "_1");
        ts2->name = strdup(buf);
    } else {
        ts->base_type = type;
        ts->type = type;
        ts->indirect_reg = indirect_reg;
        ts->mem_allocated = 1;
        ts->mem_base = base_ts;
        ts->mem_offset = offset;
        ts->name = name;
    }
    return ts;
}
1256 
/*
 * Allocate a temp of the given type and lifetime kind.  TEMP_EBB temps
 * are recycled through the per-type free bitmap; TEMP_TB temps are
 * always freshly allocated.  Types wider than the host register are
 * represented by n contiguous TCG_TYPE_REG temps sharing a base_type,
 * distinguished by temp_subindex.
 */
TCGTemp *tcg_temp_new_internal(TCGType type, TCGTempKind kind)
{
    TCGContext *s = tcg_ctx;
    TCGTemp *ts;
    int n;

    if (kind == TEMP_EBB) {
        int idx = find_first_bit(s->free_temps[type].l, TCG_MAX_TEMPS);

        if (idx < TCG_MAX_TEMPS) {
            /* There is already an available temp with the right type.  */
            clear_bit(idx, s->free_temps[type].l);

            ts = &s->temps[idx];
            ts->temp_allocated = 1;
            tcg_debug_assert(ts->base_type == type);
            tcg_debug_assert(ts->kind == kind);
            goto done;
        }
    } else {
        tcg_debug_assert(kind == TEMP_TB);
    }

    /* Number of host-register-sized pieces needed for this type. */
    switch (type) {
    case TCG_TYPE_I32:
    case TCG_TYPE_V64:
    case TCG_TYPE_V128:
    case TCG_TYPE_V256:
        n = 1;
        break;
    case TCG_TYPE_I64:
        n = 64 / TCG_TARGET_REG_BITS;
        break;
    case TCG_TYPE_I128:
        n = 128 / TCG_TARGET_REG_BITS;
        break;
    default:
        g_assert_not_reached();
    }

    ts = tcg_temp_alloc(s);
    ts->base_type = type;
    ts->temp_allocated = 1;
    ts->kind = kind;

    if (n == 1) {
        ts->type = type;
    } else {
        ts->type = TCG_TYPE_REG;

        /* Allocate the remaining pieces at consecutive slots. */
        for (int i = 1; i < n; ++i) {
            TCGTemp *ts2 = tcg_temp_alloc(s);

            tcg_debug_assert(ts2 == ts + i);
            ts2->base_type = type;
            ts2->type = TCG_TYPE_REG;
            ts2->temp_allocated = 1;
            ts2->temp_subindex = i;
            ts2->kind = kind;
        }
    }

 done:
#if defined(CONFIG_DEBUG_TCG)
    s->temps_in_use++;
#endif
    return ts;
}
1325 
1326 TCGv_vec tcg_temp_new_vec(TCGType type)
1327 {
1328     TCGTemp *t;
1329 
1330 #ifdef CONFIG_DEBUG_TCG
1331     switch (type) {
1332     case TCG_TYPE_V64:
1333         assert(TCG_TARGET_HAS_v64);
1334         break;
1335     case TCG_TYPE_V128:
1336         assert(TCG_TARGET_HAS_v128);
1337         break;
1338     case TCG_TYPE_V256:
1339         assert(TCG_TARGET_HAS_v256);
1340         break;
1341     default:
1342         g_assert_not_reached();
1343     }
1344 #endif
1345 
1346     t = tcg_temp_new_internal(type, TEMP_EBB);
1347     return temp_tcgv_vec(t);
1348 }
1349 
1350 /* Create a new temp of the same type as an existing temp.  */
1351 TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match)
1352 {
1353     TCGTemp *t = tcgv_vec_temp(match);
1354 
1355     tcg_debug_assert(t->temp_allocated != 0);
1356 
1357     t = tcg_temp_new_internal(t->base_type, TEMP_EBB);
1358     return temp_tcgv_vec(t);
1359 }
1360 
/*
 * Release a temp.  Constants are interned and shared, so freeing them
 * is a silent no-op; TEMP_EBB temps go back on the per-type free list
 * for reuse; TEMP_TB temps are merely marked unallocated.
 */
void tcg_temp_free_internal(TCGTemp *ts)
{
    TCGContext *s = tcg_ctx;

    switch (ts->kind) {
    case TEMP_CONST:
        /*
         * In order to simplify users of tcg_constant_*,
         * silently ignore free.
         */
        return;
    case TEMP_EBB:
    case TEMP_TB:
        break;
    default:
        g_assert_not_reached();
    }

    /* Double-free check. */
    tcg_debug_assert(ts->temp_allocated != 0);
    ts->temp_allocated = 0;

#if defined(CONFIG_DEBUG_TCG)
    assert(s->temps_in_use > 0);
    s->temps_in_use--;
#endif

    if (ts->kind == TEMP_EBB) {
        /* Make the slot reusable by tcg_temp_new_internal(). */
        int idx = temp_idx(ts);
        set_bit(idx, s->free_temps[ts->base_type].l);
    }
}
1392 
/*
 * Return the interned constant temp for (type, val), creating it on
 * first use.  Constants are deduplicated per type via a hash table
 * keyed on the 64-bit value stored inside the temp itself.
 */
TCGTemp *tcg_constant_internal(TCGType type, int64_t val)
{
    TCGContext *s = tcg_ctx;
    GHashTable *h = s->const_table[type];
    TCGTemp *ts;

    if (h == NULL) {
        /* Lazily create the per-type table on first constant. */
        h = g_hash_table_new(g_int64_hash, g_int64_equal);
        s->const_table[type] = h;
    }

    ts = g_hash_table_lookup(h, &val);
    if (ts == NULL) {
        int64_t *val_ptr;

        ts = tcg_temp_alloc(s);

        if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
            /* 64-bit constant on a 32-bit host: adjacent I32 pair. */
            TCGTemp *ts2 = tcg_temp_alloc(s);

            tcg_debug_assert(ts2 == ts + 1);

            ts->base_type = TCG_TYPE_I64;
            ts->type = TCG_TYPE_I32;
            ts->kind = TEMP_CONST;
            ts->temp_allocated = 1;

            ts2->base_type = TCG_TYPE_I64;
            ts2->type = TCG_TYPE_I32;
            ts2->kind = TEMP_CONST;
            ts2->temp_allocated = 1;
            ts2->temp_subindex = 1;

            /*
             * Retain the full value of the 64-bit constant in the low
             * part, so that the hash table works.  Actual uses will
             * truncate the value to the low part.
             */
            ts[HOST_BIG_ENDIAN].val = val;
            ts[!HOST_BIG_ENDIAN].val = val >> 32;
            val_ptr = &ts[HOST_BIG_ENDIAN].val;
        } else {
            ts->base_type = type;
            ts->type = type;
            ts->kind = TEMP_CONST;
            ts->temp_allocated = 1;
            ts->val = val;
            val_ptr = &ts->val;
        }
        /* The key points into the temp, which lives forever. */
        g_hash_table_insert(h, val_ptr, ts);
    }

    return ts;
}
1447 
1448 TCGv_vec tcg_constant_vec(TCGType type, unsigned vece, int64_t val)
1449 {
1450     val = dup_const(vece, val);
1451     return temp_tcgv_vec(tcg_constant_internal(type, val));
1452 }
1453 
1454 TCGv_vec tcg_constant_vec_matching(TCGv_vec match, unsigned vece, int64_t val)
1455 {
1456     TCGTemp *t = tcgv_vec_temp(match);
1457 
1458     tcg_debug_assert(t->temp_allocated != 0);
1459     return tcg_constant_vec(t->base_type, vece, val);
1460 }
1461 
1462 TCGv_i32 tcg_const_i32(int32_t val)
1463 {
1464     TCGv_i32 t0;
1465     t0 = tcg_temp_new_i32();
1466     tcg_gen_movi_i32(t0, val);
1467     return t0;
1468 }
1469 
1470 TCGv_i64 tcg_const_i64(int64_t val)
1471 {
1472     TCGv_i64 t0;
1473     t0 = tcg_temp_new_i64();
1474     tcg_gen_movi_i64(t0, val);
1475     return t0;
1476 }
1477 
1478 #if defined(CONFIG_DEBUG_TCG)
1479 void tcg_clear_temp_count(void)
1480 {
1481     TCGContext *s = tcg_ctx;
1482     s->temps_in_use = 0;
1483 }
1484 
1485 int tcg_check_temp_count(void)
1486 {
1487     TCGContext *s = tcg_ctx;
1488     if (s->temps_in_use) {
1489         /* Clear the count so that we don't give another
1490          * warning immediately next time around.
1491          */
1492         s->temps_in_use = 0;
1493         return 1;
1494     }
1495     return 0;
1496 }
1497 #endif
1498 
/*
 * Return true if OP may appear in the opcode stream.
 * Test the runtime variable that controls each opcode.
 *
 * Mandatory opcodes return true unconditionally; optional ones return
 * the backend's TCG_TARGET_HAS_* flag; 64-bit and vector opcodes are
 * additionally gated on host register width / vector support.
 */
bool tcg_op_supported(TCGOpcode op)
{
    const bool have_vec
        = TCG_TARGET_HAS_v64 | TCG_TARGET_HAS_v128 | TCG_TARGET_HAS_v256;

    switch (op) {
    /* Opcodes every backend must implement. */
    case INDEX_op_discard:
    case INDEX_op_set_label:
    case INDEX_op_call:
    case INDEX_op_br:
    case INDEX_op_mb:
    case INDEX_op_insn_start:
    case INDEX_op_exit_tb:
    case INDEX_op_goto_tb:
    case INDEX_op_goto_ptr:
    case INDEX_op_qemu_ld_i32:
    case INDEX_op_qemu_st_i32:
    case INDEX_op_qemu_ld_i64:
    case INDEX_op_qemu_st_i64:
        return true;

    case INDEX_op_qemu_st8_i32:
        return TCG_TARGET_HAS_qemu_st8_i32;

    /* Mandatory 32-bit integer opcodes. */
    case INDEX_op_mov_i32:
    case INDEX_op_setcond_i32:
    case INDEX_op_brcond_i32:
    case INDEX_op_ld8u_i32:
    case INDEX_op_ld8s_i32:
    case INDEX_op_ld16u_i32:
    case INDEX_op_ld16s_i32:
    case INDEX_op_ld_i32:
    case INDEX_op_st8_i32:
    case INDEX_op_st16_i32:
    case INDEX_op_st_i32:
    case INDEX_op_add_i32:
    case INDEX_op_sub_i32:
    case INDEX_op_mul_i32:
    case INDEX_op_and_i32:
    case INDEX_op_or_i32:
    case INDEX_op_xor_i32:
    case INDEX_op_shl_i32:
    case INDEX_op_shr_i32:
    case INDEX_op_sar_i32:
        return true;

    /* Optional 32-bit integer opcodes. */
    case INDEX_op_movcond_i32:
        return TCG_TARGET_HAS_movcond_i32;
    case INDEX_op_div_i32:
    case INDEX_op_divu_i32:
        return TCG_TARGET_HAS_div_i32;
    case INDEX_op_rem_i32:
    case INDEX_op_remu_i32:
        return TCG_TARGET_HAS_rem_i32;
    case INDEX_op_div2_i32:
    case INDEX_op_divu2_i32:
        return TCG_TARGET_HAS_div2_i32;
    case INDEX_op_rotl_i32:
    case INDEX_op_rotr_i32:
        return TCG_TARGET_HAS_rot_i32;
    case INDEX_op_deposit_i32:
        return TCG_TARGET_HAS_deposit_i32;
    case INDEX_op_extract_i32:
        return TCG_TARGET_HAS_extract_i32;
    case INDEX_op_sextract_i32:
        return TCG_TARGET_HAS_sextract_i32;
    case INDEX_op_extract2_i32:
        return TCG_TARGET_HAS_extract2_i32;
    case INDEX_op_add2_i32:
        return TCG_TARGET_HAS_add2_i32;
    case INDEX_op_sub2_i32:
        return TCG_TARGET_HAS_sub2_i32;
    case INDEX_op_mulu2_i32:
        return TCG_TARGET_HAS_mulu2_i32;
    case INDEX_op_muls2_i32:
        return TCG_TARGET_HAS_muls2_i32;
    case INDEX_op_muluh_i32:
        return TCG_TARGET_HAS_muluh_i32;
    case INDEX_op_mulsh_i32:
        return TCG_TARGET_HAS_mulsh_i32;
    case INDEX_op_ext8s_i32:
        return TCG_TARGET_HAS_ext8s_i32;
    case INDEX_op_ext16s_i32:
        return TCG_TARGET_HAS_ext16s_i32;
    case INDEX_op_ext8u_i32:
        return TCG_TARGET_HAS_ext8u_i32;
    case INDEX_op_ext16u_i32:
        return TCG_TARGET_HAS_ext16u_i32;
    case INDEX_op_bswap16_i32:
        return TCG_TARGET_HAS_bswap16_i32;
    case INDEX_op_bswap32_i32:
        return TCG_TARGET_HAS_bswap32_i32;
    case INDEX_op_not_i32:
        return TCG_TARGET_HAS_not_i32;
    case INDEX_op_neg_i32:
        return TCG_TARGET_HAS_neg_i32;
    case INDEX_op_andc_i32:
        return TCG_TARGET_HAS_andc_i32;
    case INDEX_op_orc_i32:
        return TCG_TARGET_HAS_orc_i32;
    case INDEX_op_eqv_i32:
        return TCG_TARGET_HAS_eqv_i32;
    case INDEX_op_nand_i32:
        return TCG_TARGET_HAS_nand_i32;
    case INDEX_op_nor_i32:
        return TCG_TARGET_HAS_nor_i32;
    case INDEX_op_clz_i32:
        return TCG_TARGET_HAS_clz_i32;
    case INDEX_op_ctz_i32:
        return TCG_TARGET_HAS_ctz_i32;
    case INDEX_op_ctpop_i32:
        return TCG_TARGET_HAS_ctpop_i32;

    /* Double-word compares exist only on 32-bit hosts. */
    case INDEX_op_brcond2_i32:
    case INDEX_op_setcond2_i32:
        return TCG_TARGET_REG_BITS == 32;

    /* 64-bit integer opcodes, mandatory on 64-bit hosts. */
    case INDEX_op_mov_i64:
    case INDEX_op_setcond_i64:
    case INDEX_op_brcond_i64:
    case INDEX_op_ld8u_i64:
    case INDEX_op_ld8s_i64:
    case INDEX_op_ld16u_i64:
    case INDEX_op_ld16s_i64:
    case INDEX_op_ld32u_i64:
    case INDEX_op_ld32s_i64:
    case INDEX_op_ld_i64:
    case INDEX_op_st8_i64:
    case INDEX_op_st16_i64:
    case INDEX_op_st32_i64:
    case INDEX_op_st_i64:
    case INDEX_op_add_i64:
    case INDEX_op_sub_i64:
    case INDEX_op_mul_i64:
    case INDEX_op_and_i64:
    case INDEX_op_or_i64:
    case INDEX_op_xor_i64:
    case INDEX_op_shl_i64:
    case INDEX_op_shr_i64:
    case INDEX_op_sar_i64:
    case INDEX_op_ext_i32_i64:
    case INDEX_op_extu_i32_i64:
        return TCG_TARGET_REG_BITS == 64;

    /* Optional 64-bit integer opcodes. */
    case INDEX_op_movcond_i64:
        return TCG_TARGET_HAS_movcond_i64;
    case INDEX_op_div_i64:
    case INDEX_op_divu_i64:
        return TCG_TARGET_HAS_div_i64;
    case INDEX_op_rem_i64:
    case INDEX_op_remu_i64:
        return TCG_TARGET_HAS_rem_i64;
    case INDEX_op_div2_i64:
    case INDEX_op_divu2_i64:
        return TCG_TARGET_HAS_div2_i64;
    case INDEX_op_rotl_i64:
    case INDEX_op_rotr_i64:
        return TCG_TARGET_HAS_rot_i64;
    case INDEX_op_deposit_i64:
        return TCG_TARGET_HAS_deposit_i64;
    case INDEX_op_extract_i64:
        return TCG_TARGET_HAS_extract_i64;
    case INDEX_op_sextract_i64:
        return TCG_TARGET_HAS_sextract_i64;
    case INDEX_op_extract2_i64:
        return TCG_TARGET_HAS_extract2_i64;
    case INDEX_op_extrl_i64_i32:
        return TCG_TARGET_HAS_extrl_i64_i32;
    case INDEX_op_extrh_i64_i32:
        return TCG_TARGET_HAS_extrh_i64_i32;
    case INDEX_op_ext8s_i64:
        return TCG_TARGET_HAS_ext8s_i64;
    case INDEX_op_ext16s_i64:
        return TCG_TARGET_HAS_ext16s_i64;
    case INDEX_op_ext32s_i64:
        return TCG_TARGET_HAS_ext32s_i64;
    case INDEX_op_ext8u_i64:
        return TCG_TARGET_HAS_ext8u_i64;
    case INDEX_op_ext16u_i64:
        return TCG_TARGET_HAS_ext16u_i64;
    case INDEX_op_ext32u_i64:
        return TCG_TARGET_HAS_ext32u_i64;
    case INDEX_op_bswap16_i64:
        return TCG_TARGET_HAS_bswap16_i64;
    case INDEX_op_bswap32_i64:
        return TCG_TARGET_HAS_bswap32_i64;
    case INDEX_op_bswap64_i64:
        return TCG_TARGET_HAS_bswap64_i64;
    case INDEX_op_not_i64:
        return TCG_TARGET_HAS_not_i64;
    case INDEX_op_neg_i64:
        return TCG_TARGET_HAS_neg_i64;
    case INDEX_op_andc_i64:
        return TCG_TARGET_HAS_andc_i64;
    case INDEX_op_orc_i64:
        return TCG_TARGET_HAS_orc_i64;
    case INDEX_op_eqv_i64:
        return TCG_TARGET_HAS_eqv_i64;
    case INDEX_op_nand_i64:
        return TCG_TARGET_HAS_nand_i64;
    case INDEX_op_nor_i64:
        return TCG_TARGET_HAS_nor_i64;
    case INDEX_op_clz_i64:
        return TCG_TARGET_HAS_clz_i64;
    case INDEX_op_ctz_i64:
        return TCG_TARGET_HAS_ctz_i64;
    case INDEX_op_ctpop_i64:
        return TCG_TARGET_HAS_ctpop_i64;
    case INDEX_op_add2_i64:
        return TCG_TARGET_HAS_add2_i64;
    case INDEX_op_sub2_i64:
        return TCG_TARGET_HAS_sub2_i64;
    case INDEX_op_mulu2_i64:
        return TCG_TARGET_HAS_mulu2_i64;
    case INDEX_op_muls2_i64:
        return TCG_TARGET_HAS_muls2_i64;
    case INDEX_op_muluh_i64:
        return TCG_TARGET_HAS_muluh_i64;
    case INDEX_op_mulsh_i64:
        return TCG_TARGET_HAS_mulsh_i64;

    /* Vector opcodes, gated on any vector support at all. */
    case INDEX_op_mov_vec:
    case INDEX_op_dup_vec:
    case INDEX_op_dupm_vec:
    case INDEX_op_ld_vec:
    case INDEX_op_st_vec:
    case INDEX_op_add_vec:
    case INDEX_op_sub_vec:
    case INDEX_op_and_vec:
    case INDEX_op_or_vec:
    case INDEX_op_xor_vec:
    case INDEX_op_cmp_vec:
        return have_vec;
    case INDEX_op_dup2_vec:
        return have_vec && TCG_TARGET_REG_BITS == 32;
    case INDEX_op_not_vec:
        return have_vec && TCG_TARGET_HAS_not_vec;
    case INDEX_op_neg_vec:
        return have_vec && TCG_TARGET_HAS_neg_vec;
    case INDEX_op_abs_vec:
        return have_vec && TCG_TARGET_HAS_abs_vec;
    case INDEX_op_andc_vec:
        return have_vec && TCG_TARGET_HAS_andc_vec;
    case INDEX_op_orc_vec:
        return have_vec && TCG_TARGET_HAS_orc_vec;
    case INDEX_op_nand_vec:
        return have_vec && TCG_TARGET_HAS_nand_vec;
    case INDEX_op_nor_vec:
        return have_vec && TCG_TARGET_HAS_nor_vec;
    case INDEX_op_eqv_vec:
        return have_vec && TCG_TARGET_HAS_eqv_vec;
    case INDEX_op_mul_vec:
        return have_vec && TCG_TARGET_HAS_mul_vec;
    case INDEX_op_shli_vec:
    case INDEX_op_shri_vec:
    case INDEX_op_sari_vec:
        return have_vec && TCG_TARGET_HAS_shi_vec;
    case INDEX_op_shls_vec:
    case INDEX_op_shrs_vec:
    case INDEX_op_sars_vec:
        return have_vec && TCG_TARGET_HAS_shs_vec;
    case INDEX_op_shlv_vec:
    case INDEX_op_shrv_vec:
    case INDEX_op_sarv_vec:
        return have_vec && TCG_TARGET_HAS_shv_vec;
    case INDEX_op_rotli_vec:
        return have_vec && TCG_TARGET_HAS_roti_vec;
    case INDEX_op_rotls_vec:
        return have_vec && TCG_TARGET_HAS_rots_vec;
    case INDEX_op_rotlv_vec:
    case INDEX_op_rotrv_vec:
        return have_vec && TCG_TARGET_HAS_rotv_vec;
    case INDEX_op_ssadd_vec:
    case INDEX_op_usadd_vec:
    case INDEX_op_sssub_vec:
    case INDEX_op_ussub_vec:
        return have_vec && TCG_TARGET_HAS_sat_vec;
    case INDEX_op_smin_vec:
    case INDEX_op_umin_vec:
    case INDEX_op_smax_vec:
    case INDEX_op_umax_vec:
        return have_vec && TCG_TARGET_HAS_minmax_vec;
    case INDEX_op_bitsel_vec:
        return have_vec && TCG_TARGET_HAS_bitsel_vec;
    case INDEX_op_cmpsel_vec:
        return have_vec && TCG_TARGET_HAS_cmpsel_vec;

    default:
        /* Target-specific opcodes past the generic set are always valid. */
        tcg_debug_assert(op > INDEX_op_last_generic && op < NB_OPS);
        return true;
    }
}
1793 
1794 static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs);
1795 
/*
 * Emit an INDEX_op_call to helper 'func'.  The helper's precomputed
 * call layout (looked up in helper_table) drives how the return value
 * and each input argument are placed into the op's argument array.
 * 'ret' may be NULL for void helpers; 'args' holds nargs input temps.
 */
void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args)
{
    const TCGHelperInfo *info;
    TCGv_i64 extend_free[MAX_CALL_IARGS];
    int n_extend = 0;
    TCGOp *op;
    int i, n, pi = 0, total_args;

    info = g_hash_table_lookup(helper_table, (gpointer)func);
    /* Outputs + inputs + trailing func and info words. */
    total_args = info->nr_out + info->nr_in + 2;
    op = tcg_op_alloc(INDEX_op_call, total_args);

#ifdef CONFIG_PLUGIN
    /* Flag helpers that may affect guest state */
    if (tcg_ctx->plugin_insn &&
        !(info->flags & TCG_CALL_PLUGIN) &&
        !(info->flags & TCG_CALL_NO_SIDE_EFFECTS)) {
        tcg_ctx->plugin_insn->calls_helpers = true;
    }
#endif

    /* Place the output temp(s): 0, 1, or a 2/4-piece wide return. */
    TCGOP_CALLO(op) = n = info->nr_out;
    switch (n) {
    case 0:
        tcg_debug_assert(ret == NULL);
        break;
    case 1:
        tcg_debug_assert(ret != NULL);
        op->args[pi++] = temp_arg(ret);
        break;
    case 2:
    case 4:
        tcg_debug_assert(ret != NULL);
        /* Wide returns use n contiguous register-sized pieces. */
        tcg_debug_assert(ret->base_type == ret->type + ctz32(n));
        tcg_debug_assert(ret->temp_subindex == 0);
        for (i = 0; i < n; ++i) {
            op->args[pi++] = temp_arg(ret + i);
        }
        break;
    default:
        g_assert_not_reached();
    }

    /* Place the inputs according to each one's layout kind. */
    TCGOP_CALLI(op) = n = info->nr_in;
    for (i = 0; i < n; i++) {
        const TCGCallArgumentLoc *loc = &info->in[i];
        TCGTemp *ts = args[loc->arg_idx] + loc->tmp_subindex;

        switch (loc->kind) {
        case TCG_CALL_ARG_NORMAL:
        case TCG_CALL_ARG_BY_REF:
        case TCG_CALL_ARG_BY_REF_N:
            op->args[pi++] = temp_arg(ts);
            break;

        case TCG_CALL_ARG_EXTEND_U:
        case TCG_CALL_ARG_EXTEND_S:
            {
                /* Widen a 32-bit argument to 64 bits in a scratch temp. */
                TCGv_i64 temp = tcg_temp_ebb_new_i64();
                TCGv_i32 orig = temp_tcgv_i32(ts);

                if (loc->kind == TCG_CALL_ARG_EXTEND_S) {
                    tcg_gen_ext_i32_i64(temp, orig);
                } else {
                    tcg_gen_extu_i32_i64(temp, orig);
                }
                op->args[pi++] = tcgv_i64_arg(temp);
                extend_free[n_extend++] = temp;
            }
            break;

        default:
            g_assert_not_reached();
        }
    }
    op->args[pi++] = (uintptr_t)func;
    op->args[pi++] = (uintptr_t)info;
    tcg_debug_assert(pi == total_args);

    QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);

    /* Release the widening scratch temps now that the op owns them. */
    tcg_debug_assert(n_extend < ARRAY_SIZE(extend_free));
    for (i = 0; i < n_extend; ++i) {
        tcg_temp_free_i64(extend_free[i]);
    }
}
1882 
1883 static void tcg_reg_alloc_start(TCGContext *s)
1884 {
1885     int i, n;
1886 
1887     for (i = 0, n = s->nb_temps; i < n; i++) {
1888         TCGTemp *ts = &s->temps[i];
1889         TCGTempVal val = TEMP_VAL_MEM;
1890 
1891         switch (ts->kind) {
1892         case TEMP_CONST:
1893             val = TEMP_VAL_CONST;
1894             break;
1895         case TEMP_FIXED:
1896             val = TEMP_VAL_REG;
1897             break;
1898         case TEMP_GLOBAL:
1899             break;
1900         case TEMP_EBB:
1901             val = TEMP_VAL_DEAD;
1902             /* fall through */
1903         case TEMP_TB:
1904             ts->mem_allocated = 0;
1905             break;
1906         default:
1907             g_assert_not_reached();
1908         }
1909         ts->val_type = val;
1910     }
1911 
1912     memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp));
1913 }
1914 
/*
 * Format a human-readable name for temp @ts into @buf (of @buf_size
 * bytes) and return @buf.  Globals and fixed temps print their given
 * name; TB/EBB temps print as locN/tmpN indexed past the globals;
 * constants print their value, prefixed with the vector width for
 * vector types.
 */
static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size,
                                 TCGTemp *ts)
{
    int idx = temp_idx(ts);

    switch (ts->kind) {
    case TEMP_FIXED:
    case TEMP_GLOBAL:
        pstrcpy(buf, buf_size, ts->name);
        break;
    case TEMP_TB:
        snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
        break;
    case TEMP_EBB:
        snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
        break;
    case TEMP_CONST:
        switch (ts->type) {
        case TCG_TYPE_I32:
            snprintf(buf, buf_size, "$0x%x", (int32_t)ts->val);
            break;
#if TCG_TARGET_REG_BITS > 32
        case TCG_TYPE_I64:
            snprintf(buf, buf_size, "$0x%" PRIx64, ts->val);
            break;
#endif
        case TCG_TYPE_V64:
        case TCG_TYPE_V128:
        case TCG_TYPE_V256:
            /* Vector constant: print element-replicated value and width. */
            snprintf(buf, buf_size, "v%d$0x%" PRIx64,
                     64 << (ts->type - TCG_TYPE_V64), ts->val);
            break;
        default:
            g_assert_not_reached();
        }
        break;
    }
    return buf;
}
1954 
1955 static char *tcg_get_arg_str(TCGContext *s, char *buf,
1956                              int buf_size, TCGArg arg)
1957 {
1958     return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg));
1959 }
1960 
/* Printable names for TCGCond values, indexed by condition code. */
static const char * const cond_name[] =
{
    [TCG_COND_NEVER] = "never",
    [TCG_COND_ALWAYS] = "always",
    [TCG_COND_EQ] = "eq",
    [TCG_COND_NE] = "ne",
    [TCG_COND_LT] = "lt",
    [TCG_COND_GE] = "ge",
    [TCG_COND_LE] = "le",
    [TCG_COND_GT] = "gt",
    [TCG_COND_LTU] = "ltu",
    [TCG_COND_GEU] = "geu",
    [TCG_COND_LEU] = "leu",
    [TCG_COND_GTU] = "gtu"
};
1976 
/* Printable names for MemOp size/sign/endian combinations. */
static const char * const ldst_name[] =
{
    [MO_UB]   = "ub",
    [MO_SB]   = "sb",
    [MO_LEUW] = "leuw",
    [MO_LESW] = "lesw",
    [MO_LEUL] = "leul",
    [MO_LESL] = "lesl",
    [MO_LEUQ] = "leq",
    [MO_BEUW] = "beuw",
    [MO_BESW] = "besw",
    [MO_BEUL] = "beul",
    [MO_BESL] = "besl",
    [MO_BEUQ] = "beq",
};
1992 
/*
 * Printable prefixes for MemOp alignment bits.  The "default" case
 * (printed as "") depends on whether the target requires alignment:
 * aligned targets only annotate unaligned accesses, and vice versa.
 */
static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
#ifdef TARGET_ALIGNED_ONLY
    [MO_UNALN >> MO_ASHIFT]    = "un+",
    [MO_ALIGN >> MO_ASHIFT]    = "",
#else
    [MO_UNALN >> MO_ASHIFT]    = "",
    [MO_ALIGN >> MO_ASHIFT]    = "al+",
#endif
    [MO_ALIGN_2 >> MO_ASHIFT]  = "al2+",
    [MO_ALIGN_4 >> MO_ASHIFT]  = "al4+",
    [MO_ALIGN_8 >> MO_ASHIFT]  = "al8+",
    [MO_ALIGN_16 >> MO_ASHIFT] = "al16+",
    [MO_ALIGN_32 >> MO_ASHIFT] = "al32+",
    [MO_ALIGN_64 >> MO_ASHIFT] = "al64+",
};
2008 
/* Printable names for valid TCG_BSWAP_* flag combinations. */
static const char bswap_flag_name[][6] = {
    [TCG_BSWAP_IZ] = "iz",
    [TCG_BSWAP_OZ] = "oz",
    [TCG_BSWAP_OS] = "os",
    [TCG_BSWAP_IZ | TCG_BSWAP_OZ] = "iz,oz",
    [TCG_BSWAP_IZ | TCG_BSWAP_OS] = "iz,os",
};
2016 
2017 static inline bool tcg_regset_single(TCGRegSet d)
2018 {
2019     return (d & (d - 1)) == 0;
2020 }
2021 
2022 static inline TCGReg tcg_regset_first(TCGRegSet d)
2023 {
2024     if (TCG_TARGET_NB_REGS <= 32) {
2025         return ctz32(d);
2026     } else {
2027         return ctz64(d);
2028     }
2029 }
2030 
/* Return only the number of characters output -- no error return. */
/* fprintf wrapper: a negative (error) result is reported as 0 columns. */
#define ne_fprintf(...) \
    ({ int ret_ = fprintf(__VA_ARGS__); ret_ >= 0 ? ret_ : 0; })
2034 
/*
 * Dump the op stream of @s to @f, one op per line, for debug logging.
 * Calls are printed with their helper name and flags; condition codes,
 * memory-op indexes and bswap flags are decoded symbolically.  Liveness
 * info (sync/dead) is appended when present; register-allocation
 * preferences are appended when @have_prefs.
 */
static void tcg_dump_ops(TCGContext *s, FILE *f, bool have_prefs)
{
    char buf[128];
    TCGOp *op;

    QTAILQ_FOREACH(op, &s->ops, link) {
        int i, k, nb_oargs, nb_iargs, nb_cargs;
        const TCGOpDef *def;
        TCGOpcode c;
        int col = 0;    /* columns printed so far, for alignment below */

        c = op->opc;
        def = &tcg_op_defs[c];

        if (c == INDEX_op_insn_start) {
            /* Guest instruction boundary: print the start words. */
            nb_oargs = 0;
            col += ne_fprintf(f, "\n ----");

            for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
                target_ulong a;
#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
                /* Each word is split across two 32-bit host args. */
                a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
#else
                a = op->args[i];
#endif
                col += ne_fprintf(f, " " TARGET_FMT_lx, a);
            }
        } else if (c == INDEX_op_call) {
            const TCGHelperInfo *info = tcg_call_info(op);
            void *func = tcg_call_func(op);

            /* variable number of arguments */
            nb_oargs = TCGOP_CALLO(op);
            nb_iargs = TCGOP_CALLI(op);
            nb_cargs = def->nb_cargs;

            col += ne_fprintf(f, " %s ", def->name);

            /*
             * Print the function name from TCGHelperInfo, if available.
             * Note that plugins have a template function for the info,
             * but the actual function pointer comes from the plugin.
             */
            if (func == info->func) {
                col += ne_fprintf(f, "%s", info->name);
            } else {
                col += ne_fprintf(f, "plugin(%p)", func);
            }

            col += ne_fprintf(f, ",$0x%x,$%d", info->flags, nb_oargs);
            for (i = 0; i < nb_oargs; i++) {
                col += ne_fprintf(f, ",%s", tcg_get_arg_str(s, buf, sizeof(buf),
                                                            op->args[i]));
            }
            for (i = 0; i < nb_iargs; i++) {
                TCGArg arg = op->args[nb_oargs + i];
                const char *t = tcg_get_arg_str(s, buf, sizeof(buf), arg);
                col += ne_fprintf(f, ",%s", t);
            }
        } else {
            /* Ordinary op: fixed argument counts from the op definition. */
            col += ne_fprintf(f, " %s ", def->name);

            nb_oargs = def->nb_oargs;
            nb_iargs = def->nb_iargs;
            nb_cargs = def->nb_cargs;

            if (def->flags & TCG_OPF_VECTOR) {
                col += ne_fprintf(f, "v%d,e%d,", 64 << TCGOP_VECL(op),
                                  8 << TCGOP_VECE(op));
            }

            /* k indexes op->args; i counts within each argument class. */
            k = 0;
            for (i = 0; i < nb_oargs; i++) {
                const char *sep =  k ? "," : "";
                col += ne_fprintf(f, "%s%s", sep,
                                  tcg_get_arg_str(s, buf, sizeof(buf),
                                                  op->args[k++]));
            }
            for (i = 0; i < nb_iargs; i++) {
                const char *sep =  k ? "," : "";
                col += ne_fprintf(f, "%s%s", sep,
                                  tcg_get_arg_str(s, buf, sizeof(buf),
                                                  op->args[k++]));
            }
            /* Decode the first constant argument symbolically, if known. */
            switch (c) {
            case INDEX_op_brcond_i32:
            case INDEX_op_setcond_i32:
            case INDEX_op_movcond_i32:
            case INDEX_op_brcond2_i32:
            case INDEX_op_setcond2_i32:
            case INDEX_op_brcond_i64:
            case INDEX_op_setcond_i64:
            case INDEX_op_movcond_i64:
            case INDEX_op_cmp_vec:
            case INDEX_op_cmpsel_vec:
                if (op->args[k] < ARRAY_SIZE(cond_name)
                    && cond_name[op->args[k]]) {
                    col += ne_fprintf(f, ",%s", cond_name[op->args[k++]]);
                } else {
                    col += ne_fprintf(f, ",$0x%" TCG_PRIlx, op->args[k++]);
                }
                i = 1;
                break;
            case INDEX_op_qemu_ld_i32:
            case INDEX_op_qemu_st_i32:
            case INDEX_op_qemu_st8_i32:
            case INDEX_op_qemu_ld_i64:
            case INDEX_op_qemu_st_i64:
                {
                    MemOpIdx oi = op->args[k++];
                    /* NOTE: this MemOp shadows the TCGOp *op above. */
                    MemOp op = get_memop(oi);
                    unsigned ix = get_mmuidx(oi);

                    if (op & ~(MO_AMASK | MO_BSWAP | MO_SSIZE)) {
                        /* Unknown extra bits: fall back to raw hex. */
                        col += ne_fprintf(f, ",$0x%x,%u", op, ix);
                    } else {
                        const char *s_al, *s_op;
                        s_al = alignment_name[(op & MO_AMASK) >> MO_ASHIFT];
                        s_op = ldst_name[op & (MO_BSWAP | MO_SSIZE)];
                        col += ne_fprintf(f, ",%s%s,%u", s_al, s_op, ix);
                    }
                    i = 1;
                }
                break;
            case INDEX_op_bswap16_i32:
            case INDEX_op_bswap16_i64:
            case INDEX_op_bswap32_i32:
            case INDEX_op_bswap32_i64:
            case INDEX_op_bswap64_i64:
                {
                    TCGArg flags = op->args[k];
                    const char *name = NULL;

                    if (flags < ARRAY_SIZE(bswap_flag_name)) {
                        name = bswap_flag_name[flags];
                    }
                    if (name) {
                        col += ne_fprintf(f, ",%s", name);
                    } else {
                        col += ne_fprintf(f, ",$0x%" TCG_PRIlx, flags);
                    }
                    i = k = 1;
                }
                break;
            default:
                i = 0;
                break;
            }
            /* Branch-family ops carry a label as a constant argument. */
            switch (c) {
            case INDEX_op_set_label:
            case INDEX_op_br:
            case INDEX_op_brcond_i32:
            case INDEX_op_brcond_i64:
            case INDEX_op_brcond2_i32:
                col += ne_fprintf(f, "%s$L%d", k ? "," : "",
                                  arg_label(op->args[k])->id);
                i++, k++;
                break;
            default:
                break;
            }
            /* Any remaining constant arguments print as raw hex. */
            for (; i < nb_cargs; i++, k++) {
                col += ne_fprintf(f, "%s$0x%" TCG_PRIlx, k ? "," : "",
                                  op->args[k]);
            }
        }

        /* Pad to a fixed column before the liveness/preference info. */
        if (have_prefs || op->life) {
            for (; col < 40; ++col) {
                putc(' ', f);
            }
        }

        if (op->life) {
            unsigned life = op->life;

            /* Output args (bits 0-1) that must be synced to memory. */
            if (life & (SYNC_ARG * 3)) {
                ne_fprintf(f, "  sync:");
                for (i = 0; i < 2; ++i) {
                    if (life & (SYNC_ARG << i)) {
                        ne_fprintf(f, " %d", i);
                    }
                }
            }
            /* Remaining bits: args dead after this op. */
            life /= DEAD_ARG;
            if (life) {
                ne_fprintf(f, "  dead:");
                for (i = 0; life; ++i, life >>= 1) {
                    if (life & 1) {
                        ne_fprintf(f, " %d", i);
                    }
                }
            }
        }

        if (have_prefs) {
            for (i = 0; i < nb_oargs; ++i) {
                TCGRegSet set = output_pref(op, i);

                if (i == 0) {
                    ne_fprintf(f, "  pref=");
                } else {
                    ne_fprintf(f, ",");
                }
                if (set == 0) {
                    ne_fprintf(f, "none");
                } else if (set == MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS)) {
                    ne_fprintf(f, "all");
#ifdef CONFIG_DEBUG_TCG
                } else if (tcg_regset_single(set)) {
                    TCGReg reg = tcg_regset_first(set);
                    ne_fprintf(f, "%s", tcg_target_reg_names[reg]);
#endif
                } else if (TCG_TARGET_NB_REGS <= 32) {
                    ne_fprintf(f, "0x%x", (uint32_t)set);
                } else {
                    ne_fprintf(f, "0x%" PRIx64, (uint64_t)set);
                }
            }
        }

        putc('\n', f);
    }
}
2259 
2260 /* we give more priority to constraints with less registers */
2261 static int get_constraint_priority(const TCGOpDef *def, int k)
2262 {
2263     const TCGArgConstraint *arg_ct = &def->args_ct[k];
2264     int n = ctpop64(arg_ct->regs);
2265 
2266     /*
2267      * Sort constraints of a single register first, which includes output
2268      * aliases (which must exactly match the input already allocated).
2269      */
2270     if (n == 1 || arg_ct->oalias) {
2271         return INT_MAX;
2272     }
2273 
2274     /*
2275      * Sort register pairs next, first then second immediately after.
2276      * Arbitrarily sort multiple pairs by the index of the first reg;
2277      * there shouldn't be many pairs.
2278      */
2279     switch (arg_ct->pair) {
2280     case 1:
2281     case 3:
2282         return (k + 1) * 2;
2283     case 2:
2284         return (arg_ct->pair_index + 1) * 2 - 1;
2285     }
2286 
2287     /* Finally, sort by decreasing register count. */
2288     assert(n > 1);
2289     return -n;
2290 }
2291 
2292 /* sort from highest priority to lowest */
2293 static void sort_constraints(TCGOpDef *def, int start, int n)
2294 {
2295     int i, j;
2296     TCGArgConstraint *a = def->args_ct;
2297 
2298     for (i = 0; i < n; i++) {
2299         a[start + i].sort_index = start + i;
2300     }
2301     if (n <= 1) {
2302         return;
2303     }
2304     for (i = 0; i < n - 1; i++) {
2305         for (j = i + 1; j < n; j++) {
2306             int p1 = get_constraint_priority(def, a[start + i].sort_index);
2307             int p2 = get_constraint_priority(def, a[start + j].sort_index);
2308             if (p1 < p2) {
2309                 int tmp = a[start + i].sort_index;
2310                 a[start + i].sort_index = a[start + j].sort_index;
2311                 a[start + j].sort_index = tmp;
2312             }
2313         }
2314     }
2315 }
2316 
/*
 * Populate tcg_op_defs[] argument constraints from the target's
 * constraint strings: decode per-argument register sets and constant
 * flags, resolve input/output aliases ('0'-'9'), new-register outputs
 * ('&') and register pairs ('p'/'m'), then sort the constraints for
 * the register allocator.
 */
static void process_op_defs(TCGContext *s)
{
    TCGOpcode op;

    for (op = 0; op < NB_OPS; op++) {
        TCGOpDef *def = &tcg_op_defs[op];
        const TCGTargetOpDef *tdefs;
        bool saw_alias_pair = false;
        int i, o, i2, o2, nb_args;

        if (def->flags & TCG_OPF_NOT_PRESENT) {
            continue;
        }

        nb_args = def->nb_iargs + def->nb_oargs;
        if (nb_args == 0) {
            continue;
        }

        /*
         * Macro magic should make it impossible, but double-check that
         * the array index is in range.  Since the signness of an enum
         * is implementation defined, force the result to unsigned.
         */
        unsigned con_set = tcg_target_op_def(op);
        tcg_debug_assert(con_set < ARRAY_SIZE(constraint_sets));
        tdefs = &constraint_sets[con_set];

        for (i = 0; i < nb_args; i++) {
            const char *ct_str = tdefs->args_ct_str[i];
            /* Outputs come first; the rest are inputs. */
            bool input_p = i >= def->nb_oargs;

            /* Incomplete TCGTargetOpDef entry. */
            tcg_debug_assert(ct_str != NULL);

            switch (*ct_str) {
            case '0' ... '9':
                /* Input aliased to output #o: copy its constraint. */
                o = *ct_str - '0';
                tcg_debug_assert(input_p);
                tcg_debug_assert(o < def->nb_oargs);
                tcg_debug_assert(def->args_ct[o].regs != 0);
                tcg_debug_assert(!def->args_ct[o].oalias);
                def->args_ct[i] = def->args_ct[o];
                /* The output sets oalias.  */
                def->args_ct[o].oalias = 1;
                def->args_ct[o].alias_index = i;
                /* The input sets ialias. */
                def->args_ct[i].ialias = 1;
                def->args_ct[i].alias_index = o;
                if (def->args_ct[i].pair) {
                    saw_alias_pair = true;
                }
                tcg_debug_assert(ct_str[1] == '\0');
                continue;

            case '&':
                /* Output must not overlap any input register. */
                tcg_debug_assert(!input_p);
                def->args_ct[i].newreg = true;
                ct_str++;
                break;

            case 'p': /* plus */
                /* Allocate to the register after the previous. */
                tcg_debug_assert(i > (input_p ? def->nb_oargs : 0));
                o = i - 1;
                tcg_debug_assert(!def->args_ct[o].pair);
                tcg_debug_assert(!def->args_ct[o].ct);
                def->args_ct[i] = (TCGArgConstraint){
                    .pair = 2,
                    .pair_index = o,
                    .regs = def->args_ct[o].regs << 1,
                };
                def->args_ct[o].pair = 1;
                def->args_ct[o].pair_index = i;
                tcg_debug_assert(ct_str[1] == '\0');
                continue;

            case 'm': /* minus */
                /* Allocate to the register before the previous. */
                tcg_debug_assert(i > (input_p ? def->nb_oargs : 0));
                o = i - 1;
                tcg_debug_assert(!def->args_ct[o].pair);
                tcg_debug_assert(!def->args_ct[o].ct);
                def->args_ct[i] = (TCGArgConstraint){
                    .pair = 1,
                    .pair_index = o,
                    .regs = def->args_ct[o].regs >> 1,
                };
                def->args_ct[o].pair = 2;
                def->args_ct[o].pair_index = i;
                tcg_debug_assert(ct_str[1] == '\0');
                continue;
            }

            /* Accumulate register-set and constant constraint letters. */
            do {
                switch (*ct_str) {
                case 'i':
                    def->args_ct[i].ct |= TCG_CT_CONST;
                    break;

                /* Include all of the target-specific constraints. */

#undef CONST
#define CONST(CASE, MASK) \
    case CASE: def->args_ct[i].ct |= MASK; break;
#define REGS(CASE, MASK) \
    case CASE: def->args_ct[i].regs |= MASK; break;

#include "tcg-target-con-str.h"

#undef REGS
#undef CONST
                default:
                case '0' ... '9':
                case '&':
                case 'p':
                case 'm':
                    /* Typo in TCGTargetOpDef constraint. */
                    g_assert_not_reached();
                }
            } while (*++ct_str != '\0');
        }

        /* TCGTargetOpDef entry with too much information? */
        tcg_debug_assert(i == TCG_MAX_OP_ARGS || tdefs->args_ct_str[i] == NULL);

        /*
         * Fix up output pairs that are aliased with inputs.
         * When we created the alias, we copied pair from the output.
         * There are three cases:
         *    (1a) Pairs of inputs alias pairs of outputs.
         *    (1b) One input aliases the first of a pair of outputs.
         *    (2)  One input aliases the second of a pair of outputs.
         *
         * Case 1a is handled by making sure that the pair_index'es are
         * properly updated so that they appear the same as a pair of inputs.
         *
         * Case 1b is handled by setting the pair_index of the input to
         * itself, simply so it doesn't point to an unrelated argument.
         * Since we don't encounter the "second" during the input allocation
         * phase, nothing happens with the second half of the input pair.
         *
         * Case 2 is handled by setting the second input to pair=3, the
         * first output to pair=3, and the pair_index'es to match.
         */
        if (saw_alias_pair) {
            for (i = def->nb_oargs; i < nb_args; i++) {
                /*
                 * Since [0-9pm] must be alone in the constraint string,
                 * the only way they can both be set is if the pair comes
                 * from the output alias.
                 */
                if (!def->args_ct[i].ialias) {
                    continue;
                }
                switch (def->args_ct[i].pair) {
                case 0:
                    break;
                case 1:
                    o = def->args_ct[i].alias_index;
                    o2 = def->args_ct[o].pair_index;
                    tcg_debug_assert(def->args_ct[o].pair == 1);
                    tcg_debug_assert(def->args_ct[o2].pair == 2);
                    if (def->args_ct[o2].oalias) {
                        /* Case 1a */
                        i2 = def->args_ct[o2].alias_index;
                        tcg_debug_assert(def->args_ct[i2].pair == 2);
                        def->args_ct[i2].pair_index = i;
                        def->args_ct[i].pair_index = i2;
                    } else {
                        /* Case 1b */
                        def->args_ct[i].pair_index = i;
                    }
                    break;
                case 2:
                    o = def->args_ct[i].alias_index;
                    o2 = def->args_ct[o].pair_index;
                    tcg_debug_assert(def->args_ct[o].pair == 2);
                    tcg_debug_assert(def->args_ct[o2].pair == 1);
                    if (def->args_ct[o2].oalias) {
                        /* Case 1a */
                        i2 = def->args_ct[o2].alias_index;
                        tcg_debug_assert(def->args_ct[i2].pair == 1);
                        def->args_ct[i2].pair_index = i;
                        def->args_ct[i].pair_index = i2;
                    } else {
                        /* Case 2 */
                        def->args_ct[i].pair = 3;
                        def->args_ct[o2].pair = 3;
                        def->args_ct[i].pair_index = o2;
                        def->args_ct[o2].pair_index = i;
                    }
                    break;
                default:
                    g_assert_not_reached();
                }
            }
        }

        /* sort the constraints (XXX: this is just an heuristic) */
        sort_constraints(def, 0, def->nb_oargs);
        sort_constraints(def, def->nb_oargs, def->nb_iargs);
    }
}
2521 
/*
 * Remove @op from the op stream of @s.  Any label reference held by a
 * branch opcode is released, and the op is moved to the free list for
 * reuse by tcg_op_alloc().
 */
void tcg_op_remove(TCGContext *s, TCGOp *op)
{
    TCGLabel *label;

    /* Drop the label reference held by branch opcodes. */
    switch (op->opc) {
    case INDEX_op_br:
        label = arg_label(op->args[0]);
        label->refs--;
        break;
    case INDEX_op_brcond_i32:
    case INDEX_op_brcond_i64:
        label = arg_label(op->args[3]);
        label->refs--;
        break;
    case INDEX_op_brcond2_i32:
        label = arg_label(op->args[5]);
        label->refs--;
        break;
    default:
        break;
    }

    QTAILQ_REMOVE(&s->ops, op, link);
    QTAILQ_INSERT_TAIL(&s->free_ops, op, link);
    s->nb_ops--;

#ifdef CONFIG_PROFILER
    qatomic_set(&s->prof.del_op_count, s->prof.del_op_count + 1);
#endif
}
2552 
2553 void tcg_remove_ops_after(TCGOp *op)
2554 {
2555     TCGContext *s = tcg_ctx;
2556 
2557     while (true) {
2558         TCGOp *last = tcg_last_op();
2559         if (last == op) {
2560             return;
2561         }
2562         tcg_op_remove(s, last);
2563     }
2564 }
2565 
/*
 * Allocate a TCGOp with opcode @opc and room for @nargs arguments.
 * Prefers reusing a large-enough op from the free list; otherwise
 * allocates fresh storage from the TCG arena.  The returned op is
 * zeroed (up to its link field) and not yet inserted into any list.
 */
static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs)
{
    TCGContext *s = tcg_ctx;
    TCGOp *op = NULL;

    /* First-fit search of previously removed ops. */
    if (unlikely(!QTAILQ_EMPTY(&s->free_ops))) {
        QTAILQ_FOREACH(op, &s->free_ops, link) {
            if (nargs <= op->nargs) {
                QTAILQ_REMOVE(&s->free_ops, op, link);
                /* Keep the reused op's (possibly larger) capacity. */
                nargs = op->nargs;
                goto found;
            }
        }
    }

    /* Most opcodes have 3 or 4 operands: reduce fragmentation. */
    nargs = MAX(4, nargs);
    op = tcg_malloc(sizeof(TCGOp) + sizeof(TCGArg) * nargs);

 found:
    /* Zero everything up to (not including) the queue linkage. */
    memset(op, 0, offsetof(TCGOp, link));
    op->opc = opc;
    op->nargs = nargs;

    /* Check for bitfield overflow. */
    tcg_debug_assert(op->nargs == nargs);

    s->nb_ops++;
    return op;
}
2596 
2597 TCGOp *tcg_emit_op(TCGOpcode opc, unsigned nargs)
2598 {
2599     TCGOp *op = tcg_op_alloc(opc, nargs);
2600     QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
2601     return op;
2602 }
2603 
2604 TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op,
2605                             TCGOpcode opc, unsigned nargs)
2606 {
2607     TCGOp *new_op = tcg_op_alloc(opc, nargs);
2608     QTAILQ_INSERT_BEFORE(old_op, new_op, link);
2609     return new_op;
2610 }
2611 
2612 TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op,
2613                            TCGOpcode opc, unsigned nargs)
2614 {
2615     TCGOp *new_op = tcg_op_alloc(opc, nargs);
2616     QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link);
2617     return new_op;
2618 }
2619 
/* Reachable analysis : remove unreachable code.  */
/*
 * Single forward scan: after an unconditional control transfer (br,
 * exit_tb, goto_ptr, noreturn call) ops are dead until the next
 * referenced label.  Dead ops are removed, as are labels with no
 * remaining references and branches-to-next.
 */
static void __attribute__((noinline))
reachable_code_pass(TCGContext *s)
{
    TCGOp *op, *op_next, *op_prev;
    bool dead = false;

    QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
        bool remove = dead;
        TCGLabel *label;

        switch (op->opc) {
        case INDEX_op_set_label:
            label = arg_label(op->args[0]);

            /*
             * Optimization can fold conditional branches to unconditional.
             * If we find a label which is preceded by an unconditional
             * branch to next, remove the branch.  We couldn't do this when
             * processing the branch because any dead code between the branch
             * and label had not yet been removed.
             */
            op_prev = QTAILQ_PREV(op, link);
            if (op_prev->opc == INDEX_op_br &&
                label == arg_label(op_prev->args[0])) {
                tcg_op_remove(s, op_prev);
                /* Fall through means insns become live again.  */
                dead = false;
            }

            if (label->refs == 0) {
                /*
                 * While there is an occasional backward branch, virtually
                 * all branches generated by the translators are forward.
                 * Which means that generally we will have already removed
                 * all references to the label that will be, and there is
                 * little to be gained by iterating.
                 */
                remove = true;
            } else {
                /* Once we see a label, insns become live again.  */
                dead = false;
                remove = false;
            }
            break;

        case INDEX_op_br:
        case INDEX_op_exit_tb:
        case INDEX_op_goto_ptr:
            /* Unconditional branches; everything following is dead.  */
            dead = true;
            break;

        case INDEX_op_call:
            /* Notice noreturn helper calls, raising exceptions.  */
            if (tcg_call_flags(op) & TCG_CALL_NO_RETURN) {
                dead = true;
            }
            break;

        case INDEX_op_insn_start:
            /* Never remove -- we need to keep these for unwind.  */
            remove = false;
            break;

        default:
            break;
        }

        if (remove) {
            tcg_op_remove(s, op);
        }
    }
}
2694 
/* Liveness state bits for a temp: value dead / must be synced to memory. */
#define TS_DEAD  1
#define TS_MEM   2

/* Test the per-argument life bits recorded in arg_life by liveness. */
#define IS_DEAD_ARG(n)   (arg_life & (DEAD_ARG << (n)))
#define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n)))
2700 
/* For liveness_pass_1, the register preferences for a given temp.  */
static inline TCGRegSet *la_temp_pref(TCGTemp *ts)
{
    /* The pass stores the preference regset behind state_ptr. */
    return ts->state_ptr;
}
2706 
2707 /* For liveness_pass_1, reset the preferences for a given temp to the
2708  * maximal regset for its type.
2709  */
2710 static inline void la_reset_pref(TCGTemp *ts)
2711 {
2712     *la_temp_pref(ts)
2713         = (ts->state == TS_DEAD ? 0 : tcg_target_available_regs[ts->type]);
2714 }
2715 
2716 /* liveness analysis: end of function: all temps are dead, and globals
2717    should be in memory. */
2718 static void la_func_end(TCGContext *s, int ng, int nt)
2719 {
2720     int i;
2721 
2722     for (i = 0; i < ng; ++i) {
2723         s->temps[i].state = TS_DEAD | TS_MEM;
2724         la_reset_pref(&s->temps[i]);
2725     }
2726     for (i = ng; i < nt; ++i) {
2727         s->temps[i].state = TS_DEAD;
2728         la_reset_pref(&s->temps[i]);
2729     }
2730 }
2731 
2732 /* liveness analysis: end of basic block: all temps are dead, globals
2733    and local temps should be in memory. */
2734 static void la_bb_end(TCGContext *s, int ng, int nt)
2735 {
2736     int i;
2737 
2738     for (i = 0; i < nt; ++i) {
2739         TCGTemp *ts = &s->temps[i];
2740         int state;
2741 
2742         switch (ts->kind) {
2743         case TEMP_FIXED:
2744         case TEMP_GLOBAL:
2745         case TEMP_TB:
2746             state = TS_DEAD | TS_MEM;
2747             break;
2748         case TEMP_EBB:
2749         case TEMP_CONST:
2750             state = TS_DEAD;
2751             break;
2752         default:
2753             g_assert_not_reached();
2754         }
2755         ts->state = state;
2756         la_reset_pref(ts);
2757     }
2758 }
2759 
2760 /* liveness analysis: sync globals back to memory.  */
2761 static void la_global_sync(TCGContext *s, int ng)
2762 {
2763     int i;
2764 
2765     for (i = 0; i < ng; ++i) {
2766         int state = s->temps[i].state;
2767         s->temps[i].state = state | TS_MEM;
2768         if (state == TS_DEAD) {
2769             /* If the global was previously dead, reset prefs.  */
2770             la_reset_pref(&s->temps[i]);
2771         }
2772     }
2773 }
2774 
2775 /*
2776  * liveness analysis: conditional branch: all temps are dead unless
2777  * explicitly live-across-conditional-branch, globals and local temps
2778  * should be synced.
2779  */
2780 static void la_bb_sync(TCGContext *s, int ng, int nt)
2781 {
2782     la_global_sync(s, ng);
2783 
2784     for (int i = ng; i < nt; ++i) {
2785         TCGTemp *ts = &s->temps[i];
2786         int state;
2787 
2788         switch (ts->kind) {
2789         case TEMP_TB:
2790             state = ts->state;
2791             ts->state = state | TS_MEM;
2792             if (state != TS_DEAD) {
2793                 continue;
2794             }
2795             break;
2796         case TEMP_EBB:
2797         case TEMP_CONST:
2798             continue;
2799         default:
2800             g_assert_not_reached();
2801         }
2802         la_reset_pref(&s->temps[i]);
2803     }
2804 }
2805 
2806 /* liveness analysis: sync globals back to memory and kill.  */
2807 static void la_global_kill(TCGContext *s, int ng)
2808 {
2809     int i;
2810 
2811     for (i = 0; i < ng; i++) {
2812         s->temps[i].state = TS_DEAD | TS_MEM;
2813         la_reset_pref(&s->temps[i]);
2814     }
2815 }
2816 
2817 /* liveness analysis: note live globals crossing calls.  */
2818 static void la_cross_call(TCGContext *s, int nt)
2819 {
2820     TCGRegSet mask = ~tcg_target_call_clobber_regs;
2821     int i;
2822 
2823     for (i = 0; i < nt; i++) {
2824         TCGTemp *ts = &s->temps[i];
2825         if (!(ts->state & TS_DEAD)) {
2826             TCGRegSet *pset = la_temp_pref(ts);
2827             TCGRegSet set = *pset;
2828 
2829             set &= mask;
2830             /* If the combination is not possible, restart.  */
2831             if (set == 0) {
2832                 set = tcg_target_available_regs[ts->type] & mask;
2833             }
2834             *pset = set;
2835         }
2836     }
2837 }
2838 
2839 /*
2840  * Liveness analysis: Verify the lifetime of TEMP_TB, and reduce
2841  * to TEMP_EBB, if possible.
2842  */
2843 static void __attribute__((noinline))
2844 liveness_pass_0(TCGContext *s)
2845 {
2846     void * const multiple_ebb = (void *)(uintptr_t)-1;
2847     int nb_temps = s->nb_temps;
2848     TCGOp *op, *ebb;
2849 
2850     for (int i = s->nb_globals; i < nb_temps; ++i) {
2851         s->temps[i].state_ptr = NULL;
2852     }
2853 
2854     /*
2855      * Represent each EBB by the op at which it begins.  In the case of
2856      * the first EBB, this is the first op, otherwise it is a label.
2857      * Collect the uses of each TEMP_TB: NULL for unused, EBB for use
2858      * within a single EBB, else MULTIPLE_EBB.
2859      */
2860     ebb = QTAILQ_FIRST(&s->ops);
2861     QTAILQ_FOREACH(op, &s->ops, link) {
2862         const TCGOpDef *def;
2863         int nb_oargs, nb_iargs;
2864 
2865         switch (op->opc) {
2866         case INDEX_op_set_label:
2867             ebb = op;
2868             continue;
2869         case INDEX_op_discard:
2870             continue;
2871         case INDEX_op_call:
2872             nb_oargs = TCGOP_CALLO(op);
2873             nb_iargs = TCGOP_CALLI(op);
2874             break;
2875         default:
2876             def = &tcg_op_defs[op->opc];
2877             nb_oargs = def->nb_oargs;
2878             nb_iargs = def->nb_iargs;
2879             break;
2880         }
2881 
2882         for (int i = 0; i < nb_oargs + nb_iargs; ++i) {
2883             TCGTemp *ts = arg_temp(op->args[i]);
2884 
2885             if (ts->kind != TEMP_TB) {
2886                 continue;
2887             }
2888             if (ts->state_ptr == NULL) {
2889                 ts->state_ptr = ebb;
2890             } else if (ts->state_ptr != ebb) {
2891                 ts->state_ptr = multiple_ebb;
2892             }
2893         }
2894     }
2895 
2896     /*
2897      * For TEMP_TB that turned out not to be used beyond one EBB,
2898      * reduce the liveness to TEMP_EBB.
2899      */
2900     for (int i = s->nb_globals; i < nb_temps; ++i) {
2901         TCGTemp *ts = &s->temps[i];
2902         if (ts->kind == TEMP_TB && ts->state_ptr != multiple_ebb) {
2903             ts->kind = TEMP_EBB;
2904         }
2905     }
2906 }
2907 
/* Liveness analysis : update the opc_arg_life array to tell if a
   given input arguments is dead. Instructions updating dead
   temporaries are removed.

   Walks the op list backwards so that liveness flows from uses to
   definitions.  Each temp's ->state carries the TS_DEAD/TS_MEM bits
   and ->state_ptr points at its register-preference set, consumed
   later by the register allocator via op->output_pref. */
static void __attribute__((noinline))
liveness_pass_1(TCGContext *s)
{
    int nb_globals = s->nb_globals;
    int nb_temps = s->nb_temps;
    TCGOp *op, *op_prev;
    TCGRegSet *prefs;
    int i;

    /* One preference regset per temp, threaded through state_ptr.  */
    prefs = tcg_malloc(sizeof(TCGRegSet) * nb_temps);
    for (i = 0; i < nb_temps; ++i) {
        s->temps[i].state_ptr = prefs + i;
    }

    /* ??? Should be redundant with the exit_tb that ends the TB.  */
    la_func_end(s, nb_globals, nb_temps);

    QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, link, op_prev) {
        int nb_iargs, nb_oargs;
        TCGOpcode opc_new, opc_new2;
        bool have_opc_new2;
        TCGLifeData arg_life = 0;
        TCGTemp *ts;
        TCGOpcode opc = op->opc;
        const TCGOpDef *def = &tcg_op_defs[opc];

        switch (opc) {
        case INDEX_op_call:
            {
                const TCGHelperInfo *info = tcg_call_info(op);
                int call_flags = tcg_call_flags(op);

                nb_oargs = TCGOP_CALLO(op);
                nb_iargs = TCGOP_CALLI(op);

                /* pure functions can be removed if their result is unused */
                if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
                    for (i = 0; i < nb_oargs; i++) {
                        ts = arg_temp(op->args[i]);
                        if (ts->state != TS_DEAD) {
                            goto do_not_remove_call;
                        }
                    }
                    goto do_remove;
                }
            do_not_remove_call:

                /* Output args are dead.  */
                for (i = 0; i < nb_oargs; i++) {
                    ts = arg_temp(op->args[i]);
                    if (ts->state & TS_DEAD) {
                        arg_life |= DEAD_ARG << i;
                    }
                    if (ts->state & TS_MEM) {
                        arg_life |= SYNC_ARG << i;
                    }
                    /* Defined here: dead before this point.  */
                    ts->state = TS_DEAD;
                    la_reset_pref(ts);
                }

                /* Not used -- it will be tcg_target_call_oarg_reg().  */
                memset(op->output_pref, 0, sizeof(op->output_pref));

                if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
                                    TCG_CALL_NO_READ_GLOBALS))) {
                    /* Helper may both read and write globals.  */
                    la_global_kill(s, nb_globals);
                } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
                    /* Helper only reads globals: sync, don't kill.  */
                    la_global_sync(s, nb_globals);
                }

                /* Record arguments that die in this helper.  */
                for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
                    ts = arg_temp(op->args[i]);
                    if (ts->state & TS_DEAD) {
                        arg_life |= DEAD_ARG << i;
                    }
                }

                /* For all live registers, remove call-clobbered prefs.  */
                la_cross_call(s, nb_temps);

                /*
                 * Input arguments are live for preceding opcodes.
                 *
                 * For those arguments that die, and will be allocated in
                 * registers, clear the register set for that arg, to be
                 * filled in below.  For args that will be on the stack,
                 * reset to any available reg.  Process arguments in reverse
                 * order so that if a temp is used more than once, the stack
                 * reset to max happens before the register reset to 0.
                 */
                for (i = nb_iargs - 1; i >= 0; i--) {
                    const TCGCallArgumentLoc *loc = &info->in[i];
                    ts = arg_temp(op->args[nb_oargs + i]);

                    if (ts->state & TS_DEAD) {
                        switch (loc->kind) {
                        case TCG_CALL_ARG_NORMAL:
                        case TCG_CALL_ARG_EXTEND_U:
                        case TCG_CALL_ARG_EXTEND_S:
                            if (REG_P(loc)) {
                                *la_temp_pref(ts) = 0;
                                break;
                            }
                            /* fall through */
                        default:
                            *la_temp_pref(ts) =
                                tcg_target_available_regs[ts->type];
                            break;
                        }
                        /* Now live for the preceding ops.  */
                        ts->state &= ~TS_DEAD;
                    }
                }

                /*
                 * For each input argument, add its input register to prefs.
                 * If a temp is used once, this produces a single set bit;
                 * if a temp is used multiple times, this produces a set.
                 */
                for (i = 0; i < nb_iargs; i++) {
                    const TCGCallArgumentLoc *loc = &info->in[i];
                    ts = arg_temp(op->args[nb_oargs + i]);

                    switch (loc->kind) {
                    case TCG_CALL_ARG_NORMAL:
                    case TCG_CALL_ARG_EXTEND_U:
                    case TCG_CALL_ARG_EXTEND_S:
                        if (REG_P(loc)) {
                            tcg_regset_set_reg(*la_temp_pref(ts),
                                tcg_target_call_iarg_regs[loc->arg_slot]);
                        }
                        break;
                    default:
                        break;
                    }
                }
            }
            break;
        case INDEX_op_insn_start:
            /* No temps involved; kept for unwind info.  */
            break;
        case INDEX_op_discard:
            /* mark the temporary as dead */
            ts = arg_temp(op->args[0]);
            ts->state = TS_DEAD;
            la_reset_pref(ts);
            break;

        case INDEX_op_add2_i32:
            opc_new = INDEX_op_add_i32;
            goto do_addsub2;
        case INDEX_op_sub2_i32:
            opc_new = INDEX_op_sub_i32;
            goto do_addsub2;
        case INDEX_op_add2_i64:
            opc_new = INDEX_op_add_i64;
            goto do_addsub2;
        case INDEX_op_sub2_i64:
            opc_new = INDEX_op_sub_i64;
        do_addsub2:
            nb_iargs = 4;
            nb_oargs = 2;
            /* Test if the high part of the operation is dead, but not
               the low part.  The result can be optimized to a simple
               add or sub.  This happens often for x86_64 guest when the
               cpu mode is set to 32 bit.  */
            if (arg_temp(op->args[1])->state == TS_DEAD) {
                if (arg_temp(op->args[0])->state == TS_DEAD) {
                    goto do_remove;
                }
                /* Replace the opcode and adjust the args in place,
                   leaving 3 unused args at the end.  */
                op->opc = opc = opc_new;
                op->args[1] = op->args[2];
                op->args[2] = op->args[4];
                /* Fall through and mark the single-word operation live.  */
                nb_iargs = 2;
                nb_oargs = 1;
            }
            goto do_not_remove;

        case INDEX_op_mulu2_i32:
            opc_new = INDEX_op_mul_i32;
            opc_new2 = INDEX_op_muluh_i32;
            have_opc_new2 = TCG_TARGET_HAS_muluh_i32;
            goto do_mul2;
        case INDEX_op_muls2_i32:
            opc_new = INDEX_op_mul_i32;
            opc_new2 = INDEX_op_mulsh_i32;
            have_opc_new2 = TCG_TARGET_HAS_mulsh_i32;
            goto do_mul2;
        case INDEX_op_mulu2_i64:
            opc_new = INDEX_op_mul_i64;
            opc_new2 = INDEX_op_muluh_i64;
            have_opc_new2 = TCG_TARGET_HAS_muluh_i64;
            goto do_mul2;
        case INDEX_op_muls2_i64:
            opc_new = INDEX_op_mul_i64;
            opc_new2 = INDEX_op_mulsh_i64;
            have_opc_new2 = TCG_TARGET_HAS_mulsh_i64;
            goto do_mul2;
        do_mul2:
            nb_iargs = 2;
            nb_oargs = 2;
            if (arg_temp(op->args[1])->state == TS_DEAD) {
                if (arg_temp(op->args[0])->state == TS_DEAD) {
                    /* Both parts of the operation are dead.  */
                    goto do_remove;
                }
                /* The high part of the operation is dead; generate the low. */
                op->opc = opc = opc_new;
                op->args[1] = op->args[2];
                op->args[2] = op->args[3];
            } else if (arg_temp(op->args[0])->state == TS_DEAD && have_opc_new2) {
                /* The low part of the operation is dead; generate the high. */
                op->opc = opc = opc_new2;
                op->args[0] = op->args[1];
                op->args[1] = op->args[2];
                op->args[2] = op->args[3];
            } else {
                goto do_not_remove;
            }
            /* Mark the single-word operation live.  */
            nb_oargs = 1;
            goto do_not_remove;

        default:
            /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
            nb_iargs = def->nb_iargs;
            nb_oargs = def->nb_oargs;

            /* Test if the operation can be removed because all
               its outputs are dead. We assume that nb_oargs == 0
               implies side effects */
            if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) {
                for (i = 0; i < nb_oargs; i++) {
                    if (arg_temp(op->args[i])->state != TS_DEAD) {
                        goto do_not_remove;
                    }
                }
                goto do_remove;
            }
            goto do_not_remove;

        do_remove:
            tcg_op_remove(s, op);
            break;

        do_not_remove:
            for (i = 0; i < nb_oargs; i++) {
                ts = arg_temp(op->args[i]);

                /* Remember the preference of the uses that followed.  */
                if (i < ARRAY_SIZE(op->output_pref)) {
                    op->output_pref[i] = *la_temp_pref(ts);
                }

                /* Output args are dead.  */
                if (ts->state & TS_DEAD) {
                    arg_life |= DEAD_ARG << i;
                }
                if (ts->state & TS_MEM) {
                    arg_life |= SYNC_ARG << i;
                }
                ts->state = TS_DEAD;
                la_reset_pref(ts);
            }

            /* If end of basic block, update.  */
            if (def->flags & TCG_OPF_BB_EXIT) {
                la_func_end(s, nb_globals, nb_temps);
            } else if (def->flags & TCG_OPF_COND_BRANCH) {
                la_bb_sync(s, nb_globals, nb_temps);
            } else if (def->flags & TCG_OPF_BB_END) {
                la_bb_end(s, nb_globals, nb_temps);
            } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
                la_global_sync(s, nb_globals);
                if (def->flags & TCG_OPF_CALL_CLOBBER) {
                    la_cross_call(s, nb_temps);
                }
            }

            /* Record arguments that die in this opcode.  */
            for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
                ts = arg_temp(op->args[i]);
                if (ts->state & TS_DEAD) {
                    arg_life |= DEAD_ARG << i;
                }
            }

            /* Input arguments are live for preceding opcodes.  */
            for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
                ts = arg_temp(op->args[i]);
                if (ts->state & TS_DEAD) {
                    /* For operands that were dead, initially allow
                       all regs for the type.  */
                    *la_temp_pref(ts) = tcg_target_available_regs[ts->type];
                    ts->state &= ~TS_DEAD;
                }
            }

            /* Incorporate constraints for this operand.  */
            switch (opc) {
            case INDEX_op_mov_i32:
            case INDEX_op_mov_i64:
                /* Note that these are TCG_OPF_NOT_PRESENT and do not
                   have proper constraints.  That said, special case
                   moves to propagate preferences backward.  */
                if (IS_DEAD_ARG(1)) {
                    *la_temp_pref(arg_temp(op->args[0]))
                        = *la_temp_pref(arg_temp(op->args[1]));
                }
                break;

            default:
                for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
                    const TCGArgConstraint *ct = &def->args_ct[i];
                    TCGRegSet set, *pset;

                    ts = arg_temp(op->args[i]);
                    pset = la_temp_pref(ts);
                    set = *pset;

                    /* Narrow to the registers this constraint allows.  */
                    set &= ct->regs;
                    if (ct->ialias) {
                        set &= output_pref(op, ct->alias_index);
                    }
                    /* If the combination is not possible, restart.  */
                    if (set == 0) {
                        set = ct->regs;
                    }
                    *pset = set;
                }
                break;
            }
            break;
        }
        op->life = arg_life;
    }
}
3250 
/* Liveness analysis: Convert indirect regs to direct temporaries.

   For each indirect global, allocate a shadow TEMP_EBB temp and
   rewrite ops to use it, inserting explicit loads before first use
   and stores at the last write (per the life data from pass 1).
   Returns true if any op was changed, so the caller knows another
   liveness pass is required.  */
static bool __attribute__((noinline))
liveness_pass_2(TCGContext *s)
{
    int nb_globals = s->nb_globals;
    int nb_temps, i;
    bool changes = false;
    TCGOp *op, *op_next;

    /* Create a temporary for each indirect global.  */
    for (i = 0; i < nb_globals; ++i) {
        TCGTemp *its = &s->temps[i];
        if (its->indirect_reg) {
            TCGTemp *dts = tcg_temp_alloc(s);
            dts->type = its->type;
            dts->base_type = its->base_type;
            dts->temp_subindex = its->temp_subindex;
            dts->kind = TEMP_EBB;
            /* state_ptr links the global to its shadow temp.  */
            its->state_ptr = dts;
        } else {
            its->state_ptr = NULL;
        }
        /* All globals begin dead.  */
        its->state = TS_DEAD;
    }
    /* Continue from i == nb_globals over the non-global temps,
       including the shadow temps just allocated above.  */
    for (nb_temps = s->nb_temps; i < nb_temps; ++i) {
        TCGTemp *its = &s->temps[i];
        its->state_ptr = NULL;
        its->state = TS_DEAD;
    }

    QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
        TCGOpcode opc = op->opc;
        const TCGOpDef *def = &tcg_op_defs[opc];
        TCGLifeData arg_life = op->life;
        int nb_iargs, nb_oargs, call_flags;
        TCGTemp *arg_ts, *dir_ts;

        if (opc == INDEX_op_call) {
            nb_oargs = TCGOP_CALLO(op);
            nb_iargs = TCGOP_CALLI(op);
            call_flags = tcg_call_flags(op);
        } else {
            nb_iargs = def->nb_iargs;
            nb_oargs = def->nb_oargs;

            /* Set flags similar to how calls require.  */
            if (def->flags & TCG_OPF_COND_BRANCH) {
                /* Like reading globals: sync_globals */
                call_flags = TCG_CALL_NO_WRITE_GLOBALS;
            } else if (def->flags & TCG_OPF_BB_END) {
                /* Like writing globals: save_globals */
                call_flags = 0;
            } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
                /* Like reading globals: sync_globals */
                call_flags = TCG_CALL_NO_WRITE_GLOBALS;
            } else {
                /* No effect on globals.  */
                call_flags = (TCG_CALL_NO_READ_GLOBALS |
                              TCG_CALL_NO_WRITE_GLOBALS);
            }
        }

        /* Make sure that input arguments are available.  */
        for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
            arg_ts = arg_temp(op->args[i]);
            dir_ts = arg_ts->state_ptr;
            if (dir_ts && arg_ts->state == TS_DEAD) {
                /* Shadow temp not loaded yet: insert an explicit load
                   from the global's memory slot before this op.  */
                TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32
                                  ? INDEX_op_ld_i32
                                  : INDEX_op_ld_i64);
                TCGOp *lop = tcg_op_insert_before(s, op, lopc, 3);

                lop->args[0] = temp_arg(dir_ts);
                lop->args[1] = temp_arg(arg_ts->mem_base);
                lop->args[2] = arg_ts->mem_offset;

                /* Loaded, but synced with memory.  */
                arg_ts->state = TS_MEM;
            }
        }

        /* Perform input replacement, and mark inputs that became dead.
           No action is required except keeping temp_state up to date
           so that we reload when needed.  */
        for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
            arg_ts = arg_temp(op->args[i]);
            dir_ts = arg_ts->state_ptr;
            if (dir_ts) {
                op->args[i] = temp_arg(dir_ts);
                changes = true;
                if (IS_DEAD_ARG(i)) {
                    arg_ts->state = TS_DEAD;
                }
            }
        }

        /* Liveness analysis should ensure that the following are
           all correct, for call sites and basic block end points.  */
        if (call_flags & TCG_CALL_NO_READ_GLOBALS) {
            /* Nothing to do */
        } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) {
            for (i = 0; i < nb_globals; ++i) {
                /* Liveness should see that globals are synced back,
                   that is, either TS_DEAD or TS_MEM.  */
                arg_ts = &s->temps[i];
                tcg_debug_assert(arg_ts->state_ptr == 0
                                 || arg_ts->state != 0);
            }
        } else {
            for (i = 0; i < nb_globals; ++i) {
                /* Liveness should see that globals are saved back,
                   that is, TS_DEAD, waiting to be reloaded.  */
                arg_ts = &s->temps[i];
                tcg_debug_assert(arg_ts->state_ptr == 0
                                 || arg_ts->state == TS_DEAD);
            }
        }

        /* Outputs become available.  */
        if (opc == INDEX_op_mov_i32 || opc == INDEX_op_mov_i64) {
            arg_ts = arg_temp(op->args[0]);
            dir_ts = arg_ts->state_ptr;
            if (dir_ts) {
                op->args[0] = temp_arg(dir_ts);
                changes = true;

                /* The output is now live and modified.  */
                arg_ts->state = 0;

                if (NEED_SYNC_ARG(0)) {
                    TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
                                      ? INDEX_op_st_i32
                                      : INDEX_op_st_i64);
                    TCGOp *sop = tcg_op_insert_after(s, op, sopc, 3);
                    TCGTemp *out_ts = dir_ts;

                    if (IS_DEAD_ARG(0)) {
                        /* Dead after this point: store the mov source
                           directly and drop the mov itself.  */
                        out_ts = arg_temp(op->args[1]);
                        arg_ts->state = TS_DEAD;
                        tcg_op_remove(s, op);
                    } else {
                        arg_ts->state = TS_MEM;
                    }

                    sop->args[0] = temp_arg(out_ts);
                    sop->args[1] = temp_arg(arg_ts->mem_base);
                    sop->args[2] = arg_ts->mem_offset;
                } else {
                    tcg_debug_assert(!IS_DEAD_ARG(0));
                }
            }
        } else {
            for (i = 0; i < nb_oargs; i++) {
                arg_ts = arg_temp(op->args[i]);
                dir_ts = arg_ts->state_ptr;
                if (!dir_ts) {
                    continue;
                }
                op->args[i] = temp_arg(dir_ts);
                changes = true;

                /* The output is now live and modified.  */
                arg_ts->state = 0;

                /* Sync outputs upon their last write.  */
                if (NEED_SYNC_ARG(i)) {
                    TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
                                      ? INDEX_op_st_i32
                                      : INDEX_op_st_i64);
                    TCGOp *sop = tcg_op_insert_after(s, op, sopc, 3);

                    sop->args[0] = temp_arg(dir_ts);
                    sop->args[1] = temp_arg(arg_ts->mem_base);
                    sop->args[2] = arg_ts->mem_offset;

                    arg_ts->state = TS_MEM;
                }
                /* Drop outputs that are dead.  */
                if (IS_DEAD_ARG(i)) {
                    arg_ts->state = TS_DEAD;
                }
            }
        }
    }

    return changes;
}
3439 
/* Allocate a slot in the TB's stack frame for @ts, setting mem_base,
   mem_offset and mem_allocated.  Restarts translation with a smaller
   TB if the frame is exhausted.  */
static void temp_allocate_frame(TCGContext *s, TCGTemp *ts)
{
    intptr_t off;
    int size, align;

    /* When allocating an object, look at the full type. */
    size = tcg_type_size(ts->base_type);
    switch (ts->base_type) {
    case TCG_TYPE_I32:
        align = 4;
        break;
    case TCG_TYPE_I64:
    case TCG_TYPE_V64:
        align = 8;
        break;
    case TCG_TYPE_I128:
    case TCG_TYPE_V128:
    case TCG_TYPE_V256:
        /*
         * Note that we do not require aligned storage for V256,
         * and that we provide alignment for I128 to match V128,
         * even if that's above what the host ABI requires.
         */
        align = 16;
        break;
    default:
        g_assert_not_reached();
    }

    /*
     * Assume the stack is sufficiently aligned.
     * This affects e.g. ARM NEON, where we have 8 byte stack alignment
     * and do not require 16 byte vector alignment.  This seems slightly
     * easier than fully parameterizing the above switch statement.
     */
    align = MIN(TCG_TARGET_STACK_ALIGN, align);
    off = ROUND_UP(s->current_frame_offset, align);

    /* If we've exhausted the stack frame, restart with a smaller TB. */
    if (off + size > s->frame_end) {
        tcg_raise_tb_overflow(s);
    }
    s->current_frame_offset = off + size;
#if defined(__sparc__)
    /* SPARC frame addresses are offset by the ABI stack bias.  */
    off += TCG_TARGET_STACK_BIAS;
#endif

    /* If the object was subdivided, assign memory to all the parts. */
    if (ts->base_type != ts->type) {
        int part_size = tcg_type_size(ts->type);
        int part_count = size / part_size;

        /*
         * Each part is allocated sequentially in tcg_temp_new_internal.
         * Jump back to the first part by subtracting the current index.
         */
        ts -= ts->temp_subindex;
        for (int i = 0; i < part_count; ++i) {
            ts[i].mem_offset = off + i * part_size;
            ts[i].mem_base = s->frame_temp;
            ts[i].mem_allocated = 1;
        }
    } else {
        ts->mem_offset = off;
        ts->mem_base = s->frame_temp;
        ts->mem_allocated = 1;
    }
}
3508 
3509 /* Assign @reg to @ts, and update reg_to_temp[]. */
3510 static void set_temp_val_reg(TCGContext *s, TCGTemp *ts, TCGReg reg)
3511 {
3512     if (ts->val_type == TEMP_VAL_REG) {
3513         TCGReg old = ts->reg;
3514         tcg_debug_assert(s->reg_to_temp[old] == ts);
3515         if (old == reg) {
3516             return;
3517         }
3518         s->reg_to_temp[old] = NULL;
3519     }
3520     tcg_debug_assert(s->reg_to_temp[reg] == NULL);
3521     s->reg_to_temp[reg] = ts;
3522     ts->val_type = TEMP_VAL_REG;
3523     ts->reg = reg;
3524 }
3525 
3526 /* Assign a non-register value type to @ts, and update reg_to_temp[]. */
3527 static void set_temp_val_nonreg(TCGContext *s, TCGTemp *ts, TCGTempVal type)
3528 {
3529     tcg_debug_assert(type != TEMP_VAL_REG);
3530     if (ts->val_type == TEMP_VAL_REG) {
3531         TCGReg reg = ts->reg;
3532         tcg_debug_assert(s->reg_to_temp[reg] == ts);
3533         s->reg_to_temp[reg] = NULL;
3534     }
3535     ts->val_type = type;
3536 }
3537 
3538 static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet);
3539 
3540 /* Mark a temporary as free or dead.  If 'free_or_dead' is negative,
3541    mark it free; otherwise mark it dead.  */
3542 static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead)
3543 {
3544     TCGTempVal new_type;
3545 
3546     switch (ts->kind) {
3547     case TEMP_FIXED:
3548         return;
3549     case TEMP_GLOBAL:
3550     case TEMP_TB:
3551         new_type = TEMP_VAL_MEM;
3552         break;
3553     case TEMP_EBB:
3554         new_type = free_or_dead < 0 ? TEMP_VAL_MEM : TEMP_VAL_DEAD;
3555         break;
3556     case TEMP_CONST:
3557         new_type = TEMP_VAL_CONST;
3558         break;
3559     default:
3560         g_assert_not_reached();
3561     }
3562     set_temp_val_nonreg(s, ts, new_type);
3563 }
3564 
/* Mark a temporary as dead: its current value will not be needed again. */
static inline void temp_dead(TCGContext *s, TCGTemp *ts)
{
    /* A positive argument selects the "dead" path of temp_free_or_dead. */
    temp_free_or_dead(s, ts, 1);
}
3570 
/*
 * Sync a temporary to memory.  'allocated_regs' is used in case a
 * temporary register needs to be allocated to store a constant.
 * If 'free_or_dead' is non-zero, subsequently release the temporary;
 * if it is positive, the temp is dead; if it is negative, the temp
 * is free.
 */
static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs,
                      TCGRegSet preferred_regs, int free_or_dead)
{
    /* Read-only temps never need a store; coherent temps already match. */
    if (!temp_readonly(ts) && !ts->mem_coherent) {
        if (!ts->mem_allocated) {
            temp_allocate_frame(s, ts);
        }
        switch (ts->val_type) {
        case TEMP_VAL_CONST:
            /* If we're going to free the temp immediately, then we won't
               require it later in a register, so attempt to store the
               constant to memory directly.  */
            if (free_or_dead
                && tcg_out_sti(s, ts->type, ts->val,
                               ts->mem_base->reg, ts->mem_offset)) {
                break;
            }
            /* Otherwise materialize the constant in a register ... */
            temp_load(s, ts, tcg_target_available_regs[ts->type],
                      allocated_regs, preferred_regs);
            /* fallthrough */

        case TEMP_VAL_REG:
            /* ... and store that register into the backing slot. */
            tcg_out_st(s, ts->type, ts->reg,
                       ts->mem_base->reg, ts->mem_offset);
            break;

        case TEMP_VAL_MEM:
            /* The value already lives in memory; nothing to store. */
            break;

        case TEMP_VAL_DEAD:
        default:
            tcg_abort();
        }
        ts->mem_coherent = 1;
    }
    if (free_or_dead) {
        temp_free_or_dead(s, ts, free_or_dead);
    }
}
3614 
3615 /* free register 'reg' by spilling the corresponding temporary if necessary */
3616 static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs)
3617 {
3618     TCGTemp *ts = s->reg_to_temp[reg];
3619     if (ts != NULL) {
3620         temp_sync(s, ts, allocated_regs, 0, -1);
3621     }
3622 }
3623 
3624 /**
3625  * tcg_reg_alloc:
3626  * @required_regs: Set of registers in which we must allocate.
3627  * @allocated_regs: Set of registers which must be avoided.
3628  * @preferred_regs: Set of registers we should prefer.
3629  * @rev: True if we search the registers in "indirect" order.
3630  *
3631  * The allocated register must be in @required_regs & ~@allocated_regs,
3632  * but if we can put it in @preferred_regs we may save a move later.
3633  */
3634 static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs,
3635                             TCGRegSet allocated_regs,
3636                             TCGRegSet preferred_regs, bool rev)
3637 {
3638     int i, j, f, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
3639     TCGRegSet reg_ct[2];
3640     const int *order;
3641 
3642     reg_ct[1] = required_regs & ~allocated_regs;
3643     tcg_debug_assert(reg_ct[1] != 0);
3644     reg_ct[0] = reg_ct[1] & preferred_regs;
3645 
3646     /* Skip the preferred_regs option if it cannot be satisfied,
3647        or if the preference made no difference.  */
3648     f = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
3649 
3650     order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
3651 
3652     /* Try free registers, preferences first.  */
3653     for (j = f; j < 2; j++) {
3654         TCGRegSet set = reg_ct[j];
3655 
3656         if (tcg_regset_single(set)) {
3657             /* One register in the set.  */
3658             TCGReg reg = tcg_regset_first(set);
3659             if (s->reg_to_temp[reg] == NULL) {
3660                 return reg;
3661             }
3662         } else {
3663             for (i = 0; i < n; i++) {
3664                 TCGReg reg = order[i];
3665                 if (s->reg_to_temp[reg] == NULL &&
3666                     tcg_regset_test_reg(set, reg)) {
3667                     return reg;
3668                 }
3669             }
3670         }
3671     }
3672 
3673     /* We must spill something.  */
3674     for (j = f; j < 2; j++) {
3675         TCGRegSet set = reg_ct[j];
3676 
3677         if (tcg_regset_single(set)) {
3678             /* One register in the set.  */
3679             TCGReg reg = tcg_regset_first(set);
3680             tcg_reg_free(s, reg, allocated_regs);
3681             return reg;
3682         } else {
3683             for (i = 0; i < n; i++) {
3684                 TCGReg reg = order[i];
3685                 if (tcg_regset_test_reg(set, reg)) {
3686                     tcg_reg_free(s, reg, allocated_regs);
3687                     return reg;
3688                 }
3689             }
3690         }
3691     }
3692 
3693     tcg_abort();
3694 }
3695 
/*
 * Variant of tcg_reg_alloc that allocates two adjacent registers,
 * (reg, reg + 1), returning the lower register of the pair.  The
 * @required_regs constraint is tested against the lower register;
 * @allocated_regs are avoided for both halves of the pair.
 */
static TCGReg tcg_reg_alloc_pair(TCGContext *s, TCGRegSet required_regs,
                                 TCGRegSet allocated_regs,
                                 TCGRegSet preferred_regs, bool rev)
{
    int i, j, k, fmin, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
    TCGRegSet reg_ct[2];
    const int *order;

    /* Ensure that if I is not in allocated_regs, I+1 is not either. */
    reg_ct[1] = required_regs & ~(allocated_regs | (allocated_regs >> 1));
    tcg_debug_assert(reg_ct[1] != 0);
    reg_ct[0] = reg_ct[1] & preferred_regs;

    order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;

    /*
     * Skip the preferred_regs option if it cannot be satisfied,
     * or if the preference made no difference.
     */
    k = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];

    /*
     * Minimize the number of flushes by looking for 2 free registers first,
     * then a single flush, then two flushes.
     */
    for (fmin = 2; fmin >= 0; fmin--) {
        for (j = k; j < 2; j++) {
            TCGRegSet set = reg_ct[j];

            for (i = 0; i < n; i++) {
                TCGReg reg = order[i];

                if (tcg_regset_test_reg(set, reg)) {
                    /* f counts how many of the pair are already free. */
                    int f = !s->reg_to_temp[reg] + !s->reg_to_temp[reg + 1];
                    if (f >= fmin) {
                        /* tcg_reg_free is a no-op for an unused register. */
                        tcg_reg_free(s, reg, allocated_regs);
                        tcg_reg_free(s, reg + 1, allocated_regs);
                        return reg;
                    }
                }
            }
        }
    }
    tcg_abort();
}
3741 
/* Make sure the temporary is in a register.  If needed, allocate the register
   from DESIRED while avoiding ALLOCATED.  */
static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
                      TCGRegSet allocated_regs, TCGRegSet preferred_regs)
{
    TCGReg reg;

    switch (ts->val_type) {
    case TEMP_VAL_REG:
        /* Already resident in a register; nothing to do. */
        return;
    case TEMP_VAL_CONST:
        reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
                            preferred_regs, ts->indirect_base);
        if (ts->type <= TCG_TYPE_I64) {
            /* Scalar constant: emit a move-immediate. */
            tcg_out_movi(s, ts->type, reg, ts->val);
        } else {
            uint64_t val = ts->val;
            MemOp vece = MO_64;

            /*
             * Find the minimal vector element that matches the constant.
             * The targets will, in general, have to do this search anyway,
             * do this generically.
             */
            if (val == dup_const(MO_8, val)) {
                vece = MO_8;
            } else if (val == dup_const(MO_16, val)) {
                vece = MO_16;
            } else if (val == dup_const(MO_32, val)) {
                vece = MO_32;
            }

            tcg_out_dupi_vec(s, ts->type, vece, reg, ts->val);
        }
        /* The register is the live copy; any memory slot is stale. */
        ts->mem_coherent = 0;
        break;
    case TEMP_VAL_MEM:
        reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
                            preferred_regs, ts->indirect_base);
        tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
        /* Register and memory slot now hold the same value. */
        ts->mem_coherent = 1;
        break;
    case TEMP_VAL_DEAD:
    default:
        tcg_abort();
    }
    /* Record the new assignment in reg_to_temp[]. */
    set_temp_val_reg(s, ts, reg);
}
3790 
/* Save a temporary to memory. 'allocated_regs' is used in case a
   temporary register needs to be allocated to store a constant;
   it is currently unused because no store is ever required here.  */
static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
{
    /* The liveness analysis already ensures that globals are back
       in memory. Keep a tcg_debug_assert for safety. */
    tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || temp_readonly(ts));
}
3799 
3800 /* save globals to their canonical location and assume they can be
3801    modified be the following code. 'allocated_regs' is used in case a
3802    temporary registers needs to be allocated to store a constant. */
3803 static void save_globals(TCGContext *s, TCGRegSet allocated_regs)
3804 {
3805     int i, n;
3806 
3807     for (i = 0, n = s->nb_globals; i < n; i++) {
3808         temp_save(s, &s->temps[i], allocated_regs);
3809     }
3810 }
3811 
3812 /* sync globals to their canonical location and assume they can be
3813    read by the following code. 'allocated_regs' is used in case a
3814    temporary registers needs to be allocated to store a constant. */
3815 static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
3816 {
3817     int i, n;
3818 
3819     for (i = 0, n = s->nb_globals; i < n; i++) {
3820         TCGTemp *ts = &s->temps[i];
3821         tcg_debug_assert(ts->val_type != TEMP_VAL_REG
3822                          || ts->kind == TEMP_FIXED
3823                          || ts->mem_coherent);
3824     }
3825 }
3826 
3827 /* at the end of a basic block, we assume all temporaries are dead and
3828    all globals are stored at their canonical location. */
3829 static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
3830 {
3831     int i;
3832 
3833     for (i = s->nb_globals; i < s->nb_temps; i++) {
3834         TCGTemp *ts = &s->temps[i];
3835 
3836         switch (ts->kind) {
3837         case TEMP_TB:
3838             temp_save(s, ts, allocated_regs);
3839             break;
3840         case TEMP_EBB:
3841             /* The liveness analysis already ensures that temps are dead.
3842                Keep an tcg_debug_assert for safety. */
3843             tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
3844             break;
3845         case TEMP_CONST:
3846             /* Similarly, we should have freed any allocated register. */
3847             tcg_debug_assert(ts->val_type == TEMP_VAL_CONST);
3848             break;
3849         default:
3850             g_assert_not_reached();
3851         }
3852     }
3853 
3854     save_globals(s, allocated_regs);
3855 }
3856 
3857 /*
3858  * At a conditional branch, we assume all temporaries are dead unless
3859  * explicitly live-across-conditional-branch; all globals and local
3860  * temps are synced to their location.
3861  */
3862 static void tcg_reg_alloc_cbranch(TCGContext *s, TCGRegSet allocated_regs)
3863 {
3864     sync_globals(s, allocated_regs);
3865 
3866     for (int i = s->nb_globals; i < s->nb_temps; i++) {
3867         TCGTemp *ts = &s->temps[i];
3868         /*
3869          * The liveness analysis already ensures that temps are dead.
3870          * Keep tcg_debug_asserts for safety.
3871          */
3872         switch (ts->kind) {
3873         case TEMP_TB:
3874             tcg_debug_assert(ts->val_type != TEMP_VAL_REG || ts->mem_coherent);
3875             break;
3876         case TEMP_EBB:
3877         case TEMP_CONST:
3878             break;
3879         default:
3880             g_assert_not_reached();
3881         }
3882     }
3883 }
3884 
3885 /*
3886  * Specialized code generation for INDEX_op_mov_* with a constant.
3887  */
3888 static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots,
3889                                   tcg_target_ulong val, TCGLifeData arg_life,
3890                                   TCGRegSet preferred_regs)
3891 {
3892     /* ENV should not be modified.  */
3893     tcg_debug_assert(!temp_readonly(ots));
3894 
3895     /* The movi is not explicitly generated here.  */
3896     set_temp_val_nonreg(s, ots, TEMP_VAL_CONST);
3897     ots->val = val;
3898     ots->mem_coherent = 0;
3899     if (NEED_SYNC_ARG(0)) {
3900         temp_sync(s, ots, s->reserved_regs, preferred_regs, IS_DEAD_ARG(0));
3901     } else if (IS_DEAD_ARG(0)) {
3902         temp_dead(s, ots);
3903     }
3904 }
3905 
3906 /*
3907  * Specialized code generation for INDEX_op_mov_*.
3908  */
3909 static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op)
3910 {
3911     const TCGLifeData arg_life = op->life;
3912     TCGRegSet allocated_regs, preferred_regs;
3913     TCGTemp *ts, *ots;
3914     TCGType otype, itype;
3915     TCGReg oreg, ireg;
3916 
3917     allocated_regs = s->reserved_regs;
3918     preferred_regs = output_pref(op, 0);
3919     ots = arg_temp(op->args[0]);
3920     ts = arg_temp(op->args[1]);
3921 
3922     /* ENV should not be modified.  */
3923     tcg_debug_assert(!temp_readonly(ots));
3924 
3925     /* Note that otype != itype for no-op truncation.  */
3926     otype = ots->type;
3927     itype = ts->type;
3928 
3929     if (ts->val_type == TEMP_VAL_CONST) {
3930         /* propagate constant or generate sti */
3931         tcg_target_ulong val = ts->val;
3932         if (IS_DEAD_ARG(1)) {
3933             temp_dead(s, ts);
3934         }
3935         tcg_reg_alloc_do_movi(s, ots, val, arg_life, preferred_regs);
3936         return;
3937     }
3938 
3939     /* If the source value is in memory we're going to be forced
3940        to have it in a register in order to perform the copy.  Copy
3941        the SOURCE value into its own register first, that way we
3942        don't have to reload SOURCE the next time it is used. */
3943     if (ts->val_type == TEMP_VAL_MEM) {
3944         temp_load(s, ts, tcg_target_available_regs[itype],
3945                   allocated_regs, preferred_regs);
3946     }
3947     tcg_debug_assert(ts->val_type == TEMP_VAL_REG);
3948     ireg = ts->reg;
3949 
3950     if (IS_DEAD_ARG(0)) {
3951         /* mov to a non-saved dead register makes no sense (even with
3952            liveness analysis disabled). */
3953         tcg_debug_assert(NEED_SYNC_ARG(0));
3954         if (!ots->mem_allocated) {
3955             temp_allocate_frame(s, ots);
3956         }
3957         tcg_out_st(s, otype, ireg, ots->mem_base->reg, ots->mem_offset);
3958         if (IS_DEAD_ARG(1)) {
3959             temp_dead(s, ts);
3960         }
3961         temp_dead(s, ots);
3962         return;
3963     }
3964 
3965     if (IS_DEAD_ARG(1) && ts->kind != TEMP_FIXED) {
3966         /*
3967          * The mov can be suppressed.  Kill input first, so that it
3968          * is unlinked from reg_to_temp, then set the output to the
3969          * reg that we saved from the input.
3970          */
3971         temp_dead(s, ts);
3972         oreg = ireg;
3973     } else {
3974         if (ots->val_type == TEMP_VAL_REG) {
3975             oreg = ots->reg;
3976         } else {
3977             /* Make sure to not spill the input register during allocation. */
3978             oreg = tcg_reg_alloc(s, tcg_target_available_regs[otype],
3979                                  allocated_regs | ((TCGRegSet)1 << ireg),
3980                                  preferred_regs, ots->indirect_base);
3981         }
3982         if (!tcg_out_mov(s, otype, oreg, ireg)) {
3983             /*
3984              * Cross register class move not supported.
3985              * Store the source register into the destination slot
3986              * and leave the destination temp as TEMP_VAL_MEM.
3987              */
3988             assert(!temp_readonly(ots));
3989             if (!ts->mem_allocated) {
3990                 temp_allocate_frame(s, ots);
3991             }
3992             tcg_out_st(s, ts->type, ireg, ots->mem_base->reg, ots->mem_offset);
3993             set_temp_val_nonreg(s, ts, TEMP_VAL_MEM);
3994             ots->mem_coherent = 1;
3995             return;
3996         }
3997     }
3998     set_temp_val_reg(s, ots, oreg);
3999     ots->mem_coherent = 0;
4000 
4001     if (NEED_SYNC_ARG(0)) {
4002         temp_sync(s, ots, allocated_regs, 0, 0);
4003     }
4004 }
4005 
4006 /*
4007  * Specialized code generation for INDEX_op_dup_vec.
4008  */
4009 static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op)
4010 {
4011     const TCGLifeData arg_life = op->life;
4012     TCGRegSet dup_out_regs, dup_in_regs;
4013     TCGTemp *its, *ots;
4014     TCGType itype, vtype;
4015     unsigned vece;
4016     int lowpart_ofs;
4017     bool ok;
4018 
4019     ots = arg_temp(op->args[0]);
4020     its = arg_temp(op->args[1]);
4021 
4022     /* ENV should not be modified.  */
4023     tcg_debug_assert(!temp_readonly(ots));
4024 
4025     itype = its->type;
4026     vece = TCGOP_VECE(op);
4027     vtype = TCGOP_VECL(op) + TCG_TYPE_V64;
4028 
4029     if (its->val_type == TEMP_VAL_CONST) {
4030         /* Propagate constant via movi -> dupi.  */
4031         tcg_target_ulong val = its->val;
4032         if (IS_DEAD_ARG(1)) {
4033             temp_dead(s, its);
4034         }
4035         tcg_reg_alloc_do_movi(s, ots, val, arg_life, output_pref(op, 0));
4036         return;
4037     }
4038 
4039     dup_out_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs;
4040     dup_in_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[1].regs;
4041 
4042     /* Allocate the output register now.  */
4043     if (ots->val_type != TEMP_VAL_REG) {
4044         TCGRegSet allocated_regs = s->reserved_regs;
4045         TCGReg oreg;
4046 
4047         if (!IS_DEAD_ARG(1) && its->val_type == TEMP_VAL_REG) {
4048             /* Make sure to not spill the input register. */
4049             tcg_regset_set_reg(allocated_regs, its->reg);
4050         }
4051         oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
4052                              output_pref(op, 0), ots->indirect_base);
4053         set_temp_val_reg(s, ots, oreg);
4054     }
4055 
4056     switch (its->val_type) {
4057     case TEMP_VAL_REG:
4058         /*
4059          * The dup constriaints must be broad, covering all possible VECE.
4060          * However, tcg_op_dup_vec() gets to see the VECE and we allow it
4061          * to fail, indicating that extra moves are required for that case.
4062          */
4063         if (tcg_regset_test_reg(dup_in_regs, its->reg)) {
4064             if (tcg_out_dup_vec(s, vtype, vece, ots->reg, its->reg)) {
4065                 goto done;
4066             }
4067             /* Try again from memory or a vector input register.  */
4068         }
4069         if (!its->mem_coherent) {
4070             /*
4071              * The input register is not synced, and so an extra store
4072              * would be required to use memory.  Attempt an integer-vector
4073              * register move first.  We do not have a TCGRegSet for this.
4074              */
4075             if (tcg_out_mov(s, itype, ots->reg, its->reg)) {
4076                 break;
4077             }
4078             /* Sync the temp back to its slot and load from there.  */
4079             temp_sync(s, its, s->reserved_regs, 0, 0);
4080         }
4081         /* fall through */
4082 
4083     case TEMP_VAL_MEM:
4084         lowpart_ofs = 0;
4085         if (HOST_BIG_ENDIAN) {
4086             lowpart_ofs = tcg_type_size(itype) - (1 << vece);
4087         }
4088         if (tcg_out_dupm_vec(s, vtype, vece, ots->reg, its->mem_base->reg,
4089                              its->mem_offset + lowpart_ofs)) {
4090             goto done;
4091         }
4092         /* Load the input into the destination vector register. */
4093         tcg_out_ld(s, itype, ots->reg, its->mem_base->reg, its->mem_offset);
4094         break;
4095 
4096     default:
4097         g_assert_not_reached();
4098     }
4099 
4100     /* We now have a vector input register, so dup must succeed. */
4101     ok = tcg_out_dup_vec(s, vtype, vece, ots->reg, ots->reg);
4102     tcg_debug_assert(ok);
4103 
4104  done:
4105     ots->mem_coherent = 0;
4106     if (IS_DEAD_ARG(1)) {
4107         temp_dead(s, its);
4108     }
4109     if (NEED_SYNC_ARG(0)) {
4110         temp_sync(s, ots, s->reserved_regs, 0, 0);
4111     }
4112     if (IS_DEAD_ARG(0)) {
4113         temp_dead(s, ots);
4114     }
4115 }
4116 
4117 static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
4118 {
4119     const TCGLifeData arg_life = op->life;
4120     const TCGOpDef * const def = &tcg_op_defs[op->opc];
4121     TCGRegSet i_allocated_regs;
4122     TCGRegSet o_allocated_regs;
4123     int i, k, nb_iargs, nb_oargs;
4124     TCGReg reg;
4125     TCGArg arg;
4126     const TCGArgConstraint *arg_ct;
4127     TCGTemp *ts;
4128     TCGArg new_args[TCG_MAX_OP_ARGS];
4129     int const_args[TCG_MAX_OP_ARGS];
4130 
4131     nb_oargs = def->nb_oargs;
4132     nb_iargs = def->nb_iargs;
4133 
4134     /* copy constants */
4135     memcpy(new_args + nb_oargs + nb_iargs,
4136            op->args + nb_oargs + nb_iargs,
4137            sizeof(TCGArg) * def->nb_cargs);
4138 
4139     i_allocated_regs = s->reserved_regs;
4140     o_allocated_regs = s->reserved_regs;
4141 
4142     /* satisfy input constraints */
4143     for (k = 0; k < nb_iargs; k++) {
4144         TCGRegSet i_preferred_regs, i_required_regs;
4145         bool allocate_new_reg, copyto_new_reg;
4146         TCGTemp *ts2;
4147         int i1, i2;
4148 
4149         i = def->args_ct[nb_oargs + k].sort_index;
4150         arg = op->args[i];
4151         arg_ct = &def->args_ct[i];
4152         ts = arg_temp(arg);
4153 
4154         if (ts->val_type == TEMP_VAL_CONST
4155             && tcg_target_const_match(ts->val, ts->type, arg_ct->ct)) {
4156             /* constant is OK for instruction */
4157             const_args[i] = 1;
4158             new_args[i] = ts->val;
4159             continue;
4160         }
4161 
4162         reg = ts->reg;
4163         i_preferred_regs = 0;
4164         i_required_regs = arg_ct->regs;
4165         allocate_new_reg = false;
4166         copyto_new_reg = false;
4167 
4168         switch (arg_ct->pair) {
4169         case 0: /* not paired */
4170             if (arg_ct->ialias) {
4171                 i_preferred_regs = output_pref(op, arg_ct->alias_index);
4172 
4173                 /*
4174                  * If the input is readonly, then it cannot also be an
4175                  * output and aliased to itself.  If the input is not
4176                  * dead after the instruction, we must allocate a new
4177                  * register and move it.
4178                  */
4179                 if (temp_readonly(ts) || !IS_DEAD_ARG(i)) {
4180                     allocate_new_reg = true;
4181                 } else if (ts->val_type == TEMP_VAL_REG) {
4182                     /*
4183                      * Check if the current register has already been
4184                      * allocated for another input.
4185                      */
4186                     allocate_new_reg =
4187                         tcg_regset_test_reg(i_allocated_regs, reg);
4188                 }
4189             }
4190             if (!allocate_new_reg) {
4191                 temp_load(s, ts, i_required_regs, i_allocated_regs,
4192                           i_preferred_regs);
4193                 reg = ts->reg;
4194                 allocate_new_reg = !tcg_regset_test_reg(i_required_regs, reg);
4195             }
4196             if (allocate_new_reg) {
4197                 /*
4198                  * Allocate a new register matching the constraint
4199                  * and move the temporary register into it.
4200                  */
4201                 temp_load(s, ts, tcg_target_available_regs[ts->type],
4202                           i_allocated_regs, 0);
4203                 reg = tcg_reg_alloc(s, i_required_regs, i_allocated_regs,
4204                                     i_preferred_regs, ts->indirect_base);
4205                 copyto_new_reg = true;
4206             }
4207             break;
4208 
4209         case 1:
4210             /* First of an input pair; if i1 == i2, the second is an output. */
4211             i1 = i;
4212             i2 = arg_ct->pair_index;
4213             ts2 = i1 != i2 ? arg_temp(op->args[i2]) : NULL;
4214 
4215             /*
4216              * It is easier to default to allocating a new pair
4217              * and to identify a few cases where it's not required.
4218              */
4219             if (arg_ct->ialias) {
4220                 i_preferred_regs = output_pref(op, arg_ct->alias_index);
4221                 if (IS_DEAD_ARG(i1) &&
4222                     IS_DEAD_ARG(i2) &&
4223                     !temp_readonly(ts) &&
4224                     ts->val_type == TEMP_VAL_REG &&
4225                     ts->reg < TCG_TARGET_NB_REGS - 1 &&
4226                     tcg_regset_test_reg(i_required_regs, reg) &&
4227                     !tcg_regset_test_reg(i_allocated_regs, reg) &&
4228                     !tcg_regset_test_reg(i_allocated_regs, reg + 1) &&
4229                     (ts2
4230                      ? ts2->val_type == TEMP_VAL_REG &&
4231                        ts2->reg == reg + 1 &&
4232                        !temp_readonly(ts2)
4233                      : s->reg_to_temp[reg + 1] == NULL)) {
4234                     break;
4235                 }
4236             } else {
4237                 /* Without aliasing, the pair must also be an input. */
4238                 tcg_debug_assert(ts2);
4239                 if (ts->val_type == TEMP_VAL_REG &&
4240                     ts2->val_type == TEMP_VAL_REG &&
4241                     ts2->reg == reg + 1 &&
4242                     tcg_regset_test_reg(i_required_regs, reg)) {
4243                     break;
4244                 }
4245             }
4246             reg = tcg_reg_alloc_pair(s, i_required_regs, i_allocated_regs,
4247                                      0, ts->indirect_base);
4248             goto do_pair;
4249 
4250         case 2: /* pair second */
4251             reg = new_args[arg_ct->pair_index] + 1;
4252             goto do_pair;
4253 
4254         case 3: /* ialias with second output, no first input */
4255             tcg_debug_assert(arg_ct->ialias);
4256             i_preferred_regs = output_pref(op, arg_ct->alias_index);
4257 
4258             if (IS_DEAD_ARG(i) &&
4259                 !temp_readonly(ts) &&
4260                 ts->val_type == TEMP_VAL_REG &&
4261                 reg > 0 &&
4262                 s->reg_to_temp[reg - 1] == NULL &&
4263                 tcg_regset_test_reg(i_required_regs, reg) &&
4264                 !tcg_regset_test_reg(i_allocated_regs, reg) &&
4265                 !tcg_regset_test_reg(i_allocated_regs, reg - 1)) {
4266                 tcg_regset_set_reg(i_allocated_regs, reg - 1);
4267                 break;
4268             }
4269             reg = tcg_reg_alloc_pair(s, i_required_regs >> 1,
4270                                      i_allocated_regs, 0,
4271                                      ts->indirect_base);
4272             tcg_regset_set_reg(i_allocated_regs, reg);
4273             reg += 1;
4274             goto do_pair;
4275 
4276         do_pair:
4277             /*
4278              * If an aliased input is not dead after the instruction,
4279              * we must allocate a new register and move it.
4280              */
4281             if (arg_ct->ialias && (!IS_DEAD_ARG(i) || temp_readonly(ts))) {
4282                 TCGRegSet t_allocated_regs = i_allocated_regs;
4283 
4284                 /*
4285                  * Because of the alias, and the continued life, make sure
4286                  * that the temp is somewhere *other* than the reg pair,
4287                  * and we get a copy in reg.
4288                  */
4289                 tcg_regset_set_reg(t_allocated_regs, reg);
4290                 tcg_regset_set_reg(t_allocated_regs, reg + 1);
4291                 if (ts->val_type == TEMP_VAL_REG && ts->reg == reg) {
4292                     /* If ts was already in reg, copy it somewhere else. */
4293                     TCGReg nr;
4294                     bool ok;
4295 
4296                     tcg_debug_assert(ts->kind != TEMP_FIXED);
4297                     nr = tcg_reg_alloc(s, tcg_target_available_regs[ts->type],
4298                                        t_allocated_regs, 0, ts->indirect_base);
4299                     ok = tcg_out_mov(s, ts->type, nr, reg);
4300                     tcg_debug_assert(ok);
4301 
4302                     set_temp_val_reg(s, ts, nr);
4303                 } else {
4304                     temp_load(s, ts, tcg_target_available_regs[ts->type],
4305                               t_allocated_regs, 0);
4306                     copyto_new_reg = true;
4307                 }
4308             } else {
4309                 /* Preferably allocate to reg, otherwise copy. */
4310                 i_required_regs = (TCGRegSet)1 << reg;
4311                 temp_load(s, ts, i_required_regs, i_allocated_regs,
4312                           i_preferred_regs);
4313                 copyto_new_reg = ts->reg != reg;
4314             }
4315             break;
4316 
4317         default:
4318             g_assert_not_reached();
4319         }
4320 
4321         if (copyto_new_reg) {
4322             if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
4323                 /*
4324                  * Cross register class move not supported.  Sync the
4325                  * temp back to its slot and load from there.
4326                  */
4327                 temp_sync(s, ts, i_allocated_regs, 0, 0);
4328                 tcg_out_ld(s, ts->type, reg,
4329                            ts->mem_base->reg, ts->mem_offset);
4330             }
4331         }
4332         new_args[i] = reg;
4333         const_args[i] = 0;
4334         tcg_regset_set_reg(i_allocated_regs, reg);
4335     }
4336 
4337     /* mark dead temporaries and free the associated registers */
4338     for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
4339         if (IS_DEAD_ARG(i)) {
4340             temp_dead(s, arg_temp(op->args[i]));
4341         }
4342     }
4343 
4344     if (def->flags & TCG_OPF_COND_BRANCH) {
4345         tcg_reg_alloc_cbranch(s, i_allocated_regs);
4346     } else if (def->flags & TCG_OPF_BB_END) {
4347         tcg_reg_alloc_bb_end(s, i_allocated_regs);
4348     } else {
4349         if (def->flags & TCG_OPF_CALL_CLOBBER) {
4350             /* XXX: permit generic clobber register list ? */
4351             for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
4352                 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
4353                     tcg_reg_free(s, i, i_allocated_regs);
4354                 }
4355             }
4356         }
4357         if (def->flags & TCG_OPF_SIDE_EFFECTS) {
4358             /* sync globals if the op has side effects and might trigger
4359                an exception. */
4360             sync_globals(s, i_allocated_regs);
4361         }
4362 
4363         /* satisfy the output constraints */
4364         for(k = 0; k < nb_oargs; k++) {
4365             i = def->args_ct[k].sort_index;
4366             arg = op->args[i];
4367             arg_ct = &def->args_ct[i];
4368             ts = arg_temp(arg);
4369 
4370             /* ENV should not be modified.  */
4371             tcg_debug_assert(!temp_readonly(ts));
4372 
4373             switch (arg_ct->pair) {
4374             case 0: /* not paired */
4375                 if (arg_ct->oalias && !const_args[arg_ct->alias_index]) {
4376                     reg = new_args[arg_ct->alias_index];
4377                 } else if (arg_ct->newreg) {
4378                     reg = tcg_reg_alloc(s, arg_ct->regs,
4379                                         i_allocated_regs | o_allocated_regs,
4380                                         output_pref(op, k), ts->indirect_base);
4381                 } else {
4382                     reg = tcg_reg_alloc(s, arg_ct->regs, o_allocated_regs,
4383                                         output_pref(op, k), ts->indirect_base);
4384                 }
4385                 break;
4386 
4387             case 1: /* first of pair */
4388                 tcg_debug_assert(!arg_ct->newreg);
4389                 if (arg_ct->oalias) {
4390                     reg = new_args[arg_ct->alias_index];
4391                     break;
4392                 }
4393                 reg = tcg_reg_alloc_pair(s, arg_ct->regs, o_allocated_regs,
4394                                          output_pref(op, k), ts->indirect_base);
4395                 break;
4396 
4397             case 2: /* second of pair */
4398                 tcg_debug_assert(!arg_ct->newreg);
4399                 if (arg_ct->oalias) {
4400                     reg = new_args[arg_ct->alias_index];
4401                 } else {
4402                     reg = new_args[arg_ct->pair_index] + 1;
4403                 }
4404                 break;
4405 
4406             case 3: /* first of pair, aliasing with a second input */
4407                 tcg_debug_assert(!arg_ct->newreg);
4408                 reg = new_args[arg_ct->pair_index] - 1;
4409                 break;
4410 
4411             default:
4412                 g_assert_not_reached();
4413             }
4414             tcg_regset_set_reg(o_allocated_regs, reg);
4415             set_temp_val_reg(s, ts, reg);
4416             ts->mem_coherent = 0;
4417             new_args[i] = reg;
4418         }
4419     }
4420 
4421     /* emit instruction */
4422     if (def->flags & TCG_OPF_VECTOR) {
4423         tcg_out_vec_op(s, op->opc, TCGOP_VECL(op), TCGOP_VECE(op),
4424                        new_args, const_args);
4425     } else {
4426         tcg_out_op(s, op->opc, new_args, const_args);
4427     }
4428 
4429     /* move the outputs in the correct register if needed */
4430     for(i = 0; i < nb_oargs; i++) {
4431         ts = arg_temp(op->args[i]);
4432 
4433         /* ENV should not be modified.  */
4434         tcg_debug_assert(!temp_readonly(ts));
4435 
4436         if (NEED_SYNC_ARG(i)) {
4437             temp_sync(s, ts, o_allocated_regs, 0, IS_DEAD_ARG(i));
4438         } else if (IS_DEAD_ARG(i)) {
4439             temp_dead(s, ts);
4440         }
4441     }
4442 }
4443 
/*
 * Allocate registers and emit code for INDEX_op_dup2_vec: replicate a
 * 64-bit value, supplied as two 32-bit input temps (low then high),
 * across the vector output.  Returns true if the expansion was emitted
 * here; false to request the generic fallback expansion from the caller.
 */
static bool tcg_reg_alloc_dup2(TCGContext *s, const TCGOp *op)
{
    const TCGLifeData arg_life = op->life;
    TCGTemp *ots, *itsl, *itsh;
    TCGType vtype = TCGOP_VECL(op) + TCG_TYPE_V64;

    /* This opcode is only valid for 32-bit hosts, for 64-bit elements. */
    tcg_debug_assert(TCG_TARGET_REG_BITS == 32);
    tcg_debug_assert(TCGOP_VECE(op) == MO_64);

    ots = arg_temp(op->args[0]);
    itsl = arg_temp(op->args[1]);
    itsh = arg_temp(op->args[2]);

    /* ENV should not be modified.  */
    tcg_debug_assert(!temp_readonly(ots));

    /* Allocate the output register now.  */
    if (ots->val_type != TEMP_VAL_REG) {
        TCGRegSet allocated_regs = s->reserved_regs;
        TCGRegSet dup_out_regs =
            tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs;
        TCGReg oreg;

        /* Make sure to not spill the input registers. */
        if (!IS_DEAD_ARG(1) && itsl->val_type == TEMP_VAL_REG) {
            tcg_regset_set_reg(allocated_regs, itsl->reg);
        }
        if (!IS_DEAD_ARG(2) && itsh->val_type == TEMP_VAL_REG) {
            tcg_regset_set_reg(allocated_regs, itsh->reg);
        }

        oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
                             output_pref(op, 0), ots->indirect_base);
        set_temp_val_reg(s, ots, oreg);
    }

    /* Promote dup2 of immediates to dupi_vec. */
    if (itsl->val_type == TEMP_VAL_CONST && itsh->val_type == TEMP_VAL_CONST) {
        uint64_t val = deposit64(itsl->val, 32, 32, itsh->val);
        MemOp vece = MO_64;

        /* Use the narrowest element size whose replication equals val. */
        if (val == dup_const(MO_8, val)) {
            vece = MO_8;
        } else if (val == dup_const(MO_16, val)) {
            vece = MO_16;
        } else if (val == dup_const(MO_32, val)) {
            vece = MO_32;
        }

        tcg_out_dupi_vec(s, vtype, vece, ots->reg, val);
        goto done;
    }

    /* If the two inputs form one 64-bit value, try dupm_vec. */
    if (itsl->temp_subindex == HOST_BIG_ENDIAN &&
        itsh->temp_subindex == !HOST_BIG_ENDIAN &&
        itsl == itsh + (HOST_BIG_ENDIAN ? 1 : -1)) {
        /* its points at the low-addressed half of the pair. */
        TCGTemp *its = itsl - HOST_BIG_ENDIAN;

        /* Flush both halves to their (adjacent) memory slots. */
        temp_sync(s, its + 0, s->reserved_regs, 0, 0);
        temp_sync(s, its + 1, s->reserved_regs, 0, 0);

        if (tcg_out_dupm_vec(s, vtype, MO_64, ots->reg,
                             its->mem_base->reg, its->mem_offset)) {
            goto done;
        }
    }

    /* Fall back to generic expansion. */
    return false;

 done:
    ots->mem_coherent = 0;
    if (IS_DEAD_ARG(1)) {
        temp_dead(s, itsl);
    }
    if (IS_DEAD_ARG(2)) {
        temp_dead(s, itsh);
    }
    if (NEED_SYNC_ARG(0)) {
        temp_sync(s, ots, s->reserved_regs, 0, IS_DEAD_ARG(0));
    } else if (IS_DEAD_ARG(0)) {
        temp_dead(s, ots);
    }
    return true;
}
4531 
4532 static void load_arg_reg(TCGContext *s, TCGReg reg, TCGTemp *ts,
4533                          TCGRegSet allocated_regs)
4534 {
4535     if (ts->val_type == TEMP_VAL_REG) {
4536         if (ts->reg != reg) {
4537             tcg_reg_free(s, reg, allocated_regs);
4538             if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
4539                 /*
4540                  * Cross register class move not supported.  Sync the
4541                  * temp back to its slot and load from there.
4542                  */
4543                 temp_sync(s, ts, allocated_regs, 0, 0);
4544                 tcg_out_ld(s, ts->type, reg,
4545                            ts->mem_base->reg, ts->mem_offset);
4546             }
4547         }
4548     } else {
4549         TCGRegSet arg_set = 0;
4550 
4551         tcg_reg_free(s, reg, allocated_regs);
4552         tcg_regset_set_reg(arg_set, reg);
4553         temp_load(s, ts, arg_set, allocated_regs, 0);
4554     }
4555 }
4556 
4557 static void load_arg_stk(TCGContext *s, int stk_slot, TCGTemp *ts,
4558                          TCGRegSet allocated_regs)
4559 {
4560     /*
4561      * When the destination is on the stack, load up the temp and store.
4562      * If there are many call-saved registers, the temp might live to
4563      * see another use; otherwise it'll be discarded.
4564      */
4565     temp_load(s, ts, tcg_target_available_regs[ts->type], allocated_regs, 0);
4566     tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK,
4567                TCG_TARGET_CALL_STACK_OFFSET +
4568                stk_slot * sizeof(tcg_target_long));
4569 }
4570 
4571 static void load_arg_normal(TCGContext *s, const TCGCallArgumentLoc *l,
4572                             TCGTemp *ts, TCGRegSet *allocated_regs)
4573 {
4574     if (REG_P(l)) {
4575         TCGReg reg = tcg_target_call_iarg_regs[l->arg_slot];
4576         load_arg_reg(s, reg, ts, *allocated_regs);
4577         tcg_regset_set_reg(*allocated_regs, reg);
4578     } else {
4579         load_arg_stk(s, l->arg_slot - ARRAY_SIZE(tcg_target_call_iarg_regs),
4580                      ts, *allocated_regs);
4581     }
4582 }
4583 
4584 static void load_arg_ref(TCGContext *s, int arg_slot, TCGReg ref_base,
4585                          intptr_t ref_off, TCGRegSet *allocated_regs)
4586 {
4587     TCGReg reg;
4588     int stk_slot = arg_slot - ARRAY_SIZE(tcg_target_call_iarg_regs);
4589 
4590     if (stk_slot < 0) {
4591         reg = tcg_target_call_iarg_regs[arg_slot];
4592         tcg_reg_free(s, reg, *allocated_regs);
4593         tcg_out_addi_ptr(s, reg, ref_base, ref_off);
4594         tcg_regset_set_reg(*allocated_regs, reg);
4595     } else {
4596         reg = tcg_reg_alloc(s, tcg_target_available_regs[TCG_TYPE_PTR],
4597                             *allocated_regs, 0, false);
4598         tcg_out_addi_ptr(s, reg, ref_base, ref_off);
4599         tcg_out_st(s, TCG_TYPE_PTR, reg, TCG_REG_CALL_STACK,
4600                    TCG_TARGET_CALL_STACK_OFFSET
4601                    + stk_slot * sizeof(tcg_target_long));
4602     }
4603 }
4604 
/*
 * Allocate registers and emit host code for a helper call op:
 * place the inputs per the ABI locations in @info, kill dead temps,
 * clobber the call-clobbered registers, save/sync globals as the call
 * flags require, emit the call, then bind or flush the outputs.
 */
static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
{
    const int nb_oargs = TCGOP_CALLO(op);
    const int nb_iargs = TCGOP_CALLI(op);
    const TCGLifeData arg_life = op->life;
    const TCGHelperInfo *info = tcg_call_info(op);
    TCGRegSet allocated_regs = s->reserved_regs;
    int i;

    /*
     * Move inputs into place in reverse order,
     * so that we place stacked arguments first.
     */
    for (i = nb_iargs - 1; i >= 0; --i) {
        const TCGCallArgumentLoc *loc = &info->in[i];
        TCGTemp *ts = arg_temp(op->args[nb_oargs + i]);

        switch (loc->kind) {
        case TCG_CALL_ARG_NORMAL:
        case TCG_CALL_ARG_EXTEND_U:
        case TCG_CALL_ARG_EXTEND_S:
            load_arg_normal(s, loc, ts, &allocated_regs);
            break;
        case TCG_CALL_ARG_BY_REF:
            /* Store the value to its reference slot, then pass a pointer
               to that slot in the argument slot. */
            load_arg_stk(s, loc->ref_slot, ts, allocated_regs);
            load_arg_ref(s, loc->arg_slot, TCG_REG_CALL_STACK,
                         TCG_TARGET_CALL_STACK_OFFSET
                         + loc->ref_slot * sizeof(tcg_target_long),
                         &allocated_regs);
            break;
        case TCG_CALL_ARG_BY_REF_N:
            /* Subsequent word of a by-reference argument: store only;
               the pointer was passed with the first word. */
            load_arg_stk(s, loc->ref_slot, ts, allocated_regs);
            break;
        default:
            g_assert_not_reached();
        }
    }

    /* Mark dead temporaries and free the associated registers.  */
    for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
        if (IS_DEAD_ARG(i)) {
            temp_dead(s, arg_temp(op->args[i]));
        }
    }

    /* Clobber call registers.  */
    for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
        if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
            tcg_reg_free(s, i, allocated_regs);
        }
    }

    /*
     * Save globals if they might be written by the helper,
     * sync them if they might be read.
     */
    if (info->flags & TCG_CALL_NO_READ_GLOBALS) {
        /* Nothing to do */
    } else if (info->flags & TCG_CALL_NO_WRITE_GLOBALS) {
        sync_globals(s, allocated_regs);
    } else {
        save_globals(s, allocated_regs);
    }

    /*
     * If the ABI passes a pointer to the returned struct as the first
     * argument, load that now.  Pass a pointer to the output home slot.
     */
    if (info->out_kind == TCG_CALL_RET_BY_REF) {
        TCGTemp *ts = arg_temp(op->args[0]);

        if (!ts->mem_allocated) {
            temp_allocate_frame(s, ts);
        }
        load_arg_ref(s, 0, ts->mem_base->reg, ts->mem_offset, &allocated_regs);
    }

    tcg_out_call(s, tcg_call_func(op), info);

    /* Assign output registers and emit moves if needed.  */
    switch (info->out_kind) {
    case TCG_CALL_RET_NORMAL:
        for (i = 0; i < nb_oargs; i++) {
            TCGTemp *ts = arg_temp(op->args[i]);
            TCGReg reg = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, i);

            /* ENV should not be modified.  */
            tcg_debug_assert(!temp_readonly(ts));

            set_temp_val_reg(s, ts, reg);
            ts->mem_coherent = 0;
        }
        break;

    case TCG_CALL_RET_BY_VEC:
        /* Value returned in a vector register: spill it to the temp's
           home slot, then treat all parts as in-memory below. */
        {
            TCGTemp *ts = arg_temp(op->args[0]);

            tcg_debug_assert(ts->base_type == TCG_TYPE_I128);
            tcg_debug_assert(ts->temp_subindex == 0);
            if (!ts->mem_allocated) {
                temp_allocate_frame(s, ts);
            }
            tcg_out_st(s, TCG_TYPE_V128,
                       tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0),
                       ts->mem_base->reg, ts->mem_offset);
        }
        /* fall through to mark all parts in memory */

    case TCG_CALL_RET_BY_REF:
        /* The callee has performed a write through the reference. */
        for (i = 0; i < nb_oargs; i++) {
            TCGTemp *ts = arg_temp(op->args[i]);
            ts->val_type = TEMP_VAL_MEM;
        }
        break;

    default:
        g_assert_not_reached();
    }

    /* Flush or discard output registers as needed. */
    for (i = 0; i < nb_oargs; i++) {
        TCGTemp *ts = arg_temp(op->args[i]);
        if (NEED_SYNC_ARG(i)) {
            temp_sync(s, ts, s->reserved_regs, 0, IS_DEAD_ARG(i));
        } else if (IS_DEAD_ARG(i)) {
            temp_dead(s, ts);
        }
    }
}
4736 
4737 #ifdef CONFIG_PROFILER
4738 
4739 /* avoid copy/paste errors */
/* Atomically read (from)->field and accumulate it into (to)->field. */
#define PROF_ADD(to, from, field)                       \
    do {                                                \
        (to)->field += qatomic_read(&((from)->field));  \
    } while (0)

/* Atomically read (from)->field and keep the maximum in (to)->field. */
#define PROF_MAX(to, from, field)                                       \
    do {                                                                \
        typeof((from)->field) val__ = qatomic_read(&((from)->field));   \
        if (val__ > (to)->field) {                                      \
            (to)->field = val__;                                        \
        }                                                               \
    } while (0)
4752 
4753 /* Pass in a zero'ed @prof */
4754 static inline
4755 void tcg_profile_snapshot(TCGProfile *prof, bool counters, bool table)
4756 {
4757     unsigned int n_ctxs = qatomic_read(&tcg_cur_ctxs);
4758     unsigned int i;
4759 
4760     for (i = 0; i < n_ctxs; i++) {
4761         TCGContext *s = qatomic_read(&tcg_ctxs[i]);
4762         const TCGProfile *orig = &s->prof;
4763 
4764         if (counters) {
4765             PROF_ADD(prof, orig, cpu_exec_time);
4766             PROF_ADD(prof, orig, tb_count1);
4767             PROF_ADD(prof, orig, tb_count);
4768             PROF_ADD(prof, orig, op_count);
4769             PROF_MAX(prof, orig, op_count_max);
4770             PROF_ADD(prof, orig, temp_count);
4771             PROF_MAX(prof, orig, temp_count_max);
4772             PROF_ADD(prof, orig, del_op_count);
4773             PROF_ADD(prof, orig, code_in_len);
4774             PROF_ADD(prof, orig, code_out_len);
4775             PROF_ADD(prof, orig, search_out_len);
4776             PROF_ADD(prof, orig, interm_time);
4777             PROF_ADD(prof, orig, code_time);
4778             PROF_ADD(prof, orig, la_time);
4779             PROF_ADD(prof, orig, opt_time);
4780             PROF_ADD(prof, orig, restore_count);
4781             PROF_ADD(prof, orig, restore_time);
4782         }
4783         if (table) {
4784             int i;
4785 
4786             for (i = 0; i < NB_OPS; i++) {
4787                 PROF_ADD(prof, orig, table_op_count[i]);
4788             }
4789         }
4790     }
4791 }
4792 
4793 #undef PROF_ADD
4794 #undef PROF_MAX
4795 
/* Snapshot only the scalar counters, not the per-opcode table. */
static void tcg_profile_snapshot_counters(TCGProfile *prof)
{
    tcg_profile_snapshot(prof, true, false);
}
4800 
/* Snapshot only the per-opcode table, not the scalar counters. */
static void tcg_profile_snapshot_table(TCGProfile *prof)
{
    tcg_profile_snapshot(prof, false, true);
}
4805 
4806 void tcg_dump_op_count(GString *buf)
4807 {
4808     TCGProfile prof = {};
4809     int i;
4810 
4811     tcg_profile_snapshot_table(&prof);
4812     for (i = 0; i < NB_OPS; i++) {
4813         g_string_append_printf(buf, "%s %" PRId64 "\n", tcg_op_defs[i].name,
4814                                prof.table_op_count[i]);
4815     }
4816 }
4817 
4818 int64_t tcg_cpu_exec_time(void)
4819 {
4820     unsigned int n_ctxs = qatomic_read(&tcg_cur_ctxs);
4821     unsigned int i;
4822     int64_t ret = 0;
4823 
4824     for (i = 0; i < n_ctxs; i++) {
4825         const TCGContext *s = qatomic_read(&tcg_ctxs[i]);
4826         const TCGProfile *prof = &s->prof;
4827 
4828         ret += qatomic_read(&prof->cpu_exec_time);
4829     }
4830     return ret;
4831 }
4832 #else
/* Profiler disabled at build time: emit a placeholder instead of stats. */
void tcg_dump_op_count(GString *buf)
{
    g_string_append_printf(buf, "[TCG profiler not compiled]\n");
}
4837 
/*
 * Profiler disabled at build time: there is no meaningful value to
 * return, so report the misuse and terminate.
 */
int64_t tcg_cpu_exec_time(void)
{
    error_report("%s: TCG profiler not compiled", __func__);
    exit(EXIT_FAILURE);
}
4843 #endif
4844 
4845 
/*
 * Generate host machine code for the ops in @s->ops into @tb.
 * Returns the number of bytes of code emitted, or a negative value
 * if translation must be restarted with a smaller TB:
 *   -1: the code buffer high-water mark was exceeded;
 *   -2: offsets no longer fit gen_insn_end_off (16 bits), or
 *       relocations could not be resolved.
 */
int tcg_gen_code(TCGContext *s, TranslationBlock *tb, target_ulong pc_start)
{
#ifdef CONFIG_PROFILER
    TCGProfile *prof = &s->prof;
#endif
    int i, num_insns;
    TCGOp *op;

#ifdef CONFIG_PROFILER
    /* Record op and temp counts for this TB. */
    {
        int n = 0;

        QTAILQ_FOREACH(op, &s->ops, link) {
            n++;
        }
        qatomic_set(&prof->op_count, prof->op_count + n);
        if (n > prof->op_count_max) {
            qatomic_set(&prof->op_count_max, n);
        }

        n = s->nb_temps;
        qatomic_set(&prof->temp_count, prof->temp_count + n);
        if (n > prof->temp_count_max) {
            qatomic_set(&prof->temp_count_max, n);
        }
    }
#endif

#ifdef DEBUG_DISAS
    /* Dump the op stream before any optimization. */
    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)
                 && qemu_log_in_addr_range(pc_start))) {
        FILE *logfile = qemu_log_trylock();
        if (logfile) {
            fprintf(logfile, "OP:\n");
            tcg_dump_ops(s, logfile, false);
            fprintf(logfile, "\n");
            qemu_log_unlock(logfile);
        }
    }
#endif

#ifdef CONFIG_DEBUG_TCG
    /* Ensure all labels referenced have been emitted.  */
    {
        TCGLabel *l;
        bool error = false;

        QSIMPLEQ_FOREACH(l, &s->labels, next) {
            if (unlikely(!l->present) && l->refs) {
                qemu_log_mask(CPU_LOG_TB_OP,
                              "$L%d referenced but not present.\n", l->id);
                error = true;
            }
        }
        assert(!error);
    }
#endif

#ifdef CONFIG_PROFILER
    qatomic_set(&prof->opt_time, prof->opt_time - profile_getclock());
#endif

#ifdef USE_TCG_OPTIMIZATIONS
    tcg_optimize(s);
#endif

#ifdef CONFIG_PROFILER
    qatomic_set(&prof->opt_time, prof->opt_time + profile_getclock());
    qatomic_set(&prof->la_time, prof->la_time - profile_getclock());
#endif

    reachable_code_pass(s);
    liveness_pass_0(s);
    liveness_pass_1(s);

    if (s->nb_indirects > 0) {
#ifdef DEBUG_DISAS
        if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
                     && qemu_log_in_addr_range(pc_start))) {
            FILE *logfile = qemu_log_trylock();
            if (logfile) {
                fprintf(logfile, "OP before indirect lowering:\n");
                tcg_dump_ops(s, logfile, false);
                fprintf(logfile, "\n");
                qemu_log_unlock(logfile);
            }
        }
#endif
        /* Replace indirect temps with direct temps.  */
        if (liveness_pass_2(s)) {
            /* If changes were made, re-run liveness.  */
            liveness_pass_1(s);
        }
    }

#ifdef CONFIG_PROFILER
    qatomic_set(&prof->la_time, prof->la_time + profile_getclock());
#endif

#ifdef DEBUG_DISAS
    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT)
                 && qemu_log_in_addr_range(pc_start))) {
        FILE *logfile = qemu_log_trylock();
        if (logfile) {
            fprintf(logfile, "OP after optimization and liveness analysis:\n");
            tcg_dump_ops(s, logfile, true);
            fprintf(logfile, "\n");
            qemu_log_unlock(logfile);
        }
    }
#endif

    /* Initialize goto_tb jump offsets. */
    tb->jmp_reset_offset[0] = TB_JMP_OFFSET_INVALID;
    tb->jmp_reset_offset[1] = TB_JMP_OFFSET_INVALID;
    tb->jmp_insn_offset[0] = TB_JMP_OFFSET_INVALID;
    tb->jmp_insn_offset[1] = TB_JMP_OFFSET_INVALID;

    tcg_reg_alloc_start(s);

    /*
     * Reset the buffer pointers when restarting after overflow.
     * TODO: Move this into translate-all.c with the rest of the
     * buffer management.  Having only this done here is confusing.
     */
    s->code_buf = tcg_splitwx_to_rw(tb->tc.ptr);
    s->code_ptr = s->code_buf;

#ifdef TCG_TARGET_NEED_LDST_LABELS
    QSIMPLEQ_INIT(&s->ldst_labels);
#endif
#ifdef TCG_TARGET_NEED_POOL_LABELS
    s->pool_labels = NULL;
#endif

    /* Emit code for each op, recording guest insn boundaries as we go. */
    num_insns = -1;
    QTAILQ_FOREACH(op, &s->ops, link) {
        TCGOpcode opc = op->opc;

#ifdef CONFIG_PROFILER
        qatomic_set(&prof->table_op_count[opc], prof->table_op_count[opc] + 1);
#endif

        switch (opc) {
        case INDEX_op_mov_i32:
        case INDEX_op_mov_i64:
        case INDEX_op_mov_vec:
            tcg_reg_alloc_mov(s, op);
            break;
        case INDEX_op_dup_vec:
            tcg_reg_alloc_dup(s, op);
            break;
        case INDEX_op_insn_start:
            if (num_insns >= 0) {
                size_t off = tcg_current_code_size(s);
                s->gen_insn_end_off[num_insns] = off;
                /* Assert that we do not overflow our stored offset.  */
                assert(s->gen_insn_end_off[num_insns] == off);
            }
            num_insns++;
            for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
                target_ulong a;
#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
                a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
#else
                a = op->args[i];
#endif
                s->gen_insn_data[num_insns][i] = a;
            }
            break;
        case INDEX_op_discard:
            temp_dead(s, arg_temp(op->args[0]));
            break;
        case INDEX_op_set_label:
            tcg_reg_alloc_bb_end(s, s->reserved_regs);
            tcg_out_label(s, arg_label(op->args[0]));
            break;
        case INDEX_op_call:
            tcg_reg_alloc_call(s, op);
            break;
        case INDEX_op_exit_tb:
            tcg_out_exit_tb(s, op->args[0]);
            break;
        case INDEX_op_goto_tb:
            tcg_out_goto_tb(s, op->args[0]);
            break;
        case INDEX_op_dup2_vec:
            if (tcg_reg_alloc_dup2(s, op)) {
                break;
            }
            /* fall through */
        default:
            /* Sanity check that we've not introduced any unhandled opcodes. */
            tcg_debug_assert(tcg_op_supported(opc));
            /* Note: in order to speed up the code, it would be much
               faster to have specialized register allocator functions for
               some common argument patterns */
            tcg_reg_alloc_op(s, op);
            break;
        }
        /* Test for (pending) buffer overflow.  The assumption is that any
           one operation beginning below the high water mark cannot overrun
           the buffer completely.  Thus we can test for overflow after
           generating code without having to check during generation.  */
        if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
            return -1;
        }
        /* Test for TB overflow, as seen by gen_insn_end_off.  */
        if (unlikely(tcg_current_code_size(s) > UINT16_MAX)) {
            return -2;
        }
    }
    tcg_debug_assert(num_insns >= 0);
    s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);

    /* Generate TB finalization at the end of block */
#ifdef TCG_TARGET_NEED_LDST_LABELS
    i = tcg_out_ldst_finalize(s);
    if (i < 0) {
        return i;
    }
#endif
#ifdef TCG_TARGET_NEED_POOL_LABELS
    i = tcg_out_pool_finalize(s);
    if (i < 0) {
        return i;
    }
#endif
    if (!tcg_resolve_relocs(s)) {
        return -2;
    }

#ifndef CONFIG_TCG_INTERPRETER
    /* flush instruction cache */
    flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
                        (uintptr_t)s->code_buf,
                        tcg_ptr_byte_diff(s->code_ptr, s->code_buf));
#endif

    return tcg_current_code_size(s);
}
5087 
5088 #ifdef CONFIG_PROFILER
/*
 * Append a human-readable summary of the accumulated TCG profiling
 * counters to @buf.  Times are in profile_getclock() cycles; the
 * "2.4 GHz" conversion is only a rough reference point.
 */
void tcg_dump_info(GString *buf)
{
    TCGProfile prof = {};
    const TCGProfile *s;
    int64_t tb_count;
    int64_t tb_div_count;
    int64_t tot;

    tcg_profile_snapshot_counters(&prof);
    s = &prof;
    tb_count = s->tb_count;
    tb_div_count = tb_count ? tb_count : 1;  /* avoid division by zero */
    tot = s->interm_time + s->code_time;

    g_string_append_printf(buf, "JIT cycles          %" PRId64
                           " (%0.3f s at 2.4 GHz)\n",
                           tot, tot / 2.4e9);
    g_string_append_printf(buf, "translated TBs      %" PRId64
                           " (aborted=%" PRId64 " %0.1f%%)\n",
                           tb_count, s->tb_count1 - tb_count,
                           (double)(s->tb_count1 - s->tb_count)
                           / (s->tb_count1 ? s->tb_count1 : 1) * 100.0);
    g_string_append_printf(buf, "avg ops/TB          %0.1f max=%d\n",
                           (double)s->op_count / tb_div_count, s->op_count_max);
    g_string_append_printf(buf, "deleted ops/TB      %0.2f\n",
                           (double)s->del_op_count / tb_div_count);
    g_string_append_printf(buf, "avg temps/TB        %0.2f max=%d\n",
                           (double)s->temp_count / tb_div_count,
                           s->temp_count_max);
    g_string_append_printf(buf, "avg host code/TB    %0.1f\n",
                           (double)s->code_out_len / tb_div_count);
    g_string_append_printf(buf, "avg search data/TB  %0.1f\n",
                           (double)s->search_out_len / tb_div_count);

    g_string_append_printf(buf, "cycles/op           %0.1f\n",
                           s->op_count ? (double)tot / s->op_count : 0);
    g_string_append_printf(buf, "cycles/in byte      %0.1f\n",
                           s->code_in_len ? (double)tot / s->code_in_len : 0);
    g_string_append_printf(buf, "cycles/out byte     %0.1f\n",
                           s->code_out_len ? (double)tot / s->code_out_len : 0);
    g_string_append_printf(buf, "cycles/search byte     %0.1f\n",
                           s->search_out_len ?
                           (double)tot / s->search_out_len : 0);
    /* Avoid division by zero in the percentage breakdowns below. */
    if (tot == 0) {
        tot = 1;
    }
    g_string_append_printf(buf, "  gen_interm time   %0.1f%%\n",
                           (double)s->interm_time / tot * 100.0);
    g_string_append_printf(buf, "  gen_code time     %0.1f%%\n",
                           (double)s->code_time / tot * 100.0);
    g_string_append_printf(buf, "optim./code time    %0.1f%%\n",
                           (double)s->opt_time / (s->code_time ?
                                                  s->code_time : 1)
                           * 100.0);
    g_string_append_printf(buf, "liveness/code time  %0.1f%%\n",
                           (double)s->la_time / (s->code_time ?
                                                 s->code_time : 1) * 100.0);
    g_string_append_printf(buf, "cpu_restore count   %" PRId64 "\n",
                           s->restore_count);
    g_string_append_printf(buf, "  avg cycles        %0.1f\n",
                           s->restore_count ?
                           (double)s->restore_time / s->restore_count : 0);
}
5152 #else
/* Profiler disabled at build time: emit a placeholder instead of stats. */
void tcg_dump_info(GString *buf)
{
    g_string_append_printf(buf, "[TCG profiler not compiled]\n");
}
5157 #endif
5158 
5159 #ifdef ELF_HOST_MACHINE
5160 /* In order to use this feature, the backend needs to do three things:
5161 
5162    (1) Define ELF_HOST_MACHINE to indicate both what value to
5163        put into the ELF image and to indicate support for the feature.
5164 
5165    (2) Define tcg_register_jit.  This should create a buffer containing
5166        the contents of a .debug_frame section that describes the post-
5167        prologue unwind info for the tcg machine.
5168 
5169    (3) Call tcg_register_jit_int, with the constructed .debug_frame.
5170 */
5171 
5172 /* Begin GDB interface.  THE FOLLOWING MUST MATCH GDB DOCS.  */
/* Action reported to GDB in __jit_debug_descriptor.action_flag.
   The values are fixed by the GDB JIT interface; do not renumber. */
typedef enum {
    JIT_NOACTION = 0,
    JIT_REGISTER_FN,
    JIT_UNREGISTER_FN
} jit_actions_t;
5178 
/* One node in GDB's doubly-linked list of in-memory symbol files. */
struct jit_code_entry {
    struct jit_code_entry *next_entry;
    struct jit_code_entry *prev_entry;
    const void *symfile_addr;   /* start of the in-memory ELF image */
    uint64_t symfile_size;      /* size of that image in bytes */
};
5185 
/* Root descriptor through which GDB finds the symbol-file list. */
struct jit_descriptor {
    uint32_t version;
    uint32_t action_flag;       /* a jit_actions_t value */
    struct jit_code_entry *relevant_entry;  /* entry the action applies to */
    struct jit_code_entry *first_entry;     /* head of the list */
};
5192 
/*
 * Per the GDB JIT interface, the debugger sets a breakpoint on this
 * function; calling it after updating __jit_debug_descriptor notifies
 * GDB of the change.  The empty asm keeps the body (and thus the call)
 * from being optimized away; noinline keeps the symbol addressable.
 */
void __jit_debug_register_code(void) __attribute__((noinline));
void __jit_debug_register_code(void)
{
    asm("");
}
5198 
/* Must statically initialize the version, because GDB may check
   the version before we can set it.  (Version 1, no pending action,
   empty entry list.)  */
struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };
5202 
5203 /* End GDB interface.  */
5204 
/*
 * Return the offset of @str within the string table @strtab.
 * The string must be present: entry 0 is the empty string, and the
 * scan walks the remaining NUL-separated entries until it matches.
 */
static int find_string(const char *strtab, const char *str)
{
    const char *p;

    for (p = strtab + 1; strcmp(p, str) != 0; p += strlen(p) + 1) {
        continue;
    }
    return p - strtab;
}
5216 
/*
 * Build an in-memory ELF image that describes the generated-code buffer
 * [buf_ptr, buf_ptr + buf_size) as a single function "code_gen_buffer",
 * append the backend-supplied .debug_frame, and hand the image to GDB
 * via the JIT interface declared above.  The image is allocated once
 * and never freed: GDB keeps referring to it for the process lifetime.
 */
static void tcg_register_jit_int(const void *buf_ptr, size_t buf_size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
{
    /* Hand-rolled DWARF2 .debug_info section: one compile unit holding
       one subprogram that spans the whole buffer.  The field order must
       match the abbreviation table 'da' initialized below.  */
    struct __attribute__((packed)) DebugInfo {
        uint32_t  len;
        uint16_t  version;
        uint32_t  abbrev;
        uint8_t   ptr_size;
        uint8_t   cu_die;
        uint16_t  cu_lang;
        uintptr_t cu_low_pc;
        uintptr_t cu_high_pc;
        uint8_t   fn_die;
        char      fn_name[16];
        uintptr_t fn_low_pc;
        uintptr_t fn_high_pc;
        uint8_t   cu_eoc;
    };

    /* The complete fake ELF file.  The caller's .debug_frame contents
       are copied immediately after this struct, so all section offsets
       inside the struct are compile-time constants.  */
    struct ElfImage {
        ElfW(Ehdr) ehdr;
        ElfW(Phdr) phdr;
        ElfW(Shdr) shdr[7];
        ElfW(Sym)  sym[2];
        struct DebugInfo di;
        uint8_t    da[24];
        char       str[80];
    };

    struct ElfImage *img;

    /* Template with all the position-independent pieces filled in;
       the copy in 'img' gets the buffer-specific addresses patched.  */
    static const struct ElfImage img_template = {
        .ehdr = {
            .e_ident[EI_MAG0] = ELFMAG0,
            .e_ident[EI_MAG1] = ELFMAG1,
            .e_ident[EI_MAG2] = ELFMAG2,
            .e_ident[EI_MAG3] = ELFMAG3,
            .e_ident[EI_CLASS] = ELF_CLASS,
            .e_ident[EI_DATA] = ELF_DATA,
            .e_ident[EI_VERSION] = EV_CURRENT,
            .e_type = ET_EXEC,
            .e_machine = ELF_HOST_MACHINE,
            .e_version = EV_CURRENT,
            .e_phoff = offsetof(struct ElfImage, phdr),
            .e_shoff = offsetof(struct ElfImage, shdr),
            /* NOTE(review): uses sizeof(Shdr) where the ELF spec says
               e_ehsize is the ELF *header* size.  The two coincide for
               64-bit ELF but differ for 32-bit; presumably GDB never
               consults this field — confirm before changing.  */
            .e_ehsize = sizeof(ElfW(Shdr)),
            .e_phentsize = sizeof(ElfW(Phdr)),
            .e_phnum = 1,
            .e_shentsize = sizeof(ElfW(Shdr)),
            .e_shnum = ARRAY_SIZE(img->shdr),
            .e_shstrndx = ARRAY_SIZE(img->shdr) - 1,
#ifdef ELF_HOST_FLAGS
            .e_flags = ELF_HOST_FLAGS,
#endif
#ifdef ELF_OSABI
            .e_ident[EI_OSABI] = ELF_OSABI,
#endif
        },
        .phdr = {
            .p_type = PT_LOAD,
            .p_flags = PF_X,
        },
        .shdr = {
            [0] = { .sh_type = SHT_NULL },
            /* Trick: The contents of code_gen_buffer are not present in
               this fake ELF file; that got allocated elsewhere.  Therefore
               we mark .text as SHT_NOBITS (similar to .bss) so that readers
               will not look for contents.  We can record any address.  */
            [1] = { /* .text */
                .sh_type = SHT_NOBITS,
                .sh_flags = SHF_EXECINSTR | SHF_ALLOC,
            },
            [2] = { /* .debug_info */
                .sh_type = SHT_PROGBITS,
                .sh_offset = offsetof(struct ElfImage, di),
                .sh_size = sizeof(struct DebugInfo),
            },
            [3] = { /* .debug_abbrev */
                .sh_type = SHT_PROGBITS,
                .sh_offset = offsetof(struct ElfImage, da),
                .sh_size = sizeof(img->da),
            },
            [4] = { /* .debug_frame */
                .sh_type = SHT_PROGBITS,
                /* Placed right after the struct; size patched below.  */
                .sh_offset = sizeof(struct ElfImage),
            },
            [5] = { /* .symtab */
                .sh_type = SHT_SYMTAB,
                .sh_offset = offsetof(struct ElfImage, sym),
                .sh_size = sizeof(img->sym),
                .sh_info = 1,
                .sh_link = ARRAY_SIZE(img->shdr) - 1,
                .sh_entsize = sizeof(ElfW(Sym)),
            },
            [6] = { /* .strtab */
                .sh_type = SHT_STRTAB,
                .sh_offset = offsetof(struct ElfImage, str),
                .sh_size = sizeof(img->str),
            }
        },
        .sym = {
            [1] = { /* code_gen_buffer */
                .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC),
                .st_shndx = 1,
            }
        },
        .di = {
            .len = sizeof(struct DebugInfo) - 4,
            .version = 2,
            .ptr_size = sizeof(void *),
            .cu_die = 1,
            .cu_lang = 0x8001,  /* DW_LANG_Mips_Assembler */
            .fn_die = 2,
            .fn_name = "code_gen_buffer"
        },
        /* DWARF abbreviation table matching struct DebugInfo above.  */
        .da = {
            1,          /* abbrev number (the cu) */
            0x11, 1,    /* DW_TAG_compile_unit, has children */
            0x13, 0x5,  /* DW_AT_language, DW_FORM_data2 */
            0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
            0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
            0, 0,       /* end of abbrev */
            2,          /* abbrev number (the fn) */
            0x2e, 0,    /* DW_TAG_subprogram, no children */
            0x3, 0x8,   /* DW_AT_name, DW_FORM_string */
            0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
            0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
            0, 0,       /* end of abbrev */
            0           /* no more abbrev */
        },
        /* Section-name string table; offsets found via find_string().  */
        .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0"
               ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer",
    };

    /* We only need a single jit entry; statically allocate it.  */
    static struct jit_code_entry one_entry;

    uintptr_t buf = (uintptr_t)buf_ptr;
    size_t img_size = sizeof(struct ElfImage) + debug_frame_size;
    DebugFrameHeader *dfh;

    img = g_malloc(img_size);
    *img = img_template;

    /* Patch in the addresses and sizes specific to this buffer.  */
    img->phdr.p_vaddr = buf;
    img->phdr.p_paddr = buf;
    img->phdr.p_memsz = buf_size;

    img->shdr[1].sh_name = find_string(img->str, ".text");
    img->shdr[1].sh_addr = buf;
    img->shdr[1].sh_size = buf_size;

    img->shdr[2].sh_name = find_string(img->str, ".debug_info");
    img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev");

    img->shdr[4].sh_name = find_string(img->str, ".debug_frame");
    img->shdr[4].sh_size = debug_frame_size;

    img->shdr[5].sh_name = find_string(img->str, ".symtab");
    img->shdr[6].sh_name = find_string(img->str, ".strtab");

    img->sym[1].st_name = find_string(img->str, "code_gen_buffer");
    img->sym[1].st_value = buf;
    img->sym[1].st_size = buf_size;

    img->di.cu_low_pc = buf;
    img->di.cu_high_pc = buf + buf_size;
    img->di.fn_low_pc = buf;
    img->di.fn_high_pc = buf + buf_size;

    /* Copy the caller's .debug_frame just past the struct and patch
       its FDE to cover the buffer.  */
    dfh = (DebugFrameHeader *)(img + 1);
    memcpy(dfh, debug_frame, debug_frame_size);
    dfh->fde.func_start = buf;
    dfh->fde.func_len = buf_size;

#ifdef DEBUG_JIT
    /* Enable this block to be able to debug the ELF image file creation.
       One can use readelf, objdump, or other inspection utilities.  */
    {
        g_autofree char *jit = g_strdup_printf("%s/qemu.jit", g_get_tmp_dir());
        FILE *f = fopen(jit, "w+b");
        if (f) {
            /* NOTE(review): fwrite returns an item count (0 or 1 here),
               so comparing against img_size looks wrong — but the body
               is empty and the comment below says the test exists only
               to consume the return value.  */
            if (fwrite(img, img_size, 1, f) != img_size) {
                /* Avoid stupid unused return value warning for fwrite.  */
            }
            fclose(f);
        }
    }
#endif

    /* Publish the entry and wake the debugger.  */
    one_entry.symfile_addr = img;
    one_entry.symfile_size = img_size;

    __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
    __jit_debug_descriptor.relevant_entry = &one_entry;
    __jit_debug_descriptor.first_entry = &one_entry;
    __jit_debug_register_code();
}
5416 #else
5417 /* No support for the feature.  Provide the entry point expected by exec.c,
5418    and implement the internal function we declared earlier.  */
5419 
/* Debug-image generation is unavailable; ignore the arguments.  */
static void tcg_register_jit_int(const void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
{
}
5425 
/* Public no-op stub so callers need not test ELF_HOST_MACHINE.  */
void tcg_register_jit(const void *buf, size_t buf_size)
{
}
5429 #endif /* ELF_HOST_MACHINE */
5430 
5431 #if !TCG_TARGET_MAYBE_vec
/* When the backend declares no vector support, no vector opcodes are
   ever emitted, so reaching this expansion hook is a logic error.  */
void tcg_expand_vec_op(TCGOpcode o, TCGType t, unsigned e, TCGArg a0, ...)
{
    g_assert_not_reached();
}
5436 #endif
5437