xref: /openbmc/qemu/tcg/tcg.c (revision 623d7e3551a6fc5693c06ea938c60fe281b52e27)
1  /*
2   * Tiny Code Generator for QEMU
3   *
4   * Copyright (c) 2008 Fabrice Bellard
5   *
6   * Permission is hereby granted, free of charge, to any person obtaining a copy
7   * of this software and associated documentation files (the "Software"), to deal
8   * in the Software without restriction, including without limitation the rights
9   * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10   * copies of the Software, and to permit persons to whom the Software is
11   * furnished to do so, subject to the following conditions:
12   *
13   * The above copyright notice and this permission notice shall be included in
14   * all copies or substantial portions of the Software.
15   *
16   * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17   * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18   * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19   * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20   * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21   * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22   * THE SOFTWARE.
23   */
24  
25  #include "qemu/osdep.h"
26  
/* Define to dump the ELF file used to communicate with GDB.  */
28  #undef DEBUG_JIT
29  
30  #include "qemu/error-report.h"
31  #include "qemu/cutils.h"
32  #include "qemu/host-utils.h"
33  #include "qemu/qemu-print.h"
34  #include "qemu/cacheflush.h"
35  #include "qemu/cacheinfo.h"
36  #include "qemu/timer.h"
37  #include "exec/translation-block.h"
38  #include "exec/tlb-common.h"
39  #include "tcg/tcg-op-common.h"
40  
41  #if UINTPTR_MAX == UINT32_MAX
42  # define ELF_CLASS  ELFCLASS32
43  #else
44  # define ELF_CLASS  ELFCLASS64
45  #endif
46  #if HOST_BIG_ENDIAN
47  # define ELF_DATA   ELFDATA2MSB
48  #else
49  # define ELF_DATA   ELFDATA2LSB
50  #endif
51  
52  #include "elf.h"
53  #include "exec/log.h"
54  #include "tcg/tcg-ldst.h"
55  #include "tcg/tcg-temp-internal.h"
56  #include "tcg-internal.h"
57  #include "accel/tcg/perf.h"
58  #ifdef CONFIG_USER_ONLY
59  #include "exec/user/guest-base.h"
60  #endif
61  
62  /* Forward declarations for functions declared in tcg-target.c.inc and
63     used here. */
64  static void tcg_target_init(TCGContext *s);
65  static void tcg_target_qemu_prologue(TCGContext *s);
66  static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
67                          intptr_t value, intptr_t addend);
68  
/* The CIE and FDE header definitions will be common to all hosts.  */

/* DWARF Common Information Entry header for the in-memory debug frame. */
typedef struct {
    /* Alignment forces the whole image to host-pointer alignment. */
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t id;
    uint8_t version;
    char augmentation[1];
    uint8_t code_align;
    uint8_t data_align;
    uint8_t return_column;
} DebugFrameCIE;

/* DWARF Frame Description Entry header; packed so the host-sized
   func_start/func_len fields follow the 32-bit fields directly. */
typedef struct QEMU_PACKED {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t cie_offset;
    uintptr_t func_start;
    uintptr_t func_len;
} DebugFrameFDEHeader;

/* A CIE immediately followed by a single FDE header. */
typedef struct QEMU_PACKED {
    DebugFrameCIE cie;
    DebugFrameFDEHeader fde;
} DebugFrameHeader;
91  
/* Slow-path descriptor for one qemu_ld (is_ld) or qemu_st operation. */
typedef struct TCGLabelQemuLdst {
    bool is_ld;             /* qemu_ld: true, qemu_st: false */
    MemOpIdx oi;            /* memory operation + mmu index */
    TCGType type;           /* result type of a load */
    TCGReg addrlo_reg;      /* reg index for low word of guest virtual addr */
    TCGReg addrhi_reg;      /* reg index for high word of guest virtual addr */
    TCGReg datalo_reg;      /* reg index for low word to be loaded or stored */
    TCGReg datahi_reg;      /* reg index for high word to be loaded or stored */
    const tcg_insn_unit *raddr;   /* addr of the next IR of qemu_ld/st IR */
    tcg_insn_unit *label_ptr[2]; /* label pointers to be updated */
    QSIMPLEQ_ENTRY(TCGLabelQemuLdst) next;  /* link in the pending queue */
} TCGLabelQemuLdst;
104  
105  static void tcg_register_jit_int(const void *buf, size_t size,
106                                   const void *debug_frame,
107                                   size_t debug_frame_size)
108      __attribute__((unused));
109  
110  /* Forward declarations for functions declared and used in tcg-target.c.inc. */
111  static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
112                         intptr_t arg2);
113  static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
114  static void tcg_out_movi(TCGContext *s, TCGType type,
115                           TCGReg ret, tcg_target_long arg);
116  static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
117  static void tcg_out_ext16s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
118  static void tcg_out_ext8u(TCGContext *s, TCGReg ret, TCGReg arg);
119  static void tcg_out_ext16u(TCGContext *s, TCGReg ret, TCGReg arg);
120  static void tcg_out_ext32s(TCGContext *s, TCGReg ret, TCGReg arg);
121  static void tcg_out_ext32u(TCGContext *s, TCGReg ret, TCGReg arg);
122  static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg);
123  static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg);
124  static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg ret, TCGReg arg);
125  static void tcg_out_addi_ptr(TCGContext *s, TCGReg, TCGReg, tcg_target_long);
126  static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2);
127  static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg);
128  static void tcg_out_goto_tb(TCGContext *s, int which);
129  static void tcg_out_op(TCGContext *s, TCGOpcode opc,
130                         const TCGArg args[TCG_MAX_OP_ARGS],
131                         const int const_args[TCG_MAX_OP_ARGS]);
132  #if TCG_TARGET_MAYBE_vec
133  static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
134                              TCGReg dst, TCGReg src);
135  static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
136                               TCGReg dst, TCGReg base, intptr_t offset);
137  static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
138                               TCGReg dst, int64_t arg);
139  static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
140                             unsigned vecl, unsigned vece,
141                             const TCGArg args[TCG_MAX_OP_ARGS],
142                             const int const_args[TCG_MAX_OP_ARGS]);
143  #else
144  static inline bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
145                                     TCGReg dst, TCGReg src)
146  {
147      g_assert_not_reached();
148  }
149  static inline bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
150                                      TCGReg dst, TCGReg base, intptr_t offset)
151  {
152      g_assert_not_reached();
153  }
154  static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
155                                      TCGReg dst, int64_t arg)
156  {
157      g_assert_not_reached();
158  }
159  static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
160                                    unsigned vecl, unsigned vece,
161                                    const TCGArg args[TCG_MAX_OP_ARGS],
162                                    const int const_args[TCG_MAX_OP_ARGS])
163  {
164      g_assert_not_reached();
165  }
166  #endif
167  static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
168                         intptr_t arg2);
169  static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
170                          TCGReg base, intptr_t ofs);
171  static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target,
172                           const TCGHelperInfo *info);
173  static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot);
174  static bool tcg_target_const_match(int64_t val, TCGType type, int ct);
175  #ifdef TCG_TARGET_NEED_LDST_LABELS
176  static int tcg_out_ldst_finalize(TCGContext *s);
177  #endif
178  
/*
 * Backend-supplied parameters for marshalling the out-of-line
 * qemu_ld/st helper calls (see tcg_out_ld_helper_args et al).
 */
typedef struct TCGLdstHelperParam {
    /* Optional hook producing the return-address argument register. */
    TCGReg (*ra_gen)(TCGContext *s, const TCGLabelQemuLdst *l, int arg_reg);
    unsigned ntmp;          /* number of valid entries in tmp[] */
    int tmp[3];             /* temporary registers available for use */
} TCGLdstHelperParam;
184  
185  static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *l,
186                                     const TCGLdstHelperParam *p)
187      __attribute__((unused));
188  static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *l,
189                                    bool load_sign, const TCGLdstHelperParam *p)
190      __attribute__((unused));
191  static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *l,
192                                     const TCGLdstHelperParam *p)
193      __attribute__((unused));
194  
/*
 * Slow-path load helpers, indexed by MemOp size|sign.
 * The MO_SL and MO_128 entries exist only for 64-bit hosts.
 */
static void * const qemu_ld_helpers[MO_SSIZE + 1] __attribute__((unused)) = {
    [MO_UB] = helper_ldub_mmu,
    [MO_SB] = helper_ldsb_mmu,
    [MO_UW] = helper_lduw_mmu,
    [MO_SW] = helper_ldsw_mmu,
    [MO_UL] = helper_ldul_mmu,
    [MO_UQ] = helper_ldq_mmu,
#if TCG_TARGET_REG_BITS == 64
    [MO_SL] = helper_ldsl_mmu,
    [MO_128] = helper_ld16_mmu,
#endif
};
207  
/*
 * Slow-path store helpers, indexed by MemOp size.
 * The MO_128 entry exists only for 64-bit hosts.
 */
static void * const qemu_st_helpers[MO_SIZE + 1] __attribute__((unused)) = {
    [MO_8]  = helper_stb_mmu,
    [MO_16] = helper_stw_mmu,
    [MO_32] = helper_stl_mmu,
    [MO_64] = helper_stq_mmu,
#if TCG_TARGET_REG_BITS == 64
    [MO_128] = helper_st16_mmu,
#endif
};
217  
/* Result of atom_and_align_for_opc: atomicity and alignment to enforce. */
typedef struct {
    MemOp atom;   /* lg2 bits of atomicity required */
    MemOp align;  /* lg2 bits of alignment to use */
} TCGAtomAlign;
222  
223  static TCGAtomAlign atom_and_align_for_opc(TCGContext *s, MemOp opc,
224                                             MemOp host_atom, bool allow_two_ops)
225      __attribute__((unused));
226  
/* The parent context, copied into each thread by tcg_register_thread(). */
TCGContext tcg_init_ctx;
/* The translation context of the current thread. */
__thread TCGContext *tcg_ctx;

/* All registered translation contexts; see tcg_register_thread(). */
TCGContext **tcg_ctxs;
unsigned int tcg_cur_ctxs;      /* number of entries used in tcg_ctxs[] */
unsigned int tcg_max_ctxs;      /* allocated capacity of tcg_ctxs[] */
TCGv_env cpu_env = 0;
const void *tcg_code_gen_epilogue;
/* Displacement between the RW and RX views of the code buffer. */
uintptr_t tcg_splitwx_diff;

#ifndef CONFIG_TCG_INTERPRETER
tcg_prologue_fn *tcg_qemu_tb_exec;
#endif

static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT];
static TCGRegSet tcg_target_call_clobber_regs;
243  
#if TCG_TARGET_INSN_UNIT_SIZE == 1
/* Emit one byte into the code stream and advance the output pointer. */
static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v)
{
    *s->code_ptr++ = v;
}

/* Patch a previously emitted byte in place. */
static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p,
                                                      uint8_t v)
{
    *p = v;
}
#endif
256  
#if TCG_TARGET_INSN_UNIT_SIZE <= 2
/* Emit a 16-bit value into the code stream. */
static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *s->code_ptr++ = v;
    } else {
        /* Smaller insn units: copy bytewise and advance by two units. */
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

/* Patch a previously emitted 16-bit value in place. */
static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p,
                                                       uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif
279  
#if TCG_TARGET_INSN_UNIT_SIZE <= 4
/* Emit a 32-bit value into the code stream. */
static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *s->code_ptr++ = v;
    } else {
        /* Smaller insn units: copy bytewise and advance accordingly. */
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

/* Patch a previously emitted 32-bit value in place. */
static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p,
                                                       uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif
302  
#if TCG_TARGET_INSN_UNIT_SIZE <= 8
/* Emit a 64-bit value into the code stream. */
static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *s->code_ptr++ = v;
    } else {
        /* Smaller insn units: copy bytewise and advance accordingly. */
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

/* Patch a previously emitted 64-bit value in place. */
static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p,
                                                       uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif
325  
326  /* label relocation processing */
327  
328  static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
329                            TCGLabel *l, intptr_t addend)
330  {
331      TCGRelocation *r = tcg_malloc(sizeof(TCGRelocation));
332  
333      r->type = type;
334      r->ptr = code_ptr;
335      r->addend = addend;
336      QSIMPLEQ_INSERT_TAIL(&l->relocs, r, next);
337  }
338  
/* Bind label @l to the current output position (read-execute view). */
static void tcg_out_label(TCGContext *s, TCGLabel *l)
{
    tcg_debug_assert(!l->has_value);
    l->has_value = 1;
    l->u.value_ptr = tcg_splitwx_to_rx(s->code_ptr);
}
345  
346  TCGLabel *gen_new_label(void)
347  {
348      TCGContext *s = tcg_ctx;
349      TCGLabel *l = tcg_malloc(sizeof(TCGLabel));
350  
351      memset(l, 0, sizeof(TCGLabel));
352      l->id = s->nb_labels++;
353      QSIMPLEQ_INIT(&l->branches);
354      QSIMPLEQ_INIT(&l->relocs);
355  
356      QSIMPLEQ_INSERT_TAIL(&s->labels, l, next);
357  
358      return l;
359  }
360  
361  static bool tcg_resolve_relocs(TCGContext *s)
362  {
363      TCGLabel *l;
364  
365      QSIMPLEQ_FOREACH(l, &s->labels, next) {
366          TCGRelocation *r;
367          uintptr_t value = l->u.value;
368  
369          QSIMPLEQ_FOREACH(r, &l->relocs, next) {
370              if (!patch_reloc(r->ptr, r->type, value, r->addend)) {
371                  return false;
372              }
373          }
374      }
375      return true;
376  }
377  
/* Record the current code offset as the jump-reset point for chain @which. */
static void set_jmp_reset_offset(TCGContext *s, int which)
{
    /*
     * We will check for overflow at the end of the opcode loop in
     * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
     */
    s->gen_tb->jmp_reset_offset[which] = tcg_current_code_size(s);
}
386  
/* Record the current code offset as the jump instruction for chain @which. */
static void G_GNUC_UNUSED set_jmp_insn_offset(TCGContext *s, int which)
{
    /*
     * We will check for overflow at the end of the opcode loop in
     * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
     */
    s->gen_tb->jmp_insn_offset[which] = tcg_current_code_size(s);
}
395  
/* Address of the TB's jmp_target_addr[@which] slot, in the RX view. */
static uintptr_t G_GNUC_UNUSED get_jmp_target_addr(TCGContext *s, int which)
{
    /*
     * Return the read-execute version of the pointer, for the benefit
     * of any pc-relative addressing mode.
     */
    return (uintptr_t)tcg_splitwx_to_rx(&s->gen_tb->jmp_target_addr[which]);
}
404  
#if defined(CONFIG_SOFTMMU) && !defined(CONFIG_TCG_INTERPRETER)
/* Offset from env of the fast-path TLB descriptor for mmu index @which. */
static int tlb_mask_table_ofs(TCGContext *s, int which)
{
    return s->tlb_fast_offset + which * sizeof(CPUTLBDescFast);
}
#endif
411  
/* Signal overflow, starting over with fewer guest insns. */
static G_NORETURN
void tcg_raise_tb_overflow(TCGContext *s)
{
    /* Unwind to the sigsetjmp in the translation loop (s->jmp_trans). */
    siglongjmp(s->jmp_trans, -2);
}
418  
419  /*
420   * Used by tcg_out_movext{1,2} to hold the arguments for tcg_out_movext.
421   * By the time we arrive at tcg_out_movext1, @dst is always a TCGReg.
422   *
423   * However, tcg_out_helper_load_slots reuses this field to hold an
 * argument slot number (which may designate an argument register or an
425   * argument stack slot), converting to TCGReg once all arguments that
426   * are destined for the stack are processed.
427   */
typedef struct TCGMovExtend {
    unsigned dst;       /* destination TCGReg (or transiently a slot number) */
    TCGReg src;         /* source register */
    TCGType dst_type;   /* integral type of the destination */
    TCGType src_type;   /* integral type of the source */
    MemOp src_ext;      /* extension to apply to the source */
} TCGMovExtend;
435  
436  /**
437   * tcg_out_movext -- move and extend
438   * @s: tcg context
439   * @dst_type: integral type for destination
440   * @dst: destination register
441   * @src_type: integral type for source
442   * @src_ext: extension to apply to source
443   * @src: source register
444   *
445   * Move or extend @src into @dst, depending on @src_ext and the types.
446   */
447  static void tcg_out_movext(TCGContext *s, TCGType dst_type, TCGReg dst,
448                             TCGType src_type, MemOp src_ext, TCGReg src)
449  {
450      switch (src_ext) {
451      case MO_UB:
452          tcg_out_ext8u(s, dst, src);
453          break;
454      case MO_SB:
455          tcg_out_ext8s(s, dst_type, dst, src);
456          break;
457      case MO_UW:
458          tcg_out_ext16u(s, dst, src);
459          break;
460      case MO_SW:
461          tcg_out_ext16s(s, dst_type, dst, src);
462          break;
463      case MO_UL:
464      case MO_SL:
465          if (dst_type == TCG_TYPE_I32) {
466              if (src_type == TCG_TYPE_I32) {
467                  tcg_out_mov(s, TCG_TYPE_I32, dst, src);
468              } else {
469                  tcg_out_extrl_i64_i32(s, dst, src);
470              }
471          } else if (src_type == TCG_TYPE_I32) {
472              if (src_ext & MO_SIGN) {
473                  tcg_out_exts_i32_i64(s, dst, src);
474              } else {
475                  tcg_out_extu_i32_i64(s, dst, src);
476              }
477          } else {
478              if (src_ext & MO_SIGN) {
479                  tcg_out_ext32s(s, dst, src);
480              } else {
481                  tcg_out_ext32u(s, dst, src);
482              }
483          }
484          break;
485      case MO_UQ:
486          tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
487          if (dst_type == TCG_TYPE_I32) {
488              tcg_out_extrl_i64_i32(s, dst, src);
489          } else {
490              tcg_out_mov(s, TCG_TYPE_I64, dst, src);
491          }
492          break;
493      default:
494          g_assert_not_reached();
495      }
496  }
497  
/* Minor variations on a theme, using a structure. */

/* As tcg_out_movext, but take the source register from @src instead of @i. */
static void tcg_out_movext1_new_src(TCGContext *s, const TCGMovExtend *i,
                                    TCGReg src)
{
    tcg_out_movext(s, i->dst_type, i->dst, i->src_type, i->src_ext, src);
}

/* As tcg_out_movext, with all arguments supplied by @i. */
static void tcg_out_movext1(TCGContext *s, const TCGMovExtend *i)
{
    tcg_out_movext1_new_src(s, i, i->src);
}
509  
510  /**
 * tcg_out_movext2 -- move and extend two pairs
512   * @s: tcg context
513   * @i1: first move description
514   * @i2: second move description
515   * @scratch: temporary register, or -1 for none
516   *
517   * As tcg_out_movext, for both @i1 and @i2, caring for overlap
518   * between the sources and destinations.
519   */
520  
static void tcg_out_movext2(TCGContext *s, const TCGMovExtend *i1,
                            const TCGMovExtend *i2, int scratch)
{
    TCGReg src1 = i1->src;
    TCGReg src2 = i2->src;

    if (i1->dst != src2) {
        /* No overlap: the first move cannot clobber the second's source. */
        tcg_out_movext1(s, i1);
        tcg_out_movext1(s, i2);
        return;
    }
    if (i2->dst == src1) {
        /* Full cycle: each destination is the other's source. */
        TCGType src1_type = i1->src_type;
        TCGType src2_type = i2->src_type;

        if (tcg_out_xchg(s, MAX(src1_type, src2_type), src1, src2)) {
            /* The data is now in the correct registers, now extend. */
            src1 = i2->src;
            src2 = i1->src;
        } else {
            /* No host xchg: preserve src1 in scratch so i2 may clobber it. */
            tcg_debug_assert(scratch >= 0);
            tcg_out_mov(s, src1_type, scratch, src1);
            src1 = scratch;
        }
    }
    /* i1->dst overlaps src2, so i2 must be emitted first. */
    tcg_out_movext1_new_src(s, i2, src2);
    tcg_out_movext1_new_src(s, i1, src1);
}
549  
550  /**
 * tcg_out_movext3 -- move and extend three pairs
552   * @s: tcg context
553   * @i1: first move description
554   * @i2: second move description
555   * @i3: third move description
556   * @scratch: temporary register, or -1 for none
557   *
558   * As tcg_out_movext, for all of @i1, @i2 and @i3, caring for overlap
559   * between the sources and destinations.
560   */
561  
static void tcg_out_movext3(TCGContext *s, const TCGMovExtend *i1,
                            const TCGMovExtend *i2, const TCGMovExtend *i3,
                            int scratch)
{
    TCGReg src1 = i1->src;
    TCGReg src2 = i2->src;
    TCGReg src3 = i3->src;

    /*
     * If some destination does not overlap another move's source,
     * emit that move first and reduce to the two-move case.
     */
    if (i1->dst != src2 && i1->dst != src3) {
        tcg_out_movext1(s, i1);
        tcg_out_movext2(s, i2, i3, scratch);
        return;
    }
    if (i2->dst != src1 && i2->dst != src3) {
        tcg_out_movext1(s, i2);
        tcg_out_movext2(s, i1, i3, scratch);
        return;
    }
    if (i3->dst != src1 && i3->dst != src2) {
        tcg_out_movext1(s, i3);
        tcg_out_movext2(s, i1, i2, scratch);
        return;
    }

    /*
     * There is a cycle.  Since there are only 3 nodes, the cycle is
     * either "clockwise" or "anti-clockwise", and can be solved with
     * a single scratch or two xchg.
     */
    if (i1->dst == src2 && i2->dst == src3 && i3->dst == src1) {
        /* "Clockwise" */
        if (tcg_out_xchg(s, MAX(i1->src_type, i2->src_type), src1, src2)) {
            tcg_out_xchg(s, MAX(i2->src_type, i3->src_type), src2, src3);
            /* The data is now in the correct registers, now extend. */
            tcg_out_movext1_new_src(s, i1, i1->dst);
            tcg_out_movext1_new_src(s, i2, i2->dst);
            tcg_out_movext1_new_src(s, i3, i3->dst);
        } else {
            /* No host xchg: spill src1, then emit in reverse order. */
            tcg_debug_assert(scratch >= 0);
            tcg_out_mov(s, i1->src_type, scratch, src1);
            tcg_out_movext1(s, i3);
            tcg_out_movext1(s, i2);
            tcg_out_movext1_new_src(s, i1, scratch);
        }
    } else if (i1->dst == src3 && i2->dst == src1 && i3->dst == src2) {
        /* "Anti-clockwise" */
        if (tcg_out_xchg(s, MAX(i2->src_type, i3->src_type), src2, src3)) {
            tcg_out_xchg(s, MAX(i1->src_type, i2->src_type), src1, src2);
            /* The data is now in the correct registers, now extend. */
            tcg_out_movext1_new_src(s, i1, i1->dst);
            tcg_out_movext1_new_src(s, i2, i2->dst);
            tcg_out_movext1_new_src(s, i3, i3->dst);
        } else {
            /* No host xchg: spill src1, then emit in reverse order. */
            tcg_debug_assert(scratch >= 0);
            tcg_out_mov(s, i1->src_type, scratch, src1);
            tcg_out_movext1(s, i2);
            tcg_out_movext1(s, i3);
            tcg_out_movext1_new_src(s, i1, scratch);
        }
    } else {
        g_assert_not_reached();
    }
}
625  
626  #define C_PFX1(P, A)                    P##A
627  #define C_PFX2(P, A, B)                 P##A##_##B
628  #define C_PFX3(P, A, B, C)              P##A##_##B##_##C
629  #define C_PFX4(P, A, B, C, D)           P##A##_##B##_##C##_##D
630  #define C_PFX5(P, A, B, C, D, E)        P##A##_##B##_##C##_##D##_##E
631  #define C_PFX6(P, A, B, C, D, E, F)     P##A##_##B##_##C##_##D##_##E##_##F
632  
633  /* Define an enumeration for the various combinations. */
634  
635  #define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1),
636  #define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2),
637  #define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3),
638  #define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4),
639  
640  #define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1),
641  #define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2),
642  #define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3),
643  #define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4),
644  
645  #define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2),
646  
647  #define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1),
648  #define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2),
649  #define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3),
650  #define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4),
651  
652  typedef enum {
653  #include "tcg-target-con-set.h"
654  } TCGConstraintSetIndex;
655  
656  static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode);
657  
658  #undef C_O0_I1
659  #undef C_O0_I2
660  #undef C_O0_I3
661  #undef C_O0_I4
662  #undef C_O1_I1
663  #undef C_O1_I2
664  #undef C_O1_I3
665  #undef C_O1_I4
666  #undef C_N1_I2
667  #undef C_O2_I1
668  #undef C_O2_I2
669  #undef C_O2_I3
670  #undef C_O2_I4
671  
672  /* Put all of the constraint sets into an array, indexed by the enum. */
673  
674  #define C_O0_I1(I1)                     { .args_ct_str = { #I1 } },
675  #define C_O0_I2(I1, I2)                 { .args_ct_str = { #I1, #I2 } },
676  #define C_O0_I3(I1, I2, I3)             { .args_ct_str = { #I1, #I2, #I3 } },
677  #define C_O0_I4(I1, I2, I3, I4)         { .args_ct_str = { #I1, #I2, #I3, #I4 } },
678  
679  #define C_O1_I1(O1, I1)                 { .args_ct_str = { #O1, #I1 } },
680  #define C_O1_I2(O1, I1, I2)             { .args_ct_str = { #O1, #I1, #I2 } },
681  #define C_O1_I3(O1, I1, I2, I3)         { .args_ct_str = { #O1, #I1, #I2, #I3 } },
682  #define C_O1_I4(O1, I1, I2, I3, I4)     { .args_ct_str = { #O1, #I1, #I2, #I3, #I4 } },
683  
684  #define C_N1_I2(O1, I1, I2)             { .args_ct_str = { "&" #O1, #I1, #I2 } },
685  
686  #define C_O2_I1(O1, O2, I1)             { .args_ct_str = { #O1, #O2, #I1 } },
687  #define C_O2_I2(O1, O2, I1, I2)         { .args_ct_str = { #O1, #O2, #I1, #I2 } },
688  #define C_O2_I3(O1, O2, I1, I2, I3)     { .args_ct_str = { #O1, #O2, #I1, #I2, #I3 } },
689  #define C_O2_I4(O1, O2, I1, I2, I3, I4) { .args_ct_str = { #O1, #O2, #I1, #I2, #I3, #I4 } },
690  
691  static const TCGTargetOpDef constraint_sets[] = {
692  #include "tcg-target-con-set.h"
693  };
694  
695  
696  #undef C_O0_I1
697  #undef C_O0_I2
698  #undef C_O0_I3
699  #undef C_O0_I4
700  #undef C_O1_I1
701  #undef C_O1_I2
702  #undef C_O1_I3
703  #undef C_O1_I4
704  #undef C_N1_I2
705  #undef C_O2_I1
706  #undef C_O2_I2
707  #undef C_O2_I3
708  #undef C_O2_I4
709  
710  /* Expand the enumerator to be returned from tcg_target_op_def(). */
711  
712  #define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1)
713  #define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2)
714  #define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3)
715  #define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4)
716  
717  #define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1)
718  #define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2)
719  #define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3)
720  #define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4)
721  
722  #define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2)
723  
724  #define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1)
725  #define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2)
726  #define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3)
727  #define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4)
728  
729  #include "tcg-target.c.inc"
730  
/* Allocate per-context plugin state; a no-op without CONFIG_PLUGIN. */
static void alloc_tcg_plugin_context(TCGContext *s)
{
#ifdef CONFIG_PLUGIN
    s->plugin_tb = g_new0(struct qemu_plugin_tb, 1);
    s->plugin_tb->insns =
        g_ptr_array_new_with_free_func(qemu_plugin_insn_cleanup_fn);
#endif
}
739  
740  /*
741   * All TCG threads except the parent (i.e. the one that called tcg_context_init
742   * and registered the target's TCG globals) must register with this function
743   * before initiating translation.
744   *
745   * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation
746   * of tcg_region_init() for the reasoning behind this.
747   *
748   * In softmmu each caller registers its context in tcg_ctxs[]. Note that in
 * softmmu tcg_ctxs[] does not track tcg_init_ctx, since the initial context
750   * is not used anymore for translation once this function is called.
751   *
752   * Not tracking tcg_init_ctx in tcg_ctxs[] in softmmu keeps code that iterates
 * over the array (e.g. tcg_code_size()) the same for both softmmu and user-mode.
754   */
755  #ifdef CONFIG_USER_ONLY
void tcg_register_thread(void)
{
    /* User-mode shares the single initial context; see the comment above. */
    tcg_ctx = &tcg_init_ctx;
}
760  #else
761  void tcg_register_thread(void)
762  {
763      TCGContext *s = g_malloc(sizeof(*s));
764      unsigned int i, n;
765  
766      *s = tcg_init_ctx;
767  
768      /* Relink mem_base.  */
769      for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) {
770          if (tcg_init_ctx.temps[i].mem_base) {
771              ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps;
772              tcg_debug_assert(b >= 0 && b < n);
773              s->temps[i].mem_base = &s->temps[b];
774          }
775      }
776  
777      /* Claim an entry in tcg_ctxs */
778      n = qatomic_fetch_inc(&tcg_cur_ctxs);
779      g_assert(n < tcg_max_ctxs);
780      qatomic_set(&tcg_ctxs[n], s);
781  
782      if (n > 0) {
783          alloc_tcg_plugin_context(s);
784          tcg_region_initial_alloc(s);
785      }
786  
787      tcg_ctx = s;
788  }
789  #endif /* !CONFIG_USER_ONLY */
790  
791  /* pool based memory allocation */
792  void *tcg_malloc_internal(TCGContext *s, int size)
793  {
794      TCGPool *p;
795      int pool_size;
796  
797      if (size > TCG_POOL_CHUNK_SIZE) {
798          /* big malloc: insert a new pool (XXX: could optimize) */
799          p = g_malloc(sizeof(TCGPool) + size);
800          p->size = size;
801          p->next = s->pool_first_large;
802          s->pool_first_large = p;
803          return p->data;
804      } else {
805          p = s->pool_current;
806          if (!p) {
807              p = s->pool_first;
808              if (!p)
809                  goto new_pool;
810          } else {
811              if (!p->next) {
812              new_pool:
813                  pool_size = TCG_POOL_CHUNK_SIZE;
814                  p = g_malloc(sizeof(TCGPool) + pool_size);
815                  p->size = pool_size;
816                  p->next = NULL;
817                  if (s->pool_current) {
818                      s->pool_current->next = p;
819                  } else {
820                      s->pool_first = p;
821                  }
822              } else {
823                  p = p->next;
824              }
825          }
826      }
827      s->pool_current = p;
828      s->pool_cur = p->data + size;
829      s->pool_end = p->data + p->size;
830      return p->data;
831  }
832  
833  void tcg_pool_reset(TCGContext *s)
834  {
835      TCGPool *p, *t;
836      for (p = s->pool_first_large; p; p = t) {
837          t = p->next;
838          g_free(p);
839      }
840      s->pool_first_large = NULL;
841      s->pool_cur = s->pool_end = NULL;
842      s->pool_current = NULL;
843  }
844  
845  /*
846   * Create TCGHelperInfo structures for "tcg/tcg-ldst.h" functions,
847   * akin to what "exec/helper-tcg.h" does with DEF_HELPER_FLAGS_N.
848   * We only use these for layout in tcg_out_ld_helper_ret and
849   * tcg_out_st_helper_args, and share them between several of
850   * the helpers, with the end result that it's easier to build manually.
851   */
852  
853  #if TCG_TARGET_REG_BITS == 32
854  # define dh_typecode_ttl  dh_typecode_i32
855  #else
856  # define dh_typecode_ttl  dh_typecode_i64
857  #endif
858  
/* ld32: tcg_target_ulong (*)(env, uint64_t addr, unsigned oi, uintptr_t ra) */
static TCGHelperInfo info_helper_ld32_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(ttl, 0)  /* return tcg_target_ulong */
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* unsigned oi */
              | dh_typemask(ptr, 4)  /* uintptr_t ra */
};
867  
/* ld64: uint64_t (*)(env, uint64_t addr, unsigned oi, uintptr_t ra) */
static TCGHelperInfo info_helper_ld64_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(i64, 0)  /* return uint64_t */
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* unsigned oi */
              | dh_typemask(ptr, 4)  /* uintptr_t ra */
};
876  
/* ld128: Int128 (*)(env, uint64_t addr, unsigned oi, uintptr_t ra) */
static TCGHelperInfo info_helper_ld128_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(i128, 0) /* return Int128 */
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* unsigned oi */
              | dh_typemask(ptr, 4)  /* uintptr_t ra */
};
885  
/* st32: void (*)(env, uint64_t addr, uint32_t data, unsigned oi, uintptr_t ra) */
static TCGHelperInfo info_helper_st32_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(void, 0)
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* uint32_t data */
              | dh_typemask(i32, 4)  /* unsigned oi */
              | dh_typemask(ptr, 5)  /* uintptr_t ra */
};
895  
/* st64: void (*)(env, uint64_t addr, uint64_t data, unsigned oi, uintptr_t ra) */
static TCGHelperInfo info_helper_st64_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(void, 0)
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i64, 3)  /* uint64_t data */
              | dh_typemask(i32, 4)  /* unsigned oi */
              | dh_typemask(ptr, 5)  /* uintptr_t ra */
};
905  
/* st128: void (*)(env, uint64_t addr, Int128 data, unsigned oi, uintptr_t ra) */
static TCGHelperInfo info_helper_st128_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(void, 0)
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i128, 3) /* Int128 data */
              | dh_typemask(i32, 4)  /* unsigned oi */
              | dh_typemask(ptr, 5)  /* uintptr_t ra */
};
915  
916  #ifdef CONFIG_TCG_INTERPRETER
/*
 * Translate a single dh_typecode_* value into the matching libffi
 * type descriptor.  Aborts on an unknown typecode.
 */
static ffi_type *typecode_to_ffi(int argmask)
{
    /*
     * libffi does not support __int128_t, so we have forced Int128
     * to use the structure definition instead of the builtin type.
     */
    static ffi_type *ffi_type_i128_elements[3] = {
        &ffi_type_uint64,
        &ffi_type_uint64,
        NULL
    };
    static ffi_type ffi_type_i128 = {
        .size = 16,
        .alignment = __alignof__(Int128),
        .type = FFI_TYPE_STRUCT,
        .elements = ffi_type_i128_elements,
    };

    switch (argmask) {
    case dh_typecode_void:
        return &ffi_type_void;
    case dh_typecode_i32:
        return &ffi_type_uint32;
    case dh_typecode_s32:
        return &ffi_type_sint32;
    case dh_typecode_i64:
        return &ffi_type_uint64;
    case dh_typecode_s64:
        return &ffi_type_sint64;
    case dh_typecode_ptr:
        return &ffi_type_pointer;
    case dh_typecode_i128:
        return &ffi_type_i128;
    }
    g_assert_not_reached();
}
953  
/*
 * Build the libffi call descriptor (ffi_cif) matching info->typemask.
 * Used by HELPER_INFO_INIT_VAL when running under the TCG interpreter.
 * The cif and its argument-type array are allocated in one block.
 */
static ffi_cif *init_ffi_layout(TCGHelperInfo *info)
{
    unsigned typemask = info->typemask;
    struct {
        ffi_cif cif;
        ffi_type *args[];  /* flexible array: one entry per argument */
    } *ca;
    ffi_status status;
    int nargs;

    /* Ignoring the return type, find the last non-zero field. */
    /* Each argument occupies 3 bits of typemask, starting at bit 3. */
    nargs = 32 - clz32(typemask >> 3);
    nargs = DIV_ROUND_UP(nargs, 3);
    assert(nargs <= MAX_CALL_IARGS);

    ca = g_malloc0(sizeof(*ca) + nargs * sizeof(ffi_type *));
    ca->cif.rtype = typecode_to_ffi(typemask & 7);
    ca->cif.nargs = nargs;

    if (nargs != 0) {
        ca->cif.arg_types = ca->args;
        for (int j = 0; j < nargs; ++j) {
            int typecode = extract32(typemask, (j + 1) * 3, 3);
            ca->args[j] = typecode_to_ffi(typecode);
        }
    }

    status = ffi_prep_cif(&ca->cif, FFI_DEFAULT_ABI, nargs,
                          ca->cif.rtype, ca->cif.arg_types);
    assert(status == FFI_OK);

    return &ca->cif;
}
987  
988  #define HELPER_INFO_INIT(I)      (&(I)->cif)
989  #define HELPER_INFO_INIT_VAL(I)  init_ffi_layout(I)
990  #else
991  #define HELPER_INFO_INIT(I)      (&(I)->init)
992  #define HELPER_INFO_INIT_VAL(I)  1
993  #endif /* CONFIG_TCG_INTERPRETER */
994  
995  static inline bool arg_slot_reg_p(unsigned arg_slot)
996  {
997      /*
998       * Split the sizeof away from the comparison to avoid Werror from
999       * "unsigned < 0 is always false", when iarg_regs is empty.
1000       */
1001      unsigned nreg = ARRAY_SIZE(tcg_target_call_iarg_regs);
1002      return arg_slot < nreg;
1003  }
1004  
1005  static inline int arg_slot_stk_ofs(unsigned arg_slot)
1006  {
1007      unsigned max = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long);
1008      unsigned stk_slot = arg_slot - ARRAY_SIZE(tcg_target_call_iarg_regs);
1009  
1010      tcg_debug_assert(stk_slot < max);
1011      return TCG_TARGET_CALL_STACK_OFFSET + stk_slot * sizeof(tcg_target_long);
1012  }
1013  
/* Running totals while laying out a helper call's arguments. */
typedef struct TCGCumulativeArgs {
    int arg_idx;                /* tcg_gen_callN args[] */
    int info_in_idx;            /* TCGHelperInfo in[] */
    int arg_slot;               /* regs+stack slot */
    int ref_slot;               /* stack slots for references */
} TCGCumulativeArgs;
1020  
1021  static void layout_arg_even(TCGCumulativeArgs *cum)
1022  {
1023      cum->arg_slot += cum->arg_slot & 1;
1024  }
1025  
1026  static void layout_arg_1(TCGCumulativeArgs *cum, TCGHelperInfo *info,
1027                           TCGCallArgumentKind kind)
1028  {
1029      TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];
1030  
1031      *loc = (TCGCallArgumentLoc){
1032          .kind = kind,
1033          .arg_idx = cum->arg_idx,
1034          .arg_slot = cum->arg_slot,
1035      };
1036      cum->info_in_idx++;
1037      cum->arg_slot++;
1038  }
1039  
1040  static void layout_arg_normal_n(TCGCumulativeArgs *cum,
1041                                  TCGHelperInfo *info, int n)
1042  {
1043      TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];
1044  
1045      for (int i = 0; i < n; ++i) {
1046          /* Layout all using the same arg_idx, adjusting the subindex. */
1047          loc[i] = (TCGCallArgumentLoc){
1048              .kind = TCG_CALL_ARG_NORMAL,
1049              .arg_idx = cum->arg_idx,
1050              .tmp_subindex = i,
1051              .arg_slot = cum->arg_slot + i,
1052          };
1053      }
1054      cum->info_in_idx += n;
1055      cum->arg_slot += n;
1056  }
1057  
/*
 * Record an Int128 argument passed by reference: the first piece
 * occupies a regular argument slot and carries the pointer, while the
 * remaining pieces only reserve stack "ref_slot" space to receive a
 * copy of the value.
 */
static void layout_arg_by_ref(TCGCumulativeArgs *cum, TCGHelperInfo *info)
{
    TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];
    int n = 128 / TCG_TARGET_REG_BITS;

    /* The first subindex carries the pointer. */
    layout_arg_1(cum, info, TCG_CALL_ARG_BY_REF);

    /*
     * The callee is allowed to clobber memory associated with
     * structure pass by-reference.  Therefore we must make copies.
     * Allocate space from "ref_slot", which will be adjusted to
     * follow the parameters on the stack.
     */
    loc[0].ref_slot = cum->ref_slot;

    /*
     * Subsequent words also go into the reference slot, but
     * do not accumulate into the regular arguments.
     */
    for (int i = 1; i < n; ++i) {
        loc[i] = (TCGCallArgumentLoc){
            .kind = TCG_CALL_ARG_BY_REF_N,
            .arg_idx = cum->arg_idx,
            .tmp_subindex = i,
            .ref_slot = cum->ref_slot + i,
        };
    }
    cum->info_in_idx += n;
    cum->ref_slot += n;
}
1089  
/*
 * Parse info->typemask and fill in the call layout: how the return
 * value is delivered (info->nr_out, info->out_kind) and where each
 * input argument lives (info->in[], info->nr_in), honoring the
 * target's calling-convention settings (TCG_TARGET_CALL_RET_I128,
 * TCG_TARGET_CALL_ARG_*).
 */
static void init_call_layout(TCGHelperInfo *info)
{
    int max_reg_slots = ARRAY_SIZE(tcg_target_call_iarg_regs);
    int max_stk_slots = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long);
    unsigned typemask = info->typemask;
    unsigned typecode;
    TCGCumulativeArgs cum = { };

    /*
     * Parse and place any function return value.
     */
    typecode = typemask & 7;
    switch (typecode) {
    case dh_typecode_void:
        info->nr_out = 0;
        break;
    case dh_typecode_i32:
    case dh_typecode_s32:
    case dh_typecode_ptr:
        info->nr_out = 1;
        info->out_kind = TCG_CALL_RET_NORMAL;
        break;
    case dh_typecode_i64:
    case dh_typecode_s64:
        info->nr_out = 64 / TCG_TARGET_REG_BITS;
        info->out_kind = TCG_CALL_RET_NORMAL;
        /* Query the last register now to trigger any assert early. */
        tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1);
        break;
    case dh_typecode_i128:
        info->nr_out = 128 / TCG_TARGET_REG_BITS;
        info->out_kind = TCG_TARGET_CALL_RET_I128;
        switch (TCG_TARGET_CALL_RET_I128) {
        case TCG_CALL_RET_NORMAL:
            /* Query the last register now to trigger any assert early. */
            tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1);
            break;
        case TCG_CALL_RET_BY_VEC:
            /* Query the single register now to trigger any assert early. */
            tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0);
            break;
        case TCG_CALL_RET_BY_REF:
            /*
             * Allocate the first argument to the output.
             * We don't need to store this anywhere, just make it
             * unavailable for use in the input loop below.
             */
            cum.arg_slot = 1;
            break;
        default:
            qemu_build_not_reached();
        }
        break;
    default:
        g_assert_not_reached();
    }

    /*
     * Parse and place function arguments.
     */
    for (typemask >>= 3; typemask; typemask >>= 3, cum.arg_idx++) {
        TCGCallArgumentKind kind;
        TCGType type;

        typecode = typemask & 7;
        switch (typecode) {
        case dh_typecode_i32:
        case dh_typecode_s32:
            type = TCG_TYPE_I32;
            break;
        case dh_typecode_i64:
        case dh_typecode_s64:
            type = TCG_TYPE_I64;
            break;
        case dh_typecode_ptr:
            type = TCG_TYPE_PTR;
            break;
        case dh_typecode_i128:
            type = TCG_TYPE_I128;
            break;
        default:
            g_assert_not_reached();
        }

        switch (type) {
        case TCG_TYPE_I32:
            switch (TCG_TARGET_CALL_ARG_I32) {
            case TCG_CALL_ARG_EVEN:
                layout_arg_even(&cum);
                /* fall through */
            case TCG_CALL_ARG_NORMAL:
                layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL);
                break;
            case TCG_CALL_ARG_EXTEND:
                /* Low bit of the typecode distinguishes signed from unsigned. */
                kind = TCG_CALL_ARG_EXTEND_U + (typecode & 1);
                layout_arg_1(&cum, info, kind);
                break;
            default:
                qemu_build_not_reached();
            }
            break;

        case TCG_TYPE_I64:
            switch (TCG_TARGET_CALL_ARG_I64) {
            case TCG_CALL_ARG_EVEN:
                layout_arg_even(&cum);
                /* fall through */
            case TCG_CALL_ARG_NORMAL:
                if (TCG_TARGET_REG_BITS == 32) {
                    layout_arg_normal_n(&cum, info, 2);
                } else {
                    layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL);
                }
                break;
            default:
                qemu_build_not_reached();
            }
            break;

        case TCG_TYPE_I128:
            switch (TCG_TARGET_CALL_ARG_I128) {
            case TCG_CALL_ARG_EVEN:
                layout_arg_even(&cum);
                /* fall through */
            case TCG_CALL_ARG_NORMAL:
                layout_arg_normal_n(&cum, info, 128 / TCG_TARGET_REG_BITS);
                break;
            case TCG_CALL_ARG_BY_REF:
                layout_arg_by_ref(&cum, info);
                break;
            default:
                qemu_build_not_reached();
            }
            break;

        default:
            g_assert_not_reached();
        }
    }
    info->nr_in = cum.info_in_idx;

    /* Validate that we didn't overrun the input array. */
    assert(cum.info_in_idx <= ARRAY_SIZE(info->in));
    /* Validate the backend has enough argument space. */
    assert(cum.arg_slot <= max_reg_slots + max_stk_slots);

    /*
     * Relocate the "ref_slot" area to the end of the parameters.
     * Minimizing this stack offset helps code size for x86,
     * which has a signed 8-bit offset encoding.
     */
    if (cum.ref_slot != 0) {
        int ref_base = 0;

        if (cum.arg_slot > max_reg_slots) {
            int align = __alignof(Int128) / sizeof(tcg_target_long);

            ref_base = cum.arg_slot - max_reg_slots;
            if (align > 1) {
                ref_base = ROUND_UP(ref_base, align);
            }
        }
        assert(ref_base + cum.ref_slot <= max_stk_slots);
        ref_base += max_reg_slots;

        if (ref_base != 0) {
            for (int i = cum.info_in_idx - 1; i >= 0; --i) {
                TCGCallArgumentLoc *loc = &info->in[i];
                switch (loc->kind) {
                case TCG_CALL_ARG_BY_REF:
                case TCG_CALL_ARG_BY_REF_N:
                    loc->ref_slot += ref_base;
                    break;
                default:
                    break;
                }
            }
        }
    }
}
1270  
1271  static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)];
1272  static void process_op_defs(TCGContext *s);
1273  static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
1274                                              TCGReg reg, const char *name);
1275  
/*
 * One-time initialization of the master TCG context (tcg_init_ctx):
 * allocate per-opcode constraint storage, lay out the slow-path
 * load/store helper calls, initialize the target backend, compute the
 * indirect register allocation order, and register the "env" global.
 */
static void tcg_context_init(unsigned max_cpus)
{
    TCGContext *s = &tcg_init_ctx;
    int op, total_args, n, i;
    TCGOpDef *def;
    TCGArgConstraint *args_ct;
    TCGTemp *ts;

    memset(s, 0, sizeof(*s));
    s->nb_globals = 0;

    /* Count total number of arguments and allocate the corresponding
       space */
    total_args = 0;
    for(op = 0; op < NB_OPS; op++) {
        def = &tcg_op_defs[op];
        n = def->nb_iargs + def->nb_oargs;
        total_args += n;
    }

    args_ct = g_new0(TCGArgConstraint, total_args);

    /* Carve the single allocation into per-opcode constraint slices. */
    for(op = 0; op < NB_OPS; op++) {
        def = &tcg_op_defs[op];
        def->args_ct = args_ct;
        n = def->nb_iargs + def->nb_oargs;
        args_ct += n;
    }

    init_call_layout(&info_helper_ld32_mmu);
    init_call_layout(&info_helper_ld64_mmu);
    init_call_layout(&info_helper_ld128_mmu);
    init_call_layout(&info_helper_st32_mmu);
    init_call_layout(&info_helper_st64_mmu);
    init_call_layout(&info_helper_st128_mmu);

    tcg_target_init(s);
    process_op_defs(s);

    /* Reverse the order of the saved registers, assuming they're all at
       the start of tcg_target_reg_alloc_order.  */
    for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) {
        int r = tcg_target_reg_alloc_order[n];
        if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) {
            break;
        }
    }
    for (i = 0; i < n; ++i) {
        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i];
    }
    for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) {
        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i];
    }

    alloc_tcg_plugin_context(s);

    tcg_ctx = s;
    /*
     * In user-mode we simply share the init context among threads, since we
     * use a single region. See the documentation tcg_region_init() for the
     * reasoning behind this.
     * In softmmu we will have at most max_cpus TCG threads.
     */
#ifdef CONFIG_USER_ONLY
    tcg_ctxs = &tcg_ctx;
    tcg_cur_ctxs = 1;
    tcg_max_ctxs = 1;
#else
    tcg_max_ctxs = max_cpus;
    tcg_ctxs = g_new0(TCGContext *, max_cpus);
#endif

    tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0));
    ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env");
    cpu_env = temp_tcgv_ptr(ts);
}
1352  
/* Top-level TCG startup: context initialization followed by carving
   the code buffer into regions. */
void tcg_init(size_t tb_size, int splitwx, unsigned max_cpus)
{
    tcg_context_init(max_cpus);
    tcg_region_init(tb_size, splitwx, max_cpus);
}
1358  
1359  /*
1360   * Allocate TBs right before their corresponding translated code, making
1361   * sure that TBs and code are on different cache lines.
1362   */
1363  TranslationBlock *tcg_tb_alloc(TCGContext *s)
1364  {
1365      uintptr_t align = qemu_icache_linesize;
1366      TranslationBlock *tb;
1367      void *next;
1368  
1369   retry:
1370      tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align);
1371      next = (void *)ROUND_UP((uintptr_t)(tb + 1), align);
1372  
1373      if (unlikely(next > s->code_gen_highwater)) {
1374          if (tcg_region_alloc(s)) {
1375              return NULL;
1376          }
1377          goto retry;
1378      }
1379      qatomic_set(&s->code_gen_ptr, next);
1380      s->data_gen_ptr = NULL;
1381      return tb;
1382  }
1383  
/*
 * Generate the host prologue at the start of the code buffer, point
 * tcg_qemu_tb_exec at it, flush caches as needed, and optionally dump
 * its disassembly to the log.
 */
void tcg_prologue_init(TCGContext *s)
{
    size_t prologue_size;

    s->code_ptr = s->code_gen_ptr;
    s->code_buf = s->code_gen_ptr;
    s->data_gen_ptr = NULL;

#ifndef CONFIG_TCG_INTERPRETER
    /* Execution enters generated code via the (executable view of the)
       prologue. */
    tcg_qemu_tb_exec = (tcg_prologue_fn *)tcg_splitwx_to_rx(s->code_ptr);
#endif

#ifdef TCG_TARGET_NEED_POOL_LABELS
    s->pool_labels = NULL;
#endif

    qemu_thread_jit_write();
    /* Generate the prologue.  */
    tcg_target_qemu_prologue(s);

#ifdef TCG_TARGET_NEED_POOL_LABELS
    /* Allow the prologue to put e.g. guest_base into a pool entry.  */
    {
        int result = tcg_out_pool_finalize(s);
        tcg_debug_assert(result == 0);
    }
#endif

    prologue_size = tcg_current_code_size(s);
    perf_report_prologue(s->code_gen_ptr, prologue_size);

#ifndef CONFIG_TCG_INTERPRETER
    /* Make the freshly written prologue visible to instruction fetch. */
    flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
                        (uintptr_t)s->code_buf, prologue_size);
#endif

    if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
        FILE *logfile = qemu_log_trylock();
        if (logfile) {
            fprintf(logfile, "PROLOGUE: [size=%zu]\n", prologue_size);
            if (s->data_gen_ptr) {
                size_t code_size = s->data_gen_ptr - s->code_gen_ptr;
                size_t data_size = prologue_size - code_size;
                size_t i;

                disas(logfile, s->code_gen_ptr, code_size);

                /* Dump the constant-pool data that follows the code. */
                for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) {
                    if (sizeof(tcg_target_ulong) == 8) {
                        fprintf(logfile,
                                "0x%08" PRIxPTR ":  .quad  0x%016" PRIx64 "\n",
                                (uintptr_t)s->data_gen_ptr + i,
                                *(uint64_t *)(s->data_gen_ptr + i));
                    } else {
                        fprintf(logfile,
                                "0x%08" PRIxPTR ":  .long  0x%08x\n",
                                (uintptr_t)s->data_gen_ptr + i,
                                *(uint32_t *)(s->data_gen_ptr + i));
                    }
                }
            } else {
                disas(logfile, s->code_gen_ptr, prologue_size);
            }
            fprintf(logfile, "\n");
            qemu_log_unlock(logfile);
        }
    }

#ifndef CONFIG_TCG_INTERPRETER
    /*
     * Assert that goto_ptr is implemented completely, setting an epilogue.
     * For tci, we use NULL as the signal to return from the interpreter,
     * so skip this check.
     */
    tcg_debug_assert(tcg_code_gen_epilogue != NULL);
#endif

    tcg_region_prologue_set(s);
}
1463  
/*
 * Reset per-translation state before generating a new TB: release
 * pooled memory, drop all non-global temps, clear labels and ops, and
 * reset the spill-frame allocation cursor.
 */
void tcg_func_start(TCGContext *s)
{
    tcg_pool_reset(s);
    s->nb_temps = s->nb_globals;

    /* No temps have been previously allocated for size or locality.  */
    memset(s->free_temps, 0, sizeof(s->free_temps));

    /* No constant temps have been previously allocated. */
    for (int i = 0; i < TCG_TYPE_COUNT; ++i) {
        if (s->const_table[i]) {
            g_hash_table_remove_all(s->const_table[i]);
        }
    }

    s->nb_ops = 0;
    s->nb_labels = 0;
    s->current_frame_offset = s->frame_start;

#ifdef CONFIG_DEBUG_TCG
    s->goto_tb_issue_mask = 0;
#endif

    QTAILQ_INIT(&s->ops);
    QTAILQ_INIT(&s->free_ops);
    QSIMPLEQ_INIT(&s->labels);

    /* The guest address width must have been fixed by now. */
    tcg_debug_assert(s->addr_type == TCG_TYPE_I32 ||
                     s->addr_type == TCG_TYPE_I64);

#if defined(CONFIG_SOFTMMU) && !defined(CONFIG_TCG_INTERPRETER)
    tcg_debug_assert(s->tlb_fast_offset < 0);
    tcg_debug_assert(s->tlb_fast_offset >= MIN_TLB_MASK_TABLE_OFS);
#endif

    tcg_debug_assert(s->insn_start_words > 0);
}
1501  
1502  static TCGTemp *tcg_temp_alloc(TCGContext *s)
1503  {
1504      int n = s->nb_temps++;
1505  
1506      if (n >= TCG_MAX_TEMPS) {
1507          tcg_raise_tb_overflow(s);
1508      }
1509      return memset(&s->temps[n], 0, sizeof(TCGTemp));
1510  }
1511  
/* Allocate the next temp slot as a global; asserts that globals are
   allocated before any translation temps exist. */
static TCGTemp *tcg_global_alloc(TCGContext *s)
{
    TCGTemp *ts;

    tcg_debug_assert(s->nb_globals == s->nb_temps);
    tcg_debug_assert(s->nb_globals < TCG_MAX_TEMPS);
    s->nb_globals++;
    ts = tcg_temp_alloc(s);
    ts->kind = TEMP_GLOBAL;

    return ts;
}
1524  
/*
 * Create a TEMP_FIXED global that lives permanently in host register
 * @reg, and reserve that register from the allocator.
 */
static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
                                            TCGReg reg, const char *name)
{
    TCGTemp *ts;

    /* A 32-bit host cannot hold a 64-bit value in one register. */
    tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);

    ts = tcg_global_alloc(s);
    ts->base_type = type;
    ts->type = type;
    ts->kind = TEMP_FIXED;
    ts->reg = reg;
    ts->name = name;
    tcg_regset_set_reg(s->reserved_regs, reg);

    return ts;
}
1542  
/* Record the spill frame [start, start+size) based at host register
   @reg, exposed as the fixed global "_frame". */
void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size)
{
    s->frame_start = start;
    s->frame_end = start + size;
    s->frame_temp
        = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame");
}
1550  
/*
 * Create a global temp backed by memory at @base + @offset.  If @base
 * is itself a global temp (rather than fixed in a register), the new
 * temp is marked indirect.  A 64-bit value on a 32-bit host is split
 * into two adjacent 32-bit halves named "<name>_0" and "<name>_1".
 */
TCGTemp *tcg_global_mem_new_internal(TCGType type, TCGv_ptr base,
                                     intptr_t offset, const char *name)
{
    TCGContext *s = tcg_ctx;
    TCGTemp *base_ts = tcgv_ptr_temp(base);
    TCGTemp *ts = tcg_global_alloc(s);
    int indirect_reg = 0;

    switch (base_ts->kind) {
    case TEMP_FIXED:
        break;
    case TEMP_GLOBAL:
        /* We do not support double-indirect registers.  */
        tcg_debug_assert(!base_ts->indirect_reg);
        base_ts->indirect_base = 1;
        s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64
                            ? 2 : 1);
        indirect_reg = 1;
        break;
    default:
        g_assert_not_reached();
    }

    if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
        /* Split the 64-bit global into two 32-bit halves. */
        TCGTemp *ts2 = tcg_global_alloc(s);
        char buf[64];

        ts->base_type = TCG_TYPE_I64;
        ts->type = TCG_TYPE_I32;
        ts->indirect_reg = indirect_reg;
        ts->mem_allocated = 1;
        ts->mem_base = base_ts;
        ts->mem_offset = offset;
        pstrcpy(buf, sizeof(buf), name);
        pstrcat(buf, sizeof(buf), "_0");
        ts->name = strdup(buf);

        tcg_debug_assert(ts2 == ts + 1);
        ts2->base_type = TCG_TYPE_I64;
        ts2->type = TCG_TYPE_I32;
        ts2->indirect_reg = indirect_reg;
        ts2->mem_allocated = 1;
        ts2->mem_base = base_ts;
        ts2->mem_offset = offset + 4;
        ts2->temp_subindex = 1;
        pstrcpy(buf, sizeof(buf), name);
        pstrcat(buf, sizeof(buf), "_1");
        ts2->name = strdup(buf);
    } else {
        ts->base_type = type;
        ts->type = type;
        ts->indirect_reg = indirect_reg;
        ts->mem_allocated = 1;
        ts->mem_base = base_ts;
        ts->mem_offset = offset;
        ts->name = name;
    }
    return ts;
}
1610  
/*
 * Allocate a translation temporary of the given type and kind.
 * TEMP_EBB temps are recycled from the per-type free list when
 * possible; types wider than a host register are built from multiple
 * adjacent TCGTemps that share a base_type.
 */
TCGTemp *tcg_temp_new_internal(TCGType type, TCGTempKind kind)
{
    TCGContext *s = tcg_ctx;
    TCGTemp *ts;
    int n;

    if (kind == TEMP_EBB) {
        int idx = find_first_bit(s->free_temps[type].l, TCG_MAX_TEMPS);

        if (idx < TCG_MAX_TEMPS) {
            /* There is already an available temp with the right type.  */
            clear_bit(idx, s->free_temps[type].l);

            ts = &s->temps[idx];
            ts->temp_allocated = 1;
            tcg_debug_assert(ts->base_type == type);
            tcg_debug_assert(ts->kind == kind);
            return ts;
        }
    } else {
        tcg_debug_assert(kind == TEMP_TB);
    }

    /* Number of host-register-sized pieces needed for this type. */
    switch (type) {
    case TCG_TYPE_I32:
    case TCG_TYPE_V64:
    case TCG_TYPE_V128:
    case TCG_TYPE_V256:
        n = 1;
        break;
    case TCG_TYPE_I64:
        n = 64 / TCG_TARGET_REG_BITS;
        break;
    case TCG_TYPE_I128:
        n = 128 / TCG_TARGET_REG_BITS;
        break;
    default:
        g_assert_not_reached();
    }

    ts = tcg_temp_alloc(s);
    ts->base_type = type;
    ts->temp_allocated = 1;
    ts->kind = kind;

    if (n == 1) {
        ts->type = type;
    } else {
        ts->type = TCG_TYPE_REG;

        /* Allocate the remaining pieces contiguously after the first. */
        for (int i = 1; i < n; ++i) {
            TCGTemp *ts2 = tcg_temp_alloc(s);

            tcg_debug_assert(ts2 == ts + i);
            ts2->base_type = type;
            ts2->type = TCG_TYPE_REG;
            ts2->temp_allocated = 1;
            ts2->temp_subindex = i;
            ts2->kind = kind;
        }
    }
    return ts;
}
1674  
/* Allocate an EBB-lifetime vector temporary, asserting (in debug
   builds) that the backend supports the requested vector size. */
TCGv_vec tcg_temp_new_vec(TCGType type)
{
    TCGTemp *t;

#ifdef CONFIG_DEBUG_TCG
    switch (type) {
    case TCG_TYPE_V64:
        assert(TCG_TARGET_HAS_v64);
        break;
    case TCG_TYPE_V128:
        assert(TCG_TARGET_HAS_v128);
        break;
    case TCG_TYPE_V256:
        assert(TCG_TARGET_HAS_v256);
        break;
    default:
        g_assert_not_reached();
    }
#endif

    t = tcg_temp_new_internal(type, TEMP_EBB);
    return temp_tcgv_vec(t);
}
1698  
1699  /* Create a new temp of the same type as an existing temp.  */
1700  TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match)
1701  {
1702      TCGTemp *t = tcgv_vec_temp(match);
1703  
1704      tcg_debug_assert(t->temp_allocated != 0);
1705  
1706      t = tcg_temp_new_internal(t->base_type, TEMP_EBB);
1707      return temp_tcgv_vec(t);
1708  }
1709  
/*
 * Release a temporary.  Only TEMP_EBB temps actually return to the
 * free list; freeing TEMP_CONST/TEMP_TB is silently ignored, and
 * freeing TEMP_FIXED/TEMP_GLOBAL is a bug.
 */
void tcg_temp_free_internal(TCGTemp *ts)
{
    TCGContext *s = tcg_ctx;

    switch (ts->kind) {
    case TEMP_CONST:
    case TEMP_TB:
        /* Silently ignore free. */
        break;
    case TEMP_EBB:
        tcg_debug_assert(ts->temp_allocated != 0);
        ts->temp_allocated = 0;
        set_bit(temp_idx(ts), s->free_temps[ts->base_type].l);
        break;
    default:
        /* It never made sense to free TEMP_FIXED or TEMP_GLOBAL. */
        g_assert_not_reached();
    }
}
1729  
/*
 * Return the interned TEMP_CONST temp for (@type, @val), creating it
 * on first use.  Constants are deduplicated per type via a hash table
 * keyed by the 64-bit value.
 */
TCGTemp *tcg_constant_internal(TCGType type, int64_t val)
{
    TCGContext *s = tcg_ctx;
    GHashTable *h = s->const_table[type];
    TCGTemp *ts;

    if (h == NULL) {
        /* Lazily create the per-type table on first constant. */
        h = g_hash_table_new(g_int64_hash, g_int64_equal);
        s->const_table[type] = h;
    }

    ts = g_hash_table_lookup(h, &val);
    if (ts == NULL) {
        int64_t *val_ptr;

        ts = tcg_temp_alloc(s);

        if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
            /* 64-bit constant on a 32-bit host: allocate a pair. */
            TCGTemp *ts2 = tcg_temp_alloc(s);

            tcg_debug_assert(ts2 == ts + 1);

            ts->base_type = TCG_TYPE_I64;
            ts->type = TCG_TYPE_I32;
            ts->kind = TEMP_CONST;
            ts->temp_allocated = 1;

            ts2->base_type = TCG_TYPE_I64;
            ts2->type = TCG_TYPE_I32;
            ts2->kind = TEMP_CONST;
            ts2->temp_allocated = 1;
            ts2->temp_subindex = 1;

            /*
             * Retain the full value of the 64-bit constant in the low
             * part, so that the hash table works.  Actual uses will
             * truncate the value to the low part.
             */
            ts[HOST_BIG_ENDIAN].val = val;
            ts[!HOST_BIG_ENDIAN].val = val >> 32;
            val_ptr = &ts[HOST_BIG_ENDIAN].val;
        } else {
            ts->base_type = type;
            ts->type = type;
            ts->kind = TEMP_CONST;
            ts->temp_allocated = 1;
            ts->val = val;
            val_ptr = &ts->val;
        }
        g_hash_table_insert(h, val_ptr, ts);
    }

    return ts;
}
1784  
1785  TCGv_vec tcg_constant_vec(TCGType type, unsigned vece, int64_t val)
1786  {
1787      val = dup_const(vece, val);
1788      return temp_tcgv_vec(tcg_constant_internal(type, val));
1789  }
1790  
1791  TCGv_vec tcg_constant_vec_matching(TCGv_vec match, unsigned vece, int64_t val)
1792  {
1793      TCGTemp *t = tcgv_vec_temp(match);
1794  
1795      tcg_debug_assert(t->temp_allocated != 0);
1796      return tcg_constant_vec(t->base_type, vece, val);
1797  }
1798  
1799  #ifdef CONFIG_DEBUG_TCG
/*
 * Out-of-line debug version (built only under CONFIG_DEBUG_TCG) of the
 * tcg-internal inline: index of TS within tcg_ctx->temps[], bounds-checked.
 */
size_t temp_idx(TCGTemp *ts)
{
    ptrdiff_t n = ts - tcg_ctx->temps;
    assert(n >= 0 && n < tcg_ctx->nb_temps);
    return n;
}
1806  
/*
 * Out-of-line debug version (built only under CONFIG_DEBUG_TCG): a
 * TCGv_i32 encodes the byte offset of its TCGTemp from tcg_ctx, so
 * validate that this offset lands inside, and on an element boundary
 * of, the temps[] array before reconstructing the pointer.
 */
TCGTemp *tcgv_i32_temp(TCGv_i32 v)
{
    uintptr_t o = (uintptr_t)v - offsetof(TCGContext, temps);

    assert(o < sizeof(TCGTemp) * tcg_ctx->nb_temps);
    assert(o % sizeof(TCGTemp) == 0);

    /* Arithmetic on void* is a GCC/Clang extension used throughout QEMU. */
    return (void *)tcg_ctx + (uintptr_t)v;
}
1816  #endif /* CONFIG_DEBUG_TCG */
1817  
/*
 * Return true if OP may appear in the opcode stream.
 * Test the runtime variable that controls each opcode.
 */
bool tcg_op_supported(TCGOpcode op)
{
    /* Any vector size at all enables the generic vector opcodes. */
    const bool have_vec
        = TCG_TARGET_HAS_v64 | TCG_TARGET_HAS_v128 | TCG_TARGET_HAS_v256;

    switch (op) {
    /* Opcodes required of every backend. */
    case INDEX_op_discard:
    case INDEX_op_set_label:
    case INDEX_op_call:
    case INDEX_op_br:
    case INDEX_op_mb:
    case INDEX_op_insn_start:
    case INDEX_op_exit_tb:
    case INDEX_op_goto_tb:
    case INDEX_op_goto_ptr:
    case INDEX_op_qemu_ld_a32_i32:
    case INDEX_op_qemu_ld_a64_i32:
    case INDEX_op_qemu_st_a32_i32:
    case INDEX_op_qemu_st_a64_i32:
    case INDEX_op_qemu_ld_a32_i64:
    case INDEX_op_qemu_ld_a64_i64:
    case INDEX_op_qemu_st_a32_i64:
    case INDEX_op_qemu_st_a64_i64:
        return true;

    case INDEX_op_qemu_st8_a32_i32:
    case INDEX_op_qemu_st8_a64_i32:
        return TCG_TARGET_HAS_qemu_st8_i32;

    case INDEX_op_qemu_ld_a32_i128:
    case INDEX_op_qemu_ld_a64_i128:
    case INDEX_op_qemu_st_a32_i128:
    case INDEX_op_qemu_st_a64_i128:
        return TCG_TARGET_HAS_qemu_ldst_i128;

    /* Basic 32-bit integer opcodes, mandatory on all hosts. */
    case INDEX_op_mov_i32:
    case INDEX_op_setcond_i32:
    case INDEX_op_brcond_i32:
    case INDEX_op_ld8u_i32:
    case INDEX_op_ld8s_i32:
    case INDEX_op_ld16u_i32:
    case INDEX_op_ld16s_i32:
    case INDEX_op_ld_i32:
    case INDEX_op_st8_i32:
    case INDEX_op_st16_i32:
    case INDEX_op_st_i32:
    case INDEX_op_add_i32:
    case INDEX_op_sub_i32:
    case INDEX_op_mul_i32:
    case INDEX_op_and_i32:
    case INDEX_op_or_i32:
    case INDEX_op_xor_i32:
    case INDEX_op_shl_i32:
    case INDEX_op_shr_i32:
    case INDEX_op_sar_i32:
        return true;

    /* Optional 32-bit opcodes, gated per backend. */
    case INDEX_op_movcond_i32:
        return TCG_TARGET_HAS_movcond_i32;
    case INDEX_op_div_i32:
    case INDEX_op_divu_i32:
        return TCG_TARGET_HAS_div_i32;
    case INDEX_op_rem_i32:
    case INDEX_op_remu_i32:
        return TCG_TARGET_HAS_rem_i32;
    case INDEX_op_div2_i32:
    case INDEX_op_divu2_i32:
        return TCG_TARGET_HAS_div2_i32;
    case INDEX_op_rotl_i32:
    case INDEX_op_rotr_i32:
        return TCG_TARGET_HAS_rot_i32;
    case INDEX_op_deposit_i32:
        return TCG_TARGET_HAS_deposit_i32;
    case INDEX_op_extract_i32:
        return TCG_TARGET_HAS_extract_i32;
    case INDEX_op_sextract_i32:
        return TCG_TARGET_HAS_sextract_i32;
    case INDEX_op_extract2_i32:
        return TCG_TARGET_HAS_extract2_i32;
    case INDEX_op_add2_i32:
        return TCG_TARGET_HAS_add2_i32;
    case INDEX_op_sub2_i32:
        return TCG_TARGET_HAS_sub2_i32;
    case INDEX_op_mulu2_i32:
        return TCG_TARGET_HAS_mulu2_i32;
    case INDEX_op_muls2_i32:
        return TCG_TARGET_HAS_muls2_i32;
    case INDEX_op_muluh_i32:
        return TCG_TARGET_HAS_muluh_i32;
    case INDEX_op_mulsh_i32:
        return TCG_TARGET_HAS_mulsh_i32;
    case INDEX_op_ext8s_i32:
        return TCG_TARGET_HAS_ext8s_i32;
    case INDEX_op_ext16s_i32:
        return TCG_TARGET_HAS_ext16s_i32;
    case INDEX_op_ext8u_i32:
        return TCG_TARGET_HAS_ext8u_i32;
    case INDEX_op_ext16u_i32:
        return TCG_TARGET_HAS_ext16u_i32;
    case INDEX_op_bswap16_i32:
        return TCG_TARGET_HAS_bswap16_i32;
    case INDEX_op_bswap32_i32:
        return TCG_TARGET_HAS_bswap32_i32;
    case INDEX_op_not_i32:
        return TCG_TARGET_HAS_not_i32;
    case INDEX_op_neg_i32:
        return TCG_TARGET_HAS_neg_i32;
    case INDEX_op_andc_i32:
        return TCG_TARGET_HAS_andc_i32;
    case INDEX_op_orc_i32:
        return TCG_TARGET_HAS_orc_i32;
    case INDEX_op_eqv_i32:
        return TCG_TARGET_HAS_eqv_i32;
    case INDEX_op_nand_i32:
        return TCG_TARGET_HAS_nand_i32;
    case INDEX_op_nor_i32:
        return TCG_TARGET_HAS_nor_i32;
    case INDEX_op_clz_i32:
        return TCG_TARGET_HAS_clz_i32;
    case INDEX_op_ctz_i32:
        return TCG_TARGET_HAS_ctz_i32;
    case INDEX_op_ctpop_i32:
        return TCG_TARGET_HAS_ctpop_i32;

    /* Double-word comparisons exist only on 32-bit hosts. */
    case INDEX_op_brcond2_i32:
    case INDEX_op_setcond2_i32:
        return TCG_TARGET_REG_BITS == 32;

    /* Basic 64-bit opcodes, mandatory on 64-bit hosts only. */
    case INDEX_op_mov_i64:
    case INDEX_op_setcond_i64:
    case INDEX_op_brcond_i64:
    case INDEX_op_ld8u_i64:
    case INDEX_op_ld8s_i64:
    case INDEX_op_ld16u_i64:
    case INDEX_op_ld16s_i64:
    case INDEX_op_ld32u_i64:
    case INDEX_op_ld32s_i64:
    case INDEX_op_ld_i64:
    case INDEX_op_st8_i64:
    case INDEX_op_st16_i64:
    case INDEX_op_st32_i64:
    case INDEX_op_st_i64:
    case INDEX_op_add_i64:
    case INDEX_op_sub_i64:
    case INDEX_op_mul_i64:
    case INDEX_op_and_i64:
    case INDEX_op_or_i64:
    case INDEX_op_xor_i64:
    case INDEX_op_shl_i64:
    case INDEX_op_shr_i64:
    case INDEX_op_sar_i64:
    case INDEX_op_ext_i32_i64:
    case INDEX_op_extu_i32_i64:
        return TCG_TARGET_REG_BITS == 64;

    /* Optional 64-bit opcodes, gated per backend. */
    case INDEX_op_movcond_i64:
        return TCG_TARGET_HAS_movcond_i64;
    case INDEX_op_div_i64:
    case INDEX_op_divu_i64:
        return TCG_TARGET_HAS_div_i64;
    case INDEX_op_rem_i64:
    case INDEX_op_remu_i64:
        return TCG_TARGET_HAS_rem_i64;
    case INDEX_op_div2_i64:
    case INDEX_op_divu2_i64:
        return TCG_TARGET_HAS_div2_i64;
    case INDEX_op_rotl_i64:
    case INDEX_op_rotr_i64:
        return TCG_TARGET_HAS_rot_i64;
    case INDEX_op_deposit_i64:
        return TCG_TARGET_HAS_deposit_i64;
    case INDEX_op_extract_i64:
        return TCG_TARGET_HAS_extract_i64;
    case INDEX_op_sextract_i64:
        return TCG_TARGET_HAS_sextract_i64;
    case INDEX_op_extract2_i64:
        return TCG_TARGET_HAS_extract2_i64;
    case INDEX_op_extrl_i64_i32:
        return TCG_TARGET_HAS_extrl_i64_i32;
    case INDEX_op_extrh_i64_i32:
        return TCG_TARGET_HAS_extrh_i64_i32;
    case INDEX_op_ext8s_i64:
        return TCG_TARGET_HAS_ext8s_i64;
    case INDEX_op_ext16s_i64:
        return TCG_TARGET_HAS_ext16s_i64;
    case INDEX_op_ext32s_i64:
        return TCG_TARGET_HAS_ext32s_i64;
    case INDEX_op_ext8u_i64:
        return TCG_TARGET_HAS_ext8u_i64;
    case INDEX_op_ext16u_i64:
        return TCG_TARGET_HAS_ext16u_i64;
    case INDEX_op_ext32u_i64:
        return TCG_TARGET_HAS_ext32u_i64;
    case INDEX_op_bswap16_i64:
        return TCG_TARGET_HAS_bswap16_i64;
    case INDEX_op_bswap32_i64:
        return TCG_TARGET_HAS_bswap32_i64;
    case INDEX_op_bswap64_i64:
        return TCG_TARGET_HAS_bswap64_i64;
    case INDEX_op_not_i64:
        return TCG_TARGET_HAS_not_i64;
    case INDEX_op_neg_i64:
        return TCG_TARGET_HAS_neg_i64;
    case INDEX_op_andc_i64:
        return TCG_TARGET_HAS_andc_i64;
    case INDEX_op_orc_i64:
        return TCG_TARGET_HAS_orc_i64;
    case INDEX_op_eqv_i64:
        return TCG_TARGET_HAS_eqv_i64;
    case INDEX_op_nand_i64:
        return TCG_TARGET_HAS_nand_i64;
    case INDEX_op_nor_i64:
        return TCG_TARGET_HAS_nor_i64;
    case INDEX_op_clz_i64:
        return TCG_TARGET_HAS_clz_i64;
    case INDEX_op_ctz_i64:
        return TCG_TARGET_HAS_ctz_i64;
    case INDEX_op_ctpop_i64:
        return TCG_TARGET_HAS_ctpop_i64;
    case INDEX_op_add2_i64:
        return TCG_TARGET_HAS_add2_i64;
    case INDEX_op_sub2_i64:
        return TCG_TARGET_HAS_sub2_i64;
    case INDEX_op_mulu2_i64:
        return TCG_TARGET_HAS_mulu2_i64;
    case INDEX_op_muls2_i64:
        return TCG_TARGET_HAS_muls2_i64;
    case INDEX_op_muluh_i64:
        return TCG_TARGET_HAS_muluh_i64;
    case INDEX_op_mulsh_i64:
        return TCG_TARGET_HAS_mulsh_i64;

    /* Vector opcodes additionally require some vector size at all. */
    case INDEX_op_mov_vec:
    case INDEX_op_dup_vec:
    case INDEX_op_dupm_vec:
    case INDEX_op_ld_vec:
    case INDEX_op_st_vec:
    case INDEX_op_add_vec:
    case INDEX_op_sub_vec:
    case INDEX_op_and_vec:
    case INDEX_op_or_vec:
    case INDEX_op_xor_vec:
    case INDEX_op_cmp_vec:
        return have_vec;
    case INDEX_op_dup2_vec:
        return have_vec && TCG_TARGET_REG_BITS == 32;
    case INDEX_op_not_vec:
        return have_vec && TCG_TARGET_HAS_not_vec;
    case INDEX_op_neg_vec:
        return have_vec && TCG_TARGET_HAS_neg_vec;
    case INDEX_op_abs_vec:
        return have_vec && TCG_TARGET_HAS_abs_vec;
    case INDEX_op_andc_vec:
        return have_vec && TCG_TARGET_HAS_andc_vec;
    case INDEX_op_orc_vec:
        return have_vec && TCG_TARGET_HAS_orc_vec;
    case INDEX_op_nand_vec:
        return have_vec && TCG_TARGET_HAS_nand_vec;
    case INDEX_op_nor_vec:
        return have_vec && TCG_TARGET_HAS_nor_vec;
    case INDEX_op_eqv_vec:
        return have_vec && TCG_TARGET_HAS_eqv_vec;
    case INDEX_op_mul_vec:
        return have_vec && TCG_TARGET_HAS_mul_vec;
    case INDEX_op_shli_vec:
    case INDEX_op_shri_vec:
    case INDEX_op_sari_vec:
        return have_vec && TCG_TARGET_HAS_shi_vec;
    case INDEX_op_shls_vec:
    case INDEX_op_shrs_vec:
    case INDEX_op_sars_vec:
        return have_vec && TCG_TARGET_HAS_shs_vec;
    case INDEX_op_shlv_vec:
    case INDEX_op_shrv_vec:
    case INDEX_op_sarv_vec:
        return have_vec && TCG_TARGET_HAS_shv_vec;
    case INDEX_op_rotli_vec:
        return have_vec && TCG_TARGET_HAS_roti_vec;
    case INDEX_op_rotls_vec:
        return have_vec && TCG_TARGET_HAS_rots_vec;
    case INDEX_op_rotlv_vec:
    case INDEX_op_rotrv_vec:
        return have_vec && TCG_TARGET_HAS_rotv_vec;
    case INDEX_op_ssadd_vec:
    case INDEX_op_usadd_vec:
    case INDEX_op_sssub_vec:
    case INDEX_op_ussub_vec:
        return have_vec && TCG_TARGET_HAS_sat_vec;
    case INDEX_op_smin_vec:
    case INDEX_op_umin_vec:
    case INDEX_op_smax_vec:
    case INDEX_op_umax_vec:
        return have_vec && TCG_TARGET_HAS_minmax_vec;
    case INDEX_op_bitsel_vec:
        return have_vec && TCG_TARGET_HAS_bitsel_vec;
    case INDEX_op_cmpsel_vec:
        return have_vec && TCG_TARGET_HAS_cmpsel_vec;

    default:
        /*
         * Anything past INDEX_op_last_generic is a backend-specific
         * opcode, emitted only by the backend itself, hence supported.
         */
        tcg_debug_assert(op > INDEX_op_last_generic && op < NB_OPS);
        return true;
    }
}
2123  
2124  static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs);
2125  
/*
 * Emit an INDEX_op_call to helper INFO with output RET and inputs ARGS.
 *
 * RET is NULL for a void helper; for a multi-part return it is the first
 * of 2 or 4 adjacent TCGTemp parts.  ARGS is indexed by the helper's
 * formal argument order via info->in[].arg_idx.  32-bit inputs that the
 * call layout requires in 64-bit form are widened into scratch EBB temps,
 * which are freed again once recorded in the op.
 */
static void tcg_gen_callN(TCGHelperInfo *info, TCGTemp *ret, TCGTemp **args)
{
    TCGv_i64 extend_free[MAX_CALL_IARGS];
    int n_extend = 0;
    TCGOp *op;
    int i, n, pi = 0, total_args;

    /* Compute the call layout for this helper exactly once. */
    if (unlikely(g_once_init_enter(HELPER_INFO_INIT(info)))) {
        init_call_layout(info);
        g_once_init_leave(HELPER_INFO_INIT(info), HELPER_INFO_INIT_VAL(info));
    }

    /* Outputs + inputs + function pointer + info pointer. */
    total_args = info->nr_out + info->nr_in + 2;
    op = tcg_op_alloc(INDEX_op_call, total_args);

#ifdef CONFIG_PLUGIN
    /* Flag helpers that may affect guest state */
    if (tcg_ctx->plugin_insn &&
        !(info->flags & TCG_CALL_PLUGIN) &&
        !(info->flags & TCG_CALL_NO_SIDE_EFFECTS)) {
        tcg_ctx->plugin_insn->calls_helpers = true;
    }
#endif

    TCGOP_CALLO(op) = n = info->nr_out;
    switch (n) {
    case 0:
        tcg_debug_assert(ret == NULL);
        break;
    case 1:
        tcg_debug_assert(ret != NULL);
        op->args[pi++] = temp_arg(ret);
        break;
    case 2:
    case 4:
        /* RET must be the first of n adjacent sub-parts of one value. */
        tcg_debug_assert(ret != NULL);
        tcg_debug_assert(ret->base_type == ret->type + ctz32(n));
        tcg_debug_assert(ret->temp_subindex == 0);
        for (i = 0; i < n; ++i) {
            op->args[pi++] = temp_arg(ret + i);
        }
        break;
    default:
        g_assert_not_reached();
    }

    TCGOP_CALLI(op) = n = info->nr_in;
    for (i = 0; i < n; i++) {
        const TCGCallArgumentLoc *loc = &info->in[i];
        TCGTemp *ts = args[loc->arg_idx] + loc->tmp_subindex;

        switch (loc->kind) {
        case TCG_CALL_ARG_NORMAL:
        case TCG_CALL_ARG_BY_REF:
        case TCG_CALL_ARG_BY_REF_N:
            op->args[pi++] = temp_arg(ts);
            break;

        case TCG_CALL_ARG_EXTEND_U:
        case TCG_CALL_ARG_EXTEND_S:
            {
                /* Widen the 32-bit input into a scratch 64-bit temp. */
                TCGv_i64 temp = tcg_temp_ebb_new_i64();
                TCGv_i32 orig = temp_tcgv_i32(ts);

                if (loc->kind == TCG_CALL_ARG_EXTEND_S) {
                    tcg_gen_ext_i32_i64(temp, orig);
                } else {
                    tcg_gen_extu_i32_i64(temp, orig);
                }
                op->args[pi++] = tcgv_i64_arg(temp);
                extend_free[n_extend++] = temp;
            }
            break;

        default:
            g_assert_not_reached();
        }
    }
    /* Trailing constant arguments: the helper and its descriptor. */
    op->args[pi++] = (uintptr_t)info->func;
    op->args[pi++] = (uintptr_t)info;
    tcg_debug_assert(pi == total_args);

    QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);

    /* The scratch widening temps are dead once the op is emitted. */
    tcg_debug_assert(n_extend < ARRAY_SIZE(extend_free));
    for (i = 0; i < n_extend; ++i) {
        tcg_temp_free_i64(extend_free[i]);
    }
}
2215  
/* Emit a helper call taking no input arguments. */
void tcg_gen_call0(TCGHelperInfo *info, TCGTemp *ret)
{
    tcg_gen_callN(info, ret, NULL);
}
2220  
2221  void tcg_gen_call1(TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1)
2222  {
2223      tcg_gen_callN(info, ret, &t1);
2224  }
2225  
2226  void tcg_gen_call2(TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1, TCGTemp *t2)
2227  {
2228      TCGTemp *args[2] = { t1, t2 };
2229      tcg_gen_callN(info, ret, args);
2230  }
2231  
2232  void tcg_gen_call3(TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1,
2233                     TCGTemp *t2, TCGTemp *t3)
2234  {
2235      TCGTemp *args[3] = { t1, t2, t3 };
2236      tcg_gen_callN(info, ret, args);
2237  }
2238  
2239  void tcg_gen_call4(TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1,
2240                     TCGTemp *t2, TCGTemp *t3, TCGTemp *t4)
2241  {
2242      TCGTemp *args[4] = { t1, t2, t3, t4 };
2243      tcg_gen_callN(info, ret, args);
2244  }
2245  
2246  void tcg_gen_call5(TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1,
2247                     TCGTemp *t2, TCGTemp *t3, TCGTemp *t4, TCGTemp *t5)
2248  {
2249      TCGTemp *args[5] = { t1, t2, t3, t4, t5 };
2250      tcg_gen_callN(info, ret, args);
2251  }
2252  
2253  void tcg_gen_call6(TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1, TCGTemp *t2,
2254                     TCGTemp *t3, TCGTemp *t4, TCGTemp *t5, TCGTemp *t6)
2255  {
2256      TCGTemp *args[6] = { t1, t2, t3, t4, t5, t6 };
2257      tcg_gen_callN(info, ret, args);
2258  }
2259  
2260  void tcg_gen_call7(TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1,
2261                     TCGTemp *t2, TCGTemp *t3, TCGTemp *t4,
2262                     TCGTemp *t5, TCGTemp *t6, TCGTemp *t7)
2263  {
2264      TCGTemp *args[7] = { t1, t2, t3, t4, t5, t6, t7 };
2265      tcg_gen_callN(info, ret, args);
2266  }
2267  
/*
 * Reset register-allocation state before translating a TB: assign every
 * temp its starting value location and clear the reg-to-temp map.
 */
static void tcg_reg_alloc_start(TCGContext *s)
{
    int i, n;

    for (i = 0, n = s->nb_temps; i < n; i++) {
        TCGTemp *ts = &s->temps[i];
        /* Default location: the temp's canonical memory slot. */
        TCGTempVal val = TEMP_VAL_MEM;

        switch (ts->kind) {
        case TEMP_CONST:
            val = TEMP_VAL_CONST;
            break;
        case TEMP_FIXED:
            /* Fixed temps live permanently in their register. */
            val = TEMP_VAL_REG;
            break;
        case TEMP_GLOBAL:
            break;
        case TEMP_EBB:
            /* EBB temps start with no value at all... */
            val = TEMP_VAL_DEAD;
            /* fall through */
        case TEMP_TB:
            /* ...and neither kind has backing storage assigned yet. */
            ts->mem_allocated = 0;
            break;
        default:
            g_assert_not_reached();
        }
        ts->val_type = val;
    }

    memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp));
}
2299  
/*
 * Format temp TS into BUF for the opcode dump: fixed/global temps by
 * name, TB temps as "locN" and EBB temps as "tmpN" (numbered past the
 * globals), and constants by value.  Returns BUF.
 */
static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size,
                                 TCGTemp *ts)
{
    int idx = temp_idx(ts);

    switch (ts->kind) {
    case TEMP_FIXED:
    case TEMP_GLOBAL:
        pstrcpy(buf, buf_size, ts->name);
        break;
    case TEMP_TB:
        snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
        break;
    case TEMP_EBB:
        snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
        break;
    case TEMP_CONST:
        switch (ts->type) {
        case TCG_TYPE_I32:
            snprintf(buf, buf_size, "$0x%x", (int32_t)ts->val);
            break;
#if TCG_TARGET_REG_BITS > 32
        case TCG_TYPE_I64:
            snprintf(buf, buf_size, "$0x%" PRIx64, ts->val);
            break;
#endif
        case TCG_TYPE_V64:
        case TCG_TYPE_V128:
        case TCG_TYPE_V256:
            /* Vector constants: bit-width tag plus the stored 64-bit value. */
            snprintf(buf, buf_size, "v%d$0x%" PRIx64,
                     64 << (ts->type - TCG_TYPE_V64), ts->val);
            break;
        default:
            g_assert_not_reached();
        }
        break;
    }
    return buf;
}
2339  
2340  static char *tcg_get_arg_str(TCGContext *s, char *buf,
2341                               int buf_size, TCGArg arg)
2342  {
2343      return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg));
2344  }
2345  
/* Printable names for TCGCond values, indexed by condition code. */
static const char * const cond_name[] =
{
    [TCG_COND_NEVER] = "never",
    [TCG_COND_ALWAYS] = "always",
    [TCG_COND_EQ] = "eq",
    [TCG_COND_NE] = "ne",
    [TCG_COND_LT] = "lt",
    [TCG_COND_GE] = "ge",
    [TCG_COND_LE] = "le",
    [TCG_COND_GT] = "gt",
    [TCG_COND_LTU] = "ltu",
    [TCG_COND_GEU] = "geu",
    [TCG_COND_LEU] = "leu",
    [TCG_COND_GTU] = "gtu"
};
2361  
/*
 * Printable names for memory-access sizes, indexed by the
 * MO_BSWAP | MO_SSIZE bits of a MemOp.  Unlisted combinations
 * (e.g. MO_SL with swap but no size match) are NULL.
 */
static const char * const ldst_name[(MO_BSWAP | MO_SSIZE) + 1] =
{
    [MO_UB]   = "ub",
    [MO_SB]   = "sb",
    [MO_LEUW] = "leuw",
    [MO_LESW] = "lesw",
    [MO_LEUL] = "leul",
    [MO_LESL] = "lesl",
    [MO_LEUQ] = "leq",
    [MO_BEUW] = "beuw",
    [MO_BESW] = "besw",
    [MO_BEUL] = "beul",
    [MO_BESL] = "besl",
    [MO_BEUQ] = "beq",
    [MO_128 + MO_BE] = "beo",
    [MO_128 + MO_LE] = "leo",
};
2379  
/* Printable prefixes for the MemOp alignment field (MO_AMASK bits). */
static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
    [MO_UNALN >> MO_ASHIFT]    = "un+",
    [MO_ALIGN >> MO_ASHIFT]    = "al+",
    [MO_ALIGN_2 >> MO_ASHIFT]  = "al2+",
    [MO_ALIGN_4 >> MO_ASHIFT]  = "al4+",
    [MO_ALIGN_8 >> MO_ASHIFT]  = "al8+",
    [MO_ALIGN_16 >> MO_ASHIFT] = "al16+",
    [MO_ALIGN_32 >> MO_ASHIFT] = "al32+",
    [MO_ALIGN_64 >> MO_ASHIFT] = "al64+",
};
2390  
/* Printable prefixes for the MemOp atomicity field (MO_ATOM_MASK bits). */
static const char * const atom_name[(MO_ATOM_MASK >> MO_ATOM_SHIFT) + 1] = {
    [MO_ATOM_IFALIGN >> MO_ATOM_SHIFT] = "",
    [MO_ATOM_IFALIGN_PAIR >> MO_ATOM_SHIFT] = "pair+",
    [MO_ATOM_WITHIN16 >> MO_ATOM_SHIFT] = "w16+",
    [MO_ATOM_WITHIN16_PAIR >> MO_ATOM_SHIFT] = "w16p+",
    [MO_ATOM_SUBALIGN >> MO_ATOM_SHIFT] = "sub+",
    [MO_ATOM_NONE >> MO_ATOM_SHIFT] = "noat+",
};
2399  
/*
 * Printable names for TCG_BSWAP_* flag combinations; entries not listed
 * here are empty and fall back to a hex dump in tcg_dump_ops.
 */
static const char bswap_flag_name[][6] = {
    [TCG_BSWAP_IZ] = "iz",
    [TCG_BSWAP_OZ] = "oz",
    [TCG_BSWAP_OS] = "os",
    [TCG_BSWAP_IZ | TCG_BSWAP_OZ] = "iz,oz",
    [TCG_BSWAP_IZ | TCG_BSWAP_OS] = "iz,os",
};
2407  
/* True if D contains at most one register (i.e. zero or a power of 2). */
static inline bool tcg_regset_single(TCGRegSet d)
{
    return (d & (d - 1)) == 0;
}
2412  
2413  static inline TCGReg tcg_regset_first(TCGRegSet d)
2414  {
2415      if (TCG_TARGET_NB_REGS <= 32) {
2416          return ctz32(d);
2417      } else {
2418          return ctz64(d);
2419      }
2420  }
2421  
/*
 * Return only the number of characters output -- no error return.
 * fprintf errors (negative returns) are folded to 0 so the callers'
 * running column counts never decrease.
 */
#define ne_fprintf(...) \
    ({ int ret_ = fprintf(__VA_ARGS__); ret_ >= 0 ? ret_ : 0; })
2425  
2426  static void tcg_dump_ops(TCGContext *s, FILE *f, bool have_prefs)
2427  {
2428      char buf[128];
2429      TCGOp *op;
2430  
2431      QTAILQ_FOREACH(op, &s->ops, link) {
2432          int i, k, nb_oargs, nb_iargs, nb_cargs;
2433          const TCGOpDef *def;
2434          TCGOpcode c;
2435          int col = 0;
2436  
2437          c = op->opc;
2438          def = &tcg_op_defs[c];
2439  
2440          if (c == INDEX_op_insn_start) {
2441              nb_oargs = 0;
2442              col += ne_fprintf(f, "\n ----");
2443  
2444              for (i = 0, k = s->insn_start_words; i < k; ++i) {
2445                  col += ne_fprintf(f, " %016" PRIx64,
2446                                    tcg_get_insn_start_param(op, i));
2447              }
2448          } else if (c == INDEX_op_call) {
2449              const TCGHelperInfo *info = tcg_call_info(op);
2450              void *func = tcg_call_func(op);
2451  
2452              /* variable number of arguments */
2453              nb_oargs = TCGOP_CALLO(op);
2454              nb_iargs = TCGOP_CALLI(op);
2455              nb_cargs = def->nb_cargs;
2456  
2457              col += ne_fprintf(f, " %s ", def->name);
2458  
2459              /*
2460               * Print the function name from TCGHelperInfo, if available.
2461               * Note that plugins have a template function for the info,
2462               * but the actual function pointer comes from the plugin.
2463               */
2464              if (func == info->func) {
2465                  col += ne_fprintf(f, "%s", info->name);
2466              } else {
2467                  col += ne_fprintf(f, "plugin(%p)", func);
2468              }
2469  
2470              col += ne_fprintf(f, ",$0x%x,$%d", info->flags, nb_oargs);
2471              for (i = 0; i < nb_oargs; i++) {
2472                  col += ne_fprintf(f, ",%s", tcg_get_arg_str(s, buf, sizeof(buf),
2473                                                              op->args[i]));
2474              }
2475              for (i = 0; i < nb_iargs; i++) {
2476                  TCGArg arg = op->args[nb_oargs + i];
2477                  const char *t = tcg_get_arg_str(s, buf, sizeof(buf), arg);
2478                  col += ne_fprintf(f, ",%s", t);
2479              }
2480          } else {
2481              col += ne_fprintf(f, " %s ", def->name);
2482  
2483              nb_oargs = def->nb_oargs;
2484              nb_iargs = def->nb_iargs;
2485              nb_cargs = def->nb_cargs;
2486  
2487              if (def->flags & TCG_OPF_VECTOR) {
2488                  col += ne_fprintf(f, "v%d,e%d,", 64 << TCGOP_VECL(op),
2489                                    8 << TCGOP_VECE(op));
2490              }
2491  
2492              k = 0;
2493              for (i = 0; i < nb_oargs; i++) {
2494                  const char *sep =  k ? "," : "";
2495                  col += ne_fprintf(f, "%s%s", sep,
2496                                    tcg_get_arg_str(s, buf, sizeof(buf),
2497                                                    op->args[k++]));
2498              }
2499              for (i = 0; i < nb_iargs; i++) {
2500                  const char *sep =  k ? "," : "";
2501                  col += ne_fprintf(f, "%s%s", sep,
2502                                    tcg_get_arg_str(s, buf, sizeof(buf),
2503                                                    op->args[k++]));
2504              }
2505              switch (c) {
2506              case INDEX_op_brcond_i32:
2507              case INDEX_op_setcond_i32:
2508              case INDEX_op_movcond_i32:
2509              case INDEX_op_brcond2_i32:
2510              case INDEX_op_setcond2_i32:
2511              case INDEX_op_brcond_i64:
2512              case INDEX_op_setcond_i64:
2513              case INDEX_op_movcond_i64:
2514              case INDEX_op_cmp_vec:
2515              case INDEX_op_cmpsel_vec:
2516                  if (op->args[k] < ARRAY_SIZE(cond_name)
2517                      && cond_name[op->args[k]]) {
2518                      col += ne_fprintf(f, ",%s", cond_name[op->args[k++]]);
2519                  } else {
2520                      col += ne_fprintf(f, ",$0x%" TCG_PRIlx, op->args[k++]);
2521                  }
2522                  i = 1;
2523                  break;
2524              case INDEX_op_qemu_ld_a32_i32:
2525              case INDEX_op_qemu_ld_a64_i32:
2526              case INDEX_op_qemu_st_a32_i32:
2527              case INDEX_op_qemu_st_a64_i32:
2528              case INDEX_op_qemu_st8_a32_i32:
2529              case INDEX_op_qemu_st8_a64_i32:
2530              case INDEX_op_qemu_ld_a32_i64:
2531              case INDEX_op_qemu_ld_a64_i64:
2532              case INDEX_op_qemu_st_a32_i64:
2533              case INDEX_op_qemu_st_a64_i64:
2534              case INDEX_op_qemu_ld_a32_i128:
2535              case INDEX_op_qemu_ld_a64_i128:
2536              case INDEX_op_qemu_st_a32_i128:
2537              case INDEX_op_qemu_st_a64_i128:
2538                  {
2539                      const char *s_al, *s_op, *s_at;
2540                      MemOpIdx oi = op->args[k++];
2541                      MemOp op = get_memop(oi);
2542                      unsigned ix = get_mmuidx(oi);
2543  
2544                      s_al = alignment_name[(op & MO_AMASK) >> MO_ASHIFT];
2545                      s_op = ldst_name[op & (MO_BSWAP | MO_SSIZE)];
2546                      s_at = atom_name[(op & MO_ATOM_MASK) >> MO_ATOM_SHIFT];
2547                      op &= ~(MO_AMASK | MO_BSWAP | MO_SSIZE | MO_ATOM_MASK);
2548  
2549                      /* If all fields are accounted for, print symbolically. */
2550                      if (!op && s_al && s_op && s_at) {
2551                          col += ne_fprintf(f, ",%s%s%s,%u",
2552                                            s_at, s_al, s_op, ix);
2553                      } else {
2554                          op = get_memop(oi);
2555                          col += ne_fprintf(f, ",$0x%x,%u", op, ix);
2556                      }
2557                      i = 1;
2558                  }
2559                  break;
2560              case INDEX_op_bswap16_i32:
2561              case INDEX_op_bswap16_i64:
2562              case INDEX_op_bswap32_i32:
2563              case INDEX_op_bswap32_i64:
2564              case INDEX_op_bswap64_i64:
2565                  {
2566                      TCGArg flags = op->args[k];
2567                      const char *name = NULL;
2568  
2569                      if (flags < ARRAY_SIZE(bswap_flag_name)) {
2570                          name = bswap_flag_name[flags];
2571                      }
2572                      if (name) {
2573                          col += ne_fprintf(f, ",%s", name);
2574                      } else {
2575                          col += ne_fprintf(f, ",$0x%" TCG_PRIlx, flags);
2576                      }
2577                      i = k = 1;
2578                  }
2579                  break;
2580              default:
2581                  i = 0;
2582                  break;
2583              }
2584              switch (c) {
2585              case INDEX_op_set_label:
2586              case INDEX_op_br:
2587              case INDEX_op_brcond_i32:
2588              case INDEX_op_brcond_i64:
2589              case INDEX_op_brcond2_i32:
2590                  col += ne_fprintf(f, "%s$L%d", k ? "," : "",
2591                                    arg_label(op->args[k])->id);
2592                  i++, k++;
2593                  break;
2594              case INDEX_op_mb:
2595                  {
2596                      TCGBar membar = op->args[k];
2597                      const char *b_op, *m_op;
2598  
2599                      switch (membar & TCG_BAR_SC) {
2600                      case 0:
2601                          b_op = "none";
2602                          break;
2603                      case TCG_BAR_LDAQ:
2604                          b_op = "acq";
2605                          break;
2606                      case TCG_BAR_STRL:
2607                          b_op = "rel";
2608                          break;
2609                      case TCG_BAR_SC:
2610                          b_op = "seq";
2611                          break;
2612                      default:
2613                          g_assert_not_reached();
2614                      }
2615  
2616                      switch (membar & TCG_MO_ALL) {
2617                      case 0:
2618                          m_op = "none";
2619                          break;
2620                      case TCG_MO_LD_LD:
2621                          m_op = "rr";
2622                          break;
2623                      case TCG_MO_LD_ST:
2624                          m_op = "rw";
2625                          break;
2626                      case TCG_MO_ST_LD:
2627                          m_op = "wr";
2628                          break;
2629                      case TCG_MO_ST_ST:
2630                          m_op = "ww";
2631                          break;
2632                      case TCG_MO_LD_LD | TCG_MO_LD_ST:
2633                          m_op = "rr+rw";
2634                          break;
2635                      case TCG_MO_LD_LD | TCG_MO_ST_LD:
2636                          m_op = "rr+wr";
2637                          break;
2638                      case TCG_MO_LD_LD | TCG_MO_ST_ST:
2639                          m_op = "rr+ww";
2640                          break;
2641                      case TCG_MO_LD_ST | TCG_MO_ST_LD:
2642                          m_op = "rw+wr";
2643                          break;
2644                      case TCG_MO_LD_ST | TCG_MO_ST_ST:
2645                          m_op = "rw+ww";
2646                          break;
2647                      case TCG_MO_ST_LD | TCG_MO_ST_ST:
2648                          m_op = "wr+ww";
2649                          break;
2650                      case TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_LD:
2651                          m_op = "rr+rw+wr";
2652                          break;
2653                      case TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_ST:
2654                          m_op = "rr+rw+ww";
2655                          break;
2656                      case TCG_MO_LD_LD | TCG_MO_ST_LD | TCG_MO_ST_ST:
2657                          m_op = "rr+wr+ww";
2658                          break;
2659                      case TCG_MO_LD_ST | TCG_MO_ST_LD | TCG_MO_ST_ST:
2660                          m_op = "rw+wr+ww";
2661                          break;
2662                      case TCG_MO_ALL:
2663                          m_op = "all";
2664                          break;
2665                      default:
2666                          g_assert_not_reached();
2667                      }
2668  
2669                      col += ne_fprintf(f, "%s%s:%s", (k ? "," : ""), b_op, m_op);
2670                      i++, k++;
2671                  }
2672                  break;
2673              default:
2674                  break;
2675              }
2676              for (; i < nb_cargs; i++, k++) {
2677                  col += ne_fprintf(f, "%s$0x%" TCG_PRIlx, k ? "," : "",
2678                                    op->args[k]);
2679              }
2680          }
2681  
2682          if (have_prefs || op->life) {
2683              for (; col < 40; ++col) {
2684                  putc(' ', f);
2685              }
2686          }
2687  
2688          if (op->life) {
2689              unsigned life = op->life;
2690  
2691              if (life & (SYNC_ARG * 3)) {
2692                  ne_fprintf(f, "  sync:");
2693                  for (i = 0; i < 2; ++i) {
2694                      if (life & (SYNC_ARG << i)) {
2695                          ne_fprintf(f, " %d", i);
2696                      }
2697                  }
2698              }
2699              life /= DEAD_ARG;
2700              if (life) {
2701                  ne_fprintf(f, "  dead:");
2702                  for (i = 0; life; ++i, life >>= 1) {
2703                      if (life & 1) {
2704                          ne_fprintf(f, " %d", i);
2705                      }
2706                  }
2707              }
2708          }
2709  
2710          if (have_prefs) {
2711              for (i = 0; i < nb_oargs; ++i) {
2712                  TCGRegSet set = output_pref(op, i);
2713  
2714                  if (i == 0) {
2715                      ne_fprintf(f, "  pref=");
2716                  } else {
2717                      ne_fprintf(f, ",");
2718                  }
2719                  if (set == 0) {
2720                      ne_fprintf(f, "none");
2721                  } else if (set == MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS)) {
2722                      ne_fprintf(f, "all");
2723  #ifdef CONFIG_DEBUG_TCG
2724                  } else if (tcg_regset_single(set)) {
2725                      TCGReg reg = tcg_regset_first(set);
2726                      ne_fprintf(f, "%s", tcg_target_reg_names[reg]);
2727  #endif
2728                  } else if (TCG_TARGET_NB_REGS <= 32) {
2729                      ne_fprintf(f, "0x%x", (uint32_t)set);
2730                  } else {
2731                      ne_fprintf(f, "0x%" PRIx64, (uint64_t)set);
2732                  }
2733              }
2734          }
2735  
2736          putc('\n', f);
2737      }
2738  }
2739  
2740  /* we give more priority to constraints with less registers */
2741  static int get_constraint_priority(const TCGOpDef *def, int k)
2742  {
2743      const TCGArgConstraint *arg_ct = &def->args_ct[k];
2744      int n = ctpop64(arg_ct->regs);
2745  
2746      /*
2747       * Sort constraints of a single register first, which includes output
2748       * aliases (which must exactly match the input already allocated).
2749       */
2750      if (n == 1 || arg_ct->oalias) {
2751          return INT_MAX;
2752      }
2753  
2754      /*
2755       * Sort register pairs next, first then second immediately after.
2756       * Arbitrarily sort multiple pairs by the index of the first reg;
2757       * there shouldn't be many pairs.
2758       */
2759      switch (arg_ct->pair) {
2760      case 1:
2761      case 3:
2762          return (k + 1) * 2;
2763      case 2:
2764          return (arg_ct->pair_index + 1) * 2 - 1;
2765      }
2766  
2767      /* Finally, sort by decreasing register count. */
2768      assert(n > 1);
2769      return -n;
2770  }
2771  
2772  /* sort from highest priority to lowest */
2773  static void sort_constraints(TCGOpDef *def, int start, int n)
2774  {
2775      int i, j;
2776      TCGArgConstraint *a = def->args_ct;
2777  
2778      for (i = 0; i < n; i++) {
2779          a[start + i].sort_index = start + i;
2780      }
2781      if (n <= 1) {
2782          return;
2783      }
2784      for (i = 0; i < n - 1; i++) {
2785          for (j = i + 1; j < n; j++) {
2786              int p1 = get_constraint_priority(def, a[start + i].sort_index);
2787              int p2 = get_constraint_priority(def, a[start + j].sort_index);
2788              if (p1 < p2) {
2789                  int tmp = a[start + i].sort_index;
2790                  a[start + i].sort_index = a[start + j].sort_index;
2791                  a[start + j].sort_index = tmp;
2792              }
2793          }
2794      }
2795  }
2796  
/*
 * Parse each opcode's target constraint strings (obtained via
 * tcg_target_op_def() and tcg-target-con-str.h) into the args_ct[]
 * representation used by the register allocator, fix up aliased
 * register pairs, and sort the constraints by allocation priority.
 */
static void process_op_defs(TCGContext *s)
{
    TCGOpcode op;

    for (op = 0; op < NB_OPS; op++) {
        TCGOpDef *def = &tcg_op_defs[op];
        const TCGTargetOpDef *tdefs;
        bool saw_alias_pair = false;
        int i, o, i2, o2, nb_args;

        if (def->flags & TCG_OPF_NOT_PRESENT) {
            continue;
        }

        nb_args = def->nb_iargs + def->nb_oargs;
        if (nb_args == 0) {
            continue;
        }

        /*
         * Macro magic should make it impossible, but double-check that
         * the array index is in range.  Since the signness of an enum
         * is implementation defined, force the result to unsigned.
         */
        unsigned con_set = tcg_target_op_def(op);
        tcg_debug_assert(con_set < ARRAY_SIZE(constraint_sets));
        tdefs = &constraint_sets[con_set];

        /* Outputs come first in args_ct_str, then inputs. */
        for (i = 0; i < nb_args; i++) {
            const char *ct_str = tdefs->args_ct_str[i];
            bool input_p = i >= def->nb_oargs;

            /* Incomplete TCGTargetOpDef entry. */
            tcg_debug_assert(ct_str != NULL);

            switch (*ct_str) {
            case '0' ... '9':
                /* Digit N: input argument I aliases output argument N. */
                o = *ct_str - '0';
                tcg_debug_assert(input_p);
                tcg_debug_assert(o < def->nb_oargs);
                tcg_debug_assert(def->args_ct[o].regs != 0);
                tcg_debug_assert(!def->args_ct[o].oalias);
                def->args_ct[i] = def->args_ct[o];
                /* The output sets oalias.  */
                def->args_ct[o].oalias = 1;
                def->args_ct[o].alias_index = i;
                /* The input sets ialias. */
                def->args_ct[i].ialias = 1;
                def->args_ct[i].alias_index = o;
                if (def->args_ct[i].pair) {
                    saw_alias_pair = true;
                }
                tcg_debug_assert(ct_str[1] == '\0');
                continue;

            case '&':
                /* '&': output is allocated to a fresh register (newreg). */
                tcg_debug_assert(!input_p);
                def->args_ct[i].newreg = true;
                ct_str++;
                break;

            case 'p': /* plus */
                /* Allocate to the register after the previous. */
                tcg_debug_assert(i > (input_p ? def->nb_oargs : 0));
                o = i - 1;
                tcg_debug_assert(!def->args_ct[o].pair);
                tcg_debug_assert(!def->args_ct[o].ct);
                def->args_ct[i] = (TCGArgConstraint){
                    .pair = 2,
                    .pair_index = o,
                    .regs = def->args_ct[o].regs << 1,
                };
                def->args_ct[o].pair = 1;
                def->args_ct[o].pair_index = i;
                tcg_debug_assert(ct_str[1] == '\0');
                continue;

            case 'm': /* minus */
                /* Allocate to the register before the previous. */
                tcg_debug_assert(i > (input_p ? def->nb_oargs : 0));
                o = i - 1;
                tcg_debug_assert(!def->args_ct[o].pair);
                tcg_debug_assert(!def->args_ct[o].ct);
                def->args_ct[i] = (TCGArgConstraint){
                    .pair = 1,
                    .pair_index = o,
                    .regs = def->args_ct[o].regs >> 1,
                };
                def->args_ct[o].pair = 2;
                def->args_ct[o].pair_index = i;
                tcg_debug_assert(ct_str[1] == '\0');
                continue;
            }

            /* Remaining letters accumulate const/register constraints. */
            do {
                switch (*ct_str) {
                case 'i':
                    def->args_ct[i].ct |= TCG_CT_CONST;
                    break;

                /* Include all of the target-specific constraints. */

#undef CONST
#define CONST(CASE, MASK) \
    case CASE: def->args_ct[i].ct |= MASK; break;
#define REGS(CASE, MASK) \
    case CASE: def->args_ct[i].regs |= MASK; break;

#include "tcg-target-con-str.h"

#undef REGS
#undef CONST
                default:
                case '0' ... '9':
                case '&':
                case 'p':
                case 'm':
                    /* Typo in TCGTargetOpDef constraint. */
                    g_assert_not_reached();
                }
            } while (*++ct_str != '\0');
        }

        /* TCGTargetOpDef entry with too much information? */
        tcg_debug_assert(i == TCG_MAX_OP_ARGS || tdefs->args_ct_str[i] == NULL);

        /*
         * Fix up output pairs that are aliased with inputs.
         * When we created the alias, we copied pair from the output.
         * There are three cases:
         *    (1a) Pairs of inputs alias pairs of outputs.
         *    (1b) One input aliases the first of a pair of outputs.
         *    (2)  One input aliases the second of a pair of outputs.
         *
         * Case 1a is handled by making sure that the pair_index'es are
         * properly updated so that they appear the same as a pair of inputs.
         *
         * Case 1b is handled by setting the pair_index of the input to
         * itself, simply so it doesn't point to an unrelated argument.
         * Since we don't encounter the "second" during the input allocation
         * phase, nothing happens with the second half of the input pair.
         *
         * Case 2 is handled by setting the second input to pair=3, the
         * first output to pair=3, and the pair_index'es to match.
         */
        if (saw_alias_pair) {
            for (i = def->nb_oargs; i < nb_args; i++) {
                /*
                 * Since [0-9pm] must be alone in the constraint string,
                 * the only way they can both be set is if the pair comes
                 * from the output alias.
                 */
                if (!def->args_ct[i].ialias) {
                    continue;
                }
                switch (def->args_ct[i].pair) {
                case 0:
                    break;
                case 1:
                    o = def->args_ct[i].alias_index;
                    o2 = def->args_ct[o].pair_index;
                    tcg_debug_assert(def->args_ct[o].pair == 1);
                    tcg_debug_assert(def->args_ct[o2].pair == 2);
                    if (def->args_ct[o2].oalias) {
                        /* Case 1a */
                        i2 = def->args_ct[o2].alias_index;
                        tcg_debug_assert(def->args_ct[i2].pair == 2);
                        def->args_ct[i2].pair_index = i;
                        def->args_ct[i].pair_index = i2;
                    } else {
                        /* Case 1b */
                        def->args_ct[i].pair_index = i;
                    }
                    break;
                case 2:
                    o = def->args_ct[i].alias_index;
                    o2 = def->args_ct[o].pair_index;
                    tcg_debug_assert(def->args_ct[o].pair == 2);
                    tcg_debug_assert(def->args_ct[o2].pair == 1);
                    if (def->args_ct[o2].oalias) {
                        /* Case 1a */
                        i2 = def->args_ct[o2].alias_index;
                        tcg_debug_assert(def->args_ct[i2].pair == 1);
                        def->args_ct[i2].pair_index = i;
                        def->args_ct[i].pair_index = i2;
                    } else {
                        /* Case 2 */
                        def->args_ct[i].pair = 3;
                        def->args_ct[o2].pair = 3;
                        def->args_ct[i].pair_index = o2;
                        def->args_ct[o2].pair_index = i;
                    }
                    break;
                default:
                    g_assert_not_reached();
                }
            }
        }

        /* sort the constraints (XXX: this is just an heuristic) */
        sort_constraints(def, 0, def->nb_oargs);
        sort_constraints(def, def->nb_oargs, def->nb_iargs);
    }
}
3001  
3002  static void remove_label_use(TCGOp *op, int idx)
3003  {
3004      TCGLabel *label = arg_label(op->args[idx]);
3005      TCGLabelUse *use;
3006  
3007      QSIMPLEQ_FOREACH(use, &label->branches, next) {
3008          if (use->op == op) {
3009              QSIMPLEQ_REMOVE(&label->branches, use, TCGLabelUse, next);
3010              return;
3011          }
3012      }
3013      g_assert_not_reached();
3014  }
3015  
3016  void tcg_op_remove(TCGContext *s, TCGOp *op)
3017  {
3018      switch (op->opc) {
3019      case INDEX_op_br:
3020          remove_label_use(op, 0);
3021          break;
3022      case INDEX_op_brcond_i32:
3023      case INDEX_op_brcond_i64:
3024          remove_label_use(op, 3);
3025          break;
3026      case INDEX_op_brcond2_i32:
3027          remove_label_use(op, 5);
3028          break;
3029      default:
3030          break;
3031      }
3032  
3033      QTAILQ_REMOVE(&s->ops, op, link);
3034      QTAILQ_INSERT_TAIL(&s->free_ops, op, link);
3035      s->nb_ops--;
3036  }
3037  
3038  void tcg_remove_ops_after(TCGOp *op)
3039  {
3040      TCGContext *s = tcg_ctx;
3041  
3042      while (true) {
3043          TCGOp *last = tcg_last_op();
3044          if (last == op) {
3045              return;
3046          }
3047          tcg_op_remove(s, last);
3048      }
3049  }
3050  
3051  static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs)
3052  {
3053      TCGContext *s = tcg_ctx;
3054      TCGOp *op = NULL;
3055  
3056      if (unlikely(!QTAILQ_EMPTY(&s->free_ops))) {
3057          QTAILQ_FOREACH(op, &s->free_ops, link) {
3058              if (nargs <= op->nargs) {
3059                  QTAILQ_REMOVE(&s->free_ops, op, link);
3060                  nargs = op->nargs;
3061                  goto found;
3062              }
3063          }
3064      }
3065  
3066      /* Most opcodes have 3 or 4 operands: reduce fragmentation. */
3067      nargs = MAX(4, nargs);
3068      op = tcg_malloc(sizeof(TCGOp) + sizeof(TCGArg) * nargs);
3069  
3070   found:
3071      memset(op, 0, offsetof(TCGOp, link));
3072      op->opc = opc;
3073      op->nargs = nargs;
3074  
3075      /* Check for bitfield overflow. */
3076      tcg_debug_assert(op->nargs == nargs);
3077  
3078      s->nb_ops++;
3079      return op;
3080  }
3081  
3082  TCGOp *tcg_emit_op(TCGOpcode opc, unsigned nargs)
3083  {
3084      TCGOp *op = tcg_op_alloc(opc, nargs);
3085      QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
3086      return op;
3087  }
3088  
3089  TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op,
3090                              TCGOpcode opc, unsigned nargs)
3091  {
3092      TCGOp *new_op = tcg_op_alloc(opc, nargs);
3093      QTAILQ_INSERT_BEFORE(old_op, new_op, link);
3094      return new_op;
3095  }
3096  
3097  TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op,
3098                             TCGOpcode opc, unsigned nargs)
3099  {
3100      TCGOp *new_op = tcg_op_alloc(opc, nargs);
3101      QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link);
3102      return new_op;
3103  }
3104  
3105  static void move_label_uses(TCGLabel *to, TCGLabel *from)
3106  {
3107      TCGLabelUse *u;
3108  
3109      QSIMPLEQ_FOREACH(u, &from->branches, next) {
3110          TCGOp *op = u->op;
3111          switch (op->opc) {
3112          case INDEX_op_br:
3113              op->args[0] = label_arg(to);
3114              break;
3115          case INDEX_op_brcond_i32:
3116          case INDEX_op_brcond_i64:
3117              op->args[3] = label_arg(to);
3118              break;
3119          case INDEX_op_brcond2_i32:
3120              op->args[5] = label_arg(to);
3121              break;
3122          default:
3123              g_assert_not_reached();
3124          }
3125      }
3126  
3127      QSIMPLEQ_CONCAT(&to->branches, &from->branches);
3128  }
3129  
/* Reachable analysis : remove unreachable code.  */
static void __attribute__((noinline))
reachable_code_pass(TCGContext *s)
{
    TCGOp *op, *op_next, *op_prev;
    /* True while scanning ops that follow an unconditional control
       transfer and precede the next label, i.e. dead code.  */
    bool dead = false;

    QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
        /* By default, ops inside a dead region are removed. */
        bool remove = dead;
        TCGLabel *label;

        switch (op->opc) {
        case INDEX_op_set_label:
            label = arg_label(op->args[0]);

            /*
             * Note that the first op in the TB is always a load,
             * so there is always something before a label.
             */
            op_prev = QTAILQ_PREV(op, link);

            /*
             * If we find two sequential labels, move all branches to
             * reference the second label and remove the first label.
             * Do this before branch to next optimization, so that the
             * middle label is out of the way.
             */
            if (op_prev->opc == INDEX_op_set_label) {
                move_label_uses(label, arg_label(op_prev->args[0]));
                tcg_op_remove(s, op_prev);
                /* Re-fetch the predecessor after the removal. */
                op_prev = QTAILQ_PREV(op, link);
            }

            /*
             * Optimization can fold conditional branches to unconditional.
             * If we find a label which is preceded by an unconditional
             * branch to next, remove the branch.  We couldn't do this when
             * processing the branch because any dead code between the branch
             * and label had not yet been removed.
             */
            if (op_prev->opc == INDEX_op_br &&
                label == arg_label(op_prev->args[0])) {
                tcg_op_remove(s, op_prev);
                /* Fall through means insns become live again.  */
                dead = false;
            }

            if (QSIMPLEQ_EMPTY(&label->branches)) {
                /*
                 * While there is an occasional backward branch, virtually
                 * all branches generated by the translators are forward.
                 * Which means that generally we will have already removed
                 * all references to the label that will be, and there is
                 * little to be gained by iterating.
                 */
                remove = true;
            } else {
                /* Once we see a label, insns become live again.  */
                dead = false;
                remove = false;
            }
            break;

        case INDEX_op_br:
        case INDEX_op_exit_tb:
        case INDEX_op_goto_ptr:
            /* Unconditional branches; everything following is dead.  */
            dead = true;
            break;

        case INDEX_op_call:
            /* Notice noreturn helper calls, raising exceptions.  */
            if (tcg_call_flags(op) & TCG_CALL_NO_RETURN) {
                dead = true;
            }
            break;

        case INDEX_op_insn_start:
            /* Never remove -- we need to keep these for unwind.  */
            remove = false;
            break;

        default:
            break;
        }

        if (remove) {
            tcg_op_remove(s, op);
        }
    }
}
3221  
/* Liveness state bits stored in TCGTemp.state during the liveness passes. */
#define TS_DEAD  1   /* the temp's value is dead at this point */
#define TS_MEM   2   /* the temp's value should be (or is) in memory */

/* Test the per-argument life bits; both expect a local 'arg_life'. */
#define IS_DEAD_ARG(n)   (arg_life & (DEAD_ARG << (n)))
#define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n)))
3227  
3228  /* For liveness_pass_1, the register preferences for a given temp.  */
3229  static inline TCGRegSet *la_temp_pref(TCGTemp *ts)
3230  {
3231      return ts->state_ptr;
3232  }
3233  
3234  /* For liveness_pass_1, reset the preferences for a given temp to the
3235   * maximal regset for its type.
3236   */
3237  static inline void la_reset_pref(TCGTemp *ts)
3238  {
3239      *la_temp_pref(ts)
3240          = (ts->state == TS_DEAD ? 0 : tcg_target_available_regs[ts->type]);
3241  }
3242  
3243  /* liveness analysis: end of function: all temps are dead, and globals
3244     should be in memory. */
3245  static void la_func_end(TCGContext *s, int ng, int nt)
3246  {
3247      int i;
3248  
3249      for (i = 0; i < ng; ++i) {
3250          s->temps[i].state = TS_DEAD | TS_MEM;
3251          la_reset_pref(&s->temps[i]);
3252      }
3253      for (i = ng; i < nt; ++i) {
3254          s->temps[i].state = TS_DEAD;
3255          la_reset_pref(&s->temps[i]);
3256      }
3257  }
3258  
3259  /* liveness analysis: end of basic block: all temps are dead, globals
3260     and local temps should be in memory. */
3261  static void la_bb_end(TCGContext *s, int ng, int nt)
3262  {
3263      int i;
3264  
3265      for (i = 0; i < nt; ++i) {
3266          TCGTemp *ts = &s->temps[i];
3267          int state;
3268  
3269          switch (ts->kind) {
3270          case TEMP_FIXED:
3271          case TEMP_GLOBAL:
3272          case TEMP_TB:
3273              state = TS_DEAD | TS_MEM;
3274              break;
3275          case TEMP_EBB:
3276          case TEMP_CONST:
3277              state = TS_DEAD;
3278              break;
3279          default:
3280              g_assert_not_reached();
3281          }
3282          ts->state = state;
3283          la_reset_pref(ts);
3284      }
3285  }
3286  
3287  /* liveness analysis: sync globals back to memory.  */
3288  static void la_global_sync(TCGContext *s, int ng)
3289  {
3290      int i;
3291  
3292      for (i = 0; i < ng; ++i) {
3293          int state = s->temps[i].state;
3294          s->temps[i].state = state | TS_MEM;
3295          if (state == TS_DEAD) {
3296              /* If the global was previously dead, reset prefs.  */
3297              la_reset_pref(&s->temps[i]);
3298          }
3299      }
3300  }
3301  
3302  /*
3303   * liveness analysis: conditional branch: all temps are dead unless
3304   * explicitly live-across-conditional-branch, globals and local temps
3305   * should be synced.
3306   */
3307  static void la_bb_sync(TCGContext *s, int ng, int nt)
3308  {
3309      la_global_sync(s, ng);
3310  
3311      for (int i = ng; i < nt; ++i) {
3312          TCGTemp *ts = &s->temps[i];
3313          int state;
3314  
3315          switch (ts->kind) {
3316          case TEMP_TB:
3317              state = ts->state;
3318              ts->state = state | TS_MEM;
3319              if (state != TS_DEAD) {
3320                  continue;
3321              }
3322              break;
3323          case TEMP_EBB:
3324          case TEMP_CONST:
3325              continue;
3326          default:
3327              g_assert_not_reached();
3328          }
3329          la_reset_pref(&s->temps[i]);
3330      }
3331  }
3332  
3333  /* liveness analysis: sync globals back to memory and kill.  */
3334  static void la_global_kill(TCGContext *s, int ng)
3335  {
3336      int i;
3337  
3338      for (i = 0; i < ng; i++) {
3339          s->temps[i].state = TS_DEAD | TS_MEM;
3340          la_reset_pref(&s->temps[i]);
3341      }
3342  }
3343  
3344  /* liveness analysis: note live globals crossing calls.  */
3345  static void la_cross_call(TCGContext *s, int nt)
3346  {
3347      TCGRegSet mask = ~tcg_target_call_clobber_regs;
3348      int i;
3349  
3350      for (i = 0; i < nt; i++) {
3351          TCGTemp *ts = &s->temps[i];
3352          if (!(ts->state & TS_DEAD)) {
3353              TCGRegSet *pset = la_temp_pref(ts);
3354              TCGRegSet set = *pset;
3355  
3356              set &= mask;
3357              /* If the combination is not possible, restart.  */
3358              if (set == 0) {
3359                  set = tcg_target_available_regs[ts->type] & mask;
3360              }
3361              *pset = set;
3362          }
3363      }
3364  }
3365  
3366  /*
3367   * Liveness analysis: Verify the lifetime of TEMP_TB, and reduce
3368   * to TEMP_EBB, if possible.
3369   */
3370  static void __attribute__((noinline))
3371  liveness_pass_0(TCGContext *s)
3372  {
3373      void * const multiple_ebb = (void *)(uintptr_t)-1;
3374      int nb_temps = s->nb_temps;
3375      TCGOp *op, *ebb;
3376  
3377      for (int i = s->nb_globals; i < nb_temps; ++i) {
3378          s->temps[i].state_ptr = NULL;
3379      }
3380  
3381      /*
3382       * Represent each EBB by the op at which it begins.  In the case of
3383       * the first EBB, this is the first op, otherwise it is a label.
3384       * Collect the uses of each TEMP_TB: NULL for unused, EBB for use
3385       * within a single EBB, else MULTIPLE_EBB.
3386       */
3387      ebb = QTAILQ_FIRST(&s->ops);
3388      QTAILQ_FOREACH(op, &s->ops, link) {
3389          const TCGOpDef *def;
3390          int nb_oargs, nb_iargs;
3391  
3392          switch (op->opc) {
3393          case INDEX_op_set_label:
3394              ebb = op;
3395              continue;
3396          case INDEX_op_discard:
3397              continue;
3398          case INDEX_op_call:
3399              nb_oargs = TCGOP_CALLO(op);
3400              nb_iargs = TCGOP_CALLI(op);
3401              break;
3402          default:
3403              def = &tcg_op_defs[op->opc];
3404              nb_oargs = def->nb_oargs;
3405              nb_iargs = def->nb_iargs;
3406              break;
3407          }
3408  
3409          for (int i = 0; i < nb_oargs + nb_iargs; ++i) {
3410              TCGTemp *ts = arg_temp(op->args[i]);
3411  
3412              if (ts->kind != TEMP_TB) {
3413                  continue;
3414              }
3415              if (ts->state_ptr == NULL) {
3416                  ts->state_ptr = ebb;
3417              } else if (ts->state_ptr != ebb) {
3418                  ts->state_ptr = multiple_ebb;
3419              }
3420          }
3421      }
3422  
3423      /*
3424       * For TEMP_TB that turned out not to be used beyond one EBB,
3425       * reduce the liveness to TEMP_EBB.
3426       */
3427      for (int i = s->nb_globals; i < nb_temps; ++i) {
3428          TCGTemp *ts = &s->temps[i];
3429          if (ts->kind == TEMP_TB && ts->state_ptr != multiple_ebb) {
3430              ts->kind = TEMP_EBB;
3431          }
3432      }
3433  }
3434  
/*
 * Liveness analysis: update each op's arg_life to tell whether a given
 * input argument is dead after the op, and whether an output must be
 * synced back to memory.  Ops computing only dead values are removed,
 * and double-word add/sub/mul ops with a dead half are narrowed to the
 * single-word equivalent.  Register preferences are also accumulated,
 * per temp, via state_ptr.
 */
static void __attribute__((noinline))
liveness_pass_1(TCGContext *s)
{
    int nb_globals = s->nb_globals;
    int nb_temps = s->nb_temps;
    TCGOp *op, *op_prev;
    TCGRegSet *prefs;
    int i;

    /* One register-preference set per temp, addressed via state_ptr. */
    prefs = tcg_malloc(sizeof(TCGRegSet) * nb_temps);
    for (i = 0; i < nb_temps; ++i) {
        s->temps[i].state_ptr = prefs + i;
    }

    /* ??? Should be redundant with the exit_tb that ends the TB.  */
    la_func_end(s, nb_globals, nb_temps);

    /*
     * Walk the op list in reverse, so that each temp's state describes
     * its liveness *below* the op currently being examined.
     */
    QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, link, op_prev) {
        int nb_iargs, nb_oargs;
        TCGOpcode opc_new, opc_new2;
        bool have_opc_new2;
        TCGLifeData arg_life = 0;
        TCGTemp *ts;
        TCGOpcode opc = op->opc;
        const TCGOpDef *def = &tcg_op_defs[opc];

        switch (opc) {
        case INDEX_op_call:
            {
                const TCGHelperInfo *info = tcg_call_info(op);
                int call_flags = tcg_call_flags(op);

                nb_oargs = TCGOP_CALLO(op);
                nb_iargs = TCGOP_CALLI(op);

                /* pure functions can be removed if their result is unused */
                if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
                    for (i = 0; i < nb_oargs; i++) {
                        ts = arg_temp(op->args[i]);
                        if (ts->state != TS_DEAD) {
                            goto do_not_remove_call;
                        }
                    }
                    goto do_remove;
                }
            do_not_remove_call:

                /* Output args are dead.  */
                for (i = 0; i < nb_oargs; i++) {
                    ts = arg_temp(op->args[i]);
                    if (ts->state & TS_DEAD) {
                        arg_life |= DEAD_ARG << i;
                    }
                    if (ts->state & TS_MEM) {
                        arg_life |= SYNC_ARG << i;
                    }
                    ts->state = TS_DEAD;
                    la_reset_pref(ts);
                }

                /* Not used -- it will be tcg_target_call_oarg_reg().  */
                memset(op->output_pref, 0, sizeof(op->output_pref));

                if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
                                    TCG_CALL_NO_READ_GLOBALS))) {
                    la_global_kill(s, nb_globals);
                } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
                    la_global_sync(s, nb_globals);
                }

                /* Record arguments that die in this helper.  */
                for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
                    ts = arg_temp(op->args[i]);
                    if (ts->state & TS_DEAD) {
                        arg_life |= DEAD_ARG << i;
                    }
                }

                /* For all live registers, remove call-clobbered prefs.  */
                la_cross_call(s, nb_temps);

                /*
                 * Input arguments are live for preceding opcodes.
                 *
                 * For those arguments that die, and will be allocated in
                 * registers, clear the register set for that arg, to be
                 * filled in below.  For args that will be on the stack,
                 * reset to any available reg.  Process arguments in reverse
                 * order so that if a temp is used more than once, the stack
                 * reset to max happens before the register reset to 0.
                 */
                for (i = nb_iargs - 1; i >= 0; i--) {
                    const TCGCallArgumentLoc *loc = &info->in[i];
                    ts = arg_temp(op->args[nb_oargs + i]);

                    if (ts->state & TS_DEAD) {
                        switch (loc->kind) {
                        case TCG_CALL_ARG_NORMAL:
                        case TCG_CALL_ARG_EXTEND_U:
                        case TCG_CALL_ARG_EXTEND_S:
                            if (arg_slot_reg_p(loc->arg_slot)) {
                                *la_temp_pref(ts) = 0;
                                break;
                            }
                            /* fall through */
                        default:
                            *la_temp_pref(ts) =
                                tcg_target_available_regs[ts->type];
                            break;
                        }
                        ts->state &= ~TS_DEAD;
                    }
                }

                /*
                 * For each input argument, add its input register to prefs.
                 * If a temp is used once, this produces a single set bit;
                 * if a temp is used multiple times, this produces a set.
                 */
                for (i = 0; i < nb_iargs; i++) {
                    const TCGCallArgumentLoc *loc = &info->in[i];
                    ts = arg_temp(op->args[nb_oargs + i]);

                    switch (loc->kind) {
                    case TCG_CALL_ARG_NORMAL:
                    case TCG_CALL_ARG_EXTEND_U:
                    case TCG_CALL_ARG_EXTEND_S:
                        if (arg_slot_reg_p(loc->arg_slot)) {
                            tcg_regset_set_reg(*la_temp_pref(ts),
                                tcg_target_call_iarg_regs[loc->arg_slot]);
                        }
                        break;
                    default:
                        break;
                    }
                }
            }
            break;
        case INDEX_op_insn_start:
            break;
        case INDEX_op_discard:
            /* mark the temporary as dead */
            ts = arg_temp(op->args[0]);
            ts->state = TS_DEAD;
            la_reset_pref(ts);
            break;

        case INDEX_op_add2_i32:
            opc_new = INDEX_op_add_i32;
            goto do_addsub2;
        case INDEX_op_sub2_i32:
            opc_new = INDEX_op_sub_i32;
            goto do_addsub2;
        case INDEX_op_add2_i64:
            opc_new = INDEX_op_add_i64;
            goto do_addsub2;
        case INDEX_op_sub2_i64:
            opc_new = INDEX_op_sub_i64;
        do_addsub2:
            nb_iargs = 4;
            nb_oargs = 2;
            /* Test if the high part of the operation is dead, but not
               the low part.  The result can be optimized to a simple
               add or sub.  This happens often for x86_64 guest when the
               cpu mode is set to 32 bit.  */
            if (arg_temp(op->args[1])->state == TS_DEAD) {
                if (arg_temp(op->args[0])->state == TS_DEAD) {
                    goto do_remove;
                }
                /* Replace the opcode and adjust the args in place,
                   leaving 3 unused args at the end.  */
                op->opc = opc = opc_new;
                op->args[1] = op->args[2];
                op->args[2] = op->args[4];
                /* Fall through and mark the single-word operation live.  */
                nb_iargs = 2;
                nb_oargs = 1;
            }
            goto do_not_remove;

        case INDEX_op_mulu2_i32:
            opc_new = INDEX_op_mul_i32;
            opc_new2 = INDEX_op_muluh_i32;
            have_opc_new2 = TCG_TARGET_HAS_muluh_i32;
            goto do_mul2;
        case INDEX_op_muls2_i32:
            opc_new = INDEX_op_mul_i32;
            opc_new2 = INDEX_op_mulsh_i32;
            have_opc_new2 = TCG_TARGET_HAS_mulsh_i32;
            goto do_mul2;
        case INDEX_op_mulu2_i64:
            opc_new = INDEX_op_mul_i64;
            opc_new2 = INDEX_op_muluh_i64;
            have_opc_new2 = TCG_TARGET_HAS_muluh_i64;
            goto do_mul2;
        case INDEX_op_muls2_i64:
            opc_new = INDEX_op_mul_i64;
            opc_new2 = INDEX_op_mulsh_i64;
            have_opc_new2 = TCG_TARGET_HAS_mulsh_i64;
            goto do_mul2;
        do_mul2:
            nb_iargs = 2;
            nb_oargs = 2;
            if (arg_temp(op->args[1])->state == TS_DEAD) {
                if (arg_temp(op->args[0])->state == TS_DEAD) {
                    /* Both parts of the operation are dead.  */
                    goto do_remove;
                }
                /* The high part of the operation is dead; generate the low. */
                op->opc = opc = opc_new;
                op->args[1] = op->args[2];
                op->args[2] = op->args[3];
            } else if (arg_temp(op->args[0])->state == TS_DEAD && have_opc_new2) {
                /* The low part of the operation is dead; generate the high. */
                op->opc = opc = opc_new2;
                op->args[0] = op->args[1];
                op->args[1] = op->args[2];
                op->args[2] = op->args[3];
            } else {
                goto do_not_remove;
            }
            /* Mark the single-word operation live.  */
            nb_oargs = 1;
            goto do_not_remove;

        default:
            /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
            nb_iargs = def->nb_iargs;
            nb_oargs = def->nb_oargs;

            /* Test if the operation can be removed because all
               its outputs are dead. We assume that nb_oargs == 0
               implies side effects */
            if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) {
                for (i = 0; i < nb_oargs; i++) {
                    if (arg_temp(op->args[i])->state != TS_DEAD) {
                        goto do_not_remove;
                    }
                }
                goto do_remove;
            }
            goto do_not_remove;

        do_remove:
            tcg_op_remove(s, op);
            break;

        do_not_remove:
            for (i = 0; i < nb_oargs; i++) {
                ts = arg_temp(op->args[i]);

                /* Remember the preference of the uses that followed.  */
                if (i < ARRAY_SIZE(op->output_pref)) {
                    op->output_pref[i] = *la_temp_pref(ts);
                }

                /* Output args are dead.  */
                if (ts->state & TS_DEAD) {
                    arg_life |= DEAD_ARG << i;
                }
                if (ts->state & TS_MEM) {
                    arg_life |= SYNC_ARG << i;
                }
                ts->state = TS_DEAD;
                la_reset_pref(ts);
            }

            /* If end of basic block, update.  */
            if (def->flags & TCG_OPF_BB_EXIT) {
                la_func_end(s, nb_globals, nb_temps);
            } else if (def->flags & TCG_OPF_COND_BRANCH) {
                la_bb_sync(s, nb_globals, nb_temps);
            } else if (def->flags & TCG_OPF_BB_END) {
                la_bb_end(s, nb_globals, nb_temps);
            } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
                la_global_sync(s, nb_globals);
                if (def->flags & TCG_OPF_CALL_CLOBBER) {
                    la_cross_call(s, nb_temps);
                }
            }

            /* Record arguments that die in this opcode.  */
            for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
                ts = arg_temp(op->args[i]);
                if (ts->state & TS_DEAD) {
                    arg_life |= DEAD_ARG << i;
                }
            }

            /* Input arguments are live for preceding opcodes.  */
            for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
                ts = arg_temp(op->args[i]);
                if (ts->state & TS_DEAD) {
                    /* For operands that were dead, initially allow
                       all regs for the type.  */
                    *la_temp_pref(ts) = tcg_target_available_regs[ts->type];
                    ts->state &= ~TS_DEAD;
                }
            }

            /* Incorporate constraints for this operand.  */
            switch (opc) {
            case INDEX_op_mov_i32:
            case INDEX_op_mov_i64:
                /* Note that these are TCG_OPF_NOT_PRESENT and do not
                   have proper constraints.  That said, special case
                   moves to propagate preferences backward.  */
                if (IS_DEAD_ARG(1)) {
                    *la_temp_pref(arg_temp(op->args[0]))
                        = *la_temp_pref(arg_temp(op->args[1]));
                }
                break;

            default:
                for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
                    const TCGArgConstraint *ct = &def->args_ct[i];
                    TCGRegSet set, *pset;

                    ts = arg_temp(op->args[i]);
                    pset = la_temp_pref(ts);
                    set = *pset;

                    set &= ct->regs;
                    if (ct->ialias) {
                        set &= output_pref(op, ct->alias_index);
                    }
                    /* If the combination is not possible, restart.  */
                    if (set == 0) {
                        set = ct->regs;
                    }
                    *pset = set;
                }
                break;
            }
            break;
        }
        /* Publish the accumulated DEAD_ARG/SYNC_ARG bits for this op. */
        op->life = arg_life;
    }
}
3777  
/*
 * Liveness analysis: Convert indirect regs to direct temporaries.
 * Each indirect global is shadowed by a fresh TEMP_EBB temp; uses are
 * rewritten to the shadow, with explicit ld/st ops inserted to keep it
 * coherent with the backing memory.  Returns true if any op changed.
 */
static bool __attribute__((noinline))
liveness_pass_2(TCGContext *s)
{
    int nb_globals = s->nb_globals;
    int nb_temps, i;
    bool changes = false;
    TCGOp *op, *op_next;

    /* Create a temporary for each indirect global.  */
    for (i = 0; i < nb_globals; ++i) {
        TCGTemp *its = &s->temps[i];
        if (its->indirect_reg) {
            TCGTemp *dts = tcg_temp_alloc(s);
            dts->type = its->type;
            dts->base_type = its->base_type;
            dts->temp_subindex = its->temp_subindex;
            dts->kind = TEMP_EBB;
            its->state_ptr = dts;
        } else {
            its->state_ptr = NULL;
        }
        /* All globals begin dead.  */
        its->state = TS_DEAD;
    }
    /* All non-globals are direct and begin dead; i continues at nb_globals. */
    for (nb_temps = s->nb_temps; i < nb_temps; ++i) {
        TCGTemp *its = &s->temps[i];
        its->state_ptr = NULL;
        its->state = TS_DEAD;
    }

    QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
        TCGOpcode opc = op->opc;
        const TCGOpDef *def = &tcg_op_defs[opc];
        TCGLifeData arg_life = op->life;
        int nb_iargs, nb_oargs, call_flags;
        TCGTemp *arg_ts, *dir_ts;

        if (opc == INDEX_op_call) {
            nb_oargs = TCGOP_CALLO(op);
            nb_iargs = TCGOP_CALLI(op);
            call_flags = tcg_call_flags(op);
        } else {
            nb_iargs = def->nb_iargs;
            nb_oargs = def->nb_oargs;

            /* Set flags similar to how calls require.  */
            if (def->flags & TCG_OPF_COND_BRANCH) {
                /* Like reading globals: sync_globals */
                call_flags = TCG_CALL_NO_WRITE_GLOBALS;
            } else if (def->flags & TCG_OPF_BB_END) {
                /* Like writing globals: save_globals */
                call_flags = 0;
            } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
                /* Like reading globals: sync_globals */
                call_flags = TCG_CALL_NO_WRITE_GLOBALS;
            } else {
                /* No effect on globals.  */
                call_flags = (TCG_CALL_NO_READ_GLOBALS |
                              TCG_CALL_NO_WRITE_GLOBALS);
            }
        }

        /* Make sure that input arguments are available.
           Insert a load from memory for any shadowed input that is dead. */
        for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
            arg_ts = arg_temp(op->args[i]);
            dir_ts = arg_ts->state_ptr;
            if (dir_ts && arg_ts->state == TS_DEAD) {
                TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32
                                  ? INDEX_op_ld_i32
                                  : INDEX_op_ld_i64);
                TCGOp *lop = tcg_op_insert_before(s, op, lopc, 3);

                lop->args[0] = temp_arg(dir_ts);
                lop->args[1] = temp_arg(arg_ts->mem_base);
                lop->args[2] = arg_ts->mem_offset;

                /* Loaded, but synced with memory.  */
                arg_ts->state = TS_MEM;
            }
        }

        /* Perform input replacement, and mark inputs that became dead.
           No action is required except keeping temp_state up to date
           so that we reload when needed.  */
        for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
            arg_ts = arg_temp(op->args[i]);
            dir_ts = arg_ts->state_ptr;
            if (dir_ts) {
                op->args[i] = temp_arg(dir_ts);
                changes = true;
                if (IS_DEAD_ARG(i)) {
                    arg_ts->state = TS_DEAD;
                }
            }
        }

        /* Liveness analysis should ensure that the following are
           all correct, for call sites and basic block end points.  */
        if (call_flags & TCG_CALL_NO_READ_GLOBALS) {
            /* Nothing to do */
        } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) {
            for (i = 0; i < nb_globals; ++i) {
                /* Liveness should see that globals are synced back,
                   that is, either TS_DEAD or TS_MEM.  */
                arg_ts = &s->temps[i];
                tcg_debug_assert(arg_ts->state_ptr == 0
                                 || arg_ts->state != 0);
            }
        } else {
            for (i = 0; i < nb_globals; ++i) {
                /* Liveness should see that globals are saved back,
                   that is, TS_DEAD, waiting to be reloaded.  */
                arg_ts = &s->temps[i];
                tcg_debug_assert(arg_ts->state_ptr == 0
                                 || arg_ts->state == TS_DEAD);
            }
        }

        /* Outputs become available.  */
        if (opc == INDEX_op_mov_i32 || opc == INDEX_op_mov_i64) {
            arg_ts = arg_temp(op->args[0]);
            dir_ts = arg_ts->state_ptr;
            if (dir_ts) {
                op->args[0] = temp_arg(dir_ts);
                changes = true;

                /* The output is now live and modified.  */
                arg_ts->state = 0;

                if (NEED_SYNC_ARG(0)) {
                    TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
                                      ? INDEX_op_st_i32
                                      : INDEX_op_st_i64);
                    TCGOp *sop = tcg_op_insert_after(s, op, sopc, 3);
                    TCGTemp *out_ts = dir_ts;

                    if (IS_DEAD_ARG(0)) {
                        /* Dead after sync: store the mov source directly
                           and drop the mov itself.  */
                        out_ts = arg_temp(op->args[1]);
                        arg_ts->state = TS_DEAD;
                        tcg_op_remove(s, op);
                    } else {
                        arg_ts->state = TS_MEM;
                    }

                    sop->args[0] = temp_arg(out_ts);
                    sop->args[1] = temp_arg(arg_ts->mem_base);
                    sop->args[2] = arg_ts->mem_offset;
                } else {
                    tcg_debug_assert(!IS_DEAD_ARG(0));
                }
            }
        } else {
            for (i = 0; i < nb_oargs; i++) {
                arg_ts = arg_temp(op->args[i]);
                dir_ts = arg_ts->state_ptr;
                if (!dir_ts) {
                    continue;
                }
                op->args[i] = temp_arg(dir_ts);
                changes = true;

                /* The output is now live and modified.  */
                arg_ts->state = 0;

                /* Sync outputs upon their last write.  */
                if (NEED_SYNC_ARG(i)) {
                    TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
                                      ? INDEX_op_st_i32
                                      : INDEX_op_st_i64);
                    TCGOp *sop = tcg_op_insert_after(s, op, sopc, 3);

                    sop->args[0] = temp_arg(dir_ts);
                    sop->args[1] = temp_arg(arg_ts->mem_base);
                    sop->args[2] = arg_ts->mem_offset;

                    arg_ts->state = TS_MEM;
                }
                /* Drop outputs that are dead.  */
                if (IS_DEAD_ARG(i)) {
                    arg_ts->state = TS_DEAD;
                }
            }
        }
    }

    return changes;
}
3966  
/*
 * Assign a stack-frame slot to @ts, setting mem_offset, mem_base and
 * mem_allocated.  If @ts is one part of a subdivided object
 * (base_type != type), all sibling parts are assigned at once.
 * If the frame is exhausted, restart with a smaller TB.
 */
static void temp_allocate_frame(TCGContext *s, TCGTemp *ts)
{
    intptr_t off;
    int size, align;

    /* When allocating an object, look at the full type. */
    size = tcg_type_size(ts->base_type);
    switch (ts->base_type) {
    case TCG_TYPE_I32:
        align = 4;
        break;
    case TCG_TYPE_I64:
    case TCG_TYPE_V64:
        align = 8;
        break;
    case TCG_TYPE_I128:
    case TCG_TYPE_V128:
    case TCG_TYPE_V256:
        /*
         * Note that we do not require aligned storage for V256,
         * and that we provide alignment for I128 to match V128,
         * even if that's above what the host ABI requires.
         */
        align = 16;
        break;
    default:
        g_assert_not_reached();
    }

    /*
     * Assume the stack is sufficiently aligned.
     * This affects e.g. ARM NEON, where we have 8 byte stack alignment
     * and do not require 16 byte vector alignment.  This seems slightly
     * easier than fully parameterizing the above switch statement.
     */
    align = MIN(TCG_TARGET_STACK_ALIGN, align);
    off = ROUND_UP(s->current_frame_offset, align);

    /* If we've exhausted the stack frame, restart with a smaller TB. */
    if (off + size > s->frame_end) {
        tcg_raise_tb_overflow(s);
    }
    s->current_frame_offset = off + size;
#if defined(__sparc__)
    /* The bias affects only the offset used for addressing, not the
       frame bookkeeping above. */
    off += TCG_TARGET_STACK_BIAS;
#endif

    /* If the object was subdivided, assign memory to all the parts. */
    if (ts->base_type != ts->type) {
        int part_size = tcg_type_size(ts->type);
        int part_count = size / part_size;

        /*
         * Each part is allocated sequentially in tcg_temp_new_internal.
         * Jump back to the first part by subtracting the current index.
         */
        ts -= ts->temp_subindex;
        for (int i = 0; i < part_count; ++i) {
            ts[i].mem_offset = off + i * part_size;
            ts[i].mem_base = s->frame_temp;
            ts[i].mem_allocated = 1;
        }
    } else {
        ts->mem_offset = off;
        ts->mem_base = s->frame_temp;
        ts->mem_allocated = 1;
    }
}
4035  
4036  /* Assign @reg to @ts, and update reg_to_temp[]. */
4037  static void set_temp_val_reg(TCGContext *s, TCGTemp *ts, TCGReg reg)
4038  {
4039      if (ts->val_type == TEMP_VAL_REG) {
4040          TCGReg old = ts->reg;
4041          tcg_debug_assert(s->reg_to_temp[old] == ts);
4042          if (old == reg) {
4043              return;
4044          }
4045          s->reg_to_temp[old] = NULL;
4046      }
4047      tcg_debug_assert(s->reg_to_temp[reg] == NULL);
4048      s->reg_to_temp[reg] = ts;
4049      ts->val_type = TEMP_VAL_REG;
4050      ts->reg = reg;
4051  }
4052  
4053  /* Assign a non-register value type to @ts, and update reg_to_temp[]. */
4054  static void set_temp_val_nonreg(TCGContext *s, TCGTemp *ts, TCGTempVal type)
4055  {
4056      tcg_debug_assert(type != TEMP_VAL_REG);
4057      if (ts->val_type == TEMP_VAL_REG) {
4058          TCGReg reg = ts->reg;
4059          tcg_debug_assert(s->reg_to_temp[reg] == ts);
4060          s->reg_to_temp[reg] = NULL;
4061      }
4062      ts->val_type = type;
4063  }
4064  
4065  static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet);
4066  
4067  /* Mark a temporary as free or dead.  If 'free_or_dead' is negative,
4068     mark it free; otherwise mark it dead.  */
4069  static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead)
4070  {
4071      TCGTempVal new_type;
4072  
4073      switch (ts->kind) {
4074      case TEMP_FIXED:
4075          return;
4076      case TEMP_GLOBAL:
4077      case TEMP_TB:
4078          new_type = TEMP_VAL_MEM;
4079          break;
4080      case TEMP_EBB:
4081          new_type = free_or_dead < 0 ? TEMP_VAL_MEM : TEMP_VAL_DEAD;
4082          break;
4083      case TEMP_CONST:
4084          new_type = TEMP_VAL_CONST;
4085          break;
4086      default:
4087          g_assert_not_reached();
4088      }
4089      set_temp_val_nonreg(s, ts, new_type);
4090  }
4091  
4092  /* Mark a temporary as dead.  */
4093  static inline void temp_dead(TCGContext *s, TCGTemp *ts)
4094  {
4095      temp_free_or_dead(s, ts, 1);
4096  }
4097  
/*
 * Sync a temporary to memory.  'allocated_regs' is used in case a register
 * needs to be allocated to store a constant.  If 'free_or_dead' is non-zero,
 * subsequently release the temporary; if it is positive, the temp is dead;
 * if it is negative, the temp is free.
 */
static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs,
                      TCGRegSet preferred_regs, int free_or_dead)
{
    /* Nothing to store if read-only or already coherent with memory. */
    if (!temp_readonly(ts) && !ts->mem_coherent) {
        if (!ts->mem_allocated) {
            temp_allocate_frame(s, ts);
        }
        switch (ts->val_type) {
        case TEMP_VAL_CONST:
            /* If we're going to free the temp immediately, then we won't
               require it later in a register, so attempt to store the
               constant to memory directly.  */
            if (free_or_dead
                && tcg_out_sti(s, ts->type, ts->val,
                               ts->mem_base->reg, ts->mem_offset)) {
                break;
            }
            /* Could not store directly: materialize the constant in a
               register and fall through to the register store below.  */
            temp_load(s, ts, tcg_target_available_regs[ts->type],
                      allocated_regs, preferred_regs);
            /* fallthrough */

        case TEMP_VAL_REG:
            tcg_out_st(s, ts->type, ts->reg,
                       ts->mem_base->reg, ts->mem_offset);
            break;

        case TEMP_VAL_MEM:
            break;

        case TEMP_VAL_DEAD:
        default:
            g_assert_not_reached();
        }
        ts->mem_coherent = 1;
    }
    if (free_or_dead) {
        temp_free_or_dead(s, ts, free_or_dead);
    }
}
4141  
4142  /* free register 'reg' by spilling the corresponding temporary if necessary */
4143  static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs)
4144  {
4145      TCGTemp *ts = s->reg_to_temp[reg];
4146      if (ts != NULL) {
4147          temp_sync(s, ts, allocated_regs, 0, -1);
4148      }
4149  }
4150  
4151  /**
4152   * tcg_reg_alloc:
4153   * @required_regs: Set of registers in which we must allocate.
4154   * @allocated_regs: Set of registers which must be avoided.
4155   * @preferred_regs: Set of registers we should prefer.
4156   * @rev: True if we search the registers in "indirect" order.
4157   *
4158   * The allocated register must be in @required_regs & ~@allocated_regs,
4159   * but if we can put it in @preferred_regs we may save a move later.
4160   */
4161  static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs,
4162                              TCGRegSet allocated_regs,
4163                              TCGRegSet preferred_regs, bool rev)
4164  {
4165      int i, j, f, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
4166      TCGRegSet reg_ct[2];
4167      const int *order;
4168  
4169      reg_ct[1] = required_regs & ~allocated_regs;
4170      tcg_debug_assert(reg_ct[1] != 0);
4171      reg_ct[0] = reg_ct[1] & preferred_regs;
4172  
4173      /* Skip the preferred_regs option if it cannot be satisfied,
4174         or if the preference made no difference.  */
4175      f = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
4176  
4177      order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
4178  
4179      /* Try free registers, preferences first.  */
4180      for (j = f; j < 2; j++) {
4181          TCGRegSet set = reg_ct[j];
4182  
4183          if (tcg_regset_single(set)) {
4184              /* One register in the set.  */
4185              TCGReg reg = tcg_regset_first(set);
4186              if (s->reg_to_temp[reg] == NULL) {
4187                  return reg;
4188              }
4189          } else {
4190              for (i = 0; i < n; i++) {
4191                  TCGReg reg = order[i];
4192                  if (s->reg_to_temp[reg] == NULL &&
4193                      tcg_regset_test_reg(set, reg)) {
4194                      return reg;
4195                  }
4196              }
4197          }
4198      }
4199  
4200      /* We must spill something.  */
4201      for (j = f; j < 2; j++) {
4202          TCGRegSet set = reg_ct[j];
4203  
4204          if (tcg_regset_single(set)) {
4205              /* One register in the set.  */
4206              TCGReg reg = tcg_regset_first(set);
4207              tcg_reg_free(s, reg, allocated_regs);
4208              return reg;
4209          } else {
4210              for (i = 0; i < n; i++) {
4211                  TCGReg reg = order[i];
4212                  if (tcg_regset_test_reg(set, reg)) {
4213                      tcg_reg_free(s, reg, allocated_regs);
4214                      return reg;
4215                  }
4216              }
4217          }
4218      }
4219  
4220      g_assert_not_reached();
4221  }
4222  
/*
 * As tcg_reg_alloc, but allocate the pair of adjacent registers
 * (reg, reg + 1) and return the lower of the two.  Spills are
 * minimized by searching for a fully free pair first, then a pair
 * requiring one spill, then two.
 */
static TCGReg tcg_reg_alloc_pair(TCGContext *s, TCGRegSet required_regs,
                                 TCGRegSet allocated_regs,
                                 TCGRegSet preferred_regs, bool rev)
{
    int i, j, k, fmin, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
    TCGRegSet reg_ct[2];
    const int *order;

    /* Ensure that if I is not in allocated_regs, I+1 is not either. */
    reg_ct[1] = required_regs & ~(allocated_regs | (allocated_regs >> 1));
    tcg_debug_assert(reg_ct[1] != 0);
    reg_ct[0] = reg_ct[1] & preferred_regs;

    order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;

    /*
     * Skip the preferred_regs option if it cannot be satisfied,
     * or if the preference made no difference.
     */
    k = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];

    /*
     * Minimize the number of flushes by looking for 2 free registers first,
     * then a single flush, then two flushes.
     */
    for (fmin = 2; fmin >= 0; fmin--) {
        for (j = k; j < 2; j++) {
            TCGRegSet set = reg_ct[j];

            for (i = 0; i < n; i++) {
                TCGReg reg = order[i];

                if (tcg_regset_test_reg(set, reg)) {
                    /* f counts how many of the pair are already free. */
                    int f = !s->reg_to_temp[reg] + !s->reg_to_temp[reg + 1];
                    if (f >= fmin) {
                        tcg_reg_free(s, reg, allocated_regs);
                        tcg_reg_free(s, reg + 1, allocated_regs);
                        return reg;
                    }
                }
            }
        }
    }
    g_assert_not_reached();
}
4268  
/*
 * Make sure the temporary is in a register.  If needed, allocate the register
 * from DESIRED_REGS while avoiding ALLOCATED_REGS; PREFERRED_REGS biases
 * which free register is chosen.
 */
static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
                      TCGRegSet allocated_regs, TCGRegSet preferred_regs)
{
    TCGReg reg;

    switch (ts->val_type) {
    case TEMP_VAL_REG:
        /* Already in a register; nothing to do. */
        return;
    case TEMP_VAL_CONST:
        reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
                            preferred_regs, ts->indirect_base);
        if (ts->type <= TCG_TYPE_I64) {
            /* Integer constant: materialize with a move-immediate. */
            tcg_out_movi(s, ts->type, reg, ts->val);
        } else {
            /* Vector constant: materialize by duplicating an element. */
            uint64_t val = ts->val;
            MemOp vece = MO_64;

            /*
             * Find the minimal vector element that matches the constant.
             * The targets will, in general, have to do this search anyway,
             * do this generically.
             */
            if (val == dup_const(MO_8, val)) {
                vece = MO_8;
            } else if (val == dup_const(MO_16, val)) {
                vece = MO_16;
            } else if (val == dup_const(MO_32, val)) {
                vece = MO_32;
            }

            tcg_out_dupi_vec(s, ts->type, vece, reg, ts->val);
        }
        /* The register now holds a value newer than the memory slot. */
        ts->mem_coherent = 0;
        break;
    case TEMP_VAL_MEM:
        reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
                            preferred_regs, ts->indirect_base);
        tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
        /* Register and memory slot now agree. */
        ts->mem_coherent = 1;
        break;
    case TEMP_VAL_DEAD:
    default:
        g_assert_not_reached();
    }
    set_temp_val_reg(s, ts, reg);
}
4317  
4318  /* Save a temporary to memory. 'allocated_regs' is used in case a
4319     temporary registers needs to be allocated to store a constant.  */
4320  static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
4321  {
4322      /* The liveness analysis already ensures that globals are back
4323         in memory. Keep an tcg_debug_assert for safety. */
4324      tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || temp_readonly(ts));
4325  }
4326  
4327  /* save globals to their canonical location and assume they can be
4328     modified be the following code. 'allocated_regs' is used in case a
4329     temporary registers needs to be allocated to store a constant. */
4330  static void save_globals(TCGContext *s, TCGRegSet allocated_regs)
4331  {
4332      int i, n;
4333  
4334      for (i = 0, n = s->nb_globals; i < n; i++) {
4335          temp_save(s, &s->temps[i], allocated_regs);
4336      }
4337  }
4338  
4339  /* sync globals to their canonical location and assume they can be
4340     read by the following code. 'allocated_regs' is used in case a
4341     temporary registers needs to be allocated to store a constant. */
4342  static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
4343  {
4344      int i, n;
4345  
4346      for (i = 0, n = s->nb_globals; i < n; i++) {
4347          TCGTemp *ts = &s->temps[i];
4348          tcg_debug_assert(ts->val_type != TEMP_VAL_REG
4349                           || ts->kind == TEMP_FIXED
4350                           || ts->mem_coherent);
4351      }
4352  }
4353  
4354  /* at the end of a basic block, we assume all temporaries are dead and
4355     all globals are stored at their canonical location. */
4356  static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
4357  {
4358      int i;
4359  
4360      for (i = s->nb_globals; i < s->nb_temps; i++) {
4361          TCGTemp *ts = &s->temps[i];
4362  
4363          switch (ts->kind) {
4364          case TEMP_TB:
4365              temp_save(s, ts, allocated_regs);
4366              break;
4367          case TEMP_EBB:
4368              /* The liveness analysis already ensures that temps are dead.
4369                 Keep an tcg_debug_assert for safety. */
4370              tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
4371              break;
4372          case TEMP_CONST:
4373              /* Similarly, we should have freed any allocated register. */
4374              tcg_debug_assert(ts->val_type == TEMP_VAL_CONST);
4375              break;
4376          default:
4377              g_assert_not_reached();
4378          }
4379      }
4380  
4381      save_globals(s, allocated_regs);
4382  }
4383  
4384  /*
4385   * At a conditional branch, we assume all temporaries are dead unless
4386   * explicitly live-across-conditional-branch; all globals and local
4387   * temps are synced to their location.
4388   */
4389  static void tcg_reg_alloc_cbranch(TCGContext *s, TCGRegSet allocated_regs)
4390  {
4391      sync_globals(s, allocated_regs);
4392  
4393      for (int i = s->nb_globals; i < s->nb_temps; i++) {
4394          TCGTemp *ts = &s->temps[i];
4395          /*
4396           * The liveness analysis already ensures that temps are dead.
4397           * Keep tcg_debug_asserts for safety.
4398           */
4399          switch (ts->kind) {
4400          case TEMP_TB:
4401              tcg_debug_assert(ts->val_type != TEMP_VAL_REG || ts->mem_coherent);
4402              break;
4403          case TEMP_EBB:
4404          case TEMP_CONST:
4405              break;
4406          default:
4407              g_assert_not_reached();
4408          }
4409      }
4410  }
4411  
4412  /*
4413   * Specialized code generation for INDEX_op_mov_* with a constant.
4414   */
4415  static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots,
4416                                    tcg_target_ulong val, TCGLifeData arg_life,
4417                                    TCGRegSet preferred_regs)
4418  {
4419      /* ENV should not be modified.  */
4420      tcg_debug_assert(!temp_readonly(ots));
4421  
4422      /* The movi is not explicitly generated here.  */
4423      set_temp_val_nonreg(s, ots, TEMP_VAL_CONST);
4424      ots->val = val;
4425      ots->mem_coherent = 0;
4426      if (NEED_SYNC_ARG(0)) {
4427          temp_sync(s, ots, s->reserved_regs, preferred_regs, IS_DEAD_ARG(0));
4428      } else if (IS_DEAD_ARG(0)) {
4429          temp_dead(s, ots);
4430      }
4431  }
4432  
4433  /*
4434   * Specialized code generation for INDEX_op_mov_*.
4435   */
4436  static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op)
4437  {
4438      const TCGLifeData arg_life = op->life;
4439      TCGRegSet allocated_regs, preferred_regs;
4440      TCGTemp *ts, *ots;
4441      TCGType otype, itype;
4442      TCGReg oreg, ireg;
4443  
4444      allocated_regs = s->reserved_regs;
4445      preferred_regs = output_pref(op, 0);
4446      ots = arg_temp(op->args[0]);
4447      ts = arg_temp(op->args[1]);
4448  
4449      /* ENV should not be modified.  */
4450      tcg_debug_assert(!temp_readonly(ots));
4451  
4452      /* Note that otype != itype for no-op truncation.  */
4453      otype = ots->type;
4454      itype = ts->type;
4455  
4456      if (ts->val_type == TEMP_VAL_CONST) {
4457          /* propagate constant or generate sti */
4458          tcg_target_ulong val = ts->val;
4459          if (IS_DEAD_ARG(1)) {
4460              temp_dead(s, ts);
4461          }
4462          tcg_reg_alloc_do_movi(s, ots, val, arg_life, preferred_regs);
4463          return;
4464      }
4465  
4466      /* If the source value is in memory we're going to be forced
4467         to have it in a register in order to perform the copy.  Copy
4468         the SOURCE value into its own register first, that way we
4469         don't have to reload SOURCE the next time it is used. */
4470      if (ts->val_type == TEMP_VAL_MEM) {
4471          temp_load(s, ts, tcg_target_available_regs[itype],
4472                    allocated_regs, preferred_regs);
4473      }
4474      tcg_debug_assert(ts->val_type == TEMP_VAL_REG);
4475      ireg = ts->reg;
4476  
4477      if (IS_DEAD_ARG(0)) {
4478          /* mov to a non-saved dead register makes no sense (even with
4479             liveness analysis disabled). */
4480          tcg_debug_assert(NEED_SYNC_ARG(0));
4481          if (!ots->mem_allocated) {
4482              temp_allocate_frame(s, ots);
4483          }
4484          tcg_out_st(s, otype, ireg, ots->mem_base->reg, ots->mem_offset);
4485          if (IS_DEAD_ARG(1)) {
4486              temp_dead(s, ts);
4487          }
4488          temp_dead(s, ots);
4489          return;
4490      }
4491  
4492      if (IS_DEAD_ARG(1) && ts->kind != TEMP_FIXED) {
4493          /*
4494           * The mov can be suppressed.  Kill input first, so that it
4495           * is unlinked from reg_to_temp, then set the output to the
4496           * reg that we saved from the input.
4497           */
4498          temp_dead(s, ts);
4499          oreg = ireg;
4500      } else {
4501          if (ots->val_type == TEMP_VAL_REG) {
4502              oreg = ots->reg;
4503          } else {
4504              /* Make sure to not spill the input register during allocation. */
4505              oreg = tcg_reg_alloc(s, tcg_target_available_regs[otype],
4506                                   allocated_regs | ((TCGRegSet)1 << ireg),
4507                                   preferred_regs, ots->indirect_base);
4508          }
4509          if (!tcg_out_mov(s, otype, oreg, ireg)) {
4510              /*
4511               * Cross register class move not supported.
4512               * Store the source register into the destination slot
4513               * and leave the destination temp as TEMP_VAL_MEM.
4514               */
4515              assert(!temp_readonly(ots));
4516              if (!ts->mem_allocated) {
4517                  temp_allocate_frame(s, ots);
4518              }
4519              tcg_out_st(s, ts->type, ireg, ots->mem_base->reg, ots->mem_offset);
4520              set_temp_val_nonreg(s, ts, TEMP_VAL_MEM);
4521              ots->mem_coherent = 1;
4522              return;
4523          }
4524      }
4525      set_temp_val_reg(s, ots, oreg);
4526      ots->mem_coherent = 0;
4527  
4528      if (NEED_SYNC_ARG(0)) {
4529          temp_sync(s, ots, allocated_regs, 0, 0);
4530      }
4531  }
4532  
4533  /*
4534   * Specialized code generation for INDEX_op_dup_vec.
4535   */
4536  static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op)
4537  {
4538      const TCGLifeData arg_life = op->life;
4539      TCGRegSet dup_out_regs, dup_in_regs;
4540      TCGTemp *its, *ots;
4541      TCGType itype, vtype;
4542      unsigned vece;
4543      int lowpart_ofs;
4544      bool ok;
4545  
4546      ots = arg_temp(op->args[0]);
4547      its = arg_temp(op->args[1]);
4548  
4549      /* ENV should not be modified.  */
4550      tcg_debug_assert(!temp_readonly(ots));
4551  
4552      itype = its->type;
4553      vece = TCGOP_VECE(op);
4554      vtype = TCGOP_VECL(op) + TCG_TYPE_V64;
4555  
4556      if (its->val_type == TEMP_VAL_CONST) {
4557          /* Propagate constant via movi -> dupi.  */
4558          tcg_target_ulong val = its->val;
4559          if (IS_DEAD_ARG(1)) {
4560              temp_dead(s, its);
4561          }
4562          tcg_reg_alloc_do_movi(s, ots, val, arg_life, output_pref(op, 0));
4563          return;
4564      }
4565  
4566      dup_out_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs;
4567      dup_in_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[1].regs;
4568  
4569      /* Allocate the output register now.  */
4570      if (ots->val_type != TEMP_VAL_REG) {
4571          TCGRegSet allocated_regs = s->reserved_regs;
4572          TCGReg oreg;
4573  
4574          if (!IS_DEAD_ARG(1) && its->val_type == TEMP_VAL_REG) {
4575              /* Make sure to not spill the input register. */
4576              tcg_regset_set_reg(allocated_regs, its->reg);
4577          }
4578          oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
4579                               output_pref(op, 0), ots->indirect_base);
4580          set_temp_val_reg(s, ots, oreg);
4581      }
4582  
4583      switch (its->val_type) {
4584      case TEMP_VAL_REG:
4585          /*
4586           * The dup constriaints must be broad, covering all possible VECE.
4587           * However, tcg_op_dup_vec() gets to see the VECE and we allow it
4588           * to fail, indicating that extra moves are required for that case.
4589           */
4590          if (tcg_regset_test_reg(dup_in_regs, its->reg)) {
4591              if (tcg_out_dup_vec(s, vtype, vece, ots->reg, its->reg)) {
4592                  goto done;
4593              }
4594              /* Try again from memory or a vector input register.  */
4595          }
4596          if (!its->mem_coherent) {
4597              /*
4598               * The input register is not synced, and so an extra store
4599               * would be required to use memory.  Attempt an integer-vector
4600               * register move first.  We do not have a TCGRegSet for this.
4601               */
4602              if (tcg_out_mov(s, itype, ots->reg, its->reg)) {
4603                  break;
4604              }
4605              /* Sync the temp back to its slot and load from there.  */
4606              temp_sync(s, its, s->reserved_regs, 0, 0);
4607          }
4608          /* fall through */
4609  
4610      case TEMP_VAL_MEM:
4611          lowpart_ofs = 0;
4612          if (HOST_BIG_ENDIAN) {
4613              lowpart_ofs = tcg_type_size(itype) - (1 << vece);
4614          }
4615          if (tcg_out_dupm_vec(s, vtype, vece, ots->reg, its->mem_base->reg,
4616                               its->mem_offset + lowpart_ofs)) {
4617              goto done;
4618          }
4619          /* Load the input into the destination vector register. */
4620          tcg_out_ld(s, itype, ots->reg, its->mem_base->reg, its->mem_offset);
4621          break;
4622  
4623      default:
4624          g_assert_not_reached();
4625      }
4626  
4627      /* We now have a vector input register, so dup must succeed. */
4628      ok = tcg_out_dup_vec(s, vtype, vece, ots->reg, ots->reg);
4629      tcg_debug_assert(ok);
4630  
4631   done:
4632      ots->mem_coherent = 0;
4633      if (IS_DEAD_ARG(1)) {
4634          temp_dead(s, its);
4635      }
4636      if (NEED_SYNC_ARG(0)) {
4637          temp_sync(s, ots, s->reserved_regs, 0, 0);
4638      }
4639      if (IS_DEAD_ARG(0)) {
4640          temp_dead(s, ots);
4641      }
4642  }
4643  
/*
 * Register allocation and code emission for a generic TCGOp:
 * satisfy the input constraints, allocate output registers,
 * emit the host instruction, then sync/kill outputs as required.
 */
static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
{
    const TCGLifeData arg_life = op->life;
    const TCGOpDef * const def = &tcg_op_defs[op->opc];
    TCGRegSet i_allocated_regs;
    TCGRegSet o_allocated_regs;
    int i, k, nb_iargs, nb_oargs;
    TCGReg reg;
    TCGArg arg;
    const TCGArgConstraint *arg_ct;
    TCGTemp *ts;
    TCGArg new_args[TCG_MAX_OP_ARGS];
    int const_args[TCG_MAX_OP_ARGS];

    nb_oargs = def->nb_oargs;
    nb_iargs = def->nb_iargs;

    /* copy constants */
    memcpy(new_args + nb_oargs + nb_iargs,
           op->args + nb_oargs + nb_iargs,
           sizeof(TCGArg) * def->nb_cargs);

    i_allocated_regs = s->reserved_regs;
    o_allocated_regs = s->reserved_regs;

    /* satisfy input constraints */
    for (k = 0; k < nb_iargs; k++) {
        TCGRegSet i_preferred_regs, i_required_regs;
        bool allocate_new_reg, copyto_new_reg;
        TCGTemp *ts2;
        int i1, i2;

        /* Process inputs in the constraint-defined sort order. */
        i = def->args_ct[nb_oargs + k].sort_index;
        arg = op->args[i];
        arg_ct = &def->args_ct[i];
        ts = arg_temp(arg);

        if (ts->val_type == TEMP_VAL_CONST
            && tcg_target_const_match(ts->val, ts->type, arg_ct->ct)) {
            /* constant is OK for instruction */
            const_args[i] = 1;
            new_args[i] = ts->val;
            continue;
        }

        reg = ts->reg;
        i_preferred_regs = 0;
        i_required_regs = arg_ct->regs;
        allocate_new_reg = false;
        copyto_new_reg = false;

        switch (arg_ct->pair) {
        case 0: /* not paired */
            if (arg_ct->ialias) {
                i_preferred_regs = output_pref(op, arg_ct->alias_index);

                /*
                 * If the input is readonly, then it cannot also be an
                 * output and aliased to itself.  If the input is not
                 * dead after the instruction, we must allocate a new
                 * register and move it.
                 */
                if (temp_readonly(ts) || !IS_DEAD_ARG(i)) {
                    allocate_new_reg = true;
                } else if (ts->val_type == TEMP_VAL_REG) {
                    /*
                     * Check if the current register has already been
                     * allocated for another input.
                     */
                    allocate_new_reg =
                        tcg_regset_test_reg(i_allocated_regs, reg);
                }
            }
            if (!allocate_new_reg) {
                temp_load(s, ts, i_required_regs, i_allocated_regs,
                          i_preferred_regs);
                reg = ts->reg;
                /* The loaded register may still violate the constraint. */
                allocate_new_reg = !tcg_regset_test_reg(i_required_regs, reg);
            }
            if (allocate_new_reg) {
                /*
                 * Allocate a new register matching the constraint
                 * and move the temporary register into it.
                 */
                temp_load(s, ts, tcg_target_available_regs[ts->type],
                          i_allocated_regs, 0);
                reg = tcg_reg_alloc(s, i_required_regs, i_allocated_regs,
                                    i_preferred_regs, ts->indirect_base);
                copyto_new_reg = true;
            }
            break;

        case 1:
            /* First of an input pair; if i1 == i2, the second is an output. */
            i1 = i;
            i2 = arg_ct->pair_index;
            ts2 = i1 != i2 ? arg_temp(op->args[i2]) : NULL;

            /*
             * It is easier to default to allocating a new pair
             * and to identify a few cases where it's not required.
             */
            if (arg_ct->ialias) {
                i_preferred_regs = output_pref(op, arg_ct->alias_index);
                if (IS_DEAD_ARG(i1) &&
                    IS_DEAD_ARG(i2) &&
                    !temp_readonly(ts) &&
                    ts->val_type == TEMP_VAL_REG &&
                    ts->reg < TCG_TARGET_NB_REGS - 1 &&
                    tcg_regset_test_reg(i_required_regs, reg) &&
                    !tcg_regset_test_reg(i_allocated_regs, reg) &&
                    !tcg_regset_test_reg(i_allocated_regs, reg + 1) &&
                    (ts2
                     ? ts2->val_type == TEMP_VAL_REG &&
                       ts2->reg == reg + 1 &&
                       !temp_readonly(ts2)
                     : s->reg_to_temp[reg + 1] == NULL)) {
                    break;
                }
            } else {
                /* Without aliasing, the pair must also be an input. */
                tcg_debug_assert(ts2);
                if (ts->val_type == TEMP_VAL_REG &&
                    ts2->val_type == TEMP_VAL_REG &&
                    ts2->reg == reg + 1 &&
                    tcg_regset_test_reg(i_required_regs, reg)) {
                    break;
                }
            }
            reg = tcg_reg_alloc_pair(s, i_required_regs, i_allocated_regs,
                                     0, ts->indirect_base);
            goto do_pair;

        case 2: /* pair second */
            /* The register is fixed by the first half of the pair. */
            reg = new_args[arg_ct->pair_index] + 1;
            goto do_pair;

        case 3: /* ialias with second output, no first input */
            tcg_debug_assert(arg_ct->ialias);
            i_preferred_regs = output_pref(op, arg_ct->alias_index);

            if (IS_DEAD_ARG(i) &&
                !temp_readonly(ts) &&
                ts->val_type == TEMP_VAL_REG &&
                reg > 0 &&
                s->reg_to_temp[reg - 1] == NULL &&
                tcg_regset_test_reg(i_required_regs, reg) &&
                !tcg_regset_test_reg(i_allocated_regs, reg) &&
                !tcg_regset_test_reg(i_allocated_regs, reg - 1)) {
                /* Reserve reg-1 for the first (output) half of the pair. */
                tcg_regset_set_reg(i_allocated_regs, reg - 1);
                break;
            }
            reg = tcg_reg_alloc_pair(s, i_required_regs >> 1,
                                     i_allocated_regs, 0,
                                     ts->indirect_base);
            tcg_regset_set_reg(i_allocated_regs, reg);
            reg += 1;
            goto do_pair;

        do_pair:
            /*
             * If an aliased input is not dead after the instruction,
             * we must allocate a new register and move it.
             */
            if (arg_ct->ialias && (!IS_DEAD_ARG(i) || temp_readonly(ts))) {
                TCGRegSet t_allocated_regs = i_allocated_regs;

                /*
                 * Because of the alias, and the continued life, make sure
                 * that the temp is somewhere *other* than the reg pair,
                 * and we get a copy in reg.
                 */
                tcg_regset_set_reg(t_allocated_regs, reg);
                tcg_regset_set_reg(t_allocated_regs, reg + 1);
                if (ts->val_type == TEMP_VAL_REG && ts->reg == reg) {
                    /* If ts was already in reg, copy it somewhere else. */
                    TCGReg nr;
                    bool ok;

                    tcg_debug_assert(ts->kind != TEMP_FIXED);
                    nr = tcg_reg_alloc(s, tcg_target_available_regs[ts->type],
                                       t_allocated_regs, 0, ts->indirect_base);
                    ok = tcg_out_mov(s, ts->type, nr, reg);
                    tcg_debug_assert(ok);

                    set_temp_val_reg(s, ts, nr);
                } else {
                    temp_load(s, ts, tcg_target_available_regs[ts->type],
                              t_allocated_regs, 0);
                    copyto_new_reg = true;
                }
            } else {
                /* Preferably allocate to reg, otherwise copy. */
                i_required_regs = (TCGRegSet)1 << reg;
                temp_load(s, ts, i_required_regs, i_allocated_regs,
                          i_preferred_regs);
                copyto_new_reg = ts->reg != reg;
            }
            break;

        default:
            g_assert_not_reached();
        }

        if (copyto_new_reg) {
            if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
                /*
                 * Cross register class move not supported.  Sync the
                 * temp back to its slot and load from there.
                 */
                temp_sync(s, ts, i_allocated_regs, 0, 0);
                tcg_out_ld(s, ts->type, reg,
                           ts->mem_base->reg, ts->mem_offset);
            }
        }
        new_args[i] = reg;
        const_args[i] = 0;
        tcg_regset_set_reg(i_allocated_regs, reg);
    }

    /* mark dead temporaries and free the associated registers */
    for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
        if (IS_DEAD_ARG(i)) {
            temp_dead(s, arg_temp(op->args[i]));
        }
    }

    if (def->flags & TCG_OPF_COND_BRANCH) {
        tcg_reg_alloc_cbranch(s, i_allocated_regs);
    } else if (def->flags & TCG_OPF_BB_END) {
        tcg_reg_alloc_bb_end(s, i_allocated_regs);
    } else {
        if (def->flags & TCG_OPF_CALL_CLOBBER) {
            /* XXX: permit generic clobber register list ? */
            for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
                if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
                    tcg_reg_free(s, i, i_allocated_regs);
                }
            }
        }
        if (def->flags & TCG_OPF_SIDE_EFFECTS) {
            /* sync globals if the op has side effects and might trigger
               an exception. */
            sync_globals(s, i_allocated_regs);
        }

        /* satisfy the output constraints */
        for(k = 0; k < nb_oargs; k++) {
            /* Process outputs in the constraint-defined sort order. */
            i = def->args_ct[k].sort_index;
            arg = op->args[i];
            arg_ct = &def->args_ct[i];
            ts = arg_temp(arg);

            /* ENV should not be modified.  */
            tcg_debug_assert(!temp_readonly(ts));

            switch (arg_ct->pair) {
            case 0: /* not paired */
                if (arg_ct->oalias && !const_args[arg_ct->alias_index]) {
                    /* Output shares the register of the aliased input. */
                    reg = new_args[arg_ct->alias_index];
                } else if (arg_ct->newreg) {
                    /* A new register: must not overlap any input. */
                    reg = tcg_reg_alloc(s, arg_ct->regs,
                                        i_allocated_regs | o_allocated_regs,
                                        output_pref(op, k), ts->indirect_base);
                } else {
                    reg = tcg_reg_alloc(s, arg_ct->regs, o_allocated_regs,
                                        output_pref(op, k), ts->indirect_base);
                }
                break;

            case 1: /* first of pair */
                tcg_debug_assert(!arg_ct->newreg);
                if (arg_ct->oalias) {
                    reg = new_args[arg_ct->alias_index];
                    break;
                }
                reg = tcg_reg_alloc_pair(s, arg_ct->regs, o_allocated_regs,
                                         output_pref(op, k), ts->indirect_base);
                break;

            case 2: /* second of pair */
                tcg_debug_assert(!arg_ct->newreg);
                if (arg_ct->oalias) {
                    reg = new_args[arg_ct->alias_index];
                } else {
                    reg = new_args[arg_ct->pair_index] + 1;
                }
                break;

            case 3: /* first of pair, aliasing with a second input */
                tcg_debug_assert(!arg_ct->newreg);
                reg = new_args[arg_ct->pair_index] - 1;
                break;

            default:
                g_assert_not_reached();
            }
            tcg_regset_set_reg(o_allocated_regs, reg);
            set_temp_val_reg(s, ts, reg);
            ts->mem_coherent = 0;
            new_args[i] = reg;
        }
    }

    /* emit instruction */
    switch (op->opc) {
    case INDEX_op_ext8s_i32:
        tcg_out_ext8s(s, TCG_TYPE_I32, new_args[0], new_args[1]);
        break;
    case INDEX_op_ext8s_i64:
        tcg_out_ext8s(s, TCG_TYPE_I64, new_args[0], new_args[1]);
        break;
    case INDEX_op_ext8u_i32:
    case INDEX_op_ext8u_i64:
        tcg_out_ext8u(s, new_args[0], new_args[1]);
        break;
    case INDEX_op_ext16s_i32:
        tcg_out_ext16s(s, TCG_TYPE_I32, new_args[0], new_args[1]);
        break;
    case INDEX_op_ext16s_i64:
        tcg_out_ext16s(s, TCG_TYPE_I64, new_args[0], new_args[1]);
        break;
    case INDEX_op_ext16u_i32:
    case INDEX_op_ext16u_i64:
        tcg_out_ext16u(s, new_args[0], new_args[1]);
        break;
    case INDEX_op_ext32s_i64:
        tcg_out_ext32s(s, new_args[0], new_args[1]);
        break;
    case INDEX_op_ext32u_i64:
        tcg_out_ext32u(s, new_args[0], new_args[1]);
        break;
    case INDEX_op_ext_i32_i64:
        tcg_out_exts_i32_i64(s, new_args[0], new_args[1]);
        break;
    case INDEX_op_extu_i32_i64:
        tcg_out_extu_i32_i64(s, new_args[0], new_args[1]);
        break;
    case INDEX_op_extrl_i64_i32:
        tcg_out_extrl_i64_i32(s, new_args[0], new_args[1]);
        break;
    default:
        if (def->flags & TCG_OPF_VECTOR) {
            tcg_out_vec_op(s, op->opc, TCGOP_VECL(op), TCGOP_VECE(op),
                           new_args, const_args);
        } else {
            tcg_out_op(s, op->opc, new_args, const_args);
        }
        break;
    }

    /* move the outputs in the correct register if needed */
    for(i = 0; i < nb_oargs; i++) {
        ts = arg_temp(op->args[i]);

        /* ENV should not be modified.  */
        tcg_debug_assert(!temp_readonly(ts));

        if (NEED_SYNC_ARG(i)) {
            temp_sync(s, ts, o_allocated_regs, 0, IS_DEAD_ARG(i));
        } else if (IS_DEAD_ARG(i)) {
            temp_dead(s, ts);
        }
    }
}
5009  
/*
 * Register allocation for INDEX_op_dup2_vec: build a vector whose 64-bit
 * elements are assembled from two 32-bit register inputs (32-bit hosts
 * only).  Returns true if handled here via dupi_vec/dupm_vec, false if
 * the caller must fall back to generic expansion.
 */
static bool tcg_reg_alloc_dup2(TCGContext *s, const TCGOp *op)
{
    const TCGLifeData arg_life = op->life;
    TCGTemp *ots, *itsl, *itsh;
    TCGType vtype = TCGOP_VECL(op) + TCG_TYPE_V64;

    /* This opcode is only valid for 32-bit hosts, for 64-bit elements. */
    tcg_debug_assert(TCG_TARGET_REG_BITS == 32);
    tcg_debug_assert(TCGOP_VECE(op) == MO_64);

    /* Output vector, then low and high input halves. */
    ots = arg_temp(op->args[0]);
    itsl = arg_temp(op->args[1]);
    itsh = arg_temp(op->args[2]);

    /* ENV should not be modified.  */
    tcg_debug_assert(!temp_readonly(ots));

    /* Allocate the output register now.  */
    if (ots->val_type != TEMP_VAL_REG) {
        TCGRegSet allocated_regs = s->reserved_regs;
        TCGRegSet dup_out_regs =
            tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs;
        TCGReg oreg;

        /* Make sure to not spill the input registers. */
        if (!IS_DEAD_ARG(1) && itsl->val_type == TEMP_VAL_REG) {
            tcg_regset_set_reg(allocated_regs, itsl->reg);
        }
        if (!IS_DEAD_ARG(2) && itsh->val_type == TEMP_VAL_REG) {
            tcg_regset_set_reg(allocated_regs, itsh->reg);
        }

        oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
                             output_pref(op, 0), ots->indirect_base);
        set_temp_val_reg(s, ots, oreg);
    }

    /* Promote dup2 of immediates to dupi_vec. */
    if (itsl->val_type == TEMP_VAL_CONST && itsh->val_type == TEMP_VAL_CONST) {
        uint64_t val = deposit64(itsl->val, 32, 32, itsh->val);
        MemOp vece = MO_64;

        /* Use the narrowest element size whose replication equals val. */
        if (val == dup_const(MO_8, val)) {
            vece = MO_8;
        } else if (val == dup_const(MO_16, val)) {
            vece = MO_16;
        } else if (val == dup_const(MO_32, val)) {
            vece = MO_32;
        }

        tcg_out_dupi_vec(s, vtype, vece, ots->reg, val);
        goto done;
    }

    /* If the two inputs form one 64-bit value, try dupm_vec. */
    if (itsl->temp_subindex == HOST_BIG_ENDIAN &&
        itsh->temp_subindex == !HOST_BIG_ENDIAN &&
        itsl == itsh + (HOST_BIG_ENDIAN ? 1 : -1)) {
        TCGTemp *its = itsl - HOST_BIG_ENDIAN;

        /* Sync both halves to memory so one 64-bit load can see them. */
        temp_sync(s, its + 0, s->reserved_regs, 0, 0);
        temp_sync(s, its + 1, s->reserved_regs, 0, 0);

        if (tcg_out_dupm_vec(s, vtype, MO_64, ots->reg,
                             its->mem_base->reg, its->mem_offset)) {
            goto done;
        }
    }

    /* Fall back to generic expansion. */
    return false;

 done:
    /* The register value is now the only up-to-date copy. */
    ots->mem_coherent = 0;
    if (IS_DEAD_ARG(1)) {
        temp_dead(s, itsl);
    }
    if (IS_DEAD_ARG(2)) {
        temp_dead(s, itsh);
    }
    if (NEED_SYNC_ARG(0)) {
        temp_sync(s, ots, s->reserved_regs, 0, IS_DEAD_ARG(0));
    } else if (IS_DEAD_ARG(0)) {
        temp_dead(s, ots);
    }
    return true;
}
5097  
5098  static void load_arg_reg(TCGContext *s, TCGReg reg, TCGTemp *ts,
5099                           TCGRegSet allocated_regs)
5100  {
5101      if (ts->val_type == TEMP_VAL_REG) {
5102          if (ts->reg != reg) {
5103              tcg_reg_free(s, reg, allocated_regs);
5104              if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
5105                  /*
5106                   * Cross register class move not supported.  Sync the
5107                   * temp back to its slot and load from there.
5108                   */
5109                  temp_sync(s, ts, allocated_regs, 0, 0);
5110                  tcg_out_ld(s, ts->type, reg,
5111                             ts->mem_base->reg, ts->mem_offset);
5112              }
5113          }
5114      } else {
5115          TCGRegSet arg_set = 0;
5116  
5117          tcg_reg_free(s, reg, allocated_regs);
5118          tcg_regset_set_reg(arg_set, reg);
5119          temp_load(s, ts, arg_set, allocated_regs, 0);
5120      }
5121  }
5122  
5123  static void load_arg_stk(TCGContext *s, unsigned arg_slot, TCGTemp *ts,
5124                           TCGRegSet allocated_regs)
5125  {
5126      /*
5127       * When the destination is on the stack, load up the temp and store.
5128       * If there are many call-saved registers, the temp might live to
5129       * see another use; otherwise it'll be discarded.
5130       */
5131      temp_load(s, ts, tcg_target_available_regs[ts->type], allocated_regs, 0);
5132      tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK,
5133                 arg_slot_stk_ofs(arg_slot));
5134  }
5135  
5136  static void load_arg_normal(TCGContext *s, const TCGCallArgumentLoc *l,
5137                              TCGTemp *ts, TCGRegSet *allocated_regs)
5138  {
5139      if (arg_slot_reg_p(l->arg_slot)) {
5140          TCGReg reg = tcg_target_call_iarg_regs[l->arg_slot];
5141          load_arg_reg(s, reg, ts, *allocated_regs);
5142          tcg_regset_set_reg(*allocated_regs, reg);
5143      } else {
5144          load_arg_stk(s, l->arg_slot, ts, *allocated_regs);
5145      }
5146  }
5147  
5148  static void load_arg_ref(TCGContext *s, unsigned arg_slot, TCGReg ref_base,
5149                           intptr_t ref_off, TCGRegSet *allocated_regs)
5150  {
5151      TCGReg reg;
5152  
5153      if (arg_slot_reg_p(arg_slot)) {
5154          reg = tcg_target_call_iarg_regs[arg_slot];
5155          tcg_reg_free(s, reg, *allocated_regs);
5156          tcg_out_addi_ptr(s, reg, ref_base, ref_off);
5157          tcg_regset_set_reg(*allocated_regs, reg);
5158      } else {
5159          reg = tcg_reg_alloc(s, tcg_target_available_regs[TCG_TYPE_PTR],
5160                              *allocated_regs, 0, false);
5161          tcg_out_addi_ptr(s, reg, ref_base, ref_off);
5162          tcg_out_st(s, TCG_TYPE_PTR, reg, TCG_REG_CALL_STACK,
5163                     arg_slot_stk_ofs(arg_slot));
5164      }
5165  }
5166  
/*
 * Register allocation and code emission for a helper call op.
 * Loads the inputs per the ABI described by the op's TCGHelperInfo,
 * clobbers the call-clobbered registers, emits the call itself, and
 * finally places the outputs.
 */
static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
{
    const int nb_oargs = TCGOP_CALLO(op);
    const int nb_iargs = TCGOP_CALLI(op);
    const TCGLifeData arg_life = op->life;
    const TCGHelperInfo *info = tcg_call_info(op);
    TCGRegSet allocated_regs = s->reserved_regs;
    int i;

    /*
     * Move inputs into place in reverse order,
     * so that we place stacked arguments first.
     */
    for (i = nb_iargs - 1; i >= 0; --i) {
        const TCGCallArgumentLoc *loc = &info->in[i];
        TCGTemp *ts = arg_temp(op->args[nb_oargs + i]);

        switch (loc->kind) {
        case TCG_CALL_ARG_NORMAL:
        case TCG_CALL_ARG_EXTEND_U:
        case TCG_CALL_ARG_EXTEND_S:
            load_arg_normal(s, loc, ts, &allocated_regs);
            break;
        case TCG_CALL_ARG_BY_REF:
            /* Store the value to the stack, then pass its address. */
            load_arg_stk(s, loc->ref_slot, ts, allocated_regs);
            load_arg_ref(s, loc->arg_slot, TCG_REG_CALL_STACK,
                         arg_slot_stk_ofs(loc->ref_slot),
                         &allocated_regs);
            break;
        case TCG_CALL_ARG_BY_REF_N:
            /* Further piece of a by-reference argument: store only. */
            load_arg_stk(s, loc->ref_slot, ts, allocated_regs);
            break;
        default:
            g_assert_not_reached();
        }
    }

    /* Mark dead temporaries and free the associated registers.  */
    for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
        if (IS_DEAD_ARG(i)) {
            temp_dead(s, arg_temp(op->args[i]));
        }
    }

    /* Clobber call registers.  */
    for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
        if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
            tcg_reg_free(s, i, allocated_regs);
        }
    }

    /*
     * Save globals if they might be written by the helper,
     * sync them if they might be read.
     */
    if (info->flags & TCG_CALL_NO_READ_GLOBALS) {
        /* Nothing to do */
    } else if (info->flags & TCG_CALL_NO_WRITE_GLOBALS) {
        sync_globals(s, allocated_regs);
    } else {
        save_globals(s, allocated_regs);
    }

    /*
     * If the ABI passes a pointer to the returned struct as the first
     * argument, load that now.  Pass a pointer to the output home slot.
     */
    if (info->out_kind == TCG_CALL_RET_BY_REF) {
        TCGTemp *ts = arg_temp(op->args[0]);

        if (!ts->mem_allocated) {
            temp_allocate_frame(s, ts);
        }
        load_arg_ref(s, 0, ts->mem_base->reg, ts->mem_offset, &allocated_regs);
    }

    tcg_out_call(s, tcg_call_func(op), info);

    /* Assign output registers and emit moves if needed.  */
    switch (info->out_kind) {
    case TCG_CALL_RET_NORMAL:
        for (i = 0; i < nb_oargs; i++) {
            TCGTemp *ts = arg_temp(op->args[i]);
            TCGReg reg = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, i);

            /* ENV should not be modified.  */
            tcg_debug_assert(!temp_readonly(ts));

            set_temp_val_reg(s, ts, reg);
            ts->mem_coherent = 0;
        }
        break;

    case TCG_CALL_RET_BY_VEC:
        /* Spill the vector return register to the output's home slot. */
        {
            TCGTemp *ts = arg_temp(op->args[0]);

            tcg_debug_assert(ts->base_type == TCG_TYPE_I128);
            tcg_debug_assert(ts->temp_subindex == 0);
            if (!ts->mem_allocated) {
                temp_allocate_frame(s, ts);
            }
            tcg_out_st(s, TCG_TYPE_V128,
                       tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0),
                       ts->mem_base->reg, ts->mem_offset);
        }
        /* fall through to mark all parts in memory */

    case TCG_CALL_RET_BY_REF:
        /* The callee has performed a write through the reference. */
        for (i = 0; i < nb_oargs; i++) {
            TCGTemp *ts = arg_temp(op->args[i]);
            ts->val_type = TEMP_VAL_MEM;
        }
        break;

    default:
        g_assert_not_reached();
    }

    /* Flush or discard output registers as needed. */
    for (i = 0; i < nb_oargs; i++) {
        TCGTemp *ts = arg_temp(op->args[i]);
        if (NEED_SYNC_ARG(i)) {
            temp_sync(s, ts, s->reserved_regs, 0, IS_DEAD_ARG(i));
        } else if (IS_DEAD_ARG(i)) {
            temp_dead(s, ts);
        }
    }
}
5297  
5298  /**
5299   * atom_and_align_for_opc:
5300   * @s: tcg context
5301   * @opc: memory operation code
5302   * @host_atom: MO_ATOM_{IFALIGN,WITHIN16,SUBALIGN} for host operations
5303   * @allow_two_ops: true if we are prepared to issue two operations
5304   *
5305   * Return the alignment and atomicity to use for the inline fast path
5306   * for the given memory operation.  The alignment may be larger than
5307   * that specified in @opc, and the correct alignment will be diagnosed
5308   * by the slow path helper.
5309   *
5310   * If @allow_two_ops, the host is prepared to test for 2x alignment,
5311   * and issue two loads or stores for subalignment.
5312   */
5313  static TCGAtomAlign atom_and_align_for_opc(TCGContext *s, MemOp opc,
5314                                             MemOp host_atom, bool allow_two_ops)
5315  {
5316      MemOp align = get_alignment_bits(opc);
5317      MemOp size = opc & MO_SIZE;
5318      MemOp half = size ? size - 1 : 0;
5319      MemOp atmax;
5320      MemOp atom;
5321  
5322      /* When serialized, no further atomicity required.  */
5323      if (s->gen_tb->cflags & CF_PARALLEL) {
5324          atom = opc & MO_ATOM_MASK;
5325      } else {
5326          atom = MO_ATOM_NONE;
5327      }
5328  
5329      switch (atom) {
5330      case MO_ATOM_NONE:
5331          /* The operation requires no specific atomicity. */
5332          atmax = MO_8;
5333          break;
5334  
5335      case MO_ATOM_IFALIGN:
5336          atmax = size;
5337          break;
5338  
5339      case MO_ATOM_IFALIGN_PAIR:
5340          atmax = half;
5341          break;
5342  
5343      case MO_ATOM_WITHIN16:
5344          atmax = size;
5345          if (size == MO_128) {
5346              /* Misalignment implies !within16, and therefore no atomicity. */
5347          } else if (host_atom != MO_ATOM_WITHIN16) {
5348              /* The host does not implement within16, so require alignment. */
5349              align = MAX(align, size);
5350          }
5351          break;
5352  
5353      case MO_ATOM_WITHIN16_PAIR:
5354          atmax = size;
5355          /*
5356           * Misalignment implies !within16, and therefore half atomicity.
5357           * Any host prepared for two operations can implement this with
5358           * half alignment.
5359           */
5360          if (host_atom != MO_ATOM_WITHIN16 && allow_two_ops) {
5361              align = MAX(align, half);
5362          }
5363          break;
5364  
5365      case MO_ATOM_SUBALIGN:
5366          atmax = size;
5367          if (host_atom != MO_ATOM_SUBALIGN) {
5368              /* If unaligned but not odd, there are subobjects up to half. */
5369              if (allow_two_ops) {
5370                  align = MAX(align, half);
5371              } else {
5372                  align = MAX(align, size);
5373              }
5374          }
5375          break;
5376  
5377      default:
5378          g_assert_not_reached();
5379      }
5380  
5381      return (TCGAtomAlign){ .atom = atmax, .align = align };
5382  }
5383  
5384  /*
5385   * Similarly for qemu_ld/st slow path helpers.
5386   * We must re-implement tcg_gen_callN and tcg_reg_alloc_call simultaneously,
5387   * using only the provided backend tcg_out_* functions.
5388   */
5389  
5390  static int tcg_out_helper_stk_ofs(TCGType type, unsigned slot)
5391  {
5392      int ofs = arg_slot_stk_ofs(slot);
5393  
5394      /*
5395       * Each stack slot is TCG_TARGET_LONG_BITS.  If the host does not
5396       * require extension to uint64_t, adjust the address for uint32_t.
5397       */
5398      if (HOST_BIG_ENDIAN &&
5399          TCG_TARGET_REG_BITS == 64 &&
5400          type == TCG_TYPE_I32) {
5401          ofs += 4;
5402      }
5403      return ofs;
5404  }
5405  
5406  static void tcg_out_helper_load_slots(TCGContext *s,
5407                                        unsigned nmov, TCGMovExtend *mov,
5408                                        const TCGLdstHelperParam *parm)
5409  {
5410      unsigned i;
5411      TCGReg dst3;
5412  
5413      /*
5414       * Start from the end, storing to the stack first.
5415       * This frees those registers, so we need not consider overlap.
5416       */
5417      for (i = nmov; i-- > 0; ) {
5418          unsigned slot = mov[i].dst;
5419  
5420          if (arg_slot_reg_p(slot)) {
5421              goto found_reg;
5422          }
5423  
5424          TCGReg src = mov[i].src;
5425          TCGType dst_type = mov[i].dst_type;
5426          MemOp dst_mo = dst_type == TCG_TYPE_I32 ? MO_32 : MO_64;
5427  
5428          /* The argument is going onto the stack; extend into scratch. */
5429          if ((mov[i].src_ext & MO_SIZE) != dst_mo) {
5430              tcg_debug_assert(parm->ntmp != 0);
5431              mov[i].dst = src = parm->tmp[0];
5432              tcg_out_movext1(s, &mov[i]);
5433          }
5434  
5435          tcg_out_st(s, dst_type, src, TCG_REG_CALL_STACK,
5436                     tcg_out_helper_stk_ofs(dst_type, slot));
5437      }
5438      return;
5439  
5440   found_reg:
5441      /*
5442       * The remaining arguments are in registers.
5443       * Convert slot numbers to argument registers.
5444       */
5445      nmov = i + 1;
5446      for (i = 0; i < nmov; ++i) {
5447          mov[i].dst = tcg_target_call_iarg_regs[mov[i].dst];
5448      }
5449  
5450      switch (nmov) {
5451      case 4:
5452          /* The backend must have provided enough temps for the worst case. */
5453          tcg_debug_assert(parm->ntmp >= 2);
5454  
5455          dst3 = mov[3].dst;
5456          for (unsigned j = 0; j < 3; ++j) {
5457              if (dst3 == mov[j].src) {
5458                  /*
5459                   * Conflict. Copy the source to a temporary, perform the
5460                   * remaining moves, then the extension from our scratch
5461                   * on the way out.
5462                   */
5463                  TCGReg scratch = parm->tmp[1];
5464  
5465                  tcg_out_mov(s, mov[3].src_type, scratch, mov[3].src);
5466                  tcg_out_movext3(s, mov, mov + 1, mov + 2, parm->tmp[0]);
5467                  tcg_out_movext1_new_src(s, &mov[3], scratch);
5468                  break;
5469              }
5470          }
5471  
5472          /* No conflicts: perform this move and continue. */
5473          tcg_out_movext1(s, &mov[3]);
5474          /* fall through */
5475  
5476      case 3:
5477          tcg_out_movext3(s, mov, mov + 1, mov + 2,
5478                          parm->ntmp ? parm->tmp[0] : -1);
5479          break;
5480      case 2:
5481          tcg_out_movext2(s, mov, mov + 1,
5482                          parm->ntmp ? parm->tmp[0] : -1);
5483          break;
5484      case 1:
5485          tcg_out_movext1(s, mov);
5486          break;
5487      default:
5488          g_assert_not_reached();
5489      }
5490  }
5491  
5492  static void tcg_out_helper_load_imm(TCGContext *s, unsigned slot,
5493                                      TCGType type, tcg_target_long imm,
5494                                      const TCGLdstHelperParam *parm)
5495  {
5496      if (arg_slot_reg_p(slot)) {
5497          tcg_out_movi(s, type, tcg_target_call_iarg_regs[slot], imm);
5498      } else {
5499          int ofs = tcg_out_helper_stk_ofs(type, slot);
5500          if (!tcg_out_sti(s, type, imm, TCG_REG_CALL_STACK, ofs)) {
5501              tcg_debug_assert(parm->ntmp != 0);
5502              tcg_out_movi(s, type, parm->tmp[0], imm);
5503              tcg_out_st(s, type, parm->tmp[0], TCG_REG_CALL_STACK, ofs);
5504          }
5505      }
5506  }
5507  
5508  static void tcg_out_helper_load_common_args(TCGContext *s,
5509                                              const TCGLabelQemuLdst *ldst,
5510                                              const TCGLdstHelperParam *parm,
5511                                              const TCGHelperInfo *info,
5512                                              unsigned next_arg)
5513  {
5514      TCGMovExtend ptr_mov = {
5515          .dst_type = TCG_TYPE_PTR,
5516          .src_type = TCG_TYPE_PTR,
5517          .src_ext = sizeof(void *) == 4 ? MO_32 : MO_64
5518      };
5519      const TCGCallArgumentLoc *loc = &info->in[0];
5520      TCGType type;
5521      unsigned slot;
5522      tcg_target_ulong imm;
5523  
5524      /*
5525       * Handle env, which is always first.
5526       */
5527      ptr_mov.dst = loc->arg_slot;
5528      ptr_mov.src = TCG_AREG0;
5529      tcg_out_helper_load_slots(s, 1, &ptr_mov, parm);
5530  
5531      /*
5532       * Handle oi.
5533       */
5534      imm = ldst->oi;
5535      loc = &info->in[next_arg];
5536      type = TCG_TYPE_I32;
5537      switch (loc->kind) {
5538      case TCG_CALL_ARG_NORMAL:
5539          break;
5540      case TCG_CALL_ARG_EXTEND_U:
5541      case TCG_CALL_ARG_EXTEND_S:
5542          /* No extension required for MemOpIdx. */
5543          tcg_debug_assert(imm <= INT32_MAX);
5544          type = TCG_TYPE_REG;
5545          break;
5546      default:
5547          g_assert_not_reached();
5548      }
5549      tcg_out_helper_load_imm(s, loc->arg_slot, type, imm, parm);
5550      next_arg++;
5551  
5552      /*
5553       * Handle ra.
5554       */
5555      loc = &info->in[next_arg];
5556      slot = loc->arg_slot;
5557      if (parm->ra_gen) {
5558          int arg_reg = -1;
5559          TCGReg ra_reg;
5560  
5561          if (arg_slot_reg_p(slot)) {
5562              arg_reg = tcg_target_call_iarg_regs[slot];
5563          }
5564          ra_reg = parm->ra_gen(s, ldst, arg_reg);
5565  
5566          ptr_mov.dst = slot;
5567          ptr_mov.src = ra_reg;
5568          tcg_out_helper_load_slots(s, 1, &ptr_mov, parm);
5569      } else {
5570          imm = (uintptr_t)ldst->raddr;
5571          tcg_out_helper_load_imm(s, slot, TCG_TYPE_PTR, imm, parm);
5572      }
5573  }
5574  
5575  static unsigned tcg_out_helper_add_mov(TCGMovExtend *mov,
5576                                         const TCGCallArgumentLoc *loc,
5577                                         TCGType dst_type, TCGType src_type,
5578                                         TCGReg lo, TCGReg hi)
5579  {
5580      MemOp reg_mo;
5581  
5582      if (dst_type <= TCG_TYPE_REG) {
5583          MemOp src_ext;
5584  
5585          switch (loc->kind) {
5586          case TCG_CALL_ARG_NORMAL:
5587              src_ext = src_type == TCG_TYPE_I32 ? MO_32 : MO_64;
5588              break;
5589          case TCG_CALL_ARG_EXTEND_U:
5590              dst_type = TCG_TYPE_REG;
5591              src_ext = MO_UL;
5592              break;
5593          case TCG_CALL_ARG_EXTEND_S:
5594              dst_type = TCG_TYPE_REG;
5595              src_ext = MO_SL;
5596              break;
5597          default:
5598              g_assert_not_reached();
5599          }
5600  
5601          mov[0].dst = loc->arg_slot;
5602          mov[0].dst_type = dst_type;
5603          mov[0].src = lo;
5604          mov[0].src_type = src_type;
5605          mov[0].src_ext = src_ext;
5606          return 1;
5607      }
5608  
5609      if (TCG_TARGET_REG_BITS == 32) {
5610          assert(dst_type == TCG_TYPE_I64);
5611          reg_mo = MO_32;
5612      } else {
5613          assert(dst_type == TCG_TYPE_I128);
5614          reg_mo = MO_64;
5615      }
5616  
5617      mov[0].dst = loc[HOST_BIG_ENDIAN].arg_slot;
5618      mov[0].src = lo;
5619      mov[0].dst_type = TCG_TYPE_REG;
5620      mov[0].src_type = TCG_TYPE_REG;
5621      mov[0].src_ext = reg_mo;
5622  
5623      mov[1].dst = loc[!HOST_BIG_ENDIAN].arg_slot;
5624      mov[1].src = hi;
5625      mov[1].dst_type = TCG_TYPE_REG;
5626      mov[1].src_type = TCG_TYPE_REG;
5627      mov[1].src_ext = reg_mo;
5628  
5629      return 2;
5630  }
5631  
/*
 * Assemble the arguments for a qemu_ld slow-path helper call described
 * by @ldst.  Selects the ld32/ld64/ld128 helper signature from the
 * access size, loads the guest address, prepares any by-reference
 * return slot, then finishes with the common env/oi/ra arguments.
 */
static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
                                   const TCGLdstHelperParam *parm)
{
    const TCGHelperInfo *info;
    const TCGCallArgumentLoc *loc;
    TCGMovExtend mov[2];
    unsigned next_arg, nmov;
    MemOp mop = get_memop(ldst->oi);

    /* Select the helper signature by access size. */
    switch (mop & MO_SIZE) {
    case MO_8:
    case MO_16:
    case MO_32:
        info = &info_helper_ld32_mmu;
        break;
    case MO_64:
        info = &info_helper_ld64_mmu;
        break;
    case MO_128:
        info = &info_helper_ld128_mmu;
        break;
    default:
        g_assert_not_reached();
    }

    /* Defer env argument. */
    next_arg = 1;

    loc = &info->in[next_arg];
    if (TCG_TARGET_REG_BITS == 32 && s->addr_type == TCG_TYPE_I32) {
        /*
         * 32-bit host with 32-bit guest: zero-extend the guest address
         * to 64-bits for the helper by storing the low part, then
         * load a zero for the high part.
         */
        tcg_out_helper_add_mov(mov, loc + HOST_BIG_ENDIAN,
                               TCG_TYPE_I32, TCG_TYPE_I32,
                               ldst->addrlo_reg, -1);
        tcg_out_helper_load_slots(s, 1, mov, parm);

        tcg_out_helper_load_imm(s, loc[!HOST_BIG_ENDIAN].arg_slot,
                                TCG_TYPE_I32, 0, parm);
        next_arg += 2;
    } else {
        /* One or two moves, depending on host width vs address type. */
        nmov = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, s->addr_type,
                                      ldst->addrlo_reg, ldst->addrhi_reg);
        tcg_out_helper_load_slots(s, nmov, mov, parm);
        next_arg += nmov;
    }

    switch (info->out_kind) {
    case TCG_CALL_RET_NORMAL:
    case TCG_CALL_RET_BY_VEC:
        break;
    case TCG_CALL_RET_BY_REF:
        /*
         * The return reference is in the first argument slot.
         * We need memory in which to return: re-use the top of stack.
         */
        {
            int ofs_slot0 = TCG_TARGET_CALL_STACK_OFFSET;

            if (arg_slot_reg_p(0)) {
                tcg_out_addi_ptr(s, tcg_target_call_iarg_regs[0],
                                 TCG_REG_CALL_STACK, ofs_slot0);
            } else {
                tcg_debug_assert(parm->ntmp != 0);
                tcg_out_addi_ptr(s, parm->tmp[0],
                                 TCG_REG_CALL_STACK, ofs_slot0);
                tcg_out_st(s, TCG_TYPE_PTR, parm->tmp[0],
                           TCG_REG_CALL_STACK, ofs_slot0);
            }
        }
        break;
    default:
        g_assert_not_reached();
    }

    tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg);
}
5712  
/*
 * Move a qemu_ld slow-path helper's return value into the data
 * register(s) recorded in @ldst.  If @load_sign, the helper has
 * already performed the sign extension to tcg_target_ulong and a
 * plain move suffices.
 */
static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *ldst,
                                  bool load_sign,
                                  const TCGLdstHelperParam *parm)
{
    MemOp mop = get_memop(ldst->oi);
    TCGMovExtend mov[2];
    int ofs_slot0;

    switch (ldst->type) {
    case TCG_TYPE_I64:
        /* On 32-bit hosts an I64 needs two registers; handled below. */
        if (TCG_TARGET_REG_BITS == 32) {
            break;
        }
        /* fall through */

    case TCG_TYPE_I32:
        /* Single-register result: one extending move. */
        mov[0].dst = ldst->datalo_reg;
        mov[0].src = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, 0);
        mov[0].dst_type = ldst->type;
        mov[0].src_type = TCG_TYPE_REG;

        /*
         * If load_sign, then we allowed the helper to perform the
         * appropriate sign extension to tcg_target_ulong, and all
         * we need now is a plain move.
         *
         * If they do not, then we expect the relevant extension
         * instruction to be no more expensive than a move, and
         * we thus save the icache etc by only using one of two
         * helper functions.
         */
        if (load_sign || !(mop & MO_SIGN)) {
            if (TCG_TARGET_REG_BITS == 32 || ldst->type == TCG_TYPE_I32) {
                mov[0].src_ext = MO_32;
            } else {
                mov[0].src_ext = MO_64;
            }
        } else {
            mov[0].src_ext = mop & MO_SSIZE;
        }
        tcg_out_movext1(s, mov);
        return;

    case TCG_TYPE_I128:
        tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
        ofs_slot0 = TCG_TARGET_CALL_STACK_OFFSET;
        switch (TCG_TARGET_CALL_RET_I128) {
        case TCG_CALL_RET_NORMAL:
            /* Returned in registers: use the two-move path below. */
            break;
        case TCG_CALL_RET_BY_VEC:
            /* Spill the vector return, then reload as two I64 halves. */
            tcg_out_st(s, TCG_TYPE_V128,
                       tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0),
                       TCG_REG_CALL_STACK, ofs_slot0);
            /* fall through */
        case TCG_CALL_RET_BY_REF:
            /* The value is in memory at the top of stack. */
            tcg_out_ld(s, TCG_TYPE_I64, ldst->datalo_reg,
                       TCG_REG_CALL_STACK, ofs_slot0 + 8 * HOST_BIG_ENDIAN);
            tcg_out_ld(s, TCG_TYPE_I64, ldst->datahi_reg,
                       TCG_REG_CALL_STACK, ofs_slot0 + 8 * !HOST_BIG_ENDIAN);
            return;
        default:
            g_assert_not_reached();
        }
        break;

    default:
        g_assert_not_reached();
    }

    /* Two-register result: move both halves, handling overlap. */
    mov[0].dst = ldst->datalo_reg;
    mov[0].src =
        tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, HOST_BIG_ENDIAN);
    mov[0].dst_type = TCG_TYPE_REG;
    mov[0].src_type = TCG_TYPE_REG;
    mov[0].src_ext = TCG_TARGET_REG_BITS == 32 ? MO_32 : MO_64;

    mov[1].dst = ldst->datahi_reg;
    mov[1].src =
        tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, !HOST_BIG_ENDIAN);
    mov[1].dst_type = TCG_TYPE_REG;
    mov[1].src_type = TCG_TYPE_REG;
    mov[1].src_ext = TCG_TARGET_REG_BITS == 32 ? MO_32 : MO_64;

    tcg_out_movext2(s, mov, mov + 1, parm->ntmp ? parm->tmp[0] : -1);
}
5798  
/*
 * Assemble the arguments for a qemu_st slow-path helper call described
 * by @ldst.  Selects the st32/st64/st128 helper signature from the
 * access size, loads the guest address and the data value (directly or
 * by reference for I128), then finishes with the common env/oi/ra
 * arguments.
 */
static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
                                   const TCGLdstHelperParam *parm)
{
    const TCGHelperInfo *info;
    const TCGCallArgumentLoc *loc;
    TCGMovExtend mov[4];
    TCGType data_type;
    unsigned next_arg, nmov, n;
    MemOp mop = get_memop(ldst->oi);

    /* Select the helper signature by access size. */
    switch (mop & MO_SIZE) {
    case MO_8:
    case MO_16:
    case MO_32:
        info = &info_helper_st32_mmu;
        data_type = TCG_TYPE_I32;
        break;
    case MO_64:
        info = &info_helper_st64_mmu;
        data_type = TCG_TYPE_I64;
        break;
    case MO_128:
        info = &info_helper_st128_mmu;
        data_type = TCG_TYPE_I128;
        break;
    default:
        g_assert_not_reached();
    }

    /* Defer env argument. */
    next_arg = 1;
    nmov = 0;

    /* Handle addr argument. */
    loc = &info->in[next_arg];
    if (TCG_TARGET_REG_BITS == 32 && s->addr_type == TCG_TYPE_I32) {
        /*
         * 32-bit host with 32-bit guest: zero-extend the guest address
         * to 64-bits for the helper by storing the low part.  Later,
         * after we have processed the register inputs, we will load a
         * zero for the high part.
         */
        tcg_out_helper_add_mov(mov, loc + HOST_BIG_ENDIAN,
                               TCG_TYPE_I32, TCG_TYPE_I32,
                               ldst->addrlo_reg, -1);
        next_arg += 2;
        nmov += 1;
    } else {
        n = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, s->addr_type,
                                   ldst->addrlo_reg, ldst->addrhi_reg);
        next_arg += n;
        nmov += n;
    }

    /* Handle data argument. */
    loc = &info->in[next_arg];
    switch (loc->kind) {
    case TCG_CALL_ARG_NORMAL:
    case TCG_CALL_ARG_EXTEND_U:
    case TCG_CALL_ARG_EXTEND_S:
        /* Data passed by value: queue its move(s) and emit everything. */
        n = tcg_out_helper_add_mov(mov + nmov, loc, data_type, ldst->type,
                                   ldst->datalo_reg, ldst->datahi_reg);
        next_arg += n;
        nmov += n;
        tcg_out_helper_load_slots(s, nmov, mov, parm);
        break;

    case TCG_CALL_ARG_BY_REF:
        /* I128 passed by reference: store both halves, pass the address. */
        tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
        tcg_debug_assert(data_type == TCG_TYPE_I128);
        tcg_out_st(s, TCG_TYPE_I64,
                   HOST_BIG_ENDIAN ? ldst->datahi_reg : ldst->datalo_reg,
                   TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc[0].ref_slot));
        tcg_out_st(s, TCG_TYPE_I64,
                   HOST_BIG_ENDIAN ? ldst->datalo_reg : ldst->datahi_reg,
                   TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc[1].ref_slot));

        tcg_out_helper_load_slots(s, nmov, mov, parm);

        if (arg_slot_reg_p(loc->arg_slot)) {
            tcg_out_addi_ptr(s, tcg_target_call_iarg_regs[loc->arg_slot],
                             TCG_REG_CALL_STACK,
                             arg_slot_stk_ofs(loc->ref_slot));
        } else {
            tcg_debug_assert(parm->ntmp != 0);
            tcg_out_addi_ptr(s, parm->tmp[0], TCG_REG_CALL_STACK,
                             arg_slot_stk_ofs(loc->ref_slot));
            tcg_out_st(s, TCG_TYPE_PTR, parm->tmp[0],
                       TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc->arg_slot));
        }
        next_arg += 2;
        break;

    default:
        g_assert_not_reached();
    }

    if (TCG_TARGET_REG_BITS == 32 && s->addr_type == TCG_TYPE_I32) {
        /* Zero extend the address by loading a zero for the high part. */
        loc = &info->in[1 + !HOST_BIG_ENDIAN];
        tcg_out_helper_load_imm(s, loc->arg_slot, TCG_TYPE_I32, 0, parm);
    }

    tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg);
}
5904  
/*
 * Append TCG opcode-count statistics to @buf.
 * Profiling support is not compiled into this build, so only a
 * placeholder line is emitted.
 */
void tcg_dump_op_count(GString *buf)
{
    g_string_append_printf(buf, "[TCG profiler not compiled]\n");
}
5909  
/*
 * Generate host machine code for the TCGOp list in @s, writing it into
 * the code buffer of @tb.  @pc_start is used only to filter -d logging.
 *
 * Returns the number of bytes of host code emitted on success.
 * Negative values request a restart by the caller:
 *   -1: the output buffer high-water mark was crossed;
 *   -2: an insn end offset would not fit in 16 bits, or relocations
 *       could not be resolved;
 * other negative values are propagated from backend finalization.
 */
int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start)
{
    int i, start_words, num_insns;
    TCGOp *op;

    /* Dump the opcode stream before any optimization (-d op). */
    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)
                 && qemu_log_in_addr_range(pc_start))) {
        FILE *logfile = qemu_log_trylock();
        if (logfile) {
            fprintf(logfile, "OP:\n");
            tcg_dump_ops(s, logfile, false);
            fprintf(logfile, "\n");
            qemu_log_unlock(logfile);
        }
    }

#ifdef CONFIG_DEBUG_TCG
    /* Ensure all labels referenced have been emitted.  */
    {
        TCGLabel *l;
        bool error = false;

        QSIMPLEQ_FOREACH(l, &s->labels, next) {
            /* A label with branches but no definition is a frontend bug. */
            if (unlikely(!l->present) && !QSIMPLEQ_EMPTY(&l->branches)) {
                qemu_log_mask(CPU_LOG_TB_OP,
                              "$L%d referenced but not present.\n", l->id);
                error = true;
            }
        }
        assert(!error);
    }
#endif

    tcg_optimize(s);

    /* Remove unreachable ops, then compute temp liveness. */
    reachable_code_pass(s);
    liveness_pass_0(s);
    liveness_pass_1(s);

    if (s->nb_indirects > 0) {
        if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
                     && qemu_log_in_addr_range(pc_start))) {
            FILE *logfile = qemu_log_trylock();
            if (logfile) {
                fprintf(logfile, "OP before indirect lowering:\n");
                tcg_dump_ops(s, logfile, false);
                fprintf(logfile, "\n");
                qemu_log_unlock(logfile);
            }
        }

        /* Replace indirect temps with direct temps.  */
        if (liveness_pass_2(s)) {
            /* If changes were made, re-run liveness.  */
            liveness_pass_1(s);
        }
    }

    /* Final opcode dump after optimization and liveness (-d op_opt). */
    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT)
                 && qemu_log_in_addr_range(pc_start))) {
        FILE *logfile = qemu_log_trylock();
        if (logfile) {
            fprintf(logfile, "OP after optimization and liveness analysis:\n");
            tcg_dump_ops(s, logfile, true);
            fprintf(logfile, "\n");
            qemu_log_unlock(logfile);
        }
    }

    /* Initialize goto_tb jump offsets. */
    tb->jmp_reset_offset[0] = TB_JMP_OFFSET_INVALID;
    tb->jmp_reset_offset[1] = TB_JMP_OFFSET_INVALID;
    tb->jmp_insn_offset[0] = TB_JMP_OFFSET_INVALID;
    tb->jmp_insn_offset[1] = TB_JMP_OFFSET_INVALID;

    tcg_reg_alloc_start(s);

    /*
     * Reset the buffer pointers when restarting after overflow.
     * TODO: Move this into translate-all.c with the rest of the
     * buffer management.  Having only this done here is confusing.
     */
    s->code_buf = tcg_splitwx_to_rw(tb->tc.ptr);
    s->code_ptr = s->code_buf;

#ifdef TCG_TARGET_NEED_LDST_LABELS
    QSIMPLEQ_INIT(&s->ldst_labels);
#endif
#ifdef TCG_TARGET_NEED_POOL_LABELS
    s->pool_labels = NULL;
#endif

    /* One uint64_t per insn_start word, for every guest insn in the TB. */
    start_words = s->insn_start_words;
    s->gen_insn_data =
        tcg_malloc(sizeof(uint64_t) * s->gen_tb->icount * start_words);

    /* -1 so the first INDEX_op_insn_start increments it to index 0. */
    num_insns = -1;
    QTAILQ_FOREACH(op, &s->ops, link) {
        TCGOpcode opc = op->opc;

        switch (opc) {
        case INDEX_op_mov_i32:
        case INDEX_op_mov_i64:
        case INDEX_op_mov_vec:
            tcg_reg_alloc_mov(s, op);
            break;
        case INDEX_op_dup_vec:
            tcg_reg_alloc_dup(s, op);
            break;
        case INDEX_op_insn_start:
            /* Record where code for the previous guest insn ended. */
            if (num_insns >= 0) {
                size_t off = tcg_current_code_size(s);
                s->gen_insn_end_off[num_insns] = off;
                /* Assert that we do not overflow our stored offset.  */
                assert(s->gen_insn_end_off[num_insns] == off);
            }
            num_insns++;
            for (i = 0; i < start_words; ++i) {
                s->gen_insn_data[num_insns * start_words + i] =
                    tcg_get_insn_start_param(op, i);
            }
            break;
        case INDEX_op_discard:
            temp_dead(s, arg_temp(op->args[0]));
            break;
        case INDEX_op_set_label:
            /* End the basic block before resolving the label address. */
            tcg_reg_alloc_bb_end(s, s->reserved_regs);
            tcg_out_label(s, arg_label(op->args[0]));
            break;
        case INDEX_op_call:
            tcg_reg_alloc_call(s, op);
            break;
        case INDEX_op_exit_tb:
            tcg_out_exit_tb(s, op->args[0]);
            break;
        case INDEX_op_goto_tb:
            tcg_out_goto_tb(s, op->args[0]);
            break;
        case INDEX_op_dup2_vec:
            if (tcg_reg_alloc_dup2(s, op)) {
                break;
            }
            /* fall through */
        default:
            /* Sanity check that we've not introduced any unhandled opcodes. */
            tcg_debug_assert(tcg_op_supported(opc));
            /* Note: in order to speed up the code, it would be much
               faster to have specialized register allocator functions for
               some common argument patterns */
            tcg_reg_alloc_op(s, op);
            break;
        }
        /* Test for (pending) buffer overflow.  The assumption is that any
           one operation beginning below the high water mark cannot overrun
           the buffer completely.  Thus we can test for overflow after
           generating code without having to check during generation.  */
        if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
            return -1;
        }
        /* Test for TB overflow, as seen by gen_insn_end_off.  */
        if (unlikely(tcg_current_code_size(s) > UINT16_MAX)) {
            return -2;
        }
    }
    tcg_debug_assert(num_insns + 1 == s->gen_tb->icount);
    s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);

    /* Generate TB finalization at the end of block */
#ifdef TCG_TARGET_NEED_LDST_LABELS
    i = tcg_out_ldst_finalize(s);
    if (i < 0) {
        return i;
    }
#endif
#ifdef TCG_TARGET_NEED_POOL_LABELS
    i = tcg_out_pool_finalize(s);
    if (i < 0) {
        return i;
    }
#endif
    if (!tcg_resolve_relocs(s)) {
        return -2;
    }

#ifndef CONFIG_TCG_INTERPRETER
    /* flush instruction cache */
    flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
                        (uintptr_t)s->code_buf,
                        tcg_ptr_byte_diff(s->code_ptr, s->code_buf));
#endif

    return tcg_current_code_size(s);
}
6103  
/*
 * Append general TCG statistics to @buf.
 * Profiling support is not compiled into this build, so only a
 * placeholder line is emitted.
 */
void tcg_dump_info(GString *buf)
{
    g_string_append_printf(buf, "[TCG profiler not compiled]\n");
}
6108  
6109  #ifdef ELF_HOST_MACHINE
6110  /* In order to use this feature, the backend needs to do three things:
6111  
6112     (1) Define ELF_HOST_MACHINE to indicate both what value to
6113         put into the ELF image and to indicate support for the feature.
6114  
6115     (2) Define tcg_register_jit.  This should create a buffer containing
6116         the contents of a .debug_frame section that describes the post-
6117         prologue unwind info for the tcg machine.
6118  
6119     (3) Call tcg_register_jit_int, with the constructed .debug_frame.
6120  */
6121  
/* Begin GDB interface.  THE FOLLOWING MUST MATCH GDB DOCS.  */
typedef enum {
    JIT_NOACTION = 0,
    JIT_REGISTER_FN,
    JIT_UNREGISTER_FN
} jit_actions_t;

/* One registered in-memory symbol file (here: our synthetic ELF image). */
struct jit_code_entry {
    struct jit_code_entry *next_entry;
    struct jit_code_entry *prev_entry;
    const void *symfile_addr;
    uint64_t symfile_size;
};

/* The descriptor the debugger inspects to find registered entries. */
struct jit_descriptor {
    uint32_t version;
    uint32_t action_flag;
    struct jit_code_entry *relevant_entry;
    struct jit_code_entry *first_entry;
};

/*
 * Per the GDB JIT interface, the debugger sets a breakpoint on this
 * function; the empty asm keeps the compiler from discarding the call
 * as a no-op, and inlining must be prevented so the breakpoint works.
 */
void __jit_debug_register_code(void) __attribute__((noinline));
void __jit_debug_register_code(void)
{
    asm("");
}

/* Must statically initialize the version, because GDB may check
   the version before we can set it.  */
struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };

/* End GDB interface.  */
6154  
/*
 * Return the offset of @str within the NUL-separated string table
 * @strtab.  The empty string occupying offset 0 is skipped.  @str must
 * be present in the table: there is no termination on a miss.
 */
static int find_string(const char *strtab, const char *str)
{
    for (const char *p = strtab + 1; ; p += strlen(p) + 1) {
        if (strcmp(p, str) == 0) {
            return p - strtab;
        }
    }
}
6166  
/*
 * Build a minimal in-memory ELF image describing the code buffer at
 * @buf_ptr/@buf_size and register it with GDB via the JIT interface.
 *
 * The image contains a SHT_NOBITS .text section spanning the buffer,
 * a single "code_gen_buffer" function symbol, a hand-rolled DWARF
 * .debug_info/.debug_abbrev pair, and the backend-supplied
 * .debug_frame (@debug_frame/@debug_frame_size) appended after the
 * fixed-size template.  Memory for the image is never freed: it must
 * stay alive for as long as the debugger may read it.
 */
static void tcg_register_jit_int(const void *buf_ptr, size_t buf_size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
{
    /* Layout of our .debug_info section; packed so field offsets match
       the byte stream DWARF readers expect.  */
    struct __attribute__((packed)) DebugInfo {
        uint32_t  len;
        uint16_t  version;
        uint32_t  abbrev;
        uint8_t   ptr_size;
        uint8_t   cu_die;
        uint16_t  cu_lang;
        uintptr_t cu_low_pc;
        uintptr_t cu_high_pc;
        uint8_t   fn_die;
        char      fn_name[16];
        uintptr_t fn_low_pc;
        uintptr_t fn_high_pc;
        uint8_t   cu_eoc;
    };

    /* The complete fixed-size portion of the fake ELF file; section
       offsets below are computed from this struct's layout.  */
    struct ElfImage {
        ElfW(Ehdr) ehdr;
        ElfW(Phdr) phdr;
        ElfW(Shdr) shdr[7];
        ElfW(Sym)  sym[2];
        struct DebugInfo di;
        uint8_t    da[24];
        char       str[80];
    };

    struct ElfImage *img;

    static const struct ElfImage img_template = {
        .ehdr = {
            .e_ident[EI_MAG0] = ELFMAG0,
            .e_ident[EI_MAG1] = ELFMAG1,
            .e_ident[EI_MAG2] = ELFMAG2,
            .e_ident[EI_MAG3] = ELFMAG3,
            .e_ident[EI_CLASS] = ELF_CLASS,
            .e_ident[EI_DATA] = ELF_DATA,
            .e_ident[EI_VERSION] = EV_CURRENT,
            .e_type = ET_EXEC,
            .e_machine = ELF_HOST_MACHINE,
            .e_version = EV_CURRENT,
            .e_phoff = offsetof(struct ElfImage, phdr),
            .e_shoff = offsetof(struct ElfImage, shdr),
            /* NOTE(review): ELF defines e_ehsize as sizeof(Ehdr), not
               sizeof(Shdr).  The two coincide for ELF64, and consumers
               appear not to check it -- confirm before changing.  */
            .e_ehsize = sizeof(ElfW(Shdr)),
            .e_phentsize = sizeof(ElfW(Phdr)),
            .e_phnum = 1,
            .e_shentsize = sizeof(ElfW(Shdr)),
            .e_shnum = ARRAY_SIZE(img->shdr),
            .e_shstrndx = ARRAY_SIZE(img->shdr) - 1,
#ifdef ELF_HOST_FLAGS
            .e_flags = ELF_HOST_FLAGS,
#endif
#ifdef ELF_OSABI
            .e_ident[EI_OSABI] = ELF_OSABI,
#endif
        },
        .phdr = {
            .p_type = PT_LOAD,
            .p_flags = PF_X,
        },
        .shdr = {
            [0] = { .sh_type = SHT_NULL },
            /* Trick: The contents of code_gen_buffer are not present in
               this fake ELF file; that got allocated elsewhere.  Therefore
               we mark .text as SHT_NOBITS (similar to .bss) so that readers
               will not look for contents.  We can record any address.  */
            [1] = { /* .text */
                .sh_type = SHT_NOBITS,
                .sh_flags = SHF_EXECINSTR | SHF_ALLOC,
            },
            [2] = { /* .debug_info */
                .sh_type = SHT_PROGBITS,
                .sh_offset = offsetof(struct ElfImage, di),
                .sh_size = sizeof(struct DebugInfo),
            },
            [3] = { /* .debug_abbrev */
                .sh_type = SHT_PROGBITS,
                .sh_offset = offsetof(struct ElfImage, da),
                .sh_size = sizeof(img->da),
            },
            [4] = { /* .debug_frame */
                /* Placed immediately after the fixed template.  */
                .sh_type = SHT_PROGBITS,
                .sh_offset = sizeof(struct ElfImage),
            },
            [5] = { /* .symtab */
                .sh_type = SHT_SYMTAB,
                .sh_offset = offsetof(struct ElfImage, sym),
                .sh_size = sizeof(img->sym),
                .sh_info = 1,
                .sh_link = ARRAY_SIZE(img->shdr) - 1,
                .sh_entsize = sizeof(ElfW(Sym)),
            },
            [6] = { /* .strtab */
                .sh_type = SHT_STRTAB,
                .sh_offset = offsetof(struct ElfImage, str),
                .sh_size = sizeof(img->str),
            }
        },
        .sym = {
            [1] = { /* code_gen_buffer */
                .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC),
                .st_shndx = 1,
            }
        },
        .di = {
            .len = sizeof(struct DebugInfo) - 4,
            .version = 2,
            .ptr_size = sizeof(void *),
            .cu_die = 1,
            .cu_lang = 0x8001,  /* DW_LANG_Mips_Assembler */
            .fn_die = 2,
            .fn_name = "code_gen_buffer"
        },
        .da = {
            1,          /* abbrev number (the cu) */
            0x11, 1,    /* DW_TAG_compile_unit, has children */
            0x13, 0x5,  /* DW_AT_language, DW_FORM_data2 */
            0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
            0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
            0, 0,       /* end of abbrev */
            2,          /* abbrev number (the fn) */
            0x2e, 0,    /* DW_TAG_subprogram, no children */
            0x3, 0x8,   /* DW_AT_name, DW_FORM_string */
            0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
            0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
            0, 0,       /* end of abbrev */
            0           /* no more abbrev */
        },
        .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0"
               ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer",
    };

    /* We only need a single jit entry; statically allocate it.  */
    static struct jit_code_entry one_entry;

    uintptr_t buf = (uintptr_t)buf_ptr;
    size_t img_size = sizeof(struct ElfImage) + debug_frame_size;
    DebugFrameHeader *dfh;

    /* Intentionally leaked: GDB reads the image for the process lifetime. */
    img = g_malloc(img_size);
    *img = img_template;

    /* Fill in the runtime-only fields: addresses, sizes, name offsets. */
    img->phdr.p_vaddr = buf;
    img->phdr.p_paddr = buf;
    img->phdr.p_memsz = buf_size;

    img->shdr[1].sh_name = find_string(img->str, ".text");
    img->shdr[1].sh_addr = buf;
    img->shdr[1].sh_size = buf_size;

    img->shdr[2].sh_name = find_string(img->str, ".debug_info");
    img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev");

    img->shdr[4].sh_name = find_string(img->str, ".debug_frame");
    img->shdr[4].sh_size = debug_frame_size;

    img->shdr[5].sh_name = find_string(img->str, ".symtab");
    img->shdr[6].sh_name = find_string(img->str, ".strtab");

    img->sym[1].st_name = find_string(img->str, "code_gen_buffer");
    img->sym[1].st_value = buf;
    img->sym[1].st_size = buf_size;

    img->di.cu_low_pc = buf;
    img->di.cu_high_pc = buf + buf_size;
    img->di.fn_low_pc = buf;
    img->di.fn_high_pc = buf + buf_size;

    /* Copy the backend's .debug_frame after the template and patch the
       FDE to cover the actual code buffer.  */
    dfh = (DebugFrameHeader *)(img + 1);
    memcpy(dfh, debug_frame, debug_frame_size);
    dfh->fde.func_start = buf;
    dfh->fde.func_len = buf_size;

#ifdef DEBUG_JIT
    /* Enable this block to be able to debug the ELF image file creation.
       One can use readelf, objdump, or other inspection utilities.  */
    {
        g_autofree char *jit = g_strdup_printf("%s/qemu.jit", g_get_tmp_dir());
        FILE *f = fopen(jit, "w+b");
        if (f) {
            /* NOTE(review): fwrite returns the item count (0 or 1 here),
               not bytes, so this comparison is always "truthy"; the empty
               body exists only to consume the warn-unused-result value.  */
            if (fwrite(img, img_size, 1, f) != img_size) {
                /* Avoid stupid unused return value warning for fwrite.  */
            }
            fclose(f);
        }
    }
#endif

    /* Publish the entry and notify the debugger.  */
    one_entry.symfile_addr = img;
    one_entry.symfile_size = img_size;

    __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
    __jit_debug_descriptor.relevant_entry = &one_entry;
    __jit_debug_descriptor.first_entry = &one_entry;
    __jit_debug_register_code();
}
6366  #else
6367  /* No support for the feature.  Provide the entry point expected by exec.c,
6368     and implement the internal function we declared earlier.  */
6369  
/* No ELF_HOST_MACHINE: GDB JIT registration is unsupported; do nothing. */
static void tcg_register_jit_int(const void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
{
}
6375  
/* Public entry point kept for callers; no-op without ELF_HOST_MACHINE. */
void tcg_register_jit(const void *buf, size_t buf_size)
{
}
6379  #endif /* ELF_HOST_MACHINE */
6380  
6381  #if !TCG_TARGET_MAYBE_vec
/*
 * Expansion hook for unsupported vector ops.  This backend has no
 * vector support, so no vector opcode should ever reach here.
 */
void tcg_expand_vec_op(TCGOpcode o, TCGType t, unsigned e, TCGArg a0, ...)
{
    g_assert_not_reached();
}
6386  #endif
6387