xref: /openbmc/qemu/tcg/tcg.c (revision 125062e791258c68109f3a59cb7aca3dadbdb5a3)
/*
 * Tiny Code Generator for QEMU
 *
 * Copyright (c) 2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "qemu/osdep.h"

/* Define to dump the ELF file used to communicate with GDB.  */
#undef DEBUG_JIT

#include "qemu/error-report.h"
#include "qemu/cutils.h"
#include "qemu/host-utils.h"
#include "qemu/qemu-print.h"
#include "qemu/cacheflush.h"
#include "qemu/cacheinfo.h"
#include "qemu/timer.h"
#include "exec/translation-block.h"
#include "exec/tlb-common.h"
#include "tcg/startup.h"
#include "tcg/tcg-op-common.h"

#if UINTPTR_MAX == UINT32_MAX
# define ELF_CLASS  ELFCLASS32
#else
# define ELF_CLASS  ELFCLASS64
#endif
#if HOST_BIG_ENDIAN
# define ELF_DATA   ELFDATA2MSB
#else
# define ELF_DATA   ELFDATA2LSB
#endif

#include "elf.h"
#include "exec/log.h"
#include "tcg/tcg-ldst.h"
#include "tcg/tcg-temp-internal.h"
#include "tcg-internal.h"
#include "accel/tcg/perf.h"
#ifdef CONFIG_USER_ONLY
#include "exec/user/guest-base.h"
#endif

/* Forward declarations for functions declared in tcg-target.c.inc and
   used here. */
static void tcg_target_init(TCGContext *s);
static void tcg_target_qemu_prologue(TCGContext *s);
static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
                        intptr_t value, intptr_t addend);

/* The CIE and FDE header definitions will be common to all hosts.  */
typedef struct {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t id;
    uint8_t version;
    char augmentation[1];
    uint8_t code_align;
    uint8_t data_align;
    uint8_t return_column;
} DebugFrameCIE;

typedef struct QEMU_PACKED {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t cie_offset;
    uintptr_t func_start;
    uintptr_t func_len;
} DebugFrameFDEHeader;

typedef struct QEMU_PACKED {
    DebugFrameCIE cie;
    DebugFrameFDEHeader fde;
} DebugFrameHeader;

typedef struct TCGLabelQemuLdst {
    bool is_ld;             /* qemu_ld: true, qemu_st: false */
    MemOpIdx oi;
    TCGType type;           /* result type of a load */
    TCGReg addrlo_reg;      /* reg index for low word of guest virtual addr */
    TCGReg addrhi_reg;      /* reg index for high word of guest virtual addr */
    TCGReg datalo_reg;      /* reg index for low word to be loaded or stored */
    TCGReg datahi_reg;      /* reg index for high word to be loaded or stored */
    const tcg_insn_unit *raddr;   /* host code addr following the qemu_ld/st */
    tcg_insn_unit *label_ptr[2]; /* label pointers to be updated */
    QSIMPLEQ_ENTRY(TCGLabelQemuLdst) next;
} TCGLabelQemuLdst;

static void tcg_register_jit_int(const void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
    __attribute__((unused));

/* Forward declarations for functions declared and used in tcg-target.c.inc. */
static void tcg_out_tb_start(TCGContext *s);
static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
                       intptr_t arg2);
static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_movi(TCGContext *s, TCGType type,
                         TCGReg ret, tcg_target_long arg);
static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_ext16s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_ext8u(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_ext16u(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_ext32s(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_ext32u(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_addi_ptr(TCGContext *s, TCGReg, TCGReg, tcg_target_long);
static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2);
static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg);
static void tcg_out_goto_tb(TCGContext *s, int which);
static void tcg_out_op(TCGContext *s, TCGOpcode opc,
                       const TCGArg args[TCG_MAX_OP_ARGS],
                       const int const_args[TCG_MAX_OP_ARGS]);
#if TCG_TARGET_MAYBE_vec
static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                            TCGReg dst, TCGReg src);
static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg dst, TCGReg base, intptr_t offset);
static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg dst, int64_t arg);
static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                           unsigned vecl, unsigned vece,
                           const TCGArg args[TCG_MAX_OP_ARGS],
                           const int const_args[TCG_MAX_OP_ARGS]);
#else
static inline bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                                   TCGReg dst, TCGReg src)
{
    g_assert_not_reached();
}
static inline bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, TCGReg base, intptr_t offset)
{
    g_assert_not_reached();
}
static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, int64_t arg)
{
    g_assert_not_reached();
}
static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                                  unsigned vecl, unsigned vece,
                                  const TCGArg args[TCG_MAX_OP_ARGS],
                                  const int const_args[TCG_MAX_OP_ARGS])
{
    g_assert_not_reached();
}
#endif
static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
                       intptr_t arg2);
static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
                        TCGReg base, intptr_t ofs);
static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target,
                         const TCGHelperInfo *info);
static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot);
static bool tcg_target_const_match(int64_t val, TCGType type, int ct, int vece);
#ifdef TCG_TARGET_NEED_LDST_LABELS
static int tcg_out_ldst_finalize(TCGContext *s);
#endif

#ifndef CONFIG_USER_ONLY
#define guest_base  ({ qemu_build_not_reached(); (uintptr_t)0; })
#endif

typedef struct TCGLdstHelperParam {
    TCGReg (*ra_gen)(TCGContext *s, const TCGLabelQemuLdst *l, int arg_reg);
    unsigned ntmp;
    int tmp[3];
} TCGLdstHelperParam;

static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *l,
                                   const TCGLdstHelperParam *p)
    __attribute__((unused));
static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *l,
                                  bool load_sign, const TCGLdstHelperParam *p)
    __attribute__((unused));
static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *l,
                                   const TCGLdstHelperParam *p)
    __attribute__((unused));

static void * const qemu_ld_helpers[MO_SSIZE + 1] __attribute__((unused)) = {
    [MO_UB] = helper_ldub_mmu,
    [MO_SB] = helper_ldsb_mmu,
    [MO_UW] = helper_lduw_mmu,
    [MO_SW] = helper_ldsw_mmu,
    [MO_UL] = helper_ldul_mmu,
    [MO_UQ] = helper_ldq_mmu,
#if TCG_TARGET_REG_BITS == 64
    [MO_SL] = helper_ldsl_mmu,
    [MO_128] = helper_ld16_mmu,
#endif
};

static void * const qemu_st_helpers[MO_SIZE + 1] __attribute__((unused)) = {
    [MO_8]  = helper_stb_mmu,
    [MO_16] = helper_stw_mmu,
    [MO_32] = helper_stl_mmu,
    [MO_64] = helper_stq_mmu,
#if TCG_TARGET_REG_BITS == 64
    [MO_128] = helper_st16_mmu,
#endif
};

typedef struct {
    MemOp atom;   /* lg2 bits of atomicity required */
    MemOp align;  /* lg2 bits of alignment to use */
} TCGAtomAlign;

static TCGAtomAlign atom_and_align_for_opc(TCGContext *s, MemOp opc,
                                           MemOp host_atom, bool allow_two_ops)
    __attribute__((unused));

#ifdef CONFIG_USER_ONLY
bool tcg_use_softmmu;
#endif

TCGContext tcg_init_ctx;
__thread TCGContext *tcg_ctx;

TCGContext **tcg_ctxs;
unsigned int tcg_cur_ctxs;
unsigned int tcg_max_ctxs;
TCGv_env tcg_env;
const void *tcg_code_gen_epilogue;
uintptr_t tcg_splitwx_diff;

#ifndef CONFIG_TCG_INTERPRETER
tcg_prologue_fn *tcg_qemu_tb_exec;
#endif

static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT];
static TCGRegSet tcg_target_call_clobber_regs;

#if TCG_TARGET_INSN_UNIT_SIZE == 1
static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v)
{
    *s->code_ptr++ = v;
}

static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p,
                                                      uint8_t v)
{
    *p = v;
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 2
static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p,
                                                       uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 4
static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p,
                                                       uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 8
static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p,
                                                       uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

/* label relocation processing */

static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
                          TCGLabel *l, intptr_t addend)
{
    TCGRelocation *r = tcg_malloc(sizeof(TCGRelocation));

    r->type = type;
    r->ptr = code_ptr;
    r->addend = addend;
    QSIMPLEQ_INSERT_TAIL(&l->relocs, r, next);
}

static void tcg_out_label(TCGContext *s, TCGLabel *l)
{
    tcg_debug_assert(!l->has_value);
    l->has_value = 1;
    l->u.value_ptr = tcg_splitwx_to_rx(s->code_ptr);
}

TCGLabel *gen_new_label(void)
{
    TCGContext *s = tcg_ctx;
    TCGLabel *l = tcg_malloc(sizeof(TCGLabel));

    memset(l, 0, sizeof(TCGLabel));
    l->id = s->nb_labels++;
    QSIMPLEQ_INIT(&l->branches);
    QSIMPLEQ_INIT(&l->relocs);

    QSIMPLEQ_INSERT_TAIL(&s->labels, l, next);

    return l;
}

static bool tcg_resolve_relocs(TCGContext *s)
{
    TCGLabel *l;

    QSIMPLEQ_FOREACH(l, &s->labels, next) {
        TCGRelocation *r;
        uintptr_t value = l->u.value;

        QSIMPLEQ_FOREACH(r, &l->relocs, next) {
            if (!patch_reloc(r->ptr, r->type, value, r->addend)) {
                return false;
            }
        }
    }
    return true;
}

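/*
 * Usage sketch (illustrative, not a fixed contract): a backend emits a
 * branch to a forward label by queueing a relocation against the label
 * and emitting a placeholder, then binds the label later:
 *
 *     tcg_out_reloc(s, s->code_ptr, R_MY_BRANCH, l, 0);
 *     tcg_out32(s, MY_BRANCH_INSN);      // displacement patched later
 *     ...
 *     tcg_out_label(s, l);               // bind label to current code_ptr
 *
 * All queued relocations are applied by tcg_resolve_relocs() via the
 * per-target patch_reloc() once code generation for the TB completes.
 * R_MY_BRANCH and MY_BRANCH_INSN above are hypothetical stand-ins for
 * target-specific relocation types and encodings.
 */
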
static void set_jmp_reset_offset(TCGContext *s, int which)
{
    /*
     * We will check for overflow at the end of the opcode loop in
     * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
     */
    s->gen_tb->jmp_reset_offset[which] = tcg_current_code_size(s);
}

static void G_GNUC_UNUSED set_jmp_insn_offset(TCGContext *s, int which)
{
    /*
     * We will check for overflow at the end of the opcode loop in
     * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
     */
    s->gen_tb->jmp_insn_offset[which] = tcg_current_code_size(s);
}

static uintptr_t G_GNUC_UNUSED get_jmp_target_addr(TCGContext *s, int which)
{
    /*
     * Return the read-execute version of the pointer, for the benefit
     * of any pc-relative addressing mode.
     */
    return (uintptr_t)tcg_splitwx_to_rx(&s->gen_tb->jmp_target_addr[which]);
}

static int __attribute__((unused))
tlb_mask_table_ofs(TCGContext *s, int which)
{
    return (offsetof(CPUNegativeOffsetState, tlb.f[which]) -
            sizeof(CPUNegativeOffsetState));
}

/* Signal overflow, starting over with fewer guest insns. */
static G_NORETURN
void tcg_raise_tb_overflow(TCGContext *s)
{
    siglongjmp(s->jmp_trans, -2);
}

/*
 * Used by tcg_out_movext{1,2} to hold the arguments for tcg_out_movext.
 * By the time we arrive at tcg_out_movext1, @dst is always a TCGReg.
 *
 * However, tcg_out_helper_load_slots reuses this field to hold an
 * argument slot number (which may designate an argument register or an
 * argument stack slot), converting to TCGReg once all arguments that
 * are destined for the stack are processed.
 */
typedef struct TCGMovExtend {
    unsigned dst;
    TCGReg src;
    TCGType dst_type;
    TCGType src_type;
    MemOp src_ext;
} TCGMovExtend;

/**
 * tcg_out_movext -- move and extend
 * @s: tcg context
 * @dst_type: integral type for destination
 * @dst: destination register
 * @src_type: integral type for source
 * @src_ext: extension to apply to source
 * @src: source register
 *
 * Move or extend @src into @dst, depending on @src_ext and the types.
 */
static void tcg_out_movext(TCGContext *s, TCGType dst_type, TCGReg dst,
                           TCGType src_type, MemOp src_ext, TCGReg src)
{
    switch (src_ext) {
    case MO_UB:
        tcg_out_ext8u(s, dst, src);
        break;
    case MO_SB:
        tcg_out_ext8s(s, dst_type, dst, src);
        break;
    case MO_UW:
        tcg_out_ext16u(s, dst, src);
        break;
    case MO_SW:
        tcg_out_ext16s(s, dst_type, dst, src);
        break;
    case MO_UL:
    case MO_SL:
        if (dst_type == TCG_TYPE_I32) {
            if (src_type == TCG_TYPE_I32) {
                tcg_out_mov(s, TCG_TYPE_I32, dst, src);
            } else {
                tcg_out_extrl_i64_i32(s, dst, src);
            }
        } else if (src_type == TCG_TYPE_I32) {
            if (src_ext & MO_SIGN) {
                tcg_out_exts_i32_i64(s, dst, src);
            } else {
                tcg_out_extu_i32_i64(s, dst, src);
            }
        } else {
            if (src_ext & MO_SIGN) {
                tcg_out_ext32s(s, dst, src);
            } else {
                tcg_out_ext32u(s, dst, src);
            }
        }
        break;
    case MO_UQ:
        tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
        if (dst_type == TCG_TYPE_I32) {
            tcg_out_extrl_i64_i32(s, dst, src);
        } else {
            tcg_out_mov(s, TCG_TYPE_I64, dst, src);
        }
        break;
    default:
        g_assert_not_reached();
    }
}

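/*
 * For illustration: on a 64-bit host, src_ext = MO_SB with
 * dst_type = TCG_TYPE_I64 reduces to
 * tcg_out_ext8s(s, TCG_TYPE_I64, dst, src), i.e. a sign-extension of
 * the low byte, while MO_UQ with matching 64-bit types reduces to a
 * plain tcg_out_mov.  (Example only; the callers below normally drive
 * this through a TCGMovExtend descriptor.)
 */
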
/* Minor variations on a theme, using a structure. */
static void tcg_out_movext1_new_src(TCGContext *s, const TCGMovExtend *i,
                                    TCGReg src)
{
    tcg_out_movext(s, i->dst_type, i->dst, i->src_type, i->src_ext, src);
}

static void tcg_out_movext1(TCGContext *s, const TCGMovExtend *i)
{
    tcg_out_movext1_new_src(s, i, i->src);
}

/**
 * tcg_out_movext2 -- move and extend two pairs
 * @s: tcg context
 * @i1: first move description
 * @i2: second move description
 * @scratch: temporary register, or -1 for none
 *
 * As tcg_out_movext, for both @i1 and @i2, caring for overlap
 * between the sources and destinations.
 */

static void tcg_out_movext2(TCGContext *s, const TCGMovExtend *i1,
                            const TCGMovExtend *i2, int scratch)
{
    TCGReg src1 = i1->src;
    TCGReg src2 = i2->src;

    if (i1->dst != src2) {
        tcg_out_movext1(s, i1);
        tcg_out_movext1(s, i2);
        return;
    }
    if (i2->dst == src1) {
        TCGType src1_type = i1->src_type;
        TCGType src2_type = i2->src_type;

        if (tcg_out_xchg(s, MAX(src1_type, src2_type), src1, src2)) {
            /* The data is now in the correct registers, now extend. */
            src1 = i2->src;
            src2 = i1->src;
        } else {
            tcg_debug_assert(scratch >= 0);
            tcg_out_mov(s, src1_type, scratch, src1);
            src1 = scratch;
        }
    }
    tcg_out_movext1_new_src(s, i2, src2);
    tcg_out_movext1_new_src(s, i1, src1);
}

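/*
 * Overlap example (illustrative): with i1 = {dst=R1, src=R0} and
 * i2 = {dst=R0, src=R1}, the two destinations form a cycle.  If the
 * backend implements tcg_out_xchg, the registers are swapped and then
 * extended in place; otherwise i1's source is first parked in
 * @scratch so that neither input is clobbered before it is consumed.
 */
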
/**
 * tcg_out_movext3 -- move and extend three pairs
 * @s: tcg context
 * @i1: first move description
 * @i2: second move description
 * @i3: third move description
 * @scratch: temporary register, or -1 for none
 *
 * As tcg_out_movext, for all of @i1, @i2 and @i3, caring for overlap
 * between the sources and destinations.
 */

static void tcg_out_movext3(TCGContext *s, const TCGMovExtend *i1,
                            const TCGMovExtend *i2, const TCGMovExtend *i3,
                            int scratch)
{
    TCGReg src1 = i1->src;
    TCGReg src2 = i2->src;
    TCGReg src3 = i3->src;

    if (i1->dst != src2 && i1->dst != src3) {
        tcg_out_movext1(s, i1);
        tcg_out_movext2(s, i2, i3, scratch);
        return;
    }
    if (i2->dst != src1 && i2->dst != src3) {
        tcg_out_movext1(s, i2);
        tcg_out_movext2(s, i1, i3, scratch);
        return;
    }
    if (i3->dst != src1 && i3->dst != src2) {
        tcg_out_movext1(s, i3);
        tcg_out_movext2(s, i1, i2, scratch);
        return;
    }

    /*
     * There is a cycle.  Since there are only 3 nodes, the cycle is
     * either "clockwise" or "anti-clockwise", and can be solved with
     * a single scratch or two xchg.
     */
    if (i1->dst == src2 && i2->dst == src3 && i3->dst == src1) {
        /* "Clockwise" */
        if (tcg_out_xchg(s, MAX(i1->src_type, i2->src_type), src1, src2)) {
            tcg_out_xchg(s, MAX(i2->src_type, i3->src_type), src2, src3);
            /* The data is now in the correct registers, now extend. */
            tcg_out_movext1_new_src(s, i1, i1->dst);
            tcg_out_movext1_new_src(s, i2, i2->dst);
            tcg_out_movext1_new_src(s, i3, i3->dst);
        } else {
            tcg_debug_assert(scratch >= 0);
            tcg_out_mov(s, i1->src_type, scratch, src1);
            tcg_out_movext1(s, i3);
            tcg_out_movext1(s, i2);
            tcg_out_movext1_new_src(s, i1, scratch);
        }
    } else if (i1->dst == src3 && i2->dst == src1 && i3->dst == src2) {
        /* "Anti-clockwise" */
        if (tcg_out_xchg(s, MAX(i2->src_type, i3->src_type), src2, src3)) {
            tcg_out_xchg(s, MAX(i1->src_type, i2->src_type), src1, src2);
            /* The data is now in the correct registers, now extend. */
            tcg_out_movext1_new_src(s, i1, i1->dst);
            tcg_out_movext1_new_src(s, i2, i2->dst);
            tcg_out_movext1_new_src(s, i3, i3->dst);
        } else {
            tcg_debug_assert(scratch >= 0);
            tcg_out_mov(s, i1->src_type, scratch, src1);
            tcg_out_movext1(s, i2);
            tcg_out_movext1(s, i3);
            tcg_out_movext1_new_src(s, i1, scratch);
        }
    } else {
        g_assert_not_reached();
    }
}

#define C_PFX1(P, A)                    P##A
#define C_PFX2(P, A, B)                 P##A##_##B
#define C_PFX3(P, A, B, C)              P##A##_##B##_##C
#define C_PFX4(P, A, B, C, D)           P##A##_##B##_##C##_##D
#define C_PFX5(P, A, B, C, D, E)        P##A##_##B##_##C##_##D##_##E
#define C_PFX6(P, A, B, C, D, E, F)     P##A##_##B##_##C##_##D##_##E##_##F

/* Define an enumeration for the various combinations. */

#define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1),
#define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2),
#define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3),
#define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4),

#define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1),
#define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2),
#define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3),
#define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4),

#define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2),
#define C_N2_I1(O1, O2, I1)             C_PFX3(c_n2_i1_, O1, O2, I1),

#define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1),
#define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2),
#define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3),
#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4),
#define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_n1_o1_i4_, O1, O2, I1, I2, I3, I4),

typedef enum {
#include "tcg-target-con-set.h"
} TCGConstraintSetIndex;

static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode);

#undef C_O0_I1
#undef C_O0_I2
#undef C_O0_I3
#undef C_O0_I4
#undef C_O1_I1
#undef C_O1_I2
#undef C_O1_I3
#undef C_O1_I4
#undef C_N1_I2
#undef C_N2_I1
#undef C_O2_I1
#undef C_O2_I2
#undef C_O2_I3
#undef C_O2_I4
#undef C_N1_O1_I4

/* Put all of the constraint sets into an array, indexed by the enum. */

#define C_O0_I1(I1)                     { .args_ct_str = { #I1 } },
#define C_O0_I2(I1, I2)                 { .args_ct_str = { #I1, #I2 } },
#define C_O0_I3(I1, I2, I3)             { .args_ct_str = { #I1, #I2, #I3 } },
#define C_O0_I4(I1, I2, I3, I4)         { .args_ct_str = { #I1, #I2, #I3, #I4 } },

#define C_O1_I1(O1, I1)                 { .args_ct_str = { #O1, #I1 } },
#define C_O1_I2(O1, I1, I2)             { .args_ct_str = { #O1, #I1, #I2 } },
#define C_O1_I3(O1, I1, I2, I3)         { .args_ct_str = { #O1, #I1, #I2, #I3 } },
#define C_O1_I4(O1, I1, I2, I3, I4)     { .args_ct_str = { #O1, #I1, #I2, #I3, #I4 } },

#define C_N1_I2(O1, I1, I2)             { .args_ct_str = { "&" #O1, #I1, #I2 } },
#define C_N2_I1(O1, O2, I1)             { .args_ct_str = { "&" #O1, "&" #O2, #I1 } },

#define C_O2_I1(O1, O2, I1)             { .args_ct_str = { #O1, #O2, #I1 } },
#define C_O2_I2(O1, O2, I1, I2)         { .args_ct_str = { #O1, #O2, #I1, #I2 } },
#define C_O2_I3(O1, O2, I1, I2, I3)     { .args_ct_str = { #O1, #O2, #I1, #I2, #I3 } },
#define C_O2_I4(O1, O2, I1, I2, I3, I4) { .args_ct_str = { #O1, #O2, #I1, #I2, #I3, #I4 } },
#define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) { .args_ct_str = { "&" #O1, #O2, #I1, #I2, #I3, #I4 } },

static const TCGTargetOpDef constraint_sets[] = {
#include "tcg-target-con-set.h"
};


#undef C_O0_I1
#undef C_O0_I2
#undef C_O0_I3
#undef C_O0_I4
#undef C_O1_I1
#undef C_O1_I2
#undef C_O1_I3
#undef C_O1_I4
#undef C_N1_I2
#undef C_N2_I1
#undef C_O2_I1
#undef C_O2_I2
#undef C_O2_I3
#undef C_O2_I4
#undef C_N1_O1_I4

/* Expand the enumerator to be returned from tcg_target_op_def(). */

#define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1)
#define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2)
#define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3)
#define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4)

#define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1)
#define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2)
#define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3)
#define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4)

#define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2)
#define C_N2_I1(O1, O2, I1)             C_PFX3(c_n2_i1_, O1, O2, I1)

#define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1)
#define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2)
#define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3)
#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4)
#define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_n1_o1_i4_, O1, O2, I1, I2, I3, I4)

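/*
 * To illustrate the three expansions: an entry C_O1_I2(r, r, ri) in
 * tcg-target-con-set.h yields the enumerator c_o1_i2_r_r_ri on the
 * first inclusion, the array entry { .args_ct_str = { "r", "r", "ri" } }
 * on the second, and with the definitions above a backend's
 * tcg_target_op_def() can 'return C_O1_I2(r, r, ri);', which expands
 * back to the enumerator.  ("r" and "ri" are representative constraint
 * strings; the actual letters are target-specific.)
 */
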
#include "tcg-target.c.inc"

#ifndef CONFIG_TCG_INTERPRETER
/* Validate CPUTLBDescFast placement. */
QEMU_BUILD_BUG_ON((int)(offsetof(CPUNegativeOffsetState, tlb.f[0]) -
                        sizeof(CPUNegativeOffsetState))
                  < MIN_TLB_MASK_TABLE_OFS);
#endif

static void alloc_tcg_plugin_context(TCGContext *s)
{
#ifdef CONFIG_PLUGIN
    s->plugin_tb = g_new0(struct qemu_plugin_tb, 1);
    s->plugin_tb->insns =
        g_ptr_array_new_with_free_func(qemu_plugin_insn_cleanup_fn);
#endif
}

/*
 * All TCG threads except the parent (i.e. the one that called tcg_context_init
 * and registered the target's TCG globals) must register with this function
 * before initiating translation.
 *
 * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation
 * of tcg_region_init() for the reasoning behind this.
 *
 * In system-mode each caller registers its context in tcg_ctxs[]. Note that in
 * system-mode tcg_ctxs[] does not track tcg_init_ctx, since the initial context
 * is not used anymore for translation once this function is called.
 *
 * Not tracking tcg_init_ctx in tcg_ctxs[] in system-mode keeps code that
 * iterates over the array (e.g. tcg_code_size()) the same for both system/user
 * modes.
 */
#ifdef CONFIG_USER_ONLY
void tcg_register_thread(void)
{
    tcg_ctx = &tcg_init_ctx;
}
#else
void tcg_register_thread(void)
{
    TCGContext *s = g_malloc(sizeof(*s));
    unsigned int i, n;

    *s = tcg_init_ctx;

    /* Relink mem_base.  */
    for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) {
        if (tcg_init_ctx.temps[i].mem_base) {
            ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps;
            tcg_debug_assert(b >= 0 && b < n);
            s->temps[i].mem_base = &s->temps[b];
        }
    }

    /* Claim an entry in tcg_ctxs */
    n = qatomic_fetch_inc(&tcg_cur_ctxs);
    g_assert(n < tcg_max_ctxs);
    qatomic_set(&tcg_ctxs[n], s);

    if (n > 0) {
        alloc_tcg_plugin_context(s);
        tcg_region_initial_alloc(s);
    }

    tcg_ctx = s;
}
#endif /* !CONFIG_USER_ONLY */

/* pool based memory allocation */
void *tcg_malloc_internal(TCGContext *s, int size)
{
    TCGPool *p;
    int pool_size;

    if (size > TCG_POOL_CHUNK_SIZE) {
        /* big malloc: insert a new pool (XXX: could optimize) */
        p = g_malloc(sizeof(TCGPool) + size);
        p->size = size;
        p->next = s->pool_first_large;
        s->pool_first_large = p;
        return p->data;
    } else {
        p = s->pool_current;
        if (!p) {
            p = s->pool_first;
            if (!p)
                goto new_pool;
        } else {
            if (!p->next) {
            new_pool:
                pool_size = TCG_POOL_CHUNK_SIZE;
                p = g_malloc(sizeof(TCGPool) + pool_size);
                p->size = pool_size;
                p->next = NULL;
                if (s->pool_current) {
                    s->pool_current->next = p;
                } else {
                    s->pool_first = p;
                }
            } else {
                p = p->next;
            }
        }
    }
    s->pool_current = p;
    s->pool_cur = p->data + size;
    s->pool_end = p->data + p->size;
    return p->data;
}

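/*
 * Usage sketch: tcg_malloc() (in tcg.h) inlines the fast path that
 * bumps s->pool_cur, falling back to tcg_malloc_internal() only when
 * the current chunk is exhausted.  There is no per-object free:
 * tcg_pool_reset() below releases the oversized allocations and
 * rewinds to the start of the chunk list, keeping regular chunks
 * for reuse by the next translation.
 */
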
void tcg_pool_reset(TCGContext *s)
{
    TCGPool *p, *t;
    for (p = s->pool_first_large; p; p = t) {
        t = p->next;
        g_free(p);
    }
    s->pool_first_large = NULL;
    s->pool_cur = s->pool_end = NULL;
    s->pool_current = NULL;
}

/*
 * Create TCGHelperInfo structures for "tcg/tcg-ldst.h" functions,
 * akin to what "exec/helper-tcg.h" does with DEF_HELPER_FLAGS_N.
 * We only use these for layout in tcg_out_ld_helper_ret and
 * tcg_out_st_helper_args, and share them between several of
 * the helpers, with the end result that it's easier to build manually.
 */

#if TCG_TARGET_REG_BITS == 32
# define dh_typecode_ttl  dh_typecode_i32
#else
# define dh_typecode_ttl  dh_typecode_i64
#endif

static TCGHelperInfo info_helper_ld32_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(ttl, 0)  /* return tcg_target_ulong */
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* unsigned oi */
              | dh_typemask(ptr, 4)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_ld64_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(i64, 0)  /* return uint64_t */
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* unsigned oi */
              | dh_typemask(ptr, 4)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_ld128_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(i128, 0) /* return Int128 */
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* unsigned oi */
              | dh_typemask(ptr, 4)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_st32_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(void, 0)
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* uint32_t data */
              | dh_typemask(i32, 4)  /* unsigned oi */
              | dh_typemask(ptr, 5)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_st64_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(void, 0)
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i64, 3)  /* uint64_t data */
              | dh_typemask(i32, 4)  /* unsigned oi */
              | dh_typemask(ptr, 5)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_st128_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(void, 0)
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i128, 3) /* Int128 data */
              | dh_typemask(i32, 4)  /* unsigned oi */
              | dh_typemask(ptr, 5)  /* uintptr_t ra */
};

#ifdef CONFIG_TCG_INTERPRETER
static ffi_type *typecode_to_ffi(int argmask)
{
    /*
     * libffi does not support __int128_t, so we have forced Int128
     * to use the structure definition instead of the builtin type.
     */
    static ffi_type *ffi_type_i128_elements[3] = {
        &ffi_type_uint64,
        &ffi_type_uint64,
        NULL
    };
    static ffi_type ffi_type_i128 = {
        .size = 16,
        .alignment = __alignof__(Int128),
        .type = FFI_TYPE_STRUCT,
        .elements = ffi_type_i128_elements,
    };

    switch (argmask) {
    case dh_typecode_void:
        return &ffi_type_void;
    case dh_typecode_i32:
        return &ffi_type_uint32;
    case dh_typecode_s32:
        return &ffi_type_sint32;
    case dh_typecode_i64:
        return &ffi_type_uint64;
    case dh_typecode_s64:
        return &ffi_type_sint64;
    case dh_typecode_ptr:
        return &ffi_type_pointer;
    case dh_typecode_i128:
        return &ffi_type_i128;
    }
    g_assert_not_reached();
}

static ffi_cif *init_ffi_layout(TCGHelperInfo *info)
{
    unsigned typemask = info->typemask;
    struct {
        ffi_cif cif;
        ffi_type *args[];
    } *ca;
    ffi_status status;
    int nargs;

    /* Ignoring the return type, find the last non-zero field. */
    nargs = 32 - clz32(typemask >> 3);
    nargs = DIV_ROUND_UP(nargs, 3);
    assert(nargs <= MAX_CALL_IARGS);

    ca = g_malloc0(sizeof(*ca) + nargs * sizeof(ffi_type *));
    ca->cif.rtype = typecode_to_ffi(typemask & 7);
    ca->cif.nargs = nargs;

    if (nargs != 0) {
        ca->cif.arg_types = ca->args;
        for (int j = 0; j < nargs; ++j) {
            int typecode = extract32(typemask, (j + 1) * 3, 3);
            ca->args[j] = typecode_to_ffi(typecode);
        }
    }

    status = ffi_prep_cif(&ca->cif, FFI_DEFAULT_ABI, nargs,
                          ca->cif.rtype, ca->cif.arg_types);
    assert(status == FFI_OK);

    return &ca->cif;
}

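/*
 * Worked example of the typemask encoding (3 bits per slot, slot 0
 * being the return type): for info_helper_ld32_mmu above, typemask >> 3
 * carries the typecodes for env, i64, i32 and ptr in successive 3-bit
 * fields.  "32 - clz32(typemask >> 3)" locates the highest set bit and
 * DIV_ROUND_UP(..., 3) rounds that up to whole fields, giving nargs = 4.
 */
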
#define HELPER_INFO_INIT(I)      (&(I)->cif)
#define HELPER_INFO_INIT_VAL(I)  init_ffi_layout(I)
#else
#define HELPER_INFO_INIT(I)      (&(I)->init)
#define HELPER_INFO_INIT_VAL(I)  1
#endif /* CONFIG_TCG_INTERPRETER */

static inline bool arg_slot_reg_p(unsigned arg_slot)
{
    /*
     * Split the sizeof away from the comparison to avoid Werror from
     * "unsigned < 0 is always false", when iarg_regs is empty.
     */
    unsigned nreg = ARRAY_SIZE(tcg_target_call_iarg_regs);
    return arg_slot < nreg;
}

static inline int arg_slot_stk_ofs(unsigned arg_slot)
{
    unsigned max = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long);
    unsigned stk_slot = arg_slot - ARRAY_SIZE(tcg_target_call_iarg_regs);

    tcg_debug_assert(stk_slot < max);
    return TCG_TARGET_CALL_STACK_OFFSET + stk_slot * sizeof(tcg_target_long);
}

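/*
 * For example (host-dependent): with 6 integer argument registers,
 * arg_slot 7 is not a register slot, and arg_slot_stk_ofs(7) yields
 * TCG_TARGET_CALL_STACK_OFFSET + 1 * sizeof(tcg_target_long), i.e.
 * the second stack slot of the static call frame.
 */
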
typedef struct TCGCumulativeArgs {
    int arg_idx;                /* tcg_gen_callN args[] */
    int info_in_idx;            /* TCGHelperInfo in[] */
    int arg_slot;               /* regs+stack slot */
    int ref_slot;               /* stack slots for references */
} TCGCumulativeArgs;

static void layout_arg_even(TCGCumulativeArgs *cum)
{
    cum->arg_slot += cum->arg_slot & 1;
}

static void layout_arg_1(TCGCumulativeArgs *cum, TCGHelperInfo *info,
                         TCGCallArgumentKind kind)
{
    TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];

    *loc = (TCGCallArgumentLoc){
        .kind = kind,
        .arg_idx = cum->arg_idx,
        .arg_slot = cum->arg_slot,
    };
    cum->info_in_idx++;
    cum->arg_slot++;
}

static void layout_arg_normal_n(TCGCumulativeArgs *cum,
                                TCGHelperInfo *info, int n)
{
    TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];

    for (int i = 0; i < n; ++i) {
        /* Layout all using the same arg_idx, adjusting the subindex. */
        loc[i] = (TCGCallArgumentLoc){
            .kind = TCG_CALL_ARG_NORMAL,
            .arg_idx = cum->arg_idx,
            .tmp_subindex = i,
            .arg_slot = cum->arg_slot + i,
        };
    }
    cum->info_in_idx += n;
    cum->arg_slot += n;
}

static void layout_arg_by_ref(TCGCumulativeArgs *cum, TCGHelperInfo *info)
{
    TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];
    int n = 128 / TCG_TARGET_REG_BITS;

    /* The first subindex carries the pointer. */
    layout_arg_1(cum, info, TCG_CALL_ARG_BY_REF);

    /*
     * The callee is allowed to clobber memory associated with a
     * structure passed by reference.  Therefore we must make copies.
     * Allocate space from "ref_slot", which will be adjusted to
     * follow the parameters on the stack.
     */
    loc[0].ref_slot = cum->ref_slot;

    /*
     * Subsequent words also go into the reference slot, but
     * do not accumulate into the regular arguments.
     */
    for (int i = 1; i < n; ++i) {
        loc[i] = (TCGCallArgumentLoc){
            .kind = TCG_CALL_ARG_BY_REF_N,
            .arg_idx = cum->arg_idx,
            .tmp_subindex = i,
            .ref_slot = cum->ref_slot + i,
        };
    }
    cum->info_in_idx += n - 1;  /* i=0 accounted for in layout_arg_1 */
    cum->ref_slot += n;
}

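/*
 * Illustrative layout: on a 64-bit host an I128 argument passed by
 * reference consumes one regular arg_slot for the pointer plus two
 * ref_slots (n = 128/64) for the stack copy; the TCG_CALL_ARG_BY_REF_N
 * entry for the second word shares arg_idx with the first and differs
 * only in tmp_subindex and ref_slot.
 */
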
static void init_call_layout(TCGHelperInfo *info)
{
    int max_reg_slots = ARRAY_SIZE(tcg_target_call_iarg_regs);
    int max_stk_slots = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long);
    unsigned typemask = info->typemask;
    unsigned typecode;
    TCGCumulativeArgs cum = { };

    /*
     * Parse and place any function return value.
     */
    typecode = typemask & 7;
    switch (typecode) {
    case dh_typecode_void:
        info->nr_out = 0;
        break;
    case dh_typecode_i32:
    case dh_typecode_s32:
    case dh_typecode_ptr:
        info->nr_out = 1;
        info->out_kind = TCG_CALL_RET_NORMAL;
        break;
    case dh_typecode_i64:
    case dh_typecode_s64:
        info->nr_out = 64 / TCG_TARGET_REG_BITS;
        info->out_kind = TCG_CALL_RET_NORMAL;
        /* Query the last register now to trigger any assert early. */
        tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1);
        break;
    case dh_typecode_i128:
        info->nr_out = 128 / TCG_TARGET_REG_BITS;
        info->out_kind = TCG_TARGET_CALL_RET_I128;
        switch (TCG_TARGET_CALL_RET_I128) {
        case TCG_CALL_RET_NORMAL:
            /* Query the last register now to trigger any assert early. */
            tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1);
            break;
        case TCG_CALL_RET_BY_VEC:
            /* Query the single register now to trigger any assert early. */
            tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0);
            break;
        case TCG_CALL_RET_BY_REF:
            /*
             * Allocate the first argument to the output.
             * We don't need to store this anywhere, just make it
             * unavailable for use in the input loop below.
             */
            cum.arg_slot = 1;
            break;
        default:
            qemu_build_not_reached();
        }
        break;
    default:
        g_assert_not_reached();
    }

    /*
     * Parse and place function arguments.
     */
    for (typemask >>= 3; typemask; typemask >>= 3, cum.arg_idx++) {
        TCGCallArgumentKind kind;
        TCGType type;

        typecode = typemask & 7;
        switch (typecode) {
        case dh_typecode_i32:
        case dh_typecode_s32:
            type = TCG_TYPE_I32;
            break;
        case dh_typecode_i64:
        case dh_typecode_s64:
            type = TCG_TYPE_I64;
            break;
        case dh_typecode_ptr:
            type = TCG_TYPE_PTR;
            break;
        case dh_typecode_i128:
            type = TCG_TYPE_I128;
            break;
        default:
            g_assert_not_reached();
        }

        switch (type) {
        case TCG_TYPE_I32:
            switch (TCG_TARGET_CALL_ARG_I32) {
            case TCG_CALL_ARG_EVEN:
                layout_arg_even(&cum);
                /* fall through */
            case TCG_CALL_ARG_NORMAL:
                layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL);
                break;
            case TCG_CALL_ARG_EXTEND:
                kind = TCG_CALL_ARG_EXTEND_U + (typecode & 1);
                layout_arg_1(&cum, info, kind);
                break;
            default:
                qemu_build_not_reached();
            }
            break;

        case TCG_TYPE_I64:
            switch (TCG_TARGET_CALL_ARG_I64) {
            case TCG_CALL_ARG_EVEN:
                layout_arg_even(&cum);
                /* fall through */
            case TCG_CALL_ARG_NORMAL:
                if (TCG_TARGET_REG_BITS == 32) {
                    layout_arg_normal_n(&cum, info, 2);
                } else {
                    layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL);
                }
                break;
            default:
                qemu_build_not_reached();
            }
            break;

        case TCG_TYPE_I128:
            switch (TCG_TARGET_CALL_ARG_I128) {
            case TCG_CALL_ARG_EVEN:
                layout_arg_even(&cum);
                /* fall through */
            case TCG_CALL_ARG_NORMAL:
                layout_arg_normal_n(&cum, info, 128 / TCG_TARGET_REG_BITS);
                break;
            case TCG_CALL_ARG_BY_REF:
                layout_arg_by_ref(&cum, info);
                break;
            default:
                qemu_build_not_reached();
            }
            break;

        default:
            g_assert_not_reached();
        }
    }
    info->nr_in = cum.info_in_idx;

    /* Validate that we didn't overrun the input array. */
    assert(cum.info_in_idx <= ARRAY_SIZE(info->in));
    /* Validate the backend has enough argument space. */
    assert(cum.arg_slot <= max_reg_slots + max_stk_slots);

    /*
     * Relocate the "ref_slot" area to the end of the parameters.
     * Minimizing this stack offset helps code size for x86,
     * which has a signed 8-bit offset encoding.
     */
    if (cum.ref_slot != 0) {
        int ref_base = 0;

        if (cum.arg_slot > max_reg_slots) {
            int align = __alignof(Int128) / sizeof(tcg_target_long);

            ref_base = cum.arg_slot - max_reg_slots;
            if (align > 1) {
                ref_base = ROUND_UP(ref_base, align);
            }
        }
        assert(ref_base + cum.ref_slot <= max_stk_slots);
        ref_base += max_reg_slots;

        if (ref_base != 0) {
            for (int i = cum.info_in_idx - 1; i >= 0; --i) {
                TCGCallArgumentLoc *loc = &info->in[i];
                switch (loc->kind) {
                case TCG_CALL_ARG_BY_REF:
                case TCG_CALL_ARG_BY_REF_N:
                    loc->ref_slot += ref_base;
                    break;
                default:
                    break;
                }
            }
        }
    }
}

static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)];
static void process_op_defs(TCGContext *s);
static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
                                            TCGReg reg, const char *name);

static void tcg_context_init(unsigned max_cpus)
{
    TCGContext *s = &tcg_init_ctx;
    int op, total_args, n, i;
    TCGOpDef *def;
    TCGArgConstraint *args_ct;
    TCGTemp *ts;

    memset(s, 0, sizeof(*s));
    s->nb_globals = 0;

    /* Count total number of arguments and allocate the corresponding
       space */
    total_args = 0;
    for (op = 0; op < NB_OPS; op++) {
        def = &tcg_op_defs[op];
        n = def->nb_iargs + def->nb_oargs;
        total_args += n;
    }

    args_ct = g_new0(TCGArgConstraint, total_args);

    for (op = 0; op < NB_OPS; op++) {
        def = &tcg_op_defs[op];
        def->args_ct = args_ct;
        n = def->nb_iargs + def->nb_oargs;
        args_ct += n;
    }

    init_call_layout(&info_helper_ld32_mmu);
    init_call_layout(&info_helper_ld64_mmu);
    init_call_layout(&info_helper_ld128_mmu);
    init_call_layout(&info_helper_st32_mmu);
    init_call_layout(&info_helper_st64_mmu);
    init_call_layout(&info_helper_st128_mmu);

    tcg_target_init(s);
    process_op_defs(s);

    /* Reverse the order of the saved registers, assuming they're all at
       the start of tcg_target_reg_alloc_order.  */
    for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) {
        int r = tcg_target_reg_alloc_order[n];
        if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) {
            break;
        }
    }
    for (i = 0; i < n; ++i) {
        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i];
    }
    for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) {
        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i];
    }

    alloc_tcg_plugin_context(s);

    tcg_ctx = s;
    /*
     * In user-mode we simply share the init context among threads, since we
     * use a single region. See the documentation of tcg_region_init() for the
     * reasoning behind this.
     * In system-mode we will have at most max_cpus TCG threads.
     */
#ifdef CONFIG_USER_ONLY
    tcg_ctxs = &tcg_ctx;
    tcg_cur_ctxs = 1;
    tcg_max_ctxs = 1;
#else
    tcg_max_ctxs = max_cpus;
    tcg_ctxs = g_new0(TCGContext *, max_cpus);
#endif

    tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0));
    ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env");
    tcg_env = temp_tcgv_ptr(ts);
}

void tcg_init(size_t tb_size, int splitwx, unsigned max_cpus)
{
    tcg_context_init(max_cpus);
    tcg_region_init(tb_size, splitwx, max_cpus);
}

/*
 * Allocate TBs right before their corresponding translated code, making
 * sure that TBs and code are on different cache lines.
 */
TranslationBlock *tcg_tb_alloc(TCGContext *s)
{
    uintptr_t align = qemu_icache_linesize;
    TranslationBlock *tb;
    void *next;

 retry:
    tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align);
    next = (void *)ROUND_UP((uintptr_t)(tb + 1), align);

    if (unlikely(next > s->code_gen_highwater)) {
        if (tcg_region_alloc(s)) {
            return NULL;
        }
        goto retry;
    }
    qatomic_set(&s->code_gen_ptr, next);
    s->data_gen_ptr = NULL;
    return tb;
}

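/*
 * Illustrative layout within a region (A = qemu_icache_linesize):
 *
 *   [TB struct][pad to A][translated code ...][pad to A][next TB]...
 *
 * Both the TB pointer and the code pointer that follows it are rounded
 * up to A, so a TB's (data) structure never shares an instruction
 * cache line with its (code) translation.
 */
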
1412  void tcg_prologue_init(void)
1413  {
1414      TCGContext *s = tcg_ctx;
1415      size_t prologue_size;
1416  
1417      s->code_ptr = s->code_gen_ptr;
1418      s->code_buf = s->code_gen_ptr;
1419      s->data_gen_ptr = NULL;
1420  
1421  #ifndef CONFIG_TCG_INTERPRETER
1422      tcg_qemu_tb_exec = (tcg_prologue_fn *)tcg_splitwx_to_rx(s->code_ptr);
1423  #endif
1424  
1425  #ifdef TCG_TARGET_NEED_POOL_LABELS
1426      s->pool_labels = NULL;
1427  #endif
1428  
1429      qemu_thread_jit_write();
1430      /* Generate the prologue.  */
1431      tcg_target_qemu_prologue(s);
1432  
1433  #ifdef TCG_TARGET_NEED_POOL_LABELS
1434      /* Allow the prologue to put e.g. guest_base into a pool entry.  */
1435      {
1436          int result = tcg_out_pool_finalize(s);
1437          tcg_debug_assert(result == 0);
1438      }
1439  #endif
1440  
1441      prologue_size = tcg_current_code_size(s);
1442      perf_report_prologue(s->code_gen_ptr, prologue_size);
1443  
1444  #ifndef CONFIG_TCG_INTERPRETER
1445      flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
1446                          (uintptr_t)s->code_buf, prologue_size);
1447  #endif
1448  
1449      if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
1450          FILE *logfile = qemu_log_trylock();
1451          if (logfile) {
1452              fprintf(logfile, "PROLOGUE: [size=%zu]\n", prologue_size);
1453              if (s->data_gen_ptr) {
1454                  size_t code_size = s->data_gen_ptr - s->code_gen_ptr;
1455                  size_t data_size = prologue_size - code_size;
1456                  size_t i;
1457  
1458                  disas(logfile, s->code_gen_ptr, code_size);
1459  
1460                  for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) {
1461                      if (sizeof(tcg_target_ulong) == 8) {
1462                          fprintf(logfile,
1463                                  "0x%08" PRIxPTR ":  .quad  0x%016" PRIx64 "\n",
1464                                  (uintptr_t)s->data_gen_ptr + i,
1465                                  *(uint64_t *)(s->data_gen_ptr + i));
1466                      } else {
1467                          fprintf(logfile,
1468                                  "0x%08" PRIxPTR ":  .long  0x%08x\n",
1469                                  (uintptr_t)s->data_gen_ptr + i,
1470                                  *(uint32_t *)(s->data_gen_ptr + i));
1471                      }
1472                  }
1473              } else {
1474                  disas(logfile, s->code_gen_ptr, prologue_size);
1475              }
1476              fprintf(logfile, "\n");
1477              qemu_log_unlock(logfile);
1478          }
1479      }
1480  
1481  #ifndef CONFIG_TCG_INTERPRETER
1482      /*
1483       * Assert that goto_ptr is implemented completely, setting an epilogue.
1484       * For tci, we use NULL as the signal to return from the interpreter,
1485       * so skip this check.
1486       */
1487      tcg_debug_assert(tcg_code_gen_epilogue != NULL);
1488  #endif
1489  
1490      tcg_region_prologue_set(s);
1491  }
1492  
1493  void tcg_func_start(TCGContext *s)
1494  {
1495      tcg_pool_reset(s);
1496      s->nb_temps = s->nb_globals;
1497  
1498      /* No temps have been previously allocated for size or locality.  */
1499      memset(s->free_temps, 0, sizeof(s->free_temps));
1500  
1501      /* No constant temps have been previously allocated. */
1502      for (int i = 0; i < TCG_TYPE_COUNT; ++i) {
1503          if (s->const_table[i]) {
1504              g_hash_table_remove_all(s->const_table[i]);
1505          }
1506      }
1507  
1508      s->nb_ops = 0;
1509      s->nb_labels = 0;
1510      s->current_frame_offset = s->frame_start;
1511  
1512  #ifdef CONFIG_DEBUG_TCG
1513      s->goto_tb_issue_mask = 0;
1514  #endif
1515  
1516      QTAILQ_INIT(&s->ops);
1517      QTAILQ_INIT(&s->free_ops);
1518      QSIMPLEQ_INIT(&s->labels);
1519  
1520      tcg_debug_assert(s->addr_type == TCG_TYPE_I32 ||
1521                       s->addr_type == TCG_TYPE_I64);
1522  
1523      tcg_debug_assert(s->insn_start_words > 0);
1524  }
1525  
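      /*
       * Temporaries live in the fixed-size s->temps[] array, so a temp's
       * index is stable for the life of the translation.  Running out of
       * slots aborts the current translation (tcg_raise_tb_overflow does
       * not return), allowing the TB to be retried at a smaller size.
       */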
1526  static TCGTemp *tcg_temp_alloc(TCGContext *s)
1527  {
1528      int n = s->nb_temps++;
1529  
1530      if (n >= TCG_MAX_TEMPS) {
1531          tcg_raise_tb_overflow(s);
1532      }
1533      return memset(&s->temps[n], 0, sizeof(TCGTemp));
1534  }
1535  
1536  static TCGTemp *tcg_global_alloc(TCGContext *s)
1537  {
1538      TCGTemp *ts;
1539  
1540      tcg_debug_assert(s->nb_globals == s->nb_temps);
1541      tcg_debug_assert(s->nb_globals < TCG_MAX_TEMPS);
1542      s->nb_globals++;
1543      ts = tcg_temp_alloc(s);
1544      ts->kind = TEMP_GLOBAL;
1545  
1546      return ts;
1547  }
1548  
1549  static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
1550                                              TCGReg reg, const char *name)
1551  {
1552      TCGTemp *ts;
1553  
1554      tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);
1555  
1556      ts = tcg_global_alloc(s);
1557      ts->base_type = type;
1558      ts->type = type;
1559      ts->kind = TEMP_FIXED;
1560      ts->reg = reg;
1561      ts->name = name;
1562      tcg_regset_set_reg(s->reserved_regs, reg);
1563  
1564      return ts;
1565  }
1566  
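      /*
       * Register the stack frame used for spilling temps.  The frame
       * pointer becomes a fixed global named "_frame", and its register
       * is reserved from the allocator.
       */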
1567  void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size)
1568  {
1569      s->frame_start = start;
1570      s->frame_end = start + size;
1571      s->frame_temp
1572          = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame");
1573  }
1574  
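      /*
       * Create a global that lives in memory at base + offset.  On a
       * 32-bit host, a 64-bit global is represented as two consecutive
       * 32-bit temps named "<name>_0" and "<name>_1", covering offset
       * and offset + 4; temp_subindex distinguishes the second word.
       */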
1575  static TCGTemp *tcg_global_mem_new_internal(TCGv_ptr base, intptr_t offset,
1576                                              const char *name, TCGType type)
1577  {
1578      TCGContext *s = tcg_ctx;
1579      TCGTemp *base_ts = tcgv_ptr_temp(base);
1580      TCGTemp *ts = tcg_global_alloc(s);
1581      int indirect_reg = 0;
1582  
1583      switch (base_ts->kind) {
1584      case TEMP_FIXED:
1585          break;
1586      case TEMP_GLOBAL:
1587          /* We do not support double-indirect registers.  */
1588          tcg_debug_assert(!base_ts->indirect_reg);
1589          base_ts->indirect_base = 1;
1590          s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64
1591                              ? 2 : 1);
1592          indirect_reg = 1;
1593          break;
1594      default:
1595          g_assert_not_reached();
1596      }
1597  
1598      if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
1599          TCGTemp *ts2 = tcg_global_alloc(s);
1600          char buf[64];
1601  
1602          ts->base_type = TCG_TYPE_I64;
1603          ts->type = TCG_TYPE_I32;
1604          ts->indirect_reg = indirect_reg;
1605          ts->mem_allocated = 1;
1606          ts->mem_base = base_ts;
1607          ts->mem_offset = offset;
1608          pstrcpy(buf, sizeof(buf), name);
1609          pstrcat(buf, sizeof(buf), "_0");
1610          ts->name = strdup(buf);
1611  
1612          tcg_debug_assert(ts2 == ts + 1);
1613          ts2->base_type = TCG_TYPE_I64;
1614          ts2->type = TCG_TYPE_I32;
1615          ts2->indirect_reg = indirect_reg;
1616          ts2->mem_allocated = 1;
1617          ts2->mem_base = base_ts;
1618          ts2->mem_offset = offset + 4;
1619          ts2->temp_subindex = 1;
1620          pstrcpy(buf, sizeof(buf), name);
1621          pstrcat(buf, sizeof(buf), "_1");
1622          ts2->name = strdup(buf);
1623      } else {
1624          ts->base_type = type;
1625          ts->type = type;
1626          ts->indirect_reg = indirect_reg;
1627          ts->mem_allocated = 1;
1628          ts->mem_base = base_ts;
1629          ts->mem_offset = offset;
1630          ts->name = name;
1631      }
1632      return ts;
1633  }
1634  
1635  TCGv_i32 tcg_global_mem_new_i32(TCGv_ptr reg, intptr_t off, const char *name)
1636  {
1637      TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_I32);
1638      return temp_tcgv_i32(ts);
1639  }
1640  
1641  TCGv_i64 tcg_global_mem_new_i64(TCGv_ptr reg, intptr_t off, const char *name)
1642  {
1643      TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_I64);
1644      return temp_tcgv_i64(ts);
1645  }
1646  
1647  TCGv_ptr tcg_global_mem_new_ptr(TCGv_ptr reg, intptr_t off, const char *name)
1648  {
1649      TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_PTR);
1650      return temp_tcgv_ptr(ts);
1651  }
1652  
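      /*
       * TEMP_EBB temps are recycled: a matching entry on the per-type
       * free list is reused before a new slot is allocated.  Types wider
       * than the host register are built from n consecutive TCG_TYPE_REG
       * parts; e.g. (sketch) tcg_temp_new_i64() on a 32-bit host yields
       * a pair of temps with temp_subindex 0 and 1.
       */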
1653  static TCGTemp *tcg_temp_new_internal(TCGType type, TCGTempKind kind)
1654  {
1655      TCGContext *s = tcg_ctx;
1656      TCGTemp *ts;
1657      int n;
1658  
1659      if (kind == TEMP_EBB) {
1660          int idx = find_first_bit(s->free_temps[type].l, TCG_MAX_TEMPS);
1661  
1662          if (idx < TCG_MAX_TEMPS) {
1663              /* There is already an available temp with the right type.  */
1664              clear_bit(idx, s->free_temps[type].l);
1665  
1666              ts = &s->temps[idx];
1667              ts->temp_allocated = 1;
1668              tcg_debug_assert(ts->base_type == type);
1669              tcg_debug_assert(ts->kind == kind);
1670              return ts;
1671          }
1672      } else {
1673          tcg_debug_assert(kind == TEMP_TB);
1674      }
1675  
1676      switch (type) {
1677      case TCG_TYPE_I32:
1678      case TCG_TYPE_V64:
1679      case TCG_TYPE_V128:
1680      case TCG_TYPE_V256:
1681          n = 1;
1682          break;
1683      case TCG_TYPE_I64:
1684          n = 64 / TCG_TARGET_REG_BITS;
1685          break;
1686      case TCG_TYPE_I128:
1687          n = 128 / TCG_TARGET_REG_BITS;
1688          break;
1689      default:
1690          g_assert_not_reached();
1691      }
1692  
1693      ts = tcg_temp_alloc(s);
1694      ts->base_type = type;
1695      ts->temp_allocated = 1;
1696      ts->kind = kind;
1697  
1698      if (n == 1) {
1699          ts->type = type;
1700      } else {
1701          ts->type = TCG_TYPE_REG;
1702  
1703          for (int i = 1; i < n; ++i) {
1704              TCGTemp *ts2 = tcg_temp_alloc(s);
1705  
1706              tcg_debug_assert(ts2 == ts + i);
1707              ts2->base_type = type;
1708              ts2->type = TCG_TYPE_REG;
1709              ts2->temp_allocated = 1;
1710              ts2->temp_subindex = i;
1711              ts2->kind = kind;
1712          }
1713      }
1714      return ts;
1715  }
1716  
1717  TCGv_i32 tcg_temp_new_i32(void)
1718  {
1719      return temp_tcgv_i32(tcg_temp_new_internal(TCG_TYPE_I32, TEMP_TB));
1720  }
1721  
1722  TCGv_i32 tcg_temp_ebb_new_i32(void)
1723  {
1724      return temp_tcgv_i32(tcg_temp_new_internal(TCG_TYPE_I32, TEMP_EBB));
1725  }
1726  
1727  TCGv_i64 tcg_temp_new_i64(void)
1728  {
1729      return temp_tcgv_i64(tcg_temp_new_internal(TCG_TYPE_I64, TEMP_TB));
1730  }
1731  
1732  TCGv_i64 tcg_temp_ebb_new_i64(void)
1733  {
1734      return temp_tcgv_i64(tcg_temp_new_internal(TCG_TYPE_I64, TEMP_EBB));
1735  }
1736  
1737  TCGv_ptr tcg_temp_new_ptr(void)
1738  {
1739      return temp_tcgv_ptr(tcg_temp_new_internal(TCG_TYPE_PTR, TEMP_TB));
1740  }
1741  
1742  TCGv_ptr tcg_temp_ebb_new_ptr(void)
1743  {
1744      return temp_tcgv_ptr(tcg_temp_new_internal(TCG_TYPE_PTR, TEMP_EBB));
1745  }
1746  
1747  TCGv_i128 tcg_temp_new_i128(void)
1748  {
1749      return temp_tcgv_i128(tcg_temp_new_internal(TCG_TYPE_I128, TEMP_TB));
1750  }
1751  
1752  TCGv_i128 tcg_temp_ebb_new_i128(void)
1753  {
1754      return temp_tcgv_i128(tcg_temp_new_internal(TCG_TYPE_I128, TEMP_EBB));
1755  }
1756  
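      /* Vector temps are always EBB-scoped; debug builds verify that the
         host actually supports the requested vector width.  */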
1757  TCGv_vec tcg_temp_new_vec(TCGType type)
1758  {
1759      TCGTemp *t;
1760  
1761  #ifdef CONFIG_DEBUG_TCG
1762      switch (type) {
1763      case TCG_TYPE_V64:
1764          assert(TCG_TARGET_HAS_v64);
1765          break;
1766      case TCG_TYPE_V128:
1767          assert(TCG_TARGET_HAS_v128);
1768          break;
1769      case TCG_TYPE_V256:
1770          assert(TCG_TARGET_HAS_v256);
1771          break;
1772      default:
1773          g_assert_not_reached();
1774      }
1775  #endif
1776  
1777      t = tcg_temp_new_internal(type, TEMP_EBB);
1778      return temp_tcgv_vec(t);
1779  }
1780  
1781  /* Create a new temp of the same type as an existing temp.  */
1782  TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match)
1783  {
1784      TCGTemp *t = tcgv_vec_temp(match);
1785  
1786      tcg_debug_assert(t->temp_allocated != 0);
1787  
1788      t = tcg_temp_new_internal(t->base_type, TEMP_EBB);
1789      return temp_tcgv_vec(t);
1790  }
1791  
1792  void tcg_temp_free_internal(TCGTemp *ts)
1793  {
1794      TCGContext *s = tcg_ctx;
1795  
1796      switch (ts->kind) {
1797      case TEMP_CONST:
1798      case TEMP_TB:
1799          /* Silently ignore free. */
1800          break;
1801      case TEMP_EBB:
1802          tcg_debug_assert(ts->temp_allocated != 0);
1803          ts->temp_allocated = 0;
1804          set_bit(temp_idx(ts), s->free_temps[ts->base_type].l);
1805          break;
1806      default:
1807          /* It never made sense to free TEMP_FIXED or TEMP_GLOBAL. */
1808          g_assert_not_reached();
1809      }
1810  }
1811  
1812  void tcg_temp_free_i32(TCGv_i32 arg)
1813  {
1814      tcg_temp_free_internal(tcgv_i32_temp(arg));
1815  }
1816  
1817  void tcg_temp_free_i64(TCGv_i64 arg)
1818  {
1819      tcg_temp_free_internal(tcgv_i64_temp(arg));
1820  }
1821  
1822  void tcg_temp_free_i128(TCGv_i128 arg)
1823  {
1824      tcg_temp_free_internal(tcgv_i128_temp(arg));
1825  }
1826  
1827  void tcg_temp_free_ptr(TCGv_ptr arg)
1828  {
1829      tcg_temp_free_internal(tcgv_ptr_temp(arg));
1830  }
1831  
1832  void tcg_temp_free_vec(TCGv_vec arg)
1833  {
1834      tcg_temp_free_internal(tcgv_vec_temp(arg));
1835  }
1836  
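      /*
       * Constants are hash-consed per type: repeated requests for the
       * same value return the same TEMP_CONST temp, which is never freed
       * while the context lives.  E.g. every tcg_constant_i32(0) within a
       * context aliases one backing temp, so callers must not modify it.
       */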
1837  TCGTemp *tcg_constant_internal(TCGType type, int64_t val)
1838  {
1839      TCGContext *s = tcg_ctx;
1840      GHashTable *h = s->const_table[type];
1841      TCGTemp *ts;
1842  
1843      if (h == NULL) {
1844          h = g_hash_table_new(g_int64_hash, g_int64_equal);
1845          s->const_table[type] = h;
1846      }
1847  
1848      ts = g_hash_table_lookup(h, &val);
1849      if (ts == NULL) {
1850          int64_t *val_ptr;
1851  
1852          ts = tcg_temp_alloc(s);
1853  
1854          if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
1855              TCGTemp *ts2 = tcg_temp_alloc(s);
1856  
1857              tcg_debug_assert(ts2 == ts + 1);
1858  
1859              ts->base_type = TCG_TYPE_I64;
1860              ts->type = TCG_TYPE_I32;
1861              ts->kind = TEMP_CONST;
1862              ts->temp_allocated = 1;
1863  
1864              ts2->base_type = TCG_TYPE_I64;
1865              ts2->type = TCG_TYPE_I32;
1866              ts2->kind = TEMP_CONST;
1867              ts2->temp_allocated = 1;
1868              ts2->temp_subindex = 1;
1869  
1870              /*
1871               * Retain the full value of the 64-bit constant in the low
1872               * part, so that the hash table works.  Actual uses will
1873               * truncate the value to the low part.
1874               */
1875              ts[HOST_BIG_ENDIAN].val = val;
1876              ts[!HOST_BIG_ENDIAN].val = val >> 32;
1877              val_ptr = &ts[HOST_BIG_ENDIAN].val;
1878          } else {
1879              ts->base_type = type;
1880              ts->type = type;
1881              ts->kind = TEMP_CONST;
1882              ts->temp_allocated = 1;
1883              ts->val = val;
1884              val_ptr = &ts->val;
1885          }
1886          g_hash_table_insert(h, val_ptr, ts);
1887      }
1888  
1889      return ts;
1890  }
1891  
1892  TCGv_i32 tcg_constant_i32(int32_t val)
1893  {
1894      return temp_tcgv_i32(tcg_constant_internal(TCG_TYPE_I32, val));
1895  }
1896  
1897  TCGv_i64 tcg_constant_i64(int64_t val)
1898  {
1899      return temp_tcgv_i64(tcg_constant_internal(TCG_TYPE_I64, val));
1900  }
1901  
1902  TCGv_ptr tcg_constant_ptr_int(intptr_t val)
1903  {
1904      return temp_tcgv_ptr(tcg_constant_internal(TCG_TYPE_PTR, val));
1905  }
1906  
1907  TCGv_vec tcg_constant_vec(TCGType type, unsigned vece, int64_t val)
1908  {
1909      val = dup_const(vece, val);
1910      return temp_tcgv_vec(tcg_constant_internal(type, val));
1911  }
1912  
1913  TCGv_vec tcg_constant_vec_matching(TCGv_vec match, unsigned vece, int64_t val)
1914  {
1915      TCGTemp *t = tcgv_vec_temp(match);
1916  
1917      tcg_debug_assert(t->temp_allocated != 0);
1918      return tcg_constant_vec(t->base_type, vece, val);
1919  }
1920  
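      /*
       * Debug-only, range-checked versions of the temp accessors; the
       * unchecked variants are provided inline elsewhere.  A TCGv_* value
       * encodes the byte offset of its TCGTemp within tcg_ctx.
       */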
1921  #ifdef CONFIG_DEBUG_TCG
1922  size_t temp_idx(TCGTemp *ts)
1923  {
1924      ptrdiff_t n = ts - tcg_ctx->temps;
1925      assert(n >= 0 && n < tcg_ctx->nb_temps);
1926      return n;
1927  }
1928  
1929  TCGTemp *tcgv_i32_temp(TCGv_i32 v)
1930  {
1931      uintptr_t o = (uintptr_t)v - offsetof(TCGContext, temps);
1932  
1933      assert(o < sizeof(TCGTemp) * tcg_ctx->nb_temps);
1934      assert(o % sizeof(TCGTemp) == 0);
1935  
1936      return (void *)tcg_ctx + (uintptr_t)v;
1937  }
1938  #endif /* CONFIG_DEBUG_TCG */
1939  
1940  /* Return true if OP may appear in the opcode stream.
1941     Test the runtime variable that controls each opcode.  */
1942  bool tcg_op_supported(TCGOpcode op)
1943  {
1944      const bool have_vec
1945          = TCG_TARGET_HAS_v64 | TCG_TARGET_HAS_v128 | TCG_TARGET_HAS_v256;
1946  
1947      switch (op) {
1948      case INDEX_op_discard:
1949      case INDEX_op_set_label:
1950      case INDEX_op_call:
1951      case INDEX_op_br:
1952      case INDEX_op_mb:
1953      case INDEX_op_insn_start:
1954      case INDEX_op_exit_tb:
1955      case INDEX_op_goto_tb:
1956      case INDEX_op_goto_ptr:
1957      case INDEX_op_qemu_ld_a32_i32:
1958      case INDEX_op_qemu_ld_a64_i32:
1959      case INDEX_op_qemu_st_a32_i32:
1960      case INDEX_op_qemu_st_a64_i32:
1961      case INDEX_op_qemu_ld_a32_i64:
1962      case INDEX_op_qemu_ld_a64_i64:
1963      case INDEX_op_qemu_st_a32_i64:
1964      case INDEX_op_qemu_st_a64_i64:
1965          return true;
1966  
1967      case INDEX_op_qemu_st8_a32_i32:
1968      case INDEX_op_qemu_st8_a64_i32:
1969          return TCG_TARGET_HAS_qemu_st8_i32;
1970  
1971      case INDEX_op_qemu_ld_a32_i128:
1972      case INDEX_op_qemu_ld_a64_i128:
1973      case INDEX_op_qemu_st_a32_i128:
1974      case INDEX_op_qemu_st_a64_i128:
1975          return TCG_TARGET_HAS_qemu_ldst_i128;
1976  
1977      case INDEX_op_mov_i32:
1978      case INDEX_op_setcond_i32:
1979      case INDEX_op_brcond_i32:
1980      case INDEX_op_movcond_i32:
1981      case INDEX_op_ld8u_i32:
1982      case INDEX_op_ld8s_i32:
1983      case INDEX_op_ld16u_i32:
1984      case INDEX_op_ld16s_i32:
1985      case INDEX_op_ld_i32:
1986      case INDEX_op_st8_i32:
1987      case INDEX_op_st16_i32:
1988      case INDEX_op_st_i32:
1989      case INDEX_op_add_i32:
1990      case INDEX_op_sub_i32:
1991      case INDEX_op_neg_i32:
1992      case INDEX_op_mul_i32:
1993      case INDEX_op_and_i32:
1994      case INDEX_op_or_i32:
1995      case INDEX_op_xor_i32:
1996      case INDEX_op_shl_i32:
1997      case INDEX_op_shr_i32:
1998      case INDEX_op_sar_i32:
1999          return true;
2000  
2001      case INDEX_op_negsetcond_i32:
2002          return TCG_TARGET_HAS_negsetcond_i32;
2003      case INDEX_op_div_i32:
2004      case INDEX_op_divu_i32:
2005          return TCG_TARGET_HAS_div_i32;
2006      case INDEX_op_rem_i32:
2007      case INDEX_op_remu_i32:
2008          return TCG_TARGET_HAS_rem_i32;
2009      case INDEX_op_div2_i32:
2010      case INDEX_op_divu2_i32:
2011          return TCG_TARGET_HAS_div2_i32;
2012      case INDEX_op_rotl_i32:
2013      case INDEX_op_rotr_i32:
2014          return TCG_TARGET_HAS_rot_i32;
2015      case INDEX_op_deposit_i32:
2016          return TCG_TARGET_HAS_deposit_i32;
2017      case INDEX_op_extract_i32:
2018          return TCG_TARGET_HAS_extract_i32;
2019      case INDEX_op_sextract_i32:
2020          return TCG_TARGET_HAS_sextract_i32;
2021      case INDEX_op_extract2_i32:
2022          return TCG_TARGET_HAS_extract2_i32;
2023      case INDEX_op_add2_i32:
2024          return TCG_TARGET_HAS_add2_i32;
2025      case INDEX_op_sub2_i32:
2026          return TCG_TARGET_HAS_sub2_i32;
2027      case INDEX_op_mulu2_i32:
2028          return TCG_TARGET_HAS_mulu2_i32;
2029      case INDEX_op_muls2_i32:
2030          return TCG_TARGET_HAS_muls2_i32;
2031      case INDEX_op_muluh_i32:
2032          return TCG_TARGET_HAS_muluh_i32;
2033      case INDEX_op_mulsh_i32:
2034          return TCG_TARGET_HAS_mulsh_i32;
2035      case INDEX_op_ext8s_i32:
2036          return TCG_TARGET_HAS_ext8s_i32;
2037      case INDEX_op_ext16s_i32:
2038          return TCG_TARGET_HAS_ext16s_i32;
2039      case INDEX_op_ext8u_i32:
2040          return TCG_TARGET_HAS_ext8u_i32;
2041      case INDEX_op_ext16u_i32:
2042          return TCG_TARGET_HAS_ext16u_i32;
2043      case INDEX_op_bswap16_i32:
2044          return TCG_TARGET_HAS_bswap16_i32;
2045      case INDEX_op_bswap32_i32:
2046          return TCG_TARGET_HAS_bswap32_i32;
2047      case INDEX_op_not_i32:
2048          return TCG_TARGET_HAS_not_i32;
2049      case INDEX_op_andc_i32:
2050          return TCG_TARGET_HAS_andc_i32;
2051      case INDEX_op_orc_i32:
2052          return TCG_TARGET_HAS_orc_i32;
2053      case INDEX_op_eqv_i32:
2054          return TCG_TARGET_HAS_eqv_i32;
2055      case INDEX_op_nand_i32:
2056          return TCG_TARGET_HAS_nand_i32;
2057      case INDEX_op_nor_i32:
2058          return TCG_TARGET_HAS_nor_i32;
2059      case INDEX_op_clz_i32:
2060          return TCG_TARGET_HAS_clz_i32;
2061      case INDEX_op_ctz_i32:
2062          return TCG_TARGET_HAS_ctz_i32;
2063      case INDEX_op_ctpop_i32:
2064          return TCG_TARGET_HAS_ctpop_i32;
2065  
2066      case INDEX_op_brcond2_i32:
2067      case INDEX_op_setcond2_i32:
2068          return TCG_TARGET_REG_BITS == 32;
2069  
2070      case INDEX_op_mov_i64:
2071      case INDEX_op_setcond_i64:
2072      case INDEX_op_brcond_i64:
2073      case INDEX_op_movcond_i64:
2074      case INDEX_op_ld8u_i64:
2075      case INDEX_op_ld8s_i64:
2076      case INDEX_op_ld16u_i64:
2077      case INDEX_op_ld16s_i64:
2078      case INDEX_op_ld32u_i64:
2079      case INDEX_op_ld32s_i64:
2080      case INDEX_op_ld_i64:
2081      case INDEX_op_st8_i64:
2082      case INDEX_op_st16_i64:
2083      case INDEX_op_st32_i64:
2084      case INDEX_op_st_i64:
2085      case INDEX_op_add_i64:
2086      case INDEX_op_sub_i64:
2087      case INDEX_op_neg_i64:
2088      case INDEX_op_mul_i64:
2089      case INDEX_op_and_i64:
2090      case INDEX_op_or_i64:
2091      case INDEX_op_xor_i64:
2092      case INDEX_op_shl_i64:
2093      case INDEX_op_shr_i64:
2094      case INDEX_op_sar_i64:
2095      case INDEX_op_ext_i32_i64:
2096      case INDEX_op_extu_i32_i64:
2097          return TCG_TARGET_REG_BITS == 64;
2098  
2099      case INDEX_op_negsetcond_i64:
2100          return TCG_TARGET_HAS_negsetcond_i64;
2101      case INDEX_op_div_i64:
2102      case INDEX_op_divu_i64:
2103          return TCG_TARGET_HAS_div_i64;
2104      case INDEX_op_rem_i64:
2105      case INDEX_op_remu_i64:
2106          return TCG_TARGET_HAS_rem_i64;
2107      case INDEX_op_div2_i64:
2108      case INDEX_op_divu2_i64:
2109          return TCG_TARGET_HAS_div2_i64;
2110      case INDEX_op_rotl_i64:
2111      case INDEX_op_rotr_i64:
2112          return TCG_TARGET_HAS_rot_i64;
2113      case INDEX_op_deposit_i64:
2114          return TCG_TARGET_HAS_deposit_i64;
2115      case INDEX_op_extract_i64:
2116          return TCG_TARGET_HAS_extract_i64;
2117      case INDEX_op_sextract_i64:
2118          return TCG_TARGET_HAS_sextract_i64;
2119      case INDEX_op_extract2_i64:
2120          return TCG_TARGET_HAS_extract2_i64;
2121      case INDEX_op_extrl_i64_i32:
2122      case INDEX_op_extrh_i64_i32:
2123          return TCG_TARGET_HAS_extr_i64_i32;
2124      case INDEX_op_ext8s_i64:
2125          return TCG_TARGET_HAS_ext8s_i64;
2126      case INDEX_op_ext16s_i64:
2127          return TCG_TARGET_HAS_ext16s_i64;
2128      case INDEX_op_ext32s_i64:
2129          return TCG_TARGET_HAS_ext32s_i64;
2130      case INDEX_op_ext8u_i64:
2131          return TCG_TARGET_HAS_ext8u_i64;
2132      case INDEX_op_ext16u_i64:
2133          return TCG_TARGET_HAS_ext16u_i64;
2134      case INDEX_op_ext32u_i64:
2135          return TCG_TARGET_HAS_ext32u_i64;
2136      case INDEX_op_bswap16_i64:
2137          return TCG_TARGET_HAS_bswap16_i64;
2138      case INDEX_op_bswap32_i64:
2139          return TCG_TARGET_HAS_bswap32_i64;
2140      case INDEX_op_bswap64_i64:
2141          return TCG_TARGET_HAS_bswap64_i64;
2142      case INDEX_op_not_i64:
2143          return TCG_TARGET_HAS_not_i64;
2144      case INDEX_op_andc_i64:
2145          return TCG_TARGET_HAS_andc_i64;
2146      case INDEX_op_orc_i64:
2147          return TCG_TARGET_HAS_orc_i64;
2148      case INDEX_op_eqv_i64:
2149          return TCG_TARGET_HAS_eqv_i64;
2150      case INDEX_op_nand_i64:
2151          return TCG_TARGET_HAS_nand_i64;
2152      case INDEX_op_nor_i64:
2153          return TCG_TARGET_HAS_nor_i64;
2154      case INDEX_op_clz_i64:
2155          return TCG_TARGET_HAS_clz_i64;
2156      case INDEX_op_ctz_i64:
2157          return TCG_TARGET_HAS_ctz_i64;
2158      case INDEX_op_ctpop_i64:
2159          return TCG_TARGET_HAS_ctpop_i64;
2160      case INDEX_op_add2_i64:
2161          return TCG_TARGET_HAS_add2_i64;
2162      case INDEX_op_sub2_i64:
2163          return TCG_TARGET_HAS_sub2_i64;
2164      case INDEX_op_mulu2_i64:
2165          return TCG_TARGET_HAS_mulu2_i64;
2166      case INDEX_op_muls2_i64:
2167          return TCG_TARGET_HAS_muls2_i64;
2168      case INDEX_op_muluh_i64:
2169          return TCG_TARGET_HAS_muluh_i64;
2170      case INDEX_op_mulsh_i64:
2171          return TCG_TARGET_HAS_mulsh_i64;
2172  
2173      case INDEX_op_mov_vec:
2174      case INDEX_op_dup_vec:
2175      case INDEX_op_dupm_vec:
2176      case INDEX_op_ld_vec:
2177      case INDEX_op_st_vec:
2178      case INDEX_op_add_vec:
2179      case INDEX_op_sub_vec:
2180      case INDEX_op_and_vec:
2181      case INDEX_op_or_vec:
2182      case INDEX_op_xor_vec:
2183      case INDEX_op_cmp_vec:
2184          return have_vec;
2185      case INDEX_op_dup2_vec:
2186          return have_vec && TCG_TARGET_REG_BITS == 32;
2187      case INDEX_op_not_vec:
2188          return have_vec && TCG_TARGET_HAS_not_vec;
2189      case INDEX_op_neg_vec:
2190          return have_vec && TCG_TARGET_HAS_neg_vec;
2191      case INDEX_op_abs_vec:
2192          return have_vec && TCG_TARGET_HAS_abs_vec;
2193      case INDEX_op_andc_vec:
2194          return have_vec && TCG_TARGET_HAS_andc_vec;
2195      case INDEX_op_orc_vec:
2196          return have_vec && TCG_TARGET_HAS_orc_vec;
2197      case INDEX_op_nand_vec:
2198          return have_vec && TCG_TARGET_HAS_nand_vec;
2199      case INDEX_op_nor_vec:
2200          return have_vec && TCG_TARGET_HAS_nor_vec;
2201      case INDEX_op_eqv_vec:
2202          return have_vec && TCG_TARGET_HAS_eqv_vec;
2203      case INDEX_op_mul_vec:
2204          return have_vec && TCG_TARGET_HAS_mul_vec;
2205      case INDEX_op_shli_vec:
2206      case INDEX_op_shri_vec:
2207      case INDEX_op_sari_vec:
2208          return have_vec && TCG_TARGET_HAS_shi_vec;
2209      case INDEX_op_shls_vec:
2210      case INDEX_op_shrs_vec:
2211      case INDEX_op_sars_vec:
2212          return have_vec && TCG_TARGET_HAS_shs_vec;
2213      case INDEX_op_shlv_vec:
2214      case INDEX_op_shrv_vec:
2215      case INDEX_op_sarv_vec:
2216          return have_vec && TCG_TARGET_HAS_shv_vec;
2217      case INDEX_op_rotli_vec:
2218          return have_vec && TCG_TARGET_HAS_roti_vec;
2219      case INDEX_op_rotls_vec:
2220          return have_vec && TCG_TARGET_HAS_rots_vec;
2221      case INDEX_op_rotlv_vec:
2222      case INDEX_op_rotrv_vec:
2223          return have_vec && TCG_TARGET_HAS_rotv_vec;
2224      case INDEX_op_ssadd_vec:
2225      case INDEX_op_usadd_vec:
2226      case INDEX_op_sssub_vec:
2227      case INDEX_op_ussub_vec:
2228          return have_vec && TCG_TARGET_HAS_sat_vec;
2229      case INDEX_op_smin_vec:
2230      case INDEX_op_umin_vec:
2231      case INDEX_op_smax_vec:
2232      case INDEX_op_umax_vec:
2233          return have_vec && TCG_TARGET_HAS_minmax_vec;
2234      case INDEX_op_bitsel_vec:
2235          return have_vec && TCG_TARGET_HAS_bitsel_vec;
2236      case INDEX_op_cmpsel_vec:
2237          return have_vec && TCG_TARGET_HAS_cmpsel_vec;
2238  
2239      default:
2240          tcg_debug_assert(op > INDEX_op_last_generic && op < NB_OPS);
2241          return true;
2242      }
2243  }
2244  
2245  static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs);
2246  
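      /*
       * Emit a call to a helper.  The resulting INDEX_op_call carries a
       * variable argument list:
       *
       *     args[0 .. O-1]     output temps  (O = TCGOP_CALLO)
       *     args[O .. O+I-1]   input temps   (I = TCGOP_CALLI)
       *     args[O+I]          function pointer
       *     args[O+I+1]        TCGHelperInfo pointer
       *
       * Sub-word arguments that the ABI requires to be widened are copied
       * into scratch i64 temps first, and those temps freed afterwards.
       */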
2247  static void tcg_gen_callN(TCGHelperInfo *info, TCGTemp *ret, TCGTemp **args)
2248  {
2249      TCGv_i64 extend_free[MAX_CALL_IARGS];
2250      int n_extend = 0;
2251      TCGOp *op;
2252      int i, n, pi = 0, total_args;
2253  
2254      if (unlikely(g_once_init_enter(HELPER_INFO_INIT(info)))) {
2255          init_call_layout(info);
2256          g_once_init_leave(HELPER_INFO_INIT(info), HELPER_INFO_INIT_VAL(info));
2257      }
2258  
2259      total_args = info->nr_out + info->nr_in + 2;
2260      op = tcg_op_alloc(INDEX_op_call, total_args);
2261  
2262  #ifdef CONFIG_PLUGIN
2263      /* Flag helpers that may affect guest state */
2264      if (tcg_ctx->plugin_insn &&
2265          !(info->flags & TCG_CALL_PLUGIN) &&
2266          !(info->flags & TCG_CALL_NO_SIDE_EFFECTS)) {
2267          tcg_ctx->plugin_insn->calls_helpers = true;
2268      }
2269  #endif
2270  
2271      TCGOP_CALLO(op) = n = info->nr_out;
2272      switch (n) {
2273      case 0:
2274          tcg_debug_assert(ret == NULL);
2275          break;
2276      case 1:
2277          tcg_debug_assert(ret != NULL);
2278          op->args[pi++] = temp_arg(ret);
2279          break;
2280      case 2:
2281      case 4:
2282          tcg_debug_assert(ret != NULL);
2283          tcg_debug_assert(ret->base_type == ret->type + ctz32(n));
2284          tcg_debug_assert(ret->temp_subindex == 0);
2285          for (i = 0; i < n; ++i) {
2286              op->args[pi++] = temp_arg(ret + i);
2287          }
2288          break;
2289      default:
2290          g_assert_not_reached();
2291      }
2292  
2293      TCGOP_CALLI(op) = n = info->nr_in;
2294      for (i = 0; i < n; i++) {
2295          const TCGCallArgumentLoc *loc = &info->in[i];
2296          TCGTemp *ts = args[loc->arg_idx] + loc->tmp_subindex;
2297  
2298          switch (loc->kind) {
2299          case TCG_CALL_ARG_NORMAL:
2300          case TCG_CALL_ARG_BY_REF:
2301          case TCG_CALL_ARG_BY_REF_N:
2302              op->args[pi++] = temp_arg(ts);
2303              break;
2304  
2305          case TCG_CALL_ARG_EXTEND_U:
2306          case TCG_CALL_ARG_EXTEND_S:
2307              {
2308                  TCGv_i64 temp = tcg_temp_ebb_new_i64();
2309                  TCGv_i32 orig = temp_tcgv_i32(ts);
2310  
2311                  if (loc->kind == TCG_CALL_ARG_EXTEND_S) {
2312                      tcg_gen_ext_i32_i64(temp, orig);
2313                  } else {
2314                      tcg_gen_extu_i32_i64(temp, orig);
2315                  }
2316                  op->args[pi++] = tcgv_i64_arg(temp);
2317                  extend_free[n_extend++] = temp;
2318              }
2319              break;
2320  
2321          default:
2322              g_assert_not_reached();
2323          }
2324      }
2325      op->args[pi++] = (uintptr_t)info->func;
2326      op->args[pi++] = (uintptr_t)info;
2327      tcg_debug_assert(pi == total_args);
2328  
2329      QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
2330  
2331      tcg_debug_assert(n_extend < ARRAY_SIZE(extend_free));
2332      for (i = 0; i < n_extend; ++i) {
2333          tcg_temp_free_i64(extend_free[i]);
2334      }
2335  }
2336  
2337  void tcg_gen_call0(TCGHelperInfo *info, TCGTemp *ret)
2338  {
2339      tcg_gen_callN(info, ret, NULL);
2340  }
2341  
2342  void tcg_gen_call1(TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1)
2343  {
2344      tcg_gen_callN(info, ret, &t1);
2345  }
2346  
2347  void tcg_gen_call2(TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1, TCGTemp *t2)
2348  {
2349      TCGTemp *args[2] = { t1, t2 };
2350      tcg_gen_callN(info, ret, args);
2351  }
2352  
2353  void tcg_gen_call3(TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1,
2354                     TCGTemp *t2, TCGTemp *t3)
2355  {
2356      TCGTemp *args[3] = { t1, t2, t3 };
2357      tcg_gen_callN(info, ret, args);
2358  }
2359  
2360  void tcg_gen_call4(TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1,
2361                     TCGTemp *t2, TCGTemp *t3, TCGTemp *t4)
2362  {
2363      TCGTemp *args[4] = { t1, t2, t3, t4 };
2364      tcg_gen_callN(info, ret, args);
2365  }
2366  
2367  void tcg_gen_call5(TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1,
2368                     TCGTemp *t2, TCGTemp *t3, TCGTemp *t4, TCGTemp *t5)
2369  {
2370      TCGTemp *args[5] = { t1, t2, t3, t4, t5 };
2371      tcg_gen_callN(info, ret, args);
2372  }
2373  
2374  void tcg_gen_call6(TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1, TCGTemp *t2,
2375                     TCGTemp *t3, TCGTemp *t4, TCGTemp *t5, TCGTemp *t6)
2376  {
2377      TCGTemp *args[6] = { t1, t2, t3, t4, t5, t6 };
2378      tcg_gen_callN(info, ret, args);
2379  }
2380  
2381  void tcg_gen_call7(TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1,
2382                     TCGTemp *t2, TCGTemp *t3, TCGTemp *t4,
2383                     TCGTemp *t5, TCGTemp *t6, TCGTemp *t7)
2384  {
2385      TCGTemp *args[7] = { t1, t2, t3, t4, t5, t6, t7 };
2386      tcg_gen_callN(info, ret, args);
2387  }
2388  
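      /*
       * Establish the initial location of every temp before register
       * allocation: constants are TEMP_VAL_CONST, fixed globals live in
       * their reserved register, other globals start in memory, EBB temps
       * start dead, and TB temps start in (not yet allocated) memory.
       */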
2389  static void tcg_reg_alloc_start(TCGContext *s)
2390  {
2391      int i, n;
2392  
2393      for (i = 0, n = s->nb_temps; i < n; i++) {
2394          TCGTemp *ts = &s->temps[i];
2395          TCGTempVal val = TEMP_VAL_MEM;
2396  
2397          switch (ts->kind) {
2398          case TEMP_CONST:
2399              val = TEMP_VAL_CONST;
2400              break;
2401          case TEMP_FIXED:
2402              val = TEMP_VAL_REG;
2403              break;
2404          case TEMP_GLOBAL:
2405              break;
2406          case TEMP_EBB:
2407              val = TEMP_VAL_DEAD;
2408              /* fall through */
2409          case TEMP_TB:
2410              ts->mem_allocated = 0;
2411              break;
2412          default:
2413              g_assert_not_reached();
2414          }
2415          ts->val_type = val;
2416      }
2417  
2418      memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp));
2419  }
2420  
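      /* Format a temp for the opcode dump: globals print their name,
         TB temps as "locN", EBB temps as "tmpN", constants as "$0x...".  */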
2421  static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size,
2422                                   TCGTemp *ts)
2423  {
2424      int idx = temp_idx(ts);
2425  
2426      switch (ts->kind) {
2427      case TEMP_FIXED:
2428      case TEMP_GLOBAL:
2429          pstrcpy(buf, buf_size, ts->name);
2430          break;
2431      case TEMP_TB:
2432          snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
2433          break;
2434      case TEMP_EBB:
2435          snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
2436          break;
2437      case TEMP_CONST:
2438          switch (ts->type) {
2439          case TCG_TYPE_I32:
2440              snprintf(buf, buf_size, "$0x%x", (int32_t)ts->val);
2441              break;
2442  #if TCG_TARGET_REG_BITS > 32
2443          case TCG_TYPE_I64:
2444              snprintf(buf, buf_size, "$0x%" PRIx64, ts->val);
2445              break;
2446  #endif
2447          case TCG_TYPE_V64:
2448          case TCG_TYPE_V128:
2449          case TCG_TYPE_V256:
2450              snprintf(buf, buf_size, "v%d$0x%" PRIx64,
2451                       64 << (ts->type - TCG_TYPE_V64), ts->val);
2452              break;
2453          default:
2454              g_assert_not_reached();
2455          }
2456          break;
2457      }
2458      return buf;
2459  }
2460  
2461  static char *tcg_get_arg_str(TCGContext *s, char *buf,
2462                               int buf_size, TCGArg arg)
2463  {
2464      return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg));
2465  }
2466  
2467  static const char * const cond_name[] =
2468  {
2469      [TCG_COND_NEVER] = "never",
2470      [TCG_COND_ALWAYS] = "always",
2471      [TCG_COND_EQ] = "eq",
2472      [TCG_COND_NE] = "ne",
2473      [TCG_COND_LT] = "lt",
2474      [TCG_COND_GE] = "ge",
2475      [TCG_COND_LE] = "le",
2476      [TCG_COND_GT] = "gt",
2477      [TCG_COND_LTU] = "ltu",
2478      [TCG_COND_GEU] = "geu",
2479      [TCG_COND_LEU] = "leu",
2480      [TCG_COND_GTU] = "gtu"
2481  };
2482  
2483  static const char * const ldst_name[(MO_BSWAP | MO_SSIZE) + 1] =
2484  {
2485      [MO_UB]   = "ub",
2486      [MO_SB]   = "sb",
2487      [MO_LEUW] = "leuw",
2488      [MO_LESW] = "lesw",
2489      [MO_LEUL] = "leul",
2490      [MO_LESL] = "lesl",
2491      [MO_LEUQ] = "leq",
2492      [MO_BEUW] = "beuw",
2493      [MO_BESW] = "besw",
2494      [MO_BEUL] = "beul",
2495      [MO_BESL] = "besl",
2496      [MO_BEUQ] = "beq",
2497      [MO_128 + MO_BE] = "beo",
2498      [MO_128 + MO_LE] = "leo",
2499  };
2500  
2501  static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
2502      [MO_UNALN >> MO_ASHIFT]    = "un+",
2503      [MO_ALIGN >> MO_ASHIFT]    = "al+",
2504      [MO_ALIGN_2 >> MO_ASHIFT]  = "al2+",
2505      [MO_ALIGN_4 >> MO_ASHIFT]  = "al4+",
2506      [MO_ALIGN_8 >> MO_ASHIFT]  = "al8+",
2507      [MO_ALIGN_16 >> MO_ASHIFT] = "al16+",
2508      [MO_ALIGN_32 >> MO_ASHIFT] = "al32+",
2509      [MO_ALIGN_64 >> MO_ASHIFT] = "al64+",
2510  };
2511  
2512  static const char * const atom_name[(MO_ATOM_MASK >> MO_ATOM_SHIFT) + 1] = {
2513      [MO_ATOM_IFALIGN >> MO_ATOM_SHIFT] = "",
2514      [MO_ATOM_IFALIGN_PAIR >> MO_ATOM_SHIFT] = "pair+",
2515      [MO_ATOM_WITHIN16 >> MO_ATOM_SHIFT] = "w16+",
2516      [MO_ATOM_WITHIN16_PAIR >> MO_ATOM_SHIFT] = "w16p+",
2517      [MO_ATOM_SUBALIGN >> MO_ATOM_SHIFT] = "sub+",
2518      [MO_ATOM_NONE >> MO_ATOM_SHIFT] = "noat+",
2519  };
2520  
2521  static const char bswap_flag_name[][6] = {
2522      [TCG_BSWAP_IZ] = "iz",
2523      [TCG_BSWAP_OZ] = "oz",
2524      [TCG_BSWAP_OS] = "os",
2525      [TCG_BSWAP_IZ | TCG_BSWAP_OZ] = "iz,oz",
2526      [TCG_BSWAP_IZ | TCG_BSWAP_OS] = "iz,os",
2527  };
2528  
2529  static inline bool tcg_regset_single(TCGRegSet d)
2530  {
2531      return (d & (d - 1)) == 0;
2532  }
2533  
2534  static inline TCGReg tcg_regset_first(TCGRegSet d)
2535  {
2536      if (TCG_TARGET_NB_REGS <= 32) {
2537          return ctz32(d);
2538      } else {
2539          return ctz64(d);
2540      }
2541  }
2542  
2543  /* Return only the number of characters output -- no error return. */
2544  #define ne_fprintf(...) \
2545      ({ int ret_ = fprintf(__VA_ARGS__); ret_ >= 0 ? ret_ : 0; })
2546  
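      /*
       * Dump the opcode stream in a readable form.  After liveness has
       * run, op->life annotates which arguments must be synced to memory
       * or are dead after the op; with have_prefs, the preferred output
       * register sets are appended as well.
       */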
2547  static void tcg_dump_ops(TCGContext *s, FILE *f, bool have_prefs)
2548  {
2549      char buf[128];
2550      TCGOp *op;
2551  
2552      QTAILQ_FOREACH(op, &s->ops, link) {
2553          int i, k, nb_oargs, nb_iargs, nb_cargs;
2554          const TCGOpDef *def;
2555          TCGOpcode c;
2556          int col = 0;
2557  
2558          c = op->opc;
2559          def = &tcg_op_defs[c];
2560  
2561          if (c == INDEX_op_insn_start) {
2562              nb_oargs = 0;
2563              col += ne_fprintf(f, "\n ----");
2564  
2565              for (i = 0, k = s->insn_start_words; i < k; ++i) {
2566                  col += ne_fprintf(f, " %016" PRIx64,
2567                                    tcg_get_insn_start_param(op, i));
2568              }
2569          } else if (c == INDEX_op_call) {
2570              const TCGHelperInfo *info = tcg_call_info(op);
2571              void *func = tcg_call_func(op);
2572  
2573              /* variable number of arguments */
2574              nb_oargs = TCGOP_CALLO(op);
2575              nb_iargs = TCGOP_CALLI(op);
2576              nb_cargs = def->nb_cargs;
2577  
2578              col += ne_fprintf(f, " %s ", def->name);
2579  
2580              /*
2581               * Print the function name from TCGHelperInfo, if available.
2582               * Note that plugins have a template function for the info,
2583               * but the actual function pointer comes from the plugin.
2584               */
2585              if (func == info->func) {
2586                  col += ne_fprintf(f, "%s", info->name);
2587              } else {
2588                  col += ne_fprintf(f, "plugin(%p)", func);
2589              }
2590  
2591              col += ne_fprintf(f, ",$0x%x,$%d", info->flags, nb_oargs);
2592              for (i = 0; i < nb_oargs; i++) {
2593                  col += ne_fprintf(f, ",%s", tcg_get_arg_str(s, buf, sizeof(buf),
2594                                                              op->args[i]));
2595              }
2596              for (i = 0; i < nb_iargs; i++) {
2597                  TCGArg arg = op->args[nb_oargs + i];
2598                  const char *t = tcg_get_arg_str(s, buf, sizeof(buf), arg);
2599                  col += ne_fprintf(f, ",%s", t);
2600              }
2601          } else {
2602              col += ne_fprintf(f, " %s ", def->name);
2603  
2604              nb_oargs = def->nb_oargs;
2605              nb_iargs = def->nb_iargs;
2606              nb_cargs = def->nb_cargs;
2607  
2608              if (def->flags & TCG_OPF_VECTOR) {
2609                  col += ne_fprintf(f, "v%d,e%d,", 64 << TCGOP_VECL(op),
2610                                    8 << TCGOP_VECE(op));
2611              }
2612  
2613              k = 0;
2614              for (i = 0; i < nb_oargs; i++) {
2615              const char *sep = k ? "," : "";
2616                  col += ne_fprintf(f, "%s%s", sep,
2617                                    tcg_get_arg_str(s, buf, sizeof(buf),
2618                                                    op->args[k++]));
2619              }
2620              for (i = 0; i < nb_iargs; i++) {
2621              const char *sep = k ? "," : "";
2622                  col += ne_fprintf(f, "%s%s", sep,
2623                                    tcg_get_arg_str(s, buf, sizeof(buf),
2624                                                    op->args[k++]));
2625              }
2626              switch (c) {
2627              case INDEX_op_brcond_i32:
2628              case INDEX_op_setcond_i32:
2629              case INDEX_op_negsetcond_i32:
2630              case INDEX_op_movcond_i32:
2631              case INDEX_op_brcond2_i32:
2632              case INDEX_op_setcond2_i32:
2633              case INDEX_op_brcond_i64:
2634              case INDEX_op_setcond_i64:
2635              case INDEX_op_negsetcond_i64:
2636              case INDEX_op_movcond_i64:
2637              case INDEX_op_cmp_vec:
2638              case INDEX_op_cmpsel_vec:
2639                  if (op->args[k] < ARRAY_SIZE(cond_name)
2640                      && cond_name[op->args[k]]) {
2641                      col += ne_fprintf(f, ",%s", cond_name[op->args[k++]]);
2642                  } else {
2643                      col += ne_fprintf(f, ",$0x%" TCG_PRIlx, op->args[k++]);
2644                  }
2645                  i = 1;
2646                  break;
2647              case INDEX_op_qemu_ld_a32_i32:
2648              case INDEX_op_qemu_ld_a64_i32:
2649              case INDEX_op_qemu_st_a32_i32:
2650              case INDEX_op_qemu_st_a64_i32:
2651              case INDEX_op_qemu_st8_a32_i32:
2652              case INDEX_op_qemu_st8_a64_i32:
2653              case INDEX_op_qemu_ld_a32_i64:
2654              case INDEX_op_qemu_ld_a64_i64:
2655              case INDEX_op_qemu_st_a32_i64:
2656              case INDEX_op_qemu_st_a64_i64:
2657              case INDEX_op_qemu_ld_a32_i128:
2658              case INDEX_op_qemu_ld_a64_i128:
2659              case INDEX_op_qemu_st_a32_i128:
2660              case INDEX_op_qemu_st_a64_i128:
2661                  {
2662                      const char *s_al, *s_op, *s_at;
2663                      MemOpIdx oi = op->args[k++];
2664                      MemOp mop = get_memop(oi);
2665                      unsigned ix = get_mmuidx(oi);
2666  
2667                      s_al = alignment_name[(mop & MO_AMASK) >> MO_ASHIFT];
2668                      s_op = ldst_name[mop & (MO_BSWAP | MO_SSIZE)];
2669                      s_at = atom_name[(mop & MO_ATOM_MASK) >> MO_ATOM_SHIFT];
2670                      mop &= ~(MO_AMASK | MO_BSWAP | MO_SSIZE | MO_ATOM_MASK);
2671  
2672                      /* If all fields are accounted for, print symbolically. */
2673                      if (!mop && s_al && s_op && s_at) {
2674                          col += ne_fprintf(f, ",%s%s%s,%u",
2675                                            s_at, s_al, s_op, ix);
2676                      } else {
2677                          mop = get_memop(oi);
2678                          col += ne_fprintf(f, ",$0x%x,%u", mop, ix);
2679                      }
2680                      i = 1;
2681                  }
2682                  break;
2683              case INDEX_op_bswap16_i32:
2684              case INDEX_op_bswap16_i64:
2685              case INDEX_op_bswap32_i32:
2686              case INDEX_op_bswap32_i64:
2687              case INDEX_op_bswap64_i64:
2688                  {
2689                      TCGArg flags = op->args[k];
2690                      const char *name = NULL;
2691  
2692                      if (flags < ARRAY_SIZE(bswap_flag_name)) {
2693                          name = bswap_flag_name[flags];
2694                      }
2695                      if (name) {
2696                          col += ne_fprintf(f, ",%s", name);
2697                      } else {
2698                          col += ne_fprintf(f, ",$0x%" TCG_PRIlx, flags);
2699                      }
2700                      i = k = 1;
2701                  }
2702                  break;
2703              default:
2704                  i = 0;
2705                  break;
2706              }
2707              switch (c) {
2708              case INDEX_op_set_label:
2709              case INDEX_op_br:
2710              case INDEX_op_brcond_i32:
2711              case INDEX_op_brcond_i64:
2712              case INDEX_op_brcond2_i32:
2713                  col += ne_fprintf(f, "%s$L%d", k ? "," : "",
2714                                    arg_label(op->args[k])->id);
2715                  i++, k++;
2716                  break;
2717              case INDEX_op_mb:
2718                  {
2719                      TCGBar membar = op->args[k];
2720                      const char *b_op, *m_op;
2721  
2722                      switch (membar & TCG_BAR_SC) {
2723                      case 0:
2724                          b_op = "none";
2725                          break;
2726                      case TCG_BAR_LDAQ:
2727                          b_op = "acq";
2728                          break;
2729                      case TCG_BAR_STRL:
2730                          b_op = "rel";
2731                          break;
2732                      case TCG_BAR_SC:
2733                          b_op = "seq";
2734                          break;
2735                      default:
2736                          g_assert_not_reached();
2737                      }
2738  
2739                      switch (membar & TCG_MO_ALL) {
2740                      case 0:
2741                          m_op = "none";
2742                          break;
2743                      case TCG_MO_LD_LD:
2744                          m_op = "rr";
2745                          break;
2746                      case TCG_MO_LD_ST:
2747                          m_op = "rw";
2748                          break;
2749                      case TCG_MO_ST_LD:
2750                          m_op = "wr";
2751                          break;
2752                      case TCG_MO_ST_ST:
2753                          m_op = "ww";
2754                          break;
2755                      case TCG_MO_LD_LD | TCG_MO_LD_ST:
2756                          m_op = "rr+rw";
2757                          break;
2758                      case TCG_MO_LD_LD | TCG_MO_ST_LD:
2759                          m_op = "rr+wr";
2760                          break;
2761                      case TCG_MO_LD_LD | TCG_MO_ST_ST:
2762                          m_op = "rr+ww";
2763                          break;
2764                      case TCG_MO_LD_ST | TCG_MO_ST_LD:
2765                          m_op = "rw+wr";
2766                          break;
2767                      case TCG_MO_LD_ST | TCG_MO_ST_ST:
2768                          m_op = "rw+ww";
2769                          break;
2770                      case TCG_MO_ST_LD | TCG_MO_ST_ST:
2771                          m_op = "wr+ww";
2772                          break;
2773                      case TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_LD:
2774                          m_op = "rr+rw+wr";
2775                          break;
2776                      case TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_ST:
2777                          m_op = "rr+rw+ww";
2778                          break;
2779                      case TCG_MO_LD_LD | TCG_MO_ST_LD | TCG_MO_ST_ST:
2780                          m_op = "rr+wr+ww";
2781                          break;
2782                      case TCG_MO_LD_ST | TCG_MO_ST_LD | TCG_MO_ST_ST:
2783                          m_op = "rw+wr+ww";
2784                          break;
2785                      case TCG_MO_ALL:
2786                          m_op = "all";
2787                          break;
2788                      default:
2789                          g_assert_not_reached();
2790                      }
2791  
2792                      col += ne_fprintf(f, "%s%s:%s", (k ? "," : ""), b_op, m_op);
2793                      i++, k++;
2794                  }
2795                  break;
2796              default:
2797                  break;
2798              }
2799              for (; i < nb_cargs; i++, k++) {
2800                  col += ne_fprintf(f, "%s$0x%" TCG_PRIlx, k ? "," : "",
2801                                    op->args[k]);
2802              }
2803          }
2804  
2805          if (have_prefs || op->life) {
2806              for (; col < 40; ++col) {
2807                  putc(' ', f);
2808              }
2809          }
2810  
2811          if (op->life) {
2812              unsigned life = op->life;
2813  
2814              if (life & (SYNC_ARG * 3)) {
2815                  ne_fprintf(f, "  sync:");
2816                  for (i = 0; i < 2; ++i) {
2817                      if (life & (SYNC_ARG << i)) {
2818                          ne_fprintf(f, " %d", i);
2819                      }
2820                  }
2821              }
2822              life /= DEAD_ARG;
2823              if (life) {
2824                  ne_fprintf(f, "  dead:");
2825                  for (i = 0; life; ++i, life >>= 1) {
2826                      if (life & 1) {
2827                          ne_fprintf(f, " %d", i);
2828                      }
2829                  }
2830              }
2831          }
2832  
2833          if (have_prefs) {
2834              for (i = 0; i < nb_oargs; ++i) {
2835                  TCGRegSet set = output_pref(op, i);
2836  
2837                  if (i == 0) {
2838                      ne_fprintf(f, "  pref=");
2839                  } else {
2840                      ne_fprintf(f, ",");
2841                  }
2842                  if (set == 0) {
2843                      ne_fprintf(f, "none");
2844                  } else if (set == MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS)) {
2845                      ne_fprintf(f, "all");
2846  #ifdef CONFIG_DEBUG_TCG
2847                  } else if (tcg_regset_single(set)) {
2848                      TCGReg reg = tcg_regset_first(set);
2849                      ne_fprintf(f, "%s", tcg_target_reg_names[reg]);
2850  #endif
2851                  } else if (TCG_TARGET_NB_REGS <= 32) {
2852                      ne_fprintf(f, "0x%x", (uint32_t)set);
2853                  } else {
2854                      ne_fprintf(f, "0x%" PRIx64, (uint64_t)set);
2855                  }
2856              }
2857          }
2858  
2859          putc('\n', f);
2860      }
2861  }
2862  
2863  /* We give higher priority to constraints with fewer registers. */
2864  static int get_constraint_priority(const TCGOpDef *def, int k)
2865  {
2866      const TCGArgConstraint *arg_ct = &def->args_ct[k];
2867      int n = ctpop64(arg_ct->regs);
2868  
2869      /*
2870       * Sort single-register constraints first; this includes output
2871       * aliases, which must exactly match the already-allocated input.
2872       */
2873      if (n == 1 || arg_ct->oalias) {
2874          return INT_MAX;
2875      }
2876  
2877      /*
2878       * Sort register pairs next, first then second immediately after.
2879       * Arbitrarily sort multiple pairs by the index of the first reg;
2880       * there shouldn't be many pairs.
2881       */
2882      switch (arg_ct->pair) {
2883      case 1:
2884      case 3:
2885          return (k + 1) * 2;
2886      case 2:
2887          return (arg_ct->pair_index + 1) * 2 - 1;
2888      }
2889  
2890      /* Finally, sort by decreasing register count. */
2891      assert(n > 1);
2892      return -n;
2893  }
2894  
2895  /* sort from highest priority to lowest */
2896  static void sort_constraints(TCGOpDef *def, int start, int n)
2897  {
2898      int i, j;
2899      TCGArgConstraint *a = def->args_ct;
2900  
2901      for (i = 0; i < n; i++) {
2902          a[start + i].sort_index = start + i;
2903      }
2904      if (n <= 1) {
2905          return;
2906      }
2907      for (i = 0; i < n - 1; i++) {
2908          for (j = i + 1; j < n; j++) {
2909              int p1 = get_constraint_priority(def, a[start + i].sort_index);
2910              int p2 = get_constraint_priority(def, a[start + j].sort_index);
2911              if (p1 < p2) {
2912                  int tmp = a[start + i].sort_index;
2913                  a[start + i].sort_index = a[start + j].sort_index;
2914                  a[start + j].sort_index = tmp;
2915              }
2916          }
2917      }
2918  }
2919  
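      /*
       * Translate each opcode's constraint strings into TCGArgConstraint.
       * Within a string: a digit aliases an input to that numbered output,
       * '&' marks an early-clobber output, 'p'/'m' tie an operand to the
       * register after/before the previous operand (register pairs), 'i'
       * accepts a constant, and the remaining letters are defined by the
       * target in tcg-target-con-str.h.  A hypothetical constraint set
       * such as { "r", "0", "ri" } would read: output in any register,
       * first input aliased to output 0, second input register-or-const.
       */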
2920  static void process_op_defs(TCGContext *s)
2921  {
2922      TCGOpcode op;
2923  
2924      for (op = 0; op < NB_OPS; op++) {
2925          TCGOpDef *def = &tcg_op_defs[op];
2926          const TCGTargetOpDef *tdefs;
2927          bool saw_alias_pair = false;
2928          int i, o, i2, o2, nb_args;
2929  
2930          if (def->flags & TCG_OPF_NOT_PRESENT) {
2931              continue;
2932          }
2933  
2934          nb_args = def->nb_iargs + def->nb_oargs;
2935          if (nb_args == 0) {
2936              continue;
2937          }
2938  
2939          /*
2940           * Macro magic should make it impossible, but double-check that
2941           * the array index is in range.  Since the signedness of an enum
2942           * is implementation defined, force the result to unsigned.
2943           */
2944          unsigned con_set = tcg_target_op_def(op);
2945          tcg_debug_assert(con_set < ARRAY_SIZE(constraint_sets));
2946          tdefs = &constraint_sets[con_set];
2947  
2948          for (i = 0; i < nb_args; i++) {
2949              const char *ct_str = tdefs->args_ct_str[i];
2950              bool input_p = i >= def->nb_oargs;
2951  
2952              /* Incomplete TCGTargetOpDef entry. */
2953              tcg_debug_assert(ct_str != NULL);
2954  
2955              switch (*ct_str) {
2956              case '0' ... '9':
2957                  o = *ct_str - '0';
2958                  tcg_debug_assert(input_p);
2959                  tcg_debug_assert(o < def->nb_oargs);
2960                  tcg_debug_assert(def->args_ct[o].regs != 0);
2961                  tcg_debug_assert(!def->args_ct[o].oalias);
2962                  def->args_ct[i] = def->args_ct[o];
2963                  /* The output sets oalias.  */
2964                  def->args_ct[o].oalias = 1;
2965                  def->args_ct[o].alias_index = i;
2966                  /* The input sets ialias. */
2967                  def->args_ct[i].ialias = 1;
2968                  def->args_ct[i].alias_index = o;
2969                  if (def->args_ct[i].pair) {
2970                      saw_alias_pair = true;
2971                  }
2972                  tcg_debug_assert(ct_str[1] == '\0');
2973                  continue;
2974  
2975              case '&':
2976                  tcg_debug_assert(!input_p);
2977                  def->args_ct[i].newreg = true;
2978                  ct_str++;
2979                  break;
2980  
2981              case 'p': /* plus */
2982                  /* Allocate to the register after the previous. */
2983                  tcg_debug_assert(i > (input_p ? def->nb_oargs : 0));
2984                  o = i - 1;
2985                  tcg_debug_assert(!def->args_ct[o].pair);
2986                  tcg_debug_assert(!def->args_ct[o].ct);
2987                  def->args_ct[i] = (TCGArgConstraint){
2988                      .pair = 2,
2989                      .pair_index = o,
2990                      .regs = def->args_ct[o].regs << 1,
2991                  };
2992                  def->args_ct[o].pair = 1;
2993                  def->args_ct[o].pair_index = i;
2994                  tcg_debug_assert(ct_str[1] == '\0');
2995                  continue;
2996  
2997              case 'm': /* minus */
2998                  /* Allocate to the register before the previous. */
2999                  tcg_debug_assert(i > (input_p ? def->nb_oargs : 0));
3000                  o = i - 1;
3001                  tcg_debug_assert(!def->args_ct[o].pair);
3002                  tcg_debug_assert(!def->args_ct[o].ct);
3003                  def->args_ct[i] = (TCGArgConstraint){
3004                      .pair = 1,
3005                      .pair_index = o,
3006                      .regs = def->args_ct[o].regs >> 1,
3007                  };
3008                  def->args_ct[o].pair = 2;
3009                  def->args_ct[o].pair_index = i;
3010                  tcg_debug_assert(ct_str[1] == '\0');
3011                  continue;
3012              }
3013  
3014              do {
3015                  switch (*ct_str) {
3016                  case 'i':
3017                      def->args_ct[i].ct |= TCG_CT_CONST;
3018                      break;
3019  
3020                  /* Include all of the target-specific constraints. */
3021  
3022  #undef CONST
3023  #define CONST(CASE, MASK) \
3024      case CASE: def->args_ct[i].ct |= MASK; break;
3025  #define REGS(CASE, MASK) \
3026      case CASE: def->args_ct[i].regs |= MASK; break;
3027  
3028  #include "tcg-target-con-str.h"
3029  
3030  #undef REGS
3031  #undef CONST
3032                  default:
3033                  case '0' ... '9':
3034                  case '&':
3035                  case 'p':
3036                  case 'm':
3037                      /* Typo in TCGTargetOpDef constraint. */
3038                      g_assert_not_reached();
3039                  }
3040              } while (*++ct_str != '\0');
3041          }
3042  
3043          /* TCGTargetOpDef entry with too much information? */
3044          tcg_debug_assert(i == TCG_MAX_OP_ARGS || tdefs->args_ct_str[i] == NULL);
3045  
3046          /*
3047           * Fix up output pairs that are aliased with inputs.
3048           * When we created the alias, we copied pair from the output.
3049           * There are three cases:
3050           *    (1a) Pairs of inputs alias pairs of outputs.
3051           *    (1b) One input aliases the first of a pair of outputs.
3052           *    (2)  One input aliases the second of a pair of outputs.
3053           *
3054           * Case 1a is handled by making sure that the pair_index'es are
3055           * properly updated so that they appear the same as a pair of inputs.
3056           *
3057           * Case 1b is handled by setting the pair_index of the input to
3058           * itself, simply so it doesn't point to an unrelated argument.
3059           * Since we don't encounter the "second" during the input allocation
3060           * phase, nothing happens with the second half of the input pair.
3061           *
3062           * Case 2 is handled by setting the second input to pair=3, the
3063           * first output to pair=3, and the pair_index'es to match.
3064           */
3065          if (saw_alias_pair) {
3066              for (i = def->nb_oargs; i < nb_args; i++) {
3067                  /*
3068                   * Since [0-9pm] must be alone in the constraint string,
3069                   * the only way ialias and pair can both be set is if
3070                   * the pair comes from the output alias.
3071                   */
3072                  if (!def->args_ct[i].ialias) {
3073                      continue;
3074                  }
3075                  switch (def->args_ct[i].pair) {
3076                  case 0:
3077                      break;
3078                  case 1:
3079                      o = def->args_ct[i].alias_index;
3080                      o2 = def->args_ct[o].pair_index;
3081                      tcg_debug_assert(def->args_ct[o].pair == 1);
3082                      tcg_debug_assert(def->args_ct[o2].pair == 2);
3083                      if (def->args_ct[o2].oalias) {
3084                          /* Case 1a */
3085                          i2 = def->args_ct[o2].alias_index;
3086                          tcg_debug_assert(def->args_ct[i2].pair == 2);
3087                          def->args_ct[i2].pair_index = i;
3088                          def->args_ct[i].pair_index = i2;
3089                      } else {
3090                          /* Case 1b */
3091                          def->args_ct[i].pair_index = i;
3092                      }
3093                      break;
3094                  case 2:
3095                      o = def->args_ct[i].alias_index;
3096                      o2 = def->args_ct[o].pair_index;
3097                      tcg_debug_assert(def->args_ct[o].pair == 2);
3098                      tcg_debug_assert(def->args_ct[o2].pair == 1);
3099                      if (def->args_ct[o2].oalias) {
3100                          /* Case 1a */
3101                          i2 = def->args_ct[o2].alias_index;
3102                          tcg_debug_assert(def->args_ct[i2].pair == 1);
3103                          def->args_ct[i2].pair_index = i;
3104                          def->args_ct[i].pair_index = i2;
3105                      } else {
3106                          /* Case 2 */
3107                          def->args_ct[i].pair = 3;
3108                          def->args_ct[o2].pair = 3;
3109                          def->args_ct[i].pair_index = o2;
3110                          def->args_ct[o2].pair_index = i;
3111                      }
3112                      break;
3113                  default:
3114                      g_assert_not_reached();
3115                  }
3116              }
3117          }
3118  
3119          /* sort the constraints (XXX: this is just a heuristic) */
3120          sort_constraints(def, 0, def->nb_oargs);
3121          sort_constraints(def, def->nb_oargs, def->nb_iargs);
3122      }
3123  }
3124  
3125  static void remove_label_use(TCGOp *op, int idx)
3126  {
3127      TCGLabel *label = arg_label(op->args[idx]);
3128      TCGLabelUse *use;
3129  
3130      QSIMPLEQ_FOREACH(use, &label->branches, next) {
3131          if (use->op == op) {
3132              QSIMPLEQ_REMOVE(&label->branches, use, TCGLabelUse, next);
3133              return;
3134          }
3135      }
3136      g_assert_not_reached();
3137  }
3138  
3139  void tcg_op_remove(TCGContext *s, TCGOp *op)
3140  {
3141      switch (op->opc) {
3142      case INDEX_op_br:
3143          remove_label_use(op, 0);
3144          break;
3145      case INDEX_op_brcond_i32:
3146      case INDEX_op_brcond_i64:
3147          remove_label_use(op, 3);
3148          break;
3149      case INDEX_op_brcond2_i32:
3150          remove_label_use(op, 5);
3151          break;
3152      default:
3153          break;
3154      }
3155  
3156      QTAILQ_REMOVE(&s->ops, op, link);
3157      QTAILQ_INSERT_TAIL(&s->free_ops, op, link);
3158      s->nb_ops--;
3159  }
3160  
3161  void tcg_remove_ops_after(TCGOp *op)
3162  {
3163      TCGContext *s = tcg_ctx;
3164  
3165      while (true) {
3166          TCGOp *last = tcg_last_op();
3167          if (last == op) {
3168              return;
3169          }
3170          tcg_op_remove(s, last);
3171      }
3172  }
3173  
3174  static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs)
3175  {
3176      TCGContext *s = tcg_ctx;
3177      TCGOp *op = NULL;
3178  
3179      if (unlikely(!QTAILQ_EMPTY(&s->free_ops))) {
3180          QTAILQ_FOREACH(op, &s->free_ops, link) {
3181              if (nargs <= op->nargs) {
3182                  QTAILQ_REMOVE(&s->free_ops, op, link);
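                      /* Keep the recycled op's full (possibly larger) capacity. */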
3183                  nargs = op->nargs;
3184                  goto found;
3185              }
3186          }
3187      }
3188  
3189      /* Most opcodes have 3 or 4 operands: reduce fragmentation. */
3190      nargs = MAX(4, nargs);
3191      op = tcg_malloc(sizeof(TCGOp) + sizeof(TCGArg) * nargs);
3192  
3193   found:
3194      memset(op, 0, offsetof(TCGOp, link));
3195      op->opc = opc;
3196      op->nargs = nargs;
3197  
3198      /* Check for bitfield overflow. */
3199      tcg_debug_assert(op->nargs == nargs);
3200  
3201      s->nb_ops++;
3202      return op;
3203  }
3204  
3205  TCGOp *tcg_emit_op(TCGOpcode opc, unsigned nargs)
3206  {
3207      TCGOp *op = tcg_op_alloc(opc, nargs);
3208      QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
3209      return op;
3210  }
3211  
3212  TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op,
3213                              TCGOpcode opc, unsigned nargs)
3214  {
3215      TCGOp *new_op = tcg_op_alloc(opc, nargs);
3216      QTAILQ_INSERT_BEFORE(old_op, new_op, link);
3217      return new_op;
3218  }
3219  
3220  TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op,
3221                             TCGOpcode opc, unsigned nargs)
3222  {
3223      TCGOp *new_op = tcg_op_alloc(opc, nargs);
3224      QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link);
3225      return new_op;
3226  }
3227  
3228  static void move_label_uses(TCGLabel *to, TCGLabel *from)
3229  {
3230      TCGLabelUse *u;
3231  
3232      QSIMPLEQ_FOREACH(u, &from->branches, next) {
3233          TCGOp *op = u->op;
3234          switch (op->opc) {
3235          case INDEX_op_br:
3236              op->args[0] = label_arg(to);
3237              break;
3238          case INDEX_op_brcond_i32:
3239          case INDEX_op_brcond_i64:
3240              op->args[3] = label_arg(to);
3241              break;
3242          case INDEX_op_brcond2_i32:
3243              op->args[5] = label_arg(to);
3244              break;
3245          default:
3246              g_assert_not_reached();
3247          }
3248      }
3249  
3250      QSIMPLEQ_CONCAT(&to->branches, &from->branches);
3251  }
3252  
3253  /* Reachability analysis: remove unreachable code.  */
3254  static void __attribute__((noinline))
3255  reachable_code_pass(TCGContext *s)
3256  {
3257      TCGOp *op, *op_next, *op_prev;
3258      bool dead = false;
3259  
3260      QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
3261          bool remove = dead;
3262          TCGLabel *label;
3263  
3264          switch (op->opc) {
3265          case INDEX_op_set_label:
3266              label = arg_label(op->args[0]);
3267  
3268              /*
3269               * Note that the first op in the TB is always a load,
3270               * so there is always something before a label.
3271               */
3272              op_prev = QTAILQ_PREV(op, link);
3273  
3274              /*
3275               * If we find two sequential labels, move all branches to
3276               * reference the second label and remove the first label.
3277               * Do this before branch to next optimization, so that the
3278               * middle label is out of the way.
3279               */
3280              if (op_prev->opc == INDEX_op_set_label) {
3281                  move_label_uses(label, arg_label(op_prev->args[0]));
3282                  tcg_op_remove(s, op_prev);
3283                  op_prev = QTAILQ_PREV(op, link);
3284              }
3285  
3286              /*
3287               * Optimization can fold conditional branches to unconditional.
3288               * If we find a label which is preceded by an unconditional
3289               * branch to next, remove the branch.  We couldn't do this when
3290               * processing the branch because any dead code between the branch
3291               * and label had not yet been removed.
3292               */
3293              if (op_prev->opc == INDEX_op_br &&
3294                  label == arg_label(op_prev->args[0])) {
3295                  tcg_op_remove(s, op_prev);
3296                  /* Fall through means insns become live again.  */
3297                  dead = false;
3298              }
3299  
3300              if (QSIMPLEQ_EMPTY(&label->branches)) {
3301                  /*
3302                   * While there is an occasional backward branch, virtually
3303                   * all branches generated by the translators are forward.
3304                   * Which means that, by the time we reach a label, we will
3305                   * generally have already removed any dead references to it,
3306                   * and there is little to be gained by iterating the pass.
3307                   */
3308                  remove = true;
3309              } else {
3310                  /* Once we see a label, insns become live again.  */
3311                  dead = false;
3312                  remove = false;
3313              }
3314              break;
3315  
3316          case INDEX_op_br:
3317          case INDEX_op_exit_tb:
3318          case INDEX_op_goto_ptr:
3319              /* Unconditional branches; everything following is dead.  */
3320              dead = true;
3321              break;
3322  
3323          case INDEX_op_call:
3324              /* Notice noreturn helper calls, raising exceptions.  */
3325              if (tcg_call_flags(op) & TCG_CALL_NO_RETURN) {
3326                  dead = true;
3327              }
3328              break;
3329  
3330          case INDEX_op_insn_start:
3331              /* Never remove -- we need to keep these for unwind.  */
3332              remove = false;
3333              break;
3334  
3335          default:
3336              break;
3337          }
3338  
3339          if (remove) {
3340              tcg_op_remove(s, op);
3341          }
3342      }
3343  }
3344  
3345  #define TS_DEAD  1
3346  #define TS_MEM   2
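      /*
       * Temp state bits for the liveness passes: TS_DEAD means the value
       * has no further use before being overwritten; TS_MEM means the value
       * must also be present in its canonical memory slot.
       */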
3347  
3348  #define IS_DEAD_ARG(n)   (arg_life & (DEAD_ARG << (n)))
3349  #define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n)))
3350  
3351  /* For liveness_pass_1, the register preferences for a given temp.  */
3352  static inline TCGRegSet *la_temp_pref(TCGTemp *ts)
3353  {
3354      return ts->state_ptr;
3355  }
3356  
3357  /* For liveness_pass_1, reset the preferences for a given temp to the
3358   * maximal regset for its type.
3359   */
3360  static inline void la_reset_pref(TCGTemp *ts)
3361  {
3362      *la_temp_pref(ts)
3363          = (ts->state == TS_DEAD ? 0 : tcg_target_available_regs[ts->type]);
3364  }
3365  
3366  /* liveness analysis: end of function: all temps are dead, and globals
3367     should be in memory. */
3368  static void la_func_end(TCGContext *s, int ng, int nt)
3369  {
3370      int i;
3371  
3372      for (i = 0; i < ng; ++i) {
3373          s->temps[i].state = TS_DEAD | TS_MEM;
3374          la_reset_pref(&s->temps[i]);
3375      }
3376      for (i = ng; i < nt; ++i) {
3377          s->temps[i].state = TS_DEAD;
3378          la_reset_pref(&s->temps[i]);
3379      }
3380  }
3381  
3382  /* liveness analysis: end of basic block: all temps are dead, globals
3383     and local temps should be in memory. */
3384  static void la_bb_end(TCGContext *s, int ng, int nt)
3385  {
3386      int i;
3387  
3388      for (i = 0; i < nt; ++i) {
3389          TCGTemp *ts = &s->temps[i];
3390          int state;
3391  
3392          switch (ts->kind) {
3393          case TEMP_FIXED:
3394          case TEMP_GLOBAL:
3395          case TEMP_TB:
3396              state = TS_DEAD | TS_MEM;
3397              break;
3398          case TEMP_EBB:
3399          case TEMP_CONST:
3400              state = TS_DEAD;
3401              break;
3402          default:
3403              g_assert_not_reached();
3404          }
3405          ts->state = state;
3406          la_reset_pref(ts);
3407      }
3408  }
3409  
3410  /* liveness analysis: sync globals back to memory.  */
3411  static void la_global_sync(TCGContext *s, int ng)
3412  {
3413      int i;
3414  
3415      for (i = 0; i < ng; ++i) {
3416          int state = s->temps[i].state;
3417          s->temps[i].state = state | TS_MEM;
3418          if (state == TS_DEAD) {
3419              /* If the global was previously dead, reset prefs.  */
3420              la_reset_pref(&s->temps[i]);
3421          }
3422      }
3423  }
3424  
3425  /*
3426   * liveness analysis: conditional branch: all temps are dead unless
3427   * explicitly live-across-conditional-branch, globals and local temps
3428   * should be synced.
3429   */
3430  static void la_bb_sync(TCGContext *s, int ng, int nt)
3431  {
3432      la_global_sync(s, ng);
3433  
3434      for (int i = ng; i < nt; ++i) {
3435          TCGTemp *ts = &s->temps[i];
3436          int state;
3437  
3438          switch (ts->kind) {
3439          case TEMP_TB:
3440              state = ts->state;
3441              ts->state = state | TS_MEM;
3442              if (state != TS_DEAD) {
3443                  continue;
3444              }
3445              break;
3446          case TEMP_EBB:
3447          case TEMP_CONST:
3448              continue;
3449          default:
3450              g_assert_not_reached();
3451          }
3452          la_reset_pref(&s->temps[i]);
3453      }
3454  }
3455  
3456  /* liveness analysis: sync globals back to memory and kill.  */
3457  static void la_global_kill(TCGContext *s, int ng)
3458  {
3459      int i;
3460  
3461      for (i = 0; i < ng; i++) {
3462          s->temps[i].state = TS_DEAD | TS_MEM;
3463          la_reset_pref(&s->temps[i]);
3464      }
3465  }
3466  
3467  /* liveness analysis: note live globals crossing calls.  */
3468  static void la_cross_call(TCGContext *s, int nt)
3469  {
3470      TCGRegSet mask = ~tcg_target_call_clobber_regs;
3471      int i;
3472  
3473      for (i = 0; i < nt; i++) {
3474          TCGTemp *ts = &s->temps[i];
3475          if (!(ts->state & TS_DEAD)) {
3476              TCGRegSet *pset = la_temp_pref(ts);
3477              TCGRegSet set = *pset;
3478  
3479              set &= mask;
3480              /* If the combination is not possible, restart.  */
3481              if (set == 0) {
3482                  set = tcg_target_available_regs[ts->type] & mask;
3483              }
3484              *pset = set;
3485          }
3486      }
3487  }
3488  
3489  /*
3490   * Liveness analysis: Verify the lifetime of TEMP_TB, and reduce
3491   * to TEMP_EBB, if possible.
3492   */
3493  static void __attribute__((noinline))
3494  liveness_pass_0(TCGContext *s)
3495  {
3496      void * const multiple_ebb = (void *)(uintptr_t)-1;
3497      int nb_temps = s->nb_temps;
3498      TCGOp *op, *ebb;
3499  
3500      for (int i = s->nb_globals; i < nb_temps; ++i) {
3501          s->temps[i].state_ptr = NULL;
3502      }
3503  
3504      /*
3505       * Represent each EBB by the op at which it begins.  In the case of
3506       * the first EBB, this is the first op, otherwise it is a label.
3507       * Collect the uses of each TEMP_TB: NULL for unused, EBB for use
3508       * within a single EBB, else MULTIPLE_EBB.
3509       */
3510      ebb = QTAILQ_FIRST(&s->ops);
3511      QTAILQ_FOREACH(op, &s->ops, link) {
3512          const TCGOpDef *def;
3513          int nb_oargs, nb_iargs;
3514  
3515          switch (op->opc) {
3516          case INDEX_op_set_label:
3517              ebb = op;
3518              continue;
3519          case INDEX_op_discard:
3520              continue;
3521          case INDEX_op_call:
3522              nb_oargs = TCGOP_CALLO(op);
3523              nb_iargs = TCGOP_CALLI(op);
3524              break;
3525          default:
3526              def = &tcg_op_defs[op->opc];
3527              nb_oargs = def->nb_oargs;
3528              nb_iargs = def->nb_iargs;
3529              break;
3530          }
3531  
3532          for (int i = 0; i < nb_oargs + nb_iargs; ++i) {
3533              TCGTemp *ts = arg_temp(op->args[i]);
3534  
3535              if (ts->kind != TEMP_TB) {
3536                  continue;
3537              }
3538              if (ts->state_ptr == NULL) {
3539                  ts->state_ptr = ebb;
3540              } else if (ts->state_ptr != ebb) {
3541                  ts->state_ptr = multiple_ebb;
3542              }
3543          }
3544      }
3545  
3546      /*
3547       * For TEMP_TB that turned out not to be used beyond one EBB,
3548       * reduce the liveness to TEMP_EBB.
3549       */
3550      for (int i = s->nb_globals; i < nb_temps; ++i) {
3551          TCGTemp *ts = &s->temps[i];
3552          if (ts->kind == TEMP_TB && ts->state_ptr != multiple_ebb) {
3553              ts->kind = TEMP_EBB;
3554          }
3555      }
3556  }
3557  
3558  /* Liveness analysis: update the opc_arg_life array to tell if a
3559     given input argument is dead.  Instructions updating dead
3560     temporaries are removed. */
3561  static void __attribute__((noinline))
3562  liveness_pass_1(TCGContext *s)
3563  {
3564      int nb_globals = s->nb_globals;
3565      int nb_temps = s->nb_temps;
3566      TCGOp *op, *op_prev;
3567      TCGRegSet *prefs;
3568      int i;
3569  
3570      prefs = tcg_malloc(sizeof(TCGRegSet) * nb_temps);
3571      for (i = 0; i < nb_temps; ++i) {
3572          s->temps[i].state_ptr = prefs + i;
3573      }
3574  
3575      /* ??? Should be redundant with the exit_tb that ends the TB.  */
3576      la_func_end(s, nb_globals, nb_temps);
3577  
3578      QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, link, op_prev) {
3579          int nb_iargs, nb_oargs;
3580          TCGOpcode opc_new, opc_new2;
3581          bool have_opc_new2;
3582          TCGLifeData arg_life = 0;
3583          TCGTemp *ts;
3584          TCGOpcode opc = op->opc;
3585          const TCGOpDef *def = &tcg_op_defs[opc];
3586  
3587          switch (opc) {
3588          case INDEX_op_call:
3589              {
3590                  const TCGHelperInfo *info = tcg_call_info(op);
3591                  int call_flags = tcg_call_flags(op);
3592  
3593                  nb_oargs = TCGOP_CALLO(op);
3594                  nb_iargs = TCGOP_CALLI(op);
3595  
3596                  /* pure functions can be removed if their result is unused */
3597                  if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
3598                      for (i = 0; i < nb_oargs; i++) {
3599                          ts = arg_temp(op->args[i]);
3600                          if (ts->state != TS_DEAD) {
3601                              goto do_not_remove_call;
3602                          }
3603                      }
3604                      goto do_remove;
3605                  }
3606              do_not_remove_call:
3607  
3608                  /* Output args are dead.  */
3609                  for (i = 0; i < nb_oargs; i++) {
3610                      ts = arg_temp(op->args[i]);
3611                      if (ts->state & TS_DEAD) {
3612                          arg_life |= DEAD_ARG << i;
3613                      }
3614                      if (ts->state & TS_MEM) {
3615                          arg_life |= SYNC_ARG << i;
3616                      }
3617                      ts->state = TS_DEAD;
3618                      la_reset_pref(ts);
3619                  }
3620  
3621                  /* Not used -- it will be tcg_target_call_oarg_reg().  */
3622                  memset(op->output_pref, 0, sizeof(op->output_pref));
3623  
3624                  if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
3625                                      TCG_CALL_NO_READ_GLOBALS))) {
3626                      la_global_kill(s, nb_globals);
3627                  } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
3628                      la_global_sync(s, nb_globals);
3629                  }
3630  
3631                  /* Record arguments that die in this helper.  */
3632                  for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
3633                      ts = arg_temp(op->args[i]);
3634                      if (ts->state & TS_DEAD) {
3635                          arg_life |= DEAD_ARG << i;
3636                      }
3637                  }
3638  
3639                  /* For all live registers, remove call-clobbered prefs.  */
3640                  la_cross_call(s, nb_temps);
3641  
3642                  /*
3643                   * Input arguments are live for preceding opcodes.
3644                   *
3645                   * For those arguments that die, and will be allocated in
3646                   * registers, clear the register set for that arg, to be
3647                   * filled in below.  For args that will be on the stack,
3648                   * reset to any available reg.  Process arguments in reverse
3649                   * order so that if a temp is used more than once, the stack
3650                   * reset to max happens before the register reset to 0.
3651                   */
3652                  for (i = nb_iargs - 1; i >= 0; i--) {
3653                      const TCGCallArgumentLoc *loc = &info->in[i];
3654                      ts = arg_temp(op->args[nb_oargs + i]);
3655  
3656                      if (ts->state & TS_DEAD) {
3657                          switch (loc->kind) {
3658                          case TCG_CALL_ARG_NORMAL:
3659                          case TCG_CALL_ARG_EXTEND_U:
3660                          case TCG_CALL_ARG_EXTEND_S:
3661                              if (arg_slot_reg_p(loc->arg_slot)) {
3662                                  *la_temp_pref(ts) = 0;
3663                                  break;
3664                              }
3665                              /* fall through */
3666                          default:
3667                              *la_temp_pref(ts) =
3668                                  tcg_target_available_regs[ts->type];
3669                              break;
3670                          }
3671                          ts->state &= ~TS_DEAD;
3672                      }
3673                  }
3674  
3675                  /*
3676                   * For each input argument, add its input register to prefs.
3677                   * If a temp is used once, this produces a single set bit;
3678                   * if a temp is used multiple times, this produces a set.
3679                   */
3680                  for (i = 0; i < nb_iargs; i++) {
3681                      const TCGCallArgumentLoc *loc = &info->in[i];
3682                      ts = arg_temp(op->args[nb_oargs + i]);
3683  
3684                      switch (loc->kind) {
3685                      case TCG_CALL_ARG_NORMAL:
3686                      case TCG_CALL_ARG_EXTEND_U:
3687                      case TCG_CALL_ARG_EXTEND_S:
3688                          if (arg_slot_reg_p(loc->arg_slot)) {
3689                              tcg_regset_set_reg(*la_temp_pref(ts),
3690                                  tcg_target_call_iarg_regs[loc->arg_slot]);
3691                          }
3692                          break;
3693                      default:
3694                          break;
3695                      }
3696                  }
3697              }
3698              break;
3699          case INDEX_op_insn_start:
3700              break;
3701          case INDEX_op_discard:
3702              /* mark the temporary as dead */
3703              ts = arg_temp(op->args[0]);
3704              ts->state = TS_DEAD;
3705              la_reset_pref(ts);
3706              break;
3707  
3708          case INDEX_op_add2_i32:
3709              opc_new = INDEX_op_add_i32;
3710              goto do_addsub2;
3711          case INDEX_op_sub2_i32:
3712              opc_new = INDEX_op_sub_i32;
3713              goto do_addsub2;
3714          case INDEX_op_add2_i64:
3715              opc_new = INDEX_op_add_i64;
3716              goto do_addsub2;
3717          case INDEX_op_sub2_i64:
3718              opc_new = INDEX_op_sub_i64;
3719          do_addsub2:
3720              nb_iargs = 4;
3721              nb_oargs = 2;
3722              /* Test if the high part of the operation is dead, but not
3723                 the low part.  The result can be optimized to a simple
3724                 add or sub.  This happens often for an x86_64 guest when
3725                 the CPU mode is set to 32 bit.  */
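                  /*
                   * The double-word arguments are laid out as (rl, rh, al,
                   * ah, bl, bh), so keeping only the low half below yields
                   * opc_new rl, al, bl.
                   */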
3726              if (arg_temp(op->args[1])->state == TS_DEAD) {
3727                  if (arg_temp(op->args[0])->state == TS_DEAD) {
3728                      goto do_remove;
3729                  }
3730                  /* Replace the opcode and adjust the args in place,
3731                     leaving 3 unused args at the end.  */
3732                  op->opc = opc = opc_new;
3733                  op->args[1] = op->args[2];
3734                  op->args[2] = op->args[4];
3735                  /* Fall through and mark the single-word operation live.  */
3736                  nb_iargs = 2;
3737                  nb_oargs = 1;
3738              }
3739              goto do_not_remove;
3740  
3741          case INDEX_op_mulu2_i32:
3742              opc_new = INDEX_op_mul_i32;
3743              opc_new2 = INDEX_op_muluh_i32;
3744              have_opc_new2 = TCG_TARGET_HAS_muluh_i32;
3745              goto do_mul2;
3746          case INDEX_op_muls2_i32:
3747              opc_new = INDEX_op_mul_i32;
3748              opc_new2 = INDEX_op_mulsh_i32;
3749              have_opc_new2 = TCG_TARGET_HAS_mulsh_i32;
3750              goto do_mul2;
3751          case INDEX_op_mulu2_i64:
3752              opc_new = INDEX_op_mul_i64;
3753              opc_new2 = INDEX_op_muluh_i64;
3754              have_opc_new2 = TCG_TARGET_HAS_muluh_i64;
3755              goto do_mul2;
3756          case INDEX_op_muls2_i64:
3757              opc_new = INDEX_op_mul_i64;
3758              opc_new2 = INDEX_op_mulsh_i64;
3759              have_opc_new2 = TCG_TARGET_HAS_mulsh_i64;
3760              goto do_mul2;
3761          do_mul2:
3762              nb_iargs = 2;
3763              nb_oargs = 2;
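                  /* Here the arguments are laid out as (rl, rh, a, b). */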
3764              if (arg_temp(op->args[1])->state == TS_DEAD) {
3765                  if (arg_temp(op->args[0])->state == TS_DEAD) {
3766                      /* Both parts of the operation are dead.  */
3767                      goto do_remove;
3768                  }
3769                  /* The high part of the operation is dead; generate the low. */
3770                  op->opc = opc = opc_new;
3771                  op->args[1] = op->args[2];
3772                  op->args[2] = op->args[3];
3773              } else if (arg_temp(op->args[0])->state == TS_DEAD && have_opc_new2) {
3774                  /* The low part of the operation is dead; generate the high. */
3775                  op->opc = opc = opc_new2;
3776                  op->args[0] = op->args[1];
3777                  op->args[1] = op->args[2];
3778                  op->args[2] = op->args[3];
3779              } else {
3780                  goto do_not_remove;
3781              }
3782              /* Mark the single-word operation live.  */
3783              nb_oargs = 1;
3784              goto do_not_remove;
3785  
3786          default:
3787              /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
3788              nb_iargs = def->nb_iargs;
3789              nb_oargs = def->nb_oargs;
3790  
3791              /* Test if the operation can be removed because all
3792                 its outputs are dead.  We assume that nb_oargs == 0
3793                 implies side effects.  */
3794              if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) {
3795                  for (i = 0; i < nb_oargs; i++) {
3796                      if (arg_temp(op->args[i])->state != TS_DEAD) {
3797                          goto do_not_remove;
3798                      }
3799                  }
3800                  goto do_remove;
3801              }
3802              goto do_not_remove;
3803  
3804          do_remove:
3805              tcg_op_remove(s, op);
3806              break;
3807  
3808          do_not_remove:
3809              for (i = 0; i < nb_oargs; i++) {
3810                  ts = arg_temp(op->args[i]);
3811  
3812                  /* Remember the preference of the uses that followed.  */
3813                  if (i < ARRAY_SIZE(op->output_pref)) {
3814                      op->output_pref[i] = *la_temp_pref(ts);
3815                  }
3816  
3817                  /* Output args are dead.  */
3818                  if (ts->state & TS_DEAD) {
3819                      arg_life |= DEAD_ARG << i;
3820                  }
3821                  if (ts->state & TS_MEM) {
3822                      arg_life |= SYNC_ARG << i;
3823                  }
3824                  ts->state = TS_DEAD;
3825                  la_reset_pref(ts);
3826              }
3827  
3828              /* If end of basic block, update.  */
3829              if (def->flags & TCG_OPF_BB_EXIT) {
3830                  la_func_end(s, nb_globals, nb_temps);
3831              } else if (def->flags & TCG_OPF_COND_BRANCH) {
3832                  la_bb_sync(s, nb_globals, nb_temps);
3833              } else if (def->flags & TCG_OPF_BB_END) {
3834                  la_bb_end(s, nb_globals, nb_temps);
3835              } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
3836                  la_global_sync(s, nb_globals);
3837                  if (def->flags & TCG_OPF_CALL_CLOBBER) {
3838                      la_cross_call(s, nb_temps);
3839                  }
3840              }
3841  
3842              /* Record arguments that die in this opcode.  */
3843              for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
3844                  ts = arg_temp(op->args[i]);
3845                  if (ts->state & TS_DEAD) {
3846                      arg_life |= DEAD_ARG << i;
3847                  }
3848              }
3849  
3850              /* Input arguments are live for preceding opcodes.  */
3851              for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
3852                  ts = arg_temp(op->args[i]);
3853                  if (ts->state & TS_DEAD) {
3854                      /* For operands that were dead, initially allow
3855                         all regs for the type.  */
3856                      *la_temp_pref(ts) = tcg_target_available_regs[ts->type];
3857                      ts->state &= ~TS_DEAD;
3858                  }
3859              }
3860  
3861              /* Incorporate constraints for this operand.  */
3862              switch (opc) {
3863              case INDEX_op_mov_i32:
3864              case INDEX_op_mov_i64:
3865                  /* Note that these are TCG_OPF_NOT_PRESENT and do not
3866                     have proper constraints.  That said, special case
3867                     moves to propagate preferences backward.  */
3868                  if (IS_DEAD_ARG(1)) {
3869                      *la_temp_pref(arg_temp(op->args[0]))
3870                          = *la_temp_pref(arg_temp(op->args[1]));
3871                  }
3872                  break;
3873  
3874              default:
3875                  for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
3876                      const TCGArgConstraint *ct = &def->args_ct[i];
3877                      TCGRegSet set, *pset;
3878  
3879                      ts = arg_temp(op->args[i]);
3880                      pset = la_temp_pref(ts);
3881                      set = *pset;
3882  
3883                      set &= ct->regs;
3884                      if (ct->ialias) {
3885                          set &= output_pref(op, ct->alias_index);
3886                      }
3887                      /* If the combination is not possible, restart.  */
3888                      if (set == 0) {
3889                          set = ct->regs;
3890                      }
3891                      *pset = set;
3892                  }
3893                  break;
3894              }
3895              break;
3896          }
3897          op->life = arg_life;
3898      }
3899  }
3900  
3901  /* Liveness analysis: Convert indirect regs to direct temporaries.  */
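      /*
       * An indirect global is kept in memory rather than resident in a host
       * register.  Each such global gets a direct TEMP_EBB shadow below,
       * with explicit ld/st ops inserted around its uses, so that the
       * register allocator only ever sees the direct temporary.
       */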
3902  static bool __attribute__((noinline))
3903  liveness_pass_2(TCGContext *s)
3904  {
3905      int nb_globals = s->nb_globals;
3906      int nb_temps, i;
3907      bool changes = false;
3908      TCGOp *op, *op_next;
3909  
3910      /* Create a temporary for each indirect global.  */
3911      for (i = 0; i < nb_globals; ++i) {
3912          TCGTemp *its = &s->temps[i];
3913          if (its->indirect_reg) {
3914              TCGTemp *dts = tcg_temp_alloc(s);
3915              dts->type = its->type;
3916              dts->base_type = its->base_type;
3917              dts->temp_subindex = its->temp_subindex;
3918              dts->kind = TEMP_EBB;
3919              its->state_ptr = dts;
3920          } else {
3921              its->state_ptr = NULL;
3922          }
3923          /* All globals begin dead.  */
3924          its->state = TS_DEAD;
3925      }
3926      for (nb_temps = s->nb_temps; i < nb_temps; ++i) {
3927          TCGTemp *its = &s->temps[i];
3928          its->state_ptr = NULL;
3929          its->state = TS_DEAD;
3930      }
3931  
3932      QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
3933          TCGOpcode opc = op->opc;
3934          const TCGOpDef *def = &tcg_op_defs[opc];
3935          TCGLifeData arg_life = op->life;
3936          int nb_iargs, nb_oargs, call_flags;
3937          TCGTemp *arg_ts, *dir_ts;
3938  
3939          if (opc == INDEX_op_call) {
3940              nb_oargs = TCGOP_CALLO(op);
3941              nb_iargs = TCGOP_CALLI(op);
3942              call_flags = tcg_call_flags(op);
3943          } else {
3944              nb_iargs = def->nb_iargs;
3945              nb_oargs = def->nb_oargs;
3946  
3947              /* Set flags similar to how calls require.  */
3948              if (def->flags & TCG_OPF_COND_BRANCH) {
3949                  /* Like reading globals: sync_globals */
3950                  call_flags = TCG_CALL_NO_WRITE_GLOBALS;
3951              } else if (def->flags & TCG_OPF_BB_END) {
3952                  /* Like writing globals: save_globals */
3953                  call_flags = 0;
3954              } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
3955                  /* Like reading globals: sync_globals */
3956                  call_flags = TCG_CALL_NO_WRITE_GLOBALS;
3957              } else {
3958                  /* No effect on globals.  */
3959                  call_flags = (TCG_CALL_NO_READ_GLOBALS |
3960                                TCG_CALL_NO_WRITE_GLOBALS);
3961              }
3962          }
3963  
3964          /* Make sure that input arguments are available.  */
3965          for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
3966              arg_ts = arg_temp(op->args[i]);
3967              dir_ts = arg_ts->state_ptr;
3968              if (dir_ts && arg_ts->state == TS_DEAD) {
3969                  TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32
3970                                    ? INDEX_op_ld_i32
3971                                    : INDEX_op_ld_i64);
3972                  TCGOp *lop = tcg_op_insert_before(s, op, lopc, 3);
3973  
3974                  lop->args[0] = temp_arg(dir_ts);
3975                  lop->args[1] = temp_arg(arg_ts->mem_base);
3976                  lop->args[2] = arg_ts->mem_offset;
3977  
3978                  /* Loaded, but synced with memory.  */
3979                  arg_ts->state = TS_MEM;
3980              }
3981          }
3982  
3983          /* Perform input replacement, and mark inputs that became dead.
3984             No action is required except keeping temp_state up to date
3985             so that we reload when needed.  */
3986          for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
3987              arg_ts = arg_temp(op->args[i]);
3988              dir_ts = arg_ts->state_ptr;
3989              if (dir_ts) {
3990                  op->args[i] = temp_arg(dir_ts);
3991                  changes = true;
3992                  if (IS_DEAD_ARG(i)) {
3993                      arg_ts->state = TS_DEAD;
3994                  }
3995              }
3996          }
3997  
3998          /* Liveness analysis should ensure that the following are
3999             all correct, for call sites and basic block end points.  */
4000          if (call_flags & TCG_CALL_NO_READ_GLOBALS) {
4001              /* Nothing to do */
4002          } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) {
4003              for (i = 0; i < nb_globals; ++i) {
4004                  /* Liveness should see that globals are synced back,
4005                     that is, either TS_DEAD or TS_MEM.  */
4006                  arg_ts = &s->temps[i];
4007                  tcg_debug_assert(arg_ts->state_ptr == 0
4008                                   || arg_ts->state != 0);
4009              }
4010          } else {
4011              for (i = 0; i < nb_globals; ++i) {
4012                  /* Liveness should see that globals are saved back,
4013                     that is, TS_DEAD, waiting to be reloaded.  */
4014                  arg_ts = &s->temps[i];
4015                  tcg_debug_assert(arg_ts->state_ptr == 0
4016                                   || arg_ts->state == TS_DEAD);
4017              }
4018          }
4019  
4020          /* Outputs become available.  */
4021          if (opc == INDEX_op_mov_i32 || opc == INDEX_op_mov_i64) {
4022              arg_ts = arg_temp(op->args[0]);
4023              dir_ts = arg_ts->state_ptr;
4024              if (dir_ts) {
4025                  op->args[0] = temp_arg(dir_ts);
4026                  changes = true;
4027  
4028                  /* The output is now live and modified.  */
4029                  arg_ts->state = 0;
4030  
4031                  if (NEED_SYNC_ARG(0)) {
4032                      TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
4033                                        ? INDEX_op_st_i32
4034                                        : INDEX_op_st_i64);
4035                      TCGOp *sop = tcg_op_insert_after(s, op, sopc, 3);
4036                      TCGTemp *out_ts = dir_ts;
4037  
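                          /*
                           * If the destination also dies here, store straight
                           * from the source and drop the mov itself.
                           */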
4038                      if (IS_DEAD_ARG(0)) {
4039                          out_ts = arg_temp(op->args[1]);
4040                          arg_ts->state = TS_DEAD;
4041                          tcg_op_remove(s, op);
4042                      } else {
4043                          arg_ts->state = TS_MEM;
4044                      }
4045  
4046                      sop->args[0] = temp_arg(out_ts);
4047                      sop->args[1] = temp_arg(arg_ts->mem_base);
4048                      sop->args[2] = arg_ts->mem_offset;
4049                  } else {
4050                      tcg_debug_assert(!IS_DEAD_ARG(0));
4051                  }
4052              }
4053          } else {
4054              for (i = 0; i < nb_oargs; i++) {
4055                  arg_ts = arg_temp(op->args[i]);
4056                  dir_ts = arg_ts->state_ptr;
4057                  if (!dir_ts) {
4058                      continue;
4059                  }
4060                  op->args[i] = temp_arg(dir_ts);
4061                  changes = true;
4062  
4063                  /* The output is now live and modified.  */
4064                  arg_ts->state = 0;
4065  
4066                  /* Sync outputs upon their last write.  */
4067                  if (NEED_SYNC_ARG(i)) {
4068                      TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
4069                                        ? INDEX_op_st_i32
4070                                        : INDEX_op_st_i64);
4071                      TCGOp *sop = tcg_op_insert_after(s, op, sopc, 3);
4072  
4073                      sop->args[0] = temp_arg(dir_ts);
4074                      sop->args[1] = temp_arg(arg_ts->mem_base);
4075                      sop->args[2] = arg_ts->mem_offset;
4076  
4077                      arg_ts->state = TS_MEM;
4078                  }
4079                  /* Drop outputs that are dead.  */
4080                  if (IS_DEAD_ARG(i)) {
4081                      arg_ts->state = TS_DEAD;
4082                  }
4083              }
4084          }
4085      }
4086  
4087      return changes;
4088  }
4089  
4090  static void temp_allocate_frame(TCGContext *s, TCGTemp *ts)
4091  {
4092      intptr_t off;
4093      int size, align;
4094  
4095      /* When allocating an object, look at the full type. */
4096      size = tcg_type_size(ts->base_type);
4097      switch (ts->base_type) {
4098      case TCG_TYPE_I32:
4099          align = 4;
4100          break;
4101      case TCG_TYPE_I64:
4102      case TCG_TYPE_V64:
4103          align = 8;
4104          break;
4105      case TCG_TYPE_I128:
4106      case TCG_TYPE_V128:
4107      case TCG_TYPE_V256:
4108          /*
4109           * Note that we do not require aligned storage for V256,
4110           * and that we provide alignment for I128 to match V128,
4111           * even if that's above what the host ABI requires.
4112           */
4113          align = 16;
4114          break;
4115      default:
4116          g_assert_not_reached();
4117      }
4118  
4119      /*
4120       * Assume the stack is sufficiently aligned.
4121       * This affects e.g. ARM NEON, where we have 8 byte stack alignment
4122       * and do not require 16 byte vector alignment.  This seems slightly
4123       * easier than fully parameterizing the above switch statement.
4124       */
4125      align = MIN(TCG_TARGET_STACK_ALIGN, align);
4126      off = ROUND_UP(s->current_frame_offset, align);
4127  
4128      /* If we've exhausted the stack frame, restart with a smaller TB. */
4129      if (off + size > s->frame_end) {
4130          tcg_raise_tb_overflow(s);
4131      }
4132      s->current_frame_offset = off + size;
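          /* SPARC addresses stack slots through a biased register; fold
             the target's stack bias into the offset. */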
4133  #if defined(__sparc__)
4134      off += TCG_TARGET_STACK_BIAS;
4135  #endif
4136  
4137      /* If the object was subdivided, assign memory to all the parts. */
4138      if (ts->base_type != ts->type) {
4139          int part_size = tcg_type_size(ts->type);
4140          int part_count = size / part_size;
4141  
4142          /*
4143           * Each part is allocated sequentially in tcg_temp_new_internal.
4144           * Jump back to the first part by subtracting the current index.
4145           */
4146          ts -= ts->temp_subindex;
4147          for (int i = 0; i < part_count; ++i) {
4148              ts[i].mem_offset = off + i * part_size;
4149              ts[i].mem_base = s->frame_temp;
4150              ts[i].mem_allocated = 1;
4151          }
4152      } else {
4153          ts->mem_offset = off;
4154          ts->mem_base = s->frame_temp;
4155          ts->mem_allocated = 1;
4156      }
4157  }
4158  
4159  /* Assign @reg to @ts, and update reg_to_temp[]. */
4160  static void set_temp_val_reg(TCGContext *s, TCGTemp *ts, TCGReg reg)
4161  {
4162      if (ts->val_type == TEMP_VAL_REG) {
4163          TCGReg old = ts->reg;
4164          tcg_debug_assert(s->reg_to_temp[old] == ts);
4165          if (old == reg) {
4166              return;
4167          }
4168          s->reg_to_temp[old] = NULL;
4169      }
4170      tcg_debug_assert(s->reg_to_temp[reg] == NULL);
4171      s->reg_to_temp[reg] = ts;
4172      ts->val_type = TEMP_VAL_REG;
4173      ts->reg = reg;
4174  }
4175  
4176  /* Assign a non-register value type to @ts, and update reg_to_temp[]. */
4177  static void set_temp_val_nonreg(TCGContext *s, TCGTemp *ts, TCGTempVal type)
4178  {
4179      tcg_debug_assert(type != TEMP_VAL_REG);
4180      if (ts->val_type == TEMP_VAL_REG) {
4181          TCGReg reg = ts->reg;
4182          tcg_debug_assert(s->reg_to_temp[reg] == ts);
4183          s->reg_to_temp[reg] = NULL;
4184      }
4185      ts->val_type = type;
4186  }
4187  
4188  static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet);
4189  
4190  /* Mark a temporary as free or dead.  If 'free_or_dead' is negative,
4191     mark it free; otherwise mark it dead.  */
4192  static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead)
4193  {
4194      TCGTempVal new_type;
4195  
4196      switch (ts->kind) {
4197      case TEMP_FIXED:
4198          return;
4199      case TEMP_GLOBAL:
4200      case TEMP_TB:
4201          new_type = TEMP_VAL_MEM;
4202          break;
4203      case TEMP_EBB:
4204          new_type = free_or_dead < 0 ? TEMP_VAL_MEM : TEMP_VAL_DEAD;
4205          break;
4206      case TEMP_CONST:
4207          new_type = TEMP_VAL_CONST;
4208          break;
4209      default:
4210          g_assert_not_reached();
4211      }
4212      set_temp_val_nonreg(s, ts, new_type);
4213  }
4214  
4215  /* Mark a temporary as dead.  */
4216  static inline void temp_dead(TCGContext *s, TCGTemp *ts)
4217  {
4218      temp_free_or_dead(s, ts, 1);
4219  }
4220  
4221  /* Sync a temporary to memory. 'allocated_regs' is used in case a temporary
4222     register needs to be allocated to store a constant.  If 'free_or_dead'
4223     is non-zero, subsequently release the temporary; if it is positive, the
4224     temp is dead; if it is negative, the temp is free.  */
4225  static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs,
4226                        TCGRegSet preferred_regs, int free_or_dead)
4227  {
4228      if (!temp_readonly(ts) && !ts->mem_coherent) {
4229          if (!ts->mem_allocated) {
4230              temp_allocate_frame(s, ts);
4231          }
4232          switch (ts->val_type) {
4233          case TEMP_VAL_CONST:
4234              /* If we're going to free the temp immediately, then we won't
4235                 require it later in a register, so attempt to store the
4236                 constant to memory directly.  */
4237              if (free_or_dead
4238                  && tcg_out_sti(s, ts->type, ts->val,
4239                                 ts->mem_base->reg, ts->mem_offset)) {
4240                  break;
4241              }
4242              temp_load(s, ts, tcg_target_available_regs[ts->type],
4243                        allocated_regs, preferred_regs);
4244              /* fallthrough */
4245  
4246          case TEMP_VAL_REG:
4247              tcg_out_st(s, ts->type, ts->reg,
4248                         ts->mem_base->reg, ts->mem_offset);
4249              break;
4250  
4251          case TEMP_VAL_MEM:
4252              break;
4253  
4254          case TEMP_VAL_DEAD:
4255          default:
4256              g_assert_not_reached();
4257          }
4258          ts->mem_coherent = 1;
4259      }
4260      if (free_or_dead) {
4261          temp_free_or_dead(s, ts, free_or_dead);
4262      }
4263  }
4264  
4265  /* free register 'reg' by spilling the corresponding temporary if necessary */
4266  static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs)
4267  {
4268      TCGTemp *ts = s->reg_to_temp[reg];
4269      if (ts != NULL) {
4270          temp_sync(s, ts, allocated_regs, 0, -1);
4271      }
4272  }
4273  
4274  /**
4275   * tcg_reg_alloc:
4276   * @required_regs: Set of registers in which we must allocate.
4277   * @allocated_regs: Set of registers which must be avoided.
4278   * @preferred_regs: Set of registers we should prefer.
4279   * @rev: True if we search the registers in "indirect" order.
4280   *
4281   * The allocated register must be in @required_regs & ~@allocated_regs,
4282   * but if we can put it in @preferred_regs we may save a move later.
4283   */
4284  static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs,
4285                              TCGRegSet allocated_regs,
4286                              TCGRegSet preferred_regs, bool rev)
4287  {
4288      int i, j, f, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
4289      TCGRegSet reg_ct[2];
4290      const int *order;
4291  
4292      reg_ct[1] = required_regs & ~allocated_regs;
4293      tcg_debug_assert(reg_ct[1] != 0);
4294      reg_ct[0] = reg_ct[1] & preferred_regs;
4295  
4296      /* Skip the preferred_regs option if it cannot be satisfied,
4297         or if the preference made no difference.  */
4298      f = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
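          /* Thus the search below starts with the preferred subset only
             when it is a non-empty, proper refinement of reg_ct[1]. */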
4299  
4300      order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
4301  
4302      /* Try free registers, preferences first.  */
4303      for (j = f; j < 2; j++) {
4304          TCGRegSet set = reg_ct[j];
4305  
4306          if (tcg_regset_single(set)) {
4307              /* One register in the set.  */
4308              TCGReg reg = tcg_regset_first(set);
4309              if (s->reg_to_temp[reg] == NULL) {
4310                  return reg;
4311              }
4312          } else {
4313              for (i = 0; i < n; i++) {
4314                  TCGReg reg = order[i];
4315                  if (s->reg_to_temp[reg] == NULL &&
4316                      tcg_regset_test_reg(set, reg)) {
4317                      return reg;
4318                  }
4319              }
4320          }
4321      }
4322  
4323      /* We must spill something.  */
4324      for (j = f; j < 2; j++) {
4325          TCGRegSet set = reg_ct[j];
4326  
4327          if (tcg_regset_single(set)) {
4328              /* One register in the set.  */
4329              TCGReg reg = tcg_regset_first(set);
4330              tcg_reg_free(s, reg, allocated_regs);
4331              return reg;
4332          } else {
4333              for (i = 0; i < n; i++) {
4334                  TCGReg reg = order[i];
4335                  if (tcg_regset_test_reg(set, reg)) {
4336                      tcg_reg_free(s, reg, allocated_regs);
4337                      return reg;
4338                  }
4339              }
4340          }
4341      }
4342  
4343      g_assert_not_reached();
4344  }
4345  
4346  static TCGReg tcg_reg_alloc_pair(TCGContext *s, TCGRegSet required_regs,
4347                                   TCGRegSet allocated_regs,
4348                                   TCGRegSet preferred_regs, bool rev)
4349  {
4350      int i, j, k, fmin, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
4351      TCGRegSet reg_ct[2];
4352      const int *order;
4353  
4354      /* Ensure that if I is not in allocated_regs, I+1 is not either. */
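          /* E.g. allocated_regs = 0b0110 removes r1 and r2 directly, and
             also removes r0 because its partner r1 is taken. */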
4355      reg_ct[1] = required_regs & ~(allocated_regs | (allocated_regs >> 1));
4356      tcg_debug_assert(reg_ct[1] != 0);
4357      reg_ct[0] = reg_ct[1] & preferred_regs;
4358  
4359      order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
4360  
4361      /*
4362       * Skip the preferred_regs option if it cannot be satisfied,
4363       * or if the preference made no difference.
4364       */
4365      k = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
4366  
4367      /*
4368       * Minimize the number of flushes by looking for 2 free registers first,
4369       * then a single flush, then two flushes.
4370       */
4371      for (fmin = 2; fmin >= 0; fmin--) {
4372          for (j = k; j < 2; j++) {
4373              TCGRegSet set = reg_ct[j];
4374  
4375              for (i = 0; i < n; i++) {
4376                  TCGReg reg = order[i];
4377  
4378                  if (tcg_regset_test_reg(set, reg)) {
4379                      int f = !s->reg_to_temp[reg] + !s->reg_to_temp[reg + 1];
4380                      if (f >= fmin) {
4381                          tcg_reg_free(s, reg, allocated_regs);
4382                          tcg_reg_free(s, reg + 1, allocated_regs);
4383                          return reg;
4384                      }
4385                  }
4386              }
4387          }
4388      }
4389      g_assert_not_reached();
4390  }
4391  
4392  /* Make sure the temporary is in a register.  If needed, allocate the register
4393     from DESIRED while avoiding ALLOCATED.  */
4394  static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
4395                        TCGRegSet allocated_regs, TCGRegSet preferred_regs)
4396  {
4397      TCGReg reg;
4398  
4399      switch (ts->val_type) {
4400      case TEMP_VAL_REG:
4401          return;
4402      case TEMP_VAL_CONST:
4403          reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
4404                              preferred_regs, ts->indirect_base);
4405          if (ts->type <= TCG_TYPE_I64) {
4406              tcg_out_movi(s, ts->type, reg, ts->val);
4407          } else {
4408              uint64_t val = ts->val;
4409              MemOp vece = MO_64;
4410  
4411              /*
4412               * Find the minimal vector element that matches the constant.
4413               * The targets will, in general, have to do this search anyway;
4414               * do it generically here.
4415               */
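                  /* E.g. 0x4242424242424242 dups at MO_8, while
                     0x0001000100010001 dups only at MO_16. */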
4416              if (val == dup_const(MO_8, val)) {
4417                  vece = MO_8;
4418              } else if (val == dup_const(MO_16, val)) {
4419                  vece = MO_16;
4420              } else if (val == dup_const(MO_32, val)) {
4421                  vece = MO_32;
4422              }
4423  
4424              tcg_out_dupi_vec(s, ts->type, vece, reg, ts->val);
4425          }
4426          ts->mem_coherent = 0;
4427          break;
4428      case TEMP_VAL_MEM:
4429          reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
4430                              preferred_regs, ts->indirect_base);
4431          tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
4432          ts->mem_coherent = 1;
4433          break;
4434      case TEMP_VAL_DEAD:
4435      default:
4436          g_assert_not_reached();
4437      }
4438      set_temp_val_reg(s, ts, reg);
4439  }
4440  
4441  /* Save a temporary to memory. 'allocated_regs' is used in case a
4442     temporary register needs to be allocated to store a constant.  */
4443  static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
4444  {
4445      /* The liveness analysis already ensures that globals are back
4446         in memory. Keep a tcg_debug_assert for safety. */
4447      tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || temp_readonly(ts));
4448  }
4449  
4450  /* save globals to their canonical location and assume they can be
4451     modified by the following code. 'allocated_regs' is used in case a
4452     temporary register needs to be allocated to store a constant. */
4453  static void save_globals(TCGContext *s, TCGRegSet allocated_regs)
4454  {
4455      int i, n;
4456  
4457      for (i = 0, n = s->nb_globals; i < n; i++) {
4458          temp_save(s, &s->temps[i], allocated_regs);
4459      }
4460  }
4461  
4462  /* sync globals to their canonical location and assume they can be
4463     read by the following code. 'allocated_regs' is used in case a
4464     temporary register needs to be allocated to store a constant. */
4465  static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
4466  {
4467      int i, n;
4468  
4469      for (i = 0, n = s->nb_globals; i < n; i++) {
4470          TCGTemp *ts = &s->temps[i];
4471          tcg_debug_assert(ts->val_type != TEMP_VAL_REG
4472                           || ts->kind == TEMP_FIXED
4473                           || ts->mem_coherent);
4474      }
4475  }
4476  
4477  /* at the end of a basic block, we assume all temporaries are dead and
4478     all globals are stored at their canonical location. */
4479  static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
4480  {
4481      int i;
4482  
4483      for (i = s->nb_globals; i < s->nb_temps; i++) {
4484          TCGTemp *ts = &s->temps[i];
4485  
4486          switch (ts->kind) {
4487          case TEMP_TB:
4488              temp_save(s, ts, allocated_regs);
4489              break;
4490          case TEMP_EBB:
4491              /* The liveness analysis already ensures that temps are dead.
4492                 Keep a tcg_debug_assert for safety. */
4493              tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
4494              break;
4495          case TEMP_CONST:
4496              /* Similarly, we should have freed any allocated register. */
4497              tcg_debug_assert(ts->val_type == TEMP_VAL_CONST);
4498              break;
4499          default:
4500              g_assert_not_reached();
4501          }
4502      }
4503  
4504      save_globals(s, allocated_regs);
4505  }
4506  
4507  /*
4508   * At a conditional branch, we assume all temporaries are dead unless
4509   * explicitly live-across-conditional-branch; all globals and local
4510   * temps are synced to their location.
4511   */
4512  static void tcg_reg_alloc_cbranch(TCGContext *s, TCGRegSet allocated_regs)
4513  {
4514      sync_globals(s, allocated_regs);
4515  
4516      for (int i = s->nb_globals; i < s->nb_temps; i++) {
4517          TCGTemp *ts = &s->temps[i];
4518          /*
4519           * The liveness analysis already ensures that temps are dead.
4520           * Keep tcg_debug_asserts for safety.
4521           */
4522          switch (ts->kind) {
4523          case TEMP_TB:
4524              tcg_debug_assert(ts->val_type != TEMP_VAL_REG || ts->mem_coherent);
4525              break;
4526          case TEMP_EBB:
4527          case TEMP_CONST:
4528              break;
4529          default:
4530              g_assert_not_reached();
4531          }
4532      }
4533  }
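
/*
 * Illustration: for a TEMP_TB temp live across the conditional branch,
 * the assert above means its value must already be coherent in memory,
 * so the fall-through path may keep using the register copy while the
 * taken path, which starts from the all-in-memory state established at
 * the label, reloads from the canonical slot.
 */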
4534  
4535  /*
4536   * Specialized code generation for INDEX_op_mov_* with a constant.
4537   */
4538  static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots,
4539                                    tcg_target_ulong val, TCGLifeData arg_life,
4540                                    TCGRegSet preferred_regs)
4541  {
4542      /* ENV should not be modified.  */
4543      tcg_debug_assert(!temp_readonly(ots));
4544  
4545      /* The movi is not explicitly generated here.  */
4546      set_temp_val_nonreg(s, ots, TEMP_VAL_CONST);
4547      ots->val = val;
4548      ots->mem_coherent = 0;
4549      if (NEED_SYNC_ARG(0)) {
4550          temp_sync(s, ots, s->reserved_regs, preferred_regs, IS_DEAD_ARG(0));
4551      } else if (IS_DEAD_ARG(0)) {
4552          temp_dead(s, ots);
4553      }
4554  }
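
/*
 * E.g. "mov_i32 t0, $0xdeadbeef" usually emits no host code at this
 * point: t0 merely becomes TEMP_VAL_CONST.  The value is materialized
 * later, by temp_load, only if some subsequent op needs it in a
 * register, or stored directly if the output must be synced to memory.
 */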
4555  
4556  /*
4557   * Specialized code generation for INDEX_op_mov_*.
4558   */
4559  static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op)
4560  {
4561      const TCGLifeData arg_life = op->life;
4562      TCGRegSet allocated_regs, preferred_regs;
4563      TCGTemp *ts, *ots;
4564      TCGType otype, itype;
4565      TCGReg oreg, ireg;
4566  
4567      allocated_regs = s->reserved_regs;
4568      preferred_regs = output_pref(op, 0);
4569      ots = arg_temp(op->args[0]);
4570      ts = arg_temp(op->args[1]);
4571  
4572      /* ENV should not be modified.  */
4573      tcg_debug_assert(!temp_readonly(ots));
4574  
4575      /* Note that otype != itype for no-op truncation.  */
4576      otype = ots->type;
4577      itype = ts->type;
4578  
4579      if (ts->val_type == TEMP_VAL_CONST) {
4580          /* propagate constant or generate sti */
4581          tcg_target_ulong val = ts->val;
4582          if (IS_DEAD_ARG(1)) {
4583              temp_dead(s, ts);
4584          }
4585          tcg_reg_alloc_do_movi(s, ots, val, arg_life, preferred_regs);
4586          return;
4587      }
4588  
4589      /* If the source value is in memory we're going to be forced
4590         to have it in a register in order to perform the copy.  Copy
4591         the SOURCE value into its own register first, that way we
4592         don't have to reload SOURCE the next time it is used. */
4593      if (ts->val_type == TEMP_VAL_MEM) {
4594          temp_load(s, ts, tcg_target_available_regs[itype],
4595                    allocated_regs, preferred_regs);
4596      }
4597      tcg_debug_assert(ts->val_type == TEMP_VAL_REG);
4598      ireg = ts->reg;
4599  
4600      if (IS_DEAD_ARG(0)) {
4601          /* mov to a non-saved dead register makes no sense (even with
4602             liveness analysis disabled). */
4603          tcg_debug_assert(NEED_SYNC_ARG(0));
4604          if (!ots->mem_allocated) {
4605              temp_allocate_frame(s, ots);
4606          }
4607          tcg_out_st(s, otype, ireg, ots->mem_base->reg, ots->mem_offset);
4608          if (IS_DEAD_ARG(1)) {
4609              temp_dead(s, ts);
4610          }
4611          temp_dead(s, ots);
4612          return;
4613      }
4614  
4615      if (IS_DEAD_ARG(1) && ts->kind != TEMP_FIXED) {
4616          /*
4617           * The mov can be suppressed.  Kill input first, so that it
4618           * is unlinked from reg_to_temp, then set the output to the
4619           * reg that we saved from the input.
4620           */
4621          temp_dead(s, ts);
4622          oreg = ireg;
4623      } else {
4624          if (ots->val_type == TEMP_VAL_REG) {
4625              oreg = ots->reg;
4626          } else {
4627              /* Make sure to not spill the input register during allocation. */
4628              oreg = tcg_reg_alloc(s, tcg_target_available_regs[otype],
4629                                   allocated_regs | ((TCGRegSet)1 << ireg),
4630                                   preferred_regs, ots->indirect_base);
4631          }
4632          if (!tcg_out_mov(s, otype, oreg, ireg)) {
4633              /*
4634               * Cross register class move not supported.
4635               * Store the source register into the destination slot
4636               * and leave the destination temp as TEMP_VAL_MEM.
4637               */
4638              assert(!temp_readonly(ots));
4639              if (!ots->mem_allocated) {
4640                  temp_allocate_frame(s, ots);
4641              }
4642              tcg_out_st(s, ts->type, ireg, ots->mem_base->reg, ots->mem_offset);
4643              set_temp_val_nonreg(s, ots, TEMP_VAL_MEM);
4644              ots->mem_coherent = 1;
4645              return;
4646          }
4647      }
4648      set_temp_val_reg(s, ots, oreg);
4649      ots->mem_coherent = 0;
4650  
4651      if (NEED_SYNC_ARG(0)) {
4652          temp_sync(s, ots, allocated_regs, 0, 0);
4653      }
4654  }
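
/*
 * The common fast path above is pure register renaming: for
 * "mov_i32 t1, t0" with t0 in a host register and dead afterward,
 * no host instruction is emitted at all; t1 simply takes over t0's
 * register.
 */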
4655  
4656  /*
4657   * Specialized code generation for INDEX_op_dup_vec.
4658   */
4659  static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op)
4660  {
4661      const TCGLifeData arg_life = op->life;
4662      TCGRegSet dup_out_regs, dup_in_regs;
4663      TCGTemp *its, *ots;
4664      TCGType itype, vtype;
4665      unsigned vece;
4666      int lowpart_ofs;
4667      bool ok;
4668  
4669      ots = arg_temp(op->args[0]);
4670      its = arg_temp(op->args[1]);
4671  
4672      /* ENV should not be modified.  */
4673      tcg_debug_assert(!temp_readonly(ots));
4674  
4675      itype = its->type;
4676      vece = TCGOP_VECE(op);
4677      vtype = TCGOP_VECL(op) + TCG_TYPE_V64;
4678  
4679      if (its->val_type == TEMP_VAL_CONST) {
4680          /* Propagate constant via movi -> dupi.  */
4681          tcg_target_ulong val = its->val;
4682          if (IS_DEAD_ARG(1)) {
4683              temp_dead(s, its);
4684          }
4685          tcg_reg_alloc_do_movi(s, ots, val, arg_life, output_pref(op, 0));
4686          return;
4687      }
4688  
4689      dup_out_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs;
4690      dup_in_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[1].regs;
4691  
4692      /* Allocate the output register now.  */
4693      if (ots->val_type != TEMP_VAL_REG) {
4694          TCGRegSet allocated_regs = s->reserved_regs;
4695          TCGReg oreg;
4696  
4697          if (!IS_DEAD_ARG(1) && its->val_type == TEMP_VAL_REG) {
4698              /* Make sure to not spill the input register. */
4699              tcg_regset_set_reg(allocated_regs, its->reg);
4700          }
4701          oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
4702                               output_pref(op, 0), ots->indirect_base);
4703          set_temp_val_reg(s, ots, oreg);
4704      }
4705  
4706      switch (its->val_type) {
4707      case TEMP_VAL_REG:
4708          /*
4709           * The dup constraints must be broad, covering all possible VECE.
4710           * However, tcg_op_dup_vec() gets to see the VECE and we allow it
4711           * to fail, indicating that extra moves are required for that case.
4712           */
4713          if (tcg_regset_test_reg(dup_in_regs, its->reg)) {
4714              if (tcg_out_dup_vec(s, vtype, vece, ots->reg, its->reg)) {
4715                  goto done;
4716              }
4717              /* Try again from memory or a vector input register.  */
4718          }
4719          if (!its->mem_coherent) {
4720              /*
4721               * The input register is not synced, and so an extra store
4722               * would be required to use memory.  Attempt an integer-vector
4723               * register move first.  We do not have a TCGRegSet for this.
4724               */
4725              if (tcg_out_mov(s, itype, ots->reg, its->reg)) {
4726                  break;
4727              }
4728              /* Sync the temp back to its slot and load from there.  */
4729              temp_sync(s, its, s->reserved_regs, 0, 0);
4730          }
4731          /* fall through */
4732  
4733      case TEMP_VAL_MEM:
4734          lowpart_ofs = 0;
4735          if (HOST_BIG_ENDIAN) {
4736              lowpart_ofs = tcg_type_size(itype) - (1 << vece);
4737          }
4738          if (tcg_out_dupm_vec(s, vtype, vece, ots->reg, its->mem_base->reg,
4739                               its->mem_offset + lowpart_ofs)) {
4740              goto done;
4741          }
4742          /* Load the input into the destination vector register. */
4743          tcg_out_ld(s, itype, ots->reg, its->mem_base->reg, its->mem_offset);
4744          break;
4745  
4746      default:
4747          g_assert_not_reached();
4748      }
4749  
4750      /* We now have a vector input register, so dup must succeed. */
4751      ok = tcg_out_dup_vec(s, vtype, vece, ots->reg, ots->reg);
4752      tcg_debug_assert(ok);
4753  
4754   done:
4755      ots->mem_coherent = 0;
4756      if (IS_DEAD_ARG(1)) {
4757          temp_dead(s, its);
4758      }
4759      if (NEED_SYNC_ARG(0)) {
4760          temp_sync(s, ots, s->reserved_regs, 0, 0);
4761      }
4762      if (IS_DEAD_ARG(0)) {
4763          temp_dead(s, ots);
4764      }
4765  }
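
/*
 * In short, the cascade above tries: a vector dup straight from the
 * input register; else an integer->vector move (the final dup below
 * finishes the job); else a dup directly from the input's memory slot;
 * else a plain vector load, after which the final reg-reg dup is
 * required to succeed.
 */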
4766  
4767  static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
4768  {
4769      const TCGLifeData arg_life = op->life;
4770      const TCGOpDef * const def = &tcg_op_defs[op->opc];
4771      TCGRegSet i_allocated_regs;
4772      TCGRegSet o_allocated_regs;
4773      int i, k, nb_iargs, nb_oargs;
4774      TCGReg reg;
4775      TCGArg arg;
4776      const TCGArgConstraint *arg_ct;
4777      TCGTemp *ts;
4778      TCGArg new_args[TCG_MAX_OP_ARGS];
4779      int const_args[TCG_MAX_OP_ARGS];
4780  
4781      nb_oargs = def->nb_oargs;
4782      nb_iargs = def->nb_iargs;
4783  
4784      /* copy constants */
4785      memcpy(new_args + nb_oargs + nb_iargs,
4786             op->args + nb_oargs + nb_iargs,
4787             sizeof(TCGArg) * def->nb_cargs);
4788  
4789      i_allocated_regs = s->reserved_regs;
4790      o_allocated_regs = s->reserved_regs;
4791  
4792      /* satisfy input constraints */
4793      for (k = 0; k < nb_iargs; k++) {
4794          TCGRegSet i_preferred_regs, i_required_regs;
4795          bool allocate_new_reg, copyto_new_reg;
4796          TCGTemp *ts2;
4797          int i1, i2;
4798  
4799          i = def->args_ct[nb_oargs + k].sort_index;
4800          arg = op->args[i];
4801          arg_ct = &def->args_ct[i];
4802          ts = arg_temp(arg);
4803  
4804          if (ts->val_type == TEMP_VAL_CONST
4805              && tcg_target_const_match(ts->val, ts->type, arg_ct->ct, TCGOP_VECE(op))) {
4806              /* constant is OK for instruction */
4807              const_args[i] = 1;
4808              new_args[i] = ts->val;
4809              continue;
4810          }
4811  
4812          reg = ts->reg;
4813          i_preferred_regs = 0;
4814          i_required_regs = arg_ct->regs;
4815          allocate_new_reg = false;
4816          copyto_new_reg = false;
4817  
4818          switch (arg_ct->pair) {
4819          case 0: /* not paired */
4820              if (arg_ct->ialias) {
4821                  i_preferred_regs = output_pref(op, arg_ct->alias_index);
4822  
4823                  /*
4824                   * If the input is readonly, then it cannot also be an
4825                   * output and aliased to itself.  If the input is not
4826                   * dead after the instruction, we must allocate a new
4827                   * register and move it.
4828                   */
4829                  if (temp_readonly(ts) || !IS_DEAD_ARG(i)
4830                      || def->args_ct[arg_ct->alias_index].newreg) {
4831                      allocate_new_reg = true;
4832                  } else if (ts->val_type == TEMP_VAL_REG) {
4833                      /*
4834                       * Check if the current register has already been
4835                       * allocated for another input.
4836                       */
4837                      allocate_new_reg =
4838                          tcg_regset_test_reg(i_allocated_regs, reg);
4839                  }
4840              }
4841              if (!allocate_new_reg) {
4842                  temp_load(s, ts, i_required_regs, i_allocated_regs,
4843                            i_preferred_regs);
4844                  reg = ts->reg;
4845                  allocate_new_reg = !tcg_regset_test_reg(i_required_regs, reg);
4846              }
4847              if (allocate_new_reg) {
4848                  /*
4849                   * Allocate a new register matching the constraint
4850                   * and move the temporary register into it.
4851                   */
4852                  temp_load(s, ts, tcg_target_available_regs[ts->type],
4853                            i_allocated_regs, 0);
4854                  reg = tcg_reg_alloc(s, i_required_regs, i_allocated_regs,
4855                                      i_preferred_regs, ts->indirect_base);
4856                  copyto_new_reg = true;
4857              }
4858              break;
4859  
4860          case 1:
4861              /* First of an input pair; if i1 == i2, the second is an output. */
4862              i1 = i;
4863              i2 = arg_ct->pair_index;
4864              ts2 = i1 != i2 ? arg_temp(op->args[i2]) : NULL;
4865  
4866              /*
4867               * It is easier to default to allocating a new pair
4868               * and to identify a few cases where it's not required.
4869               */
4870              if (arg_ct->ialias) {
4871                  i_preferred_regs = output_pref(op, arg_ct->alias_index);
4872                  if (IS_DEAD_ARG(i1) &&
4873                      IS_DEAD_ARG(i2) &&
4874                      !temp_readonly(ts) &&
4875                      ts->val_type == TEMP_VAL_REG &&
4876                      ts->reg < TCG_TARGET_NB_REGS - 1 &&
4877                      tcg_regset_test_reg(i_required_regs, reg) &&
4878                      !tcg_regset_test_reg(i_allocated_regs, reg) &&
4879                      !tcg_regset_test_reg(i_allocated_regs, reg + 1) &&
4880                      (ts2
4881                       ? ts2->val_type == TEMP_VAL_REG &&
4882                         ts2->reg == reg + 1 &&
4883                         !temp_readonly(ts2)
4884                       : s->reg_to_temp[reg + 1] == NULL)) {
4885                      break;
4886                  }
4887              } else {
4888                  /* Without aliasing, the pair must also be an input. */
4889                  tcg_debug_assert(ts2);
4890                  if (ts->val_type == TEMP_VAL_REG &&
4891                      ts2->val_type == TEMP_VAL_REG &&
4892                      ts2->reg == reg + 1 &&
4893                      tcg_regset_test_reg(i_required_regs, reg)) {
4894                      break;
4895                  }
4896              }
4897              reg = tcg_reg_alloc_pair(s, i_required_regs, i_allocated_regs,
4898                                       0, ts->indirect_base);
4899              goto do_pair;
4900  
4901          case 2: /* pair second */
4902              reg = new_args[arg_ct->pair_index] + 1;
4903              goto do_pair;
4904  
4905          case 3: /* ialias with second output, no first input */
4906              tcg_debug_assert(arg_ct->ialias);
4907              i_preferred_regs = output_pref(op, arg_ct->alias_index);
4908  
4909              if (IS_DEAD_ARG(i) &&
4910                  !temp_readonly(ts) &&
4911                  ts->val_type == TEMP_VAL_REG &&
4912                  reg > 0 &&
4913                  s->reg_to_temp[reg - 1] == NULL &&
4914                  tcg_regset_test_reg(i_required_regs, reg) &&
4915                  !tcg_regset_test_reg(i_allocated_regs, reg) &&
4916                  !tcg_regset_test_reg(i_allocated_regs, reg - 1)) {
4917                  tcg_regset_set_reg(i_allocated_regs, reg - 1);
4918                  break;
4919              }
4920              reg = tcg_reg_alloc_pair(s, i_required_regs >> 1,
4921                                       i_allocated_regs, 0,
4922                                       ts->indirect_base);
4923              tcg_regset_set_reg(i_allocated_regs, reg);
4924              reg += 1;
4925              goto do_pair;
4926  
4927          do_pair:
4928              /*
4929               * If an aliased input is not dead after the instruction,
4930               * we must allocate a new register and move it.
4931               */
4932              if (arg_ct->ialias && (!IS_DEAD_ARG(i) || temp_readonly(ts))) {
4933                  TCGRegSet t_allocated_regs = i_allocated_regs;
4934  
4935                  /*
4936                   * Because of the alias, and the continued life, make sure
4937                   * that the temp is somewhere *other* than the reg pair,
4938                   * and we get a copy in reg.
4939                   */
4940                  tcg_regset_set_reg(t_allocated_regs, reg);
4941                  tcg_regset_set_reg(t_allocated_regs, reg + 1);
4942                  if (ts->val_type == TEMP_VAL_REG && ts->reg == reg) {
4943                      /* If ts was already in reg, copy it somewhere else. */
4944                      TCGReg nr;
4945                      bool ok;
4946  
4947                      tcg_debug_assert(ts->kind != TEMP_FIXED);
4948                      nr = tcg_reg_alloc(s, tcg_target_available_regs[ts->type],
4949                                         t_allocated_regs, 0, ts->indirect_base);
4950                      ok = tcg_out_mov(s, ts->type, nr, reg);
4951                      tcg_debug_assert(ok);
4952  
4953                      set_temp_val_reg(s, ts, nr);
4954                  } else {
4955                      temp_load(s, ts, tcg_target_available_regs[ts->type],
4956                                t_allocated_regs, 0);
4957                      copyto_new_reg = true;
4958                  }
4959              } else {
4960                  /* Preferably allocate to reg, otherwise copy. */
4961                  i_required_regs = (TCGRegSet)1 << reg;
4962                  temp_load(s, ts, i_required_regs, i_allocated_regs,
4963                            i_preferred_regs);
4964                  copyto_new_reg = ts->reg != reg;
4965              }
4966              break;
4967  
4968          default:
4969              g_assert_not_reached();
4970          }
4971  
4972          if (copyto_new_reg) {
4973              if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
4974                  /*
4975                   * Cross register class move not supported.  Sync the
4976                   * temp back to its slot and load from there.
4977                   */
4978                  temp_sync(s, ts, i_allocated_regs, 0, 0);
4979                  tcg_out_ld(s, ts->type, reg,
4980                             ts->mem_base->reg, ts->mem_offset);
4981              }
4982          }
4983          new_args[i] = reg;
4984          const_args[i] = 0;
4985          tcg_regset_set_reg(i_allocated_regs, reg);
4986      }
4987  
4988      /* mark dead temporaries and free the associated registers */
4989      for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
4990          if (IS_DEAD_ARG(i)) {
4991              temp_dead(s, arg_temp(op->args[i]));
4992          }
4993      }
4994  
4995      if (def->flags & TCG_OPF_COND_BRANCH) {
4996          tcg_reg_alloc_cbranch(s, i_allocated_regs);
4997      } else if (def->flags & TCG_OPF_BB_END) {
4998          tcg_reg_alloc_bb_end(s, i_allocated_regs);
4999      } else {
5000          if (def->flags & TCG_OPF_CALL_CLOBBER) {
5001              /* XXX: permit generic clobber register list? */
5002              for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
5003                  if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
5004                      tcg_reg_free(s, i, i_allocated_regs);
5005                  }
5006              }
5007          }
5008          if (def->flags & TCG_OPF_SIDE_EFFECTS) {
5009              /* sync globals if the op has side effects and might trigger
5010                 an exception. */
5011              sync_globals(s, i_allocated_regs);
5012          }
5013  
5014          /* satisfy the output constraints */
5015          for (k = 0; k < nb_oargs; k++) {
5016              i = def->args_ct[k].sort_index;
5017              arg = op->args[i];
5018              arg_ct = &def->args_ct[i];
5019              ts = arg_temp(arg);
5020  
5021              /* ENV should not be modified.  */
5022              tcg_debug_assert(!temp_readonly(ts));
5023  
5024              switch (arg_ct->pair) {
5025              case 0: /* not paired */
5026                  if (arg_ct->oalias && !const_args[arg_ct->alias_index]) {
5027                      reg = new_args[arg_ct->alias_index];
5028                  } else if (arg_ct->newreg) {
5029                      reg = tcg_reg_alloc(s, arg_ct->regs,
5030                                          i_allocated_regs | o_allocated_regs,
5031                                          output_pref(op, k), ts->indirect_base);
5032                  } else {
5033                      reg = tcg_reg_alloc(s, arg_ct->regs, o_allocated_regs,
5034                                          output_pref(op, k), ts->indirect_base);
5035                  }
5036                  break;
5037  
5038              case 1: /* first of pair */
5039                  tcg_debug_assert(!arg_ct->newreg);
5040                  if (arg_ct->oalias) {
5041                      reg = new_args[arg_ct->alias_index];
5042                      break;
5043                  }
5044                  reg = tcg_reg_alloc_pair(s, arg_ct->regs, o_allocated_regs,
5045                                           output_pref(op, k), ts->indirect_base);
5046                  break;
5047  
5048              case 2: /* second of pair */
5049                  tcg_debug_assert(!arg_ct->newreg);
5050                  if (arg_ct->oalias) {
5051                      reg = new_args[arg_ct->alias_index];
5052                  } else {
5053                      reg = new_args[arg_ct->pair_index] + 1;
5054                  }
5055                  break;
5056  
5057              case 3: /* first of pair, aliasing with a second input */
5058                  tcg_debug_assert(!arg_ct->newreg);
5059                  reg = new_args[arg_ct->pair_index] - 1;
5060                  break;
5061  
5062              default:
5063                  g_assert_not_reached();
5064              }
5065              tcg_regset_set_reg(o_allocated_regs, reg);
5066              set_temp_val_reg(s, ts, reg);
5067              ts->mem_coherent = 0;
5068              new_args[i] = reg;
5069          }
5070      }
5071  
5072      /* emit instruction */
5073      switch (op->opc) {
5074      case INDEX_op_ext8s_i32:
5075          tcg_out_ext8s(s, TCG_TYPE_I32, new_args[0], new_args[1]);
5076          break;
5077      case INDEX_op_ext8s_i64:
5078          tcg_out_ext8s(s, TCG_TYPE_I64, new_args[0], new_args[1]);
5079          break;
5080      case INDEX_op_ext8u_i32:
5081      case INDEX_op_ext8u_i64:
5082          tcg_out_ext8u(s, new_args[0], new_args[1]);
5083          break;
5084      case INDEX_op_ext16s_i32:
5085          tcg_out_ext16s(s, TCG_TYPE_I32, new_args[0], new_args[1]);
5086          break;
5087      case INDEX_op_ext16s_i64:
5088          tcg_out_ext16s(s, TCG_TYPE_I64, new_args[0], new_args[1]);
5089          break;
5090      case INDEX_op_ext16u_i32:
5091      case INDEX_op_ext16u_i64:
5092          tcg_out_ext16u(s, new_args[0], new_args[1]);
5093          break;
5094      case INDEX_op_ext32s_i64:
5095          tcg_out_ext32s(s, new_args[0], new_args[1]);
5096          break;
5097      case INDEX_op_ext32u_i64:
5098          tcg_out_ext32u(s, new_args[0], new_args[1]);
5099          break;
5100      case INDEX_op_ext_i32_i64:
5101          tcg_out_exts_i32_i64(s, new_args[0], new_args[1]);
5102          break;
5103      case INDEX_op_extu_i32_i64:
5104          tcg_out_extu_i32_i64(s, new_args[0], new_args[1]);
5105          break;
5106      case INDEX_op_extrl_i64_i32:
5107          tcg_out_extrl_i64_i32(s, new_args[0], new_args[1]);
5108          break;
5109      default:
5110          if (def->flags & TCG_OPF_VECTOR) {
5111              tcg_out_vec_op(s, op->opc, TCGOP_VECL(op), TCGOP_VECE(op),
5112                             new_args, const_args);
5113          } else {
5114              tcg_out_op(s, op->opc, new_args, const_args);
5115          }
5116          break;
5117      }
5118  
5119      /* move the outputs into the correct registers if needed */
5120      for (i = 0; i < nb_oargs; i++) {
5121          ts = arg_temp(op->args[i]);
5122  
5123          /* ENV should not be modified.  */
5124          tcg_debug_assert(!temp_readonly(ts));
5125  
5126          if (NEED_SYNC_ARG(i)) {
5127              temp_sync(s, ts, o_allocated_regs, 0, IS_DEAD_ARG(i));
5128          } else if (IS_DEAD_ARG(i)) {
5129              temp_dead(s, ts);
5130          }
5131      }
5132  }
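
/*
 * Worked example for the ialias handling above: with an x86-style
 * 2-operand matching constraint ("0") on "add_i32 t2, t0, t1", input
 * t0 aliases output 0.  If t0 dies here, t2 simply reuses t0's
 * register; if t0 stays live, a new register is allocated, t0 is
 * copied into it, and the add clobbers the copy rather than t0.
 */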
5133  
5134  static bool tcg_reg_alloc_dup2(TCGContext *s, const TCGOp *op)
5135  {
5136      const TCGLifeData arg_life = op->life;
5137      TCGTemp *ots, *itsl, *itsh;
5138      TCGType vtype = TCGOP_VECL(op) + TCG_TYPE_V64;
5139  
5140      /* This opcode is only valid for 32-bit hosts, for 64-bit elements. */
5141      tcg_debug_assert(TCG_TARGET_REG_BITS == 32);
5142      tcg_debug_assert(TCGOP_VECE(op) == MO_64);
5143  
5144      ots = arg_temp(op->args[0]);
5145      itsl = arg_temp(op->args[1]);
5146      itsh = arg_temp(op->args[2]);
5147  
5148      /* ENV should not be modified.  */
5149      tcg_debug_assert(!temp_readonly(ots));
5150  
5151      /* Allocate the output register now.  */
5152      if (ots->val_type != TEMP_VAL_REG) {
5153          TCGRegSet allocated_regs = s->reserved_regs;
5154          TCGRegSet dup_out_regs =
5155              tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs;
5156          TCGReg oreg;
5157  
5158          /* Make sure to not spill the input registers. */
5159          if (!IS_DEAD_ARG(1) && itsl->val_type == TEMP_VAL_REG) {
5160              tcg_regset_set_reg(allocated_regs, itsl->reg);
5161          }
5162          if (!IS_DEAD_ARG(2) && itsh->val_type == TEMP_VAL_REG) {
5163              tcg_regset_set_reg(allocated_regs, itsh->reg);
5164          }
5165  
5166          oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
5167                               output_pref(op, 0), ots->indirect_base);
5168          set_temp_val_reg(s, ots, oreg);
5169      }
5170  
5171      /* Promote dup2 of immediates to dupi_vec. */
5172      if (itsl->val_type == TEMP_VAL_CONST && itsh->val_type == TEMP_VAL_CONST) {
5173          uint64_t val = deposit64(itsl->val, 32, 32, itsh->val);
5174          MemOp vece = MO_64;
5175  
5176          if (val == dup_const(MO_8, val)) {
5177              vece = MO_8;
5178          } else if (val == dup_const(MO_16, val)) {
5179              vece = MO_16;
5180          } else if (val == dup_const(MO_32, val)) {
5181              vece = MO_32;
5182          }
5183  
5184          tcg_out_dupi_vec(s, vtype, vece, ots->reg, val);
5185          goto done;
5186      }
5187  
5188      /* If the two inputs form one 64-bit value, try dupm_vec. */
5189      if (itsl->temp_subindex == HOST_BIG_ENDIAN &&
5190          itsh->temp_subindex == !HOST_BIG_ENDIAN &&
5191          itsl == itsh + (HOST_BIG_ENDIAN ? 1 : -1)) {
5192          TCGTemp *its = itsl - HOST_BIG_ENDIAN;
5193  
5194          temp_sync(s, its + 0, s->reserved_regs, 0, 0);
5195          temp_sync(s, its + 1, s->reserved_regs, 0, 0);
5196  
5197          if (tcg_out_dupm_vec(s, vtype, MO_64, ots->reg,
5198                               its->mem_base->reg, its->mem_offset)) {
5199              goto done;
5200          }
5201      }
5202  
5203      /* Fall back to generic expansion. */
5204      return false;
5205  
5206   done:
5207      ots->mem_coherent = 0;
5208      if (IS_DEAD_ARG(1)) {
5209          temp_dead(s, itsl);
5210      }
5211      if (IS_DEAD_ARG(2)) {
5212          temp_dead(s, itsh);
5213      }
5214      if (NEED_SYNC_ARG(0)) {
5215          temp_sync(s, ots, s->reserved_regs, 0, IS_DEAD_ARG(0));
5216      } else if (IS_DEAD_ARG(0)) {
5217          temp_dead(s, ots);
5218      }
5219      return true;
5220  }
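
/*
 * E.g. with both inputs constant, lo = hi = 0x01010101 deposits to the
 * 64-bit value 0x0101010101010101, which replicates at MO_8, so a
 * single byte dupi_vec is emitted instead of a two-register dup2.
 */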
5221  
5222  static void load_arg_reg(TCGContext *s, TCGReg reg, TCGTemp *ts,
5223                           TCGRegSet allocated_regs)
5224  {
5225      if (ts->val_type == TEMP_VAL_REG) {
5226          if (ts->reg != reg) {
5227              tcg_reg_free(s, reg, allocated_regs);
5228              if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
5229                  /*
5230                   * Cross register class move not supported.  Sync the
5231                   * temp back to its slot and load from there.
5232                   */
5233                  temp_sync(s, ts, allocated_regs, 0, 0);
5234                  tcg_out_ld(s, ts->type, reg,
5235                             ts->mem_base->reg, ts->mem_offset);
5236              }
5237          }
5238      } else {
5239          TCGRegSet arg_set = 0;
5240  
5241          tcg_reg_free(s, reg, allocated_regs);
5242          tcg_regset_set_reg(arg_set, reg);
5243          temp_load(s, ts, arg_set, allocated_regs, 0);
5244      }
5245  }
5246  
5247  static void load_arg_stk(TCGContext *s, unsigned arg_slot, TCGTemp *ts,
5248                           TCGRegSet allocated_regs)
5249  {
5250      /*
5251       * When the destination is on the stack, load up the temp and store.
5252       * If there are many call-saved registers, the temp might live to
5253       * see another use; otherwise it'll be discarded.
5254       */
5255      temp_load(s, ts, tcg_target_available_regs[ts->type], allocated_regs, 0);
5256      tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK,
5257                 arg_slot_stk_ofs(arg_slot));
5258  }
5259  
5260  static void load_arg_normal(TCGContext *s, const TCGCallArgumentLoc *l,
5261                              TCGTemp *ts, TCGRegSet *allocated_regs)
5262  {
5263      if (arg_slot_reg_p(l->arg_slot)) {
5264          TCGReg reg = tcg_target_call_iarg_regs[l->arg_slot];
5265          load_arg_reg(s, reg, ts, *allocated_regs);
5266          tcg_regset_set_reg(*allocated_regs, reg);
5267      } else {
5268          load_arg_stk(s, l->arg_slot, ts, *allocated_regs);
5269      }
5270  }
5271  
5272  static void load_arg_ref(TCGContext *s, unsigned arg_slot, TCGReg ref_base,
5273                           intptr_t ref_off, TCGRegSet *allocated_regs)
5274  {
5275      TCGReg reg;
5276  
5277      if (arg_slot_reg_p(arg_slot)) {
5278          reg = tcg_target_call_iarg_regs[arg_slot];
5279          tcg_reg_free(s, reg, *allocated_regs);
5280          tcg_out_addi_ptr(s, reg, ref_base, ref_off);
5281          tcg_regset_set_reg(*allocated_regs, reg);
5282      } else {
5283          reg = tcg_reg_alloc(s, tcg_target_available_regs[TCG_TYPE_PTR],
5284                              *allocated_regs, 0, false);
5285          tcg_out_addi_ptr(s, reg, ref_base, ref_off);
5286          tcg_out_st(s, TCG_TYPE_PTR, reg, TCG_REG_CALL_STACK,
5287                     arg_slot_stk_ofs(arg_slot));
5288      }
5289  }
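
/*
 * load_arg_ref materializes the address ref_base + ref_off as an
 * argument: e.g. for an i128 passed by reference, the value is first
 * spilled to a stack slot and the pointer to that slot is then placed
 * in the argument register, or stored to the argument's own stack
 * slot when that argument position has no register.
 */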
5290  
5291  static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
5292  {
5293      const int nb_oargs = TCGOP_CALLO(op);
5294      const int nb_iargs = TCGOP_CALLI(op);
5295      const TCGLifeData arg_life = op->life;
5296      const TCGHelperInfo *info = tcg_call_info(op);
5297      TCGRegSet allocated_regs = s->reserved_regs;
5298      int i;
5299  
5300      /*
5301       * Move inputs into place in reverse order,
5302       * so that we place stacked arguments first.
5303       */
5304      for (i = nb_iargs - 1; i >= 0; --i) {
5305          const TCGCallArgumentLoc *loc = &info->in[i];
5306          TCGTemp *ts = arg_temp(op->args[nb_oargs + i]);
5307  
5308          switch (loc->kind) {
5309          case TCG_CALL_ARG_NORMAL:
5310          case TCG_CALL_ARG_EXTEND_U:
5311          case TCG_CALL_ARG_EXTEND_S:
5312              load_arg_normal(s, loc, ts, &allocated_regs);
5313              break;
5314          case TCG_CALL_ARG_BY_REF:
5315              load_arg_stk(s, loc->ref_slot, ts, allocated_regs);
5316              load_arg_ref(s, loc->arg_slot, TCG_REG_CALL_STACK,
5317                           arg_slot_stk_ofs(loc->ref_slot),
5318                           &allocated_regs);
5319              break;
5320          case TCG_CALL_ARG_BY_REF_N:
5321              load_arg_stk(s, loc->ref_slot, ts, allocated_regs);
5322              break;
5323          default:
5324              g_assert_not_reached();
5325          }
5326      }
5327  
5328      /* Mark dead temporaries and free the associated registers.  */
5329      for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
5330          if (IS_DEAD_ARG(i)) {
5331              temp_dead(s, arg_temp(op->args[i]));
5332          }
5333      }
5334  
5335      /* Clobber call registers.  */
5336      for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
5337          if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
5338              tcg_reg_free(s, i, allocated_regs);
5339          }
5340      }
5341  
5342      /*
5343       * Save globals if they might be written by the helper,
5344       * sync them if they might be read.
5345       */
5346      if (info->flags & TCG_CALL_NO_READ_GLOBALS) {
5347          /* Nothing to do */
5348      } else if (info->flags & TCG_CALL_NO_WRITE_GLOBALS) {
5349          sync_globals(s, allocated_regs);
5350      } else {
5351          save_globals(s, allocated_regs);
5352      }
5353  
5354      /*
5355       * If the ABI passes a pointer to the returned struct as the first
5356       * argument, load that now.  Pass a pointer to the output home slot.
5357       */
5358      if (info->out_kind == TCG_CALL_RET_BY_REF) {
5359          TCGTemp *ts = arg_temp(op->args[0]);
5360  
5361          if (!ts->mem_allocated) {
5362              temp_allocate_frame(s, ts);
5363          }
5364          load_arg_ref(s, 0, ts->mem_base->reg, ts->mem_offset, &allocated_regs);
5365      }
5366  
5367      tcg_out_call(s, tcg_call_func(op), info);
5368  
5369      /* Assign output registers and emit moves if needed.  */
5370      switch (info->out_kind) {
5371      case TCG_CALL_RET_NORMAL:
5372          for (i = 0; i < nb_oargs; i++) {
5373              TCGTemp *ts = arg_temp(op->args[i]);
5374              TCGReg reg = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, i);
5375  
5376              /* ENV should not be modified.  */
5377              tcg_debug_assert(!temp_readonly(ts));
5378  
5379              set_temp_val_reg(s, ts, reg);
5380              ts->mem_coherent = 0;
5381          }
5382          break;
5383  
5384      case TCG_CALL_RET_BY_VEC:
5385          {
5386              TCGTemp *ts = arg_temp(op->args[0]);
5387  
5388              tcg_debug_assert(ts->base_type == TCG_TYPE_I128);
5389              tcg_debug_assert(ts->temp_subindex == 0);
5390              if (!ts->mem_allocated) {
5391                  temp_allocate_frame(s, ts);
5392              }
5393              tcg_out_st(s, TCG_TYPE_V128,
5394                         tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0),
5395                         ts->mem_base->reg, ts->mem_offset);
5396          }
5397          /* fall through to mark all parts in memory */
5398  
5399      case TCG_CALL_RET_BY_REF:
5400          /* The callee has performed a write through the reference. */
5401          for (i = 0; i < nb_oargs; i++) {
5402              TCGTemp *ts = arg_temp(op->args[i]);
5403              ts->val_type = TEMP_VAL_MEM;
5404          }
5405          break;
5406  
5407      default:
5408          g_assert_not_reached();
5409      }
5410  
5411      /* Flush or discard output registers as needed. */
5412      for (i = 0; i < nb_oargs; i++) {
5413          TCGTemp *ts = arg_temp(op->args[i]);
5414          if (NEED_SYNC_ARG(i)) {
5415              temp_sync(s, ts, s->reserved_regs, 0, IS_DEAD_ARG(i));
5416          } else if (IS_DEAD_ARG(i)) {
5417              temp_dead(s, ts);
5418          }
5419      }
5420  }
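
/*
 * Note the distinction exercised above: "save" leaves a global only in
 * its canonical memory slot (any register copy is dead), while "sync"
 * keeps the register copy but guarantees memory is coherent.  A helper
 * flagged TCG_CALL_NO_WRITE_GLOBALS may still read globals, so sync
 * suffices; an unrestricted helper forces a full save.
 */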
5421  
5422  /**
5423   * atom_and_align_for_opc:
5424   * @s: tcg context
5425   * @opc: memory operation code
5426   * @host_atom: MO_ATOM_{IFALIGN,WITHIN16,SUBALIGN} for host operations
5427   * @allow_two_ops: true if we are prepared to issue two operations
5428   *
5429   * Return the alignment and atomicity to use for the inline fast path
5430   * for the given memory operation.  The alignment may be larger than
5431   * that specified in @opc, and the correct alignment will be diagnosed
5432   * by the slow path helper.
5433   *
5434   * If @allow_two_ops, the host is prepared to test for 2x alignment,
5435   * and issue two loads or stores for subalignment.
5436   */
5437  static TCGAtomAlign atom_and_align_for_opc(TCGContext *s, MemOp opc,
5438                                             MemOp host_atom, bool allow_two_ops)
5439  {
5440      MemOp align = get_alignment_bits(opc);
5441      MemOp size = opc & MO_SIZE;
5442      MemOp half = size ? size - 1 : 0;
5443      MemOp atmax;
5444      MemOp atom;
5445  
5446      /* When serialized, no further atomicity required.  */
5447      if (s->gen_tb->cflags & CF_PARALLEL) {
5448          atom = opc & MO_ATOM_MASK;
5449      } else {
5450          atom = MO_ATOM_NONE;
5451      }
5452  
5453      switch (atom) {
5454      case MO_ATOM_NONE:
5455          /* The operation requires no specific atomicity. */
5456          atmax = MO_8;
5457          break;
5458  
5459      case MO_ATOM_IFALIGN:
5460          atmax = size;
5461          break;
5462  
5463      case MO_ATOM_IFALIGN_PAIR:
5464          atmax = half;
5465          break;
5466  
5467      case MO_ATOM_WITHIN16:
5468          atmax = size;
5469          if (size == MO_128) {
5470              /* Misalignment implies !within16, and therefore no atomicity. */
5471          } else if (host_atom != MO_ATOM_WITHIN16) {
5472              /* The host does not implement within16, so require alignment. */
5473              align = MAX(align, size);
5474          }
5475          break;
5476  
5477      case MO_ATOM_WITHIN16_PAIR:
5478          atmax = size;
5479          /*
5480           * Misalignment implies !within16, and therefore half atomicity.
5481           * Any host prepared for two operations can implement this with
5482           * half alignment.
5483           */
5484          if (host_atom != MO_ATOM_WITHIN16 && allow_two_ops) {
5485              align = MAX(align, half);
5486          }
5487          break;
5488  
5489      case MO_ATOM_SUBALIGN:
5490          atmax = size;
5491          if (host_atom != MO_ATOM_SUBALIGN) {
5492              /* If unaligned but not odd, there are subobjects up to half. */
5493              if (allow_two_ops) {
5494                  align = MAX(align, half);
5495              } else {
5496                  align = MAX(align, size);
5497              }
5498          }
5499          break;
5500  
5501      default:
5502          g_assert_not_reached();
5503      }
5504  
5505      return (TCGAtomAlign){ .atom = atmax, .align = align };
5506  }
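
/*
 * Worked example: a MO_64 load tagged MO_ATOM_IFALIGN_PAIR under
 * CF_PARALLEL yields atmax = MO_32 -- each 4-byte half must be atomic,
 * but the pair need not be.  Without CF_PARALLEL the access is
 * serialized, atom becomes MO_ATOM_NONE and atmax is just MO_8.
 */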
5507  
5508  /*
5509   * Similarly for qemu_ld/st slow path helpers.
5510   * We must re-implement tcg_gen_callN and tcg_reg_alloc_call simultaneously,
5511   * using only the provided backend tcg_out_* functions.
5512   */
5513  
5514  static int tcg_out_helper_stk_ofs(TCGType type, unsigned slot)
5515  {
5516      int ofs = arg_slot_stk_ofs(slot);
5517  
5518      /*
5519       * Each stack slot is TCG_TARGET_LONG_BITS.  If the host does not
5520       * require extension to uint64_t, adjust the address for uint32_t.
5521       */
5522      if (HOST_BIG_ENDIAN &&
5523          TCG_TARGET_REG_BITS == 64 &&
5524          type == TCG_TYPE_I32) {
5525          ofs += 4;
5526      }
5527      return ofs;
5528  }
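
/*
 * E.g. on a hypothetical big-endian 64-bit host, an I32 argument bound
 * for a stack slot is stored at the slot's offset + 4, so that the
 * 32-bit value occupies the least-significant half of the 64-bit slot.
 */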
5529  
5530  static void tcg_out_helper_load_slots(TCGContext *s,
5531                                        unsigned nmov, TCGMovExtend *mov,
5532                                        const TCGLdstHelperParam *parm)
5533  {
5534      unsigned i;
5535      TCGReg dst3;
5536  
5537      /*
5538       * Start from the end, storing to the stack first.
5539       * This frees those registers, so we need not consider overlap.
5540       */
5541      for (i = nmov; i-- > 0; ) {
5542          unsigned slot = mov[i].dst;
5543  
5544          if (arg_slot_reg_p(slot)) {
5545              goto found_reg;
5546          }
5547  
5548          TCGReg src = mov[i].src;
5549          TCGType dst_type = mov[i].dst_type;
5550          MemOp dst_mo = dst_type == TCG_TYPE_I32 ? MO_32 : MO_64;
5551  
5552          /* The argument is going onto the stack; extend into scratch. */
5553          if ((mov[i].src_ext & MO_SIZE) != dst_mo) {
5554              tcg_debug_assert(parm->ntmp != 0);
5555              mov[i].dst = src = parm->tmp[0];
5556              tcg_out_movext1(s, &mov[i]);
5557          }
5558  
5559          tcg_out_st(s, dst_type, src, TCG_REG_CALL_STACK,
5560                     tcg_out_helper_stk_ofs(dst_type, slot));
5561      }
5562      return;
5563  
5564   found_reg:
5565      /*
5566       * The remaining arguments are in registers.
5567       * Convert slot numbers to argument registers.
5568       */
5569      nmov = i + 1;
5570      for (i = 0; i < nmov; ++i) {
5571          mov[i].dst = tcg_target_call_iarg_regs[mov[i].dst];
5572      }
5573  
5574      switch (nmov) {
5575      case 4:
5576          /* The backend must have provided enough temps for the worst case. */
5577          tcg_debug_assert(parm->ntmp >= 2);
5578  
5579          dst3 = mov[3].dst;
5580          for (unsigned j = 0; j < 3; ++j) {
5581              if (dst3 == mov[j].src) {
5582                  /*
5583                   * Conflict. Copy the source to a temporary, perform the
5584                   * remaining moves, then the extension from our scratch
5585                   * on the way out.
5586                   */
5587                  TCGReg scratch = parm->tmp[1];
5588  
5589                  tcg_out_mov(s, mov[3].src_type, scratch, mov[3].src);
5590                  tcg_out_movext3(s, mov, mov + 1, mov + 2, parm->tmp[0]);
5591                  tcg_out_movext1_new_src(s, &mov[3], scratch);
5592                  return;
5593              }
5594          }
5595  
5596          /* No conflicts: perform this move and continue. */
5597          tcg_out_movext1(s, &mov[3]);
5598          /* fall through */
5599  
5600      case 3:
5601          tcg_out_movext3(s, mov, mov + 1, mov + 2,
5602                          parm->ntmp ? parm->tmp[0] : -1);
5603          break;
5604      case 2:
5605          tcg_out_movext2(s, mov, mov + 1,
5606                          parm->ntmp ? parm->tmp[0] : -1);
5607          break;
5608      case 1:
5609          tcg_out_movext1(s, mov);
5610          break;
5611      default:
5612          g_assert_not_reached();
5613      }
5614  }
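
/*
 * Example of the case-4 conflict above: if mov[3] targets a register
 * that mov[1] still needs to read, performing mov[3] first would
 * clobber that source.  Instead mov[3].src is parked in parm->tmp[1],
 * the first three moves are shuffled by tcg_out_movext3 (which uses
 * parm->tmp[0] for its own cycles), and the parked value is extended
 * into mov[3]'s destination last.
 */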
5615  
5616  static void tcg_out_helper_load_imm(TCGContext *s, unsigned slot,
5617                                      TCGType type, tcg_target_long imm,
5618                                      const TCGLdstHelperParam *parm)
5619  {
5620      if (arg_slot_reg_p(slot)) {
5621          tcg_out_movi(s, type, tcg_target_call_iarg_regs[slot], imm);
5622      } else {
5623          int ofs = tcg_out_helper_stk_ofs(type, slot);
5624          if (!tcg_out_sti(s, type, imm, TCG_REG_CALL_STACK, ofs)) {
5625              tcg_debug_assert(parm->ntmp != 0);
5626              tcg_out_movi(s, type, parm->tmp[0], imm);
5627              tcg_out_st(s, type, parm->tmp[0], TCG_REG_CALL_STACK, ofs);
5628          }
5629      }
5630  }
5631  
5632  static void tcg_out_helper_load_common_args(TCGContext *s,
5633                                              const TCGLabelQemuLdst *ldst,
5634                                              const TCGLdstHelperParam *parm,
5635                                              const TCGHelperInfo *info,
5636                                              unsigned next_arg)
5637  {
5638      TCGMovExtend ptr_mov = {
5639          .dst_type = TCG_TYPE_PTR,
5640          .src_type = TCG_TYPE_PTR,
5641          .src_ext = sizeof(void *) == 4 ? MO_32 : MO_64
5642      };
5643      const TCGCallArgumentLoc *loc = &info->in[0];
5644      TCGType type;
5645      unsigned slot;
5646      tcg_target_ulong imm;
5647  
5648      /*
5649       * Handle env, which is always first.
5650       */
5651      ptr_mov.dst = loc->arg_slot;
5652      ptr_mov.src = TCG_AREG0;
5653      tcg_out_helper_load_slots(s, 1, &ptr_mov, parm);
5654  
5655      /*
5656       * Handle oi.
5657       */
5658      imm = ldst->oi;
5659      loc = &info->in[next_arg];
5660      type = TCG_TYPE_I32;
5661      switch (loc->kind) {
5662      case TCG_CALL_ARG_NORMAL:
5663          break;
5664      case TCG_CALL_ARG_EXTEND_U:
5665      case TCG_CALL_ARG_EXTEND_S:
5666          /* No extension required for MemOpIdx. */
5667          tcg_debug_assert(imm <= INT32_MAX);
5668          type = TCG_TYPE_REG;
5669          break;
5670      default:
5671          g_assert_not_reached();
5672      }
5673      tcg_out_helper_load_imm(s, loc->arg_slot, type, imm, parm);
5674      next_arg++;
5675  
5676      /*
5677       * Handle ra.
5678       */
5679      loc = &info->in[next_arg];
5680      slot = loc->arg_slot;
5681      if (parm->ra_gen) {
5682          int arg_reg = -1;
5683          TCGReg ra_reg;
5684  
5685          if (arg_slot_reg_p(slot)) {
5686              arg_reg = tcg_target_call_iarg_regs[slot];
5687          }
5688          ra_reg = parm->ra_gen(s, ldst, arg_reg);
5689  
5690          ptr_mov.dst = slot;
5691          ptr_mov.src = ra_reg;
5692          tcg_out_helper_load_slots(s, 1, &ptr_mov, parm);
5693      } else {
5694          imm = (uintptr_t)ldst->raddr;
5695          tcg_out_helper_load_imm(s, slot, TCG_TYPE_PTR, imm, parm);
5696      }
5697  }
5698  
5699  static unsigned tcg_out_helper_add_mov(TCGMovExtend *mov,
5700                                         const TCGCallArgumentLoc *loc,
5701                                         TCGType dst_type, TCGType src_type,
5702                                         TCGReg lo, TCGReg hi)
5703  {
5704      MemOp reg_mo;
5705  
5706      if (dst_type <= TCG_TYPE_REG) {
5707          MemOp src_ext;
5708  
5709          switch (loc->kind) {
5710          case TCG_CALL_ARG_NORMAL:
5711              src_ext = src_type == TCG_TYPE_I32 ? MO_32 : MO_64;
5712              break;
5713          case TCG_CALL_ARG_EXTEND_U:
5714              dst_type = TCG_TYPE_REG;
5715              src_ext = MO_UL;
5716              break;
5717          case TCG_CALL_ARG_EXTEND_S:
5718              dst_type = TCG_TYPE_REG;
5719              src_ext = MO_SL;
5720              break;
5721          default:
5722              g_assert_not_reached();
5723          }
5724  
5725          mov[0].dst = loc->arg_slot;
5726          mov[0].dst_type = dst_type;
5727          mov[0].src = lo;
5728          mov[0].src_type = src_type;
5729          mov[0].src_ext = src_ext;
5730          return 1;
5731      }
5732  
5733      if (TCG_TARGET_REG_BITS == 32) {
5734          assert(dst_type == TCG_TYPE_I64);
5735          reg_mo = MO_32;
5736      } else {
5737          assert(dst_type == TCG_TYPE_I128);
5738          reg_mo = MO_64;
5739      }
5740  
5741      mov[0].dst = loc[HOST_BIG_ENDIAN].arg_slot;
5742      mov[0].src = lo;
5743      mov[0].dst_type = TCG_TYPE_REG;
5744      mov[0].src_type = TCG_TYPE_REG;
5745      mov[0].src_ext = reg_mo;
5746  
5747      mov[1].dst = loc[!HOST_BIG_ENDIAN].arg_slot;
5748      mov[1].src = hi;
5749      mov[1].dst_type = TCG_TYPE_REG;
5750      mov[1].src_type = TCG_TYPE_REG;
5751      mov[1].src_ext = reg_mo;
5752  
5753      return 2;
5754  }
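
/*
 * E.g. an I64 argument on a 32-bit host becomes two MO_32 moves: on a
 * little-endian host mov[0] carries the low word into loc[0] and
 * mov[1] the high word into loc[1]; a big-endian host swaps the two
 * slot assignments.
 */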
5755  
5756  static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
5757                                     const TCGLdstHelperParam *parm)
5758  {
5759      const TCGHelperInfo *info;
5760      const TCGCallArgumentLoc *loc;
5761      TCGMovExtend mov[2];
5762      unsigned next_arg, nmov;
5763      MemOp mop = get_memop(ldst->oi);
5764  
5765      switch (mop & MO_SIZE) {
5766      case MO_8:
5767      case MO_16:
5768      case MO_32:
5769          info = &info_helper_ld32_mmu;
5770          break;
5771      case MO_64:
5772          info = &info_helper_ld64_mmu;
5773          break;
5774      case MO_128:
5775          info = &info_helper_ld128_mmu;
5776          break;
5777      default:
5778          g_assert_not_reached();
5779      }
5780  
5781      /* Defer env argument. */
5782      next_arg = 1;
5783  
5784      loc = &info->in[next_arg];
5785      if (TCG_TARGET_REG_BITS == 32 && s->addr_type == TCG_TYPE_I32) {
5786          /*
5787           * 32-bit host with 32-bit guest: zero-extend the guest address
5788           * to 64-bits for the helper by storing the low part, then
5789           * load a zero for the high part.
5790           */
5791          tcg_out_helper_add_mov(mov, loc + HOST_BIG_ENDIAN,
5792                                 TCG_TYPE_I32, TCG_TYPE_I32,
5793                                 ldst->addrlo_reg, -1);
5794          tcg_out_helper_load_slots(s, 1, mov, parm);
5795  
5796          tcg_out_helper_load_imm(s, loc[!HOST_BIG_ENDIAN].arg_slot,
5797                                  TCG_TYPE_I32, 0, parm);
5798          next_arg += 2;
5799      } else {
5800          nmov = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, s->addr_type,
5801                                        ldst->addrlo_reg, ldst->addrhi_reg);
5802          tcg_out_helper_load_slots(s, nmov, mov, parm);
5803          next_arg += nmov;
5804      }
5805  
5806      switch (info->out_kind) {
5807      case TCG_CALL_RET_NORMAL:
5808      case TCG_CALL_RET_BY_VEC:
5809          break;
5810      case TCG_CALL_RET_BY_REF:
5811          /*
5812           * The return reference is in the first argument slot.
5813           * We need memory in which to return: re-use the top of stack.
5814           */
5815          {
5816              int ofs_slot0 = TCG_TARGET_CALL_STACK_OFFSET;
5817  
5818              if (arg_slot_reg_p(0)) {
5819                  tcg_out_addi_ptr(s, tcg_target_call_iarg_regs[0],
5820                                   TCG_REG_CALL_STACK, ofs_slot0);
5821              } else {
5822                  tcg_debug_assert(parm->ntmp != 0);
5823                  tcg_out_addi_ptr(s, parm->tmp[0],
5824                                   TCG_REG_CALL_STACK, ofs_slot0);
5825                  tcg_out_st(s, TCG_TYPE_PTR, parm->tmp[0],
5826                             TCG_REG_CALL_STACK, ofs_slot0);
5827              }
5828          }
5829          break;
5830      default:
5831          g_assert_not_reached();
5832      }
5833  
5834      tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg);
5835  }
5836  
5837  static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *ldst,
5838                                    bool load_sign,
5839                                    const TCGLdstHelperParam *parm)
5840  {
5841      MemOp mop = get_memop(ldst->oi);
5842      TCGMovExtend mov[2];
5843      int ofs_slot0;
5844  
5845      switch (ldst->type) {
5846      case TCG_TYPE_I64:
5847          if (TCG_TARGET_REG_BITS == 32) {
5848              break;
5849          }
5850          /* fall through */
5851  
5852      case TCG_TYPE_I32:
5853          mov[0].dst = ldst->datalo_reg;
5854          mov[0].src = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, 0);
5855          mov[0].dst_type = ldst->type;
5856          mov[0].src_type = TCG_TYPE_REG;
5857  
5858          /*
5859           * If load_sign, then we allowed the helper to perform the
5860           * appropriate sign extension to tcg_target_ulong, and all
5861           * we need now is a plain move.
5862           *
5863           * If not, then we expect the relevant extension
5864           * instruction to be no more expensive than a move, and
5865           * we thus save the icache etc by only using one of two
5866           * helper functions.
5867           */
5868          if (load_sign || !(mop & MO_SIGN)) {
5869              if (TCG_TARGET_REG_BITS == 32 || ldst->type == TCG_TYPE_I32) {
5870                  mov[0].src_ext = MO_32;
5871              } else {
5872                  mov[0].src_ext = MO_64;
5873              }
5874          } else {
5875              mov[0].src_ext = mop & MO_SSIZE;
5876          }
5877          tcg_out_movext1(s, mov);
5878          return;
5879  
5880      case TCG_TYPE_I128:
5881          tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
5882          ofs_slot0 = TCG_TARGET_CALL_STACK_OFFSET;
5883          switch (TCG_TARGET_CALL_RET_I128) {
5884          case TCG_CALL_RET_NORMAL:
5885              break;
5886          case TCG_CALL_RET_BY_VEC:
5887              tcg_out_st(s, TCG_TYPE_V128,
5888                         tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0),
5889                         TCG_REG_CALL_STACK, ofs_slot0);
5890              /* fall through */
5891          case TCG_CALL_RET_BY_REF:
5892              tcg_out_ld(s, TCG_TYPE_I64, ldst->datalo_reg,
5893                         TCG_REG_CALL_STACK, ofs_slot0 + 8 * HOST_BIG_ENDIAN);
5894              tcg_out_ld(s, TCG_TYPE_I64, ldst->datahi_reg,
5895                         TCG_REG_CALL_STACK, ofs_slot0 + 8 * !HOST_BIG_ENDIAN);
5896              return;
5897          default:
5898              g_assert_not_reached();
5899          }
5900          break;
5901  
5902      default:
5903          g_assert_not_reached();
5904      }
5905  
5906      mov[0].dst = ldst->datalo_reg;
5907      mov[0].src =
5908          tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, HOST_BIG_ENDIAN);
5909      mov[0].dst_type = TCG_TYPE_REG;
5910      mov[0].src_type = TCG_TYPE_REG;
5911      mov[0].src_ext = TCG_TARGET_REG_BITS == 32 ? MO_32 : MO_64;
5912  
5913      mov[1].dst = ldst->datahi_reg;
5914      mov[1].src =
5915          tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, !HOST_BIG_ENDIAN);
5916      mov[1].dst_type = TCG_TYPE_REG;
5917      mov[1].src_type = TCG_TYPE_REG;
5918      mov[1].src_ext = TCG_TARGET_REG_BITS == 32 ? MO_32 : MO_64;
5919  
5920      tcg_out_movext2(s, mov, mov + 1, parm->ntmp ? parm->tmp[0] : -1);
5921  }
5922  
5923  static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
5924                                     const TCGLdstHelperParam *parm)
5925  {
5926      const TCGHelperInfo *info;
5927      const TCGCallArgumentLoc *loc;
5928      TCGMovExtend mov[4];
5929      TCGType data_type;
5930      unsigned next_arg, nmov, n;
5931      MemOp mop = get_memop(ldst->oi);
5932  
5933      switch (mop & MO_SIZE) {
5934      case MO_8:
5935      case MO_16:
5936      case MO_32:
5937          info = &info_helper_st32_mmu;
5938          data_type = TCG_TYPE_I32;
5939          break;
5940      case MO_64:
5941          info = &info_helper_st64_mmu;
5942          data_type = TCG_TYPE_I64;
5943          break;
5944      case MO_128:
5945          info = &info_helper_st128_mmu;
5946          data_type = TCG_TYPE_I128;
5947          break;
5948      default:
5949          g_assert_not_reached();
5950      }
5951  
5952      /* Defer env argument. */
5953      next_arg = 1;
5954      nmov = 0;
5955  
5956      /* Handle addr argument. */
5957      loc = &info->in[next_arg];
5958      if (TCG_TARGET_REG_BITS == 32 && s->addr_type == TCG_TYPE_I32) {
5959          /*
5960           * 32-bit host with 32-bit guest: zero-extend the guest address
5961           * to 64-bits for the helper by storing the low part.  Later,
5962           * after we have processed the register inputs, we will load a
5963           * zero for the high part.
5964           */
        tcg_out_helper_add_mov(mov, loc + HOST_BIG_ENDIAN,
                               TCG_TYPE_I32, TCG_TYPE_I32,
                               ldst->addrlo_reg, -1);
        next_arg += 2;
        nmov += 1;
    } else {
        n = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, s->addr_type,
                                   ldst->addrlo_reg, ldst->addrhi_reg);
        next_arg += n;
        nmov += n;
    }

    /* Handle data argument. */
    loc = &info->in[next_arg];
    switch (loc->kind) {
    case TCG_CALL_ARG_NORMAL:
    case TCG_CALL_ARG_EXTEND_U:
    case TCG_CALL_ARG_EXTEND_S:
        n = tcg_out_helper_add_mov(mov + nmov, loc, data_type, ldst->type,
                                   ldst->datalo_reg, ldst->datahi_reg);
        next_arg += n;
        nmov += n;
        tcg_out_helper_load_slots(s, nmov, mov, parm);
        break;

    case TCG_CALL_ARG_BY_REF:
        tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
        tcg_debug_assert(data_type == TCG_TYPE_I128);
        tcg_out_st(s, TCG_TYPE_I64,
                   HOST_BIG_ENDIAN ? ldst->datahi_reg : ldst->datalo_reg,
                   TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc[0].ref_slot));
        tcg_out_st(s, TCG_TYPE_I64,
                   HOST_BIG_ENDIAN ? ldst->datalo_reg : ldst->datahi_reg,
                   TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc[1].ref_slot));

        tcg_out_helper_load_slots(s, nmov, mov, parm);

        if (arg_slot_reg_p(loc->arg_slot)) {
            tcg_out_addi_ptr(s, tcg_target_call_iarg_regs[loc->arg_slot],
                             TCG_REG_CALL_STACK,
                             arg_slot_stk_ofs(loc->ref_slot));
        } else {
            tcg_debug_assert(parm->ntmp != 0);
            tcg_out_addi_ptr(s, parm->tmp[0], TCG_REG_CALL_STACK,
                             arg_slot_stk_ofs(loc->ref_slot));
            tcg_out_st(s, TCG_TYPE_PTR, parm->tmp[0],
                       TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc->arg_slot));
        }
        next_arg += 2;
        break;

    default:
        g_assert_not_reached();
    }

    if (TCG_TARGET_REG_BITS == 32 && s->addr_type == TCG_TYPE_I32) {
        /* Zero extend the address by loading a zero for the high part. */
        loc = &info->in[1 + !HOST_BIG_ENDIAN];
        tcg_out_helper_load_imm(s, loc->arg_slot, TCG_TYPE_I32, 0, parm);
    }

    tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg);
}

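/*
 * Return value summary, matching the returns below: on success, the
 * number of bytes of host code generated.  -1 means the code_gen_buffer
 * high-water mark was crossed, and generation must be restarted after
 * the buffer is flushed; negative values from the backend finalization
 * passes are propagated unchanged; -2 means the TB grew too large (an
 * insn end offset would not fit in uint16_t, or relocations could not
 * be resolved).
 */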
int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start)
{
    int i, start_words, num_insns;
    TCGOp *op;

    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)
                 && qemu_log_in_addr_range(pc_start))) {
        FILE *logfile = qemu_log_trylock();
        if (logfile) {
            fprintf(logfile, "OP:\n");
            tcg_dump_ops(s, logfile, false);
            fprintf(logfile, "\n");
            qemu_log_unlock(logfile);
        }
    }

#ifdef CONFIG_DEBUG_TCG
    /* Ensure all labels referenced have been emitted.  */
    {
        TCGLabel *l;
        bool error = false;

        QSIMPLEQ_FOREACH(l, &s->labels, next) {
            if (unlikely(!l->present) && !QSIMPLEQ_EMPTY(&l->branches)) {
                qemu_log_mask(CPU_LOG_TB_OP,
                              "$L%d referenced but not present.\n", l->id);
                error = true;
            }
        }
        assert(!error);
    }
#endif

    tcg_optimize(s);

    reachable_code_pass(s);
    liveness_pass_0(s);
    liveness_pass_1(s);

    if (s->nb_indirects > 0) {
        if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
                     && qemu_log_in_addr_range(pc_start))) {
            FILE *logfile = qemu_log_trylock();
            if (logfile) {
                fprintf(logfile, "OP before indirect lowering:\n");
                tcg_dump_ops(s, logfile, false);
                fprintf(logfile, "\n");
                qemu_log_unlock(logfile);
            }
        }

        /* Replace indirect temps with direct temps.  */
        if (liveness_pass_2(s)) {
            /* If changes were made, re-run liveness.  */
            liveness_pass_1(s);
        }
    }

    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT)
                 && qemu_log_in_addr_range(pc_start))) {
        FILE *logfile = qemu_log_trylock();
        if (logfile) {
            fprintf(logfile, "OP after optimization and liveness analysis:\n");
            tcg_dump_ops(s, logfile, true);
            fprintf(logfile, "\n");
            qemu_log_unlock(logfile);
        }
    }

    /* Initialize goto_tb jump offsets. */
    tb->jmp_reset_offset[0] = TB_JMP_OFFSET_INVALID;
    tb->jmp_reset_offset[1] = TB_JMP_OFFSET_INVALID;
    tb->jmp_insn_offset[0] = TB_JMP_OFFSET_INVALID;
    tb->jmp_insn_offset[1] = TB_JMP_OFFSET_INVALID;

    tcg_reg_alloc_start(s);

    /*
     * Reset the buffer pointers when restarting after overflow.
     * TODO: Move this into translate-all.c with the rest of the
     * buffer management.  Having only this done here is confusing.
     */
    s->code_buf = tcg_splitwx_to_rw(tb->tc.ptr);
    s->code_ptr = s->code_buf;

#ifdef TCG_TARGET_NEED_LDST_LABELS
    QSIMPLEQ_INIT(&s->ldst_labels);
#endif
#ifdef TCG_TARGET_NEED_POOL_LABELS
    s->pool_labels = NULL;
#endif

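    /* One group of insn_start_words uint64_t values per guest insn. */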
    start_words = s->insn_start_words;
    s->gen_insn_data =
        tcg_malloc(sizeof(uint64_t) * s->gen_tb->icount * start_words);

    tcg_out_tb_start(s);

    num_insns = -1;
    QTAILQ_FOREACH(op, &s->ops, link) {
        TCGOpcode opc = op->opc;

        switch (opc) {
        case INDEX_op_mov_i32:
        case INDEX_op_mov_i64:
        case INDEX_op_mov_vec:
            tcg_reg_alloc_mov(s, op);
            break;
        case INDEX_op_dup_vec:
            tcg_reg_alloc_dup(s, op);
            break;
        case INDEX_op_insn_start:
            if (num_insns >= 0) {
                size_t off = tcg_current_code_size(s);
                s->gen_insn_end_off[num_insns] = off;
                /* Assert that we do not overflow our stored offset.  */
                assert(s->gen_insn_end_off[num_insns] == off);
            }
            num_insns++;
            for (i = 0; i < start_words; ++i) {
                s->gen_insn_data[num_insns * start_words + i] =
                    tcg_get_insn_start_param(op, i);
            }
            break;
        case INDEX_op_discard:
            temp_dead(s, arg_temp(op->args[0]));
            break;
        case INDEX_op_set_label:
            tcg_reg_alloc_bb_end(s, s->reserved_regs);
            tcg_out_label(s, arg_label(op->args[0]));
            break;
        case INDEX_op_call:
            tcg_reg_alloc_call(s, op);
            break;
        case INDEX_op_exit_tb:
            tcg_out_exit_tb(s, op->args[0]);
            break;
        case INDEX_op_goto_tb:
            tcg_out_goto_tb(s, op->args[0]);
            break;
        case INDEX_op_dup2_vec:
            if (tcg_reg_alloc_dup2(s, op)) {
                break;
            }
            /* fall through */
        default:
            /* Sanity check that we've not introduced any unhandled opcodes. */
            tcg_debug_assert(tcg_op_supported(opc));
            /*
             * Note: it would be much faster to have specialized register
             * allocator functions for some common argument patterns.
             */
            tcg_reg_alloc_op(s, op);
            break;
        }
        /* Test for (pending) buffer overflow.  The assumption is that any
           one operation beginning below the high water mark cannot overrun
           the buffer completely.  Thus we can test for overflow after
           generating code without having to check during generation.  */
        if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
            return -1;
        }
        /* Test for TB overflow, as seen by gen_insn_end_off.  */
        if (unlikely(tcg_current_code_size(s) > UINT16_MAX)) {
            return -2;
        }
    }
    tcg_debug_assert(num_insns + 1 == s->gen_tb->icount);
    s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);

    /* Generate TB finalization at the end of the block.  */
#ifdef TCG_TARGET_NEED_LDST_LABELS
    i = tcg_out_ldst_finalize(s);
    if (i < 0) {
        return i;
    }
#endif
#ifdef TCG_TARGET_NEED_POOL_LABELS
    i = tcg_out_pool_finalize(s);
    if (i < 0) {
        return i;
    }
#endif
    if (!tcg_resolve_relocs(s)) {
        return -2;
    }

#ifndef CONFIG_TCG_INTERPRETER
    /* flush instruction cache */
    flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
                        (uintptr_t)s->code_buf,
                        tcg_ptr_byte_diff(s->code_ptr, s->code_buf));
#endif

    return tcg_current_code_size(s);
}

#ifdef ELF_HOST_MACHINE
/* In order to use this feature, the backend needs to do three things:

   (1) Define ELF_HOST_MACHINE, which both indicates support for the
       feature and supplies the value to put into the ELF image.

   (2) Define tcg_register_jit.  This should create a buffer containing
       the contents of a .debug_frame section that describes the post-
       prologue unwind info for the tcg machine.

   (3) Call tcg_register_jit_int, with the constructed .debug_frame.
*/

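/*
 * Illustrative sketch of steps (2) and (3) above, assuming a
 * backend-defined DebugFrame structure holding the CIE/FDE contents
 * (no particular backend's code is reproduced here):
 *
 *     void tcg_register_jit(const void *buf, size_t buf_size)
 *     {
 *         static const DebugFrame frame = { ... unwind info ... };
 *         tcg_register_jit_int(buf, buf_size, &frame, sizeof(frame));
 *     }
 *
 * A constant template suffices because tcg_register_jit_int copies the
 * frame and patches fde.func_start/func_len itself.
 */
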
/* Begin GDB interface.  THE FOLLOWING MUST MATCH GDB DOCS.  */
typedef enum {
    JIT_NOACTION = 0,
    JIT_REGISTER_FN,
    JIT_UNREGISTER_FN
} jit_actions_t;

struct jit_code_entry {
    struct jit_code_entry *next_entry;
    struct jit_code_entry *prev_entry;
    const void *symfile_addr;
    uint64_t symfile_size;
};

struct jit_descriptor {
    uint32_t version;
    uint32_t action_flag;
    struct jit_code_entry *relevant_entry;
    struct jit_code_entry *first_entry;
};

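/*
 * GDB plants a breakpoint in this function; the noinline attribute and
 * the empty asm statement keep the compiler from eliding or folding it.
 */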
void __jit_debug_register_code(void) __attribute__((noinline));
void __jit_debug_register_code(void)
{
    asm("");
}

/* Must statically initialize the version, because GDB may check
   the version before we can set it.  */
struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };

/* End GDB interface.  */

/*
 * Return the offset of STR within STRTAB.  Note there is no terminating
 * condition: STR is assumed to be present, which holds for the fixed
 * string table below.
 */
static int find_string(const char *strtab, const char *str)
{
    const char *p = strtab + 1;

    while (1) {
        if (strcmp(p, str) == 0) {
            return p - strtab;
        }
        p += strlen(p) + 1;
    }
}

static void tcg_register_jit_int(const void *buf_ptr, size_t buf_size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
{
    struct __attribute__((packed)) DebugInfo {
        uint32_t  len;
        uint16_t  version;
        uint32_t  abbrev;
        uint8_t   ptr_size;
        uint8_t   cu_die;
        uint16_t  cu_lang;
        uintptr_t cu_low_pc;
        uintptr_t cu_high_pc;
        uint8_t   fn_die;
        char      fn_name[16];
        uintptr_t fn_low_pc;
        uintptr_t fn_high_pc;
        uint8_t   cu_eoc;
    };

    struct ElfImage {
        ElfW(Ehdr) ehdr;
        ElfW(Phdr) phdr;
        ElfW(Shdr) shdr[7];
        ElfW(Sym)  sym[2];
        struct DebugInfo di;
        uint8_t    da[24];
        char       str[80];
    };

    struct ElfImage *img;

    static const struct ElfImage img_template = {
        .ehdr = {
            .e_ident[EI_MAG0] = ELFMAG0,
            .e_ident[EI_MAG1] = ELFMAG1,
            .e_ident[EI_MAG2] = ELFMAG2,
            .e_ident[EI_MAG3] = ELFMAG3,
            .e_ident[EI_CLASS] = ELF_CLASS,
            .e_ident[EI_DATA] = ELF_DATA,
            .e_ident[EI_VERSION] = EV_CURRENT,
            .e_type = ET_EXEC,
            .e_machine = ELF_HOST_MACHINE,
            .e_version = EV_CURRENT,
            .e_phoff = offsetof(struct ElfImage, phdr),
            .e_shoff = offsetof(struct ElfImage, shdr),
            .e_ehsize = sizeof(ElfW(Ehdr)),
            .e_phentsize = sizeof(ElfW(Phdr)),
            .e_phnum = 1,
            .e_shentsize = sizeof(ElfW(Shdr)),
            .e_shnum = ARRAY_SIZE(img->shdr),
            .e_shstrndx = ARRAY_SIZE(img->shdr) - 1,
#ifdef ELF_HOST_FLAGS
            .e_flags = ELF_HOST_FLAGS,
#endif
#ifdef ELF_OSABI
            .e_ident[EI_OSABI] = ELF_OSABI,
#endif
        },
        .phdr = {
            .p_type = PT_LOAD,
            .p_flags = PF_X,
        },
        .shdr = {
            [0] = { .sh_type = SHT_NULL },
            /* Trick: The contents of code_gen_buffer are not present in
               this fake ELF file; that got allocated elsewhere.  Therefore
               we mark .text as SHT_NOBITS (similar to .bss) so that readers
               will not look for contents.  We can record any address.  */
            [1] = { /* .text */
                .sh_type = SHT_NOBITS,
                .sh_flags = SHF_EXECINSTR | SHF_ALLOC,
            },
            [2] = { /* .debug_info */
                .sh_type = SHT_PROGBITS,
                .sh_offset = offsetof(struct ElfImage, di),
                .sh_size = sizeof(struct DebugInfo),
            },
            [3] = { /* .debug_abbrev */
                .sh_type = SHT_PROGBITS,
                .sh_offset = offsetof(struct ElfImage, da),
                .sh_size = sizeof(img->da),
            },
            [4] = { /* .debug_frame */
                .sh_type = SHT_PROGBITS,
                .sh_offset = sizeof(struct ElfImage),
            },
            [5] = { /* .symtab */
                .sh_type = SHT_SYMTAB,
                .sh_offset = offsetof(struct ElfImage, sym),
                .sh_size = sizeof(img->sym),
                .sh_info = 1,
                .sh_link = ARRAY_SIZE(img->shdr) - 1,
                .sh_entsize = sizeof(ElfW(Sym)),
            },
            [6] = { /* .strtab */
                .sh_type = SHT_STRTAB,
                .sh_offset = offsetof(struct ElfImage, str),
                .sh_size = sizeof(img->str),
            }
        },
        .sym = {
            [1] = { /* code_gen_buffer */
                .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC),
                .st_shndx = 1,
            }
        },
        .di = {
            .len = sizeof(struct DebugInfo) - 4,
            .version = 2,
            .ptr_size = sizeof(void *),
            .cu_die = 1,
            .cu_lang = 0x8001,  /* DW_LANG_Mips_Assembler */
            .fn_die = 2,
            .fn_name = "code_gen_buffer"
        },
        .da = {
            1,          /* abbrev number (the cu) */
            0x11, 1,    /* DW_TAG_compile_unit, has children */
            0x13, 0x5,  /* DW_AT_language, DW_FORM_data2 */
            0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
            0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
            0, 0,       /* end of abbrev */
            2,          /* abbrev number (the fn) */
            0x2e, 0,    /* DW_TAG_subprogram, no children */
            0x3, 0x8,   /* DW_AT_name, DW_FORM_string */
            0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
            0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
            0, 0,       /* end of abbrev */
            0           /* no more abbrev */
        },
        .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0"
               ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer",
    };

    /* We only need a single jit entry; statically allocate it.  */
    static struct jit_code_entry one_entry;

    uintptr_t buf = (uintptr_t)buf_ptr;
    size_t img_size = sizeof(struct ElfImage) + debug_frame_size;
    DebugFrameHeader *dfh;

    img = g_malloc(img_size);
    *img = img_template;

    img->phdr.p_vaddr = buf;
    img->phdr.p_paddr = buf;
    img->phdr.p_memsz = buf_size;

    img->shdr[1].sh_name = find_string(img->str, ".text");
    img->shdr[1].sh_addr = buf;
    img->shdr[1].sh_size = buf_size;

    img->shdr[2].sh_name = find_string(img->str, ".debug_info");
    img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev");

    img->shdr[4].sh_name = find_string(img->str, ".debug_frame");
    img->shdr[4].sh_size = debug_frame_size;

    img->shdr[5].sh_name = find_string(img->str, ".symtab");
    img->shdr[6].sh_name = find_string(img->str, ".strtab");

    img->sym[1].st_name = find_string(img->str, "code_gen_buffer");
    img->sym[1].st_value = buf;
    img->sym[1].st_size = buf_size;

    img->di.cu_low_pc = buf;
    img->di.cu_high_pc = buf + buf_size;
    img->di.fn_low_pc = buf;
    img->di.fn_high_pc = buf + buf_size;

    dfh = (DebugFrameHeader *)(img + 1);
    memcpy(dfh, debug_frame, debug_frame_size);
    dfh->fde.func_start = buf;
    dfh->fde.func_len = buf_size;

#ifdef DEBUG_JIT
    /* Enable this block to be able to debug the ELF image file creation.
       One can use readelf, objdump, or other inspection utilities.  */
    {
        g_autofree char *jit = g_strdup_printf("%s/qemu.jit", g_get_tmp_dir());
        FILE *f = fopen(jit, "w+b");
        if (f) {
            if (fwrite(img, img_size, 1, f) != 1) {
                /* Avoid the unused return value warning for fwrite.  */
            }
            fclose(f);
        }
    }
#endif

    one_entry.symfile_addr = img;
    one_entry.symfile_size = img_size;

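    /*
     * Per the GDB JIT interface: publish the entry, mark the action, and
     * call __jit_debug_register_code() so that the debugger's breakpoint
     * there fires and it re-reads the descriptor.
     */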
    __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
    __jit_debug_descriptor.relevant_entry = &one_entry;
    __jit_debug_descriptor.first_entry = &one_entry;
    __jit_debug_register_code();
}
#else
/* No support for the feature.  Provide the entry point expected by exec.c,
   and implement the internal function we declared earlier.  */

static void tcg_register_jit_int(const void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
{
}

void tcg_register_jit(const void *buf, size_t buf_size)
{
}
#endif /* ELF_HOST_MACHINE */

#if !TCG_TARGET_MAYBE_vec
void tcg_expand_vec_op(TCGOpcode o, TCGType t, unsigned e, TCGArg a0, ...)
{
    g_assert_not_reached();
}
#endif