/*
 * Tiny Code Generator for QEMU
 *
 * Copyright (c) 2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "qemu/osdep.h"
#include "tcg/tcg.h"
#include "tcg/tcg-temp-internal.h"
#include "tcg/tcg-op-common.h"
#include "tcg/tcg-mo.h"
#include "exec/translation-block.h"
#include "exec/plugin-gen.h"
#include "tcg-internal.h"


static void check_max_alignment(unsigned a_bits)
{
    /*
     * The requested alignment cannot overlap the TLB flags.
     * FIXME: Must keep the count up-to-date with "exec/cpu-all.h".
     */
    if (tcg_use_softmmu) {
        tcg_debug_assert(a_bits + 5 <= tcg_ctx->page_bits);
    }
}

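/*
 * Canonicalize the MemOp for a load (@st false) or store (@st true) of
 * a value that is 64 bits (@is64 true) or 32 bits wide: validate the
 * alignment request, prefer MO_ALIGN over an equivalent MO_ALIGN_N,
 * drop MO_BSWAP for byte accesses and MO_SIGN where it cannot matter,
 * and reduce the requested atomicity when not generating code for
 * parallel execution.
 */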
static MemOp tcg_canonicalize_memop(MemOp op, bool is64, bool st)
{
    unsigned a_bits = get_alignment_bits(op);

    check_max_alignment(a_bits);

    /* Prefer MO_ALIGN+MO_XX over MO_ALIGN_XX+MO_XX */
    if (a_bits == (op & MO_SIZE)) {
        op = (op & ~MO_AMASK) | MO_ALIGN;
    }

    switch (op & MO_SIZE) {
    case MO_8:
        op &= ~MO_BSWAP;
        break;
    case MO_16:
        break;
    case MO_32:
        if (!is64) {
            op &= ~MO_SIGN;
        }
        break;
    case MO_64:
        if (is64) {
            op &= ~MO_SIGN;
            break;
        }
        /* fall through */
    default:
        g_assert_not_reached();
    }
    if (st) {
        op &= ~MO_SIGN;
    }

    /* In serial mode, reduce atomicity. */
    if (!(tcg_ctx->gen_tb->cflags & CF_PARALLEL)) {
        op &= ~MO_ATOM_MASK;
        op |= MO_ATOM_NONE;
    }

    return op;
}

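/*
 * Emit the opcode for a guest load or store.  @vl and @vh are the low
 * and high halves of the data value; @vh is NULL for a single-output
 * operation.  On a 32-bit host with a 64-bit guest address, the address
 * temp is likewise split into two host registers (see TCGV_LOW/HIGH).
 */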
static void gen_ldst(TCGOpcode opc, TCGTemp *vl, TCGTemp *vh,
                     TCGTemp *addr, MemOpIdx oi)
{
    if (TCG_TARGET_REG_BITS == 64 || tcg_ctx->addr_type == TCG_TYPE_I32) {
        if (vh) {
            tcg_gen_op4(opc, temp_arg(vl), temp_arg(vh), temp_arg(addr), oi);
        } else {
            tcg_gen_op3(opc, temp_arg(vl), temp_arg(addr), oi);
        }
    } else {
        /* See TCGV_LOW/HIGH. */
        TCGTemp *al = addr + HOST_BIG_ENDIAN;
        TCGTemp *ah = addr + !HOST_BIG_ENDIAN;

        if (vh) {
            tcg_gen_op5(opc, temp_arg(vl), temp_arg(vh),
                        temp_arg(al), temp_arg(ah), oi);
        } else {
            tcg_gen_op4(opc, temp_arg(vl), temp_arg(al), temp_arg(ah), oi);
        }
    }
}

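/* As gen_ldst, but with the 64-bit value split in two on 32-bit hosts. */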
static void gen_ldst_i64(TCGOpcode opc, TCGv_i64 v, TCGTemp *addr, MemOpIdx oi)
{
    if (TCG_TARGET_REG_BITS == 32) {
        TCGTemp *vl = tcgv_i32_temp(TCGV_LOW(v));
        TCGTemp *vh = tcgv_i32_temp(TCGV_HIGH(v));
        gen_ldst(opc, vl, vh, addr, oi);
    } else {
        gen_ldst(opc, tcgv_i64_temp(v), NULL, addr, oi);
    }
}

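/*
 * Emit a host memory barrier for the ordering in @type, minus whatever
 * the guest memory model does not require and whatever the host already
 * provides by default (TCG_TARGET_DEFAULT_MO).
 */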
static void tcg_gen_req_mo(TCGBar type)
{
    type &= tcg_ctx->guest_mo;
    type &= ~TCG_TARGET_DEFAULT_MO;
    if (type) {
        tcg_gen_mb(type | TCG_BAR_SC);
    }
}

/* Only required for loads, where value might overlap addr. */
static TCGv_i64 plugin_maybe_preserve_addr(TCGTemp *addr)
{
#ifdef CONFIG_PLUGIN
    if (tcg_ctx->plugin_insn != NULL) {
        /* Save a copy of the vaddr for use after a load.  */
        TCGv_i64 temp = tcg_temp_ebb_new_i64();
        if (tcg_ctx->addr_type == TCG_TYPE_I32) {
            tcg_gen_extu_i32_i64(temp, temp_tcgv_i32(addr));
        } else {
            tcg_gen_mov_i64(temp, temp_tcgv_i64(addr));
        }
        return temp;
    }
#endif
    return NULL;
}

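/*
 * Emit the plugin callback for a memory access.  The address is always
 * presented as a 64-bit value; @copy_addr, if non-NULL, is a copy saved
 * before a load overwrote @orig_addr, and is freed here.
 */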
static void
plugin_gen_mem_callbacks(TCGv_i64 copy_addr, TCGTemp *orig_addr, MemOpIdx oi,
                         enum qemu_plugin_mem_rw rw)
{
#ifdef CONFIG_PLUGIN
    if (tcg_ctx->plugin_insn != NULL) {
        qemu_plugin_meminfo_t info = make_plugin_meminfo(oi, rw);

        if (tcg_ctx->addr_type == TCG_TYPE_I32) {
            if (!copy_addr) {
                copy_addr = tcg_temp_ebb_new_i64();
                tcg_gen_extu_i32_i64(copy_addr, temp_tcgv_i32(orig_addr));
            }
            plugin_gen_empty_mem_callback(copy_addr, info);
            tcg_temp_free_i64(copy_addr);
        } else {
            if (copy_addr) {
                plugin_gen_empty_mem_callback(copy_addr, info);
                tcg_temp_free_i64(copy_addr);
            } else {
                plugin_gen_empty_mem_callback(temp_tcgv_i64(orig_addr), info);
            }
        }
    }
#endif
}

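/*
 * Expand a guest load into a 32-bit value.  If the backend cannot
 * byte-swap during the access, load in host order and swap afterwards.
 */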
static void tcg_gen_qemu_ld_i32_int(TCGv_i32 val, TCGTemp *addr,
                                    TCGArg idx, MemOp memop)
{
    MemOp orig_memop;
    MemOpIdx orig_oi, oi;
    TCGv_i64 copy_addr;
    TCGOpcode opc;

    tcg_gen_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
    orig_memop = memop = tcg_canonicalize_memop(memop, 0, 0);
    orig_oi = oi = make_memop_idx(memop, idx);

    if ((memop & MO_BSWAP) && !tcg_target_has_memory_bswap(memop)) {
        memop &= ~MO_BSWAP;
        /* The bswap primitive benefits from zero-extended input.  */
        if ((memop & MO_SSIZE) == MO_SW) {
            memop &= ~MO_SIGN;
        }
        oi = make_memop_idx(memop, idx);
    }

    copy_addr = plugin_maybe_preserve_addr(addr);
    if (tcg_ctx->addr_type == TCG_TYPE_I32) {
        opc = INDEX_op_qemu_ld_a32_i32;
    } else {
        opc = INDEX_op_qemu_ld_a64_i32;
    }
    gen_ldst(opc, tcgv_i32_temp(val), NULL, addr, oi);
    plugin_gen_mem_callbacks(copy_addr, addr, orig_oi, QEMU_PLUGIN_MEM_R);

    if ((orig_memop ^ memop) & MO_BSWAP) {
        switch (orig_memop & MO_SIZE) {
        case MO_16:
            tcg_gen_bswap16_i32(val, val, (orig_memop & MO_SIGN
                                           ? TCG_BSWAP_IZ | TCG_BSWAP_OS
                                           : TCG_BSWAP_IZ | TCG_BSWAP_OZ));
            break;
        case MO_32:
            tcg_gen_bswap32_i32(val, val);
            break;
        default:
            g_assert_not_reached();
        }
    }
}

void tcg_gen_qemu_ld_i32_chk(TCGv_i32 val, TCGTemp *addr, TCGArg idx,
                             MemOp memop, TCGType addr_type)
{
    tcg_debug_assert(addr_type == tcg_ctx->addr_type);
    tcg_debug_assert((memop & MO_SIZE) <= MO_32);
    tcg_gen_qemu_ld_i32_int(val, addr, idx, memop);
}

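/*
 * Expand a guest store of a 32-bit value.  If the backend cannot
 * byte-swap during the access, swap into a temporary beforehand.
 */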
static void tcg_gen_qemu_st_i32_int(TCGv_i32 val, TCGTemp *addr,
                                    TCGArg idx, MemOp memop)
{
    TCGv_i32 swap = NULL;
    MemOpIdx orig_oi, oi;
    TCGOpcode opc;

    tcg_gen_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST);
    memop = tcg_canonicalize_memop(memop, 0, 1);
    orig_oi = oi = make_memop_idx(memop, idx);

    if ((memop & MO_BSWAP) && !tcg_target_has_memory_bswap(memop)) {
        swap = tcg_temp_ebb_new_i32();
        switch (memop & MO_SIZE) {
        case MO_16:
            tcg_gen_bswap16_i32(swap, val, 0);
            break;
        case MO_32:
            tcg_gen_bswap32_i32(swap, val);
            break;
        default:
            g_assert_not_reached();
        }
        val = swap;
        memop &= ~MO_BSWAP;
        oi = make_memop_idx(memop, idx);
    }

    if (TCG_TARGET_HAS_qemu_st8_i32 && (memop & MO_SIZE) == MO_8) {
        if (tcg_ctx->addr_type == TCG_TYPE_I32) {
            opc = INDEX_op_qemu_st8_a32_i32;
        } else {
            opc = INDEX_op_qemu_st8_a64_i32;
        }
    } else {
        if (tcg_ctx->addr_type == TCG_TYPE_I32) {
            opc = INDEX_op_qemu_st_a32_i32;
        } else {
            opc = INDEX_op_qemu_st_a64_i32;
        }
    }
    gen_ldst(opc, tcgv_i32_temp(val), NULL, addr, oi);
    plugin_gen_mem_callbacks(NULL, addr, orig_oi, QEMU_PLUGIN_MEM_W);

    if (swap) {
        tcg_temp_free_i32(swap);
    }
}

void tcg_gen_qemu_st_i32_chk(TCGv_i32 val, TCGTemp *addr, TCGArg idx,
                             MemOp memop, TCGType addr_type)
{
    tcg_debug_assert(addr_type == tcg_ctx->addr_type);
    tcg_debug_assert((memop & MO_SIZE) <= MO_32);
    tcg_gen_qemu_st_i32_int(val, addr, idx, memop);
}

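/*
 * Expand a guest load into a 64-bit value.  On 32-bit hosts, loads
 * narrower than 64 bits go through the 32-bit path and the high half
 * is filled by sign or zero extension.
 */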
static void tcg_gen_qemu_ld_i64_int(TCGv_i64 val, TCGTemp *addr,
                                    TCGArg idx, MemOp memop)
{
    MemOp orig_memop;
    MemOpIdx orig_oi, oi;
    TCGv_i64 copy_addr;
    TCGOpcode opc;

    if (TCG_TARGET_REG_BITS == 32 && (memop & MO_SIZE) < MO_64) {
        tcg_gen_qemu_ld_i32_int(TCGV_LOW(val), addr, idx, memop);
        if (memop & MO_SIGN) {
            tcg_gen_sari_i32(TCGV_HIGH(val), TCGV_LOW(val), 31);
        } else {
            tcg_gen_movi_i32(TCGV_HIGH(val), 0);
        }
        return;
    }

    tcg_gen_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
    orig_memop = memop = tcg_canonicalize_memop(memop, 1, 0);
    orig_oi = oi = make_memop_idx(memop, idx);

    if ((memop & MO_BSWAP) && !tcg_target_has_memory_bswap(memop)) {
        memop &= ~MO_BSWAP;
        /* The bswap primitive benefits from zero-extended input.  */
        if ((memop & MO_SIGN) && (memop & MO_SIZE) < MO_64) {
            memop &= ~MO_SIGN;
        }
        oi = make_memop_idx(memop, idx);
    }

    copy_addr = plugin_maybe_preserve_addr(addr);
    if (tcg_ctx->addr_type == TCG_TYPE_I32) {
        opc = INDEX_op_qemu_ld_a32_i64;
    } else {
        opc = INDEX_op_qemu_ld_a64_i64;
    }
    gen_ldst_i64(opc, val, addr, oi);
    plugin_gen_mem_callbacks(copy_addr, addr, orig_oi, QEMU_PLUGIN_MEM_R);

    if ((orig_memop ^ memop) & MO_BSWAP) {
        int flags = (orig_memop & MO_SIGN
                     ? TCG_BSWAP_IZ | TCG_BSWAP_OS
                     : TCG_BSWAP_IZ | TCG_BSWAP_OZ);
        switch (orig_memop & MO_SIZE) {
        case MO_16:
            tcg_gen_bswap16_i64(val, val, flags);
            break;
        case MO_32:
            tcg_gen_bswap32_i64(val, val, flags);
            break;
        case MO_64:
            tcg_gen_bswap64_i64(val, val);
            break;
        default:
            g_assert_not_reached();
        }
    }
}

void tcg_gen_qemu_ld_i64_chk(TCGv_i64 val, TCGTemp *addr, TCGArg idx,
                             MemOp memop, TCGType addr_type)
{
    tcg_debug_assert(addr_type == tcg_ctx->addr_type);
    tcg_debug_assert((memop & MO_SIZE) <= MO_64);
    tcg_gen_qemu_ld_i64_int(val, addr, idx, memop);
}

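/*
 * Expand a guest store of a 64-bit value.  On 32-bit hosts, stores
 * narrower than 64 bits go through the 32-bit path.
 */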
static void tcg_gen_qemu_st_i64_int(TCGv_i64 val, TCGTemp *addr,
                                    TCGArg idx, MemOp memop)
{
    TCGv_i64 swap = NULL;
    MemOpIdx orig_oi, oi;
    TCGOpcode opc;

    if (TCG_TARGET_REG_BITS == 32 && (memop & MO_SIZE) < MO_64) {
        tcg_gen_qemu_st_i32_int(TCGV_LOW(val), addr, idx, memop);
        return;
    }

    tcg_gen_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST);
    memop = tcg_canonicalize_memop(memop, 1, 1);
    orig_oi = oi = make_memop_idx(memop, idx);

    if ((memop & MO_BSWAP) && !tcg_target_has_memory_bswap(memop)) {
        swap = tcg_temp_ebb_new_i64();
        switch (memop & MO_SIZE) {
        case MO_16:
            tcg_gen_bswap16_i64(swap, val, 0);
            break;
        case MO_32:
            tcg_gen_bswap32_i64(swap, val, 0);
            break;
        case MO_64:
            tcg_gen_bswap64_i64(swap, val);
            break;
        default:
            g_assert_not_reached();
        }
        val = swap;
        memop &= ~MO_BSWAP;
        oi = make_memop_idx(memop, idx);
    }

    if (tcg_ctx->addr_type == TCG_TYPE_I32) {
        opc = INDEX_op_qemu_st_a32_i64;
    } else {
        opc = INDEX_op_qemu_st_a64_i64;
    }
    gen_ldst_i64(opc, val, addr, oi);
    plugin_gen_mem_callbacks(NULL, addr, orig_oi, QEMU_PLUGIN_MEM_W);

    if (swap) {
        tcg_temp_free_i64(swap);
    }
}

void tcg_gen_qemu_st_i64_chk(TCGv_i64 val, TCGTemp *addr, TCGArg idx,
                             MemOp memop, TCGType addr_type)
{
    tcg_debug_assert(addr_type == tcg_ctx->addr_type);
    tcg_debug_assert((memop & MO_SIZE) <= MO_64);
    tcg_gen_qemu_st_i64_int(val, addr, idx, memop);
}

/*
 * Return true if @mop, without knowledge of the pointer alignment,
 * does not require 16-byte atomicity, and it would be advantageous
 * to avoid a call to a helper function.
 */
static bool use_two_i64_for_i128(MemOp mop)
{
    /* Two softmmu tlb lookups are larger than one function call. */
    if (tcg_use_softmmu) {
        return false;
    }

    /*
     * For user-only, two 64-bit operations may well be smaller than a call.
     * Determine if that would be legal for the requested atomicity.
     */
    switch (mop & MO_ATOM_MASK) {
    case MO_ATOM_NONE:
    case MO_ATOM_IFALIGN_PAIR:
        return true;
    case MO_ATOM_IFALIGN:
    case MO_ATOM_SUBALIGN:
    case MO_ATOM_WITHIN16:
    case MO_ATOM_WITHIN16_PAIR:
        return false;
    default:
        g_assert_not_reached();
    }
}

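/*
 * Split a 128-bit memory operation into a pair of 64-bit operations,
 * preserving the endianness and as much of the original alignment
 * requirement as can be expressed for each half.
 */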
static void canonicalize_memop_i128_as_i64(MemOp ret[2], MemOp orig)
{
    MemOp mop_1 = orig, mop_2;

    /* Reduce the size to 64-bit. */
    mop_1 = (mop_1 & ~MO_SIZE) | MO_64;

    /* Retain the alignment constraints of the original. */
    switch (orig & MO_AMASK) {
    case MO_UNALN:
    case MO_ALIGN_2:
    case MO_ALIGN_4:
        mop_2 = mop_1;
        break;
    case MO_ALIGN_8:
        /* Prefer MO_ALIGN+MO_64 to MO_ALIGN_8+MO_64. */
        mop_1 = (mop_1 & ~MO_AMASK) | MO_ALIGN;
        mop_2 = mop_1;
        break;
    case MO_ALIGN:
        /* Second has 8-byte alignment; first has 16-byte alignment. */
        mop_2 = mop_1;
        mop_1 = (mop_1 & ~MO_AMASK) | MO_ALIGN_16;
        break;
    case MO_ALIGN_16:
    case MO_ALIGN_32:
    case MO_ALIGN_64:
        /* Second has 8-byte alignment; first retains original. */
        mop_2 = (mop_1 & ~MO_AMASK) | MO_ALIGN;
        break;
    default:
        g_assert_not_reached();
    }

    /* Use a byte ordering implemented by the host. */
    if ((orig & MO_BSWAP) && !tcg_target_has_memory_bswap(mop_1)) {
        mop_1 &= ~MO_BSWAP;
        mop_2 &= ~MO_BSWAP;
    }

    ret[0] = mop_1;
    ret[1] = mop_2;
}

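/*
 * Return the guest address as a 64-bit value, zero-extending a 32-bit
 * address into a new temporary if necessary; release that temporary
 * with maybe_free_addr64().
 */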
static TCGv_i64 maybe_extend_addr64(TCGTemp *addr)
{
    if (tcg_ctx->addr_type == TCG_TYPE_I32) {
        TCGv_i64 a64 = tcg_temp_ebb_new_i64();
        tcg_gen_extu_i32_i64(a64, temp_tcgv_i32(addr));
        return a64;
    }
    return temp_tcgv_i64(addr);
}

static void maybe_free_addr64(TCGv_i64 a64)
{
    if (tcg_ctx->addr_type == TCG_TYPE_I32) {
        tcg_temp_free_i64(a64);
    }
}

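/*
 * Expand a guest load into a 128-bit value using, in order of
 * preference, a native 128-bit opcode, a pair of 64-bit loads, or the
 * out-of-line helper.
 */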
static void tcg_gen_qemu_ld_i128_int(TCGv_i128 val, TCGTemp *addr,
                                     TCGArg idx, MemOp memop)
{
    MemOpIdx orig_oi;
    TCGv_i64 ext_addr = NULL;
    TCGOpcode opc;

    check_max_alignment(get_alignment_bits(memop));
    tcg_gen_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);

    /* In serial mode, reduce atomicity. */
    if (!(tcg_ctx->gen_tb->cflags & CF_PARALLEL)) {
        memop &= ~MO_ATOM_MASK;
        memop |= MO_ATOM_NONE;
    }
    orig_oi = make_memop_idx(memop, idx);

    /* TODO: For now, force 32-bit hosts to use the helper. */
    if (TCG_TARGET_HAS_qemu_ldst_i128 && TCG_TARGET_REG_BITS == 64) {
        TCGv_i64 lo, hi;
        bool need_bswap = false;
        MemOpIdx oi = orig_oi;

        if ((memop & MO_BSWAP) && !tcg_target_has_memory_bswap(memop)) {
            lo = TCGV128_HIGH(val);
            hi = TCGV128_LOW(val);
            oi = make_memop_idx(memop & ~MO_BSWAP, idx);
            need_bswap = true;
        } else {
            lo = TCGV128_LOW(val);
            hi = TCGV128_HIGH(val);
        }

        if (tcg_ctx->addr_type == TCG_TYPE_I32) {
            opc = INDEX_op_qemu_ld_a32_i128;
        } else {
            opc = INDEX_op_qemu_ld_a64_i128;
        }
        gen_ldst(opc, tcgv_i64_temp(lo), tcgv_i64_temp(hi), addr, oi);

        if (need_bswap) {
            tcg_gen_bswap64_i64(lo, lo);
            tcg_gen_bswap64_i64(hi, hi);
        }
    } else if (use_two_i64_for_i128(memop)) {
        MemOp mop[2];
        TCGTemp *addr_p8;
        TCGv_i64 x, y;
        bool need_bswap;

        canonicalize_memop_i128_as_i64(mop, memop);
        need_bswap = (mop[0] ^ memop) & MO_BSWAP;

        if (tcg_ctx->addr_type == TCG_TYPE_I32) {
            opc = INDEX_op_qemu_ld_a32_i64;
        } else {
            opc = INDEX_op_qemu_ld_a64_i64;
        }

        /*
         * Since there are no global TCGv_i128, there is no visible state
         * changed if the second load faults.  Load directly into the two
         * subwords.
         */
        if ((memop & MO_BSWAP) == MO_LE) {
            x = TCGV128_LOW(val);
            y = TCGV128_HIGH(val);
        } else {
            x = TCGV128_HIGH(val);
            y = TCGV128_LOW(val);
        }

        gen_ldst_i64(opc, x, addr, make_memop_idx(mop[0], idx));

        if (need_bswap) {
            tcg_gen_bswap64_i64(x, x);
        }

        if (tcg_ctx->addr_type == TCG_TYPE_I32) {
            TCGv_i32 t = tcg_temp_ebb_new_i32();
            tcg_gen_addi_i32(t, temp_tcgv_i32(addr), 8);
            addr_p8 = tcgv_i32_temp(t);
        } else {
            TCGv_i64 t = tcg_temp_ebb_new_i64();
            tcg_gen_addi_i64(t, temp_tcgv_i64(addr), 8);
            addr_p8 = tcgv_i64_temp(t);
        }

        gen_ldst_i64(opc, y, addr_p8, make_memop_idx(mop[1], idx));
        tcg_temp_free_internal(addr_p8);

        if (need_bswap) {
            tcg_gen_bswap64_i64(y, y);
        }
    } else {
        if (tcg_ctx->addr_type == TCG_TYPE_I32) {
            ext_addr = tcg_temp_ebb_new_i64();
            tcg_gen_extu_i32_i64(ext_addr, temp_tcgv_i32(addr));
            addr = tcgv_i64_temp(ext_addr);
        }
        gen_helper_ld_i128(val, tcg_env, temp_tcgv_i64(addr),
                           tcg_constant_i32(orig_oi));
    }

    plugin_gen_mem_callbacks(ext_addr, addr, orig_oi, QEMU_PLUGIN_MEM_R);
}

void tcg_gen_qemu_ld_i128_chk(TCGv_i128 val, TCGTemp *addr, TCGArg idx,
                              MemOp memop, TCGType addr_type)
{
    tcg_debug_assert(addr_type == tcg_ctx->addr_type);
    tcg_debug_assert((memop & MO_SIZE) == MO_128);
    tcg_debug_assert((memop & MO_SIGN) == 0);
    tcg_gen_qemu_ld_i128_int(val, addr, idx, memop);
}

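/*
 * Expand a guest store of a 128-bit value using, in order of
 * preference, a native 128-bit opcode, a pair of 64-bit stores, or the
 * out-of-line helper.
 */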
static void tcg_gen_qemu_st_i128_int(TCGv_i128 val, TCGTemp *addr,
                                     TCGArg idx, MemOp memop)
{
    MemOpIdx orig_oi;
    TCGv_i64 ext_addr = NULL;
    TCGOpcode opc;

    check_max_alignment(get_alignment_bits(memop));
    tcg_gen_req_mo(TCG_MO_ST_LD | TCG_MO_ST_ST);

    /* In serial mode, reduce atomicity. */
    if (!(tcg_ctx->gen_tb->cflags & CF_PARALLEL)) {
        memop &= ~MO_ATOM_MASK;
        memop |= MO_ATOM_NONE;
    }
    orig_oi = make_memop_idx(memop, idx);

    /* TODO: For now, force 32-bit hosts to use the helper. */

    if (TCG_TARGET_HAS_qemu_ldst_i128 && TCG_TARGET_REG_BITS == 64) {
        TCGv_i64 lo, hi;
        MemOpIdx oi = orig_oi;
        bool need_bswap = false;

        if ((memop & MO_BSWAP) && !tcg_target_has_memory_bswap(memop)) {
            lo = tcg_temp_ebb_new_i64();
            hi = tcg_temp_ebb_new_i64();
            tcg_gen_bswap64_i64(lo, TCGV128_HIGH(val));
            tcg_gen_bswap64_i64(hi, TCGV128_LOW(val));
            oi = make_memop_idx(memop & ~MO_BSWAP, idx);
            need_bswap = true;
        } else {
            lo = TCGV128_LOW(val);
            hi = TCGV128_HIGH(val);
        }

        if (tcg_ctx->addr_type == TCG_TYPE_I32) {
            opc = INDEX_op_qemu_st_a32_i128;
        } else {
            opc = INDEX_op_qemu_st_a64_i128;
        }
        gen_ldst(opc, tcgv_i64_temp(lo), tcgv_i64_temp(hi), addr, oi);

        if (need_bswap) {
            tcg_temp_free_i64(lo);
            tcg_temp_free_i64(hi);
        }
    } else if (use_two_i64_for_i128(memop)) {
        MemOp mop[2];
        TCGTemp *addr_p8;
        TCGv_i64 x, y, b = NULL;

        canonicalize_memop_i128_as_i64(mop, memop);

        if (tcg_ctx->addr_type == TCG_TYPE_I32) {
            opc = INDEX_op_qemu_st_a32_i64;
        } else {
            opc = INDEX_op_qemu_st_a64_i64;
        }

        if ((memop & MO_BSWAP) == MO_LE) {
            x = TCGV128_LOW(val);
            y = TCGV128_HIGH(val);
        } else {
            x = TCGV128_HIGH(val);
            y = TCGV128_LOW(val);
        }

        if ((mop[0] ^ memop) & MO_BSWAP) {
            b = tcg_temp_ebb_new_i64();
            tcg_gen_bswap64_i64(b, x);
            x = b;
        }

        gen_ldst_i64(opc, x, addr, make_memop_idx(mop[0], idx));

        if (tcg_ctx->addr_type == TCG_TYPE_I32) {
            TCGv_i32 t = tcg_temp_ebb_new_i32();
            tcg_gen_addi_i32(t, temp_tcgv_i32(addr), 8);
            addr_p8 = tcgv_i32_temp(t);
        } else {
            TCGv_i64 t = tcg_temp_ebb_new_i64();
            tcg_gen_addi_i64(t, temp_tcgv_i64(addr), 8);
            addr_p8 = tcgv_i64_temp(t);
        }

        if (b) {
            tcg_gen_bswap64_i64(b, y);
            gen_ldst_i64(opc, b, addr_p8, make_memop_idx(mop[1], idx));
            tcg_temp_free_i64(b);
        } else {
            gen_ldst_i64(opc, y, addr_p8, make_memop_idx(mop[1], idx));
        }
        tcg_temp_free_internal(addr_p8);
    } else {
        if (tcg_ctx->addr_type == TCG_TYPE_I32) {
            ext_addr = tcg_temp_ebb_new_i64();
            tcg_gen_extu_i32_i64(ext_addr, temp_tcgv_i32(addr));
            addr = tcgv_i64_temp(ext_addr);
        }
        gen_helper_st_i128(tcg_env, temp_tcgv_i64(addr), val,
                           tcg_constant_i32(orig_oi));
    }

    plugin_gen_mem_callbacks(ext_addr, addr, orig_oi, QEMU_PLUGIN_MEM_W);
}

void tcg_gen_qemu_st_i128_chk(TCGv_i128 val, TCGTemp *addr, TCGArg idx,
                              MemOp memop, TCGType addr_type)
{
    tcg_debug_assert(addr_type == tcg_ctx->addr_type);
    tcg_debug_assert((memop & MO_SIZE) == MO_128);
    tcg_debug_assert((memop & MO_SIGN) == 0);
    tcg_gen_qemu_st_i128_int(val, addr, idx, memop);
}

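/*
 * Sign- or zero-extend @val according to the size and signedness
 * encoded in @opc; likewise for the 64-bit variant below.
 */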
void tcg_gen_ext_i32(TCGv_i32 ret, TCGv_i32 val, MemOp opc)
{
    switch (opc & MO_SSIZE) {
    case MO_SB:
        tcg_gen_ext8s_i32(ret, val);
        break;
    case MO_UB:
        tcg_gen_ext8u_i32(ret, val);
        break;
    case MO_SW:
        tcg_gen_ext16s_i32(ret, val);
        break;
    case MO_UW:
        tcg_gen_ext16u_i32(ret, val);
        break;
    case MO_UL:
    case MO_SL:
        tcg_gen_mov_i32(ret, val);
        break;
    default:
        g_assert_not_reached();
    }
}

void tcg_gen_ext_i64(TCGv_i64 ret, TCGv_i64 val, MemOp opc)
{
    switch (opc & MO_SSIZE) {
    case MO_SB:
        tcg_gen_ext8s_i64(ret, val);
        break;
    case MO_UB:
        tcg_gen_ext8u_i64(ret, val);
        break;
    case MO_SW:
        tcg_gen_ext16s_i64(ret, val);
        break;
    case MO_UW:
        tcg_gen_ext16u_i64(ret, val);
        break;
    case MO_SL:
        tcg_gen_ext32s_i64(ret, val);
        break;
    case MO_UL:
        tcg_gen_ext32u_i64(ret, val);
        break;
    case MO_UQ:
    case MO_SQ:
        tcg_gen_mov_i64(ret, val);
        break;
    default:
        g_assert_not_reached();
    }
}

typedef void (*gen_atomic_cx_i32)(TCGv_i32, TCGv_env, TCGv_i64,
                                  TCGv_i32, TCGv_i32, TCGv_i32);
typedef void (*gen_atomic_cx_i64)(TCGv_i64, TCGv_env, TCGv_i64,
                                  TCGv_i64, TCGv_i64, TCGv_i32);
typedef void (*gen_atomic_cx_i128)(TCGv_i128, TCGv_env, TCGv_i64,
                                   TCGv_i128, TCGv_i128, TCGv_i32);
typedef void (*gen_atomic_op_i32)(TCGv_i32, TCGv_env, TCGv_i64,
                                  TCGv_i32, TCGv_i32);
typedef void (*gen_atomic_op_i64)(TCGv_i64, TCGv_env, TCGv_i64,
                                  TCGv_i64, TCGv_i32);

#ifdef CONFIG_ATOMIC64
# define WITH_ATOMIC64(X) X,
#else
# define WITH_ATOMIC64(X)
#endif
#if HAVE_CMPXCHG128
# define WITH_ATOMIC128(X) X,
#else
# define WITH_ATOMIC128(X)
#endif

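/*
 * Compare-and-swap helpers, indexed by operation size and endianness.
 * The 64-bit and 128-bit entries are present only when the host can
 * perform those operations atomically.
 */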
static void * const table_cmpxchg[(MO_SIZE | MO_BSWAP) + 1] = {
    [MO_8] = gen_helper_atomic_cmpxchgb,
    [MO_16 | MO_LE] = gen_helper_atomic_cmpxchgw_le,
    [MO_16 | MO_BE] = gen_helper_atomic_cmpxchgw_be,
    [MO_32 | MO_LE] = gen_helper_atomic_cmpxchgl_le,
    [MO_32 | MO_BE] = gen_helper_atomic_cmpxchgl_be,
    WITH_ATOMIC64([MO_64 | MO_LE] = gen_helper_atomic_cmpxchgq_le)
    WITH_ATOMIC64([MO_64 | MO_BE] = gen_helper_atomic_cmpxchgq_be)
    WITH_ATOMIC128([MO_128 | MO_LE] = gen_helper_atomic_cmpxchgo_le)
    WITH_ATOMIC128([MO_128 | MO_BE] = gen_helper_atomic_cmpxchgo_be)
};

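/*
 * Non-atomic compare-and-swap: load the old value, select between the
 * old and new values with movcond, store the result unconditionally,
 * and return the (possibly sign-extended) old value.
 */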
static void tcg_gen_nonatomic_cmpxchg_i32_int(TCGv_i32 retv, TCGTemp *addr,
                                              TCGv_i32 cmpv, TCGv_i32 newv,
                                              TCGArg idx, MemOp memop)
{
    TCGv_i32 t1 = tcg_temp_ebb_new_i32();
    TCGv_i32 t2 = tcg_temp_ebb_new_i32();

    tcg_gen_ext_i32(t2, cmpv, memop & MO_SIZE);

    tcg_gen_qemu_ld_i32_int(t1, addr, idx, memop & ~MO_SIGN);
    tcg_gen_movcond_i32(TCG_COND_EQ, t2, t1, t2, newv, t1);
    tcg_gen_qemu_st_i32_int(t2, addr, idx, memop);
    tcg_temp_free_i32(t2);

    if (memop & MO_SIGN) {
        tcg_gen_ext_i32(retv, t1, memop);
    } else {
        tcg_gen_mov_i32(retv, t1);
    }
    tcg_temp_free_i32(t1);
}

void tcg_gen_nonatomic_cmpxchg_i32_chk(TCGv_i32 retv, TCGTemp *addr,
                                       TCGv_i32 cmpv, TCGv_i32 newv,
                                       TCGArg idx, MemOp memop,
                                       TCGType addr_type)
{
    tcg_debug_assert(addr_type == tcg_ctx->addr_type);
    tcg_debug_assert((memop & MO_SIZE) <= MO_32);
    tcg_gen_nonatomic_cmpxchg_i32_int(retv, addr, cmpv, newv, idx, memop);
}

static void tcg_gen_atomic_cmpxchg_i32_int(TCGv_i32 retv, TCGTemp *addr,
                                           TCGv_i32 cmpv, TCGv_i32 newv,
                                           TCGArg idx, MemOp memop)
{
    gen_atomic_cx_i32 gen;
    TCGv_i64 a64;
    MemOpIdx oi;

    if (!(tcg_ctx->gen_tb->cflags & CF_PARALLEL)) {
        tcg_gen_nonatomic_cmpxchg_i32_int(retv, addr, cmpv, newv, idx, memop);
        return;
    }

    memop = tcg_canonicalize_memop(memop, 0, 0);
    gen = table_cmpxchg[memop & (MO_SIZE | MO_BSWAP)];
    tcg_debug_assert(gen != NULL);

    oi = make_memop_idx(memop & ~MO_SIGN, idx);
    a64 = maybe_extend_addr64(addr);
    gen(retv, tcg_env, a64, cmpv, newv, tcg_constant_i32(oi));
    maybe_free_addr64(a64);

    if (memop & MO_SIGN) {
        tcg_gen_ext_i32(retv, retv, memop);
    }
}

void tcg_gen_atomic_cmpxchg_i32_chk(TCGv_i32 retv, TCGTemp *addr,
                                    TCGv_i32 cmpv, TCGv_i32 newv,
                                    TCGArg idx, MemOp memop,
                                    TCGType addr_type)
{
    tcg_debug_assert(addr_type == tcg_ctx->addr_type);
    tcg_debug_assert((memop & MO_SIZE) <= MO_32);
    tcg_gen_atomic_cmpxchg_i32_int(retv, addr, cmpv, newv, idx, memop);
}

static void tcg_gen_nonatomic_cmpxchg_i64_int(TCGv_i64 retv, TCGTemp *addr,
                                              TCGv_i64 cmpv, TCGv_i64 newv,
                                              TCGArg idx, MemOp memop)
{
    TCGv_i64 t1, t2;

    if (TCG_TARGET_REG_BITS == 32 && (memop & MO_SIZE) < MO_64) {
        tcg_gen_nonatomic_cmpxchg_i32_int(TCGV_LOW(retv), addr, TCGV_LOW(cmpv),
                                          TCGV_LOW(newv), idx, memop);
        if (memop & MO_SIGN) {
            tcg_gen_sari_i32(TCGV_HIGH(retv), TCGV_LOW(retv), 31);
        } else {
            tcg_gen_movi_i32(TCGV_HIGH(retv), 0);
        }
        return;
    }

    t1 = tcg_temp_ebb_new_i64();
    t2 = tcg_temp_ebb_new_i64();

    tcg_gen_ext_i64(t2, cmpv, memop & MO_SIZE);

    tcg_gen_qemu_ld_i64_int(t1, addr, idx, memop & ~MO_SIGN);
    tcg_gen_movcond_i64(TCG_COND_EQ, t2, t1, t2, newv, t1);
    tcg_gen_qemu_st_i64_int(t2, addr, idx, memop);
    tcg_temp_free_i64(t2);

    if (memop & MO_SIGN) {
        tcg_gen_ext_i64(retv, t1, memop);
    } else {
        tcg_gen_mov_i64(retv, t1);
    }
    tcg_temp_free_i64(t1);
}

void tcg_gen_nonatomic_cmpxchg_i64_chk(TCGv_i64 retv, TCGTemp *addr,
                                       TCGv_i64 cmpv, TCGv_i64 newv,
                                       TCGArg idx, MemOp memop,
                                       TCGType addr_type)
{
    tcg_debug_assert(addr_type == tcg_ctx->addr_type);
    tcg_debug_assert((memop & MO_SIZE) <= MO_64);
    tcg_gen_nonatomic_cmpxchg_i64_int(retv, addr, cmpv, newv, idx, memop);
}

static void tcg_gen_atomic_cmpxchg_i64_int(TCGv_i64 retv, TCGTemp *addr,
                                           TCGv_i64 cmpv, TCGv_i64 newv,
                                           TCGArg idx, MemOp memop)
{
    if (!(tcg_ctx->gen_tb->cflags & CF_PARALLEL)) {
        tcg_gen_nonatomic_cmpxchg_i64_int(retv, addr, cmpv, newv, idx, memop);
        return;
    }

    if ((memop & MO_SIZE) == MO_64) {
        gen_atomic_cx_i64 gen;

        memop = tcg_canonicalize_memop(memop, 1, 0);
        gen = table_cmpxchg[memop & (MO_SIZE | MO_BSWAP)];
        if (gen) {
            MemOpIdx oi = make_memop_idx(memop, idx);
            TCGv_i64 a64 = maybe_extend_addr64(addr);
            gen(retv, tcg_env, a64, cmpv, newv, tcg_constant_i32(oi));
            maybe_free_addr64(a64);
            return;
        }

        gen_helper_exit_atomic(tcg_env);

        /*
         * Produce a result for a well-formed opcode stream.  This satisfies
         * liveness for set before used, which happens before this dead code
         * is removed.
         */
        tcg_gen_movi_i64(retv, 0);
        return;
    }

    if (TCG_TARGET_REG_BITS == 32) {
        tcg_gen_atomic_cmpxchg_i32_int(TCGV_LOW(retv), addr, TCGV_LOW(cmpv),
                                       TCGV_LOW(newv), idx, memop);
        if (memop & MO_SIGN) {
            tcg_gen_sari_i32(TCGV_HIGH(retv), TCGV_LOW(retv), 31);
        } else {
            tcg_gen_movi_i32(TCGV_HIGH(retv), 0);
        }
    } else {
        TCGv_i32 c32 = tcg_temp_ebb_new_i32();
        TCGv_i32 n32 = tcg_temp_ebb_new_i32();
        TCGv_i32 r32 = tcg_temp_ebb_new_i32();

        tcg_gen_extrl_i64_i32(c32, cmpv);
        tcg_gen_extrl_i64_i32(n32, newv);
        tcg_gen_atomic_cmpxchg_i32_int(r32, addr, c32, n32,
                                       idx, memop & ~MO_SIGN);
        tcg_temp_free_i32(c32);
        tcg_temp_free_i32(n32);

        tcg_gen_extu_i32_i64(retv, r32);
        tcg_temp_free_i32(r32);

        if (memop & MO_SIGN) {
            tcg_gen_ext_i64(retv, retv, memop);
        }
    }
}

void tcg_gen_atomic_cmpxchg_i64_chk(TCGv_i64 retv, TCGTemp *addr,
                                    TCGv_i64 cmpv, TCGv_i64 newv,
                                    TCGArg idx, MemOp memop, TCGType addr_type)
{
    tcg_debug_assert(addr_type == tcg_ctx->addr_type);
    tcg_debug_assert((memop & MO_SIZE) <= MO_64);
    tcg_gen_atomic_cmpxchg_i64_int(retv, addr, cmpv, newv, idx, memop);
}

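/*
 * As above, for a 128-bit compare-and-swap: 32-bit hosts call an
 * out-of-line helper, while 64-bit hosts expand the compare and the
 * select inline on the two halves.
 */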
static void tcg_gen_nonatomic_cmpxchg_i128_int(TCGv_i128 retv, TCGTemp *addr,
                                               TCGv_i128 cmpv, TCGv_i128 newv,
                                               TCGArg idx, MemOp memop)
{
    if (TCG_TARGET_REG_BITS == 32) {
        /* Inline expansion below is simply too large for 32-bit hosts. */
        MemOpIdx oi = make_memop_idx(memop, idx);
        TCGv_i64 a64 = maybe_extend_addr64(addr);

        gen_helper_nonatomic_cmpxchgo(retv, tcg_env, a64, cmpv, newv,
                                      tcg_constant_i32(oi));
        maybe_free_addr64(a64);
    } else {
        TCGv_i128 oldv = tcg_temp_ebb_new_i128();
        TCGv_i128 tmpv = tcg_temp_ebb_new_i128();
        TCGv_i64 t0 = tcg_temp_ebb_new_i64();
        TCGv_i64 t1 = tcg_temp_ebb_new_i64();
        TCGv_i64 z = tcg_constant_i64(0);

        tcg_gen_qemu_ld_i128_int(oldv, addr, idx, memop);

        /* Compare i128 */
        tcg_gen_xor_i64(t0, TCGV128_LOW(oldv), TCGV128_LOW(cmpv));
        tcg_gen_xor_i64(t1, TCGV128_HIGH(oldv), TCGV128_HIGH(cmpv));
        tcg_gen_or_i64(t0, t0, t1);

        /* tmpv = equal ? newv : oldv */
        tcg_gen_movcond_i64(TCG_COND_EQ, TCGV128_LOW(tmpv), t0, z,
                            TCGV128_LOW(newv), TCGV128_LOW(oldv));
        tcg_gen_movcond_i64(TCG_COND_EQ, TCGV128_HIGH(tmpv), t0, z,
                            TCGV128_HIGH(newv), TCGV128_HIGH(oldv));

        /* Unconditional writeback. */
        tcg_gen_qemu_st_i128_int(tmpv, addr, idx, memop);
        tcg_gen_mov_i128(retv, oldv);

        tcg_temp_free_i64(t0);
        tcg_temp_free_i64(t1);
        tcg_temp_free_i128(tmpv);
        tcg_temp_free_i128(oldv);
    }
}

void tcg_gen_nonatomic_cmpxchg_i128_chk(TCGv_i128 retv, TCGTemp *addr,
                                        TCGv_i128 cmpv, TCGv_i128 newv,
                                        TCGArg idx, MemOp memop,
                                        TCGType addr_type)
{
    tcg_debug_assert(addr_type == tcg_ctx->addr_type);
    tcg_debug_assert((memop & (MO_SIZE | MO_SIGN)) == MO_128);
    tcg_gen_nonatomic_cmpxchg_i128_int(retv, addr, cmpv, newv, idx, memop);
}

static void tcg_gen_atomic_cmpxchg_i128_int(TCGv_i128 retv, TCGTemp *addr,
                                            TCGv_i128 cmpv, TCGv_i128 newv,
                                            TCGArg idx, MemOp memop)
{
    gen_atomic_cx_i128 gen;

    if (!(tcg_ctx->gen_tb->cflags & CF_PARALLEL)) {
        tcg_gen_nonatomic_cmpxchg_i128_int(retv, addr, cmpv, newv, idx, memop);
        return;
    }

    gen = table_cmpxchg[memop & (MO_SIZE | MO_BSWAP)];
    if (gen) {
        MemOpIdx oi = make_memop_idx(memop, idx);
        TCGv_i64 a64 = maybe_extend_addr64(addr);
        gen(retv, tcg_env, a64, cmpv, newv, tcg_constant_i32(oi));
        maybe_free_addr64(a64);
        return;
    }

    gen_helper_exit_atomic(tcg_env);

    /*
     * Produce a result for a well-formed opcode stream.  This satisfies
     * liveness for set before used, which happens before this dead code
     * is removed.
     */
    tcg_gen_movi_i64(TCGV128_LOW(retv), 0);
    tcg_gen_movi_i64(TCGV128_HIGH(retv), 0);
}

void tcg_gen_atomic_cmpxchg_i128_chk(TCGv_i128 retv, TCGTemp *addr,
                                     TCGv_i128 cmpv, TCGv_i128 newv,
                                     TCGArg idx, MemOp memop,
                                     TCGType addr_type)
{
    tcg_debug_assert(addr_type == tcg_ctx->addr_type);
    tcg_debug_assert((memop & (MO_SIZE | MO_SIGN)) == MO_128);
    tcg_gen_atomic_cmpxchg_i128_int(retv, addr, cmpv, newv, idx, memop);
}

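/*
 * Non-atomic read-modify-write: load the old value, combine it with
 * @val using @gen, store the result, and return either the old or the
 * new value according to @new_val.
 */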
static void do_nonatomic_op_i32(TCGv_i32 ret, TCGTemp *addr, TCGv_i32 val,
                                TCGArg idx, MemOp memop, bool new_val,
                                void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32))
{
    TCGv_i32 t1 = tcg_temp_ebb_new_i32();
    TCGv_i32 t2 = tcg_temp_ebb_new_i32();

    memop = tcg_canonicalize_memop(memop, 0, 0);

    tcg_gen_qemu_ld_i32_int(t1, addr, idx, memop);
    tcg_gen_ext_i32(t2, val, memop);
    gen(t2, t1, t2);
    tcg_gen_qemu_st_i32_int(t2, addr, idx, memop);

    tcg_gen_ext_i32(ret, (new_val ? t2 : t1), memop);
    tcg_temp_free_i32(t1);
    tcg_temp_free_i32(t2);
}

static void do_atomic_op_i32(TCGv_i32 ret, TCGTemp *addr, TCGv_i32 val,
                             TCGArg idx, MemOp memop, void * const table[])
{
    gen_atomic_op_i32 gen;
    TCGv_i64 a64;
    MemOpIdx oi;

    memop = tcg_canonicalize_memop(memop, 0, 0);

    gen = table[memop & (MO_SIZE | MO_BSWAP)];
    tcg_debug_assert(gen != NULL);

    oi = make_memop_idx(memop & ~MO_SIGN, idx);
    a64 = maybe_extend_addr64(addr);
    gen(ret, tcg_env, a64, val, tcg_constant_i32(oi));
    maybe_free_addr64(a64);

    if (memop & MO_SIGN) {
        tcg_gen_ext_i32(ret, ret, memop);
    }
}

static void do_nonatomic_op_i64(TCGv_i64 ret, TCGTemp *addr, TCGv_i64 val,
                                TCGArg idx, MemOp memop, bool new_val,
                                void (*gen)(TCGv_i64, TCGv_i64, TCGv_i64))
{
    TCGv_i64 t1 = tcg_temp_ebb_new_i64();
    TCGv_i64 t2 = tcg_temp_ebb_new_i64();

    memop = tcg_canonicalize_memop(memop, 1, 0);

    tcg_gen_qemu_ld_i64_int(t1, addr, idx, memop);
    tcg_gen_ext_i64(t2, val, memop);
    gen(t2, t1, t2);
    tcg_gen_qemu_st_i64_int(t2, addr, idx, memop);

    tcg_gen_ext_i64(ret, (new_val ? t2 : t1), memop);
    tcg_temp_free_i64(t1);
    tcg_temp_free_i64(t2);
}

static void do_atomic_op_i64(TCGv_i64 ret, TCGTemp *addr, TCGv_i64 val,
                             TCGArg idx, MemOp memop, void * const table[])
{
    memop = tcg_canonicalize_memop(memop, 1, 0);

    if ((memop & MO_SIZE) == MO_64) {
        gen_atomic_op_i64 gen = table[memop & (MO_SIZE | MO_BSWAP)];

        if (gen) {
            MemOpIdx oi = make_memop_idx(memop & ~MO_SIGN, idx);
            TCGv_i64 a64 = maybe_extend_addr64(addr);
            gen(ret, tcg_env, a64, val, tcg_constant_i32(oi));
            maybe_free_addr64(a64);
            return;
        }

        gen_helper_exit_atomic(tcg_env);
        /*
         * Produce a result, so that we have a well-formed opcode stream
         * with respect to uses of the result in the (dead) code following.
         */
        tcg_gen_movi_i64(ret, 0);
    } else {
        TCGv_i32 v32 = tcg_temp_ebb_new_i32();
        TCGv_i32 r32 = tcg_temp_ebb_new_i32();

        tcg_gen_extrl_i64_i32(v32, val);
        do_atomic_op_i32(r32, addr, v32, idx, memop & ~MO_SIGN, table);
        tcg_temp_free_i32(v32);

        tcg_gen_extu_i32_i64(ret, r32);
        tcg_temp_free_i32(r32);

        if (memop & MO_SIGN) {
            tcg_gen_ext_i64(ret, ret, memop);
        }
    }
}

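/*
 * For each operation, define the table of size- and endian-specific
 * helpers plus the i32/i64 front ends that choose between the atomic
 * helper and the non-atomic expansion based on CF_PARALLEL.  For
 * example, GEN_ATOMIC_HELPER(fetch_add, add, 0) provides
 * tcg_gen_atomic_fetch_add_i32_chk() and tcg_gen_atomic_fetch_add_i64_chk().
 */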
#define GEN_ATOMIC_HELPER(NAME, OP, NEW)                                \
static void * const table_##NAME[(MO_SIZE | MO_BSWAP) + 1] = {          \
    [MO_8] = gen_helper_atomic_##NAME##b,                               \
    [MO_16 | MO_LE] = gen_helper_atomic_##NAME##w_le,                   \
    [MO_16 | MO_BE] = gen_helper_atomic_##NAME##w_be,                   \
    [MO_32 | MO_LE] = gen_helper_atomic_##NAME##l_le,                   \
    [MO_32 | MO_BE] = gen_helper_atomic_##NAME##l_be,                   \
    WITH_ATOMIC64([MO_64 | MO_LE] = gen_helper_atomic_##NAME##q_le)     \
    WITH_ATOMIC64([MO_64 | MO_BE] = gen_helper_atomic_##NAME##q_be)     \
};                                                                      \
void tcg_gen_atomic_##NAME##_i32_chk(TCGv_i32 ret, TCGTemp *addr,       \
                                     TCGv_i32 val, TCGArg idx,          \
                                     MemOp memop, TCGType addr_type)    \
{                                                                       \
    tcg_debug_assert(addr_type == tcg_ctx->addr_type);                  \
    tcg_debug_assert((memop & MO_SIZE) <= MO_32);                       \
    if (tcg_ctx->gen_tb->cflags & CF_PARALLEL) {                        \
        do_atomic_op_i32(ret, addr, val, idx, memop, table_##NAME);     \
    } else {                                                            \
        do_nonatomic_op_i32(ret, addr, val, idx, memop, NEW,            \
                            tcg_gen_##OP##_i32);                        \
    }                                                                   \
}                                                                       \
void tcg_gen_atomic_##NAME##_i64_chk(TCGv_i64 ret, TCGTemp *addr,       \
                                     TCGv_i64 val, TCGArg idx,          \
                                     MemOp memop, TCGType addr_type)    \
{                                                                       \
    tcg_debug_assert(addr_type == tcg_ctx->addr_type);                  \
    tcg_debug_assert((memop & MO_SIZE) <= MO_64);                       \
    if (tcg_ctx->gen_tb->cflags & CF_PARALLEL) {                        \
        do_atomic_op_i64(ret, addr, val, idx, memop, table_##NAME);     \
    } else {                                                            \
        do_nonatomic_op_i64(ret, addr, val, idx, memop, NEW,            \
                            tcg_gen_##OP##_i64);                        \
    }                                                                   \
}

GEN_ATOMIC_HELPER(fetch_add, add, 0)
GEN_ATOMIC_HELPER(fetch_and, and, 0)
GEN_ATOMIC_HELPER(fetch_or, or, 0)
GEN_ATOMIC_HELPER(fetch_xor, xor, 0)
GEN_ATOMIC_HELPER(fetch_smin, smin, 0)
GEN_ATOMIC_HELPER(fetch_umin, umin, 0)
GEN_ATOMIC_HELPER(fetch_smax, smax, 0)
GEN_ATOMIC_HELPER(fetch_umax, umax, 0)

GEN_ATOMIC_HELPER(add_fetch, add, 1)
GEN_ATOMIC_HELPER(and_fetch, and, 1)
GEN_ATOMIC_HELPER(or_fetch, or, 1)
GEN_ATOMIC_HELPER(xor_fetch, xor, 1)
GEN_ATOMIC_HELPER(smin_fetch, smin, 1)
GEN_ATOMIC_HELPER(umin_fetch, umin, 1)
GEN_ATOMIC_HELPER(smax_fetch, smax, 1)
GEN_ATOMIC_HELPER(umax_fetch, umax, 1)

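/*
 * Ignore the old value @a and return the new value @b; used via
 * GEN_ATOMIC_HELPER(xchg, mov2, 0) below to implement exchange.
 */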
static void tcg_gen_mov2_i32(TCGv_i32 r, TCGv_i32 a, TCGv_i32 b)
{
    tcg_gen_mov_i32(r, b);
}

static void tcg_gen_mov2_i64(TCGv_i64 r, TCGv_i64 a, TCGv_i64 b)
{
    tcg_gen_mov_i64(r, b);
}

GEN_ATOMIC_HELPER(xchg, mov2, 0)

#undef GEN_ATOMIC_HELPER