xref: /openbmc/qemu/tcg/tcg-op.c (revision c1eaa6d0df6ed9e021f751d0be6eb321551a9bea)
1 /*
2  * Tiny Code Generator for QEMU
3  *
4  * Copyright (c) 2008 Fabrice Bellard
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24 
25 #include "qemu/osdep.h"
26 #include "exec/exec-all.h"
27 #include "tcg/tcg.h"
28 #include "tcg/tcg-op.h"
29 #include "tcg/tcg-mo.h"
30 #include "exec/plugin-gen.h"
31 #include "tcg-internal.h"
32 
33 
34 void tcg_gen_op1(TCGOpcode opc, TCGArg a1)
35 {
36     TCGOp *op = tcg_emit_op(opc, 1);
37     op->args[0] = a1;
38 }
39 
40 void tcg_gen_op2(TCGOpcode opc, TCGArg a1, TCGArg a2)
41 {
42     TCGOp *op = tcg_emit_op(opc, 2);
43     op->args[0] = a1;
44     op->args[1] = a2;
45 }
46 
47 void tcg_gen_op3(TCGOpcode opc, TCGArg a1, TCGArg a2, TCGArg a3)
48 {
49     TCGOp *op = tcg_emit_op(opc, 3);
50     op->args[0] = a1;
51     op->args[1] = a2;
52     op->args[2] = a3;
53 }
54 
55 void tcg_gen_op4(TCGOpcode opc, TCGArg a1, TCGArg a2, TCGArg a3, TCGArg a4)
56 {
57     TCGOp *op = tcg_emit_op(opc, 4);
58     op->args[0] = a1;
59     op->args[1] = a2;
60     op->args[2] = a3;
61     op->args[3] = a4;
62 }
63 
64 void tcg_gen_op5(TCGOpcode opc, TCGArg a1, TCGArg a2, TCGArg a3,
65                  TCGArg a4, TCGArg a5)
66 {
67     TCGOp *op = tcg_emit_op(opc, 5);
68     op->args[0] = a1;
69     op->args[1] = a2;
70     op->args[2] = a3;
71     op->args[3] = a4;
72     op->args[4] = a5;
73 }
74 
75 void tcg_gen_op6(TCGOpcode opc, TCGArg a1, TCGArg a2, TCGArg a3,
76                  TCGArg a4, TCGArg a5, TCGArg a6)
77 {
78     TCGOp *op = tcg_emit_op(opc, 6);
79     op->args[0] = a1;
80     op->args[1] = a2;
81     op->args[2] = a3;
82     op->args[3] = a4;
83     op->args[4] = a5;
84     op->args[5] = a6;
85 }
86 
87 void tcg_gen_mb(TCGBar mb_type)
88 {
89     if (tcg_ctx->gen_tb->cflags & CF_PARALLEL) {
90         tcg_gen_op1(INDEX_op_mb, mb_type);
91     }
92 }
93 
94 /* 32 bit ops */
95 
96 void tcg_gen_movi_i32(TCGv_i32 ret, int32_t arg)
97 {
98     tcg_gen_mov_i32(ret, tcg_constant_i32(arg));
99 }
100 
101 void tcg_gen_addi_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
102 {
103     /* some cases can be optimized here */
104     if (arg2 == 0) {
105         tcg_gen_mov_i32(ret, arg1);
106     } else {
107         tcg_gen_add_i32(ret, arg1, tcg_constant_i32(arg2));
108     }
109 }
110 
111 void tcg_gen_subfi_i32(TCGv_i32 ret, int32_t arg1, TCGv_i32 arg2)
112 {
113     if (arg1 == 0 && TCG_TARGET_HAS_neg_i32) {
114         /* Don't recurse with tcg_gen_neg_i32.  */
115         tcg_gen_op2_i32(INDEX_op_neg_i32, ret, arg2);
116     } else {
117         tcg_gen_sub_i32(ret, tcg_constant_i32(arg1), arg2);
118     }
119 }
120 
121 void tcg_gen_subi_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
122 {
123     /* some cases can be optimized here */
124     if (arg2 == 0) {
125         tcg_gen_mov_i32(ret, arg1);
126     } else {
127         tcg_gen_sub_i32(ret, arg1, tcg_constant_i32(arg2));
128     }
129 }
130 
131 void tcg_gen_andi_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
132 {
133     /* Some cases can be optimized here.  */
134     switch (arg2) {
135     case 0:
136         tcg_gen_movi_i32(ret, 0);
137         return;
138     case -1:
139         tcg_gen_mov_i32(ret, arg1);
140         return;
141     case 0xff:
142         /* Don't recurse with tcg_gen_ext8u_i32.  */
143         if (TCG_TARGET_HAS_ext8u_i32) {
144             tcg_gen_op2_i32(INDEX_op_ext8u_i32, ret, arg1);
145             return;
146         }
147         break;
148     case 0xffff:
149         if (TCG_TARGET_HAS_ext16u_i32) {
150             tcg_gen_op2_i32(INDEX_op_ext16u_i32, ret, arg1);
151             return;
152         }
153         break;
154     }
155 
156     tcg_gen_and_i32(ret, arg1, tcg_constant_i32(arg2));
157 }
158 
159 void tcg_gen_ori_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
160 {
161     /* Some cases can be optimized here.  */
162     if (arg2 == -1) {
163         tcg_gen_movi_i32(ret, -1);
164     } else if (arg2 == 0) {
165         tcg_gen_mov_i32(ret, arg1);
166     } else {
167         tcg_gen_or_i32(ret, arg1, tcg_constant_i32(arg2));
168     }
169 }
170 
171 void tcg_gen_xori_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
172 {
173     /* Some cases can be optimized here.  */
174     if (arg2 == 0) {
175         tcg_gen_mov_i32(ret, arg1);
176     } else if (arg2 == -1 && TCG_TARGET_HAS_not_i32) {
177         /* Don't recurse with tcg_gen_not_i32.  */
178         tcg_gen_op2_i32(INDEX_op_not_i32, ret, arg1);
179     } else {
180         tcg_gen_xor_i32(ret, arg1, tcg_constant_i32(arg2));
181     }
182 }
183 
184 void tcg_gen_shli_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
185 {
186     tcg_debug_assert(arg2 >= 0 && arg2 < 32);
187     if (arg2 == 0) {
188         tcg_gen_mov_i32(ret, arg1);
189     } else {
190         tcg_gen_shl_i32(ret, arg1, tcg_constant_i32(arg2));
191     }
192 }
193 
194 void tcg_gen_shri_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
195 {
196     tcg_debug_assert(arg2 >= 0 && arg2 < 32);
197     if (arg2 == 0) {
198         tcg_gen_mov_i32(ret, arg1);
199     } else {
200         tcg_gen_shr_i32(ret, arg1, tcg_constant_i32(arg2));
201     }
202 }
203 
204 void tcg_gen_sari_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
205 {
206     tcg_debug_assert(arg2 >= 0 && arg2 < 32);
207     if (arg2 == 0) {
208         tcg_gen_mov_i32(ret, arg1);
209     } else {
210         tcg_gen_sar_i32(ret, arg1, tcg_constant_i32(arg2));
211     }
212 }
213 
214 void tcg_gen_brcond_i32(TCGCond cond, TCGv_i32 arg1, TCGv_i32 arg2, TCGLabel *l)
215 {
216     if (cond == TCG_COND_ALWAYS) {
217         tcg_gen_br(l);
218     } else if (cond != TCG_COND_NEVER) {
219         l->refs++;
220         tcg_gen_op4ii_i32(INDEX_op_brcond_i32, arg1, arg2, cond, label_arg(l));
221     }
222 }
223 
224 void tcg_gen_brcondi_i32(TCGCond cond, TCGv_i32 arg1, int32_t arg2, TCGLabel *l)
225 {
226     if (cond == TCG_COND_ALWAYS) {
227         tcg_gen_br(l);
228     } else if (cond != TCG_COND_NEVER) {
229         tcg_gen_brcond_i32(cond, arg1, tcg_constant_i32(arg2), l);
230     }
231 }
232 
233 void tcg_gen_setcond_i32(TCGCond cond, TCGv_i32 ret,
234                          TCGv_i32 arg1, TCGv_i32 arg2)
235 {
236     if (cond == TCG_COND_ALWAYS) {
237         tcg_gen_movi_i32(ret, 1);
238     } else if (cond == TCG_COND_NEVER) {
239         tcg_gen_movi_i32(ret, 0);
240     } else {
241         tcg_gen_op4i_i32(INDEX_op_setcond_i32, ret, arg1, arg2, cond);
242     }
243 }
244 
245 void tcg_gen_setcondi_i32(TCGCond cond, TCGv_i32 ret,
246                           TCGv_i32 arg1, int32_t arg2)
247 {
248     tcg_gen_setcond_i32(cond, ret, arg1, tcg_constant_i32(arg2));
249 }
250 
251 void tcg_gen_muli_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
252 {
253     if (arg2 == 0) {
254         tcg_gen_movi_i32(ret, 0);
255     } else if (is_power_of_2(arg2)) {
256         tcg_gen_shli_i32(ret, arg1, ctz32(arg2));
257     } else {
258         tcg_gen_mul_i32(ret, arg1, tcg_constant_i32(arg2));
259     }
260 }
261 
262 void tcg_gen_div_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
263 {
264     if (TCG_TARGET_HAS_div_i32) {
265         tcg_gen_op3_i32(INDEX_op_div_i32, ret, arg1, arg2);
266     } else if (TCG_TARGET_HAS_div2_i32) {
267         TCGv_i32 t0 = tcg_temp_new_i32();
268         tcg_gen_sari_i32(t0, arg1, 31);
269         tcg_gen_op5_i32(INDEX_op_div2_i32, ret, t0, arg1, t0, arg2);
270         tcg_temp_free_i32(t0);
271     } else {
272         gen_helper_div_i32(ret, arg1, arg2);
273     }
274 }
275 
276 void tcg_gen_rem_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
277 {
278     if (TCG_TARGET_HAS_rem_i32) {
279         tcg_gen_op3_i32(INDEX_op_rem_i32, ret, arg1, arg2);
280     } else if (TCG_TARGET_HAS_div_i32) {
281         TCGv_i32 t0 = tcg_temp_new_i32();
282         tcg_gen_op3_i32(INDEX_op_div_i32, t0, arg1, arg2);
283         tcg_gen_mul_i32(t0, t0, arg2);
284         tcg_gen_sub_i32(ret, arg1, t0);
285         tcg_temp_free_i32(t0);
286     } else if (TCG_TARGET_HAS_div2_i32) {
287         TCGv_i32 t0 = tcg_temp_new_i32();
288         tcg_gen_sari_i32(t0, arg1, 31);
289         tcg_gen_op5_i32(INDEX_op_div2_i32, t0, ret, arg1, t0, arg2);
290         tcg_temp_free_i32(t0);
291     } else {
292         gen_helper_rem_i32(ret, arg1, arg2);
293     }
294 }
295 
296 void tcg_gen_divu_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
297 {
298     if (TCG_TARGET_HAS_div_i32) {
299         tcg_gen_op3_i32(INDEX_op_divu_i32, ret, arg1, arg2);
300     } else if (TCG_TARGET_HAS_div2_i32) {
301         TCGv_i32 t0 = tcg_temp_new_i32();
302         tcg_gen_movi_i32(t0, 0);
303         tcg_gen_op5_i32(INDEX_op_divu2_i32, ret, t0, arg1, t0, arg2);
304         tcg_temp_free_i32(t0);
305     } else {
306         gen_helper_divu_i32(ret, arg1, arg2);
307     }
308 }
309 
310 void tcg_gen_remu_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
311 {
312     if (TCG_TARGET_HAS_rem_i32) {
313         tcg_gen_op3_i32(INDEX_op_remu_i32, ret, arg1, arg2);
314     } else if (TCG_TARGET_HAS_div_i32) {
315         TCGv_i32 t0 = tcg_temp_new_i32();
316         tcg_gen_op3_i32(INDEX_op_divu_i32, t0, arg1, arg2);
317         tcg_gen_mul_i32(t0, t0, arg2);
318         tcg_gen_sub_i32(ret, arg1, t0);
319         tcg_temp_free_i32(t0);
320     } else if (TCG_TARGET_HAS_div2_i32) {
321         TCGv_i32 t0 = tcg_temp_new_i32();
322         tcg_gen_movi_i32(t0, 0);
323         tcg_gen_op5_i32(INDEX_op_divu2_i32, t0, ret, arg1, t0, arg2);
324         tcg_temp_free_i32(t0);
325     } else {
326         gen_helper_remu_i32(ret, arg1, arg2);
327     }
328 }
329 
330 void tcg_gen_andc_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
331 {
332     if (TCG_TARGET_HAS_andc_i32) {
333         tcg_gen_op3_i32(INDEX_op_andc_i32, ret, arg1, arg2);
334     } else {
335         TCGv_i32 t0 = tcg_temp_new_i32();
336         tcg_gen_not_i32(t0, arg2);
337         tcg_gen_and_i32(ret, arg1, t0);
338         tcg_temp_free_i32(t0);
339     }
340 }
341 
342 void tcg_gen_eqv_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
343 {
344     if (TCG_TARGET_HAS_eqv_i32) {
345         tcg_gen_op3_i32(INDEX_op_eqv_i32, ret, arg1, arg2);
346     } else {
347         tcg_gen_xor_i32(ret, arg1, arg2);
348         tcg_gen_not_i32(ret, ret);
349     }
350 }
351 
352 void tcg_gen_nand_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
353 {
354     if (TCG_TARGET_HAS_nand_i32) {
355         tcg_gen_op3_i32(INDEX_op_nand_i32, ret, arg1, arg2);
356     } else {
357         tcg_gen_and_i32(ret, arg1, arg2);
358         tcg_gen_not_i32(ret, ret);
359     }
360 }
361 
362 void tcg_gen_nor_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
363 {
364     if (TCG_TARGET_HAS_nor_i32) {
365         tcg_gen_op3_i32(INDEX_op_nor_i32, ret, arg1, arg2);
366     } else {
367         tcg_gen_or_i32(ret, arg1, arg2);
368         tcg_gen_not_i32(ret, ret);
369     }
370 }
371 
372 void tcg_gen_orc_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
373 {
374     if (TCG_TARGET_HAS_orc_i32) {
375         tcg_gen_op3_i32(INDEX_op_orc_i32, ret, arg1, arg2);
376     } else {
377         TCGv_i32 t0 = tcg_temp_new_i32();
378         tcg_gen_not_i32(t0, arg2);
379         tcg_gen_or_i32(ret, arg1, t0);
380         tcg_temp_free_i32(t0);
381     }
382 }
383 
384 void tcg_gen_clz_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
385 {
386     if (TCG_TARGET_HAS_clz_i32) {
387         tcg_gen_op3_i32(INDEX_op_clz_i32, ret, arg1, arg2);
388     } else if (TCG_TARGET_HAS_clz_i64) {
389         TCGv_i64 t1 = tcg_temp_new_i64();
390         TCGv_i64 t2 = tcg_temp_new_i64();
391         tcg_gen_extu_i32_i64(t1, arg1);
392         tcg_gen_extu_i32_i64(t2, arg2);
393         tcg_gen_addi_i64(t2, t2, 32);
394         tcg_gen_clz_i64(t1, t1, t2);
395         tcg_gen_extrl_i64_i32(ret, t1);
396         tcg_temp_free_i64(t1);
397         tcg_temp_free_i64(t2);
398         tcg_gen_subi_i32(ret, ret, 32);
399     } else {
400         gen_helper_clz_i32(ret, arg1, arg2);
401     }
402 }
403 
404 void tcg_gen_clzi_i32(TCGv_i32 ret, TCGv_i32 arg1, uint32_t arg2)
405 {
406     tcg_gen_clz_i32(ret, arg1, tcg_constant_i32(arg2));
407 }
408 
409 void tcg_gen_ctz_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
410 {
411     if (TCG_TARGET_HAS_ctz_i32) {
412         tcg_gen_op3_i32(INDEX_op_ctz_i32, ret, arg1, arg2);
413     } else if (TCG_TARGET_HAS_ctz_i64) {
414         TCGv_i64 t1 = tcg_temp_new_i64();
415         TCGv_i64 t2 = tcg_temp_new_i64();
416         tcg_gen_extu_i32_i64(t1, arg1);
417         tcg_gen_extu_i32_i64(t2, arg2);
418         tcg_gen_ctz_i64(t1, t1, t2);
419         tcg_gen_extrl_i64_i32(ret, t1);
420         tcg_temp_free_i64(t1);
421         tcg_temp_free_i64(t2);
422     } else if (TCG_TARGET_HAS_ctpop_i32
423                || TCG_TARGET_HAS_ctpop_i64
424                || TCG_TARGET_HAS_clz_i32
425                || TCG_TARGET_HAS_clz_i64) {
426         TCGv_i32 z, t = tcg_temp_new_i32();
427 
428         if (TCG_TARGET_HAS_ctpop_i32 || TCG_TARGET_HAS_ctpop_i64) {
429             tcg_gen_subi_i32(t, arg1, 1);
430             tcg_gen_andc_i32(t, t, arg1);
431             tcg_gen_ctpop_i32(t, t);
432         } else {
433             /* Since all non-x86 hosts have clz(0) == 32, don't fight it.  */
434             tcg_gen_neg_i32(t, arg1);
435             tcg_gen_and_i32(t, t, arg1);
436             tcg_gen_clzi_i32(t, t, 32);
437             tcg_gen_xori_i32(t, t, 31);
438         }
439         z = tcg_constant_i32(0);
440         tcg_gen_movcond_i32(TCG_COND_EQ, ret, arg1, z, arg2, t);
441         tcg_temp_free_i32(t);
442     } else {
443         gen_helper_ctz_i32(ret, arg1, arg2);
444     }
445 }
446 
447 void tcg_gen_ctzi_i32(TCGv_i32 ret, TCGv_i32 arg1, uint32_t arg2)
448 {
449     if (!TCG_TARGET_HAS_ctz_i32 && TCG_TARGET_HAS_ctpop_i32 && arg2 == 32) {
450         /* This equivalence has the advantage of not requiring a fixup.  */
451         TCGv_i32 t = tcg_temp_new_i32();
452         tcg_gen_subi_i32(t, arg1, 1);
453         tcg_gen_andc_i32(t, t, arg1);
454         tcg_gen_ctpop_i32(ret, t);
455         tcg_temp_free_i32(t);
456     } else {
457         tcg_gen_ctz_i32(ret, arg1, tcg_constant_i32(arg2));
458     }
459 }
460 
461 void tcg_gen_clrsb_i32(TCGv_i32 ret, TCGv_i32 arg)
462 {
463     if (TCG_TARGET_HAS_clz_i32) {
464         TCGv_i32 t = tcg_temp_new_i32();
465         tcg_gen_sari_i32(t, arg, 31);
466         tcg_gen_xor_i32(t, t, arg);
467         tcg_gen_clzi_i32(t, t, 32);
468         tcg_gen_subi_i32(ret, t, 1);
469         tcg_temp_free_i32(t);
470     } else {
471         gen_helper_clrsb_i32(ret, arg);
472     }
473 }
474 
475 void tcg_gen_ctpop_i32(TCGv_i32 ret, TCGv_i32 arg1)
476 {
477     if (TCG_TARGET_HAS_ctpop_i32) {
478         tcg_gen_op2_i32(INDEX_op_ctpop_i32, ret, arg1);
479     } else if (TCG_TARGET_HAS_ctpop_i64) {
480         TCGv_i64 t = tcg_temp_new_i64();
481         tcg_gen_extu_i32_i64(t, arg1);
482         tcg_gen_ctpop_i64(t, t);
483         tcg_gen_extrl_i64_i32(ret, t);
484         tcg_temp_free_i64(t);
485     } else {
486         gen_helper_ctpop_i32(ret, arg1);
487     }
488 }
489 
490 void tcg_gen_rotl_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
491 {
492     if (TCG_TARGET_HAS_rot_i32) {
493         tcg_gen_op3_i32(INDEX_op_rotl_i32, ret, arg1, arg2);
494     } else {
495         TCGv_i32 t0, t1;
496 
497         t0 = tcg_temp_new_i32();
498         t1 = tcg_temp_new_i32();
499         tcg_gen_shl_i32(t0, arg1, arg2);
500         tcg_gen_subfi_i32(t1, 32, arg2);
501         tcg_gen_shr_i32(t1, arg1, t1);
502         tcg_gen_or_i32(ret, t0, t1);
503         tcg_temp_free_i32(t0);
504         tcg_temp_free_i32(t1);
505     }
506 }
507 
508 void tcg_gen_rotli_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
509 {
510     tcg_debug_assert(arg2 >= 0 && arg2 < 32);
511     /* some cases can be optimized here */
512     if (arg2 == 0) {
513         tcg_gen_mov_i32(ret, arg1);
514     } else if (TCG_TARGET_HAS_rot_i32) {
515         tcg_gen_rotl_i32(ret, arg1, tcg_constant_i32(arg2));
516     } else {
517         TCGv_i32 t0, t1;
518         t0 = tcg_temp_new_i32();
519         t1 = tcg_temp_new_i32();
520         tcg_gen_shli_i32(t0, arg1, arg2);
521         tcg_gen_shri_i32(t1, arg1, 32 - arg2);
522         tcg_gen_or_i32(ret, t0, t1);
523         tcg_temp_free_i32(t0);
524         tcg_temp_free_i32(t1);
525     }
526 }
527 
528 void tcg_gen_rotr_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
529 {
530     if (TCG_TARGET_HAS_rot_i32) {
531         tcg_gen_op3_i32(INDEX_op_rotr_i32, ret, arg1, arg2);
532     } else {
533         TCGv_i32 t0, t1;
534 
535         t0 = tcg_temp_new_i32();
536         t1 = tcg_temp_new_i32();
537         tcg_gen_shr_i32(t0, arg1, arg2);
538         tcg_gen_subfi_i32(t1, 32, arg2);
539         tcg_gen_shl_i32(t1, arg1, t1);
540         tcg_gen_or_i32(ret, t0, t1);
541         tcg_temp_free_i32(t0);
542         tcg_temp_free_i32(t1);
543     }
544 }
545 
546 void tcg_gen_rotri_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
547 {
548     tcg_debug_assert(arg2 >= 0 && arg2 < 32);
549     /* some cases can be optimized here */
550     if (arg2 == 0) {
551         tcg_gen_mov_i32(ret, arg1);
552     } else {
553         tcg_gen_rotli_i32(ret, arg1, 32 - arg2);
554     }
555 }
556 
557 void tcg_gen_deposit_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2,
558                          unsigned int ofs, unsigned int len)
559 {
560     uint32_t mask;
561     TCGv_i32 t1;
562 
563     tcg_debug_assert(ofs < 32);
564     tcg_debug_assert(len > 0);
565     tcg_debug_assert(len <= 32);
566     tcg_debug_assert(ofs + len <= 32);
567 
568     if (len == 32) {
569         tcg_gen_mov_i32(ret, arg2);
570         return;
571     }
572     if (TCG_TARGET_HAS_deposit_i32 && TCG_TARGET_deposit_i32_valid(ofs, len)) {
573         tcg_gen_op5ii_i32(INDEX_op_deposit_i32, ret, arg1, arg2, ofs, len);
574         return;
575     }
576 
577     t1 = tcg_temp_new_i32();
578 
579     if (TCG_TARGET_HAS_extract2_i32) {
580         if (ofs + len == 32) {
581             tcg_gen_shli_i32(t1, arg1, len);
582             tcg_gen_extract2_i32(ret, t1, arg2, len);
583             goto done;
584         }
585         if (ofs == 0) {
586             tcg_gen_extract2_i32(ret, arg1, arg2, len);
587             tcg_gen_rotli_i32(ret, ret, len);
588             goto done;
589         }
590     }
591 
592     mask = (1u << len) - 1;
593     if (ofs + len < 32) {
594         tcg_gen_andi_i32(t1, arg2, mask);
595         tcg_gen_shli_i32(t1, t1, ofs);
596     } else {
597         tcg_gen_shli_i32(t1, arg2, ofs);
598     }
599     tcg_gen_andi_i32(ret, arg1, ~(mask << ofs));
600     tcg_gen_or_i32(ret, ret, t1);
601  done:
602     tcg_temp_free_i32(t1);
603 }
604 
605 void tcg_gen_deposit_z_i32(TCGv_i32 ret, TCGv_i32 arg,
606                            unsigned int ofs, unsigned int len)
607 {
608     tcg_debug_assert(ofs < 32);
609     tcg_debug_assert(len > 0);
610     tcg_debug_assert(len <= 32);
611     tcg_debug_assert(ofs + len <= 32);
612 
613     if (ofs + len == 32) {
614         tcg_gen_shli_i32(ret, arg, ofs);
615     } else if (ofs == 0) {
616         tcg_gen_andi_i32(ret, arg, (1u << len) - 1);
617     } else if (TCG_TARGET_HAS_deposit_i32
618                && TCG_TARGET_deposit_i32_valid(ofs, len)) {
619         TCGv_i32 zero = tcg_constant_i32(0);
620         tcg_gen_op5ii_i32(INDEX_op_deposit_i32, ret, zero, arg, ofs, len);
621     } else {
622         /* To help two-operand hosts we prefer to zero-extend first,
623            which allows ARG to stay live.  */
624         switch (len) {
625         case 16:
626             if (TCG_TARGET_HAS_ext16u_i32) {
627                 tcg_gen_ext16u_i32(ret, arg);
628                 tcg_gen_shli_i32(ret, ret, ofs);
629                 return;
630             }
631             break;
632         case 8:
633             if (TCG_TARGET_HAS_ext8u_i32) {
634                 tcg_gen_ext8u_i32(ret, arg);
635                 tcg_gen_shli_i32(ret, ret, ofs);
636                 return;
637             }
638             break;
639         }
640         /* Otherwise prefer zero-extension over AND for code size.  */
641         switch (ofs + len) {
642         case 16:
643             if (TCG_TARGET_HAS_ext16u_i32) {
644                 tcg_gen_shli_i32(ret, arg, ofs);
645                 tcg_gen_ext16u_i32(ret, ret);
646                 return;
647             }
648             break;
649         case 8:
650             if (TCG_TARGET_HAS_ext8u_i32) {
651                 tcg_gen_shli_i32(ret, arg, ofs);
652                 tcg_gen_ext8u_i32(ret, ret);
653                 return;
654             }
655             break;
656         }
657         tcg_gen_andi_i32(ret, arg, (1u << len) - 1);
658         tcg_gen_shli_i32(ret, ret, ofs);
659     }
660 }
661 
662 void tcg_gen_extract_i32(TCGv_i32 ret, TCGv_i32 arg,
663                          unsigned int ofs, unsigned int len)
664 {
665     tcg_debug_assert(ofs < 32);
666     tcg_debug_assert(len > 0);
667     tcg_debug_assert(len <= 32);
668     tcg_debug_assert(ofs + len <= 32);
669 
670     /* Canonicalize certain special cases, even if extract is supported.  */
671     if (ofs + len == 32) {
672         tcg_gen_shri_i32(ret, arg, 32 - len);
673         return;
674     }
675     if (ofs == 0) {
676         tcg_gen_andi_i32(ret, arg, (1u << len) - 1);
677         return;
678     }
679 
680     if (TCG_TARGET_HAS_extract_i32
681         && TCG_TARGET_extract_i32_valid(ofs, len)) {
682         tcg_gen_op4ii_i32(INDEX_op_extract_i32, ret, arg, ofs, len);
683         return;
684     }
685 
686     /* Assume that zero-extension, if available, is cheaper than a shift.  */
687     switch (ofs + len) {
688     case 16:
689         if (TCG_TARGET_HAS_ext16u_i32) {
690             tcg_gen_ext16u_i32(ret, arg);
691             tcg_gen_shri_i32(ret, ret, ofs);
692             return;
693         }
694         break;
695     case 8:
696         if (TCG_TARGET_HAS_ext8u_i32) {
697             tcg_gen_ext8u_i32(ret, arg);
698             tcg_gen_shri_i32(ret, ret, ofs);
699             return;
700         }
701         break;
702     }
703 
704     /* ??? Ideally we'd know what values are available for immediate AND.
705        Assume that 8 bits are available, plus the special case of 16,
706        so that we get ext8u, ext16u.  */
707     switch (len) {
708     case 1 ... 8: case 16:
709         tcg_gen_shri_i32(ret, arg, ofs);
710         tcg_gen_andi_i32(ret, ret, (1u << len) - 1);
711         break;
712     default:
713         tcg_gen_shli_i32(ret, arg, 32 - len - ofs);
714         tcg_gen_shri_i32(ret, ret, 32 - len);
715         break;
716     }
717 }
718 
719 void tcg_gen_sextract_i32(TCGv_i32 ret, TCGv_i32 arg,
720                           unsigned int ofs, unsigned int len)
721 {
722     tcg_debug_assert(ofs < 32);
723     tcg_debug_assert(len > 0);
724     tcg_debug_assert(len <= 32);
725     tcg_debug_assert(ofs + len <= 32);
726 
727     /* Canonicalize certain special cases, even if extract is supported.  */
728     if (ofs + len == 32) {
729         tcg_gen_sari_i32(ret, arg, 32 - len);
730         return;
731     }
732     if (ofs == 0) {
733         switch (len) {
734         case 16:
735             tcg_gen_ext16s_i32(ret, arg);
736             return;
737         case 8:
738             tcg_gen_ext8s_i32(ret, arg);
739             return;
740         }
741     }
742 
743     if (TCG_TARGET_HAS_sextract_i32
744         && TCG_TARGET_extract_i32_valid(ofs, len)) {
745         tcg_gen_op4ii_i32(INDEX_op_sextract_i32, ret, arg, ofs, len);
746         return;
747     }
748 
749     /* Assume that sign-extension, if available, is cheaper than a shift.  */
750     switch (ofs + len) {
751     case 16:
752         if (TCG_TARGET_HAS_ext16s_i32) {
753             tcg_gen_ext16s_i32(ret, arg);
754             tcg_gen_sari_i32(ret, ret, ofs);
755             return;
756         }
757         break;
758     case 8:
759         if (TCG_TARGET_HAS_ext8s_i32) {
760             tcg_gen_ext8s_i32(ret, arg);
761             tcg_gen_sari_i32(ret, ret, ofs);
762             return;
763         }
764         break;
765     }
766     switch (len) {
767     case 16:
768         if (TCG_TARGET_HAS_ext16s_i32) {
769             tcg_gen_shri_i32(ret, arg, ofs);
770             tcg_gen_ext16s_i32(ret, ret);
771             return;
772         }
773         break;
774     case 8:
775         if (TCG_TARGET_HAS_ext8s_i32) {
776             tcg_gen_shri_i32(ret, arg, ofs);
777             tcg_gen_ext8s_i32(ret, ret);
778             return;
779         }
780         break;
781     }
782 
783     tcg_gen_shli_i32(ret, arg, 32 - len - ofs);
784     tcg_gen_sari_i32(ret, ret, 32 - len);
785 }
786 
787 /*
788  * Extract 32-bits from a 64-bit input, ah:al, starting from ofs.
789  * Unlike tcg_gen_extract_i32 above, len is fixed at 32.
790  */
791 void tcg_gen_extract2_i32(TCGv_i32 ret, TCGv_i32 al, TCGv_i32 ah,
792                           unsigned int ofs)
793 {
794     tcg_debug_assert(ofs <= 32);
795     if (ofs == 0) {
796         tcg_gen_mov_i32(ret, al);
797     } else if (ofs == 32) {
798         tcg_gen_mov_i32(ret, ah);
799     } else if (al == ah) {
800         tcg_gen_rotri_i32(ret, al, ofs);
801     } else if (TCG_TARGET_HAS_extract2_i32) {
802         tcg_gen_op4i_i32(INDEX_op_extract2_i32, ret, al, ah, ofs);
803     } else {
804         TCGv_i32 t0 = tcg_temp_new_i32();
805         tcg_gen_shri_i32(t0, al, ofs);
806         tcg_gen_deposit_i32(ret, t0, ah, 32 - ofs, ofs);
807         tcg_temp_free_i32(t0);
808     }
809 }
810 
811 void tcg_gen_movcond_i32(TCGCond cond, TCGv_i32 ret, TCGv_i32 c1,
812                          TCGv_i32 c2, TCGv_i32 v1, TCGv_i32 v2)
813 {
814     if (cond == TCG_COND_ALWAYS) {
815         tcg_gen_mov_i32(ret, v1);
816     } else if (cond == TCG_COND_NEVER) {
817         tcg_gen_mov_i32(ret, v2);
818     } else if (TCG_TARGET_HAS_movcond_i32) {
819         tcg_gen_op6i_i32(INDEX_op_movcond_i32, ret, c1, c2, v1, v2, cond);
820     } else {
821         TCGv_i32 t0 = tcg_temp_new_i32();
822         TCGv_i32 t1 = tcg_temp_new_i32();
823         tcg_gen_setcond_i32(cond, t0, c1, c2);
824         tcg_gen_neg_i32(t0, t0);
825         tcg_gen_and_i32(t1, v1, t0);
826         tcg_gen_andc_i32(ret, v2, t0);
827         tcg_gen_or_i32(ret, ret, t1);
828         tcg_temp_free_i32(t0);
829         tcg_temp_free_i32(t1);
830     }
831 }
832 
833 void tcg_gen_add2_i32(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 al,
834                       TCGv_i32 ah, TCGv_i32 bl, TCGv_i32 bh)
835 {
836     if (TCG_TARGET_HAS_add2_i32) {
837         tcg_gen_op6_i32(INDEX_op_add2_i32, rl, rh, al, ah, bl, bh);
838     } else {
839         TCGv_i64 t0 = tcg_temp_new_i64();
840         TCGv_i64 t1 = tcg_temp_new_i64();
841         tcg_gen_concat_i32_i64(t0, al, ah);
842         tcg_gen_concat_i32_i64(t1, bl, bh);
843         tcg_gen_add_i64(t0, t0, t1);
844         tcg_gen_extr_i64_i32(rl, rh, t0);
845         tcg_temp_free_i64(t0);
846         tcg_temp_free_i64(t1);
847     }
848 }
849 
850 void tcg_gen_sub2_i32(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 al,
851                       TCGv_i32 ah, TCGv_i32 bl, TCGv_i32 bh)
852 {
853     if (TCG_TARGET_HAS_sub2_i32) {
854         tcg_gen_op6_i32(INDEX_op_sub2_i32, rl, rh, al, ah, bl, bh);
855     } else {
856         TCGv_i64 t0 = tcg_temp_new_i64();
857         TCGv_i64 t1 = tcg_temp_new_i64();
858         tcg_gen_concat_i32_i64(t0, al, ah);
859         tcg_gen_concat_i32_i64(t1, bl, bh);
860         tcg_gen_sub_i64(t0, t0, t1);
861         tcg_gen_extr_i64_i32(rl, rh, t0);
862         tcg_temp_free_i64(t0);
863         tcg_temp_free_i64(t1);
864     }
865 }
866 
867 void tcg_gen_mulu2_i32(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 arg1, TCGv_i32 arg2)
868 {
869     if (TCG_TARGET_HAS_mulu2_i32) {
870         tcg_gen_op4_i32(INDEX_op_mulu2_i32, rl, rh, arg1, arg2);
871     } else if (TCG_TARGET_HAS_muluh_i32) {
872         TCGv_i32 t = tcg_temp_new_i32();
873         tcg_gen_op3_i32(INDEX_op_mul_i32, t, arg1, arg2);
874         tcg_gen_op3_i32(INDEX_op_muluh_i32, rh, arg1, arg2);
875         tcg_gen_mov_i32(rl, t);
876         tcg_temp_free_i32(t);
877     } else if (TCG_TARGET_REG_BITS == 64) {
878         TCGv_i64 t0 = tcg_temp_new_i64();
879         TCGv_i64 t1 = tcg_temp_new_i64();
880         tcg_gen_extu_i32_i64(t0, arg1);
881         tcg_gen_extu_i32_i64(t1, arg2);
882         tcg_gen_mul_i64(t0, t0, t1);
883         tcg_gen_extr_i64_i32(rl, rh, t0);
884         tcg_temp_free_i64(t0);
885         tcg_temp_free_i64(t1);
886     } else {
887         qemu_build_not_reached();
888     }
889 }
890 
891 void tcg_gen_muls2_i32(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 arg1, TCGv_i32 arg2)
892 {
893     if (TCG_TARGET_HAS_muls2_i32) {
894         tcg_gen_op4_i32(INDEX_op_muls2_i32, rl, rh, arg1, arg2);
895     } else if (TCG_TARGET_HAS_mulsh_i32) {
896         TCGv_i32 t = tcg_temp_new_i32();
897         tcg_gen_op3_i32(INDEX_op_mul_i32, t, arg1, arg2);
898         tcg_gen_op3_i32(INDEX_op_mulsh_i32, rh, arg1, arg2);
899         tcg_gen_mov_i32(rl, t);
900         tcg_temp_free_i32(t);
901     } else if (TCG_TARGET_REG_BITS == 32) {
902         TCGv_i32 t0 = tcg_temp_new_i32();
903         TCGv_i32 t1 = tcg_temp_new_i32();
904         TCGv_i32 t2 = tcg_temp_new_i32();
905         TCGv_i32 t3 = tcg_temp_new_i32();
906         tcg_gen_mulu2_i32(t0, t1, arg1, arg2);
907         /* Adjust for negative inputs.  */
908         tcg_gen_sari_i32(t2, arg1, 31);
909         tcg_gen_sari_i32(t3, arg2, 31);
910         tcg_gen_and_i32(t2, t2, arg2);
911         tcg_gen_and_i32(t3, t3, arg1);
912         tcg_gen_sub_i32(rh, t1, t2);
913         tcg_gen_sub_i32(rh, rh, t3);
914         tcg_gen_mov_i32(rl, t0);
915         tcg_temp_free_i32(t0);
916         tcg_temp_free_i32(t1);
917         tcg_temp_free_i32(t2);
918         tcg_temp_free_i32(t3);
919     } else {
920         TCGv_i64 t0 = tcg_temp_new_i64();
921         TCGv_i64 t1 = tcg_temp_new_i64();
922         tcg_gen_ext_i32_i64(t0, arg1);
923         tcg_gen_ext_i32_i64(t1, arg2);
924         tcg_gen_mul_i64(t0, t0, t1);
925         tcg_gen_extr_i64_i32(rl, rh, t0);
926         tcg_temp_free_i64(t0);
927         tcg_temp_free_i64(t1);
928     }
929 }
930 
931 void tcg_gen_mulsu2_i32(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 arg1, TCGv_i32 arg2)
932 {
933     if (TCG_TARGET_REG_BITS == 32) {
934         TCGv_i32 t0 = tcg_temp_new_i32();
935         TCGv_i32 t1 = tcg_temp_new_i32();
936         TCGv_i32 t2 = tcg_temp_new_i32();
937         tcg_gen_mulu2_i32(t0, t1, arg1, arg2);
938         /* Adjust for negative input for the signed arg1.  */
939         tcg_gen_sari_i32(t2, arg1, 31);
940         tcg_gen_and_i32(t2, t2, arg2);
941         tcg_gen_sub_i32(rh, t1, t2);
942         tcg_gen_mov_i32(rl, t0);
943         tcg_temp_free_i32(t0);
944         tcg_temp_free_i32(t1);
945         tcg_temp_free_i32(t2);
946     } else {
947         TCGv_i64 t0 = tcg_temp_new_i64();
948         TCGv_i64 t1 = tcg_temp_new_i64();
949         tcg_gen_ext_i32_i64(t0, arg1);
950         tcg_gen_extu_i32_i64(t1, arg2);
951         tcg_gen_mul_i64(t0, t0, t1);
952         tcg_gen_extr_i64_i32(rl, rh, t0);
953         tcg_temp_free_i64(t0);
954         tcg_temp_free_i64(t1);
955     }
956 }
957 
958 void tcg_gen_ext8s_i32(TCGv_i32 ret, TCGv_i32 arg)
959 {
960     if (TCG_TARGET_HAS_ext8s_i32) {
961         tcg_gen_op2_i32(INDEX_op_ext8s_i32, ret, arg);
962     } else {
963         tcg_gen_shli_i32(ret, arg, 24);
964         tcg_gen_sari_i32(ret, ret, 24);
965     }
966 }
967 
968 void tcg_gen_ext16s_i32(TCGv_i32 ret, TCGv_i32 arg)
969 {
970     if (TCG_TARGET_HAS_ext16s_i32) {
971         tcg_gen_op2_i32(INDEX_op_ext16s_i32, ret, arg);
972     } else {
973         tcg_gen_shli_i32(ret, arg, 16);
974         tcg_gen_sari_i32(ret, ret, 16);
975     }
976 }
977 
978 void tcg_gen_ext8u_i32(TCGv_i32 ret, TCGv_i32 arg)
979 {
980     if (TCG_TARGET_HAS_ext8u_i32) {
981         tcg_gen_op2_i32(INDEX_op_ext8u_i32, ret, arg);
982     } else {
983         tcg_gen_andi_i32(ret, arg, 0xffu);
984     }
985 }
986 
987 void tcg_gen_ext16u_i32(TCGv_i32 ret, TCGv_i32 arg)
988 {
989     if (TCG_TARGET_HAS_ext16u_i32) {
990         tcg_gen_op2_i32(INDEX_op_ext16u_i32, ret, arg);
991     } else {
992         tcg_gen_andi_i32(ret, arg, 0xffffu);
993     }
994 }
995 
996 void tcg_gen_bswap16_i32(TCGv_i32 ret, TCGv_i32 arg, int flags)
997 {
998     /* Only one extension flag may be present. */
999     tcg_debug_assert(!(flags & TCG_BSWAP_OS) || !(flags & TCG_BSWAP_OZ));
1000 
1001     if (TCG_TARGET_HAS_bswap16_i32) {
1002         tcg_gen_op3i_i32(INDEX_op_bswap16_i32, ret, arg, flags);
1003     } else {
1004         TCGv_i32 t0 = tcg_temp_new_i32();
1005         TCGv_i32 t1 = tcg_temp_new_i32();
1006 
1007         tcg_gen_shri_i32(t0, arg, 8);
1008         if (!(flags & TCG_BSWAP_IZ)) {
1009             tcg_gen_ext8u_i32(t0, t0);
1010         }
1011 
1012         if (flags & TCG_BSWAP_OS) {
1013             tcg_gen_shli_i32(t1, arg, 24);
1014             tcg_gen_sari_i32(t1, t1, 16);
1015         } else if (flags & TCG_BSWAP_OZ) {
1016             tcg_gen_ext8u_i32(t1, arg);
1017             tcg_gen_shli_i32(t1, t1, 8);
1018         } else {
1019             tcg_gen_shli_i32(t1, arg, 8);
1020         }
1021 
1022         tcg_gen_or_i32(ret, t0, t1);
1023         tcg_temp_free_i32(t0);
1024         tcg_temp_free_i32(t1);
1025     }
1026 }
1027 
1028 void tcg_gen_bswap32_i32(TCGv_i32 ret, TCGv_i32 arg)
1029 {
1030     if (TCG_TARGET_HAS_bswap32_i32) {
1031         tcg_gen_op3i_i32(INDEX_op_bswap32_i32, ret, arg, 0);
1032     } else {
1033         TCGv_i32 t0 = tcg_temp_new_i32();
1034         TCGv_i32 t1 = tcg_temp_new_i32();
1035         TCGv_i32 t2 = tcg_constant_i32(0x00ff00ff);
1036 
1037                                         /* arg = abcd */
1038         tcg_gen_shri_i32(t0, arg, 8);   /*  t0 = .abc */
1039         tcg_gen_and_i32(t1, arg, t2);   /*  t1 = .b.d */
1040         tcg_gen_and_i32(t0, t0, t2);    /*  t0 = .a.c */
1041         tcg_gen_shli_i32(t1, t1, 8);    /*  t1 = b.d. */
1042         tcg_gen_or_i32(ret, t0, t1);    /* ret = badc */
1043 
1044         tcg_gen_shri_i32(t0, ret, 16);  /*  t0 = ..ba */
1045         tcg_gen_shli_i32(t1, ret, 16);  /*  t1 = dc.. */
1046         tcg_gen_or_i32(ret, t0, t1);    /* ret = dcba */
1047 
1048         tcg_temp_free_i32(t0);
1049         tcg_temp_free_i32(t1);
1050     }
1051 }
1052 
1053 void tcg_gen_hswap_i32(TCGv_i32 ret, TCGv_i32 arg)
1054 {
1055     /* Swapping 2 16-bit elements is a rotate. */
1056     tcg_gen_rotli_i32(ret, arg, 16);
1057 }
1058 
1059 void tcg_gen_smin_i32(TCGv_i32 ret, TCGv_i32 a, TCGv_i32 b)
1060 {
1061     tcg_gen_movcond_i32(TCG_COND_LT, ret, a, b, a, b);
1062 }
1063 
1064 void tcg_gen_umin_i32(TCGv_i32 ret, TCGv_i32 a, TCGv_i32 b)
1065 {
1066     tcg_gen_movcond_i32(TCG_COND_LTU, ret, a, b, a, b);
1067 }
1068 
1069 void tcg_gen_smax_i32(TCGv_i32 ret, TCGv_i32 a, TCGv_i32 b)
1070 {
1071     tcg_gen_movcond_i32(TCG_COND_LT, ret, a, b, b, a);
1072 }
1073 
1074 void tcg_gen_umax_i32(TCGv_i32 ret, TCGv_i32 a, TCGv_i32 b)
1075 {
1076     tcg_gen_movcond_i32(TCG_COND_LTU, ret, a, b, b, a);
1077 }
1078 
1079 void tcg_gen_abs_i32(TCGv_i32 ret, TCGv_i32 a)
1080 {
1081     TCGv_i32 t = tcg_temp_new_i32();
1082 
1083     tcg_gen_sari_i32(t, a, 31);
1084     tcg_gen_xor_i32(ret, a, t);
1085     tcg_gen_sub_i32(ret, ret, t);
1086     tcg_temp_free_i32(t);
1087 }
1088 
1089 /* 64-bit ops */
1090 
1091 #if TCG_TARGET_REG_BITS == 32
1092 /* These are all inline for TCG_TARGET_REG_BITS == 64.  */
1093 
1094 void tcg_gen_discard_i64(TCGv_i64 arg)
1095 {
1096     tcg_gen_discard_i32(TCGV_LOW(arg));
1097     tcg_gen_discard_i32(TCGV_HIGH(arg));
1098 }
1099 
1100 void tcg_gen_mov_i64(TCGv_i64 ret, TCGv_i64 arg)
1101 {
1102     TCGTemp *ts = tcgv_i64_temp(arg);
1103 
1104     /* Canonicalize TCGv_i64 TEMP_CONST into TCGv_i32 TEMP_CONST. */
1105     if (ts->kind == TEMP_CONST) {
1106         tcg_gen_movi_i64(ret, ts->val);
1107     } else {
1108         tcg_gen_mov_i32(TCGV_LOW(ret), TCGV_LOW(arg));
1109         tcg_gen_mov_i32(TCGV_HIGH(ret), TCGV_HIGH(arg));
1110     }
1111 }
1112 
1113 void tcg_gen_movi_i64(TCGv_i64 ret, int64_t arg)
1114 {
1115     tcg_gen_movi_i32(TCGV_LOW(ret), arg);
1116     tcg_gen_movi_i32(TCGV_HIGH(ret), arg >> 32);
1117 }
1118 
1119 void tcg_gen_ld8u_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset)
1120 {
1121     tcg_gen_ld8u_i32(TCGV_LOW(ret), arg2, offset);
1122     tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
1123 }
1124 
1125 void tcg_gen_ld8s_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset)
1126 {
1127     tcg_gen_ld8s_i32(TCGV_LOW(ret), arg2, offset);
1128     tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31);
1129 }
1130 
1131 void tcg_gen_ld16u_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset)
1132 {
1133     tcg_gen_ld16u_i32(TCGV_LOW(ret), arg2, offset);
1134     tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
1135 }
1136 
1137 void tcg_gen_ld16s_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset)
1138 {
1139     tcg_gen_ld16s_i32(TCGV_LOW(ret), arg2, offset);
1140     tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31);
1141 }
1142 
1143 void tcg_gen_ld32u_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset)
1144 {
1145     tcg_gen_ld_i32(TCGV_LOW(ret), arg2, offset);
1146     tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
1147 }
1148 
1149 void tcg_gen_ld32s_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset)
1150 {
1151     tcg_gen_ld_i32(TCGV_LOW(ret), arg2, offset);
1152     tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31);
1153 }
1154 
1155 void tcg_gen_ld_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset)
1156 {
1157     /* Since arg2 and ret have different types,
1158        they cannot be the same temporary */
1159 #if HOST_BIG_ENDIAN
1160     tcg_gen_ld_i32(TCGV_HIGH(ret), arg2, offset);
1161     tcg_gen_ld_i32(TCGV_LOW(ret), arg2, offset + 4);
1162 #else
1163     tcg_gen_ld_i32(TCGV_LOW(ret), arg2, offset);
1164     tcg_gen_ld_i32(TCGV_HIGH(ret), arg2, offset + 4);
1165 #endif
1166 }
1167 
1168 void tcg_gen_st8_i64(TCGv_i64 arg1, TCGv_ptr arg2, tcg_target_long offset)
1169 {
1170     tcg_gen_st8_i32(TCGV_LOW(arg1), arg2, offset);
1171 }
1172 
1173 void tcg_gen_st16_i64(TCGv_i64 arg1, TCGv_ptr arg2, tcg_target_long offset)
1174 {
1175     tcg_gen_st16_i32(TCGV_LOW(arg1), arg2, offset);
1176 }
1177 
1178 void tcg_gen_st32_i64(TCGv_i64 arg1, TCGv_ptr arg2, tcg_target_long offset)
1179 {
1180     tcg_gen_st_i32(TCGV_LOW(arg1), arg2, offset);
1181 }
1182 
1183 void tcg_gen_st_i64(TCGv_i64 arg1, TCGv_ptr arg2, tcg_target_long offset)
1184 {
1185 #if HOST_BIG_ENDIAN
1186     tcg_gen_st_i32(TCGV_HIGH(arg1), arg2, offset);
1187     tcg_gen_st_i32(TCGV_LOW(arg1), arg2, offset + 4);
1188 #else
1189     tcg_gen_st_i32(TCGV_LOW(arg1), arg2, offset);
1190     tcg_gen_st_i32(TCGV_HIGH(arg1), arg2, offset + 4);
1191 #endif
1192 }
1193 
1194 void tcg_gen_add_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
1195 {
1196     tcg_gen_add2_i32(TCGV_LOW(ret), TCGV_HIGH(ret), TCGV_LOW(arg1),
1197                      TCGV_HIGH(arg1), TCGV_LOW(arg2), TCGV_HIGH(arg2));
1198 }
1199 
1200 void tcg_gen_sub_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
1201 {
1202     tcg_gen_sub2_i32(TCGV_LOW(ret), TCGV_HIGH(ret), TCGV_LOW(arg1),
1203                      TCGV_HIGH(arg1), TCGV_LOW(arg2), TCGV_HIGH(arg2));
1204 }
1205 
1206 void tcg_gen_and_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
1207 {
1208     tcg_gen_and_i32(TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_LOW(arg2));
1209     tcg_gen_and_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), TCGV_HIGH(arg2));
1210 }
1211 
1212 void tcg_gen_or_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
1213 {
1214     tcg_gen_or_i32(TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_LOW(arg2));
1215     tcg_gen_or_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), TCGV_HIGH(arg2));
1216 }
1217 
1218 void tcg_gen_xor_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
1219 {
1220     tcg_gen_xor_i32(TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_LOW(arg2));
1221     tcg_gen_xor_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), TCGV_HIGH(arg2));
1222 }
1223 
1224 void tcg_gen_shl_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
1225 {
1226     gen_helper_shl_i64(ret, arg1, arg2);
1227 }
1228 
1229 void tcg_gen_shr_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
1230 {
1231     gen_helper_shr_i64(ret, arg1, arg2);
1232 }
1233 
1234 void tcg_gen_sar_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
1235 {
1236     gen_helper_sar_i64(ret, arg1, arg2);
1237 }
1238 
1239 void tcg_gen_mul_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
1240 {
1241     TCGv_i64 t0;
1242     TCGv_i32 t1;
1243 
1244     t0 = tcg_temp_new_i64();
1245     t1 = tcg_temp_new_i32();
1246 
1247     tcg_gen_mulu2_i32(TCGV_LOW(t0), TCGV_HIGH(t0),
1248                       TCGV_LOW(arg1), TCGV_LOW(arg2));
1249 
1250     tcg_gen_mul_i32(t1, TCGV_LOW(arg1), TCGV_HIGH(arg2));
1251     tcg_gen_add_i32(TCGV_HIGH(t0), TCGV_HIGH(t0), t1);
1252     tcg_gen_mul_i32(t1, TCGV_HIGH(arg1), TCGV_LOW(arg2));
1253     tcg_gen_add_i32(TCGV_HIGH(t0), TCGV_HIGH(t0), t1);
1254 
1255     tcg_gen_mov_i64(ret, t0);
1256     tcg_temp_free_i64(t0);
1257     tcg_temp_free_i32(t1);
1258 }
1259 
1260 #else
1261 
1262 void tcg_gen_movi_i64(TCGv_i64 ret, int64_t arg)
1263 {
1264     tcg_gen_mov_i64(ret, tcg_constant_i64(arg));
1265 }
1266 
#endif /* TCG_TARGET_REG_BITS == 32 */
1268 
1269 void tcg_gen_addi_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
1270 {
1271     /* some cases can be optimized here */
1272     if (arg2 == 0) {
1273         tcg_gen_mov_i64(ret, arg1);
1274     } else if (TCG_TARGET_REG_BITS == 64) {
1275         tcg_gen_add_i64(ret, arg1, tcg_constant_i64(arg2));
1276     } else {
1277         tcg_gen_add2_i32(TCGV_LOW(ret), TCGV_HIGH(ret),
1278                          TCGV_LOW(arg1), TCGV_HIGH(arg1),
1279                          tcg_constant_i32(arg2), tcg_constant_i32(arg2 >> 32));
1280     }
1281 }
1282 
1283 void tcg_gen_subfi_i64(TCGv_i64 ret, int64_t arg1, TCGv_i64 arg2)
1284 {
1285     if (arg1 == 0 && TCG_TARGET_HAS_neg_i64) {
1286         /* Don't recurse with tcg_gen_neg_i64.  */
1287         tcg_gen_op2_i64(INDEX_op_neg_i64, ret, arg2);
1288     } else if (TCG_TARGET_REG_BITS == 64) {
1289         tcg_gen_sub_i64(ret, tcg_constant_i64(arg1), arg2);
1290     } else {
1291         tcg_gen_sub2_i32(TCGV_LOW(ret), TCGV_HIGH(ret),
1292                          tcg_constant_i32(arg1), tcg_constant_i32(arg1 >> 32),
1293                          TCGV_LOW(arg2), TCGV_HIGH(arg2));
1294     }
1295 }
1296 
1297 void tcg_gen_subi_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
1298 {
1299     /* some cases can be optimized here */
1300     if (arg2 == 0) {
1301         tcg_gen_mov_i64(ret, arg1);
1302     } else if (TCG_TARGET_REG_BITS == 64) {
1303         tcg_gen_sub_i64(ret, arg1, tcg_constant_i64(arg2));
1304     } else {
1305         tcg_gen_sub2_i32(TCGV_LOW(ret), TCGV_HIGH(ret),
1306                          TCGV_LOW(arg1), TCGV_HIGH(arg1),
1307                          tcg_constant_i32(arg2), tcg_constant_i32(arg2 >> 32));
1308     }
1309 }
1310 
1311 void tcg_gen_andi_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
1312 {
1313     if (TCG_TARGET_REG_BITS == 32) {
1314         tcg_gen_andi_i32(TCGV_LOW(ret), TCGV_LOW(arg1), arg2);
1315         tcg_gen_andi_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), arg2 >> 32);
1316         return;
1317     }
1318 
1319     /* Some cases can be optimized here.  */
1320     switch (arg2) {
1321     case 0:
1322         tcg_gen_movi_i64(ret, 0);
1323         return;
1324     case -1:
1325         tcg_gen_mov_i64(ret, arg1);
1326         return;
1327     case 0xff:
1328         /* Don't recurse with tcg_gen_ext8u_i64.  */
1329         if (TCG_TARGET_HAS_ext8u_i64) {
1330             tcg_gen_op2_i64(INDEX_op_ext8u_i64, ret, arg1);
1331             return;
1332         }
1333         break;
1334     case 0xffff:
1335         if (TCG_TARGET_HAS_ext16u_i64) {
1336             tcg_gen_op2_i64(INDEX_op_ext16u_i64, ret, arg1);
1337             return;
1338         }
1339         break;
1340     case 0xffffffffu:
1341         if (TCG_TARGET_HAS_ext32u_i64) {
1342             tcg_gen_op2_i64(INDEX_op_ext32u_i64, ret, arg1);
1343             return;
1344         }
1345         break;
1346     }
1347 
1348     tcg_gen_and_i64(ret, arg1, tcg_constant_i64(arg2));
1349 }
1350 
1351 void tcg_gen_ori_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
1352 {
1353     if (TCG_TARGET_REG_BITS == 32) {
1354         tcg_gen_ori_i32(TCGV_LOW(ret), TCGV_LOW(arg1), arg2);
1355         tcg_gen_ori_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), arg2 >> 32);
1356         return;
1357     }
1358     /* Some cases can be optimized here.  */
1359     if (arg2 == -1) {
1360         tcg_gen_movi_i64(ret, -1);
1361     } else if (arg2 == 0) {
1362         tcg_gen_mov_i64(ret, arg1);
1363     } else {
1364         tcg_gen_or_i64(ret, arg1, tcg_constant_i64(arg2));
1365     }
1366 }
1367 
1368 void tcg_gen_xori_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
1369 {
1370     if (TCG_TARGET_REG_BITS == 32) {
1371         tcg_gen_xori_i32(TCGV_LOW(ret), TCGV_LOW(arg1), arg2);
1372         tcg_gen_xori_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), arg2 >> 32);
1373         return;
1374     }
1375     /* Some cases can be optimized here.  */
1376     if (arg2 == 0) {
1377         tcg_gen_mov_i64(ret, arg1);
1378     } else if (arg2 == -1 && TCG_TARGET_HAS_not_i64) {
1379         /* Don't recurse with tcg_gen_not_i64.  */
1380         tcg_gen_op2_i64(INDEX_op_not_i64, ret, arg1);
1381     } else {
1382         tcg_gen_xor_i64(ret, arg1, tcg_constant_i64(arg2));
1383     }
1384 }
1385 
1386 static inline void tcg_gen_shifti_i64(TCGv_i64 ret, TCGv_i64 arg1,
1387                                       unsigned c, bool right, bool arith)
1388 {
1389     tcg_debug_assert(c < 64);
1390     if (c == 0) {
1391         tcg_gen_mov_i32(TCGV_LOW(ret), TCGV_LOW(arg1));
1392         tcg_gen_mov_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1));
1393     } else if (c >= 32) {
1394         c -= 32;
1395         if (right) {
1396             if (arith) {
1397                 tcg_gen_sari_i32(TCGV_LOW(ret), TCGV_HIGH(arg1), c);
1398                 tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), 31);
1399             } else {
1400                 tcg_gen_shri_i32(TCGV_LOW(ret), TCGV_HIGH(arg1), c);
1401                 tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
1402             }
1403         } else {
1404             tcg_gen_shli_i32(TCGV_HIGH(ret), TCGV_LOW(arg1), c);
1405             tcg_gen_movi_i32(TCGV_LOW(ret), 0);
1406         }
1407     } else if (right) {
1408         if (TCG_TARGET_HAS_extract2_i32) {
1409             tcg_gen_extract2_i32(TCGV_LOW(ret),
1410                                  TCGV_LOW(arg1), TCGV_HIGH(arg1), c);
1411         } else {
1412             tcg_gen_shri_i32(TCGV_LOW(ret), TCGV_LOW(arg1), c);
1413             tcg_gen_deposit_i32(TCGV_LOW(ret), TCGV_LOW(ret),
1414                                 TCGV_HIGH(arg1), 32 - c, c);
1415         }
1416         if (arith) {
1417             tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), c);
1418         } else {
1419             tcg_gen_shri_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), c);
1420         }
1421     } else {
1422         if (TCG_TARGET_HAS_extract2_i32) {
1423             tcg_gen_extract2_i32(TCGV_HIGH(ret),
1424                                  TCGV_LOW(arg1), TCGV_HIGH(arg1), 32 - c);
1425         } else {
1426             TCGv_i32 t0 = tcg_temp_new_i32();
1427             tcg_gen_shri_i32(t0, TCGV_LOW(arg1), 32 - c);
1428             tcg_gen_deposit_i32(TCGV_HIGH(ret), t0,
1429                                 TCGV_HIGH(arg1), c, 32 - c);
1430             tcg_temp_free_i32(t0);
1431         }
1432         tcg_gen_shli_i32(TCGV_LOW(ret), TCGV_LOW(arg1), c);
1433     }
1434 }
1435 
1436 void tcg_gen_shli_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
1437 {
1438     tcg_debug_assert(arg2 >= 0 && arg2 < 64);
1439     if (TCG_TARGET_REG_BITS == 32) {
1440         tcg_gen_shifti_i64(ret, arg1, arg2, 0, 0);
1441     } else if (arg2 == 0) {
1442         tcg_gen_mov_i64(ret, arg1);
1443     } else {
1444         tcg_gen_shl_i64(ret, arg1, tcg_constant_i64(arg2));
1445     }
1446 }
1447 
1448 void tcg_gen_shri_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
1449 {
1450     tcg_debug_assert(arg2 >= 0 && arg2 < 64);
1451     if (TCG_TARGET_REG_BITS == 32) {
1452         tcg_gen_shifti_i64(ret, arg1, arg2, 1, 0);
1453     } else if (arg2 == 0) {
1454         tcg_gen_mov_i64(ret, arg1);
1455     } else {
1456         tcg_gen_shr_i64(ret, arg1, tcg_constant_i64(arg2));
1457     }
1458 }
1459 
1460 void tcg_gen_sari_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
1461 {
1462     tcg_debug_assert(arg2 >= 0 && arg2 < 64);
1463     if (TCG_TARGET_REG_BITS == 32) {
1464         tcg_gen_shifti_i64(ret, arg1, arg2, 1, 1);
1465     } else if (arg2 == 0) {
1466         tcg_gen_mov_i64(ret, arg1);
1467     } else {
1468         tcg_gen_sar_i64(ret, arg1, tcg_constant_i64(arg2));
1469     }
1470 }
1471 
1472 void tcg_gen_brcond_i64(TCGCond cond, TCGv_i64 arg1, TCGv_i64 arg2, TCGLabel *l)
1473 {
1474     if (cond == TCG_COND_ALWAYS) {
1475         tcg_gen_br(l);
1476     } else if (cond != TCG_COND_NEVER) {
1477         l->refs++;
1478         if (TCG_TARGET_REG_BITS == 32) {
1479             tcg_gen_op6ii_i32(INDEX_op_brcond2_i32, TCGV_LOW(arg1),
1480                               TCGV_HIGH(arg1), TCGV_LOW(arg2),
1481                               TCGV_HIGH(arg2), cond, label_arg(l));
1482         } else {
1483             tcg_gen_op4ii_i64(INDEX_op_brcond_i64, arg1, arg2, cond,
1484                               label_arg(l));
1485         }
1486     }
1487 }
1488 
1489 void tcg_gen_brcondi_i64(TCGCond cond, TCGv_i64 arg1, int64_t arg2, TCGLabel *l)
1490 {
1491     if (TCG_TARGET_REG_BITS == 64) {
1492         tcg_gen_brcond_i64(cond, arg1, tcg_constant_i64(arg2), l);
1493     } else if (cond == TCG_COND_ALWAYS) {
1494         tcg_gen_br(l);
1495     } else if (cond != TCG_COND_NEVER) {
1496         l->refs++;
1497         tcg_gen_op6ii_i32(INDEX_op_brcond2_i32,
1498                           TCGV_LOW(arg1), TCGV_HIGH(arg1),
1499                           tcg_constant_i32(arg2),
1500                           tcg_constant_i32(arg2 >> 32),
1501                           cond, label_arg(l));
1502     }
1503 }
1504 
1505 void tcg_gen_setcond_i64(TCGCond cond, TCGv_i64 ret,
1506                          TCGv_i64 arg1, TCGv_i64 arg2)
1507 {
1508     if (cond == TCG_COND_ALWAYS) {
1509         tcg_gen_movi_i64(ret, 1);
1510     } else if (cond == TCG_COND_NEVER) {
1511         tcg_gen_movi_i64(ret, 0);
1512     } else {
1513         if (TCG_TARGET_REG_BITS == 32) {
1514             tcg_gen_op6i_i32(INDEX_op_setcond2_i32, TCGV_LOW(ret),
1515                              TCGV_LOW(arg1), TCGV_HIGH(arg1),
1516                              TCGV_LOW(arg2), TCGV_HIGH(arg2), cond);
1517             tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
1518         } else {
1519             tcg_gen_op4i_i64(INDEX_op_setcond_i64, ret, arg1, arg2, cond);
1520         }
1521     }
1522 }
1523 
1524 void tcg_gen_setcondi_i64(TCGCond cond, TCGv_i64 ret,
1525                           TCGv_i64 arg1, int64_t arg2)
1526 {
1527     if (TCG_TARGET_REG_BITS == 64) {
1528         tcg_gen_setcond_i64(cond, ret, arg1, tcg_constant_i64(arg2));
1529     } else if (cond == TCG_COND_ALWAYS) {
1530         tcg_gen_movi_i64(ret, 1);
1531     } else if (cond == TCG_COND_NEVER) {
1532         tcg_gen_movi_i64(ret, 0);
1533     } else {
1534         tcg_gen_op6i_i32(INDEX_op_setcond2_i32, TCGV_LOW(ret),
1535                          TCGV_LOW(arg1), TCGV_HIGH(arg1),
1536                          tcg_constant_i32(arg2),
1537                          tcg_constant_i32(arg2 >> 32), cond);
1538         tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
1539     }
1540 }
1541 
1542 void tcg_gen_muli_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
1543 {
1544     if (arg2 == 0) {
1545         tcg_gen_movi_i64(ret, 0);
1546     } else if (is_power_of_2(arg2)) {
1547         tcg_gen_shli_i64(ret, arg1, ctz64(arg2));
1548     } else {
1549         TCGv_i64 t0 = tcg_const_i64(arg2);
1550         tcg_gen_mul_i64(ret, arg1, t0);
1551         tcg_temp_free_i64(t0);
1552     }
1553 }
1554 
1555 void tcg_gen_div_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
1556 {
1557     if (TCG_TARGET_HAS_div_i64) {
1558         tcg_gen_op3_i64(INDEX_op_div_i64, ret, arg1, arg2);
1559     } else if (TCG_TARGET_HAS_div2_i64) {
1560         TCGv_i64 t0 = tcg_temp_new_i64();
1561         tcg_gen_sari_i64(t0, arg1, 63);
1562         tcg_gen_op5_i64(INDEX_op_div2_i64, ret, t0, arg1, t0, arg2);
1563         tcg_temp_free_i64(t0);
1564     } else {
1565         gen_helper_div_i64(ret, arg1, arg2);
1566     }
1567 }
1568 
1569 void tcg_gen_rem_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
1570 {
1571     if (TCG_TARGET_HAS_rem_i64) {
1572         tcg_gen_op3_i64(INDEX_op_rem_i64, ret, arg1, arg2);
1573     } else if (TCG_TARGET_HAS_div_i64) {
1574         TCGv_i64 t0 = tcg_temp_new_i64();
1575         tcg_gen_op3_i64(INDEX_op_div_i64, t0, arg1, arg2);
1576         tcg_gen_mul_i64(t0, t0, arg2);
1577         tcg_gen_sub_i64(ret, arg1, t0);
1578         tcg_temp_free_i64(t0);
1579     } else if (TCG_TARGET_HAS_div2_i64) {
1580         TCGv_i64 t0 = tcg_temp_new_i64();
1581         tcg_gen_sari_i64(t0, arg1, 63);
1582         tcg_gen_op5_i64(INDEX_op_div2_i64, t0, ret, arg1, t0, arg2);
1583         tcg_temp_free_i64(t0);
1584     } else {
1585         gen_helper_rem_i64(ret, arg1, arg2);
1586     }
1587 }
1588 
1589 void tcg_gen_divu_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
1590 {
1591     if (TCG_TARGET_HAS_div_i64) {
1592         tcg_gen_op3_i64(INDEX_op_divu_i64, ret, arg1, arg2);
1593     } else if (TCG_TARGET_HAS_div2_i64) {
1594         TCGv_i64 t0 = tcg_temp_new_i64();
1595         tcg_gen_movi_i64(t0, 0);
1596         tcg_gen_op5_i64(INDEX_op_divu2_i64, ret, t0, arg1, t0, arg2);
1597         tcg_temp_free_i64(t0);
1598     } else {
1599         gen_helper_divu_i64(ret, arg1, arg2);
1600     }
1601 }
1602 
1603 void tcg_gen_remu_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
1604 {
1605     if (TCG_TARGET_HAS_rem_i64) {
1606         tcg_gen_op3_i64(INDEX_op_remu_i64, ret, arg1, arg2);
1607     } else if (TCG_TARGET_HAS_div_i64) {
1608         TCGv_i64 t0 = tcg_temp_new_i64();
1609         tcg_gen_op3_i64(INDEX_op_divu_i64, t0, arg1, arg2);
1610         tcg_gen_mul_i64(t0, t0, arg2);
1611         tcg_gen_sub_i64(ret, arg1, t0);
1612         tcg_temp_free_i64(t0);
1613     } else if (TCG_TARGET_HAS_div2_i64) {
1614         TCGv_i64 t0 = tcg_temp_new_i64();
1615         tcg_gen_movi_i64(t0, 0);
1616         tcg_gen_op5_i64(INDEX_op_divu2_i64, t0, ret, arg1, t0, arg2);
1617         tcg_temp_free_i64(t0);
1618     } else {
1619         gen_helper_remu_i64(ret, arg1, arg2);
1620     }
1621 }
1622 
1623 void tcg_gen_ext8s_i64(TCGv_i64 ret, TCGv_i64 arg)
1624 {
1625     if (TCG_TARGET_REG_BITS == 32) {
1626         tcg_gen_ext8s_i32(TCGV_LOW(ret), TCGV_LOW(arg));
1627         tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31);
1628     } else if (TCG_TARGET_HAS_ext8s_i64) {
1629         tcg_gen_op2_i64(INDEX_op_ext8s_i64, ret, arg);
1630     } else {
1631         tcg_gen_shli_i64(ret, arg, 56);
1632         tcg_gen_sari_i64(ret, ret, 56);
1633     }
1634 }
1635 
1636 void tcg_gen_ext16s_i64(TCGv_i64 ret, TCGv_i64 arg)
1637 {
1638     if (TCG_TARGET_REG_BITS == 32) {
1639         tcg_gen_ext16s_i32(TCGV_LOW(ret), TCGV_LOW(arg));
1640         tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31);
1641     } else if (TCG_TARGET_HAS_ext16s_i64) {
1642         tcg_gen_op2_i64(INDEX_op_ext16s_i64, ret, arg);
1643     } else {
1644         tcg_gen_shli_i64(ret, arg, 48);
1645         tcg_gen_sari_i64(ret, ret, 48);
1646     }
1647 }
1648 
1649 void tcg_gen_ext32s_i64(TCGv_i64 ret, TCGv_i64 arg)
1650 {
1651     if (TCG_TARGET_REG_BITS == 32) {
1652         tcg_gen_mov_i32(TCGV_LOW(ret), TCGV_LOW(arg));
1653         tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31);
1654     } else if (TCG_TARGET_HAS_ext32s_i64) {
1655         tcg_gen_op2_i64(INDEX_op_ext32s_i64, ret, arg);
1656     } else {
1657         tcg_gen_shli_i64(ret, arg, 32);
1658         tcg_gen_sari_i64(ret, ret, 32);
1659     }
1660 }
1661 
1662 void tcg_gen_ext8u_i64(TCGv_i64 ret, TCGv_i64 arg)
1663 {
1664     if (TCG_TARGET_REG_BITS == 32) {
1665         tcg_gen_ext8u_i32(TCGV_LOW(ret), TCGV_LOW(arg));
1666         tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
1667     } else if (TCG_TARGET_HAS_ext8u_i64) {
1668         tcg_gen_op2_i64(INDEX_op_ext8u_i64, ret, arg);
1669     } else {
1670         tcg_gen_andi_i64(ret, arg, 0xffu);
1671     }
1672 }
1673 
1674 void tcg_gen_ext16u_i64(TCGv_i64 ret, TCGv_i64 arg)
1675 {
1676     if (TCG_TARGET_REG_BITS == 32) {
1677         tcg_gen_ext16u_i32(TCGV_LOW(ret), TCGV_LOW(arg));
1678         tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
1679     } else if (TCG_TARGET_HAS_ext16u_i64) {
1680         tcg_gen_op2_i64(INDEX_op_ext16u_i64, ret, arg);
1681     } else {
1682         tcg_gen_andi_i64(ret, arg, 0xffffu);
1683     }
1684 }
1685 
1686 void tcg_gen_ext32u_i64(TCGv_i64 ret, TCGv_i64 arg)
1687 {
1688     if (TCG_TARGET_REG_BITS == 32) {
1689         tcg_gen_mov_i32(TCGV_LOW(ret), TCGV_LOW(arg));
1690         tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
1691     } else if (TCG_TARGET_HAS_ext32u_i64) {
1692         tcg_gen_op2_i64(INDEX_op_ext32u_i64, ret, arg);
1693     } else {
1694         tcg_gen_andi_i64(ret, arg, 0xffffffffu);
1695     }
1696 }
1697 
1698 void tcg_gen_bswap16_i64(TCGv_i64 ret, TCGv_i64 arg, int flags)
1699 {
1700     /* Only one extension flag may be present. */
1701     tcg_debug_assert(!(flags & TCG_BSWAP_OS) || !(flags & TCG_BSWAP_OZ));
1702 
1703     if (TCG_TARGET_REG_BITS == 32) {
1704         tcg_gen_bswap16_i32(TCGV_LOW(ret), TCGV_LOW(arg), flags);
1705         if (flags & TCG_BSWAP_OS) {
1706             tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31);
1707         } else {
1708             tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
1709         }
1710     } else if (TCG_TARGET_HAS_bswap16_i64) {
1711         tcg_gen_op3i_i64(INDEX_op_bswap16_i64, ret, arg, flags);
1712     } else {
1713         TCGv_i64 t0 = tcg_temp_new_i64();
1714         TCGv_i64 t1 = tcg_temp_new_i64();
1715 
1716         tcg_gen_shri_i64(t0, arg, 8);
1717         if (!(flags & TCG_BSWAP_IZ)) {
1718             tcg_gen_ext8u_i64(t0, t0);
1719         }
1720 
1721         if (flags & TCG_BSWAP_OS) {
1722             tcg_gen_shli_i64(t1, arg, 56);
1723             tcg_gen_sari_i64(t1, t1, 48);
1724         } else if (flags & TCG_BSWAP_OZ) {
1725             tcg_gen_ext8u_i64(t1, arg);
1726             tcg_gen_shli_i64(t1, t1, 8);
1727         } else {
1728             tcg_gen_shli_i64(t1, arg, 8);
1729         }
1730 
1731         tcg_gen_or_i64(ret, t0, t1);
1732         tcg_temp_free_i64(t0);
1733         tcg_temp_free_i64(t1);
1734     }
1735 }
1736 
1737 void tcg_gen_bswap32_i64(TCGv_i64 ret, TCGv_i64 arg, int flags)
1738 {
1739     /* Only one extension flag may be present. */
1740     tcg_debug_assert(!(flags & TCG_BSWAP_OS) || !(flags & TCG_BSWAP_OZ));
1741 
1742     if (TCG_TARGET_REG_BITS == 32) {
1743         tcg_gen_bswap32_i32(TCGV_LOW(ret), TCGV_LOW(arg));
1744         if (flags & TCG_BSWAP_OS) {
1745             tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31);
1746         } else {
1747             tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
1748         }
1749     } else if (TCG_TARGET_HAS_bswap32_i64) {
1750         tcg_gen_op3i_i64(INDEX_op_bswap32_i64, ret, arg, flags);
1751     } else {
1752         TCGv_i64 t0 = tcg_temp_new_i64();
1753         TCGv_i64 t1 = tcg_temp_new_i64();
1754         TCGv_i64 t2 = tcg_constant_i64(0x00ff00ff);
1755 
1756                                             /* arg = xxxxabcd */
1757         tcg_gen_shri_i64(t0, arg, 8);       /*  t0 = .xxxxabc */
1758         tcg_gen_and_i64(t1, arg, t2);       /*  t1 = .....b.d */
1759         tcg_gen_and_i64(t0, t0, t2);        /*  t0 = .....a.c */
1760         tcg_gen_shli_i64(t1, t1, 8);        /*  t1 = ....b.d. */
1761         tcg_gen_or_i64(ret, t0, t1);        /* ret = ....badc */
1762 
1763         tcg_gen_shli_i64(t1, ret, 48);      /*  t1 = dc...... */
1764         tcg_gen_shri_i64(t0, ret, 16);      /*  t0 = ......ba */
1765         if (flags & TCG_BSWAP_OS) {
1766             tcg_gen_sari_i64(t1, t1, 32);   /*  t1 = ssssdc.. */
1767         } else {
1768             tcg_gen_shri_i64(t1, t1, 32);   /*  t1 = ....dc.. */
1769         }
1770         tcg_gen_or_i64(ret, t0, t1);        /* ret = ssssdcba */
1771 
1772         tcg_temp_free_i64(t0);
1773         tcg_temp_free_i64(t1);
1774     }
1775 }
1776 
1777 void tcg_gen_bswap64_i64(TCGv_i64 ret, TCGv_i64 arg)
1778 {
1779     if (TCG_TARGET_REG_BITS == 32) {
1780         TCGv_i32 t0, t1;
1781         t0 = tcg_temp_new_i32();
1782         t1 = tcg_temp_new_i32();
1783 
1784         tcg_gen_bswap32_i32(t0, TCGV_LOW(arg));
1785         tcg_gen_bswap32_i32(t1, TCGV_HIGH(arg));
1786         tcg_gen_mov_i32(TCGV_LOW(ret), t1);
1787         tcg_gen_mov_i32(TCGV_HIGH(ret), t0);
1788         tcg_temp_free_i32(t0);
1789         tcg_temp_free_i32(t1);
1790     } else if (TCG_TARGET_HAS_bswap64_i64) {
1791         tcg_gen_op3i_i64(INDEX_op_bswap64_i64, ret, arg, 0);
1792     } else {
1793         TCGv_i64 t0 = tcg_temp_new_i64();
1794         TCGv_i64 t1 = tcg_temp_new_i64();
1795         TCGv_i64 t2 = tcg_temp_new_i64();
1796 
1797                                         /* arg = abcdefgh */
1798         tcg_gen_movi_i64(t2, 0x00ff00ff00ff00ffull);
1799         tcg_gen_shri_i64(t0, arg, 8);   /*  t0 = .abcdefg */
1800         tcg_gen_and_i64(t1, arg, t2);   /*  t1 = .b.d.f.h */
1801         tcg_gen_and_i64(t0, t0, t2);    /*  t0 = .a.c.e.g */
1802         tcg_gen_shli_i64(t1, t1, 8);    /*  t1 = b.d.f.h. */
1803         tcg_gen_or_i64(ret, t0, t1);    /* ret = badcfehg */
1804 
1805         tcg_gen_movi_i64(t2, 0x0000ffff0000ffffull);
1806         tcg_gen_shri_i64(t0, ret, 16);  /*  t0 = ..badcfe */
1807         tcg_gen_and_i64(t1, ret, t2);   /*  t1 = ..dc..hg */
1808         tcg_gen_and_i64(t0, t0, t2);    /*  t0 = ..ba..fe */
1809         tcg_gen_shli_i64(t1, t1, 16);   /*  t1 = dc..hg.. */
1810         tcg_gen_or_i64(ret, t0, t1);    /* ret = dcbahgfe */
1811 
1812         tcg_gen_shri_i64(t0, ret, 32);  /*  t0 = ....dcba */
1813         tcg_gen_shli_i64(t1, ret, 32);  /*  t1 = hgfe.... */
1814         tcg_gen_or_i64(ret, t0, t1);    /* ret = hgfedcba */
1815 
1816         tcg_temp_free_i64(t0);
1817         tcg_temp_free_i64(t1);
1818         tcg_temp_free_i64(t2);
1819     }
1820 }
1821 
1822 void tcg_gen_hswap_i64(TCGv_i64 ret, TCGv_i64 arg)
1823 {
1824     uint64_t m = 0x0000ffff0000ffffull;
1825     TCGv_i64 t0 = tcg_temp_new_i64();
1826     TCGv_i64 t1 = tcg_temp_new_i64();
1827 
1828     /* See include/qemu/bitops.h, hswap64. */
1829     tcg_gen_rotli_i64(t1, arg, 32);
1830     tcg_gen_andi_i64(t0, t1, m);
1831     tcg_gen_shli_i64(t0, t0, 16);
1832     tcg_gen_shri_i64(t1, t1, 16);
1833     tcg_gen_andi_i64(t1, t1, m);
1834     tcg_gen_or_i64(ret, t0, t1);
1835 
1836     tcg_temp_free_i64(t0);
1837     tcg_temp_free_i64(t1);
1838 }
1839 
/*
 * Emit ops that exchange the two 32-bit words of ARG, storing the
 * result in RET.
 */
void tcg_gen_wswap_i64(TCGv_i64 ret, TCGv_i64 arg)
{
    /* Swapping 2 32-bit elements is a rotate. */
    tcg_gen_rotli_i64(ret, arg, 32);
}
1845 
1846 void tcg_gen_not_i64(TCGv_i64 ret, TCGv_i64 arg)
1847 {
1848     if (TCG_TARGET_REG_BITS == 32) {
1849         tcg_gen_not_i32(TCGV_LOW(ret), TCGV_LOW(arg));
1850         tcg_gen_not_i32(TCGV_HIGH(ret), TCGV_HIGH(arg));
1851     } else if (TCG_TARGET_HAS_not_i64) {
1852         tcg_gen_op2_i64(INDEX_op_not_i64, ret, arg);
1853     } else {
1854         tcg_gen_xori_i64(ret, arg, -1);
1855     }
1856 }
1857 
1858 void tcg_gen_andc_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
1859 {
1860     if (TCG_TARGET_REG_BITS == 32) {
1861         tcg_gen_andc_i32(TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_LOW(arg2));
1862         tcg_gen_andc_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), TCGV_HIGH(arg2));
1863     } else if (TCG_TARGET_HAS_andc_i64) {
1864         tcg_gen_op3_i64(INDEX_op_andc_i64, ret, arg1, arg2);
1865     } else {
1866         TCGv_i64 t0 = tcg_temp_new_i64();
1867         tcg_gen_not_i64(t0, arg2);
1868         tcg_gen_and_i64(ret, arg1, t0);
1869         tcg_temp_free_i64(t0);
1870     }
1871 }
1872 
1873 void tcg_gen_eqv_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
1874 {
1875     if (TCG_TARGET_REG_BITS == 32) {
1876         tcg_gen_eqv_i32(TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_LOW(arg2));
1877         tcg_gen_eqv_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), TCGV_HIGH(arg2));
1878     } else if (TCG_TARGET_HAS_eqv_i64) {
1879         tcg_gen_op3_i64(INDEX_op_eqv_i64, ret, arg1, arg2);
1880     } else {
1881         tcg_gen_xor_i64(ret, arg1, arg2);
1882         tcg_gen_not_i64(ret, ret);
1883     }
1884 }
1885 
1886 void tcg_gen_nand_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
1887 {
1888     if (TCG_TARGET_REG_BITS == 32) {
1889         tcg_gen_nand_i32(TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_LOW(arg2));
1890         tcg_gen_nand_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), TCGV_HIGH(arg2));
1891     } else if (TCG_TARGET_HAS_nand_i64) {
1892         tcg_gen_op3_i64(INDEX_op_nand_i64, ret, arg1, arg2);
1893     } else {
1894         tcg_gen_and_i64(ret, arg1, arg2);
1895         tcg_gen_not_i64(ret, ret);
1896     }
1897 }
1898 
1899 void tcg_gen_nor_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
1900 {
1901     if (TCG_TARGET_REG_BITS == 32) {
1902         tcg_gen_nor_i32(TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_LOW(arg2));
1903         tcg_gen_nor_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), TCGV_HIGH(arg2));
1904     } else if (TCG_TARGET_HAS_nor_i64) {
1905         tcg_gen_op3_i64(INDEX_op_nor_i64, ret, arg1, arg2);
1906     } else {
1907         tcg_gen_or_i64(ret, arg1, arg2);
1908         tcg_gen_not_i64(ret, ret);
1909     }
1910 }
1911 
1912 void tcg_gen_orc_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
1913 {
1914     if (TCG_TARGET_REG_BITS == 32) {
1915         tcg_gen_orc_i32(TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_LOW(arg2));
1916         tcg_gen_orc_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), TCGV_HIGH(arg2));
1917     } else if (TCG_TARGET_HAS_orc_i64) {
1918         tcg_gen_op3_i64(INDEX_op_orc_i64, ret, arg1, arg2);
1919     } else {
1920         TCGv_i64 t0 = tcg_temp_new_i64();
1921         tcg_gen_not_i64(t0, arg2);
1922         tcg_gen_or_i64(ret, arg1, t0);
1923         tcg_temp_free_i64(t0);
1924     }
1925 }
1926 
1927 void tcg_gen_clz_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
1928 {
1929     if (TCG_TARGET_HAS_clz_i64) {
1930         tcg_gen_op3_i64(INDEX_op_clz_i64, ret, arg1, arg2);
1931     } else {
1932         gen_helper_clz_i64(ret, arg1, arg2);
1933     }
1934 }
1935 
1936 void tcg_gen_clzi_i64(TCGv_i64 ret, TCGv_i64 arg1, uint64_t arg2)
1937 {
1938     if (TCG_TARGET_REG_BITS == 32
1939         && TCG_TARGET_HAS_clz_i32
1940         && arg2 <= 0xffffffffu) {
1941         TCGv_i32 t = tcg_temp_new_i32();
1942         tcg_gen_clzi_i32(t, TCGV_LOW(arg1), arg2 - 32);
1943         tcg_gen_addi_i32(t, t, 32);
1944         tcg_gen_clz_i32(TCGV_LOW(ret), TCGV_HIGH(arg1), t);
1945         tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
1946         tcg_temp_free_i32(t);
1947     } else {
1948         TCGv_i64 t0 = tcg_const_i64(arg2);
1949         tcg_gen_clz_i64(ret, arg1, t0);
1950         tcg_temp_free_i64(t0);
1951     }
1952 }
1953 
1954 void tcg_gen_ctz_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
1955 {
1956     if (TCG_TARGET_HAS_ctz_i64) {
1957         tcg_gen_op3_i64(INDEX_op_ctz_i64, ret, arg1, arg2);
1958     } else if (TCG_TARGET_HAS_ctpop_i64 || TCG_TARGET_HAS_clz_i64) {
1959         TCGv_i64 z, t = tcg_temp_new_i64();
1960 
1961         if (TCG_TARGET_HAS_ctpop_i64) {
1962             tcg_gen_subi_i64(t, arg1, 1);
1963             tcg_gen_andc_i64(t, t, arg1);
1964             tcg_gen_ctpop_i64(t, t);
1965         } else {
1966             /* Since all non-x86 hosts have clz(0) == 64, don't fight it.  */
1967             tcg_gen_neg_i64(t, arg1);
1968             tcg_gen_and_i64(t, t, arg1);
1969             tcg_gen_clzi_i64(t, t, 64);
1970             tcg_gen_xori_i64(t, t, 63);
1971         }
1972         z = tcg_constant_i64(0);
1973         tcg_gen_movcond_i64(TCG_COND_EQ, ret, arg1, z, arg2, t);
1974         tcg_temp_free_i64(t);
1975         tcg_temp_free_i64(z);
1976     } else {
1977         gen_helper_ctz_i64(ret, arg1, arg2);
1978     }
1979 }
1980 
1981 void tcg_gen_ctzi_i64(TCGv_i64 ret, TCGv_i64 arg1, uint64_t arg2)
1982 {
1983     if (TCG_TARGET_REG_BITS == 32
1984         && TCG_TARGET_HAS_ctz_i32
1985         && arg2 <= 0xffffffffu) {
1986         TCGv_i32 t32 = tcg_temp_new_i32();
1987         tcg_gen_ctzi_i32(t32, TCGV_HIGH(arg1), arg2 - 32);
1988         tcg_gen_addi_i32(t32, t32, 32);
1989         tcg_gen_ctz_i32(TCGV_LOW(ret), TCGV_LOW(arg1), t32);
1990         tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
1991         tcg_temp_free_i32(t32);
1992     } else if (!TCG_TARGET_HAS_ctz_i64
1993                && TCG_TARGET_HAS_ctpop_i64
1994                && arg2 == 64) {
1995         /* This equivalence has the advantage of not requiring a fixup.  */
1996         TCGv_i64 t = tcg_temp_new_i64();
1997         tcg_gen_subi_i64(t, arg1, 1);
1998         tcg_gen_andc_i64(t, t, arg1);
1999         tcg_gen_ctpop_i64(ret, t);
2000         tcg_temp_free_i64(t);
2001     } else {
2002         TCGv_i64 t0 = tcg_const_i64(arg2);
2003         tcg_gen_ctz_i64(ret, arg1, t0);
2004         tcg_temp_free_i64(t0);
2005     }
2006 }
2007 
2008 void tcg_gen_clrsb_i64(TCGv_i64 ret, TCGv_i64 arg)
2009 {
2010     if (TCG_TARGET_HAS_clz_i64 || TCG_TARGET_HAS_clz_i32) {
2011         TCGv_i64 t = tcg_temp_new_i64();
2012         tcg_gen_sari_i64(t, arg, 63);
2013         tcg_gen_xor_i64(t, t, arg);
2014         tcg_gen_clzi_i64(t, t, 64);
2015         tcg_gen_subi_i64(ret, t, 1);
2016         tcg_temp_free_i64(t);
2017     } else {
2018         gen_helper_clrsb_i64(ret, arg);
2019     }
2020 }
2021 
2022 void tcg_gen_ctpop_i64(TCGv_i64 ret, TCGv_i64 arg1)
2023 {
2024     if (TCG_TARGET_HAS_ctpop_i64) {
2025         tcg_gen_op2_i64(INDEX_op_ctpop_i64, ret, arg1);
2026     } else if (TCG_TARGET_REG_BITS == 32 && TCG_TARGET_HAS_ctpop_i32) {
2027         tcg_gen_ctpop_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1));
2028         tcg_gen_ctpop_i32(TCGV_LOW(ret), TCGV_LOW(arg1));
2029         tcg_gen_add_i32(TCGV_LOW(ret), TCGV_LOW(ret), TCGV_HIGH(ret));
2030         tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
2031     } else {
2032         gen_helper_ctpop_i64(ret, arg1);
2033     }
2034 }
2035 
2036 void tcg_gen_rotl_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
2037 {
2038     if (TCG_TARGET_HAS_rot_i64) {
2039         tcg_gen_op3_i64(INDEX_op_rotl_i64, ret, arg1, arg2);
2040     } else {
2041         TCGv_i64 t0, t1;
2042         t0 = tcg_temp_new_i64();
2043         t1 = tcg_temp_new_i64();
2044         tcg_gen_shl_i64(t0, arg1, arg2);
2045         tcg_gen_subfi_i64(t1, 64, arg2);
2046         tcg_gen_shr_i64(t1, arg1, t1);
2047         tcg_gen_or_i64(ret, t0, t1);
2048         tcg_temp_free_i64(t0);
2049         tcg_temp_free_i64(t1);
2050     }
2051 }
2052 
2053 void tcg_gen_rotli_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
2054 {
2055     tcg_debug_assert(arg2 >= 0 && arg2 < 64);
2056     /* some cases can be optimized here */
2057     if (arg2 == 0) {
2058         tcg_gen_mov_i64(ret, arg1);
2059     } else if (TCG_TARGET_HAS_rot_i64) {
2060         tcg_gen_rotl_i64(ret, arg1, tcg_constant_i64(arg2));
2061     } else {
2062         TCGv_i64 t0, t1;
2063         t0 = tcg_temp_new_i64();
2064         t1 = tcg_temp_new_i64();
2065         tcg_gen_shli_i64(t0, arg1, arg2);
2066         tcg_gen_shri_i64(t1, arg1, 64 - arg2);
2067         tcg_gen_or_i64(ret, t0, t1);
2068         tcg_temp_free_i64(t0);
2069         tcg_temp_free_i64(t1);
2070     }
2071 }
2072 
2073 void tcg_gen_rotr_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
2074 {
2075     if (TCG_TARGET_HAS_rot_i64) {
2076         tcg_gen_op3_i64(INDEX_op_rotr_i64, ret, arg1, arg2);
2077     } else {
2078         TCGv_i64 t0, t1;
2079         t0 = tcg_temp_new_i64();
2080         t1 = tcg_temp_new_i64();
2081         tcg_gen_shr_i64(t0, arg1, arg2);
2082         tcg_gen_subfi_i64(t1, 64, arg2);
2083         tcg_gen_shl_i64(t1, arg1, t1);
2084         tcg_gen_or_i64(ret, t0, t1);
2085         tcg_temp_free_i64(t0);
2086         tcg_temp_free_i64(t1);
2087     }
2088 }
2089 
2090 void tcg_gen_rotri_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
2091 {
2092     tcg_debug_assert(arg2 >= 0 && arg2 < 64);
2093     /* some cases can be optimized here */
2094     if (arg2 == 0) {
2095         tcg_gen_mov_i64(ret, arg1);
2096     } else {
2097         tcg_gen_rotli_i64(ret, arg1, 64 - arg2);
2098     }
2099 }
2100 
/*
 * Emit ops that copy ARG1 to RET with the LEN-bit field starting at bit
 * OFS replaced by the low LEN bits of ARG2.
 *
 * The debug assertions require ofs < 64, 0 < len <= 64 and
 * ofs + len <= 64.
 */
void tcg_gen_deposit_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2,
                         unsigned int ofs, unsigned int len)
{
    uint64_t mask;
    TCGv_i64 t1;

    tcg_debug_assert(ofs < 64);
    tcg_debug_assert(len > 0);
    tcg_debug_assert(len <= 64);
    tcg_debug_assert(ofs + len <= 64);

    /* A 64-bit field covers the whole value: the result is just ARG2.  */
    if (len == 64) {
        tcg_gen_mov_i64(ret, arg2);
        return;
    }
    /* Use the host opcode when it exists and accepts this (ofs, len).  */
    if (TCG_TARGET_HAS_deposit_i64 && TCG_TARGET_deposit_i64_valid(ofs, len)) {
        tcg_gen_op5ii_i64(INDEX_op_deposit_i64, ret, arg1, arg2, ofs, len);
        return;
    }

    if (TCG_TARGET_REG_BITS == 32) {
        /* Field entirely inside the high half: 32-bit deposit there and
           copy the low half of ARG1 unchanged.  */
        if (ofs >= 32) {
            tcg_gen_deposit_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1),
                                TCGV_LOW(arg2), ofs - 32, len);
            tcg_gen_mov_i32(TCGV_LOW(ret), TCGV_LOW(arg1));
            return;
        }
        /* Field entirely inside the low half: the mirror image.  */
        if (ofs + len <= 32) {
            tcg_gen_deposit_i32(TCGV_LOW(ret), TCGV_LOW(arg1),
                                TCGV_LOW(arg2), ofs, len);
            tcg_gen_mov_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1));
            return;
        }
        /* A field straddling both halves falls through to the generic
           code below.  */
    }

    t1 = tcg_temp_new_i64();

    if (TCG_TARGET_HAS_extract2_i64) {
        /* Field at the top: shift ARG1 up by LEN so that the extract2
           at offset LEN brings its low bits back down and fills the
           top LEN bits from the low bits of ARG2.  */
        if (ofs + len == 64) {
            tcg_gen_shli_i64(t1, arg1, len);
            tcg_gen_extract2_i64(ret, t1, arg2, len);
            goto done;
        }
        /* Field at the bottom: extract2 drops the low LEN bits of ARG1
           and places the low LEN bits of ARG2 at the top; rotating left
           by LEN then moves them to the bottom.  */
        if (ofs == 0) {
            tcg_gen_extract2_i64(ret, arg1, arg2, len);
            tcg_gen_rotli_i64(ret, ret, len);
            goto done;
        }
    }

    /* Generic mask-and-merge.  LEN < 64 here, so the shift is defined.  */
    mask = (1ull << len) - 1;
    if (ofs + len < 64) {
        tcg_gen_andi_i64(t1, arg2, mask);
        tcg_gen_shli_i64(t1, t1, ofs);
    } else {
        /* The field reaches bit 63, so the shift alone discards the
           bits of ARG2 above LEN and no AND is needed.  */
        tcg_gen_shli_i64(t1, arg2, ofs);
    }
    /* Clear the field in ARG1, then OR in the positioned ARG2 bits.  */
    tcg_gen_andi_i64(ret, arg1, ~(mask << ofs));
    tcg_gen_or_i64(ret, ret, t1);
 done:
    tcg_temp_free_i64(t1);
}
2163 
/*
 * Emit ops that place the low LEN bits of ARG at bit OFS of RET, with
 * every other bit of RET zero; i.e. RET = (ARG & ((1 << LEN) - 1)) << OFS.
 *
 * The debug assertions require ofs < 64, 0 < len <= 64 and
 * ofs + len <= 64.
 */
void tcg_gen_deposit_z_i64(TCGv_i64 ret, TCGv_i64 arg,
                           unsigned int ofs, unsigned int len)
{
    tcg_debug_assert(ofs < 64);
    tcg_debug_assert(len > 0);
    tcg_debug_assert(len <= 64);
    tcg_debug_assert(ofs + len <= 64);

    if (ofs + len == 64) {
        /* Field reaches bit 63: the shift discards the excess bits.  */
        tcg_gen_shli_i64(ret, arg, ofs);
    } else if (ofs == 0) {
        /* Field at bit 0 (and LEN < 64 here): a plain mask.  */
        tcg_gen_andi_i64(ret, arg, (1ull << len) - 1);
    } else if (TCG_TARGET_HAS_deposit_i64
               && TCG_TARGET_deposit_i64_valid(ofs, len)) {
        /* Host deposit opcode with a constant zero as the background.  */
        TCGv_i64 zero = tcg_constant_i64(0);
        tcg_gen_op5ii_i64(INDEX_op_deposit_i64, ret, zero, arg, ofs, len);
    } else {
        if (TCG_TARGET_REG_BITS == 32) {
            /* Field entirely inside one 32-bit half of the result:
               32-bit deposit there, zero the other half.  */
            if (ofs >= 32) {
                tcg_gen_deposit_z_i32(TCGV_HIGH(ret), TCGV_LOW(arg),
                                      ofs - 32, len);
                tcg_gen_movi_i32(TCGV_LOW(ret), 0);
                return;
            }
            if (ofs + len <= 32) {
                tcg_gen_deposit_z_i32(TCGV_LOW(ret), TCGV_LOW(arg), ofs, len);
                tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
                return;
            }
        }
        /* To help two-operand hosts we prefer to zero-extend first,
           which allows ARG to stay live.  */
        switch (len) {
        case 32:
            if (TCG_TARGET_HAS_ext32u_i64) {
                tcg_gen_ext32u_i64(ret, arg);
                tcg_gen_shli_i64(ret, ret, ofs);
                return;
            }
            break;
        case 16:
            if (TCG_TARGET_HAS_ext16u_i64) {
                tcg_gen_ext16u_i64(ret, arg);
                tcg_gen_shli_i64(ret, ret, ofs);
                return;
            }
            break;
        case 8:
            if (TCG_TARGET_HAS_ext8u_i64) {
                tcg_gen_ext8u_i64(ret, arg);
                tcg_gen_shli_i64(ret, ret, ofs);
                return;
            }
            break;
        }
        /* Otherwise prefer zero-extension over AND for code size.  */
        /* Here the field ends at bit 8, 16 or 32, so shifting first and
           zero-extending afterwards trims the excess bits.  */
        switch (ofs + len) {
        case 32:
            if (TCG_TARGET_HAS_ext32u_i64) {
                tcg_gen_shli_i64(ret, arg, ofs);
                tcg_gen_ext32u_i64(ret, ret);
                return;
            }
            break;
        case 16:
            if (TCG_TARGET_HAS_ext16u_i64) {
                tcg_gen_shli_i64(ret, arg, ofs);
                tcg_gen_ext16u_i64(ret, ret);
                return;
            }
            break;
        case 8:
            if (TCG_TARGET_HAS_ext8u_i64) {
                tcg_gen_shli_i64(ret, arg, ofs);
                tcg_gen_ext8u_i64(ret, ret);
                return;
            }
            break;
        }
        /* Last resort: mask the field, then shift it into place.  */
        tcg_gen_andi_i64(ret, arg, (1ull << len) - 1);
        tcg_gen_shli_i64(ret, ret, ofs);
    }
}
2247 
/*
 * Emit ops that extract the LEN-bit field starting at bit OFS of ARG and
 * store it, zero-extended, in RET; i.e.
 * RET = (ARG >> OFS) & ((1 << LEN) - 1).
 *
 * The debug assertions require ofs < 64, 0 < len <= 64 and
 * ofs + len <= 64.
 */
void tcg_gen_extract_i64(TCGv_i64 ret, TCGv_i64 arg,
                         unsigned int ofs, unsigned int len)
{
    tcg_debug_assert(ofs < 64);
    tcg_debug_assert(len > 0);
    tcg_debug_assert(len <= 64);
    tcg_debug_assert(ofs + len <= 64);

    /* Canonicalize certain special cases, even if extract is supported.  */
    if (ofs + len == 64) {
        /* Field reaches bit 63: a logical right shift is enough.  */
        tcg_gen_shri_i64(ret, arg, 64 - len);
        return;
    }
    if (ofs == 0) {
        /* Field at bit 0 (and LEN < 64 here): a plain mask.  */
        tcg_gen_andi_i64(ret, arg, (1ull << len) - 1);
        return;
    }

    if (TCG_TARGET_REG_BITS == 32) {
        /* Look for a 32-bit extract within one of the two words.  */
        if (ofs >= 32) {
            tcg_gen_extract_i32(TCGV_LOW(ret), TCGV_HIGH(arg), ofs - 32, len);
            tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
            return;
        }
        if (ofs + len <= 32) {
            tcg_gen_extract_i32(TCGV_LOW(ret), TCGV_LOW(arg), ofs, len);
            tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
            return;
        }
        /* The field is split across two words.  One double-word
           shift is better than two double-word shifts.  */
        goto do_shift_and;
    }

    /* Every 32-bit-host path has returned or jumped above, so the code
       from here to the final switch runs for other hosts only.  */
    if (TCG_TARGET_HAS_extract_i64
        && TCG_TARGET_extract_i64_valid(ofs, len)) {
        tcg_gen_op4ii_i64(INDEX_op_extract_i64, ret, arg, ofs, len);
        return;
    }

    /* Assume that zero-extension, if available, is cheaper than a shift.  */
    /* When the field ends at bit 8, 16 or 32, zero-extending first
       clears everything above it; the shift then drops the bits below.  */
    switch (ofs + len) {
    case 32:
        if (TCG_TARGET_HAS_ext32u_i64) {
            tcg_gen_ext32u_i64(ret, arg);
            tcg_gen_shri_i64(ret, ret, ofs);
            return;
        }
        break;
    case 16:
        if (TCG_TARGET_HAS_ext16u_i64) {
            tcg_gen_ext16u_i64(ret, arg);
            tcg_gen_shri_i64(ret, ret, ofs);
            return;
        }
        break;
    case 8:
        if (TCG_TARGET_HAS_ext8u_i64) {
            tcg_gen_ext8u_i64(ret, arg);
            tcg_gen_shri_i64(ret, ret, ofs);
            return;
        }
        break;
    }

    /* ??? Ideally we'd know what values are available for immediate AND.
       Assume that 8 bits are available, plus the special cases of 16 and 32,
       so that we get ext8u, ext16u, and ext32u.  */
    switch (len) {
    case 1 ... 8: case 16: case 32:
    do_shift_and:
        /* Also the target of the 32-bit-host jump for a split field.  */
        tcg_gen_shri_i64(ret, arg, ofs);
        tcg_gen_andi_i64(ret, ret, (1ull << len) - 1);
        break;
    default:
        /* Push the field up against bit 63, then shift it back down to
           bit 0 with zero fill.  */
        tcg_gen_shli_i64(ret, arg, 64 - len - ofs);
        tcg_gen_shri_i64(ret, ret, 64 - len);
        break;
    }
}
2329 
2330 void tcg_gen_sextract_i64(TCGv_i64 ret, TCGv_i64 arg,
2331                           unsigned int ofs, unsigned int len)
2332 {
2333     tcg_debug_assert(ofs < 64);
2334     tcg_debug_assert(len > 0);
2335     tcg_debug_assert(len <= 64);
2336     tcg_debug_assert(ofs + len <= 64);
2337 
2338     /* Canonicalize certain special cases, even if sextract is supported.  */
2339     if (ofs + len == 64) {
2340         tcg_gen_sari_i64(ret, arg, 64 - len);
2341         return;
2342     }
2343     if (ofs == 0) {
2344         switch (len) {
2345         case 32:
2346             tcg_gen_ext32s_i64(ret, arg);
2347             return;
2348         case 16:
2349             tcg_gen_ext16s_i64(ret, arg);
2350             return;
2351         case 8:
2352             tcg_gen_ext8s_i64(ret, arg);
2353             return;
2354         }
2355     }
2356 
2357     if (TCG_TARGET_REG_BITS == 32) {
2358         /* Look for a 32-bit extract within one of the two words.  */
2359         if (ofs >= 32) {
2360             tcg_gen_sextract_i32(TCGV_LOW(ret), TCGV_HIGH(arg), ofs - 32, len);
2361         } else if (ofs + len <= 32) {
2362             tcg_gen_sextract_i32(TCGV_LOW(ret), TCGV_LOW(arg), ofs, len);
2363         } else if (ofs == 0) {
2364             tcg_gen_mov_i32(TCGV_LOW(ret), TCGV_LOW(arg));
2365             tcg_gen_sextract_i32(TCGV_HIGH(ret), TCGV_HIGH(arg), 0, len - 32);
2366             return;
2367         } else if (len > 32) {
2368             TCGv_i32 t = tcg_temp_new_i32();
2369             /* Extract the bits for the high word normally.  */
2370             tcg_gen_sextract_i32(t, TCGV_HIGH(arg), ofs + 32, len - 32);
2371             /* Shift the field down for the low part.  */
2372             tcg_gen_shri_i64(ret, arg, ofs);
2373             /* Overwrite the shift into the high part.  */
2374             tcg_gen_mov_i32(TCGV_HIGH(ret), t);
2375             tcg_temp_free_i32(t);
2376             return;
2377         } else {
2378             /* Shift the field down for the low part, such that the
2379                field sits at the MSB.  */
2380             tcg_gen_shri_i64(ret, arg, ofs + len - 32);
2381             /* Shift the field down from the MSB, sign extending.  */
2382             tcg_gen_sari_i32(TCGV_LOW(ret), TCGV_LOW(ret), 32 - len);
2383         }
2384         /* Sign-extend the field from 32 bits.  */
2385         tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31);
2386         return;
2387     }
2388 
2389     if (TCG_TARGET_HAS_sextract_i64
2390         && TCG_TARGET_extract_i64_valid(ofs, len)) {
2391         tcg_gen_op4ii_i64(INDEX_op_sextract_i64, ret, arg, ofs, len);
2392         return;
2393     }
2394 
2395     /* Assume that sign-extension, if available, is cheaper than a shift.  */
2396     switch (ofs + len) {
2397     case 32:
2398         if (TCG_TARGET_HAS_ext32s_i64) {
2399             tcg_gen_ext32s_i64(ret, arg);
2400             tcg_gen_sari_i64(ret, ret, ofs);
2401             return;
2402         }
2403         break;
2404     case 16:
2405         if (TCG_TARGET_HAS_ext16s_i64) {
2406             tcg_gen_ext16s_i64(ret, arg);
2407             tcg_gen_sari_i64(ret, ret, ofs);
2408             return;
2409         }
2410         break;
2411     case 8:
2412         if (TCG_TARGET_HAS_ext8s_i64) {
2413             tcg_gen_ext8s_i64(ret, arg);
2414             tcg_gen_sari_i64(ret, ret, ofs);
2415             return;
2416         }
2417         break;
2418     }
2419     switch (len) {
2420     case 32:
2421         if (TCG_TARGET_HAS_ext32s_i64) {
2422             tcg_gen_shri_i64(ret, arg, ofs);
2423             tcg_gen_ext32s_i64(ret, ret);
2424             return;
2425         }
2426         break;
2427     case 16:
2428         if (TCG_TARGET_HAS_ext16s_i64) {
2429             tcg_gen_shri_i64(ret, arg, ofs);
2430             tcg_gen_ext16s_i64(ret, ret);
2431             return;
2432         }
2433         break;
2434     case 8:
2435         if (TCG_TARGET_HAS_ext8s_i64) {
2436             tcg_gen_shri_i64(ret, arg, ofs);
2437             tcg_gen_ext8s_i64(ret, ret);
2438             return;
2439         }
2440         break;
2441     }
2442     tcg_gen_shli_i64(ret, arg, 64 - len - ofs);
2443     tcg_gen_sari_i64(ret, ret, 64 - len);
2444 }
2445 
2446 /*
2447  * Extract 64 bits from a 128-bit input, ah:al, starting from ofs.
2448  * Unlike tcg_gen_extract_i64 above, len is fixed at 64.
2449  */
2450 void tcg_gen_extract2_i64(TCGv_i64 ret, TCGv_i64 al, TCGv_i64 ah,
2451                           unsigned int ofs)
2452 {
2453     tcg_debug_assert(ofs <= 64);
2454     if (ofs == 0) {
2455         tcg_gen_mov_i64(ret, al);
2456     } else if (ofs == 64) {
2457         tcg_gen_mov_i64(ret, ah);
2458     } else if (al == ah) {
2459         tcg_gen_rotri_i64(ret, al, ofs);
2460     } else if (TCG_TARGET_HAS_extract2_i64) {
2461         tcg_gen_op4i_i64(INDEX_op_extract2_i64, ret, al, ah, ofs);
2462     } else {
2463         TCGv_i64 t0 = tcg_temp_new_i64();
2464         tcg_gen_shri_i64(t0, al, ofs);
2465         tcg_gen_deposit_i64(ret, t0, ah, 64 - ofs, ofs);
2466         tcg_temp_free_i64(t0);
2467     }
2468 }
2469 
2470 void tcg_gen_movcond_i64(TCGCond cond, TCGv_i64 ret, TCGv_i64 c1,
2471                          TCGv_i64 c2, TCGv_i64 v1, TCGv_i64 v2)
2472 {
2473     if (cond == TCG_COND_ALWAYS) {
2474         tcg_gen_mov_i64(ret, v1);
2475     } else if (cond == TCG_COND_NEVER) {
2476         tcg_gen_mov_i64(ret, v2);
2477     } else if (TCG_TARGET_REG_BITS == 32) {
2478         TCGv_i32 t0 = tcg_temp_new_i32();
2479         TCGv_i32 t1 = tcg_temp_new_i32();
2480         tcg_gen_op6i_i32(INDEX_op_setcond2_i32, t0,
2481                          TCGV_LOW(c1), TCGV_HIGH(c1),
2482                          TCGV_LOW(c2), TCGV_HIGH(c2), cond);
2483 
2484         if (TCG_TARGET_HAS_movcond_i32) {
2485             tcg_gen_movi_i32(t1, 0);
2486             tcg_gen_movcond_i32(TCG_COND_NE, TCGV_LOW(ret), t0, t1,
2487                                 TCGV_LOW(v1), TCGV_LOW(v2));
2488             tcg_gen_movcond_i32(TCG_COND_NE, TCGV_HIGH(ret), t0, t1,
2489                                 TCGV_HIGH(v1), TCGV_HIGH(v2));
2490         } else {
2491             tcg_gen_neg_i32(t0, t0);
2492 
2493             tcg_gen_and_i32(t1, TCGV_LOW(v1), t0);
2494             tcg_gen_andc_i32(TCGV_LOW(ret), TCGV_LOW(v2), t0);
2495             tcg_gen_or_i32(TCGV_LOW(ret), TCGV_LOW(ret), t1);
2496 
2497             tcg_gen_and_i32(t1, TCGV_HIGH(v1), t0);
2498             tcg_gen_andc_i32(TCGV_HIGH(ret), TCGV_HIGH(v2), t0);
2499             tcg_gen_or_i32(TCGV_HIGH(ret), TCGV_HIGH(ret), t1);
2500         }
2501         tcg_temp_free_i32(t0);
2502         tcg_temp_free_i32(t1);
2503     } else if (TCG_TARGET_HAS_movcond_i64) {
2504         tcg_gen_op6i_i64(INDEX_op_movcond_i64, ret, c1, c2, v1, v2, cond);
2505     } else {
2506         TCGv_i64 t0 = tcg_temp_new_i64();
2507         TCGv_i64 t1 = tcg_temp_new_i64();
2508         tcg_gen_setcond_i64(cond, t0, c1, c2);
2509         tcg_gen_neg_i64(t0, t0);
2510         tcg_gen_and_i64(t1, v1, t0);
2511         tcg_gen_andc_i64(ret, v2, t0);
2512         tcg_gen_or_i64(ret, ret, t1);
2513         tcg_temp_free_i64(t0);
2514         tcg_temp_free_i64(t1);
2515     }
2516 }
2517 
2518 void tcg_gen_add2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 al,
2519                       TCGv_i64 ah, TCGv_i64 bl, TCGv_i64 bh)
2520 {
2521     if (TCG_TARGET_HAS_add2_i64) {
2522         tcg_gen_op6_i64(INDEX_op_add2_i64, rl, rh, al, ah, bl, bh);
2523     } else {
2524         TCGv_i64 t0 = tcg_temp_new_i64();
2525         TCGv_i64 t1 = tcg_temp_new_i64();
2526         tcg_gen_add_i64(t0, al, bl);
2527         tcg_gen_setcond_i64(TCG_COND_LTU, t1, t0, al);
2528         tcg_gen_add_i64(rh, ah, bh);
2529         tcg_gen_add_i64(rh, rh, t1);
2530         tcg_gen_mov_i64(rl, t0);
2531         tcg_temp_free_i64(t0);
2532         tcg_temp_free_i64(t1);
2533     }
2534 }
2535 
2536 void tcg_gen_sub2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 al,
2537                       TCGv_i64 ah, TCGv_i64 bl, TCGv_i64 bh)
2538 {
2539     if (TCG_TARGET_HAS_sub2_i64) {
2540         tcg_gen_op6_i64(INDEX_op_sub2_i64, rl, rh, al, ah, bl, bh);
2541     } else {
2542         TCGv_i64 t0 = tcg_temp_new_i64();
2543         TCGv_i64 t1 = tcg_temp_new_i64();
2544         tcg_gen_sub_i64(t0, al, bl);
2545         tcg_gen_setcond_i64(TCG_COND_LTU, t1, al, bl);
2546         tcg_gen_sub_i64(rh, ah, bh);
2547         tcg_gen_sub_i64(rh, rh, t1);
2548         tcg_gen_mov_i64(rl, t0);
2549         tcg_temp_free_i64(t0);
2550         tcg_temp_free_i64(t1);
2551     }
2552 }
2553 
2554 void tcg_gen_mulu2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 arg1, TCGv_i64 arg2)
2555 {
2556     if (TCG_TARGET_HAS_mulu2_i64) {
2557         tcg_gen_op4_i64(INDEX_op_mulu2_i64, rl, rh, arg1, arg2);
2558     } else if (TCG_TARGET_HAS_muluh_i64) {
2559         TCGv_i64 t = tcg_temp_new_i64();
2560         tcg_gen_op3_i64(INDEX_op_mul_i64, t, arg1, arg2);
2561         tcg_gen_op3_i64(INDEX_op_muluh_i64, rh, arg1, arg2);
2562         tcg_gen_mov_i64(rl, t);
2563         tcg_temp_free_i64(t);
2564     } else {
2565         TCGv_i64 t0 = tcg_temp_new_i64();
2566         tcg_gen_mul_i64(t0, arg1, arg2);
2567         gen_helper_muluh_i64(rh, arg1, arg2);
2568         tcg_gen_mov_i64(rl, t0);
2569         tcg_temp_free_i64(t0);
2570     }
2571 }
2572 
2573 void tcg_gen_muls2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 arg1, TCGv_i64 arg2)
2574 {
2575     if (TCG_TARGET_HAS_muls2_i64) {
2576         tcg_gen_op4_i64(INDEX_op_muls2_i64, rl, rh, arg1, arg2);
2577     } else if (TCG_TARGET_HAS_mulsh_i64) {
2578         TCGv_i64 t = tcg_temp_new_i64();
2579         tcg_gen_op3_i64(INDEX_op_mul_i64, t, arg1, arg2);
2580         tcg_gen_op3_i64(INDEX_op_mulsh_i64, rh, arg1, arg2);
2581         tcg_gen_mov_i64(rl, t);
2582         tcg_temp_free_i64(t);
2583     } else if (TCG_TARGET_HAS_mulu2_i64 || TCG_TARGET_HAS_muluh_i64) {
2584         TCGv_i64 t0 = tcg_temp_new_i64();
2585         TCGv_i64 t1 = tcg_temp_new_i64();
2586         TCGv_i64 t2 = tcg_temp_new_i64();
2587         TCGv_i64 t3 = tcg_temp_new_i64();
2588         tcg_gen_mulu2_i64(t0, t1, arg1, arg2);
2589         /* Adjust for negative inputs.  */
2590         tcg_gen_sari_i64(t2, arg1, 63);
2591         tcg_gen_sari_i64(t3, arg2, 63);
2592         tcg_gen_and_i64(t2, t2, arg2);
2593         tcg_gen_and_i64(t3, t3, arg1);
2594         tcg_gen_sub_i64(rh, t1, t2);
2595         tcg_gen_sub_i64(rh, rh, t3);
2596         tcg_gen_mov_i64(rl, t0);
2597         tcg_temp_free_i64(t0);
2598         tcg_temp_free_i64(t1);
2599         tcg_temp_free_i64(t2);
2600         tcg_temp_free_i64(t3);
2601     } else {
2602         TCGv_i64 t0 = tcg_temp_new_i64();
2603         tcg_gen_mul_i64(t0, arg1, arg2);
2604         gen_helper_mulsh_i64(rh, arg1, arg2);
2605         tcg_gen_mov_i64(rl, t0);
2606         tcg_temp_free_i64(t0);
2607     }
2608 }
2609 
2610 void tcg_gen_mulsu2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 arg1, TCGv_i64 arg2)
2611 {
2612     TCGv_i64 t0 = tcg_temp_new_i64();
2613     TCGv_i64 t1 = tcg_temp_new_i64();
2614     TCGv_i64 t2 = tcg_temp_new_i64();
2615     tcg_gen_mulu2_i64(t0, t1, arg1, arg2);
2616     /* Adjust for negative input for the signed arg1.  */
2617     tcg_gen_sari_i64(t2, arg1, 63);
2618     tcg_gen_and_i64(t2, t2, arg2);
2619     tcg_gen_sub_i64(rh, t1, t2);
2620     tcg_gen_mov_i64(rl, t0);
2621     tcg_temp_free_i64(t0);
2622     tcg_temp_free_i64(t1);
2623     tcg_temp_free_i64(t2);
2624 }
2625 
/*
 * Minimum of A and B under the TCG_COND_LT comparison:
 * RET = A if A < B, otherwise B.
 */
void tcg_gen_smin_i64(TCGv_i64 ret, TCGv_i64 a, TCGv_i64 b)
{
    tcg_gen_movcond_i64(TCG_COND_LT, ret, a, b, a, b);
}
2630 
/*
 * Minimum of A and B under the TCG_COND_LTU comparison:
 * RET = A if A < B, otherwise B.
 */
void tcg_gen_umin_i64(TCGv_i64 ret, TCGv_i64 a, TCGv_i64 b)
{
    tcg_gen_movcond_i64(TCG_COND_LTU, ret, a, b, a, b);
}
2635 
/*
 * Maximum of A and B under the TCG_COND_LT comparison:
 * RET = B if A < B, otherwise A.
 */
void tcg_gen_smax_i64(TCGv_i64 ret, TCGv_i64 a, TCGv_i64 b)
{
    tcg_gen_movcond_i64(TCG_COND_LT, ret, a, b, b, a);
}
2640 
/*
 * Maximum of A and B under the TCG_COND_LTU comparison:
 * RET = B if A < B, otherwise A.
 */
void tcg_gen_umax_i64(TCGv_i64 ret, TCGv_i64 a, TCGv_i64 b)
{
    tcg_gen_movcond_i64(TCG_COND_LTU, ret, a, b, b, a);
}
2645 
2646 void tcg_gen_abs_i64(TCGv_i64 ret, TCGv_i64 a)
2647 {
2648     TCGv_i64 t = tcg_temp_new_i64();
2649 
2650     tcg_gen_sari_i64(t, a, 63);
2651     tcg_gen_xor_i64(ret, a, t);
2652     tcg_gen_sub_i64(ret, ret, t);
2653     tcg_temp_free_i64(t);
2654 }
2655 
2656 /* Size changing operations.  */
2657 
2658 void tcg_gen_extrl_i64_i32(TCGv_i32 ret, TCGv_i64 arg)
2659 {
2660     if (TCG_TARGET_REG_BITS == 32) {
2661         tcg_gen_mov_i32(ret, TCGV_LOW(arg));
2662     } else if (TCG_TARGET_HAS_extrl_i64_i32) {
2663         tcg_gen_op2(INDEX_op_extrl_i64_i32,
2664                     tcgv_i32_arg(ret), tcgv_i64_arg(arg));
2665     } else {
2666         tcg_gen_mov_i32(ret, (TCGv_i32)arg);
2667     }
2668 }
2669 
2670 void tcg_gen_extrh_i64_i32(TCGv_i32 ret, TCGv_i64 arg)
2671 {
2672     if (TCG_TARGET_REG_BITS == 32) {
2673         tcg_gen_mov_i32(ret, TCGV_HIGH(arg));
2674     } else if (TCG_TARGET_HAS_extrh_i64_i32) {
2675         tcg_gen_op2(INDEX_op_extrh_i64_i32,
2676                     tcgv_i32_arg(ret), tcgv_i64_arg(arg));
2677     } else {
2678         TCGv_i64 t = tcg_temp_new_i64();
2679         tcg_gen_shri_i64(t, arg, 32);
2680         tcg_gen_mov_i32(ret, (TCGv_i32)t);
2681         tcg_temp_free_i64(t);
2682     }
2683 }
2684 
2685 void tcg_gen_extu_i32_i64(TCGv_i64 ret, TCGv_i32 arg)
2686 {
2687     if (TCG_TARGET_REG_BITS == 32) {
2688         tcg_gen_mov_i32(TCGV_LOW(ret), arg);
2689         tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
2690     } else {
2691         tcg_gen_op2(INDEX_op_extu_i32_i64,
2692                     tcgv_i64_arg(ret), tcgv_i32_arg(arg));
2693     }
2694 }
2695 
2696 void tcg_gen_ext_i32_i64(TCGv_i64 ret, TCGv_i32 arg)
2697 {
2698     if (TCG_TARGET_REG_BITS == 32) {
2699         tcg_gen_mov_i32(TCGV_LOW(ret), arg);
2700         tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31);
2701     } else {
2702         tcg_gen_op2(INDEX_op_ext_i32_i64,
2703                     tcgv_i64_arg(ret), tcgv_i32_arg(arg));
2704     }
2705 }
2706 
2707 void tcg_gen_concat_i32_i64(TCGv_i64 dest, TCGv_i32 low, TCGv_i32 high)
2708 {
2709     TCGv_i64 tmp;
2710 
2711     if (TCG_TARGET_REG_BITS == 32) {
2712         tcg_gen_mov_i32(TCGV_LOW(dest), low);
2713         tcg_gen_mov_i32(TCGV_HIGH(dest), high);
2714         return;
2715     }
2716 
2717     tmp = tcg_temp_new_i64();
2718     /* These extensions are only needed for type correctness.
2719        We may be able to do better given target specific information.  */
2720     tcg_gen_extu_i32_i64(tmp, high);
2721     tcg_gen_extu_i32_i64(dest, low);
2722     /* If deposit is available, use it.  Otherwise use the extra
2723        knowledge that we have of the zero-extensions above.  */
2724     if (TCG_TARGET_HAS_deposit_i64 && TCG_TARGET_deposit_i64_valid(32, 32)) {
2725         tcg_gen_deposit_i64(dest, dest, tmp, 32, 32);
2726     } else {
2727         tcg_gen_shli_i64(tmp, tmp, 32);
2728         tcg_gen_or_i64(dest, dest, tmp);
2729     }
2730     tcg_temp_free_i64(tmp);
2731 }
2732 
2733 void tcg_gen_extr_i64_i32(TCGv_i32 lo, TCGv_i32 hi, TCGv_i64 arg)
2734 {
2735     if (TCG_TARGET_REG_BITS == 32) {
2736         tcg_gen_mov_i32(lo, TCGV_LOW(arg));
2737         tcg_gen_mov_i32(hi, TCGV_HIGH(arg));
2738     } else {
2739         tcg_gen_extrl_i64_i32(lo, arg);
2740         tcg_gen_extrh_i64_i32(hi, arg);
2741     }
2742 }
2743 
2744 void tcg_gen_extr32_i64(TCGv_i64 lo, TCGv_i64 hi, TCGv_i64 arg)
2745 {
2746     tcg_gen_ext32u_i64(lo, arg);
2747     tcg_gen_shri_i64(hi, arg, 32);
2748 }
2749 
2750 void tcg_gen_extr_i128_i64(TCGv_i64 lo, TCGv_i64 hi, TCGv_i128 arg)
2751 {
2752     tcg_gen_mov_i64(lo, TCGV128_LOW(arg));
2753     tcg_gen_mov_i64(hi, TCGV128_HIGH(arg));
2754 }
2755 
2756 void tcg_gen_concat_i64_i128(TCGv_i128 ret, TCGv_i64 lo, TCGv_i64 hi)
2757 {
2758     tcg_gen_mov_i64(TCGV128_LOW(ret), lo);
2759     tcg_gen_mov_i64(TCGV128_HIGH(ret), hi);
2760 }
2761 
2762 void tcg_gen_mov_i128(TCGv_i128 dst, TCGv_i128 src)
2763 {
2764     if (dst != src) {
2765         tcg_gen_mov_i64(TCGV128_LOW(dst), TCGV128_LOW(src));
2766         tcg_gen_mov_i64(TCGV128_HIGH(dst), TCGV128_HIGH(src));
2767     }
2768 }
2769 
2770 /* QEMU specific operations.  */
2771 
2772 void tcg_gen_exit_tb(const TranslationBlock *tb, unsigned idx)
2773 {
2774     /*
2775      * Let the jit code return the read-only version of the
2776      * TranslationBlock, so that we minimize the pc-relative
2777      * distance of the address of the exit_tb code to TB.
2778      * This will improve utilization of pc-relative address loads.
2779      *
2780      * TODO: Move this to translator_loop, so that all const
2781      * TranslationBlock pointers refer to read-only memory.
2782      * This requires coordination with targets that do not use
2783      * the translator_loop.
2784      */
2785     uintptr_t val = (uintptr_t)tcg_splitwx_to_rx((void *)tb) + idx;
2786 
2787     if (tb == NULL) {
2788         tcg_debug_assert(idx == 0);
2789     } else if (idx <= TB_EXIT_IDXMAX) {
2790 #ifdef CONFIG_DEBUG_TCG
2791         /* This is an exit following a goto_tb.  Verify that we have
2792            seen this numbered exit before, via tcg_gen_goto_tb.  */
2793         tcg_debug_assert(tcg_ctx->goto_tb_issue_mask & (1 << idx));
2794 #endif
2795     } else {
2796         /* This is an exit via the exitreq label.  */
2797         tcg_debug_assert(idx == TB_EXIT_REQUESTED);
2798     }
2799 
2800     plugin_gen_disable_mem_helpers();
2801     tcg_gen_op1i(INDEX_op_exit_tb, val);
2802 }
2803 
2804 void tcg_gen_goto_tb(unsigned idx)
2805 {
2806     /* We tested CF_NO_GOTO_TB in translator_use_goto_tb. */
2807     tcg_debug_assert(!(tcg_ctx->gen_tb->cflags & CF_NO_GOTO_TB));
2808     /* We only support two chained exits.  */
2809     tcg_debug_assert(idx <= TB_EXIT_IDXMAX);
2810 #ifdef CONFIG_DEBUG_TCG
2811     /* Verify that we haven't seen this numbered exit before.  */
2812     tcg_debug_assert((tcg_ctx->goto_tb_issue_mask & (1 << idx)) == 0);
2813     tcg_ctx->goto_tb_issue_mask |= 1 << idx;
2814 #endif
2815     plugin_gen_disable_mem_helpers();
2816     tcg_gen_op1i(INDEX_op_goto_tb, idx);
2817 }
2818 
2819 void tcg_gen_lookup_and_goto_ptr(void)
2820 {
2821     TCGv_ptr ptr;
2822 
2823     if (tcg_ctx->gen_tb->cflags & CF_NO_GOTO_PTR) {
2824         tcg_gen_exit_tb(NULL, 0);
2825         return;
2826     }
2827 
2828     plugin_gen_disable_mem_helpers();
2829     ptr = tcg_temp_new_ptr();
2830     gen_helper_lookup_tb_ptr(ptr, cpu_env);
2831     tcg_gen_op1i(INDEX_op_goto_ptr, tcgv_ptr_arg(ptr));
2832     tcg_temp_free_ptr(ptr);
2833 }
2834 
2835 static inline MemOp tcg_canonicalize_memop(MemOp op, bool is64, bool st)
2836 {
2837     /* Trigger the asserts within as early as possible.  */
2838     unsigned a_bits = get_alignment_bits(op);
2839 
2840     /* Prefer MO_ALIGN+MO_XX over MO_ALIGN_XX+MO_XX */
2841     if (a_bits == (op & MO_SIZE)) {
2842         op = (op & ~MO_AMASK) | MO_ALIGN;
2843     }
2844 
2845     switch (op & MO_SIZE) {
2846     case MO_8:
2847         op &= ~MO_BSWAP;
2848         break;
2849     case MO_16:
2850         break;
2851     case MO_32:
2852         if (!is64) {
2853             op &= ~MO_SIGN;
2854         }
2855         break;
2856     case MO_64:
2857         if (is64) {
2858             op &= ~MO_SIGN;
2859             break;
2860         }
2861         /* fall through */
2862     default:
2863         g_assert_not_reached();
2864     }
2865     if (st) {
2866         op &= ~MO_SIGN;
2867     }
2868     return op;
2869 }
2870 
2871 static void gen_ldst_i32(TCGOpcode opc, TCGv_i32 val, TCGv addr,
2872                          MemOp memop, TCGArg idx)
2873 {
2874     MemOpIdx oi = make_memop_idx(memop, idx);
2875 #if TARGET_LONG_BITS == 32
2876     tcg_gen_op3i_i32(opc, val, addr, oi);
2877 #else
2878     if (TCG_TARGET_REG_BITS == 32) {
2879         tcg_gen_op4i_i32(opc, val, TCGV_LOW(addr), TCGV_HIGH(addr), oi);
2880     } else {
2881         tcg_gen_op3(opc, tcgv_i32_arg(val), tcgv_i64_arg(addr), oi);
2882     }
2883 #endif
2884 }
2885 
2886 static void gen_ldst_i64(TCGOpcode opc, TCGv_i64 val, TCGv addr,
2887                          MemOp memop, TCGArg idx)
2888 {
2889     MemOpIdx oi = make_memop_idx(memop, idx);
2890 #if TARGET_LONG_BITS == 32
2891     if (TCG_TARGET_REG_BITS == 32) {
2892         tcg_gen_op4i_i32(opc, TCGV_LOW(val), TCGV_HIGH(val), addr, oi);
2893     } else {
2894         tcg_gen_op3(opc, tcgv_i64_arg(val), tcgv_i32_arg(addr), oi);
2895     }
2896 #else
2897     if (TCG_TARGET_REG_BITS == 32) {
2898         tcg_gen_op5i_i32(opc, TCGV_LOW(val), TCGV_HIGH(val),
2899                          TCGV_LOW(addr), TCGV_HIGH(addr), oi);
2900     } else {
2901         tcg_gen_op3i_i64(opc, val, addr, oi);
2902     }
2903 #endif
2904 }
2905 
2906 static void tcg_gen_req_mo(TCGBar type)
2907 {
2908 #ifdef TCG_GUEST_DEFAULT_MO
2909     type &= TCG_GUEST_DEFAULT_MO;
2910 #endif
2911     type &= ~TCG_TARGET_DEFAULT_MO;
2912     if (type) {
2913         tcg_gen_mb(type | TCG_BAR_SC);
2914     }
2915 }
2916 
2917 static inline TCGv plugin_prep_mem_callbacks(TCGv vaddr)
2918 {
2919 #ifdef CONFIG_PLUGIN
2920     if (tcg_ctx->plugin_insn != NULL) {
2921         /* Save a copy of the vaddr for use after a load.  */
2922         TCGv temp = tcg_temp_new();
2923         tcg_gen_mov_tl(temp, vaddr);
2924         return temp;
2925     }
2926 #endif
2927     return vaddr;
2928 }
2929 
2930 static void plugin_gen_mem_callbacks(TCGv vaddr, MemOpIdx oi,
2931                                      enum qemu_plugin_mem_rw rw)
2932 {
2933 #ifdef CONFIG_PLUGIN
2934     if (tcg_ctx->plugin_insn != NULL) {
2935         qemu_plugin_meminfo_t info = make_plugin_meminfo(oi, rw);
2936         plugin_gen_empty_mem_callback(vaddr, info);
2937         tcg_temp_free(vaddr);
2938     }
2939 #endif
2940 }
2941 
2942 void tcg_gen_qemu_ld_i32(TCGv_i32 val, TCGv addr, TCGArg idx, MemOp memop)
2943 {
2944     MemOp orig_memop;
2945     MemOpIdx oi;
2946 
2947     tcg_gen_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
2948     memop = tcg_canonicalize_memop(memop, 0, 0);
2949     oi = make_memop_idx(memop, idx);
2950 
2951     orig_memop = memop;
2952     if (!TCG_TARGET_HAS_MEMORY_BSWAP && (memop & MO_BSWAP)) {
2953         memop &= ~MO_BSWAP;
2954         /* The bswap primitive benefits from zero-extended input.  */
2955         if ((memop & MO_SSIZE) == MO_SW) {
2956             memop &= ~MO_SIGN;
2957         }
2958     }
2959 
2960     addr = plugin_prep_mem_callbacks(addr);
2961     gen_ldst_i32(INDEX_op_qemu_ld_i32, val, addr, memop, idx);
2962     plugin_gen_mem_callbacks(addr, oi, QEMU_PLUGIN_MEM_R);
2963 
2964     if ((orig_memop ^ memop) & MO_BSWAP) {
2965         switch (orig_memop & MO_SIZE) {
2966         case MO_16:
2967             tcg_gen_bswap16_i32(val, val, (orig_memop & MO_SIGN
2968                                            ? TCG_BSWAP_IZ | TCG_BSWAP_OS
2969                                            : TCG_BSWAP_IZ | TCG_BSWAP_OZ));
2970             break;
2971         case MO_32:
2972             tcg_gen_bswap32_i32(val, val);
2973             break;
2974         default:
2975             g_assert_not_reached();
2976         }
2977     }
2978 }
2979 
2980 void tcg_gen_qemu_st_i32(TCGv_i32 val, TCGv addr, TCGArg idx, MemOp memop)
2981 {
2982     TCGv_i32 swap = NULL;
2983     MemOpIdx oi;
2984 
2985     tcg_gen_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST);
2986     memop = tcg_canonicalize_memop(memop, 0, 1);
2987     oi = make_memop_idx(memop, idx);
2988 
2989     if (!TCG_TARGET_HAS_MEMORY_BSWAP && (memop & MO_BSWAP)) {
2990         swap = tcg_temp_new_i32();
2991         switch (memop & MO_SIZE) {
2992         case MO_16:
2993             tcg_gen_bswap16_i32(swap, val, 0);
2994             break;
2995         case MO_32:
2996             tcg_gen_bswap32_i32(swap, val);
2997             break;
2998         default:
2999             g_assert_not_reached();
3000         }
3001         val = swap;
3002         memop &= ~MO_BSWAP;
3003     }
3004 
3005     addr = plugin_prep_mem_callbacks(addr);
3006     if (TCG_TARGET_HAS_qemu_st8_i32 && (memop & MO_SIZE) == MO_8) {
3007         gen_ldst_i32(INDEX_op_qemu_st8_i32, val, addr, memop, idx);
3008     } else {
3009         gen_ldst_i32(INDEX_op_qemu_st_i32, val, addr, memop, idx);
3010     }
3011     plugin_gen_mem_callbacks(addr, oi, QEMU_PLUGIN_MEM_W);
3012 
3013     if (swap) {
3014         tcg_temp_free_i32(swap);
3015     }
3016 }
3017 
3018 void tcg_gen_qemu_ld_i64(TCGv_i64 val, TCGv addr, TCGArg idx, MemOp memop)
3019 {
3020     MemOp orig_memop;
3021     MemOpIdx oi;
3022 
3023     if (TCG_TARGET_REG_BITS == 32 && (memop & MO_SIZE) < MO_64) {
3024         tcg_gen_qemu_ld_i32(TCGV_LOW(val), addr, idx, memop);
3025         if (memop & MO_SIGN) {
3026             tcg_gen_sari_i32(TCGV_HIGH(val), TCGV_LOW(val), 31);
3027         } else {
3028             tcg_gen_movi_i32(TCGV_HIGH(val), 0);
3029         }
3030         return;
3031     }
3032 
3033     tcg_gen_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
3034     memop = tcg_canonicalize_memop(memop, 1, 0);
3035     oi = make_memop_idx(memop, idx);
3036 
3037     orig_memop = memop;
3038     if (!TCG_TARGET_HAS_MEMORY_BSWAP && (memop & MO_BSWAP)) {
3039         memop &= ~MO_BSWAP;
3040         /* The bswap primitive benefits from zero-extended input.  */
3041         if ((memop & MO_SIGN) && (memop & MO_SIZE) < MO_64) {
3042             memop &= ~MO_SIGN;
3043         }
3044     }
3045 
3046     addr = plugin_prep_mem_callbacks(addr);
3047     gen_ldst_i64(INDEX_op_qemu_ld_i64, val, addr, memop, idx);
3048     plugin_gen_mem_callbacks(addr, oi, QEMU_PLUGIN_MEM_R);
3049 
3050     if ((orig_memop ^ memop) & MO_BSWAP) {
3051         int flags = (orig_memop & MO_SIGN
3052                      ? TCG_BSWAP_IZ | TCG_BSWAP_OS
3053                      : TCG_BSWAP_IZ | TCG_BSWAP_OZ);
3054         switch (orig_memop & MO_SIZE) {
3055         case MO_16:
3056             tcg_gen_bswap16_i64(val, val, flags);
3057             break;
3058         case MO_32:
3059             tcg_gen_bswap32_i64(val, val, flags);
3060             break;
3061         case MO_64:
3062             tcg_gen_bswap64_i64(val, val);
3063             break;
3064         default:
3065             g_assert_not_reached();
3066         }
3067     }
3068 }
3069 
3070 void tcg_gen_qemu_st_i64(TCGv_i64 val, TCGv addr, TCGArg idx, MemOp memop)
3071 {
3072     TCGv_i64 swap = NULL;
3073     MemOpIdx oi;
3074 
3075     if (TCG_TARGET_REG_BITS == 32 && (memop & MO_SIZE) < MO_64) {
3076         tcg_gen_qemu_st_i32(TCGV_LOW(val), addr, idx, memop);
3077         return;
3078     }
3079 
3080     tcg_gen_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST);
3081     memop = tcg_canonicalize_memop(memop, 1, 1);
3082     oi = make_memop_idx(memop, idx);
3083 
3084     if (!TCG_TARGET_HAS_MEMORY_BSWAP && (memop & MO_BSWAP)) {
3085         swap = tcg_temp_new_i64();
3086         switch (memop & MO_SIZE) {
3087         case MO_16:
3088             tcg_gen_bswap16_i64(swap, val, 0);
3089             break;
3090         case MO_32:
3091             tcg_gen_bswap32_i64(swap, val, 0);
3092             break;
3093         case MO_64:
3094             tcg_gen_bswap64_i64(swap, val);
3095             break;
3096         default:
3097             g_assert_not_reached();
3098         }
3099         val = swap;
3100         memop &= ~MO_BSWAP;
3101     }
3102 
3103     addr = plugin_prep_mem_callbacks(addr);
3104     gen_ldst_i64(INDEX_op_qemu_st_i64, val, addr, memop, idx);
3105     plugin_gen_mem_callbacks(addr, oi, QEMU_PLUGIN_MEM_W);
3106 
3107     if (swap) {
3108         tcg_temp_free_i64(swap);
3109     }
3110 }
3111 
3112 static void canonicalize_memop_i128_as_i64(MemOp ret[2], MemOp orig)
3113 {
3114     MemOp mop_1 = orig, mop_2;
3115 
3116     tcg_debug_assert((orig & MO_SIZE) == MO_128);
3117     tcg_debug_assert((orig & MO_SIGN) == 0);
3118 
3119     /* Use a memory ordering implemented by the host. */
3120     if (!TCG_TARGET_HAS_MEMORY_BSWAP && (orig & MO_BSWAP)) {
3121         mop_1 &= ~MO_BSWAP;
3122     }
3123 
3124     /* Reduce the size to 64-bit. */
3125     mop_1 = (mop_1 & ~MO_SIZE) | MO_64;
3126 
3127     /* Retain the alignment constraints of the original. */
3128     switch (orig & MO_AMASK) {
3129     case MO_UNALN:
3130     case MO_ALIGN_2:
3131     case MO_ALIGN_4:
3132         mop_2 = mop_1;
3133         break;
3134     case MO_ALIGN_8:
3135         /* Prefer MO_ALIGN+MO_64 to MO_ALIGN_8+MO_64. */
3136         mop_1 = (mop_1 & ~MO_AMASK) | MO_ALIGN;
3137         mop_2 = mop_1;
3138         break;
3139     case MO_ALIGN:
3140         /* Second has 8-byte alignment; first has 16-byte alignment. */
3141         mop_2 = mop_1;
3142         mop_1 = (mop_1 & ~MO_AMASK) | MO_ALIGN_16;
3143         break;
3144     case MO_ALIGN_16:
3145     case MO_ALIGN_32:
3146     case MO_ALIGN_64:
3147         /* Second has 8-byte alignment; first retains original. */
3148         mop_2 = (mop_1 & ~MO_AMASK) | MO_ALIGN;
3149         break;
3150     default:
3151         g_assert_not_reached();
3152     }
3153     ret[0] = mop_1;
3154     ret[1] = mop_2;
3155 }
3156 
3157 void tcg_gen_qemu_ld_i128(TCGv_i128 val, TCGv addr, TCGArg idx, MemOp memop)
3158 {
3159     MemOp mop[2];
3160     TCGv addr_p8;
3161     TCGv_i64 x, y;
3162 
3163     canonicalize_memop_i128_as_i64(mop, memop);
3164 
3165     tcg_gen_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
3166     addr = plugin_prep_mem_callbacks(addr);
3167 
3168     /* TODO: respect atomicity of the operation. */
3169     /* TODO: allow the tcg backend to see the whole operation. */
3170 
3171     /*
3172      * Since there are no global TCGv_i128, there is no visible state
3173      * changed if the second load faults.  Load directly into the two
3174      * subwords.
3175      */
3176     if ((memop & MO_BSWAP) == MO_LE) {
3177         x = TCGV128_LOW(val);
3178         y = TCGV128_HIGH(val);
3179     } else {
3180         x = TCGV128_HIGH(val);
3181         y = TCGV128_LOW(val);
3182     }
3183 
3184     gen_ldst_i64(INDEX_op_qemu_ld_i64, x, addr, mop[0], idx);
3185 
3186     if ((mop[0] ^ memop) & MO_BSWAP) {
3187         tcg_gen_bswap64_i64(x, x);
3188     }
3189 
3190     addr_p8 = tcg_temp_new();
3191     tcg_gen_addi_tl(addr_p8, addr, 8);
3192     gen_ldst_i64(INDEX_op_qemu_ld_i64, y, addr_p8, mop[1], idx);
3193     tcg_temp_free(addr_p8);
3194 
3195     if ((mop[0] ^ memop) & MO_BSWAP) {
3196         tcg_gen_bswap64_i64(y, y);
3197     }
3198 
3199     plugin_gen_mem_callbacks(addr, make_memop_idx(memop, idx),
3200                              QEMU_PLUGIN_MEM_R);
3201 }
3202 
3203 void tcg_gen_qemu_st_i128(TCGv_i128 val, TCGv addr, TCGArg idx, MemOp memop)
3204 {
3205     MemOp mop[2];
3206     TCGv addr_p8;
3207     TCGv_i64 x, y;
3208 
3209     canonicalize_memop_i128_as_i64(mop, memop);
3210 
3211     tcg_gen_req_mo(TCG_MO_ST_LD | TCG_MO_ST_ST);
3212     addr = plugin_prep_mem_callbacks(addr);
3213 
3214     /* TODO: respect atomicity of the operation. */
3215     /* TODO: allow the tcg backend to see the whole operation. */
3216 
3217     if ((memop & MO_BSWAP) == MO_LE) {
3218         x = TCGV128_LOW(val);
3219         y = TCGV128_HIGH(val);
3220     } else {
3221         x = TCGV128_HIGH(val);
3222         y = TCGV128_LOW(val);
3223     }
3224 
3225     addr_p8 = tcg_temp_new();
3226     if ((mop[0] ^ memop) & MO_BSWAP) {
3227         TCGv_i64 t = tcg_temp_new_i64();
3228 
3229         tcg_gen_bswap64_i64(t, x);
3230         gen_ldst_i64(INDEX_op_qemu_st_i64, t, addr, mop[0], idx);
3231         tcg_gen_bswap64_i64(t, y);
3232         tcg_gen_addi_tl(addr_p8, addr, 8);
3233         gen_ldst_i64(INDEX_op_qemu_st_i64, t, addr_p8, mop[1], idx);
3234         tcg_temp_free_i64(t);
3235     } else {
3236         gen_ldst_i64(INDEX_op_qemu_st_i64, x, addr, mop[0], idx);
3237         tcg_gen_addi_tl(addr_p8, addr, 8);
3238         gen_ldst_i64(INDEX_op_qemu_st_i64, y, addr_p8, mop[1], idx);
3239     }
3240     tcg_temp_free(addr_p8);
3241 
3242     plugin_gen_mem_callbacks(addr, make_memop_idx(memop, idx),
3243                              QEMU_PLUGIN_MEM_W);
3244 }
3245 
3246 static void tcg_gen_ext_i32(TCGv_i32 ret, TCGv_i32 val, MemOp opc)
3247 {
3248     switch (opc & MO_SSIZE) {
3249     case MO_SB:
3250         tcg_gen_ext8s_i32(ret, val);
3251         break;
3252     case MO_UB:
3253         tcg_gen_ext8u_i32(ret, val);
3254         break;
3255     case MO_SW:
3256         tcg_gen_ext16s_i32(ret, val);
3257         break;
3258     case MO_UW:
3259         tcg_gen_ext16u_i32(ret, val);
3260         break;
3261     default:
3262         tcg_gen_mov_i32(ret, val);
3263         break;
3264     }
3265 }
3266 
3267 static void tcg_gen_ext_i64(TCGv_i64 ret, TCGv_i64 val, MemOp opc)
3268 {
3269     switch (opc & MO_SSIZE) {
3270     case MO_SB:
3271         tcg_gen_ext8s_i64(ret, val);
3272         break;
3273     case MO_UB:
3274         tcg_gen_ext8u_i64(ret, val);
3275         break;
3276     case MO_SW:
3277         tcg_gen_ext16s_i64(ret, val);
3278         break;
3279     case MO_UW:
3280         tcg_gen_ext16u_i64(ret, val);
3281         break;
3282     case MO_SL:
3283         tcg_gen_ext32s_i64(ret, val);
3284         break;
3285     case MO_UL:
3286         tcg_gen_ext32u_i64(ret, val);
3287         break;
3288     default:
3289         tcg_gen_mov_i64(ret, val);
3290         break;
3291     }
3292 }
3293 
3294 typedef void (*gen_atomic_cx_i32)(TCGv_i32, TCGv_env, TCGv,
3295                                   TCGv_i32, TCGv_i32, TCGv_i32);
3296 typedef void (*gen_atomic_cx_i64)(TCGv_i64, TCGv_env, TCGv,
3297                                   TCGv_i64, TCGv_i64, TCGv_i32);
3298 typedef void (*gen_atomic_cx_i128)(TCGv_i128, TCGv_env, TCGv,
3299                                    TCGv_i128, TCGv_i128, TCGv_i32);
3300 typedef void (*gen_atomic_op_i32)(TCGv_i32, TCGv_env, TCGv,
3301                                   TCGv_i32, TCGv_i32);
3302 typedef void (*gen_atomic_op_i64)(TCGv_i64, TCGv_env, TCGv,
3303                                   TCGv_i64, TCGv_i32);
3304 
3305 #ifdef CONFIG_ATOMIC64
3306 # define WITH_ATOMIC64(X) X,
3307 #else
3308 # define WITH_ATOMIC64(X)
3309 #endif
3310 #ifdef CONFIG_CMPXCHG128
3311 # define WITH_ATOMIC128(X) X,
3312 #else
3313 # define WITH_ATOMIC128(X)
3314 #endif
3315 
3316 static void * const table_cmpxchg[(MO_SIZE | MO_BSWAP) + 1] = {
3317     [MO_8] = gen_helper_atomic_cmpxchgb,
3318     [MO_16 | MO_LE] = gen_helper_atomic_cmpxchgw_le,
3319     [MO_16 | MO_BE] = gen_helper_atomic_cmpxchgw_be,
3320     [MO_32 | MO_LE] = gen_helper_atomic_cmpxchgl_le,
3321     [MO_32 | MO_BE] = gen_helper_atomic_cmpxchgl_be,
3322     WITH_ATOMIC64([MO_64 | MO_LE] = gen_helper_atomic_cmpxchgq_le)
3323     WITH_ATOMIC64([MO_64 | MO_BE] = gen_helper_atomic_cmpxchgq_be)
3324     WITH_ATOMIC128([MO_128 | MO_LE] = gen_helper_atomic_cmpxchgo_le)
3325     WITH_ATOMIC128([MO_128 | MO_BE] = gen_helper_atomic_cmpxchgo_be)
3326 };
3327 
3328 void tcg_gen_nonatomic_cmpxchg_i32(TCGv_i32 retv, TCGv addr, TCGv_i32 cmpv,
3329                                    TCGv_i32 newv, TCGArg idx, MemOp memop)
3330 {
3331     TCGv_i32 t1 = tcg_temp_new_i32();
3332     TCGv_i32 t2 = tcg_temp_new_i32();
3333 
3334     tcg_gen_ext_i32(t2, cmpv, memop & MO_SIZE);
3335 
3336     tcg_gen_qemu_ld_i32(t1, addr, idx, memop & ~MO_SIGN);
3337     tcg_gen_movcond_i32(TCG_COND_EQ, t2, t1, t2, newv, t1);
3338     tcg_gen_qemu_st_i32(t2, addr, idx, memop);
3339     tcg_temp_free_i32(t2);
3340 
3341     if (memop & MO_SIGN) {
3342         tcg_gen_ext_i32(retv, t1, memop);
3343     } else {
3344         tcg_gen_mov_i32(retv, t1);
3345     }
3346     tcg_temp_free_i32(t1);
3347 }
3348 
3349 void tcg_gen_atomic_cmpxchg_i32(TCGv_i32 retv, TCGv addr, TCGv_i32 cmpv,
3350                                 TCGv_i32 newv, TCGArg idx, MemOp memop)
3351 {
3352     gen_atomic_cx_i32 gen;
3353     MemOpIdx oi;
3354 
3355     if (!(tcg_ctx->gen_tb->cflags & CF_PARALLEL)) {
3356         tcg_gen_nonatomic_cmpxchg_i32(retv, addr, cmpv, newv, idx, memop);
3357         return;
3358     }
3359 
3360     memop = tcg_canonicalize_memop(memop, 0, 0);
3361     gen = table_cmpxchg[memop & (MO_SIZE | MO_BSWAP)];
3362     tcg_debug_assert(gen != NULL);
3363 
3364     oi = make_memop_idx(memop & ~MO_SIGN, idx);
3365     gen(retv, cpu_env, addr, cmpv, newv, tcg_constant_i32(oi));
3366 
3367     if (memop & MO_SIGN) {
3368         tcg_gen_ext_i32(retv, retv, memop);
3369     }
3370 }
3371 
3372 void tcg_gen_nonatomic_cmpxchg_i64(TCGv_i64 retv, TCGv addr, TCGv_i64 cmpv,
3373                                    TCGv_i64 newv, TCGArg idx, MemOp memop)
3374 {
3375     TCGv_i64 t1, t2;
3376 
3377     if (TCG_TARGET_REG_BITS == 32 && (memop & MO_SIZE) < MO_64) {
3378         tcg_gen_nonatomic_cmpxchg_i32(TCGV_LOW(retv), addr, TCGV_LOW(cmpv),
3379                                       TCGV_LOW(newv), idx, memop);
3380         if (memop & MO_SIGN) {
3381             tcg_gen_sari_i32(TCGV_HIGH(retv), TCGV_LOW(retv), 31);
3382         } else {
3383             tcg_gen_movi_i32(TCGV_HIGH(retv), 0);
3384         }
3385         return;
3386     }
3387 
3388     t1 = tcg_temp_new_i64();
3389     t2 = tcg_temp_new_i64();
3390 
3391     tcg_gen_ext_i64(t2, cmpv, memop & MO_SIZE);
3392 
3393     tcg_gen_qemu_ld_i64(t1, addr, idx, memop & ~MO_SIGN);
3394     tcg_gen_movcond_i64(TCG_COND_EQ, t2, t1, t2, newv, t1);
3395     tcg_gen_qemu_st_i64(t2, addr, idx, memop);
3396     tcg_temp_free_i64(t2);
3397 
3398     if (memop & MO_SIGN) {
3399         tcg_gen_ext_i64(retv, t1, memop);
3400     } else {
3401         tcg_gen_mov_i64(retv, t1);
3402     }
3403     tcg_temp_free_i64(t1);
3404 }
3405 
3406 void tcg_gen_atomic_cmpxchg_i64(TCGv_i64 retv, TCGv addr, TCGv_i64 cmpv,
3407                                 TCGv_i64 newv, TCGArg idx, MemOp memop)
3408 {
3409     if (!(tcg_ctx->gen_tb->cflags & CF_PARALLEL)) {
3410         tcg_gen_nonatomic_cmpxchg_i64(retv, addr, cmpv, newv, idx, memop);
3411         return;
3412     }
3413 
3414     if ((memop & MO_SIZE) == MO_64) {
3415         gen_atomic_cx_i64 gen;
3416 
3417         memop = tcg_canonicalize_memop(memop, 1, 0);
3418         gen = table_cmpxchg[memop & (MO_SIZE | MO_BSWAP)];
3419         if (gen) {
3420             MemOpIdx oi = make_memop_idx(memop, idx);
3421             gen(retv, cpu_env, addr, cmpv, newv, tcg_constant_i32(oi));
3422             return;
3423         }
3424 
3425         gen_helper_exit_atomic(cpu_env);
3426 
3427         /*
3428          * Produce a result for a well-formed opcode stream.  This satisfies
3429          * liveness for set before used, which happens before this dead code
3430          * is removed.
3431          */
3432         tcg_gen_movi_i64(retv, 0);
3433         return;
3434     }
3435 
3436     if (TCG_TARGET_REG_BITS == 32) {
3437         tcg_gen_atomic_cmpxchg_i32(TCGV_LOW(retv), addr, TCGV_LOW(cmpv),
3438                                    TCGV_LOW(newv), idx, memop);
3439         if (memop & MO_SIGN) {
3440             tcg_gen_sari_i32(TCGV_HIGH(retv), TCGV_LOW(retv), 31);
3441         } else {
3442             tcg_gen_movi_i32(TCGV_HIGH(retv), 0);
3443         }
3444     } else {
3445         TCGv_i32 c32 = tcg_temp_new_i32();
3446         TCGv_i32 n32 = tcg_temp_new_i32();
3447         TCGv_i32 r32 = tcg_temp_new_i32();
3448 
3449         tcg_gen_extrl_i64_i32(c32, cmpv);
3450         tcg_gen_extrl_i64_i32(n32, newv);
3451         tcg_gen_atomic_cmpxchg_i32(r32, addr, c32, n32, idx, memop & ~MO_SIGN);
3452         tcg_temp_free_i32(c32);
3453         tcg_temp_free_i32(n32);
3454 
3455         tcg_gen_extu_i32_i64(retv, r32);
3456         tcg_temp_free_i32(r32);
3457 
3458         if (memop & MO_SIGN) {
3459             tcg_gen_ext_i64(retv, retv, memop);
3460         }
3461     }
3462 }
3463 
3464 void tcg_gen_nonatomic_cmpxchg_i128(TCGv_i128 retv, TCGv addr, TCGv_i128 cmpv,
3465                                     TCGv_i128 newv, TCGArg idx, MemOp memop)
3466 {
3467     if (TCG_TARGET_REG_BITS == 32) {
3468         /* Inline expansion below is simply too large for 32-bit hosts. */
3469         gen_atomic_cx_i128 gen = ((memop & MO_BSWAP) == MO_LE
3470                                   ? gen_helper_nonatomic_cmpxchgo_le
3471                                   : gen_helper_nonatomic_cmpxchgo_be);
3472         MemOpIdx oi = make_memop_idx(memop, idx);
3473 
3474         tcg_debug_assert((memop & MO_SIZE) == MO_128);
3475         tcg_debug_assert((memop & MO_SIGN) == 0);
3476 
3477         gen(retv, cpu_env, addr, cmpv, newv, tcg_constant_i32(oi));
3478     } else {
3479         TCGv_i128 oldv = tcg_temp_new_i128();
3480         TCGv_i128 tmpv = tcg_temp_new_i128();
3481         TCGv_i64 t0 = tcg_temp_new_i64();
3482         TCGv_i64 t1 = tcg_temp_new_i64();
3483         TCGv_i64 z = tcg_constant_i64(0);
3484 
3485         tcg_gen_qemu_ld_i128(oldv, addr, idx, memop);
3486 
3487         /* Compare i128 */
3488         tcg_gen_xor_i64(t0, TCGV128_LOW(oldv), TCGV128_LOW(cmpv));
3489         tcg_gen_xor_i64(t1, TCGV128_HIGH(oldv), TCGV128_HIGH(cmpv));
3490         tcg_gen_or_i64(t0, t0, t1);
3491 
3492         /* tmpv = equal ? newv : oldv */
3493         tcg_gen_movcond_i64(TCG_COND_EQ, TCGV128_LOW(tmpv), t0, z,
3494                             TCGV128_LOW(newv), TCGV128_LOW(oldv));
3495         tcg_gen_movcond_i64(TCG_COND_EQ, TCGV128_HIGH(tmpv), t0, z,
3496                             TCGV128_HIGH(newv), TCGV128_HIGH(oldv));
3497 
3498         /* Unconditional writeback. */
3499         tcg_gen_qemu_st_i128(tmpv, addr, idx, memop);
3500         tcg_gen_mov_i128(retv, oldv);
3501 
3502         tcg_temp_free_i64(t0);
3503         tcg_temp_free_i64(t1);
3504         tcg_temp_free_i128(tmpv);
3505         tcg_temp_free_i128(oldv);
3506     }
3507 }
3508 
3509 void tcg_gen_atomic_cmpxchg_i128(TCGv_i128 retv, TCGv addr, TCGv_i128 cmpv,
3510                                  TCGv_i128 newv, TCGArg idx, MemOp memop)
3511 {
3512     gen_atomic_cx_i128 gen;
3513 
3514     if (!(tcg_ctx->gen_tb->cflags & CF_PARALLEL)) {
3515         tcg_gen_nonatomic_cmpxchg_i128(retv, addr, cmpv, newv, idx, memop);
3516         return;
3517     }
3518 
3519     tcg_debug_assert((memop & MO_SIZE) == MO_128);
3520     tcg_debug_assert((memop & MO_SIGN) == 0);
3521     gen = table_cmpxchg[memop & (MO_SIZE | MO_BSWAP)];
3522 
3523     if (gen) {
3524         MemOpIdx oi = make_memop_idx(memop, idx);
3525         gen(retv, cpu_env, addr, cmpv, newv, tcg_constant_i32(oi));
3526         return;
3527     }
3528 
3529     gen_helper_exit_atomic(cpu_env);
3530 
3531     /*
3532      * Produce a result for a well-formed opcode stream.  This satisfies
3533      * liveness for set before used, which happens before this dead code
3534      * is removed.
3535      */
3536     tcg_gen_movi_i64(TCGV128_LOW(retv), 0);
3537     tcg_gen_movi_i64(TCGV128_HIGH(retv), 0);
3538 }
3539 
3540 static void do_nonatomic_op_i32(TCGv_i32 ret, TCGv addr, TCGv_i32 val,
3541                                 TCGArg idx, MemOp memop, bool new_val,
3542                                 void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32))
3543 {
3544     TCGv_i32 t1 = tcg_temp_new_i32();
3545     TCGv_i32 t2 = tcg_temp_new_i32();
3546 
3547     memop = tcg_canonicalize_memop(memop, 0, 0);
3548 
3549     tcg_gen_qemu_ld_i32(t1, addr, idx, memop);
3550     tcg_gen_ext_i32(t2, val, memop);
3551     gen(t2, t1, t2);
3552     tcg_gen_qemu_st_i32(t2, addr, idx, memop);
3553 
3554     tcg_gen_ext_i32(ret, (new_val ? t2 : t1), memop);
3555     tcg_temp_free_i32(t1);
3556     tcg_temp_free_i32(t2);
3557 }
3558 
3559 static void do_atomic_op_i32(TCGv_i32 ret, TCGv addr, TCGv_i32 val,
3560                              TCGArg idx, MemOp memop, void * const table[])
3561 {
3562     gen_atomic_op_i32 gen;
3563     MemOpIdx oi;
3564 
3565     memop = tcg_canonicalize_memop(memop, 0, 0);
3566 
3567     gen = table[memop & (MO_SIZE | MO_BSWAP)];
3568     tcg_debug_assert(gen != NULL);
3569 
3570     oi = make_memop_idx(memop & ~MO_SIGN, idx);
3571     gen(ret, cpu_env, addr, val, tcg_constant_i32(oi));
3572 
3573     if (memop & MO_SIGN) {
3574         tcg_gen_ext_i32(ret, ret, memop);
3575     }
3576 }
3577 
3578 static void do_nonatomic_op_i64(TCGv_i64 ret, TCGv addr, TCGv_i64 val,
3579                                 TCGArg idx, MemOp memop, bool new_val,
3580                                 void (*gen)(TCGv_i64, TCGv_i64, TCGv_i64))
3581 {
3582     TCGv_i64 t1 = tcg_temp_new_i64();
3583     TCGv_i64 t2 = tcg_temp_new_i64();
3584 
3585     memop = tcg_canonicalize_memop(memop, 1, 0);
3586 
3587     tcg_gen_qemu_ld_i64(t1, addr, idx, memop);
3588     tcg_gen_ext_i64(t2, val, memop);
3589     gen(t2, t1, t2);
3590     tcg_gen_qemu_st_i64(t2, addr, idx, memop);
3591 
3592     tcg_gen_ext_i64(ret, (new_val ? t2 : t1), memop);
3593     tcg_temp_free_i64(t1);
3594     tcg_temp_free_i64(t2);
3595 }
3596 
3597 static void do_atomic_op_i64(TCGv_i64 ret, TCGv addr, TCGv_i64 val,
3598                              TCGArg idx, MemOp memop, void * const table[])
3599 {
3600     memop = tcg_canonicalize_memop(memop, 1, 0);
3601 
3602     if ((memop & MO_SIZE) == MO_64) {
3603 #ifdef CONFIG_ATOMIC64
3604         gen_atomic_op_i64 gen;
3605         MemOpIdx oi;
3606 
3607         gen = table[memop & (MO_SIZE | MO_BSWAP)];
3608         tcg_debug_assert(gen != NULL);
3609 
3610         oi = make_memop_idx(memop & ~MO_SIGN, idx);
3611         gen(ret, cpu_env, addr, val, tcg_constant_i32(oi));
3612 #else
3613         gen_helper_exit_atomic(cpu_env);
3614         /* Produce a result, so that we have a well-formed opcode stream
3615            with respect to uses of the result in the (dead) code following.  */
3616         tcg_gen_movi_i64(ret, 0);
3617 #endif /* CONFIG_ATOMIC64 */
3618     } else {
3619         TCGv_i32 v32 = tcg_temp_new_i32();
3620         TCGv_i32 r32 = tcg_temp_new_i32();
3621 
3622         tcg_gen_extrl_i64_i32(v32, val);
3623         do_atomic_op_i32(r32, addr, v32, idx, memop & ~MO_SIGN, table);
3624         tcg_temp_free_i32(v32);
3625 
3626         tcg_gen_extu_i32_i64(ret, r32);
3627         tcg_temp_free_i32(r32);
3628 
3629         if (memop & MO_SIGN) {
3630             tcg_gen_ext_i64(ret, ret, memop);
3631         }
3632     }
3633 }
3634 
/*
 * GEN_ATOMIC_HELPER(NAME, OP, NEW_VAL) expands to:
 *   - table_NAME: the gen_helper_atomic_NAME* emitters, indexed by the
 *     MO_SIZE | MO_BSWAP bits of a MemOp (the 64-bit entries are wrapped
 *     in WITH_ATOMIC64);
 *   - tcg_gen_atomic_NAME_i32() and tcg_gen_atomic_NAME_i64().
 *
 * Each generated function tests CF_PARALLEL in tcg_ctx->gen_tb->cflags.
 * When it is set the operation is dispatched through table_NAME; otherwise
 * an inline load / tcg_gen_OP / store sequence is emitted, with NEW_VAL
 * selecting whether the stored or the loaded value is returned.
 */
#define GEN_ATOMIC_HELPER(NAME, OP, NEW_VAL)                            \
static void * const table_##NAME[(MO_SIZE | MO_BSWAP) + 1] = {          \
    [MO_8] = gen_helper_atomic_##NAME##b,                               \
    [MO_16 | MO_LE] = gen_helper_atomic_##NAME##w_le,                   \
    [MO_16 | MO_BE] = gen_helper_atomic_##NAME##w_be,                   \
    [MO_32 | MO_LE] = gen_helper_atomic_##NAME##l_le,                   \
    [MO_32 | MO_BE] = gen_helper_atomic_##NAME##l_be,                   \
    WITH_ATOMIC64([MO_64 | MO_LE] = gen_helper_atomic_##NAME##q_le)     \
    WITH_ATOMIC64([MO_64 | MO_BE] = gen_helper_atomic_##NAME##q_be)     \
};                                                                      \
void tcg_gen_atomic_##NAME##_i32                                        \
    (TCGv_i32 ret, TCGv addr, TCGv_i32 val, TCGArg idx, MemOp memop)    \
{                                                                       \
    if (tcg_ctx->gen_tb->cflags & CF_PARALLEL) {                        \
        do_atomic_op_i32(ret, addr, val, idx, memop, table_##NAME);     \
        return;                                                         \
    }                                                                   \
    do_nonatomic_op_i32(ret, addr, val, idx, memop, NEW_VAL,            \
                        tcg_gen_##OP##_i32);                            \
}                                                                       \
void tcg_gen_atomic_##NAME##_i64                                        \
    (TCGv_i64 ret, TCGv addr, TCGv_i64 val, TCGArg idx, MemOp memop)    \
{                                                                       \
    if (tcg_ctx->gen_tb->cflags & CF_PARALLEL) {                        \
        do_atomic_op_i64(ret, addr, val, idx, memop, table_##NAME);     \
        return;                                                         \
    }                                                                   \
    do_nonatomic_op_i64(ret, addr, val, idx, memop, NEW_VAL,            \
                        tcg_gen_##OP##_i64);                            \
}
3665 
/*
 * fetch_<op> family: the third argument is 0, so the non-parallel path
 * returns the value loaded from memory, i.e. the contents before <op>
 * was applied.
 */
GEN_ATOMIC_HELPER(fetch_add, add, 0)
GEN_ATOMIC_HELPER(fetch_and, and, 0)
GEN_ATOMIC_HELPER(fetch_or, or, 0)
GEN_ATOMIC_HELPER(fetch_xor, xor, 0)
GEN_ATOMIC_HELPER(fetch_smin, smin, 0)
GEN_ATOMIC_HELPER(fetch_umin, umin, 0)
GEN_ATOMIC_HELPER(fetch_smax, smax, 0)
GEN_ATOMIC_HELPER(fetch_umax, umax, 0)

/*
 * <op>_fetch family: the third argument is 1, so the non-parallel path
 * returns the value that was written back, i.e. the result of <op>.
 */
GEN_ATOMIC_HELPER(add_fetch, add, 1)
GEN_ATOMIC_HELPER(and_fetch, and, 1)
GEN_ATOMIC_HELPER(or_fetch, or, 1)
GEN_ATOMIC_HELPER(xor_fetch, xor, 1)
GEN_ATOMIC_HELPER(smin_fetch, smin, 1)
GEN_ATOMIC_HELPER(umin_fetch, umin, 1)
GEN_ATOMIC_HELPER(smax_fetch, smax, 1)
GEN_ATOMIC_HELPER(umax_fetch, umax, 1)
3683 
3684 static void tcg_gen_mov2_i32(TCGv_i32 r, TCGv_i32 a, TCGv_i32 b)
3685 {
3686     tcg_gen_mov_i32(r, b);
3687 }
3688 
3689 static void tcg_gen_mov2_i64(TCGv_i64 r, TCGv_i64 a, TCGv_i64 b)
3690 {
3691     tcg_gen_mov_i64(r, b);
3692 }
3693 
/*
 * Exchange: with mov2 as the operation the non-parallel path stores the
 * (extended) operand unchanged, and the third argument of 0 makes it
 * return the previously loaded value.
 */
GEN_ATOMIC_HELPER(xchg, mov2, 0)

/* The generator macro is not needed past this point. */
#undef GEN_ATOMIC_HELPER
3697