/*
 * Tiny Code Generator for QEMU
 *
 * Copyright (c) 2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "qemu/osdep.h"
#include "cpu.h"
#include "exec/exec-all.h"
#include "tcg.h"
#include "tcg-op.h"
#include "tcg-mo.h"
#include "trace-tcg.h"
#include "trace/mem.h"

/* Reduce the number of ifdefs below.  This assumes that all uses of
   TCGV_HIGH and TCGV_LOW are properly protected by a conditional that
   the compiler can eliminate.  */
#if TCG_TARGET_REG_BITS == 64
extern TCGv_i32 TCGV_LOW_link_error(TCGv_i64);
extern TCGv_i32 TCGV_HIGH_link_error(TCGv_i64);
#define TCGV_LOW  TCGV_LOW_link_error
#define TCGV_HIGH TCGV_HIGH_link_error
#endif

void tcg_gen_op1(TCGOpcode opc, TCGArg a1)
{
    TCGOp *op = tcg_emit_op(opc);
    op->args[0] = a1;
}

void tcg_gen_op2(TCGOpcode opc, TCGArg a1, TCGArg a2)
{
    TCGOp *op = tcg_emit_op(opc);
    op->args[0] = a1;
    op->args[1] = a2;
}

void tcg_gen_op3(TCGOpcode opc, TCGArg a1, TCGArg a2, TCGArg a3)
{
    TCGOp *op = tcg_emit_op(opc);
    op->args[0] = a1;
    op->args[1] = a2;
    op->args[2] = a3;
}

void tcg_gen_op4(TCGOpcode opc, TCGArg a1, TCGArg a2, TCGArg a3, TCGArg a4)
{
    TCGOp *op = tcg_emit_op(opc);
    op->args[0] = a1;
    op->args[1] = a2;
    op->args[2] = a3;
    op->args[3] = a4;
}

void tcg_gen_op5(TCGOpcode opc, TCGArg a1, TCGArg a2, TCGArg a3,
                 TCGArg a4, TCGArg a5)
{
    TCGOp *op = tcg_emit_op(opc);
    op->args[0] = a1;
    op->args[1] = a2;
    op->args[2] = a3;
    op->args[3] = a4;
    op->args[4] = a5;
}

void tcg_gen_op6(TCGOpcode opc, TCGArg a1, TCGArg a2, TCGArg a3,
                 TCGArg a4, TCGArg a5, TCGArg a6)
{
    TCGOp *op = tcg_emit_op(opc);
    op->args[0] = a1;
    op->args[1] = a2;
    op->args[2] = a3;
    op->args[3] = a4;
    op->args[4] = a5;
    op->args[5] = a6;
}

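/*
 * Barriers are only meaningful when this TB can execute in parallel
 * with other vCPUs (CF_PARALLEL); in single-threaded round-robin
 * emulation each TB runs to completion in order, so the barrier op
 * is elided entirely.
 */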
void tcg_gen_mb(TCGBar mb_type)
{
    if (tcg_ctx->tb_cflags & CF_PARALLEL) {
        tcg_gen_op1(INDEX_op_mb, mb_type);
    }
}

/* 32-bit ops */

void tcg_gen_addi_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
{
    /* some cases can be optimized here */
    if (arg2 == 0) {
        tcg_gen_mov_i32(ret, arg1);
    } else {
        TCGv_i32 t0 = tcg_const_i32(arg2);
        tcg_gen_add_i32(ret, arg1, t0);
        tcg_temp_free_i32(t0);
    }
}

void tcg_gen_subfi_i32(TCGv_i32 ret, int32_t arg1, TCGv_i32 arg2)
{
    if (arg1 == 0 && TCG_TARGET_HAS_neg_i32) {
        /* Don't recurse with tcg_gen_neg_i32.  */
        tcg_gen_op2_i32(INDEX_op_neg_i32, ret, arg2);
    } else {
        TCGv_i32 t0 = tcg_const_i32(arg1);
        tcg_gen_sub_i32(ret, t0, arg2);
        tcg_temp_free_i32(t0);
    }
}

void tcg_gen_subi_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
{
    /* some cases can be optimized here */
    if (arg2 == 0) {
        tcg_gen_mov_i32(ret, arg1);
    } else {
        TCGv_i32 t0 = tcg_const_i32(arg2);
        tcg_gen_sub_i32(ret, arg1, t0);
        tcg_temp_free_i32(t0);
    }
}

void tcg_gen_andi_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
{
    TCGv_i32 t0;
    /* Some cases can be optimized here.  */
    switch (arg2) {
    case 0:
        tcg_gen_movi_i32(ret, 0);
        return;
    case -1:
        tcg_gen_mov_i32(ret, arg1);
        return;
    case 0xff:
        /* Don't recurse with tcg_gen_ext8u_i32.  */
        if (TCG_TARGET_HAS_ext8u_i32) {
            tcg_gen_op2_i32(INDEX_op_ext8u_i32, ret, arg1);
            return;
        }
        break;
    case 0xffff:
        if (TCG_TARGET_HAS_ext16u_i32) {
            tcg_gen_op2_i32(INDEX_op_ext16u_i32, ret, arg1);
            return;
        }
        break;
    }
    t0 = tcg_const_i32(arg2);
    tcg_gen_and_i32(ret, arg1, t0);
    tcg_temp_free_i32(t0);
}

void tcg_gen_ori_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
{
    /* Some cases can be optimized here.  */
    if (arg2 == -1) {
        tcg_gen_movi_i32(ret, -1);
    } else if (arg2 == 0) {
        tcg_gen_mov_i32(ret, arg1);
    } else {
        TCGv_i32 t0 = tcg_const_i32(arg2);
        tcg_gen_or_i32(ret, arg1, t0);
        tcg_temp_free_i32(t0);
    }
}

void tcg_gen_xori_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
{
    /* Some cases can be optimized here.  */
    if (arg2 == 0) {
        tcg_gen_mov_i32(ret, arg1);
    } else if (arg2 == -1 && TCG_TARGET_HAS_not_i32) {
        /* Don't recurse with tcg_gen_not_i32.  */
        tcg_gen_op2_i32(INDEX_op_not_i32, ret, arg1);
    } else {
        TCGv_i32 t0 = tcg_const_i32(arg2);
        tcg_gen_xor_i32(ret, arg1, t0);
        tcg_temp_free_i32(t0);
    }
}

void tcg_gen_shli_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
{
    tcg_debug_assert(arg2 >= 0 && arg2 < 32);
    if (arg2 == 0) {
        tcg_gen_mov_i32(ret, arg1);
    } else {
        TCGv_i32 t0 = tcg_const_i32(arg2);
        tcg_gen_shl_i32(ret, arg1, t0);
        tcg_temp_free_i32(t0);
    }
}

void tcg_gen_shri_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
{
    tcg_debug_assert(arg2 >= 0 && arg2 < 32);
    if (arg2 == 0) {
        tcg_gen_mov_i32(ret, arg1);
    } else {
        TCGv_i32 t0 = tcg_const_i32(arg2);
        tcg_gen_shr_i32(ret, arg1, t0);
        tcg_temp_free_i32(t0);
    }
}

void tcg_gen_sari_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
{
    tcg_debug_assert(arg2 >= 0 && arg2 < 32);
    if (arg2 == 0) {
        tcg_gen_mov_i32(ret, arg1);
    } else {
        TCGv_i32 t0 = tcg_const_i32(arg2);
        tcg_gen_sar_i32(ret, arg1, t0);
        tcg_temp_free_i32(t0);
    }
}

void tcg_gen_brcond_i32(TCGCond cond, TCGv_i32 arg1, TCGv_i32 arg2, TCGLabel *l)
{
    if (cond == TCG_COND_ALWAYS) {
        tcg_gen_br(l);
    } else if (cond != TCG_COND_NEVER) {
        l->refs++;
        tcg_gen_op4ii_i32(INDEX_op_brcond_i32, arg1, arg2, cond, label_arg(l));
    }
}

void tcg_gen_brcondi_i32(TCGCond cond, TCGv_i32 arg1, int32_t arg2, TCGLabel *l)
{
    if (cond == TCG_COND_ALWAYS) {
        tcg_gen_br(l);
    } else if (cond != TCG_COND_NEVER) {
        TCGv_i32 t0 = tcg_const_i32(arg2);
        tcg_gen_brcond_i32(cond, arg1, t0, l);
        tcg_temp_free_i32(t0);
    }
}

void tcg_gen_setcond_i32(TCGCond cond, TCGv_i32 ret,
                         TCGv_i32 arg1, TCGv_i32 arg2)
{
    if (cond == TCG_COND_ALWAYS) {
        tcg_gen_movi_i32(ret, 1);
    } else if (cond == TCG_COND_NEVER) {
        tcg_gen_movi_i32(ret, 0);
    } else {
        tcg_gen_op4i_i32(INDEX_op_setcond_i32, ret, arg1, arg2, cond);
    }
}

void tcg_gen_setcondi_i32(TCGCond cond, TCGv_i32 ret,
                          TCGv_i32 arg1, int32_t arg2)
{
    TCGv_i32 t0 = tcg_const_i32(arg2);
    tcg_gen_setcond_i32(cond, ret, arg1, t0);
    tcg_temp_free_i32(t0);
}

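/*
 * Strength-reduce multiplication by a constant: x * 0 folds to a move
 * of zero and x * 2^n to a left shift, e.g. x * 8 == x << 3.  Any
 * other constant is materialized and multiplied normally.
 */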
void tcg_gen_muli_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
{
    if (arg2 == 0) {
        tcg_gen_movi_i32(ret, 0);
    } else if (is_power_of_2(arg2)) {
        tcg_gen_shli_i32(ret, arg1, ctz32(arg2));
    } else {
        TCGv_i32 t0 = tcg_const_i32(arg2);
        tcg_gen_mul_i32(ret, arg1, t0);
        tcg_temp_free_i32(t0);
    }
}

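/*
 * Division expands in three tiers, cheapest first: a native div
 * opcode; the double-word div2 opcode, which divides the 64-bit value
 * high:low by the divisor (here the high word is the sign extension
 * arg1 >> 31) and returns both quotient and remainder; or a helper
 * call.
 */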
void tcg_gen_div_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
{
    if (TCG_TARGET_HAS_div_i32) {
        tcg_gen_op3_i32(INDEX_op_div_i32, ret, arg1, arg2);
    } else if (TCG_TARGET_HAS_div2_i32) {
        TCGv_i32 t0 = tcg_temp_new_i32();
        tcg_gen_sari_i32(t0, arg1, 31);
        tcg_gen_op5_i32(INDEX_op_div2_i32, ret, t0, arg1, t0, arg2);
        tcg_temp_free_i32(t0);
    } else {
        gen_helper_div_i32(ret, arg1, arg2);
    }
}

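/*
 * With only division available, the remainder falls out of the
 * identity r = a - (a / b) * b; the div2 opcode instead yields the
 * remainder directly in its second output.
 */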
void tcg_gen_rem_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
{
    if (TCG_TARGET_HAS_rem_i32) {
        tcg_gen_op3_i32(INDEX_op_rem_i32, ret, arg1, arg2);
    } else if (TCG_TARGET_HAS_div_i32) {
        TCGv_i32 t0 = tcg_temp_new_i32();
        tcg_gen_op3_i32(INDEX_op_div_i32, t0, arg1, arg2);
        tcg_gen_mul_i32(t0, t0, arg2);
        tcg_gen_sub_i32(ret, arg1, t0);
        tcg_temp_free_i32(t0);
    } else if (TCG_TARGET_HAS_div2_i32) {
        TCGv_i32 t0 = tcg_temp_new_i32();
        tcg_gen_sari_i32(t0, arg1, 31);
        tcg_gen_op5_i32(INDEX_op_div2_i32, t0, ret, arg1, t0, arg2);
        tcg_temp_free_i32(t0);
    } else {
        gen_helper_rem_i32(ret, arg1, arg2);
    }
}

void tcg_gen_divu_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
{
    if (TCG_TARGET_HAS_div_i32) {
        tcg_gen_op3_i32(INDEX_op_divu_i32, ret, arg1, arg2);
    } else if (TCG_TARGET_HAS_div2_i32) {
        TCGv_i32 t0 = tcg_temp_new_i32();
        tcg_gen_movi_i32(t0, 0);
        tcg_gen_op5_i32(INDEX_op_divu2_i32, ret, t0, arg1, t0, arg2);
        tcg_temp_free_i32(t0);
    } else {
        gen_helper_divu_i32(ret, arg1, arg2);
    }
}

void tcg_gen_remu_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
{
    if (TCG_TARGET_HAS_rem_i32) {
        tcg_gen_op3_i32(INDEX_op_remu_i32, ret, arg1, arg2);
    } else if (TCG_TARGET_HAS_div_i32) {
        TCGv_i32 t0 = tcg_temp_new_i32();
        tcg_gen_op3_i32(INDEX_op_divu_i32, t0, arg1, arg2);
        tcg_gen_mul_i32(t0, t0, arg2);
        tcg_gen_sub_i32(ret, arg1, t0);
        tcg_temp_free_i32(t0);
    } else if (TCG_TARGET_HAS_div2_i32) {
        TCGv_i32 t0 = tcg_temp_new_i32();
        tcg_gen_movi_i32(t0, 0);
        tcg_gen_op5_i32(INDEX_op_divu2_i32, t0, ret, arg1, t0, arg2);
        tcg_temp_free_i32(t0);
    } else {
        gen_helper_remu_i32(ret, arg1, arg2);
    }
}

void tcg_gen_andc_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
{
    if (TCG_TARGET_HAS_andc_i32) {
        tcg_gen_op3_i32(INDEX_op_andc_i32, ret, arg1, arg2);
    } else {
        TCGv_i32 t0 = tcg_temp_new_i32();
        tcg_gen_not_i32(t0, arg2);
        tcg_gen_and_i32(ret, arg1, t0);
        tcg_temp_free_i32(t0);
    }
}

void tcg_gen_eqv_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
{
    if (TCG_TARGET_HAS_eqv_i32) {
        tcg_gen_op3_i32(INDEX_op_eqv_i32, ret, arg1, arg2);
    } else {
        tcg_gen_xor_i32(ret, arg1, arg2);
        tcg_gen_not_i32(ret, ret);
    }
}

void tcg_gen_nand_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
{
    if (TCG_TARGET_HAS_nand_i32) {
        tcg_gen_op3_i32(INDEX_op_nand_i32, ret, arg1, arg2);
    } else {
        tcg_gen_and_i32(ret, arg1, arg2);
        tcg_gen_not_i32(ret, ret);
    }
}

void tcg_gen_nor_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
{
    if (TCG_TARGET_HAS_nor_i32) {
        tcg_gen_op3_i32(INDEX_op_nor_i32, ret, arg1, arg2);
    } else {
        tcg_gen_or_i32(ret, arg1, arg2);
        tcg_gen_not_i32(ret, ret);
    }
}

void tcg_gen_orc_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
{
    if (TCG_TARGET_HAS_orc_i32) {
        tcg_gen_op3_i32(INDEX_op_orc_i32, ret, arg1, arg2);
    } else {
        TCGv_i32 t0 = tcg_temp_new_i32();
        tcg_gen_not_i32(t0, arg2);
        tcg_gen_or_i32(ret, arg1, t0);
        tcg_temp_free_i32(t0);
    }
}

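/*
 * Widening fallback: zero-extend and reuse the 64-bit clz.  A nonzero
 * input then has 32 extra leading zeros, undone by the final subtract;
 * biasing the "input is zero" result arg2 by 32 keeps that case
 * correct as well.
 */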
void tcg_gen_clz_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
{
    if (TCG_TARGET_HAS_clz_i32) {
        tcg_gen_op3_i32(INDEX_op_clz_i32, ret, arg1, arg2);
    } else if (TCG_TARGET_HAS_clz_i64) {
        TCGv_i64 t1 = tcg_temp_new_i64();
        TCGv_i64 t2 = tcg_temp_new_i64();
        tcg_gen_extu_i32_i64(t1, arg1);
        tcg_gen_extu_i32_i64(t2, arg2);
        tcg_gen_addi_i64(t2, t2, 32);
        tcg_gen_clz_i64(t1, t1, t2);
        tcg_gen_extrl_i64_i32(ret, t1);
        tcg_temp_free_i64(t1);
        tcg_temp_free_i64(t2);
        tcg_gen_subi_i32(ret, ret, 32);
    } else {
        gen_helper_clz_i32(ret, arg1, arg2);
    }
}

void tcg_gen_clzi_i32(TCGv_i32 ret, TCGv_i32 arg1, uint32_t arg2)
{
    TCGv_i32 t = tcg_const_i32(arg2);
    tcg_gen_clz_i32(ret, arg1, t);
    tcg_temp_free_i32(t);
}

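/*
 * Two bit tricks back up the missing ctz opcode (a sketch, for
 * x = arg1): (x - 1) & ~x sets exactly the bits below the lowest set
 * bit, so its population count equals ctz(x); and x & -x isolates the
 * lowest set bit, so for nonzero x, ctz(x) == clz(x & -x) ^ 31.
 * Both yield a wrong value for x == 0, which the final movcond
 * replaces with arg2.
 */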
void tcg_gen_ctz_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
{
    if (TCG_TARGET_HAS_ctz_i32) {
        tcg_gen_op3_i32(INDEX_op_ctz_i32, ret, arg1, arg2);
    } else if (TCG_TARGET_HAS_ctz_i64) {
        TCGv_i64 t1 = tcg_temp_new_i64();
        TCGv_i64 t2 = tcg_temp_new_i64();
        tcg_gen_extu_i32_i64(t1, arg1);
        tcg_gen_extu_i32_i64(t2, arg2);
        tcg_gen_ctz_i64(t1, t1, t2);
        tcg_gen_extrl_i64_i32(ret, t1);
        tcg_temp_free_i64(t1);
        tcg_temp_free_i64(t2);
    } else if (TCG_TARGET_HAS_ctpop_i32
               || TCG_TARGET_HAS_ctpop_i64
               || TCG_TARGET_HAS_clz_i32
               || TCG_TARGET_HAS_clz_i64) {
        TCGv_i32 z, t = tcg_temp_new_i32();

        if (TCG_TARGET_HAS_ctpop_i32 || TCG_TARGET_HAS_ctpop_i64) {
            tcg_gen_subi_i32(t, arg1, 1);
            tcg_gen_andc_i32(t, t, arg1);
            tcg_gen_ctpop_i32(t, t);
        } else {
            /* Since all non-x86 hosts have clz(0) == 32, don't fight it.  */
            tcg_gen_neg_i32(t, arg1);
            tcg_gen_and_i32(t, t, arg1);
            tcg_gen_clzi_i32(t, t, 32);
            tcg_gen_xori_i32(t, t, 31);
        }
        z = tcg_const_i32(0);
        tcg_gen_movcond_i32(TCG_COND_EQ, ret, arg1, z, arg2, t);
        tcg_temp_free_i32(t);
        tcg_temp_free_i32(z);
    } else {
        gen_helper_ctz_i32(ret, arg1, arg2);
    }
}

void tcg_gen_ctzi_i32(TCGv_i32 ret, TCGv_i32 arg1, uint32_t arg2)
{
    if (!TCG_TARGET_HAS_ctz_i32 && TCG_TARGET_HAS_ctpop_i32 && arg2 == 32) {
        /* This equivalence has the advantage of not requiring a fixup.  */
        TCGv_i32 t = tcg_temp_new_i32();
        tcg_gen_subi_i32(t, arg1, 1);
        tcg_gen_andc_i32(t, t, arg1);
        tcg_gen_ctpop_i32(ret, t);
        tcg_temp_free_i32(t);
    } else {
        TCGv_i32 t = tcg_const_i32(arg2);
        tcg_gen_ctz_i32(ret, arg1, t);
        tcg_temp_free_i32(t);
    }
}

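/*
 * Count leading redundant sign bits: XORing the value with its own
 * sign mask (arg >> 31) turns copies of the sign bit into leading
 * zeros, so clrsb(x) == clz(x ^ (x >> 31)) - 1.  With clz(0) == 32,
 * both 0 and -1 correctly yield 31.
 */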
void tcg_gen_clrsb_i32(TCGv_i32 ret, TCGv_i32 arg)
{
    if (TCG_TARGET_HAS_clz_i32) {
        TCGv_i32 t = tcg_temp_new_i32();
        tcg_gen_sari_i32(t, arg, 31);
        tcg_gen_xor_i32(t, t, arg);
        tcg_gen_clzi_i32(t, t, 32);
        tcg_gen_subi_i32(ret, t, 1);
        tcg_temp_free_i32(t);
    } else {
        gen_helper_clrsb_i32(ret, arg);
    }
}

void tcg_gen_ctpop_i32(TCGv_i32 ret, TCGv_i32 arg1)
{
    if (TCG_TARGET_HAS_ctpop_i32) {
        tcg_gen_op2_i32(INDEX_op_ctpop_i32, ret, arg1);
    } else if (TCG_TARGET_HAS_ctpop_i64) {
        TCGv_i64 t = tcg_temp_new_i64();
        tcg_gen_extu_i32_i64(t, arg1);
        tcg_gen_ctpop_i64(t, t);
        tcg_gen_extrl_i64_i32(ret, t);
        tcg_temp_free_i64(t);
    } else {
        gen_helper_ctpop_i32(ret, arg1);
    }
}

void tcg_gen_rotl_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
{
    if (TCG_TARGET_HAS_rot_i32) {
        tcg_gen_op3_i32(INDEX_op_rotl_i32, ret, arg1, arg2);
    } else {
        TCGv_i32 t0, t1;

        t0 = tcg_temp_new_i32();
        t1 = tcg_temp_new_i32();
        tcg_gen_shl_i32(t0, arg1, arg2);
        tcg_gen_subfi_i32(t1, 32, arg2);
        tcg_gen_shr_i32(t1, arg1, t1);
        tcg_gen_or_i32(ret, t0, t1);
        tcg_temp_free_i32(t0);
        tcg_temp_free_i32(t1);
    }
}

void tcg_gen_rotli_i32(TCGv_i32 ret, TCGv_i32 arg1, unsigned arg2)
{
    tcg_debug_assert(arg2 < 32);
    /* some cases can be optimized here */
    if (arg2 == 0) {
        tcg_gen_mov_i32(ret, arg1);
    } else if (TCG_TARGET_HAS_rot_i32) {
        TCGv_i32 t0 = tcg_const_i32(arg2);
        tcg_gen_rotl_i32(ret, arg1, t0);
        tcg_temp_free_i32(t0);
    } else {
        TCGv_i32 t0, t1;
        t0 = tcg_temp_new_i32();
        t1 = tcg_temp_new_i32();
        tcg_gen_shli_i32(t0, arg1, arg2);
        tcg_gen_shri_i32(t1, arg1, 32 - arg2);
        tcg_gen_or_i32(ret, t0, t1);
        tcg_temp_free_i32(t0);
        tcg_temp_free_i32(t1);
    }
}

void tcg_gen_rotr_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
{
    if (TCG_TARGET_HAS_rot_i32) {
        tcg_gen_op3_i32(INDEX_op_rotr_i32, ret, arg1, arg2);
    } else {
        TCGv_i32 t0, t1;

        t0 = tcg_temp_new_i32();
        t1 = tcg_temp_new_i32();
        tcg_gen_shr_i32(t0, arg1, arg2);
        tcg_gen_subfi_i32(t1, 32, arg2);
        tcg_gen_shl_i32(t1, arg1, t1);
        tcg_gen_or_i32(ret, t0, t1);
        tcg_temp_free_i32(t0);
        tcg_temp_free_i32(t1);
    }
}

void tcg_gen_rotri_i32(TCGv_i32 ret, TCGv_i32 arg1, unsigned arg2)
{
    tcg_debug_assert(arg2 < 32);
    /* some cases can be optimized here */
    if (arg2 == 0) {
        tcg_gen_mov_i32(ret, arg1);
    } else {
        tcg_gen_rotli_i32(ret, arg1, 32 - arg2);
    }
}

void tcg_gen_deposit_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2,
                         unsigned int ofs, unsigned int len)
{
    uint32_t mask;
    TCGv_i32 t1;

    tcg_debug_assert(ofs < 32);
    tcg_debug_assert(len > 0);
    tcg_debug_assert(len <= 32);
    tcg_debug_assert(ofs + len <= 32);

    if (len == 32) {
        tcg_gen_mov_i32(ret, arg2);
        return;
    }
    if (TCG_TARGET_HAS_deposit_i32 && TCG_TARGET_deposit_i32_valid(ofs, len)) {
        tcg_gen_op5ii_i32(INDEX_op_deposit_i32, ret, arg1, arg2, ofs, len);
        return;
    }

    t1 = tcg_temp_new_i32();

    if (TCG_TARGET_HAS_extract2_i32) {
        if (ofs + len == 32) {
            tcg_gen_shli_i32(t1, arg1, len);
            tcg_gen_extract2_i32(ret, t1, arg2, len);
            goto done;
        }
        if (ofs == 0) {
            tcg_gen_extract2_i32(ret, arg1, arg2, len);
            tcg_gen_rotli_i32(ret, ret, len);
            goto done;
        }
    }

    mask = (1u << len) - 1;
    if (ofs + len < 32) {
        tcg_gen_andi_i32(t1, arg2, mask);
        tcg_gen_shli_i32(t1, t1, ofs);
    } else {
        tcg_gen_shli_i32(t1, arg2, ofs);
    }
    tcg_gen_andi_i32(ret, arg1, ~(mask << ofs));
    tcg_gen_or_i32(ret, ret, t1);
 done:
    tcg_temp_free_i32(t1);
}

void tcg_gen_deposit_z_i32(TCGv_i32 ret, TCGv_i32 arg,
                           unsigned int ofs, unsigned int len)
{
    tcg_debug_assert(ofs < 32);
    tcg_debug_assert(len > 0);
    tcg_debug_assert(len <= 32);
    tcg_debug_assert(ofs + len <= 32);

    if (ofs + len == 32) {
        tcg_gen_shli_i32(ret, arg, ofs);
    } else if (ofs == 0) {
        tcg_gen_andi_i32(ret, arg, (1u << len) - 1);
    } else if (TCG_TARGET_HAS_deposit_i32
               && TCG_TARGET_deposit_i32_valid(ofs, len)) {
        TCGv_i32 zero = tcg_const_i32(0);
        tcg_gen_op5ii_i32(INDEX_op_deposit_i32, ret, zero, arg, ofs, len);
        tcg_temp_free_i32(zero);
    } else {
        /* To help two-operand hosts we prefer to zero-extend first,
           which allows ARG to stay live.  */
        switch (len) {
        case 16:
            if (TCG_TARGET_HAS_ext16u_i32) {
                tcg_gen_ext16u_i32(ret, arg);
                tcg_gen_shli_i32(ret, ret, ofs);
                return;
            }
            break;
        case 8:
            if (TCG_TARGET_HAS_ext8u_i32) {
                tcg_gen_ext8u_i32(ret, arg);
                tcg_gen_shli_i32(ret, ret, ofs);
                return;
            }
            break;
        }
        /* Otherwise prefer zero-extension over AND for code size.  */
        switch (ofs + len) {
        case 16:
            if (TCG_TARGET_HAS_ext16u_i32) {
                tcg_gen_shli_i32(ret, arg, ofs);
                tcg_gen_ext16u_i32(ret, ret);
                return;
            }
            break;
        case 8:
            if (TCG_TARGET_HAS_ext8u_i32) {
                tcg_gen_shli_i32(ret, arg, ofs);
                tcg_gen_ext8u_i32(ret, ret);
                return;
            }
            break;
        }
        tcg_gen_andi_i32(ret, arg, (1u << len) - 1);
        tcg_gen_shli_i32(ret, ret, ofs);
    }
}

void tcg_gen_extract_i32(TCGv_i32 ret, TCGv_i32 arg,
                         unsigned int ofs, unsigned int len)
{
    tcg_debug_assert(ofs < 32);
    tcg_debug_assert(len > 0);
    tcg_debug_assert(len <= 32);
    tcg_debug_assert(ofs + len <= 32);

    /* Canonicalize certain special cases, even if extract is supported.  */
    if (ofs + len == 32) {
        tcg_gen_shri_i32(ret, arg, 32 - len);
        return;
    }
    if (ofs == 0) {
        tcg_gen_andi_i32(ret, arg, (1u << len) - 1);
        return;
    }

    if (TCG_TARGET_HAS_extract_i32
        && TCG_TARGET_extract_i32_valid(ofs, len)) {
        tcg_gen_op4ii_i32(INDEX_op_extract_i32, ret, arg, ofs, len);
        return;
    }

    /* Assume that zero-extension, if available, is cheaper than a shift.  */
    switch (ofs + len) {
    case 16:
        if (TCG_TARGET_HAS_ext16u_i32) {
            tcg_gen_ext16u_i32(ret, arg);
            tcg_gen_shri_i32(ret, ret, ofs);
            return;
        }
        break;
    case 8:
        if (TCG_TARGET_HAS_ext8u_i32) {
            tcg_gen_ext8u_i32(ret, arg);
            tcg_gen_shri_i32(ret, ret, ofs);
            return;
        }
        break;
    }

    /* ??? Ideally we'd know what values are available for immediate AND.
       Assume that 8 bits are available, plus the special case of 16,
       so that we get ext8u, ext16u.  */
    switch (len) {
    case 1 ... 8: case 16:
        tcg_gen_shri_i32(ret, arg, ofs);
        tcg_gen_andi_i32(ret, ret, (1u << len) - 1);
        break;
    default:
        tcg_gen_shli_i32(ret, arg, 32 - len - ofs);
        tcg_gen_shri_i32(ret, ret, 32 - len);
        break;
    }
}

void tcg_gen_sextract_i32(TCGv_i32 ret, TCGv_i32 arg,
                          unsigned int ofs, unsigned int len)
{
    tcg_debug_assert(ofs < 32);
    tcg_debug_assert(len > 0);
    tcg_debug_assert(len <= 32);
    tcg_debug_assert(ofs + len <= 32);

    /* Canonicalize certain special cases, even if extract is supported.  */
    if (ofs + len == 32) {
        tcg_gen_sari_i32(ret, arg, 32 - len);
        return;
    }
    if (ofs == 0) {
        switch (len) {
        case 16:
            tcg_gen_ext16s_i32(ret, arg);
            return;
        case 8:
            tcg_gen_ext8s_i32(ret, arg);
            return;
        }
    }

    if (TCG_TARGET_HAS_sextract_i32
        && TCG_TARGET_extract_i32_valid(ofs, len)) {
        tcg_gen_op4ii_i32(INDEX_op_sextract_i32, ret, arg, ofs, len);
        return;
    }

    /* Assume that sign-extension, if available, is cheaper than a shift.  */
    switch (ofs + len) {
    case 16:
        if (TCG_TARGET_HAS_ext16s_i32) {
            tcg_gen_ext16s_i32(ret, arg);
            tcg_gen_sari_i32(ret, ret, ofs);
            return;
        }
        break;
    case 8:
        if (TCG_TARGET_HAS_ext8s_i32) {
            tcg_gen_ext8s_i32(ret, arg);
            tcg_gen_sari_i32(ret, ret, ofs);
            return;
        }
        break;
    }
    switch (len) {
    case 16:
        if (TCG_TARGET_HAS_ext16s_i32) {
            tcg_gen_shri_i32(ret, arg, ofs);
            tcg_gen_ext16s_i32(ret, ret);
            return;
        }
        break;
    case 8:
        if (TCG_TARGET_HAS_ext8s_i32) {
            tcg_gen_shri_i32(ret, arg, ofs);
            tcg_gen_ext8s_i32(ret, ret);
            return;
        }
        break;
    }

    tcg_gen_shli_i32(ret, arg, 32 - len - ofs);
    tcg_gen_sari_i32(ret, ret, 32 - len);
}

/*
 * Extract 32 bits from a 64-bit input, ah:al, starting from ofs.
 * Unlike tcg_gen_extract_i32 above, len is fixed at 32.
 */
void tcg_gen_extract2_i32(TCGv_i32 ret, TCGv_i32 al, TCGv_i32 ah,
                          unsigned int ofs)
{
    tcg_debug_assert(ofs <= 32);
    if (ofs == 0) {
        tcg_gen_mov_i32(ret, al);
    } else if (ofs == 32) {
        tcg_gen_mov_i32(ret, ah);
    } else if (al == ah) {
        tcg_gen_rotri_i32(ret, al, ofs);
    } else if (TCG_TARGET_HAS_extract2_i32) {
        tcg_gen_op4i_i32(INDEX_op_extract2_i32, ret, al, ah, ofs);
    } else {
        TCGv_i32 t0 = tcg_temp_new_i32();
        tcg_gen_shri_i32(t0, al, ofs);
        tcg_gen_deposit_i32(ret, t0, ah, 32 - ofs, ofs);
        tcg_temp_free_i32(t0);
    }
}

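/*
 * Fallback when the host lacks movcond: materialize the condition as
 * 0/1, negate it into an all-zeros/all-ones mask m, and blend without
 * a branch: ret = (v1 & m) | (v2 & ~m).
 */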
void tcg_gen_movcond_i32(TCGCond cond, TCGv_i32 ret, TCGv_i32 c1,
                         TCGv_i32 c2, TCGv_i32 v1, TCGv_i32 v2)
{
    if (cond == TCG_COND_ALWAYS) {
        tcg_gen_mov_i32(ret, v1);
    } else if (cond == TCG_COND_NEVER) {
        tcg_gen_mov_i32(ret, v2);
    } else if (TCG_TARGET_HAS_movcond_i32) {
        tcg_gen_op6i_i32(INDEX_op_movcond_i32, ret, c1, c2, v1, v2, cond);
    } else {
        TCGv_i32 t0 = tcg_temp_new_i32();
        TCGv_i32 t1 = tcg_temp_new_i32();
        tcg_gen_setcond_i32(cond, t0, c1, c2);
        tcg_gen_neg_i32(t0, t0);
        tcg_gen_and_i32(t1, v1, t0);
        tcg_gen_andc_i32(ret, v2, t0);
        tcg_gen_or_i32(ret, ret, t1);
        tcg_temp_free_i32(t0);
        tcg_temp_free_i32(t1);
    }
}

void tcg_gen_add2_i32(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 al,
                      TCGv_i32 ah, TCGv_i32 bl, TCGv_i32 bh)
{
    if (TCG_TARGET_HAS_add2_i32) {
        tcg_gen_op6_i32(INDEX_op_add2_i32, rl, rh, al, ah, bl, bh);
    } else {
        TCGv_i64 t0 = tcg_temp_new_i64();
        TCGv_i64 t1 = tcg_temp_new_i64();
        tcg_gen_concat_i32_i64(t0, al, ah);
        tcg_gen_concat_i32_i64(t1, bl, bh);
        tcg_gen_add_i64(t0, t0, t1);
        tcg_gen_extr_i64_i32(rl, rh, t0);
        tcg_temp_free_i64(t0);
        tcg_temp_free_i64(t1);
    }
}

void tcg_gen_sub2_i32(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 al,
                      TCGv_i32 ah, TCGv_i32 bl, TCGv_i32 bh)
{
    if (TCG_TARGET_HAS_sub2_i32) {
        tcg_gen_op6_i32(INDEX_op_sub2_i32, rl, rh, al, ah, bl, bh);
    } else {
        TCGv_i64 t0 = tcg_temp_new_i64();
        TCGv_i64 t1 = tcg_temp_new_i64();
        tcg_gen_concat_i32_i64(t0, al, ah);
        tcg_gen_concat_i32_i64(t1, bl, bh);
        tcg_gen_sub_i64(t0, t0, t1);
        tcg_gen_extr_i64_i32(rl, rh, t0);
        tcg_temp_free_i64(t0);
        tcg_temp_free_i64(t1);
    }
}

void tcg_gen_mulu2_i32(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 arg1, TCGv_i32 arg2)
{
    if (TCG_TARGET_HAS_mulu2_i32) {
        tcg_gen_op4_i32(INDEX_op_mulu2_i32, rl, rh, arg1, arg2);
    } else if (TCG_TARGET_HAS_muluh_i32) {
        TCGv_i32 t = tcg_temp_new_i32();
        tcg_gen_op3_i32(INDEX_op_mul_i32, t, arg1, arg2);
        tcg_gen_op3_i32(INDEX_op_muluh_i32, rh, arg1, arg2);
        tcg_gen_mov_i32(rl, t);
        tcg_temp_free_i32(t);
    } else {
        TCGv_i64 t0 = tcg_temp_new_i64();
        TCGv_i64 t1 = tcg_temp_new_i64();
        tcg_gen_extu_i32_i64(t0, arg1);
        tcg_gen_extu_i32_i64(t1, arg2);
        tcg_gen_mul_i64(t0, t0, t1);
        tcg_gen_extr_i64_i32(rl, rh, t0);
        tcg_temp_free_i64(t0);
        tcg_temp_free_i64(t1);
    }
}

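/*
 * The 32-bit-host path below derives the signed high word from the
 * unsigned product.  Reading a signed operand a as unsigned adds 2^32
 * when a < 0, which gives the identity (a sketch):
 *     hi_signed(a, b) = hi_unsigned(a, b) - (a < 0 ? b : 0)
 *                                         - (b < 0 ? a : 0)
 * exactly what the sign-mask AND/SUB sequence computes.
 */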
void tcg_gen_muls2_i32(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 arg1, TCGv_i32 arg2)
{
    if (TCG_TARGET_HAS_muls2_i32) {
        tcg_gen_op4_i32(INDEX_op_muls2_i32, rl, rh, arg1, arg2);
    } else if (TCG_TARGET_HAS_mulsh_i32) {
        TCGv_i32 t = tcg_temp_new_i32();
        tcg_gen_op3_i32(INDEX_op_mul_i32, t, arg1, arg2);
        tcg_gen_op3_i32(INDEX_op_mulsh_i32, rh, arg1, arg2);
        tcg_gen_mov_i32(rl, t);
        tcg_temp_free_i32(t);
    } else if (TCG_TARGET_REG_BITS == 32) {
        TCGv_i32 t0 = tcg_temp_new_i32();
        TCGv_i32 t1 = tcg_temp_new_i32();
        TCGv_i32 t2 = tcg_temp_new_i32();
        TCGv_i32 t3 = tcg_temp_new_i32();
        tcg_gen_mulu2_i32(t0, t1, arg1, arg2);
        /* Adjust for negative inputs.  */
        tcg_gen_sari_i32(t2, arg1, 31);
        tcg_gen_sari_i32(t3, arg2, 31);
        tcg_gen_and_i32(t2, t2, arg2);
        tcg_gen_and_i32(t3, t3, arg1);
        tcg_gen_sub_i32(rh, t1, t2);
        tcg_gen_sub_i32(rh, rh, t3);
        tcg_gen_mov_i32(rl, t0);
        tcg_temp_free_i32(t0);
        tcg_temp_free_i32(t1);
        tcg_temp_free_i32(t2);
        tcg_temp_free_i32(t3);
    } else {
        TCGv_i64 t0 = tcg_temp_new_i64();
        TCGv_i64 t1 = tcg_temp_new_i64();
        tcg_gen_ext_i32_i64(t0, arg1);
        tcg_gen_ext_i32_i64(t1, arg2);
        tcg_gen_mul_i64(t0, t0, t1);
        tcg_gen_extr_i64_i32(rl, rh, t0);
        tcg_temp_free_i64(t0);
        tcg_temp_free_i64(t1);
    }
}

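/*
 * Signed-by-unsigned widening multiply: only arg1 is signed, so a
 * single correction term suffices,
 * hi = hi_unsigned - (arg1 < 0 ? arg2 : 0).
 */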
void tcg_gen_mulsu2_i32(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 arg1, TCGv_i32 arg2)
{
    if (TCG_TARGET_REG_BITS == 32) {
        TCGv_i32 t0 = tcg_temp_new_i32();
        TCGv_i32 t1 = tcg_temp_new_i32();
        TCGv_i32 t2 = tcg_temp_new_i32();
        tcg_gen_mulu2_i32(t0, t1, arg1, arg2);
        /* Adjust for negative input for the signed arg1.  */
        tcg_gen_sari_i32(t2, arg1, 31);
        tcg_gen_and_i32(t2, t2, arg2);
        tcg_gen_sub_i32(rh, t1, t2);
        tcg_gen_mov_i32(rl, t0);
        tcg_temp_free_i32(t0);
        tcg_temp_free_i32(t1);
        tcg_temp_free_i32(t2);
    } else {
        TCGv_i64 t0 = tcg_temp_new_i64();
        TCGv_i64 t1 = tcg_temp_new_i64();
        tcg_gen_ext_i32_i64(t0, arg1);
        tcg_gen_extu_i32_i64(t1, arg2);
        tcg_gen_mul_i64(t0, t0, t1);
        tcg_gen_extr_i64_i32(rl, rh, t0);
        tcg_temp_free_i64(t0);
        tcg_temp_free_i64(t1);
    }
}

void tcg_gen_ext8s_i32(TCGv_i32 ret, TCGv_i32 arg)
{
    if (TCG_TARGET_HAS_ext8s_i32) {
        tcg_gen_op2_i32(INDEX_op_ext8s_i32, ret, arg);
    } else {
        tcg_gen_shli_i32(ret, arg, 24);
        tcg_gen_sari_i32(ret, ret, 24);
    }
}

void tcg_gen_ext16s_i32(TCGv_i32 ret, TCGv_i32 arg)
{
    if (TCG_TARGET_HAS_ext16s_i32) {
        tcg_gen_op2_i32(INDEX_op_ext16s_i32, ret, arg);
    } else {
        tcg_gen_shli_i32(ret, arg, 16);
        tcg_gen_sari_i32(ret, ret, 16);
    }
}

void tcg_gen_ext8u_i32(TCGv_i32 ret, TCGv_i32 arg)
{
    if (TCG_TARGET_HAS_ext8u_i32) {
        tcg_gen_op2_i32(INDEX_op_ext8u_i32, ret, arg);
    } else {
        tcg_gen_andi_i32(ret, arg, 0xffu);
    }
}

void tcg_gen_ext16u_i32(TCGv_i32 ret, TCGv_i32 arg)
{
    if (TCG_TARGET_HAS_ext16u_i32) {
        tcg_gen_op2_i32(INDEX_op_ext16u_i32, ret, arg);
    } else {
        tcg_gen_andi_i32(ret, arg, 0xffffu);
    }
}

/* Note: we assume the two high bytes are set to zero */
void tcg_gen_bswap16_i32(TCGv_i32 ret, TCGv_i32 arg)
{
    if (TCG_TARGET_HAS_bswap16_i32) {
        tcg_gen_op2_i32(INDEX_op_bswap16_i32, ret, arg);
    } else {
        TCGv_i32 t0 = tcg_temp_new_i32();

        tcg_gen_ext8u_i32(t0, arg);
        tcg_gen_shli_i32(t0, t0, 8);
        tcg_gen_shri_i32(ret, arg, 8);
        tcg_gen_or_i32(ret, ret, t0);
        tcg_temp_free_i32(t0);
    }
}

void tcg_gen_bswap32_i32(TCGv_i32 ret, TCGv_i32 arg)
{
    if (TCG_TARGET_HAS_bswap32_i32) {
        tcg_gen_op2_i32(INDEX_op_bswap32_i32, ret, arg);
    } else {
        TCGv_i32 t0 = tcg_temp_new_i32();
        TCGv_i32 t1 = tcg_temp_new_i32();
        TCGv_i32 t2 = tcg_const_i32(0x00ff00ff);

                                        /* arg = abcd */
        tcg_gen_shri_i32(t0, arg, 8);   /*  t0 = .abc */
        tcg_gen_and_i32(t1, arg, t2);   /*  t1 = .b.d */
        tcg_gen_and_i32(t0, t0, t2);    /*  t0 = .a.c */
        tcg_gen_shli_i32(t1, t1, 8);    /*  t1 = b.d. */
        tcg_gen_or_i32(ret, t0, t1);    /* ret = badc */

        tcg_gen_shri_i32(t0, ret, 16);  /*  t0 = ..ba */
        tcg_gen_shli_i32(t1, ret, 16);  /*  t1 = dc.. */
        tcg_gen_or_i32(ret, t0, t1);    /* ret = dcba */

        tcg_temp_free_i32(t0);
        tcg_temp_free_i32(t1);
        tcg_temp_free_i32(t2);
    }
}

void tcg_gen_smin_i32(TCGv_i32 ret, TCGv_i32 a, TCGv_i32 b)
{
    tcg_gen_movcond_i32(TCG_COND_LT, ret, a, b, a, b);
}

void tcg_gen_umin_i32(TCGv_i32 ret, TCGv_i32 a, TCGv_i32 b)
{
    tcg_gen_movcond_i32(TCG_COND_LTU, ret, a, b, a, b);
}

void tcg_gen_smax_i32(TCGv_i32 ret, TCGv_i32 a, TCGv_i32 b)
{
    tcg_gen_movcond_i32(TCG_COND_LT, ret, a, b, b, a);
}

void tcg_gen_umax_i32(TCGv_i32 ret, TCGv_i32 a, TCGv_i32 b)
{
    tcg_gen_movcond_i32(TCG_COND_LTU, ret, a, b, b, a);
}

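/*
 * Branchless absolute value via the sign mask t = a >> 31 (0 or -1):
 * (a ^ t) - t complements and increments exactly when a is negative.
 * As usual, INT32_MIN maps to itself.
 */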
void tcg_gen_abs_i32(TCGv_i32 ret, TCGv_i32 a)
{
    TCGv_i32 t = tcg_temp_new_i32();

    tcg_gen_sari_i32(t, a, 31);
    tcg_gen_xor_i32(ret, a, t);
    tcg_gen_sub_i32(ret, ret, t);
    tcg_temp_free_i32(t);
}

/* 64-bit ops */

#if TCG_TARGET_REG_BITS == 32
/* These are all inline for TCG_TARGET_REG_BITS == 64.  */

void tcg_gen_discard_i64(TCGv_i64 arg)
{
    tcg_gen_discard_i32(TCGV_LOW(arg));
    tcg_gen_discard_i32(TCGV_HIGH(arg));
}

void tcg_gen_mov_i64(TCGv_i64 ret, TCGv_i64 arg)
{
    tcg_gen_mov_i32(TCGV_LOW(ret), TCGV_LOW(arg));
    tcg_gen_mov_i32(TCGV_HIGH(ret), TCGV_HIGH(arg));
}

void tcg_gen_movi_i64(TCGv_i64 ret, int64_t arg)
{
    tcg_gen_movi_i32(TCGV_LOW(ret), arg);
    tcg_gen_movi_i32(TCGV_HIGH(ret), arg >> 32);
}

void tcg_gen_ld8u_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset)
{
    tcg_gen_ld8u_i32(TCGV_LOW(ret), arg2, offset);
    tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
}

void tcg_gen_ld8s_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset)
{
    tcg_gen_ld8s_i32(TCGV_LOW(ret), arg2, offset);
    tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31);
}

void tcg_gen_ld16u_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset)
{
    tcg_gen_ld16u_i32(TCGV_LOW(ret), arg2, offset);
    tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
}

void tcg_gen_ld16s_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset)
{
    tcg_gen_ld16s_i32(TCGV_LOW(ret), arg2, offset);
    tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31);
}

void tcg_gen_ld32u_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset)
{
    tcg_gen_ld_i32(TCGV_LOW(ret), arg2, offset);
    tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
}

void tcg_gen_ld32s_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset)
{
    tcg_gen_ld_i32(TCGV_LOW(ret), arg2, offset);
    tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31);
}

void tcg_gen_ld_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset)
{
    /* Since arg2 and ret have different types,
       they cannot be the same temporary */
#ifdef HOST_WORDS_BIGENDIAN
    tcg_gen_ld_i32(TCGV_HIGH(ret), arg2, offset);
    tcg_gen_ld_i32(TCGV_LOW(ret), arg2, offset + 4);
#else
    tcg_gen_ld_i32(TCGV_LOW(ret), arg2, offset);
    tcg_gen_ld_i32(TCGV_HIGH(ret), arg2, offset + 4);
#endif
}

void tcg_gen_st_i64(TCGv_i64 arg1, TCGv_ptr arg2, tcg_target_long offset)
{
#ifdef HOST_WORDS_BIGENDIAN
    tcg_gen_st_i32(TCGV_HIGH(arg1), arg2, offset);
    tcg_gen_st_i32(TCGV_LOW(arg1), arg2, offset + 4);
#else
    tcg_gen_st_i32(TCGV_LOW(arg1), arg2, offset);
    tcg_gen_st_i32(TCGV_HIGH(arg1), arg2, offset + 4);
#endif
}

void tcg_gen_and_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
{
    tcg_gen_and_i32(TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_LOW(arg2));
    tcg_gen_and_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), TCGV_HIGH(arg2));
}

void tcg_gen_or_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
{
    tcg_gen_or_i32(TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_LOW(arg2));
    tcg_gen_or_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), TCGV_HIGH(arg2));
}

void tcg_gen_xor_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
{
    tcg_gen_xor_i32(TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_LOW(arg2));
    tcg_gen_xor_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), TCGV_HIGH(arg2));
}

void tcg_gen_shl_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
{
    gen_helper_shl_i64(ret, arg1, arg2);
}

void tcg_gen_shr_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
{
    gen_helper_shr_i64(ret, arg1, arg2);
}

void tcg_gen_sar_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
{
    gen_helper_sar_i64(ret, arg1, arg2);
}

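/*
 * A 64 x 64 -> 64 multiply from 32-bit pieces:
 *     (ah:al) * (bh:bl) == al*bl + 2^32 * (al*bh + ah*bl)   (mod 2^64)
 * The ah*bh term only affects bits above 63 and is dropped.
 */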
void tcg_gen_mul_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
{
    TCGv_i64 t0;
    TCGv_i32 t1;

    t0 = tcg_temp_new_i64();
    t1 = tcg_temp_new_i32();

    tcg_gen_mulu2_i32(TCGV_LOW(t0), TCGV_HIGH(t0),
                      TCGV_LOW(arg1), TCGV_LOW(arg2));

    tcg_gen_mul_i32(t1, TCGV_LOW(arg1), TCGV_HIGH(arg2));
    tcg_gen_add_i32(TCGV_HIGH(t0), TCGV_HIGH(t0), t1);
    tcg_gen_mul_i32(t1, TCGV_HIGH(arg1), TCGV_LOW(arg2));
    tcg_gen_add_i32(TCGV_HIGH(t0), TCGV_HIGH(t0), t1);

    tcg_gen_mov_i64(ret, t0);
    tcg_temp_free_i64(t0);
    tcg_temp_free_i32(t1);
}
#endif /* TCG_TARGET_REG_BITS == 32 */

void tcg_gen_addi_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
{
    /* some cases can be optimized here */
    if (arg2 == 0) {
        tcg_gen_mov_i64(ret, arg1);
    } else {
        TCGv_i64 t0 = tcg_const_i64(arg2);
        tcg_gen_add_i64(ret, arg1, t0);
        tcg_temp_free_i64(t0);
    }
}

void tcg_gen_subfi_i64(TCGv_i64 ret, int64_t arg1, TCGv_i64 arg2)
{
    if (arg1 == 0 && TCG_TARGET_HAS_neg_i64) {
        /* Don't recurse with tcg_gen_neg_i64.  */
        tcg_gen_op2_i64(INDEX_op_neg_i64, ret, arg2);
    } else {
        TCGv_i64 t0 = tcg_const_i64(arg1);
        tcg_gen_sub_i64(ret, t0, arg2);
        tcg_temp_free_i64(t0);
    }
}

void tcg_gen_subi_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
{
    /* some cases can be optimized here */
    if (arg2 == 0) {
        tcg_gen_mov_i64(ret, arg1);
    } else {
        TCGv_i64 t0 = tcg_const_i64(arg2);
        tcg_gen_sub_i64(ret, arg1, t0);
        tcg_temp_free_i64(t0);
    }
}

void tcg_gen_andi_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
{
    TCGv_i64 t0;

    if (TCG_TARGET_REG_BITS == 32) {
        tcg_gen_andi_i32(TCGV_LOW(ret), TCGV_LOW(arg1), arg2);
        tcg_gen_andi_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), arg2 >> 32);
        return;
    }

    /* Some cases can be optimized here.  */
    switch (arg2) {
    case 0:
        tcg_gen_movi_i64(ret, 0);
        return;
    case -1:
        tcg_gen_mov_i64(ret, arg1);
        return;
    case 0xff:
        /* Don't recurse with tcg_gen_ext8u_i64.  */
        if (TCG_TARGET_HAS_ext8u_i64) {
            tcg_gen_op2_i64(INDEX_op_ext8u_i64, ret, arg1);
            return;
        }
        break;
    case 0xffff:
        if (TCG_TARGET_HAS_ext16u_i64) {
            tcg_gen_op2_i64(INDEX_op_ext16u_i64, ret, arg1);
            return;
        }
        break;
    case 0xffffffffu:
        if (TCG_TARGET_HAS_ext32u_i64) {
            tcg_gen_op2_i64(INDEX_op_ext32u_i64, ret, arg1);
            return;
        }
        break;
    }
    t0 = tcg_const_i64(arg2);
    tcg_gen_and_i64(ret, arg1, t0);
    tcg_temp_free_i64(t0);
}

void tcg_gen_ori_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
{
    if (TCG_TARGET_REG_BITS == 32) {
        tcg_gen_ori_i32(TCGV_LOW(ret), TCGV_LOW(arg1), arg2);
        tcg_gen_ori_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), arg2 >> 32);
        return;
    }
    /* Some cases can be optimized here.  */
    if (arg2 == -1) {
        tcg_gen_movi_i64(ret, -1);
    } else if (arg2 == 0) {
        tcg_gen_mov_i64(ret, arg1);
    } else {
        TCGv_i64 t0 = tcg_const_i64(arg2);
        tcg_gen_or_i64(ret, arg1, t0);
        tcg_temp_free_i64(t0);
    }
}

void tcg_gen_xori_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
{
    if (TCG_TARGET_REG_BITS == 32) {
        tcg_gen_xori_i32(TCGV_LOW(ret), TCGV_LOW(arg1), arg2);
        tcg_gen_xori_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), arg2 >> 32);
        return;
    }
    /* Some cases can be optimized here.  */
    if (arg2 == 0) {
        tcg_gen_mov_i64(ret, arg1);
    } else if (arg2 == -1 && TCG_TARGET_HAS_not_i64) {
        /* Don't recurse with tcg_gen_not_i64.  */
        tcg_gen_op2_i64(INDEX_op_not_i64, ret, arg1);
    } else {
        TCGv_i64 t0 = tcg_const_i64(arg2);
        tcg_gen_xor_i64(ret, arg1, t0);
        tcg_temp_free_i64(t0);
    }
}

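/*
 * Constant double-word shift built from 32-bit ops.  For 0 < c < 32
 * each result half mixes bits of both input halves; that mix is a
 * funnel shift, which is exactly the extract2 opcode, and is otherwise
 * assembled from a shift plus a deposit.  Shifts of 32 or more
 * degenerate to a cross-half move plus a shift by c - 32.
 */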
1359 static inline void tcg_gen_shifti_i64(TCGv_i64 ret, TCGv_i64 arg1,
1360                                       unsigned c, bool right, bool arith)
1361 {
1362     tcg_debug_assert(c < 64);
1363     if (c == 0) {
1364         tcg_gen_mov_i32(TCGV_LOW(ret), TCGV_LOW(arg1));
1365         tcg_gen_mov_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1));
1366     } else if (c >= 32) {
1367         c -= 32;
1368         if (right) {
1369             if (arith) {
1370                 tcg_gen_sari_i32(TCGV_LOW(ret), TCGV_HIGH(arg1), c);
1371                 tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), 31);
1372             } else {
1373                 tcg_gen_shri_i32(TCGV_LOW(ret), TCGV_HIGH(arg1), c);
1374                 tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
1375             }
1376         } else {
1377             tcg_gen_shli_i32(TCGV_HIGH(ret), TCGV_LOW(arg1), c);
1378             tcg_gen_movi_i32(TCGV_LOW(ret), 0);
1379         }
1380     } else if (right) {
1381         if (TCG_TARGET_HAS_extract2_i32) {
1382             tcg_gen_extract2_i32(TCGV_LOW(ret),
1383                                  TCGV_LOW(arg1), TCGV_HIGH(arg1), c);
1384         } else {
1385             tcg_gen_shri_i32(TCGV_LOW(ret), TCGV_LOW(arg1), c);
1386             tcg_gen_deposit_i32(TCGV_LOW(ret), TCGV_LOW(ret),
1387                                 TCGV_HIGH(arg1), 32 - c, c);
1388         }
1389         if (arith) {
1390             tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), c);
1391         } else {
1392             tcg_gen_shri_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), c);
1393         }
1394     } else {
1395         if (TCG_TARGET_HAS_extract2_i32) {
1396             tcg_gen_extract2_i32(TCGV_HIGH(ret),
1397                                  TCGV_LOW(arg1), TCGV_HIGH(arg1), 32 - c);
1398         } else {
1399             TCGv_i32 t0 = tcg_temp_new_i32();
1400             tcg_gen_shri_i32(t0, TCGV_LOW(arg1), 32 - c);
1401             tcg_gen_deposit_i32(TCGV_HIGH(ret), t0,
1402                                 TCGV_HIGH(arg1), c, 32 - c);
1403             tcg_temp_free_i32(t0);
1404         }
1405         tcg_gen_shli_i32(TCGV_LOW(ret), TCGV_LOW(arg1), c);
1406     }
1407 }
1408 
1409 void tcg_gen_shli_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
1410 {
1411     tcg_debug_assert(arg2 >= 0 && arg2 < 64);
1412     if (TCG_TARGET_REG_BITS == 32) {
1413         tcg_gen_shifti_i64(ret, arg1, arg2, 0, 0);
1414     } else if (arg2 == 0) {
1415         tcg_gen_mov_i64(ret, arg1);
1416     } else {
1417         TCGv_i64 t0 = tcg_const_i64(arg2);
1418         tcg_gen_shl_i64(ret, arg1, t0);
1419         tcg_temp_free_i64(t0);
1420     }
1421 }
1422 
1423 void tcg_gen_shri_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
1424 {
1425     tcg_debug_assert(arg2 >= 0 && arg2 < 64);
1426     if (TCG_TARGET_REG_BITS == 32) {
1427         tcg_gen_shifti_i64(ret, arg1, arg2, 1, 0);
1428     } else if (arg2 == 0) {
1429         tcg_gen_mov_i64(ret, arg1);
1430     } else {
1431         TCGv_i64 t0 = tcg_const_i64(arg2);
1432         tcg_gen_shr_i64(ret, arg1, t0);
1433         tcg_temp_free_i64(t0);
1434     }
1435 }
1436 
1437 void tcg_gen_sari_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
1438 {
1439     tcg_debug_assert(arg2 >= 0 && arg2 < 64);
1440     if (TCG_TARGET_REG_BITS == 32) {
1441         tcg_gen_shifti_i64(ret, arg1, arg2, 1, 1);
1442     } else if (arg2 == 0) {
1443         tcg_gen_mov_i64(ret, arg1);
1444     } else {
1445         TCGv_i64 t0 = tcg_const_i64(arg2);
1446         tcg_gen_sar_i64(ret, arg1, t0);
1447         tcg_temp_free_i64(t0);
1448     }
1449 }
1450 
1451 void tcg_gen_brcond_i64(TCGCond cond, TCGv_i64 arg1, TCGv_i64 arg2, TCGLabel *l)
1452 {
1453     if (cond == TCG_COND_ALWAYS) {
1454         tcg_gen_br(l);
1455     } else if (cond != TCG_COND_NEVER) {
1456         l->refs++;
1457         if (TCG_TARGET_REG_BITS == 32) {
1458             tcg_gen_op6ii_i32(INDEX_op_brcond2_i32, TCGV_LOW(arg1),
1459                               TCGV_HIGH(arg1), TCGV_LOW(arg2),
1460                               TCGV_HIGH(arg2), cond, label_arg(l));
1461         } else {
1462             tcg_gen_op4ii_i64(INDEX_op_brcond_i64, arg1, arg2, cond,
1463                               label_arg(l));
1464         }
1465     }
1466 }
1467 
1468 void tcg_gen_brcondi_i64(TCGCond cond, TCGv_i64 arg1, int64_t arg2, TCGLabel *l)
1469 {
1470     if (cond == TCG_COND_ALWAYS) {
1471         tcg_gen_br(l);
1472     } else if (cond != TCG_COND_NEVER) {
1473         TCGv_i64 t0 = tcg_const_i64(arg2);
1474         tcg_gen_brcond_i64(cond, arg1, t0, l);
1475         tcg_temp_free_i64(t0);
1476     }
1477 }
1478 
1479 void tcg_gen_setcond_i64(TCGCond cond, TCGv_i64 ret,
1480                          TCGv_i64 arg1, TCGv_i64 arg2)
1481 {
1482     if (cond == TCG_COND_ALWAYS) {
1483         tcg_gen_movi_i64(ret, 1);
1484     } else if (cond == TCG_COND_NEVER) {
1485         tcg_gen_movi_i64(ret, 0);
1486     } else {
1487         if (TCG_TARGET_REG_BITS == 32) {
1488             tcg_gen_op6i_i32(INDEX_op_setcond2_i32, TCGV_LOW(ret),
1489                              TCGV_LOW(arg1), TCGV_HIGH(arg1),
1490                              TCGV_LOW(arg2), TCGV_HIGH(arg2), cond);
1491             tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
1492         } else {
1493             tcg_gen_op4i_i64(INDEX_op_setcond_i64, ret, arg1, arg2, cond);
1494         }
1495     }
1496 }
1497 
1498 void tcg_gen_setcondi_i64(TCGCond cond, TCGv_i64 ret,
1499                           TCGv_i64 arg1, int64_t arg2)
1500 {
1501     TCGv_i64 t0 = tcg_const_i64(arg2);
1502     tcg_gen_setcond_i64(cond, ret, arg1, t0);
1503     tcg_temp_free_i64(t0);
1504 }
1505 
1506 void tcg_gen_muli_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
1507 {
1508     if (arg2 == 0) {
1509         tcg_gen_movi_i64(ret, 0);
1510     } else if (is_power_of_2(arg2)) {
1511         tcg_gen_shli_i64(ret, arg1, ctz64(arg2));
1512     } else {
1513         TCGv_i64 t0 = tcg_const_i64(arg2);
1514         tcg_gen_mul_i64(ret, arg1, t0);
1515         tcg_temp_free_i64(t0);
1516     }
1517 }
1518 
1519 void tcg_gen_div_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
1520 {
1521     if (TCG_TARGET_HAS_div_i64) {
1522         tcg_gen_op3_i64(INDEX_op_div_i64, ret, arg1, arg2);
1523     } else if (TCG_TARGET_HAS_div2_i64) {
1524         TCGv_i64 t0 = tcg_temp_new_i64();
1525         tcg_gen_sari_i64(t0, arg1, 63);
1526         tcg_gen_op5_i64(INDEX_op_div2_i64, ret, t0, arg1, t0, arg2);
1527         tcg_temp_free_i64(t0);
1528     } else {
1529         gen_helper_div_i64(ret, arg1, arg2);
1530     }
1531 }
1532 
1533 void tcg_gen_rem_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
1534 {
1535     if (TCG_TARGET_HAS_rem_i64) {
1536         tcg_gen_op3_i64(INDEX_op_rem_i64, ret, arg1, arg2);
1537     } else if (TCG_TARGET_HAS_div_i64) {
1538         TCGv_i64 t0 = tcg_temp_new_i64();
1539         tcg_gen_op3_i64(INDEX_op_div_i64, t0, arg1, arg2);
1540         tcg_gen_mul_i64(t0, t0, arg2);
1541         tcg_gen_sub_i64(ret, arg1, t0);
1542         tcg_temp_free_i64(t0);
1543     } else if (TCG_TARGET_HAS_div2_i64) {
1544         TCGv_i64 t0 = tcg_temp_new_i64();
1545         tcg_gen_sari_i64(t0, arg1, 63);
1546         tcg_gen_op5_i64(INDEX_op_div2_i64, t0, ret, arg1, t0, arg2);
1547         tcg_temp_free_i64(t0);
1548     } else {
1549         gen_helper_rem_i64(ret, arg1, arg2);
1550     }
1551 }
1552 
1553 void tcg_gen_divu_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
1554 {
1555     if (TCG_TARGET_HAS_div_i64) {
1556         tcg_gen_op3_i64(INDEX_op_divu_i64, ret, arg1, arg2);
1557     } else if (TCG_TARGET_HAS_div2_i64) {
1558         TCGv_i64 t0 = tcg_temp_new_i64();
1559         tcg_gen_movi_i64(t0, 0);
1560         tcg_gen_op5_i64(INDEX_op_divu2_i64, ret, t0, arg1, t0, arg2);
1561         tcg_temp_free_i64(t0);
1562     } else {
1563         gen_helper_divu_i64(ret, arg1, arg2);
1564     }
1565 }
1566 
1567 void tcg_gen_remu_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
1568 {
1569     if (TCG_TARGET_HAS_rem_i64) {
1570         tcg_gen_op3_i64(INDEX_op_remu_i64, ret, arg1, arg2);
1571     } else if (TCG_TARGET_HAS_div_i64) {
1572         TCGv_i64 t0 = tcg_temp_new_i64();
1573         tcg_gen_op3_i64(INDEX_op_divu_i64, t0, arg1, arg2);
1574         tcg_gen_mul_i64(t0, t0, arg2);
1575         tcg_gen_sub_i64(ret, arg1, t0);
1576         tcg_temp_free_i64(t0);
1577     } else if (TCG_TARGET_HAS_div2_i64) {
1578         TCGv_i64 t0 = tcg_temp_new_i64();
1579         tcg_gen_movi_i64(t0, 0);
1580         tcg_gen_op5_i64(INDEX_op_divu2_i64, t0, ret, arg1, t0, arg2);
1581         tcg_temp_free_i64(t0);
1582     } else {
1583         gen_helper_remu_i64(ret, arg1, arg2);
1584     }
1585 }
1586 
1587 void tcg_gen_ext8s_i64(TCGv_i64 ret, TCGv_i64 arg)
1588 {
1589     if (TCG_TARGET_REG_BITS == 32) {
1590         tcg_gen_ext8s_i32(TCGV_LOW(ret), TCGV_LOW(arg));
1591         tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31);
1592     } else if (TCG_TARGET_HAS_ext8s_i64) {
1593         tcg_gen_op2_i64(INDEX_op_ext8s_i64, ret, arg);
1594     } else {
1595         tcg_gen_shli_i64(ret, arg, 56);
1596         tcg_gen_sari_i64(ret, ret, 56);
1597     }
1598 }
1599 
1600 void tcg_gen_ext16s_i64(TCGv_i64 ret, TCGv_i64 arg)
1601 {
1602     if (TCG_TARGET_REG_BITS == 32) {
1603         tcg_gen_ext16s_i32(TCGV_LOW(ret), TCGV_LOW(arg));
1604         tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31);
1605     } else if (TCG_TARGET_HAS_ext16s_i64) {
1606         tcg_gen_op2_i64(INDEX_op_ext16s_i64, ret, arg);
1607     } else {
1608         tcg_gen_shli_i64(ret, arg, 48);
1609         tcg_gen_sari_i64(ret, ret, 48);
1610     }
1611 }
1612 
1613 void tcg_gen_ext32s_i64(TCGv_i64 ret, TCGv_i64 arg)
1614 {
1615     if (TCG_TARGET_REG_BITS == 32) {
1616         tcg_gen_mov_i32(TCGV_LOW(ret), TCGV_LOW(arg));
1617         tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31);
1618     } else if (TCG_TARGET_HAS_ext32s_i64) {
1619         tcg_gen_op2_i64(INDEX_op_ext32s_i64, ret, arg);
1620     } else {
1621         tcg_gen_shli_i64(ret, arg, 32);
1622         tcg_gen_sari_i64(ret, ret, 32);
1623     }
1624 }
1625 
1626 void tcg_gen_ext8u_i64(TCGv_i64 ret, TCGv_i64 arg)
1627 {
1628     if (TCG_TARGET_REG_BITS == 32) {
1629         tcg_gen_ext8u_i32(TCGV_LOW(ret), TCGV_LOW(arg));
1630         tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
1631     } else if (TCG_TARGET_HAS_ext8u_i64) {
1632         tcg_gen_op2_i64(INDEX_op_ext8u_i64, ret, arg);
1633     } else {
1634         tcg_gen_andi_i64(ret, arg, 0xffu);
1635     }
1636 }
1637 
1638 void tcg_gen_ext16u_i64(TCGv_i64 ret, TCGv_i64 arg)
1639 {
1640     if (TCG_TARGET_REG_BITS == 32) {
1641         tcg_gen_ext16u_i32(TCGV_LOW(ret), TCGV_LOW(arg));
1642         tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
1643     } else if (TCG_TARGET_HAS_ext16u_i64) {
1644         tcg_gen_op2_i64(INDEX_op_ext16u_i64, ret, arg);
1645     } else {
1646         tcg_gen_andi_i64(ret, arg, 0xffffu);
1647     }
1648 }
1649 
1650 void tcg_gen_ext32u_i64(TCGv_i64 ret, TCGv_i64 arg)
1651 {
1652     if (TCG_TARGET_REG_BITS == 32) {
1653         tcg_gen_mov_i32(TCGV_LOW(ret), TCGV_LOW(arg));
1654         tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
1655     } else if (TCG_TARGET_HAS_ext32u_i64) {
1656         tcg_gen_op2_i64(INDEX_op_ext32u_i64, ret, arg);
1657     } else {
1658         tcg_gen_andi_i64(ret, arg, 0xffffffffu);
1659     }
1660 }
1661 
1662 /* Note: we assume the six high bytes are set to zero */
1663 void tcg_gen_bswap16_i64(TCGv_i64 ret, TCGv_i64 arg)
1664 {
1665     if (TCG_TARGET_REG_BITS == 32) {
1666         tcg_gen_bswap16_i32(TCGV_LOW(ret), TCGV_LOW(arg));
1667         tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
1668     } else if (TCG_TARGET_HAS_bswap16_i64) {
1669         tcg_gen_op2_i64(INDEX_op_bswap16_i64, ret, arg);
1670     } else {
1671         TCGv_i64 t0 = tcg_temp_new_i64();
1672 
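             /* With bits 63:16 known zero, this computes
                ((arg & 0xff) << 8) | (arg >> 8).  */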
1673         tcg_gen_ext8u_i64(t0, arg);
1674         tcg_gen_shli_i64(t0, t0, 8);
1675         tcg_gen_shri_i64(ret, arg, 8);
1676         tcg_gen_or_i64(ret, ret, t0);
1677         tcg_temp_free_i64(t0);
1678     }
1679 }
1680 
1681 /* Note: we assume the four high bytes are set to zero */
1682 void tcg_gen_bswap32_i64(TCGv_i64 ret, TCGv_i64 arg)
1683 {
1684     if (TCG_TARGET_REG_BITS == 32) {
1685         tcg_gen_bswap32_i32(TCGV_LOW(ret), TCGV_LOW(arg));
1686         tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
1687     } else if (TCG_TARGET_HAS_bswap32_i64) {
1688         tcg_gen_op2_i64(INDEX_op_bswap32_i64, ret, arg);
1689     } else {
1690         TCGv_i64 t0 = tcg_temp_new_i64();
1691         TCGv_i64 t1 = tcg_temp_new_i64();
1692         TCGv_i64 t2 = tcg_const_i64(0x00ff00ff);
1693 
1694                                         /* arg = ....abcd */
1695         tcg_gen_shri_i64(t0, arg, 8);   /*  t0 = .....abc */
1696         tcg_gen_and_i64(t1, arg, t2);   /*  t1 = .....b.d */
1697         tcg_gen_and_i64(t0, t0, t2);    /*  t0 = .....a.c */
1698         tcg_gen_shli_i64(t1, t1, 8);    /*  t1 = ....b.d. */
1699         tcg_gen_or_i64(ret, t0, t1);    /* ret = ....badc */
1700 
1701         tcg_gen_shli_i64(t1, ret, 48);  /*  t1 = dc...... */
1702         tcg_gen_shri_i64(t0, ret, 16);  /*  t0 = ......ba */
1703         tcg_gen_shri_i64(t1, t1, 32);   /*  t1 = ....dc.. */
1704         tcg_gen_or_i64(ret, t0, t1);    /* ret = ....dcba */
1705 
1706         tcg_temp_free_i64(t0);
1707         tcg_temp_free_i64(t1);
1708         tcg_temp_free_i64(t2);
1709     }
1710 }
1711 
1712 void tcg_gen_bswap64_i64(TCGv_i64 ret, TCGv_i64 arg)
1713 {
1714     if (TCG_TARGET_REG_BITS == 32) {
1715         TCGv_i32 t0, t1;
1716         t0 = tcg_temp_new_i32();
1717         t1 = tcg_temp_new_i32();
1718 
1719         tcg_gen_bswap32_i32(t0, TCGV_LOW(arg));
1720         tcg_gen_bswap32_i32(t1, TCGV_HIGH(arg));
1721         tcg_gen_mov_i32(TCGV_LOW(ret), t1);
1722         tcg_gen_mov_i32(TCGV_HIGH(ret), t0);
1723         tcg_temp_free_i32(t0);
1724         tcg_temp_free_i32(t1);
1725     } else if (TCG_TARGET_HAS_bswap64_i64) {
1726         tcg_gen_op2_i64(INDEX_op_bswap64_i64, ret, arg);
1727     } else {
1728         TCGv_i64 t0 = tcg_temp_new_i64();
1729         TCGv_i64 t1 = tcg_temp_new_i64();
1730         TCGv_i64 t2 = tcg_temp_new_i64();
1731 
1732                                         /* arg = abcdefgh */
1733         tcg_gen_movi_i64(t2, 0x00ff00ff00ff00ffull);
1734         tcg_gen_shri_i64(t0, arg, 8);   /*  t0 = .abcdefg */
1735         tcg_gen_and_i64(t1, arg, t2);   /*  t1 = .b.d.f.h */
1736         tcg_gen_and_i64(t0, t0, t2);    /*  t0 = .a.c.e.g */
1737         tcg_gen_shli_i64(t1, t1, 8);    /*  t1 = b.d.f.h. */
1738         tcg_gen_or_i64(ret, t0, t1);    /* ret = badcfehg */
1739 
1740         tcg_gen_movi_i64(t2, 0x0000ffff0000ffffull);
1741         tcg_gen_shri_i64(t0, ret, 16);  /*  t0 = ..badcfe */
1742         tcg_gen_and_i64(t1, ret, t2);   /*  t1 = ..dc..hg */
1743         tcg_gen_and_i64(t0, t0, t2);    /*  t0 = ..ba..fe */
1744         tcg_gen_shli_i64(t1, t1, 16);   /*  t1 = dc..hg.. */
1745         tcg_gen_or_i64(ret, t0, t1);    /* ret = dcbahgfe */
1746 
1747         tcg_gen_shri_i64(t0, ret, 32);  /*  t0 = ....dcba */
1748         tcg_gen_shli_i64(t1, ret, 32);  /*  t1 = hgfe.... */
1749         tcg_gen_or_i64(ret, t0, t1);    /* ret = hgfedcba */
1750 
1751         tcg_temp_free_i64(t0);
1752         tcg_temp_free_i64(t1);
1753         tcg_temp_free_i64(t2);
1754     }
1755 }
1756 
1757 void tcg_gen_not_i64(TCGv_i64 ret, TCGv_i64 arg)
1758 {
1759     if (TCG_TARGET_REG_BITS == 32) {
1760         tcg_gen_not_i32(TCGV_LOW(ret), TCGV_LOW(arg));
1761         tcg_gen_not_i32(TCGV_HIGH(ret), TCGV_HIGH(arg));
1762     } else if (TCG_TARGET_HAS_not_i64) {
1763         tcg_gen_op2_i64(INDEX_op_not_i64, ret, arg);
1764     } else {
1765         tcg_gen_xori_i64(ret, arg, -1);
1766     }
1767 }
1768 
1769 void tcg_gen_andc_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
1770 {
1771     if (TCG_TARGET_REG_BITS == 32) {
1772         tcg_gen_andc_i32(TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_LOW(arg2));
1773         tcg_gen_andc_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), TCGV_HIGH(arg2));
1774     } else if (TCG_TARGET_HAS_andc_i64) {
1775         tcg_gen_op3_i64(INDEX_op_andc_i64, ret, arg1, arg2);
1776     } else {
1777         TCGv_i64 t0 = tcg_temp_new_i64();
1778         tcg_gen_not_i64(t0, arg2);
1779         tcg_gen_and_i64(ret, arg1, t0);
1780         tcg_temp_free_i64(t0);
1781     }
1782 }
1783 
1784 void tcg_gen_eqv_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
1785 {
1786     if (TCG_TARGET_REG_BITS == 32) {
1787         tcg_gen_eqv_i32(TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_LOW(arg2));
1788         tcg_gen_eqv_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), TCGV_HIGH(arg2));
1789     } else if (TCG_TARGET_HAS_eqv_i64) {
1790         tcg_gen_op3_i64(INDEX_op_eqv_i64, ret, arg1, arg2);
1791     } else {
1792         tcg_gen_xor_i64(ret, arg1, arg2);
1793         tcg_gen_not_i64(ret, ret);
1794     }
1795 }
1796 
1797 void tcg_gen_nand_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
1798 {
1799     if (TCG_TARGET_REG_BITS == 32) {
1800         tcg_gen_nand_i32(TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_LOW(arg2));
1801         tcg_gen_nand_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), TCGV_HIGH(arg2));
1802     } else if (TCG_TARGET_HAS_nand_i64) {
1803         tcg_gen_op3_i64(INDEX_op_nand_i64, ret, arg1, arg2);
1804     } else {
1805         tcg_gen_and_i64(ret, arg1, arg2);
1806         tcg_gen_not_i64(ret, ret);
1807     }
1808 }
1809 
1810 void tcg_gen_nor_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
1811 {
1812     if (TCG_TARGET_REG_BITS == 32) {
1813         tcg_gen_nor_i32(TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_LOW(arg2));
1814         tcg_gen_nor_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), TCGV_HIGH(arg2));
1815     } else if (TCG_TARGET_HAS_nor_i64) {
1816         tcg_gen_op3_i64(INDEX_op_nor_i64, ret, arg1, arg2);
1817     } else {
1818         tcg_gen_or_i64(ret, arg1, arg2);
1819         tcg_gen_not_i64(ret, ret);
1820     }
1821 }
1822 
1823 void tcg_gen_orc_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
1824 {
1825     if (TCG_TARGET_REG_BITS == 32) {
1826         tcg_gen_orc_i32(TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_LOW(arg2));
1827         tcg_gen_orc_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), TCGV_HIGH(arg2));
1828     } else if (TCG_TARGET_HAS_orc_i64) {
1829         tcg_gen_op3_i64(INDEX_op_orc_i64, ret, arg1, arg2);
1830     } else {
1831         TCGv_i64 t0 = tcg_temp_new_i64();
1832         tcg_gen_not_i64(t0, arg2);
1833         tcg_gen_or_i64(ret, arg1, t0);
1834         tcg_temp_free_i64(t0);
1835     }
1836 }
1837 
1838 void tcg_gen_clz_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
1839 {
1840     if (TCG_TARGET_HAS_clz_i64) {
1841         tcg_gen_op3_i64(INDEX_op_clz_i64, ret, arg1, arg2);
1842     } else {
1843         gen_helper_clz_i64(ret, arg1, arg2);
1844     }
1845 }
1846 
1847 void tcg_gen_clzi_i64(TCGv_i64 ret, TCGv_i64 arg1, uint64_t arg2)
1848 {
1849     if (TCG_TARGET_REG_BITS == 32
1850         && TCG_TARGET_HAS_clz_i32
1851         && arg2 <= 0xffffffffu) {
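             /* Result is clz(high), or 32 + clz(low) when the high word is
                zero, with the bias folded into the default values.  */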
1852         TCGv_i32 t = tcg_const_i32((uint32_t)arg2 - 32);
1853         tcg_gen_clz_i32(t, TCGV_LOW(arg1), t);
1854         tcg_gen_addi_i32(t, t, 32);
1855         tcg_gen_clz_i32(TCGV_LOW(ret), TCGV_HIGH(arg1), t);
1856         tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
1857         tcg_temp_free_i32(t);
1858     } else {
1859         TCGv_i64 t = tcg_const_i64(arg2);
1860         tcg_gen_clz_i64(ret, arg1, t);
1861         tcg_temp_free_i64(t);
1862     }
1863 }
1864 
1865 void tcg_gen_ctz_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
1866 {
1867     if (TCG_TARGET_HAS_ctz_i64) {
1868         tcg_gen_op3_i64(INDEX_op_ctz_i64, ret, arg1, arg2);
1869     } else if (TCG_TARGET_HAS_ctpop_i64 || TCG_TARGET_HAS_clz_i64) {
1870         TCGv_i64 z, t = tcg_temp_new_i64();
1871 
1872         if (TCG_TARGET_HAS_ctpop_i64) {
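                 /* (x - 1) & ~x sets exactly the trailing zeros of x;
                    counting them gives ctz(x).  */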
1873             tcg_gen_subi_i64(t, arg1, 1);
1874             tcg_gen_andc_i64(t, t, arg1);
1875             tcg_gen_ctpop_i64(t, t);
1876         } else {
1877             /* Since all non-x86 hosts have clz(0) == 64, don't fight it.  */
1878             tcg_gen_neg_i64(t, arg1);
1879             tcg_gen_and_i64(t, t, arg1);
1880             tcg_gen_clzi_i64(t, t, 64);
1881             tcg_gen_xori_i64(t, t, 63);
1882         }
1883         z = tcg_const_i64(0);
1884         tcg_gen_movcond_i64(TCG_COND_EQ, ret, arg1, z, arg2, t);
1885         tcg_temp_free_i64(t);
1886         tcg_temp_free_i64(z);
1887     } else {
1888         gen_helper_ctz_i64(ret, arg1, arg2);
1889     }
1890 }
1891 
1892 void tcg_gen_ctzi_i64(TCGv_i64 ret, TCGv_i64 arg1, uint64_t arg2)
1893 {
1894     if (TCG_TARGET_REG_BITS == 32
1895         && TCG_TARGET_HAS_ctz_i32
1896         && arg2 <= 0xffffffffu) {
1897         TCGv_i32 t32 = tcg_const_i32((uint32_t)arg2 - 32);
1898         tcg_gen_ctz_i32(t32, TCGV_HIGH(arg1), t32);
1899         tcg_gen_addi_i32(t32, t32, 32);
1900         tcg_gen_ctz_i32(TCGV_LOW(ret), TCGV_LOW(arg1), t32);
1901         tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
1902         tcg_temp_free_i32(t32);
1903     } else if (!TCG_TARGET_HAS_ctz_i64
1904                && TCG_TARGET_HAS_ctpop_i64
1905                && arg2 == 64) {
1906         /* This equivalence has the advantage of not requiring a fixup.  */
1907         TCGv_i64 t = tcg_temp_new_i64();
1908         tcg_gen_subi_i64(t, arg1, 1);
1909         tcg_gen_andc_i64(t, t, arg1);
1910         tcg_gen_ctpop_i64(ret, t);
1911         tcg_temp_free_i64(t);
1912     } else {
1913         TCGv_i64 t64 = tcg_const_i64(arg2);
1914         tcg_gen_ctz_i64(ret, arg1, t64);
1915         tcg_temp_free_i64(t64);
1916     }
1917 }
1918 
1919 void tcg_gen_clrsb_i64(TCGv_i64 ret, TCGv_i64 arg)
1920 {
1921     if (TCG_TARGET_HAS_clz_i64 || TCG_TARGET_HAS_clz_i32) {
1922         TCGv_i64 t = tcg_temp_new_i64();
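             /* arg ^ (arg >> 63) has clrsb(arg) + 1 leading zeros.  */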
1923         tcg_gen_sari_i64(t, arg, 63);
1924         tcg_gen_xor_i64(t, t, arg);
1925         tcg_gen_clzi_i64(t, t, 64);
1926         tcg_gen_subi_i64(ret, t, 1);
1927         tcg_temp_free_i64(t);
1928     } else {
1929         gen_helper_clrsb_i64(ret, arg);
1930     }
1931 }
1932 
1933 void tcg_gen_ctpop_i64(TCGv_i64 ret, TCGv_i64 arg1)
1934 {
1935     if (TCG_TARGET_HAS_ctpop_i64) {
1936         tcg_gen_op2_i64(INDEX_op_ctpop_i64, ret, arg1);
1937     } else if (TCG_TARGET_REG_BITS == 32 && TCG_TARGET_HAS_ctpop_i32) {
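             /* ctpop of each 32-bit half, summed into the low word.  */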
1938         tcg_gen_ctpop_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1));
1939         tcg_gen_ctpop_i32(TCGV_LOW(ret), TCGV_LOW(arg1));
1940         tcg_gen_add_i32(TCGV_LOW(ret), TCGV_LOW(ret), TCGV_HIGH(ret));
1941         tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
1942     } else {
1943         gen_helper_ctpop_i64(ret, arg1);
1944     }
1945 }
1946 
1947 void tcg_gen_rotl_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
1948 {
1949     if (TCG_TARGET_HAS_rot_i64) {
1950         tcg_gen_op3_i64(INDEX_op_rotl_i64, ret, arg1, arg2);
1951     } else {
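             /* Expand as (arg1 << arg2) | (arg1 >> (64 - arg2)).  */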
1952         TCGv_i64 t0, t1;
1953         t0 = tcg_temp_new_i64();
1954         t1 = tcg_temp_new_i64();
1955         tcg_gen_shl_i64(t0, arg1, arg2);
1956         tcg_gen_subfi_i64(t1, 64, arg2);
1957         tcg_gen_shr_i64(t1, arg1, t1);
1958         tcg_gen_or_i64(ret, t0, t1);
1959         tcg_temp_free_i64(t0);
1960         tcg_temp_free_i64(t1);
1961     }
1962 }
1963 
1964 void tcg_gen_rotli_i64(TCGv_i64 ret, TCGv_i64 arg1, unsigned arg2)
1965 {
1966     tcg_debug_assert(arg2 < 64);
1967     /* some cases can be optimized here */
1968     if (arg2 == 0) {
1969         tcg_gen_mov_i64(ret, arg1);
1970     } else if (TCG_TARGET_HAS_rot_i64) {
1971         TCGv_i64 t0 = tcg_const_i64(arg2);
1972         tcg_gen_rotl_i64(ret, arg1, t0);
1973         tcg_temp_free_i64(t0);
1974     } else {
1975         TCGv_i64 t0, t1;
1976         t0 = tcg_temp_new_i64();
1977         t1 = tcg_temp_new_i64();
1978         tcg_gen_shli_i64(t0, arg1, arg2);
1979         tcg_gen_shri_i64(t1, arg1, 64 - arg2);
1980         tcg_gen_or_i64(ret, t0, t1);
1981         tcg_temp_free_i64(t0);
1982         tcg_temp_free_i64(t1);
1983     }
1984 }
1985 
1986 void tcg_gen_rotr_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
1987 {
1988     if (TCG_TARGET_HAS_rot_i64) {
1989         tcg_gen_op3_i64(INDEX_op_rotr_i64, ret, arg1, arg2);
1990     } else {
1991         TCGv_i64 t0, t1;
1992         t0 = tcg_temp_new_i64();
1993         t1 = tcg_temp_new_i64();
1994         tcg_gen_shr_i64(t0, arg1, arg2);
1995         tcg_gen_subfi_i64(t1, 64, arg2);
1996         tcg_gen_shl_i64(t1, arg1, t1);
1997         tcg_gen_or_i64(ret, t0, t1);
1998         tcg_temp_free_i64(t0);
1999         tcg_temp_free_i64(t1);
2000     }
2001 }
2002 
2003 void tcg_gen_rotri_i64(TCGv_i64 ret, TCGv_i64 arg1, unsigned arg2)
2004 {
2005     tcg_debug_assert(arg2 < 64);
2006     /* some cases can be optimized here */
2007     if (arg2 == 0) {
2008         tcg_gen_mov_i64(ret, arg1);
2009     } else {
2010         tcg_gen_rotli_i64(ret, arg1, 64 - arg2);
2011     }
2012 }
2013 
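     /* Deposit the low LEN bits of ARG2 into ARG1 at position OFS;
        e.g. ofs = 8, len = 16 replaces bits [23:8] of ARG1 with ARG2[15:0].  */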
2014 void tcg_gen_deposit_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2,
2015                          unsigned int ofs, unsigned int len)
2016 {
2017     uint64_t mask;
2018     TCGv_i64 t1;
2019 
2020     tcg_debug_assert(ofs < 64);
2021     tcg_debug_assert(len > 0);
2022     tcg_debug_assert(len <= 64);
2023     tcg_debug_assert(ofs + len <= 64);
2024 
2025     if (len == 64) {
2026         tcg_gen_mov_i64(ret, arg2);
2027         return;
2028     }
2029     if (TCG_TARGET_HAS_deposit_i64 && TCG_TARGET_deposit_i64_valid(ofs, len)) {
2030         tcg_gen_op5ii_i64(INDEX_op_deposit_i64, ret, arg1, arg2, ofs, len);
2031         return;
2032     }
2033 
2034     if (TCG_TARGET_REG_BITS == 32) {
2035         if (ofs >= 32) {
2036             tcg_gen_deposit_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1),
2037                                 TCGV_LOW(arg2), ofs - 32, len);
2038             tcg_gen_mov_i32(TCGV_LOW(ret), TCGV_LOW(arg1));
2039             return;
2040         }
2041         if (ofs + len <= 32) {
2042             tcg_gen_deposit_i32(TCGV_LOW(ret), TCGV_LOW(arg1),
2043                                 TCGV_LOW(arg2), ofs, len);
2044             tcg_gen_mov_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1));
2045             return;
2046         }
2047     }
2048 
2049     t1 = tcg_temp_new_i64();
2050 
2051     if (TCG_TARGET_HAS_extract2_i64) {
2052         if (ofs + len == 64) {
2053             tcg_gen_shli_i64(t1, arg1, len);
2054             tcg_gen_extract2_i64(ret, t1, arg2, len);
2055             goto done;
2056         }
2057         if (ofs == 0) {
2058             tcg_gen_extract2_i64(ret, arg1, arg2, len);
2059             tcg_gen_rotli_i64(ret, ret, len);
2060             goto done;
2061         }
2062     }
2063 
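         /* Fallback: clear the field in arg1 and OR in the masked,
            shifted arg2.  */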
2064     mask = (1ull << len) - 1;
2065     if (ofs + len < 64) {
2066         tcg_gen_andi_i64(t1, arg2, mask);
2067         tcg_gen_shli_i64(t1, t1, ofs);
2068     } else {
2069         tcg_gen_shli_i64(t1, arg2, ofs);
2070     }
2071     tcg_gen_andi_i64(ret, arg1, ~(mask << ofs));
2072     tcg_gen_or_i64(ret, ret, t1);
2073  done:
2074     tcg_temp_free_i64(t1);
2075 }
2076 
2077 void tcg_gen_deposit_z_i64(TCGv_i64 ret, TCGv_i64 arg,
2078                            unsigned int ofs, unsigned int len)
2079 {
2080     tcg_debug_assert(ofs < 64);
2081     tcg_debug_assert(len > 0);
2082     tcg_debug_assert(len <= 64);
2083     tcg_debug_assert(ofs + len <= 64);
2084 
2085     if (ofs + len == 64) {
2086         tcg_gen_shli_i64(ret, arg, ofs);
2087     } else if (ofs == 0) {
2088         tcg_gen_andi_i64(ret, arg, (1ull << len) - 1);
2089     } else if (TCG_TARGET_HAS_deposit_i64
2090                && TCG_TARGET_deposit_i64_valid(ofs, len)) {
2091         TCGv_i64 zero = tcg_const_i64(0);
2092         tcg_gen_op5ii_i64(INDEX_op_deposit_i64, ret, zero, arg, ofs, len);
2093         tcg_temp_free_i64(zero);
2094     } else {
2095         if (TCG_TARGET_REG_BITS == 32) {
2096             if (ofs >= 32) {
2097                 tcg_gen_deposit_z_i32(TCGV_HIGH(ret), TCGV_LOW(arg),
2098                                       ofs - 32, len);
2099                 tcg_gen_movi_i32(TCGV_LOW(ret), 0);
2100                 return;
2101             }
2102             if (ofs + len <= 32) {
2103                 tcg_gen_deposit_z_i32(TCGV_LOW(ret), TCGV_LOW(arg), ofs, len);
2104                 tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
2105                 return;
2106             }
2107         }
2108         /* To help two-operand hosts we prefer to zero-extend first,
2109            which allows ARG to stay live.  */
2110         switch (len) {
2111         case 32:
2112             if (TCG_TARGET_HAS_ext32u_i64) {
2113                 tcg_gen_ext32u_i64(ret, arg);
2114                 tcg_gen_shli_i64(ret, ret, ofs);
2115                 return;
2116             }
2117             break;
2118         case 16:
2119             if (TCG_TARGET_HAS_ext16u_i64) {
2120                 tcg_gen_ext16u_i64(ret, arg);
2121                 tcg_gen_shli_i64(ret, ret, ofs);
2122                 return;
2123             }
2124             break;
2125         case 8:
2126             if (TCG_TARGET_HAS_ext8u_i64) {
2127                 tcg_gen_ext8u_i64(ret, arg);
2128                 tcg_gen_shli_i64(ret, ret, ofs);
2129                 return;
2130             }
2131             break;
2132         }
2133         /* Otherwise prefer zero-extension over AND for code size.  */
2134         switch (ofs + len) {
2135         case 32:
2136             if (TCG_TARGET_HAS_ext32u_i64) {
2137                 tcg_gen_shli_i64(ret, arg, ofs);
2138                 tcg_gen_ext32u_i64(ret, ret);
2139                 return;
2140             }
2141             break;
2142         case 16:
2143             if (TCG_TARGET_HAS_ext16u_i64) {
2144                 tcg_gen_shli_i64(ret, arg, ofs);
2145                 tcg_gen_ext16u_i64(ret, ret);
2146                 return;
2147             }
2148             break;
2149         case 8:
2150             if (TCG_TARGET_HAS_ext8u_i64) {
2151                 tcg_gen_shli_i64(ret, arg, ofs);
2152                 tcg_gen_ext8u_i64(ret, ret);
2153                 return;
2154             }
2155             break;
2156         }
2157         tcg_gen_andi_i64(ret, arg, (1ull << len) - 1);
2158         tcg_gen_shli_i64(ret, ret, ofs);
2159     }
2160 }
2161 
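     /* Extract LEN bits from ARG at position OFS, zero-extended;
        equivalent to (arg >> ofs) & ((1 << len) - 1).  */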
2162 void tcg_gen_extract_i64(TCGv_i64 ret, TCGv_i64 arg,
2163                          unsigned int ofs, unsigned int len)
2164 {
2165     tcg_debug_assert(ofs < 64);
2166     tcg_debug_assert(len > 0);
2167     tcg_debug_assert(len <= 64);
2168     tcg_debug_assert(ofs + len <= 64);
2169 
2170     /* Canonicalize certain special cases, even if extract is supported.  */
2171     if (ofs + len == 64) {
2172         tcg_gen_shri_i64(ret, arg, 64 - len);
2173         return;
2174     }
2175     if (ofs == 0) {
2176         tcg_gen_andi_i64(ret, arg, (1ull << len) - 1);
2177         return;
2178     }
2179 
2180     if (TCG_TARGET_REG_BITS == 32) {
2181         /* Look for a 32-bit extract within one of the two words.  */
2182         if (ofs >= 32) {
2183             tcg_gen_extract_i32(TCGV_LOW(ret), TCGV_HIGH(arg), ofs - 32, len);
2184             tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
2185             return;
2186         }
2187         if (ofs + len <= 32) {
2188             tcg_gen_extract_i32(TCGV_LOW(ret), TCGV_LOW(arg), ofs, len);
2189             tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
2190             return;
2191         }
2192         /* The field is split across two words.  One double-word
2193            shift is better than two double-word shifts.  */
2194         goto do_shift_and;
2195     }
2196 
2197     if (TCG_TARGET_HAS_extract_i64
2198         && TCG_TARGET_extract_i64_valid(ofs, len)) {
2199         tcg_gen_op4ii_i64(INDEX_op_extract_i64, ret, arg, ofs, len);
2200         return;
2201     }
2202 
2203     /* Assume that zero-extension, if available, is cheaper than a shift.  */
2204     switch (ofs + len) {
2205     case 32:
2206         if (TCG_TARGET_HAS_ext32u_i64) {
2207             tcg_gen_ext32u_i64(ret, arg);
2208             tcg_gen_shri_i64(ret, ret, ofs);
2209             return;
2210         }
2211         break;
2212     case 16:
2213         if (TCG_TARGET_HAS_ext16u_i64) {
2214             tcg_gen_ext16u_i64(ret, arg);
2215             tcg_gen_shri_i64(ret, ret, ofs);
2216             return;
2217         }
2218         break;
2219     case 8:
2220         if (TCG_TARGET_HAS_ext8u_i64) {
2221             tcg_gen_ext8u_i64(ret, arg);
2222             tcg_gen_shri_i64(ret, ret, ofs);
2223             return;
2224         }
2225         break;
2226     }
2227 
2228     /* ??? Ideally we'd know what values are available for immediate AND.
2229        Assume that 8 bits are available, plus the special cases of 16 and 32,
2230        so that we get ext8u, ext16u, and ext32u.  */
2231     switch (len) {
2232     case 1 ... 8: case 16: case 32:
2233     do_shift_and:
2234         tcg_gen_shri_i64(ret, arg, ofs);
2235         tcg_gen_andi_i64(ret, ret, (1ull << len) - 1);
2236         break;
2237     default:
2238         tcg_gen_shli_i64(ret, arg, 64 - len - ofs);
2239         tcg_gen_shri_i64(ret, ret, 64 - len);
2240         break;
2241     }
2242 }
2243 
2244 void tcg_gen_sextract_i64(TCGv_i64 ret, TCGv_i64 arg,
2245                           unsigned int ofs, unsigned int len)
2246 {
2247     tcg_debug_assert(ofs < 64);
2248     tcg_debug_assert(len > 0);
2249     tcg_debug_assert(len <= 64);
2250     tcg_debug_assert(ofs + len <= 64);
2251 
2252     /* Canonicalize certain special cases, even if sextract is supported.  */
2253     if (ofs + len == 64) {
2254         tcg_gen_sari_i64(ret, arg, 64 - len);
2255         return;
2256     }
2257     if (ofs == 0) {
2258         switch (len) {
2259         case 32:
2260             tcg_gen_ext32s_i64(ret, arg);
2261             return;
2262         case 16:
2263             tcg_gen_ext16s_i64(ret, arg);
2264             return;
2265         case 8:
2266             tcg_gen_ext8s_i64(ret, arg);
2267             return;
2268         }
2269     }
2270 
2271     if (TCG_TARGET_REG_BITS == 32) {
2272         /* Look for a 32-bit extract within one of the two words.  */
2273         if (ofs >= 32) {
2274             tcg_gen_sextract_i32(TCGV_LOW(ret), TCGV_HIGH(arg), ofs - 32, len);
2275         } else if (ofs + len <= 32) {
2276             tcg_gen_sextract_i32(TCGV_LOW(ret), TCGV_LOW(arg), ofs, len);
2277         } else if (ofs == 0) {
2278             tcg_gen_mov_i32(TCGV_LOW(ret), TCGV_LOW(arg));
2279             tcg_gen_sextract_i32(TCGV_HIGH(ret), TCGV_HIGH(arg), 0, len - 32);
2280             return;
2281         } else if (len > 32) {
2282             TCGv_i32 t = tcg_temp_new_i32();
2283             /* Extract the bits for the high word normally.  */
2284             tcg_gen_sextract_i32(t, TCGV_HIGH(arg), ofs + 32, len - 32);
2285             /* Shift the field down for the low part.  */
2286             tcg_gen_shri_i64(ret, arg, ofs);
2287             /* Overwrite the shift into the high part.  */
2288             tcg_gen_mov_i32(TCGV_HIGH(ret), t);
2289             tcg_temp_free_i32(t);
2290             return;
2291         } else {
2292             /* Shift the field down for the low part, such that the
2293                field sits at the MSB.  */
2294             tcg_gen_shri_i64(ret, arg, ofs + len - 32);
2295             /* Shift the field down from the MSB, sign extending.  */
2296             tcg_gen_sari_i32(TCGV_LOW(ret), TCGV_LOW(ret), 32 - len);
2297         }
2298         /* Sign-extend the field from 32 bits.  */
2299         tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31);
2300         return;
2301     }
2302 
2303     if (TCG_TARGET_HAS_sextract_i64
2304         && TCG_TARGET_extract_i64_valid(ofs, len)) {
2305         tcg_gen_op4ii_i64(INDEX_op_sextract_i64, ret, arg, ofs, len);
2306         return;
2307     }
2308 
2309     /* Assume that sign-extension, if available, is cheaper than a shift.  */
2310     switch (ofs + len) {
2311     case 32:
2312         if (TCG_TARGET_HAS_ext32s_i64) {
2313             tcg_gen_ext32s_i64(ret, arg);
2314             tcg_gen_sari_i64(ret, ret, ofs);
2315             return;
2316         }
2317         break;
2318     case 16:
2319         if (TCG_TARGET_HAS_ext16s_i64) {
2320             tcg_gen_ext16s_i64(ret, arg);
2321             tcg_gen_sari_i64(ret, ret, ofs);
2322             return;
2323         }
2324         break;
2325     case 8:
2326         if (TCG_TARGET_HAS_ext8s_i64) {
2327             tcg_gen_ext8s_i64(ret, arg);
2328             tcg_gen_sari_i64(ret, ret, ofs);
2329             return;
2330         }
2331         break;
2332     }
2333     switch (len) {
2334     case 32:
2335         if (TCG_TARGET_HAS_ext32s_i64) {
2336             tcg_gen_shri_i64(ret, arg, ofs);
2337             tcg_gen_ext32s_i64(ret, ret);
2338             return;
2339         }
2340         break;
2341     case 16:
2342         if (TCG_TARGET_HAS_ext16s_i64) {
2343             tcg_gen_shri_i64(ret, arg, ofs);
2344             tcg_gen_ext16s_i64(ret, ret);
2345             return;
2346         }
2347         break;
2348     case 8:
2349         if (TCG_TARGET_HAS_ext8s_i64) {
2350             tcg_gen_shri_i64(ret, arg, ofs);
2351             tcg_gen_ext8s_i64(ret, ret);
2352             return;
2353         }
2354         break;
2355     }
2356     tcg_gen_shli_i64(ret, arg, 64 - len - ofs);
2357     tcg_gen_sari_i64(ret, ret, 64 - len);
2358 }
2359 
2360 /*
2361  * Extract 64 bits from a 128-bit input, ah:al, starting from ofs.
2362  * Unlike tcg_gen_extract_i64 above, len is fixed at 64.
2363  */
2364 void tcg_gen_extract2_i64(TCGv_i64 ret, TCGv_i64 al, TCGv_i64 ah,
2365                           unsigned int ofs)
2366 {
2367     tcg_debug_assert(ofs <= 64);
2368     if (ofs == 0) {
2369         tcg_gen_mov_i64(ret, al);
2370     } else if (ofs == 64) {
2371         tcg_gen_mov_i64(ret, ah);
2372     } else if (al == ah) {
2373         tcg_gen_rotri_i64(ret, al, ofs);
2374     } else if (TCG_TARGET_HAS_extract2_i64) {
2375         tcg_gen_op4i_i64(INDEX_op_extract2_i64, ret, al, ah, ofs);
2376     } else {
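             /* ret = (al >> ofs) | (ah << (64 - ofs)), via shift
                plus deposit.  */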
2377         TCGv_i64 t0 = tcg_temp_new_i64();
2378         tcg_gen_shri_i64(t0, al, ofs);
2379         tcg_gen_deposit_i64(ret, t0, ah, 64 - ofs, ofs);
2380         tcg_temp_free_i64(t0);
2381     }
2382 }
2383 
2384 void tcg_gen_movcond_i64(TCGCond cond, TCGv_i64 ret, TCGv_i64 c1,
2385                          TCGv_i64 c2, TCGv_i64 v1, TCGv_i64 v2)
2386 {
2387     if (cond == TCG_COND_ALWAYS) {
2388         tcg_gen_mov_i64(ret, v1);
2389     } else if (cond == TCG_COND_NEVER) {
2390         tcg_gen_mov_i64(ret, v2);
2391     } else if (TCG_TARGET_REG_BITS == 32) {
2392         TCGv_i32 t0 = tcg_temp_new_i32();
2393         TCGv_i32 t1 = tcg_temp_new_i32();
2394         tcg_gen_op6i_i32(INDEX_op_setcond2_i32, t0,
2395                          TCGV_LOW(c1), TCGV_HIGH(c1),
2396                          TCGV_LOW(c2), TCGV_HIGH(c2), cond);
2397 
2398         if (TCG_TARGET_HAS_movcond_i32) {
2399             tcg_gen_movi_i32(t1, 0);
2400             tcg_gen_movcond_i32(TCG_COND_NE, TCGV_LOW(ret), t0, t1,
2401                                 TCGV_LOW(v1), TCGV_LOW(v2));
2402             tcg_gen_movcond_i32(TCG_COND_NE, TCGV_HIGH(ret), t0, t1,
2403                                 TCGV_HIGH(v1), TCGV_HIGH(v2));
2404         } else {
2405             tcg_gen_neg_i32(t0, t0);
2406 
2407             tcg_gen_and_i32(t1, TCGV_LOW(v1), t0);
2408             tcg_gen_andc_i32(TCGV_LOW(ret), TCGV_LOW(v2), t0);
2409             tcg_gen_or_i32(TCGV_LOW(ret), TCGV_LOW(ret), t1);
2410 
2411             tcg_gen_and_i32(t1, TCGV_HIGH(v1), t0);
2412             tcg_gen_andc_i32(TCGV_HIGH(ret), TCGV_HIGH(v2), t0);
2413             tcg_gen_or_i32(TCGV_HIGH(ret), TCGV_HIGH(ret), t1);
2414         }
2415         tcg_temp_free_i32(t0);
2416         tcg_temp_free_i32(t1);
2417     } else if (TCG_TARGET_HAS_movcond_i64) {
2418         tcg_gen_op6i_i64(INDEX_op_movcond_i64, ret, c1, c2, v1, v2, cond);
2419     } else {
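             /* ret = (v1 & -cond) | (v2 & ~(-cond)), from the 0/1
                setcond result.  */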
2420         TCGv_i64 t0 = tcg_temp_new_i64();
2421         TCGv_i64 t1 = tcg_temp_new_i64();
2422         tcg_gen_setcond_i64(cond, t0, c1, c2);
2423         tcg_gen_neg_i64(t0, t0);
2424         tcg_gen_and_i64(t1, v1, t0);
2425         tcg_gen_andc_i64(ret, v2, t0);
2426         tcg_gen_or_i64(ret, ret, t1);
2427         tcg_temp_free_i64(t0);
2428         tcg_temp_free_i64(t1);
2429     }
2430 }
2431 
2432 void tcg_gen_add2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 al,
2433                       TCGv_i64 ah, TCGv_i64 bl, TCGv_i64 bh)
2434 {
2435     if (TCG_TARGET_HAS_add2_i64) {
2436         tcg_gen_op6_i64(INDEX_op_add2_i64, rl, rh, al, ah, bl, bh);
2437     } else {
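             /* Low-half carry out: (al + bl) < al, as an unsigned compare.  */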
2438         TCGv_i64 t0 = tcg_temp_new_i64();
2439         TCGv_i64 t1 = tcg_temp_new_i64();
2440         tcg_gen_add_i64(t0, al, bl);
2441         tcg_gen_setcond_i64(TCG_COND_LTU, t1, t0, al);
2442         tcg_gen_add_i64(rh, ah, bh);
2443         tcg_gen_add_i64(rh, rh, t1);
2444         tcg_gen_mov_i64(rl, t0);
2445         tcg_temp_free_i64(t0);
2446         tcg_temp_free_i64(t1);
2447     }
2448 }
2449 
2450 void tcg_gen_sub2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 al,
2451                       TCGv_i64 ah, TCGv_i64 bl, TCGv_i64 bh)
2452 {
2453     if (TCG_TARGET_HAS_sub2_i64) {
2454         tcg_gen_op6_i64(INDEX_op_sub2_i64, rl, rh, al, ah, bl, bh);
2455     } else {
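             /* Low-half borrow: al < bl, as an unsigned compare.  */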
2456         TCGv_i64 t0 = tcg_temp_new_i64();
2457         TCGv_i64 t1 = tcg_temp_new_i64();
2458         tcg_gen_sub_i64(t0, al, bl);
2459         tcg_gen_setcond_i64(TCG_COND_LTU, t1, al, bl);
2460         tcg_gen_sub_i64(rh, ah, bh);
2461         tcg_gen_sub_i64(rh, rh, t1);
2462         tcg_gen_mov_i64(rl, t0);
2463         tcg_temp_free_i64(t0);
2464         tcg_temp_free_i64(t1);
2465     }
2466 }
2467 
2468 void tcg_gen_mulu2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 arg1, TCGv_i64 arg2)
2469 {
2470     if (TCG_TARGET_HAS_mulu2_i64) {
2471         tcg_gen_op4_i64(INDEX_op_mulu2_i64, rl, rh, arg1, arg2);
2472     } else if (TCG_TARGET_HAS_muluh_i64) {
2473         TCGv_i64 t = tcg_temp_new_i64();
2474         tcg_gen_op3_i64(INDEX_op_mul_i64, t, arg1, arg2);
2475         tcg_gen_op3_i64(INDEX_op_muluh_i64, rh, arg1, arg2);
2476         tcg_gen_mov_i64(rl, t);
2477         tcg_temp_free_i64(t);
2478     } else {
2479         TCGv_i64 t0 = tcg_temp_new_i64();
2480         tcg_gen_mul_i64(t0, arg1, arg2);
2481         gen_helper_muluh_i64(rh, arg1, arg2);
2482         tcg_gen_mov_i64(rl, t0);
2483         tcg_temp_free_i64(t0);
2484     }
2485 }
2486 
2487 void tcg_gen_muls2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 arg1, TCGv_i64 arg2)
2488 {
2489     if (TCG_TARGET_HAS_muls2_i64) {
2490         tcg_gen_op4_i64(INDEX_op_muls2_i64, rl, rh, arg1, arg2);
2491     } else if (TCG_TARGET_HAS_mulsh_i64) {
2492         TCGv_i64 t = tcg_temp_new_i64();
2493         tcg_gen_op3_i64(INDEX_op_mul_i64, t, arg1, arg2);
2494         tcg_gen_op3_i64(INDEX_op_mulsh_i64, rh, arg1, arg2);
2495         tcg_gen_mov_i64(rl, t);
2496         tcg_temp_free_i64(t);
2497     } else if (TCG_TARGET_HAS_mulu2_i64 || TCG_TARGET_HAS_muluh_i64) {
2498         TCGv_i64 t0 = tcg_temp_new_i64();
2499         TCGv_i64 t1 = tcg_temp_new_i64();
2500         TCGv_i64 t2 = tcg_temp_new_i64();
2501         TCGv_i64 t3 = tcg_temp_new_i64();
2502         tcg_gen_mulu2_i64(t0, t1, arg1, arg2);
2503         /* Adjust for negative inputs.  */
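             /* mulsh(a, b) = muluh(a, b) - (a < 0 ? b : 0) - (b < 0 ? a : 0).  */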
2504         tcg_gen_sari_i64(t2, arg1, 63);
2505         tcg_gen_sari_i64(t3, arg2, 63);
2506         tcg_gen_and_i64(t2, t2, arg2);
2507         tcg_gen_and_i64(t3, t3, arg1);
2508         tcg_gen_sub_i64(rh, t1, t2);
2509         tcg_gen_sub_i64(rh, rh, t3);
2510         tcg_gen_mov_i64(rl, t0);
2511         tcg_temp_free_i64(t0);
2512         tcg_temp_free_i64(t1);
2513         tcg_temp_free_i64(t2);
2514         tcg_temp_free_i64(t3);
2515     } else {
2516         TCGv_i64 t0 = tcg_temp_new_i64();
2517         tcg_gen_mul_i64(t0, arg1, arg2);
2518         gen_helper_mulsh_i64(rh, arg1, arg2);
2519         tcg_gen_mov_i64(rl, t0);
2520         tcg_temp_free_i64(t0);
2521     }
2522 }
2523 
2524 void tcg_gen_mulsu2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 arg1, TCGv_i64 arg2)
2525 {
2526     TCGv_i64 t0 = tcg_temp_new_i64();
2527     TCGv_i64 t1 = tcg_temp_new_i64();
2528     TCGv_i64 t2 = tcg_temp_new_i64();
2529     tcg_gen_mulu2_i64(t0, t1, arg1, arg2);
2530     /* Adjust for negative input for the signed arg1.  */
2531     tcg_gen_sari_i64(t2, arg1, 63);
2532     tcg_gen_and_i64(t2, t2, arg2);
2533     tcg_gen_sub_i64(rh, t1, t2);
2534     tcg_gen_mov_i64(rl, t0);
2535     tcg_temp_free_i64(t0);
2536     tcg_temp_free_i64(t1);
2537     tcg_temp_free_i64(t2);
2538 }
2539 
2540 void tcg_gen_smin_i64(TCGv_i64 ret, TCGv_i64 a, TCGv_i64 b)
2541 {
2542     tcg_gen_movcond_i64(TCG_COND_LT, ret, a, b, a, b);
2543 }
2544 
2545 void tcg_gen_umin_i64(TCGv_i64 ret, TCGv_i64 a, TCGv_i64 b)
2546 {
2547     tcg_gen_movcond_i64(TCG_COND_LTU, ret, a, b, a, b);
2548 }
2549 
2550 void tcg_gen_smax_i64(TCGv_i64 ret, TCGv_i64 a, TCGv_i64 b)
2551 {
2552     tcg_gen_movcond_i64(TCG_COND_LT, ret, a, b, b, a);
2553 }
2554 
2555 void tcg_gen_umax_i64(TCGv_i64 ret, TCGv_i64 a, TCGv_i64 b)
2556 {
2557     tcg_gen_movcond_i64(TCG_COND_LTU, ret, a, b, b, a);
2558 }
2559 
2560 void tcg_gen_abs_i64(TCGv_i64 ret, TCGv_i64 a)
2561 {
2562     TCGv_i64 t = tcg_temp_new_i64();
2563 
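         /* t = -1 if a is negative, else 0; abs(a) = (a ^ t) - t.  */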
2564     tcg_gen_sari_i64(t, a, 63);
2565     tcg_gen_xor_i64(ret, a, t);
2566     tcg_gen_sub_i64(ret, ret, t);
2567     tcg_temp_free_i64(t);
2568 }
2569 
2570 /* Size-changing operations.  */
2571 
2572 void tcg_gen_extrl_i64_i32(TCGv_i32 ret, TCGv_i64 arg)
2573 {
2574     if (TCG_TARGET_REG_BITS == 32) {
2575         tcg_gen_mov_i32(ret, TCGV_LOW(arg));
2576     } else if (TCG_TARGET_HAS_extrl_i64_i32) {
2577         tcg_gen_op2(INDEX_op_extrl_i64_i32,
2578                     tcgv_i32_arg(ret), tcgv_i64_arg(arg));
2579     } else {
2580         tcg_gen_mov_i32(ret, (TCGv_i32)arg);
2581     }
2582 }
2583 
2584 void tcg_gen_extrh_i64_i32(TCGv_i32 ret, TCGv_i64 arg)
2585 {
2586     if (TCG_TARGET_REG_BITS == 32) {
2587         tcg_gen_mov_i32(ret, TCGV_HIGH(arg));
2588     } else if (TCG_TARGET_HAS_extrh_i64_i32) {
2589         tcg_gen_op2(INDEX_op_extrh_i64_i32,
2590                     tcgv_i32_arg(ret), tcgv_i64_arg(arg));
2591     } else {
2592         TCGv_i64 t = tcg_temp_new_i64();
2593         tcg_gen_shri_i64(t, arg, 32);
2594         tcg_gen_mov_i32(ret, (TCGv_i32)t);
2595         tcg_temp_free_i64(t);
2596     }
2597 }
2598 
2599 void tcg_gen_extu_i32_i64(TCGv_i64 ret, TCGv_i32 arg)
2600 {
2601     if (TCG_TARGET_REG_BITS == 32) {
2602         tcg_gen_mov_i32(TCGV_LOW(ret), arg);
2603         tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
2604     } else {
2605         tcg_gen_op2(INDEX_op_extu_i32_i64,
2606                     tcgv_i64_arg(ret), tcgv_i32_arg(arg));
2607     }
2608 }
2609 
2610 void tcg_gen_ext_i32_i64(TCGv_i64 ret, TCGv_i32 arg)
2611 {
2612     if (TCG_TARGET_REG_BITS == 32) {
2613         tcg_gen_mov_i32(TCGV_LOW(ret), arg);
2614         tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31);
2615     } else {
2616         tcg_gen_op2(INDEX_op_ext_i32_i64,
2617                     tcgv_i64_arg(ret), tcgv_i32_arg(arg));
2618     }
2619 }
2620 
2621 void tcg_gen_concat_i32_i64(TCGv_i64 dest, TCGv_i32 low, TCGv_i32 high)
2622 {
2623     TCGv_i64 tmp;
2624 
2625     if (TCG_TARGET_REG_BITS == 32) {
2626         tcg_gen_mov_i32(TCGV_LOW(dest), low);
2627         tcg_gen_mov_i32(TCGV_HIGH(dest), high);
2628         return;
2629     }
2630 
2631     tmp = tcg_temp_new_i64();
2632     /* These extensions are only needed for type correctness.
2633        We may be able to do better given target-specific information.  */
2634     tcg_gen_extu_i32_i64(tmp, high);
2635     tcg_gen_extu_i32_i64(dest, low);
2636     /* If deposit is available, use it.  Otherwise use the extra
2637        knowledge that we have of the zero-extensions above.  */
2638     if (TCG_TARGET_HAS_deposit_i64 && TCG_TARGET_deposit_i64_valid(32, 32)) {
2639         tcg_gen_deposit_i64(dest, dest, tmp, 32, 32);
2640     } else {
2641         tcg_gen_shli_i64(tmp, tmp, 32);
2642         tcg_gen_or_i64(dest, dest, tmp);
2643     }
2644     tcg_temp_free_i64(tmp);
2645 }
2646 
2647 void tcg_gen_extr_i64_i32(TCGv_i32 lo, TCGv_i32 hi, TCGv_i64 arg)
2648 {
2649     if (TCG_TARGET_REG_BITS == 32) {
2650         tcg_gen_mov_i32(lo, TCGV_LOW(arg));
2651         tcg_gen_mov_i32(hi, TCGV_HIGH(arg));
2652     } else {
2653         tcg_gen_extrl_i64_i32(lo, arg);
2654         tcg_gen_extrh_i64_i32(hi, arg);
2655     }
2656 }
2657 
2658 void tcg_gen_extr32_i64(TCGv_i64 lo, TCGv_i64 hi, TCGv_i64 arg)
2659 {
2660     tcg_gen_ext32u_i64(lo, arg);
2661     tcg_gen_shri_i64(hi, arg, 32);
2662 }
2663 
2664 /* QEMU specific operations.  */
2665 
2666 void tcg_gen_exit_tb(TranslationBlock *tb, unsigned idx)
2667 {
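         /* The exit index is packed into the low bits of the aligned
            TB pointer.  */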
2668     uintptr_t val = (uintptr_t)tb + idx;
2669 
2670     if (tb == NULL) {
2671         tcg_debug_assert(idx == 0);
2672     } else if (idx <= TB_EXIT_IDXMAX) {
2673 #ifdef CONFIG_DEBUG_TCG
2674         /* This is an exit following a goto_tb.  Verify that we have
2675            seen this numbered exit before, via tcg_gen_goto_tb.  */
2676         tcg_debug_assert(tcg_ctx->goto_tb_issue_mask & (1 << idx));
2677 #endif
2678         /* When not chaining, exit without indicating a link.  */
2679         if (qemu_loglevel_mask(CPU_LOG_TB_NOCHAIN)) {
2680             val = 0;
2681         }
2682     } else {
2683         /* This is an exit via the exitreq label.  */
2684         tcg_debug_assert(idx == TB_EXIT_REQUESTED);
2685     }
2686 
2687     tcg_gen_op1i(INDEX_op_exit_tb, val);
2688 }
2689 
2690 void tcg_gen_goto_tb(unsigned idx)
2691 {
2692     /* We only support two chained exits.  */
2693     tcg_debug_assert(idx <= TB_EXIT_IDXMAX);
2694 #ifdef CONFIG_DEBUG_TCG
2695     /* Verify that we haven't seen this numbered exit before.  */
2696     tcg_debug_assert((tcg_ctx->goto_tb_issue_mask & (1 << idx)) == 0);
2697     tcg_ctx->goto_tb_issue_mask |= 1 << idx;
2698 #endif
2699     /* When not chaining, we simply fall through to the "fallback" exit.  */
2700     if (!qemu_loglevel_mask(CPU_LOG_TB_NOCHAIN)) {
2701         tcg_gen_op1i(INDEX_op_goto_tb, idx);
2702     }
2703 }
2704 
2705 void tcg_gen_lookup_and_goto_ptr(void)
2706 {
2707     if (TCG_TARGET_HAS_goto_ptr && !qemu_loglevel_mask(CPU_LOG_TB_NOCHAIN)) {
2708         TCGv_ptr ptr = tcg_temp_new_ptr();
2709         gen_helper_lookup_tb_ptr(ptr, cpu_env);
2710         tcg_gen_op1i(INDEX_op_goto_ptr, tcgv_ptr_arg(ptr));
2711         tcg_temp_free_ptr(ptr);
2712     } else {
2713         tcg_gen_exit_tb(NULL, 0);
2714     }
2715 }
2716 
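     /* Strip memop bits that carry no information for this access: byte
        swapping of a single byte, the sign of a store, and the sign of a
        32-bit load into a 32-bit result.  */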
2717 static inline MemOp tcg_canonicalize_memop(MemOp op, bool is64, bool st)
2718 {
2719     /* Trigger the asserts within as early as possible.  */
2720     (void)get_alignment_bits(op);
2721 
2722     switch (op & MO_SIZE) {
2723     case MO_8:
2724         op &= ~MO_BSWAP;
2725         break;
2726     case MO_16:
2727         break;
2728     case MO_32:
2729         if (!is64) {
2730             op &= ~MO_SIGN;
2731         }
2732         break;
2733     case MO_64:
2734         if (!is64) {
2735             tcg_abort();
2736         }
2737         break;
2738     }
2739     if (st) {
2740         op &= ~MO_SIGN;
2741     }
2742     return op;
2743 }
2744 
2745 static void gen_ldst_i32(TCGOpcode opc, TCGv_i32 val, TCGv addr,
2746                          MemOp memop, TCGArg idx)
2747 {
2748     TCGMemOpIdx oi = make_memop_idx(memop, idx);
2749 #if TARGET_LONG_BITS == 32
2750     tcg_gen_op3i_i32(opc, val, addr, oi);
2751 #else
2752     if (TCG_TARGET_REG_BITS == 32) {
2753         tcg_gen_op4i_i32(opc, val, TCGV_LOW(addr), TCGV_HIGH(addr), oi);
2754     } else {
2755         tcg_gen_op3(opc, tcgv_i32_arg(val), tcgv_i64_arg(addr), oi);
2756     }
2757 #endif
2758 }
2759 
2760 static void gen_ldst_i64(TCGOpcode opc, TCGv_i64 val, TCGv addr,
2761                          MemOp memop, TCGArg idx)
2762 {
2763     TCGMemOpIdx oi = make_memop_idx(memop, idx);
2764 #if TARGET_LONG_BITS == 32
2765     if (TCG_TARGET_REG_BITS == 32) {
2766         tcg_gen_op4i_i32(opc, TCGV_LOW(val), TCGV_HIGH(val), addr, oi);
2767     } else {
2768         tcg_gen_op3(opc, tcgv_i64_arg(val), tcgv_i32_arg(addr), oi);
2769     }
2770 #else
2771     if (TCG_TARGET_REG_BITS == 32) {
2772         tcg_gen_op5i_i32(opc, TCGV_LOW(val), TCGV_HIGH(val),
2773                          TCGV_LOW(addr), TCGV_HIGH(addr), oi);
2774     } else {
2775         tcg_gen_op3i_i64(opc, val, addr, oi);
2776     }
2777 #endif
2778 }
2779 
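     /* Emit a barrier only for ordering that the guest memory model
        requires and that the host does not already provide.  */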
2780 static void tcg_gen_req_mo(TCGBar type)
2781 {
2782 #ifdef TCG_GUEST_DEFAULT_MO
2783     type &= TCG_GUEST_DEFAULT_MO;
2784 #endif
2785     type &= ~TCG_TARGET_DEFAULT_MO;
2786     if (type) {
2787         tcg_gen_mb(type | TCG_BAR_SC);
2788     }
2789 }
2790 
2791 void tcg_gen_qemu_ld_i32(TCGv_i32 val, TCGv addr, TCGArg idx, MemOp memop)
2792 {
2793     MemOp orig_memop;
2794 
2795     tcg_gen_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
2796     memop = tcg_canonicalize_memop(memop, 0, 0);
2797     trace_guest_mem_before_tcg(tcg_ctx->cpu, cpu_env,
2798                                addr, trace_mem_get_info(memop, 0));
2799 
2800     orig_memop = memop;
2801     if (!TCG_TARGET_HAS_MEMORY_BSWAP && (memop & MO_BSWAP)) {
2802         memop &= ~MO_BSWAP;
2803         /* The bswap primitive requires zero-extended input.  */
2804         if ((memop & MO_SSIZE) == MO_SW) {
2805             memop &= ~MO_SIGN;
2806         }
2807     }
2808 
2809     gen_ldst_i32(INDEX_op_qemu_ld_i32, val, addr, memop, idx);
2810 
2811     if ((orig_memop ^ memop) & MO_BSWAP) {
2812         switch (orig_memop & MO_SIZE) {
2813         case MO_16:
2814             tcg_gen_bswap16_i32(val, val);
2815             if (orig_memop & MO_SIGN) {
2816                 tcg_gen_ext16s_i32(val, val);
2817             }
2818             break;
2819         case MO_32:
2820             tcg_gen_bswap32_i32(val, val);
2821             break;
2822         default:
2823             g_assert_not_reached();
2824         }
2825     }
2826 }
2827 
2828 void tcg_gen_qemu_st_i32(TCGv_i32 val, TCGv addr, TCGArg idx, MemOp memop)
2829 {
2830     TCGv_i32 swap = NULL;
2831 
2832     tcg_gen_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST);
2833     memop = tcg_canonicalize_memop(memop, 0, 1);
2834     trace_guest_mem_before_tcg(tcg_ctx->cpu, cpu_env,
2835                                addr, trace_mem_get_info(memop, 1));
2836 
2837     if (!TCG_TARGET_HAS_MEMORY_BSWAP && (memop & MO_BSWAP)) {
2838         swap = tcg_temp_new_i32();
2839         switch (memop & MO_SIZE) {
2840         case MO_16:
2841             tcg_gen_ext16u_i32(swap, val);
2842             tcg_gen_bswap16_i32(swap, swap);
2843             break;
2844         case MO_32:
2845             tcg_gen_bswap32_i32(swap, val);
2846             break;
2847         default:
2848             g_assert_not_reached();
2849         }
2850         val = swap;
2851         memop &= ~MO_BSWAP;
2852     }
2853 
2854     gen_ldst_i32(INDEX_op_qemu_st_i32, val, addr, memop, idx);
2855 
2856     if (swap) {
2857         tcg_temp_free_i32(swap);
2858     }
2859 }
2860 
2861 void tcg_gen_qemu_ld_i64(TCGv_i64 val, TCGv addr, TCGArg idx, MemOp memop)
2862 {
2863     MemOp orig_memop;
2864 
2865     if (TCG_TARGET_REG_BITS == 32 && (memop & MO_SIZE) < MO_64) {
2866         tcg_gen_qemu_ld_i32(TCGV_LOW(val), addr, idx, memop);
2867         if (memop & MO_SIGN) {
2868             tcg_gen_sari_i32(TCGV_HIGH(val), TCGV_LOW(val), 31);
2869         } else {
2870             tcg_gen_movi_i32(TCGV_HIGH(val), 0);
2871         }
2872         return;
2873     }
2874 
2875     tcg_gen_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
2876     memop = tcg_canonicalize_memop(memop, 1, 0);
2877     trace_guest_mem_before_tcg(tcg_ctx->cpu, cpu_env,
2878                                addr, trace_mem_get_info(memop, 0));
2879 
2880     orig_memop = memop;
2881     if (!TCG_TARGET_HAS_MEMORY_BSWAP && (memop & MO_BSWAP)) {
2882         memop &= ~MO_BSWAP;
2883         /* The bswap primitive requires zero-extended input.  */
2884         if ((memop & MO_SIGN) && (memop & MO_SIZE) < MO_64) {
2885             memop &= ~MO_SIGN;
2886         }
2887     }
2888 
2889     gen_ldst_i64(INDEX_op_qemu_ld_i64, val, addr, memop, idx);
2890 
2891     if ((orig_memop ^ memop) & MO_BSWAP) {
2892         switch (orig_memop & MO_SIZE) {
2893         case MO_16:
2894             tcg_gen_bswap16_i64(val, val);
2895             if (orig_memop & MO_SIGN) {
2896                 tcg_gen_ext16s_i64(val, val);
2897             }
2898             break;
2899         case MO_32:
2900             tcg_gen_bswap32_i64(val, val);
2901             if (orig_memop & MO_SIGN) {
2902                 tcg_gen_ext32s_i64(val, val);
2903             }
2904             break;
2905         case MO_64:
2906             tcg_gen_bswap64_i64(val, val);
2907             break;
2908         default:
2909             g_assert_not_reached();
2910         }
2911     }
2912 }
2913 
2914 void tcg_gen_qemu_st_i64(TCGv_i64 val, TCGv addr, TCGArg idx, MemOp memop)
2915 {
2916     TCGv_i64 swap = NULL;
2917 
2918     if (TCG_TARGET_REG_BITS == 32 && (memop & MO_SIZE) < MO_64) {
2919         tcg_gen_qemu_st_i32(TCGV_LOW(val), addr, idx, memop);
2920         return;
2921     }
2922 
2923     tcg_gen_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST);
2924     memop = tcg_canonicalize_memop(memop, 1, 1);
2925     trace_guest_mem_before_tcg(tcg_ctx->cpu, cpu_env,
2926                                addr, trace_mem_get_info(memop, 1));
2927 
2928     if (!TCG_TARGET_HAS_MEMORY_BSWAP && (memop & MO_BSWAP)) {
2929         swap = tcg_temp_new_i64();
2930         switch (memop & MO_SIZE) {
2931         case MO_16:
2932             tcg_gen_ext16u_i64(swap, val);
2933             tcg_gen_bswap16_i64(swap, swap);
2934             break;
2935         case MO_32:
2936             tcg_gen_ext32u_i64(swap, val);
2937             tcg_gen_bswap32_i64(swap, swap);
2938             break;
2939         case MO_64:
2940             tcg_gen_bswap64_i64(swap, val);
2941             break;
2942         default:
2943             g_assert_not_reached();
2944         }
2945         val = swap;
2946         memop &= ~MO_BSWAP;
2947     }
2948 
2949     gen_ldst_i64(INDEX_op_qemu_st_i64, val, addr, memop, idx);
2950 
2951     if (swap) {
2952         tcg_temp_free_i64(swap);
2953     }
2954 }
2955 
2956 static void tcg_gen_ext_i32(TCGv_i32 ret, TCGv_i32 val, MemOp opc)
2957 {
2958     switch (opc & MO_SSIZE) {
2959     case MO_SB:
2960         tcg_gen_ext8s_i32(ret, val);
2961         break;
2962     case MO_UB:
2963         tcg_gen_ext8u_i32(ret, val);
2964         break;
2965     case MO_SW:
2966         tcg_gen_ext16s_i32(ret, val);
2967         break;
2968     case MO_UW:
2969         tcg_gen_ext16u_i32(ret, val);
2970         break;
2971     default:
2972         tcg_gen_mov_i32(ret, val);
2973         break;
2974     }
2975 }
2976 
2977 static void tcg_gen_ext_i64(TCGv_i64 ret, TCGv_i64 val, MemOp opc)
2978 {
2979     switch (opc & MO_SSIZE) {
2980     case MO_SB:
2981         tcg_gen_ext8s_i64(ret, val);
2982         break;
2983     case MO_UB:
2984         tcg_gen_ext8u_i64(ret, val);
2985         break;
2986     case MO_SW:
2987         tcg_gen_ext16s_i64(ret, val);
2988         break;
2989     case MO_UW:
2990         tcg_gen_ext16u_i64(ret, val);
2991         break;
2992     case MO_SL:
2993         tcg_gen_ext32s_i64(ret, val);
2994         break;
2995     case MO_UL:
2996         tcg_gen_ext32u_i64(ret, val);
2997         break;
2998     default:
2999         tcg_gen_mov_i64(ret, val);
3000         break;
3001     }
3002 }
3003 
3004 #ifdef CONFIG_SOFTMMU
3005 typedef void (*gen_atomic_cx_i32)(TCGv_i32, TCGv_env, TCGv,
3006                                   TCGv_i32, TCGv_i32, TCGv_i32);
3007 typedef void (*gen_atomic_cx_i64)(TCGv_i64, TCGv_env, TCGv,
3008                                   TCGv_i64, TCGv_i64, TCGv_i32);
3009 typedef void (*gen_atomic_op_i32)(TCGv_i32, TCGv_env, TCGv,
3010                                   TCGv_i32, TCGv_i32);
3011 typedef void (*gen_atomic_op_i64)(TCGv_i64, TCGv_env, TCGv,
3012                                   TCGv_i64, TCGv_i32);
3013 #else
3014 typedef void (*gen_atomic_cx_i32)(TCGv_i32, TCGv_env, TCGv, TCGv_i32, TCGv_i32);
3015 typedef void (*gen_atomic_cx_i64)(TCGv_i64, TCGv_env, TCGv, TCGv_i64, TCGv_i64);
3016 typedef void (*gen_atomic_op_i32)(TCGv_i32, TCGv_env, TCGv, TCGv_i32);
3017 typedef void (*gen_atomic_op_i64)(TCGv_i64, TCGv_env, TCGv, TCGv_i64);
3018 #endif
3019 
3020 #ifdef CONFIG_ATOMIC64
3021 # define WITH_ATOMIC64(X) X,
3022 #else
3023 # define WITH_ATOMIC64(X)
3024 #endif
3025 
3026 static void * const table_cmpxchg[16] = {
3027     [MO_8] = gen_helper_atomic_cmpxchgb,
3028     [MO_16 | MO_LE] = gen_helper_atomic_cmpxchgw_le,
3029     [MO_16 | MO_BE] = gen_helper_atomic_cmpxchgw_be,
3030     [MO_32 | MO_LE] = gen_helper_atomic_cmpxchgl_le,
3031     [MO_32 | MO_BE] = gen_helper_atomic_cmpxchgl_be,
3032     WITH_ATOMIC64([MO_64 | MO_LE] = gen_helper_atomic_cmpxchgq_le)
3033     WITH_ATOMIC64([MO_64 | MO_BE] = gen_helper_atomic_cmpxchgq_be)
3034 };
3035 
3036 void tcg_gen_atomic_cmpxchg_i32(TCGv_i32 retv, TCGv addr, TCGv_i32 cmpv,
3037                                 TCGv_i32 newv, TCGArg idx, MemOp memop)
3038 {
3039     memop = tcg_canonicalize_memop(memop, 0, 0);
3040 
3041     if (!(tcg_ctx->tb_cflags & CF_PARALLEL)) {
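             /* Serial context: expand as a plain load / compare / store.  */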
3042         TCGv_i32 t1 = tcg_temp_new_i32();
3043         TCGv_i32 t2 = tcg_temp_new_i32();
3044 
3045         tcg_gen_ext_i32(t2, cmpv, memop & MO_SIZE);
3046 
3047         tcg_gen_qemu_ld_i32(t1, addr, idx, memop & ~MO_SIGN);
3048         tcg_gen_movcond_i32(TCG_COND_EQ, t2, t1, t2, newv, t1);
3049         tcg_gen_qemu_st_i32(t2, addr, idx, memop);
3050         tcg_temp_free_i32(t2);
3051 
3052         if (memop & MO_SIGN) {
3053             tcg_gen_ext_i32(retv, t1, memop);
3054         } else {
3055             tcg_gen_mov_i32(retv, t1);
3056         }
3057         tcg_temp_free_i32(t1);
3058     } else {
3059         gen_atomic_cx_i32 gen;
3060 
3061         gen = table_cmpxchg[memop & (MO_SIZE | MO_BSWAP)];
3062         tcg_debug_assert(gen != NULL);
3063 
3064 #ifdef CONFIG_SOFTMMU
3065         {
3066             TCGv_i32 oi = tcg_const_i32(make_memop_idx(memop & ~MO_SIGN, idx));
3067             gen(retv, cpu_env, addr, cmpv, newv, oi);
3068             tcg_temp_free_i32(oi);
3069         }
3070 #else
3071         gen(retv, cpu_env, addr, cmpv, newv);
3072 #endif
3073 
3074         if (memop & MO_SIGN) {
3075             tcg_gen_ext_i32(retv, retv, memop);
3076         }
3077     }
3078 }
3079 
3080 void tcg_gen_atomic_cmpxchg_i64(TCGv_i64 retv, TCGv addr, TCGv_i64 cmpv,
3081                                 TCGv_i64 newv, TCGArg idx, MemOp memop)
3082 {
3083     memop = tcg_canonicalize_memop(memop, 1, 0);
3084 
3085     if (!(tcg_ctx->tb_cflags & CF_PARALLEL)) {
3086         TCGv_i64 t1 = tcg_temp_new_i64();
3087         TCGv_i64 t2 = tcg_temp_new_i64();
3088 
3089         tcg_gen_ext_i64(t2, cmpv, memop & MO_SIZE);
3090 
3091         tcg_gen_qemu_ld_i64(t1, addr, idx, memop & ~MO_SIGN);
3092         tcg_gen_movcond_i64(TCG_COND_EQ, t2, t1, t2, newv, t1);
3093         tcg_gen_qemu_st_i64(t2, addr, idx, memop);
3094         tcg_temp_free_i64(t2);
3095 
3096         if (memop & MO_SIGN) {
3097             tcg_gen_ext_i64(retv, t1, memop);
3098         } else {
3099             tcg_gen_mov_i64(retv, t1);
3100         }
3101         tcg_temp_free_i64(t1);
3102     } else if ((memop & MO_SIZE) == MO_64) {
3103 #ifdef CONFIG_ATOMIC64
3104         gen_atomic_cx_i64 gen;
3105 
3106         gen = table_cmpxchg[memop & (MO_SIZE | MO_BSWAP)];
3107         tcg_debug_assert(gen != NULL);
3108 
3109 #ifdef CONFIG_SOFTMMU
3110         {
3111             TCGv_i32 oi = tcg_const_i32(make_memop_idx(memop, idx));
3112             gen(retv, cpu_env, addr, cmpv, newv, oi);
3113             tcg_temp_free_i32(oi);
3114         }
3115 #else
3116         gen(retv, cpu_env, addr, cmpv, newv);
3117 #endif
3118 #else
3119         gen_helper_exit_atomic(cpu_env);
3120         /* Produce a result, so that we have a well-formed opcode stream
3121            with respect to uses of the result in the (dead) code following.  */
3122         tcg_gen_movi_i64(retv, 0);
3123 #endif /* CONFIG_ATOMIC64 */
3124     } else {
3125         TCGv_i32 c32 = tcg_temp_new_i32();
3126         TCGv_i32 n32 = tcg_temp_new_i32();
3127         TCGv_i32 r32 = tcg_temp_new_i32();
3128 
3129         tcg_gen_extrl_i64_i32(c32, cmpv);
3130         tcg_gen_extrl_i64_i32(n32, newv);
3131         tcg_gen_atomic_cmpxchg_i32(r32, addr, c32, n32, idx, memop & ~MO_SIGN);
3132         tcg_temp_free_i32(c32);
3133         tcg_temp_free_i32(n32);
3134 
3135         tcg_gen_extu_i32_i64(retv, r32);
3136         tcg_temp_free_i32(r32);
3137 
3138         if (memop & MO_SIGN) {
3139             tcg_gen_ext_i64(retv, retv, memop);
3140         }
3141     }
3142 }
3143 
3144 static void do_nonatomic_op_i32(TCGv_i32 ret, TCGv addr, TCGv_i32 val,
3145                                 TCGArg idx, MemOp memop, bool new_val,
3146                                 void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32))
3147 {
3148     TCGv_i32 t1 = tcg_temp_new_i32();
3149     TCGv_i32 t2 = tcg_temp_new_i32();
3150 
3151     memop = tcg_canonicalize_memop(memop, 0, 0);
3152 
3153     tcg_gen_qemu_ld_i32(t1, addr, idx, memop & ~MO_SIGN);
3154     gen(t2, t1, val);
3155     tcg_gen_qemu_st_i32(t2, addr, idx, memop);
3156 
3157     tcg_gen_ext_i32(ret, (new_val ? t2 : t1), memop);
3158     tcg_temp_free_i32(t1);
3159     tcg_temp_free_i32(t2);
3160 }
3161 
3162 static void do_atomic_op_i32(TCGv_i32 ret, TCGv addr, TCGv_i32 val,
3163                              TCGArg idx, MemOp memop, void * const table[])
3164 {
3165     gen_atomic_op_i32 gen;
3166 
3167     memop = tcg_canonicalize_memop(memop, 0, 0);
3168 
3169     gen = table[memop & (MO_SIZE | MO_BSWAP)];
3170     tcg_debug_assert(gen != NULL);
3171 
3172 #ifdef CONFIG_SOFTMMU
3173     {
3174         TCGv_i32 oi = tcg_const_i32(make_memop_idx(memop & ~MO_SIGN, idx));
3175         gen(ret, cpu_env, addr, val, oi);
3176         tcg_temp_free_i32(oi);
3177     }
3178 #else
3179     gen(ret, cpu_env, addr, val);
3180 #endif
3181 
3182     if (memop & MO_SIGN) {
3183         tcg_gen_ext_i32(ret, ret, memop);
3184     }
3185 }
3186 
3187 static void do_nonatomic_op_i64(TCGv_i64 ret, TCGv addr, TCGv_i64 val,
3188                                 TCGArg idx, MemOp memop, bool new_val,
3189                                 void (*gen)(TCGv_i64, TCGv_i64, TCGv_i64))
3190 {
3191     TCGv_i64 t1 = tcg_temp_new_i64();
3192     TCGv_i64 t2 = tcg_temp_new_i64();
3193 
3194     memop = tcg_canonicalize_memop(memop, 1, 0);
3195 
3196     tcg_gen_qemu_ld_i64(t1, addr, idx, memop & ~MO_SIGN);
3197     gen(t2, t1, val);
3198     tcg_gen_qemu_st_i64(t2, addr, idx, memop);
3199 
3200     tcg_gen_ext_i64(ret, (new_val ? t2 : t1), memop);
3201     tcg_temp_free_i64(t1);
3202     tcg_temp_free_i64(t2);
3203 }
3204 
3205 static void do_atomic_op_i64(TCGv_i64 ret, TCGv addr, TCGv_i64 val,
3206                              TCGArg idx, MemOp memop, void * const table[])
3207 {
3208     memop = tcg_canonicalize_memop(memop, 1, 0);
3209 
3210     if ((memop & MO_SIZE) == MO_64) {
3211 #ifdef CONFIG_ATOMIC64
3212         gen_atomic_op_i64 gen;
3213 
3214         gen = table[memop & (MO_SIZE | MO_BSWAP)];
3215         tcg_debug_assert(gen != NULL);
3216 
3217 #ifdef CONFIG_SOFTMMU
3218         {
3219             TCGv_i32 oi = tcg_const_i32(make_memop_idx(memop & ~MO_SIGN, idx));
3220             gen(ret, cpu_env, addr, val, oi);
3221             tcg_temp_free_i32(oi);
3222         }
3223 #else
3224         gen(ret, cpu_env, addr, val);
3225 #endif
3226 #else
3227         gen_helper_exit_atomic(cpu_env);
3228         /* Produce a result so that we have a well-formed opcode stream
3229            with respect to uses of the result in the (dead) code that follows.  */
3230         tcg_gen_movi_i64(ret, 0);
3231 #endif /* CONFIG_ATOMIC64 */
3232     } else {
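        /* Sub-64-bit sizes: perform the operation on 32 bits and widen
           the result, re-extending the sign if requested.  */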
3233         TCGv_i32 v32 = tcg_temp_new_i32();
3234         TCGv_i32 r32 = tcg_temp_new_i32();
3235 
3236         tcg_gen_extrl_i64_i32(v32, val);
3237         do_atomic_op_i32(r32, addr, v32, idx, memop & ~MO_SIGN, table);
3238         tcg_temp_free_i32(v32);
3239 
3240         tcg_gen_extu_i32_i64(ret, r32);
3241         tcg_temp_free_i32(r32);
3242 
3243         if (memop & MO_SIGN) {
3244             tcg_gen_ext_i64(ret, ret, memop);
3245         }
3246     }
3247 }
3248 
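/* Define the helper table and the _i32/_i64 entry points for one atomic
   read-modify-write operation.  OP names the tcg_gen_* operation used by
   the non-atomic expansion; NEW selects fetch-and-op (0, RET receives the
   old memory value) versus op-and-fetch (1, RET receives the new value).
   Each entry point chooses the atomic or the non-atomic expansion based
   on whether the TB may execute in parallel (CF_PARALLEL).  */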
3249 #define GEN_ATOMIC_HELPER(NAME, OP, NEW)                                \
3250 static void * const table_##NAME[16] = {                                \
3251     [MO_8] = gen_helper_atomic_##NAME##b,                               \
3252     [MO_16 | MO_LE] = gen_helper_atomic_##NAME##w_le,                   \
3253     [MO_16 | MO_BE] = gen_helper_atomic_##NAME##w_be,                   \
3254     [MO_32 | MO_LE] = gen_helper_atomic_##NAME##l_le,                   \
3255     [MO_32 | MO_BE] = gen_helper_atomic_##NAME##l_be,                   \
3256     WITH_ATOMIC64([MO_64 | MO_LE] = gen_helper_atomic_##NAME##q_le)     \
3257     WITH_ATOMIC64([MO_64 | MO_BE] = gen_helper_atomic_##NAME##q_be)     \
3258 };                                                                      \
3259 void tcg_gen_atomic_##NAME##_i32                                        \
3260     (TCGv_i32 ret, TCGv addr, TCGv_i32 val, TCGArg idx, MemOp memop)    \
3261 {                                                                       \
3262     if (tcg_ctx->tb_cflags & CF_PARALLEL) {                             \
3263         do_atomic_op_i32(ret, addr, val, idx, memop, table_##NAME);     \
3264     } else {                                                            \
3265         do_nonatomic_op_i32(ret, addr, val, idx, memop, NEW,            \
3266                             tcg_gen_##OP##_i32);                        \
3267     }                                                                   \
3268 }                                                                       \
3269 void tcg_gen_atomic_##NAME##_i64                                        \
3270     (TCGv_i64 ret, TCGv addr, TCGv_i64 val, TCGArg idx, MemOp memop)    \
3271 {                                                                       \
3272     if (tcg_ctx->tb_cflags & CF_PARALLEL) {                             \
3273         do_atomic_op_i64(ret, addr, val, idx, memop, table_##NAME);     \
3274     } else {                                                            \
3275         do_nonatomic_op_i64(ret, addr, val, idx, memop, NEW,            \
3276                             tcg_gen_##OP##_i64);                        \
3277     }                                                                   \
3278 }
3279 
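/* These return the value in memory before the operation.  */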
3280 GEN_ATOMIC_HELPER(fetch_add, add, 0)
3281 GEN_ATOMIC_HELPER(fetch_and, and, 0)
3282 GEN_ATOMIC_HELPER(fetch_or, or, 0)
3283 GEN_ATOMIC_HELPER(fetch_xor, xor, 0)
3284 GEN_ATOMIC_HELPER(fetch_smin, smin, 0)
3285 GEN_ATOMIC_HELPER(fetch_umin, umin, 0)
3286 GEN_ATOMIC_HELPER(fetch_smax, smax, 0)
3287 GEN_ATOMIC_HELPER(fetch_umax, umax, 0)
3288 
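/* These return the value in memory after the operation.  */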
3289 GEN_ATOMIC_HELPER(add_fetch, add, 1)
3290 GEN_ATOMIC_HELPER(and_fetch, and, 1)
3291 GEN_ATOMIC_HELPER(or_fetch, or, 1)
3292 GEN_ATOMIC_HELPER(xor_fetch, xor, 1)
3293 GEN_ATOMIC_HELPER(smin_fetch, smin, 1)
3294 GEN_ATOMIC_HELPER(umin_fetch, umin, 1)
3295 GEN_ATOMIC_HELPER(smax_fetch, smax, 1)
3296 GEN_ATOMIC_HELPER(umax_fetch, umax, 1)
3297 
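/* The "operation" for exchange ignores the old value and simply moves
   the new one, so the non-atomic expansion of xchg below reduces to a
   load, a store, and a move.  */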
3298 static void tcg_gen_mov2_i32(TCGv_i32 r, TCGv_i32 a, TCGv_i32 b)
3299 {
3300     tcg_gen_mov_i32(r, b);
3301 }
3302 
3303 static void tcg_gen_mov2_i64(TCGv_i64 r, TCGv_i64 a, TCGv_i64 b)
3304 {
3305     tcg_gen_mov_i64(r, b);
3306 }
3307 
3308 GEN_ATOMIC_HELPER(xchg, mov2, 0)
3309 
3310 #undef GEN_ATOMIC_HELPER
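
/* Illustrative sketch, not part of this file's interface: a target
   front end translating a guest atomic-add instruction might use the
   entry points generated above roughly as follows, where "dc->mem_idx"
   is an assumption standing in for the target's own mmu index and
   MO_TEUL selects a target-endian 32-bit access:

       TCGv_i32 oldv = tcg_temp_new_i32();
       tcg_gen_atomic_fetch_add_i32(oldv, addr, incr, dc->mem_idx,
                                    MO_TEUL);
       ... oldv now holds the pre-operation memory value ...
       tcg_temp_free_i32(oldv);

   Under CF_PARALLEL this emits a call to the matching helper from
   table_fetch_add; otherwise it expands to the load/add/store sequence
   of do_nonatomic_op_i32.  */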
3311