xref: /openbmc/qemu/tcg/tcg-op.c (revision c306cdb0)
1 /*
2  * Tiny Code Generator for QEMU
3  *
4  * Copyright (c) 2008 Fabrice Bellard
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24 
25 #include "qemu/osdep.h"
26 #include "exec/exec-all.h"
27 #include "tcg/tcg.h"
28 #include "tcg/tcg-op.h"
29 #include "tcg/tcg-mo.h"
30 #include "trace-tcg.h"
31 #include "exec/plugin-gen.h"
32 
/* Reduce the number of ifdefs below.  This assumes that all uses of
   TCGV_HIGH and TCGV_LOW are properly protected by a conditional that
   the compiler can eliminate.

   When TCG_TARGET_REG_BITS is 64, both names are redirected to
   functions that are only declared here.  NOTE(review): presumably no
   definition exists anywhere, so any use that is not eliminated shows
   up as a link error -- confirm.  */
#if TCG_TARGET_REG_BITS == 64
extern TCGv_i32 TCGV_LOW_link_error(TCGv_i64);
extern TCGv_i32 TCGV_HIGH_link_error(TCGv_i64);
#define TCGV_LOW  TCGV_LOW_link_error
#define TCGV_HIGH TCGV_HIGH_link_error
#endif
42 
43 void tcg_gen_op1(TCGOpcode opc, TCGArg a1)
44 {
45     TCGOp *op = tcg_emit_op(opc);
46     op->args[0] = a1;
47 }
48 
49 void tcg_gen_op2(TCGOpcode opc, TCGArg a1, TCGArg a2)
50 {
51     TCGOp *op = tcg_emit_op(opc);
52     op->args[0] = a1;
53     op->args[1] = a2;
54 }
55 
56 void tcg_gen_op3(TCGOpcode opc, TCGArg a1, TCGArg a2, TCGArg a3)
57 {
58     TCGOp *op = tcg_emit_op(opc);
59     op->args[0] = a1;
60     op->args[1] = a2;
61     op->args[2] = a3;
62 }
63 
64 void tcg_gen_op4(TCGOpcode opc, TCGArg a1, TCGArg a2, TCGArg a3, TCGArg a4)
65 {
66     TCGOp *op = tcg_emit_op(opc);
67     op->args[0] = a1;
68     op->args[1] = a2;
69     op->args[2] = a3;
70     op->args[3] = a4;
71 }
72 
73 void tcg_gen_op5(TCGOpcode opc, TCGArg a1, TCGArg a2, TCGArg a3,
74                  TCGArg a4, TCGArg a5)
75 {
76     TCGOp *op = tcg_emit_op(opc);
77     op->args[0] = a1;
78     op->args[1] = a2;
79     op->args[2] = a3;
80     op->args[3] = a4;
81     op->args[4] = a5;
82 }
83 
84 void tcg_gen_op6(TCGOpcode opc, TCGArg a1, TCGArg a2, TCGArg a3,
85                  TCGArg a4, TCGArg a5, TCGArg a6)
86 {
87     TCGOp *op = tcg_emit_op(opc);
88     op->args[0] = a1;
89     op->args[1] = a2;
90     op->args[2] = a3;
91     op->args[3] = a4;
92     op->args[4] = a5;
93     op->args[5] = a6;
94 }
95 
96 void tcg_gen_mb(TCGBar mb_type)
97 {
98     if (tcg_ctx->tb_cflags & CF_PARALLEL) {
99         tcg_gen_op1(INDEX_op_mb, mb_type);
100     }
101 }
102 
103 /* 32 bit ops */
104 
105 void tcg_gen_movi_i32(TCGv_i32 ret, int32_t arg)
106 {
107     tcg_gen_mov_i32(ret, tcg_constant_i32(arg));
108 }
109 
110 void tcg_gen_addi_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
111 {
112     /* some cases can be optimized here */
113     if (arg2 == 0) {
114         tcg_gen_mov_i32(ret, arg1);
115     } else {
116         tcg_gen_add_i32(ret, arg1, tcg_constant_i32(arg2));
117     }
118 }
119 
120 void tcg_gen_subfi_i32(TCGv_i32 ret, int32_t arg1, TCGv_i32 arg2)
121 {
122     if (arg1 == 0 && TCG_TARGET_HAS_neg_i32) {
123         /* Don't recurse with tcg_gen_neg_i32.  */
124         tcg_gen_op2_i32(INDEX_op_neg_i32, ret, arg2);
125     } else {
126         tcg_gen_sub_i32(ret, tcg_constant_i32(arg1), arg2);
127     }
128 }
129 
130 void tcg_gen_subi_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
131 {
132     /* some cases can be optimized here */
133     if (arg2 == 0) {
134         tcg_gen_mov_i32(ret, arg1);
135     } else {
136         tcg_gen_sub_i32(ret, arg1, tcg_constant_i32(arg2));
137     }
138 }
139 
140 void tcg_gen_andi_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
141 {
142     /* Some cases can be optimized here.  */
143     switch (arg2) {
144     case 0:
145         tcg_gen_movi_i32(ret, 0);
146         return;
147     case -1:
148         tcg_gen_mov_i32(ret, arg1);
149         return;
150     case 0xff:
151         /* Don't recurse with tcg_gen_ext8u_i32.  */
152         if (TCG_TARGET_HAS_ext8u_i32) {
153             tcg_gen_op2_i32(INDEX_op_ext8u_i32, ret, arg1);
154             return;
155         }
156         break;
157     case 0xffff:
158         if (TCG_TARGET_HAS_ext16u_i32) {
159             tcg_gen_op2_i32(INDEX_op_ext16u_i32, ret, arg1);
160             return;
161         }
162         break;
163     }
164 
165     tcg_gen_and_i32(ret, arg1, tcg_constant_i32(arg2));
166 }
167 
168 void tcg_gen_ori_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
169 {
170     /* Some cases can be optimized here.  */
171     if (arg2 == -1) {
172         tcg_gen_movi_i32(ret, -1);
173     } else if (arg2 == 0) {
174         tcg_gen_mov_i32(ret, arg1);
175     } else {
176         tcg_gen_or_i32(ret, arg1, tcg_constant_i32(arg2));
177     }
178 }
179 
180 void tcg_gen_xori_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
181 {
182     /* Some cases can be optimized here.  */
183     if (arg2 == 0) {
184         tcg_gen_mov_i32(ret, arg1);
185     } else if (arg2 == -1 && TCG_TARGET_HAS_not_i32) {
186         /* Don't recurse with tcg_gen_not_i32.  */
187         tcg_gen_op2_i32(INDEX_op_not_i32, ret, arg1);
188     } else {
189         tcg_gen_xor_i32(ret, arg1, tcg_constant_i32(arg2));
190     }
191 }
192 
193 void tcg_gen_shli_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
194 {
195     tcg_debug_assert(arg2 >= 0 && arg2 < 32);
196     if (arg2 == 0) {
197         tcg_gen_mov_i32(ret, arg1);
198     } else {
199         tcg_gen_shl_i32(ret, arg1, tcg_constant_i32(arg2));
200     }
201 }
202 
203 void tcg_gen_shri_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
204 {
205     tcg_debug_assert(arg2 >= 0 && arg2 < 32);
206     if (arg2 == 0) {
207         tcg_gen_mov_i32(ret, arg1);
208     } else {
209         tcg_gen_shr_i32(ret, arg1, tcg_constant_i32(arg2));
210     }
211 }
212 
213 void tcg_gen_sari_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
214 {
215     tcg_debug_assert(arg2 >= 0 && arg2 < 32);
216     if (arg2 == 0) {
217         tcg_gen_mov_i32(ret, arg1);
218     } else {
219         tcg_gen_sar_i32(ret, arg1, tcg_constant_i32(arg2));
220     }
221 }
222 
223 void tcg_gen_brcond_i32(TCGCond cond, TCGv_i32 arg1, TCGv_i32 arg2, TCGLabel *l)
224 {
225     if (cond == TCG_COND_ALWAYS) {
226         tcg_gen_br(l);
227     } else if (cond != TCG_COND_NEVER) {
228         l->refs++;
229         tcg_gen_op4ii_i32(INDEX_op_brcond_i32, arg1, arg2, cond, label_arg(l));
230     }
231 }
232 
233 void tcg_gen_brcondi_i32(TCGCond cond, TCGv_i32 arg1, int32_t arg2, TCGLabel *l)
234 {
235     if (cond == TCG_COND_ALWAYS) {
236         tcg_gen_br(l);
237     } else if (cond != TCG_COND_NEVER) {
238         tcg_gen_brcond_i32(cond, arg1, tcg_constant_i32(arg2), l);
239     }
240 }
241 
242 void tcg_gen_setcond_i32(TCGCond cond, TCGv_i32 ret,
243                          TCGv_i32 arg1, TCGv_i32 arg2)
244 {
245     if (cond == TCG_COND_ALWAYS) {
246         tcg_gen_movi_i32(ret, 1);
247     } else if (cond == TCG_COND_NEVER) {
248         tcg_gen_movi_i32(ret, 0);
249     } else {
250         tcg_gen_op4i_i32(INDEX_op_setcond_i32, ret, arg1, arg2, cond);
251     }
252 }
253 
254 void tcg_gen_setcondi_i32(TCGCond cond, TCGv_i32 ret,
255                           TCGv_i32 arg1, int32_t arg2)
256 {
257     tcg_gen_setcond_i32(cond, ret, arg1, tcg_constant_i32(arg2));
258 }
259 
260 void tcg_gen_muli_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
261 {
262     if (arg2 == 0) {
263         tcg_gen_movi_i32(ret, 0);
264     } else if (is_power_of_2(arg2)) {
265         tcg_gen_shli_i32(ret, arg1, ctz32(arg2));
266     } else {
267         tcg_gen_mul_i32(ret, arg1, tcg_constant_i32(arg2));
268     }
269 }
270 
271 void tcg_gen_div_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
272 {
273     if (TCG_TARGET_HAS_div_i32) {
274         tcg_gen_op3_i32(INDEX_op_div_i32, ret, arg1, arg2);
275     } else if (TCG_TARGET_HAS_div2_i32) {
276         TCGv_i32 t0 = tcg_temp_new_i32();
277         tcg_gen_sari_i32(t0, arg1, 31);
278         tcg_gen_op5_i32(INDEX_op_div2_i32, ret, t0, arg1, t0, arg2);
279         tcg_temp_free_i32(t0);
280     } else {
281         gen_helper_div_i32(ret, arg1, arg2);
282     }
283 }
284 
285 void tcg_gen_rem_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
286 {
287     if (TCG_TARGET_HAS_rem_i32) {
288         tcg_gen_op3_i32(INDEX_op_rem_i32, ret, arg1, arg2);
289     } else if (TCG_TARGET_HAS_div_i32) {
290         TCGv_i32 t0 = tcg_temp_new_i32();
291         tcg_gen_op3_i32(INDEX_op_div_i32, t0, arg1, arg2);
292         tcg_gen_mul_i32(t0, t0, arg2);
293         tcg_gen_sub_i32(ret, arg1, t0);
294         tcg_temp_free_i32(t0);
295     } else if (TCG_TARGET_HAS_div2_i32) {
296         TCGv_i32 t0 = tcg_temp_new_i32();
297         tcg_gen_sari_i32(t0, arg1, 31);
298         tcg_gen_op5_i32(INDEX_op_div2_i32, t0, ret, arg1, t0, arg2);
299         tcg_temp_free_i32(t0);
300     } else {
301         gen_helper_rem_i32(ret, arg1, arg2);
302     }
303 }
304 
305 void tcg_gen_divu_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
306 {
307     if (TCG_TARGET_HAS_div_i32) {
308         tcg_gen_op3_i32(INDEX_op_divu_i32, ret, arg1, arg2);
309     } else if (TCG_TARGET_HAS_div2_i32) {
310         TCGv_i32 t0 = tcg_temp_new_i32();
311         tcg_gen_movi_i32(t0, 0);
312         tcg_gen_op5_i32(INDEX_op_divu2_i32, ret, t0, arg1, t0, arg2);
313         tcg_temp_free_i32(t0);
314     } else {
315         gen_helper_divu_i32(ret, arg1, arg2);
316     }
317 }
318 
319 void tcg_gen_remu_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
320 {
321     if (TCG_TARGET_HAS_rem_i32) {
322         tcg_gen_op3_i32(INDEX_op_remu_i32, ret, arg1, arg2);
323     } else if (TCG_TARGET_HAS_div_i32) {
324         TCGv_i32 t0 = tcg_temp_new_i32();
325         tcg_gen_op3_i32(INDEX_op_divu_i32, t0, arg1, arg2);
326         tcg_gen_mul_i32(t0, t0, arg2);
327         tcg_gen_sub_i32(ret, arg1, t0);
328         tcg_temp_free_i32(t0);
329     } else if (TCG_TARGET_HAS_div2_i32) {
330         TCGv_i32 t0 = tcg_temp_new_i32();
331         tcg_gen_movi_i32(t0, 0);
332         tcg_gen_op5_i32(INDEX_op_divu2_i32, t0, ret, arg1, t0, arg2);
333         tcg_temp_free_i32(t0);
334     } else {
335         gen_helper_remu_i32(ret, arg1, arg2);
336     }
337 }
338 
339 void tcg_gen_andc_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
340 {
341     if (TCG_TARGET_HAS_andc_i32) {
342         tcg_gen_op3_i32(INDEX_op_andc_i32, ret, arg1, arg2);
343     } else {
344         TCGv_i32 t0 = tcg_temp_new_i32();
345         tcg_gen_not_i32(t0, arg2);
346         tcg_gen_and_i32(ret, arg1, t0);
347         tcg_temp_free_i32(t0);
348     }
349 }
350 
351 void tcg_gen_eqv_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
352 {
353     if (TCG_TARGET_HAS_eqv_i32) {
354         tcg_gen_op3_i32(INDEX_op_eqv_i32, ret, arg1, arg2);
355     } else {
356         tcg_gen_xor_i32(ret, arg1, arg2);
357         tcg_gen_not_i32(ret, ret);
358     }
359 }
360 
361 void tcg_gen_nand_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
362 {
363     if (TCG_TARGET_HAS_nand_i32) {
364         tcg_gen_op3_i32(INDEX_op_nand_i32, ret, arg1, arg2);
365     } else {
366         tcg_gen_and_i32(ret, arg1, arg2);
367         tcg_gen_not_i32(ret, ret);
368     }
369 }
370 
371 void tcg_gen_nor_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
372 {
373     if (TCG_TARGET_HAS_nor_i32) {
374         tcg_gen_op3_i32(INDEX_op_nor_i32, ret, arg1, arg2);
375     } else {
376         tcg_gen_or_i32(ret, arg1, arg2);
377         tcg_gen_not_i32(ret, ret);
378     }
379 }
380 
381 void tcg_gen_orc_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
382 {
383     if (TCG_TARGET_HAS_orc_i32) {
384         tcg_gen_op3_i32(INDEX_op_orc_i32, ret, arg1, arg2);
385     } else {
386         TCGv_i32 t0 = tcg_temp_new_i32();
387         tcg_gen_not_i32(t0, arg2);
388         tcg_gen_or_i32(ret, arg1, t0);
389         tcg_temp_free_i32(t0);
390     }
391 }
392 
393 void tcg_gen_clz_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
394 {
395     if (TCG_TARGET_HAS_clz_i32) {
396         tcg_gen_op3_i32(INDEX_op_clz_i32, ret, arg1, arg2);
397     } else if (TCG_TARGET_HAS_clz_i64) {
398         TCGv_i64 t1 = tcg_temp_new_i64();
399         TCGv_i64 t2 = tcg_temp_new_i64();
400         tcg_gen_extu_i32_i64(t1, arg1);
401         tcg_gen_extu_i32_i64(t2, arg2);
402         tcg_gen_addi_i64(t2, t2, 32);
403         tcg_gen_clz_i64(t1, t1, t2);
404         tcg_gen_extrl_i64_i32(ret, t1);
405         tcg_temp_free_i64(t1);
406         tcg_temp_free_i64(t2);
407         tcg_gen_subi_i32(ret, ret, 32);
408     } else {
409         gen_helper_clz_i32(ret, arg1, arg2);
410     }
411 }
412 
413 void tcg_gen_clzi_i32(TCGv_i32 ret, TCGv_i32 arg1, uint32_t arg2)
414 {
415     tcg_gen_clz_i32(ret, arg1, tcg_constant_i32(arg2));
416 }
417 
418 void tcg_gen_ctz_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
419 {
420     if (TCG_TARGET_HAS_ctz_i32) {
421         tcg_gen_op3_i32(INDEX_op_ctz_i32, ret, arg1, arg2);
422     } else if (TCG_TARGET_HAS_ctz_i64) {
423         TCGv_i64 t1 = tcg_temp_new_i64();
424         TCGv_i64 t2 = tcg_temp_new_i64();
425         tcg_gen_extu_i32_i64(t1, arg1);
426         tcg_gen_extu_i32_i64(t2, arg2);
427         tcg_gen_ctz_i64(t1, t1, t2);
428         tcg_gen_extrl_i64_i32(ret, t1);
429         tcg_temp_free_i64(t1);
430         tcg_temp_free_i64(t2);
431     } else if (TCG_TARGET_HAS_ctpop_i32
432                || TCG_TARGET_HAS_ctpop_i64
433                || TCG_TARGET_HAS_clz_i32
434                || TCG_TARGET_HAS_clz_i64) {
435         TCGv_i32 z, t = tcg_temp_new_i32();
436 
437         if (TCG_TARGET_HAS_ctpop_i32 || TCG_TARGET_HAS_ctpop_i64) {
438             tcg_gen_subi_i32(t, arg1, 1);
439             tcg_gen_andc_i32(t, t, arg1);
440             tcg_gen_ctpop_i32(t, t);
441         } else {
442             /* Since all non-x86 hosts have clz(0) == 32, don't fight it.  */
443             tcg_gen_neg_i32(t, arg1);
444             tcg_gen_and_i32(t, t, arg1);
445             tcg_gen_clzi_i32(t, t, 32);
446             tcg_gen_xori_i32(t, t, 31);
447         }
448         z = tcg_constant_i32(0);
449         tcg_gen_movcond_i32(TCG_COND_EQ, ret, arg1, z, arg2, t);
450         tcg_temp_free_i32(t);
451     } else {
452         gen_helper_ctz_i32(ret, arg1, arg2);
453     }
454 }
455 
456 void tcg_gen_ctzi_i32(TCGv_i32 ret, TCGv_i32 arg1, uint32_t arg2)
457 {
458     if (!TCG_TARGET_HAS_ctz_i32 && TCG_TARGET_HAS_ctpop_i32 && arg2 == 32) {
459         /* This equivalence has the advantage of not requiring a fixup.  */
460         TCGv_i32 t = tcg_temp_new_i32();
461         tcg_gen_subi_i32(t, arg1, 1);
462         tcg_gen_andc_i32(t, t, arg1);
463         tcg_gen_ctpop_i32(ret, t);
464         tcg_temp_free_i32(t);
465     } else {
466         tcg_gen_ctz_i32(ret, arg1, tcg_constant_i32(arg2));
467     }
468 }
469 
470 void tcg_gen_clrsb_i32(TCGv_i32 ret, TCGv_i32 arg)
471 {
472     if (TCG_TARGET_HAS_clz_i32) {
473         TCGv_i32 t = tcg_temp_new_i32();
474         tcg_gen_sari_i32(t, arg, 31);
475         tcg_gen_xor_i32(t, t, arg);
476         tcg_gen_clzi_i32(t, t, 32);
477         tcg_gen_subi_i32(ret, t, 1);
478         tcg_temp_free_i32(t);
479     } else {
480         gen_helper_clrsb_i32(ret, arg);
481     }
482 }
483 
484 void tcg_gen_ctpop_i32(TCGv_i32 ret, TCGv_i32 arg1)
485 {
486     if (TCG_TARGET_HAS_ctpop_i32) {
487         tcg_gen_op2_i32(INDEX_op_ctpop_i32, ret, arg1);
488     } else if (TCG_TARGET_HAS_ctpop_i64) {
489         TCGv_i64 t = tcg_temp_new_i64();
490         tcg_gen_extu_i32_i64(t, arg1);
491         tcg_gen_ctpop_i64(t, t);
492         tcg_gen_extrl_i64_i32(ret, t);
493         tcg_temp_free_i64(t);
494     } else {
495         gen_helper_ctpop_i32(ret, arg1);
496     }
497 }
498 
499 void tcg_gen_rotl_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
500 {
501     if (TCG_TARGET_HAS_rot_i32) {
502         tcg_gen_op3_i32(INDEX_op_rotl_i32, ret, arg1, arg2);
503     } else {
504         TCGv_i32 t0, t1;
505 
506         t0 = tcg_temp_new_i32();
507         t1 = tcg_temp_new_i32();
508         tcg_gen_shl_i32(t0, arg1, arg2);
509         tcg_gen_subfi_i32(t1, 32, arg2);
510         tcg_gen_shr_i32(t1, arg1, t1);
511         tcg_gen_or_i32(ret, t0, t1);
512         tcg_temp_free_i32(t0);
513         tcg_temp_free_i32(t1);
514     }
515 }
516 
517 void tcg_gen_rotli_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
518 {
519     tcg_debug_assert(arg2 >= 0 && arg2 < 32);
520     /* some cases can be optimized here */
521     if (arg2 == 0) {
522         tcg_gen_mov_i32(ret, arg1);
523     } else if (TCG_TARGET_HAS_rot_i32) {
524         tcg_gen_rotl_i32(ret, arg1, tcg_constant_i32(arg2));
525     } else {
526         TCGv_i32 t0, t1;
527         t0 = tcg_temp_new_i32();
528         t1 = tcg_temp_new_i32();
529         tcg_gen_shli_i32(t0, arg1, arg2);
530         tcg_gen_shri_i32(t1, arg1, 32 - arg2);
531         tcg_gen_or_i32(ret, t0, t1);
532         tcg_temp_free_i32(t0);
533         tcg_temp_free_i32(t1);
534     }
535 }
536 
537 void tcg_gen_rotr_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
538 {
539     if (TCG_TARGET_HAS_rot_i32) {
540         tcg_gen_op3_i32(INDEX_op_rotr_i32, ret, arg1, arg2);
541     } else {
542         TCGv_i32 t0, t1;
543 
544         t0 = tcg_temp_new_i32();
545         t1 = tcg_temp_new_i32();
546         tcg_gen_shr_i32(t0, arg1, arg2);
547         tcg_gen_subfi_i32(t1, 32, arg2);
548         tcg_gen_shl_i32(t1, arg1, t1);
549         tcg_gen_or_i32(ret, t0, t1);
550         tcg_temp_free_i32(t0);
551         tcg_temp_free_i32(t1);
552     }
553 }
554 
555 void tcg_gen_rotri_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
556 {
557     tcg_debug_assert(arg2 >= 0 && arg2 < 32);
558     /* some cases can be optimized here */
559     if (arg2 == 0) {
560         tcg_gen_mov_i32(ret, arg1);
561     } else {
562         tcg_gen_rotli_i32(ret, arg1, 32 - arg2);
563     }
564 }
565 
566 void tcg_gen_deposit_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2,
567                          unsigned int ofs, unsigned int len)
568 {
569     uint32_t mask;
570     TCGv_i32 t1;
571 
572     tcg_debug_assert(ofs < 32);
573     tcg_debug_assert(len > 0);
574     tcg_debug_assert(len <= 32);
575     tcg_debug_assert(ofs + len <= 32);
576 
577     if (len == 32) {
578         tcg_gen_mov_i32(ret, arg2);
579         return;
580     }
581     if (TCG_TARGET_HAS_deposit_i32 && TCG_TARGET_deposit_i32_valid(ofs, len)) {
582         tcg_gen_op5ii_i32(INDEX_op_deposit_i32, ret, arg1, arg2, ofs, len);
583         return;
584     }
585 
586     t1 = tcg_temp_new_i32();
587 
588     if (TCG_TARGET_HAS_extract2_i32) {
589         if (ofs + len == 32) {
590             tcg_gen_shli_i32(t1, arg1, len);
591             tcg_gen_extract2_i32(ret, t1, arg2, len);
592             goto done;
593         }
594         if (ofs == 0) {
595             tcg_gen_extract2_i32(ret, arg1, arg2, len);
596             tcg_gen_rotli_i32(ret, ret, len);
597             goto done;
598         }
599     }
600 
601     mask = (1u << len) - 1;
602     if (ofs + len < 32) {
603         tcg_gen_andi_i32(t1, arg2, mask);
604         tcg_gen_shli_i32(t1, t1, ofs);
605     } else {
606         tcg_gen_shli_i32(t1, arg2, ofs);
607     }
608     tcg_gen_andi_i32(ret, arg1, ~(mask << ofs));
609     tcg_gen_or_i32(ret, ret, t1);
610  done:
611     tcg_temp_free_i32(t1);
612 }
613 
614 void tcg_gen_deposit_z_i32(TCGv_i32 ret, TCGv_i32 arg,
615                            unsigned int ofs, unsigned int len)
616 {
617     tcg_debug_assert(ofs < 32);
618     tcg_debug_assert(len > 0);
619     tcg_debug_assert(len <= 32);
620     tcg_debug_assert(ofs + len <= 32);
621 
622     if (ofs + len == 32) {
623         tcg_gen_shli_i32(ret, arg, ofs);
624     } else if (ofs == 0) {
625         tcg_gen_andi_i32(ret, arg, (1u << len) - 1);
626     } else if (TCG_TARGET_HAS_deposit_i32
627                && TCG_TARGET_deposit_i32_valid(ofs, len)) {
628         TCGv_i32 zero = tcg_constant_i32(0);
629         tcg_gen_op5ii_i32(INDEX_op_deposit_i32, ret, zero, arg, ofs, len);
630     } else {
631         /* To help two-operand hosts we prefer to zero-extend first,
632            which allows ARG to stay live.  */
633         switch (len) {
634         case 16:
635             if (TCG_TARGET_HAS_ext16u_i32) {
636                 tcg_gen_ext16u_i32(ret, arg);
637                 tcg_gen_shli_i32(ret, ret, ofs);
638                 return;
639             }
640             break;
641         case 8:
642             if (TCG_TARGET_HAS_ext8u_i32) {
643                 tcg_gen_ext8u_i32(ret, arg);
644                 tcg_gen_shli_i32(ret, ret, ofs);
645                 return;
646             }
647             break;
648         }
649         /* Otherwise prefer zero-extension over AND for code size.  */
650         switch (ofs + len) {
651         case 16:
652             if (TCG_TARGET_HAS_ext16u_i32) {
653                 tcg_gen_shli_i32(ret, arg, ofs);
654                 tcg_gen_ext16u_i32(ret, ret);
655                 return;
656             }
657             break;
658         case 8:
659             if (TCG_TARGET_HAS_ext8u_i32) {
660                 tcg_gen_shli_i32(ret, arg, ofs);
661                 tcg_gen_ext8u_i32(ret, ret);
662                 return;
663             }
664             break;
665         }
666         tcg_gen_andi_i32(ret, arg, (1u << len) - 1);
667         tcg_gen_shli_i32(ret, ret, ofs);
668     }
669 }
670 
671 void tcg_gen_extract_i32(TCGv_i32 ret, TCGv_i32 arg,
672                          unsigned int ofs, unsigned int len)
673 {
674     tcg_debug_assert(ofs < 32);
675     tcg_debug_assert(len > 0);
676     tcg_debug_assert(len <= 32);
677     tcg_debug_assert(ofs + len <= 32);
678 
679     /* Canonicalize certain special cases, even if extract is supported.  */
680     if (ofs + len == 32) {
681         tcg_gen_shri_i32(ret, arg, 32 - len);
682         return;
683     }
684     if (ofs == 0) {
685         tcg_gen_andi_i32(ret, arg, (1u << len) - 1);
686         return;
687     }
688 
689     if (TCG_TARGET_HAS_extract_i32
690         && TCG_TARGET_extract_i32_valid(ofs, len)) {
691         tcg_gen_op4ii_i32(INDEX_op_extract_i32, ret, arg, ofs, len);
692         return;
693     }
694 
695     /* Assume that zero-extension, if available, is cheaper than a shift.  */
696     switch (ofs + len) {
697     case 16:
698         if (TCG_TARGET_HAS_ext16u_i32) {
699             tcg_gen_ext16u_i32(ret, arg);
700             tcg_gen_shri_i32(ret, ret, ofs);
701             return;
702         }
703         break;
704     case 8:
705         if (TCG_TARGET_HAS_ext8u_i32) {
706             tcg_gen_ext8u_i32(ret, arg);
707             tcg_gen_shri_i32(ret, ret, ofs);
708             return;
709         }
710         break;
711     }
712 
713     /* ??? Ideally we'd know what values are available for immediate AND.
714        Assume that 8 bits are available, plus the special case of 16,
715        so that we get ext8u, ext16u.  */
716     switch (len) {
717     case 1 ... 8: case 16:
718         tcg_gen_shri_i32(ret, arg, ofs);
719         tcg_gen_andi_i32(ret, ret, (1u << len) - 1);
720         break;
721     default:
722         tcg_gen_shli_i32(ret, arg, 32 - len - ofs);
723         tcg_gen_shri_i32(ret, ret, 32 - len);
724         break;
725     }
726 }
727 
728 void tcg_gen_sextract_i32(TCGv_i32 ret, TCGv_i32 arg,
729                           unsigned int ofs, unsigned int len)
730 {
731     tcg_debug_assert(ofs < 32);
732     tcg_debug_assert(len > 0);
733     tcg_debug_assert(len <= 32);
734     tcg_debug_assert(ofs + len <= 32);
735 
736     /* Canonicalize certain special cases, even if extract is supported.  */
737     if (ofs + len == 32) {
738         tcg_gen_sari_i32(ret, arg, 32 - len);
739         return;
740     }
741     if (ofs == 0) {
742         switch (len) {
743         case 16:
744             tcg_gen_ext16s_i32(ret, arg);
745             return;
746         case 8:
747             tcg_gen_ext8s_i32(ret, arg);
748             return;
749         }
750     }
751 
752     if (TCG_TARGET_HAS_sextract_i32
753         && TCG_TARGET_extract_i32_valid(ofs, len)) {
754         tcg_gen_op4ii_i32(INDEX_op_sextract_i32, ret, arg, ofs, len);
755         return;
756     }
757 
758     /* Assume that sign-extension, if available, is cheaper than a shift.  */
759     switch (ofs + len) {
760     case 16:
761         if (TCG_TARGET_HAS_ext16s_i32) {
762             tcg_gen_ext16s_i32(ret, arg);
763             tcg_gen_sari_i32(ret, ret, ofs);
764             return;
765         }
766         break;
767     case 8:
768         if (TCG_TARGET_HAS_ext8s_i32) {
769             tcg_gen_ext8s_i32(ret, arg);
770             tcg_gen_sari_i32(ret, ret, ofs);
771             return;
772         }
773         break;
774     }
775     switch (len) {
776     case 16:
777         if (TCG_TARGET_HAS_ext16s_i32) {
778             tcg_gen_shri_i32(ret, arg, ofs);
779             tcg_gen_ext16s_i32(ret, ret);
780             return;
781         }
782         break;
783     case 8:
784         if (TCG_TARGET_HAS_ext8s_i32) {
785             tcg_gen_shri_i32(ret, arg, ofs);
786             tcg_gen_ext8s_i32(ret, ret);
787             return;
788         }
789         break;
790     }
791 
792     tcg_gen_shli_i32(ret, arg, 32 - len - ofs);
793     tcg_gen_sari_i32(ret, ret, 32 - len);
794 }
795 
796 /*
797  * Extract 32-bits from a 64-bit input, ah:al, starting from ofs.
798  * Unlike tcg_gen_extract_i32 above, len is fixed at 32.
799  */
800 void tcg_gen_extract2_i32(TCGv_i32 ret, TCGv_i32 al, TCGv_i32 ah,
801                           unsigned int ofs)
802 {
803     tcg_debug_assert(ofs <= 32);
804     if (ofs == 0) {
805         tcg_gen_mov_i32(ret, al);
806     } else if (ofs == 32) {
807         tcg_gen_mov_i32(ret, ah);
808     } else if (al == ah) {
809         tcg_gen_rotri_i32(ret, al, ofs);
810     } else if (TCG_TARGET_HAS_extract2_i32) {
811         tcg_gen_op4i_i32(INDEX_op_extract2_i32, ret, al, ah, ofs);
812     } else {
813         TCGv_i32 t0 = tcg_temp_new_i32();
814         tcg_gen_shri_i32(t0, al, ofs);
815         tcg_gen_deposit_i32(ret, t0, ah, 32 - ofs, ofs);
816         tcg_temp_free_i32(t0);
817     }
818 }
819 
820 void tcg_gen_movcond_i32(TCGCond cond, TCGv_i32 ret, TCGv_i32 c1,
821                          TCGv_i32 c2, TCGv_i32 v1, TCGv_i32 v2)
822 {
823     if (cond == TCG_COND_ALWAYS) {
824         tcg_gen_mov_i32(ret, v1);
825     } else if (cond == TCG_COND_NEVER) {
826         tcg_gen_mov_i32(ret, v2);
827     } else if (TCG_TARGET_HAS_movcond_i32) {
828         tcg_gen_op6i_i32(INDEX_op_movcond_i32, ret, c1, c2, v1, v2, cond);
829     } else {
830         TCGv_i32 t0 = tcg_temp_new_i32();
831         TCGv_i32 t1 = tcg_temp_new_i32();
832         tcg_gen_setcond_i32(cond, t0, c1, c2);
833         tcg_gen_neg_i32(t0, t0);
834         tcg_gen_and_i32(t1, v1, t0);
835         tcg_gen_andc_i32(ret, v2, t0);
836         tcg_gen_or_i32(ret, ret, t1);
837         tcg_temp_free_i32(t0);
838         tcg_temp_free_i32(t1);
839     }
840 }
841 
842 void tcg_gen_add2_i32(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 al,
843                       TCGv_i32 ah, TCGv_i32 bl, TCGv_i32 bh)
844 {
845     if (TCG_TARGET_HAS_add2_i32) {
846         tcg_gen_op6_i32(INDEX_op_add2_i32, rl, rh, al, ah, bl, bh);
847     } else {
848         TCGv_i64 t0 = tcg_temp_new_i64();
849         TCGv_i64 t1 = tcg_temp_new_i64();
850         tcg_gen_concat_i32_i64(t0, al, ah);
851         tcg_gen_concat_i32_i64(t1, bl, bh);
852         tcg_gen_add_i64(t0, t0, t1);
853         tcg_gen_extr_i64_i32(rl, rh, t0);
854         tcg_temp_free_i64(t0);
855         tcg_temp_free_i64(t1);
856     }
857 }
858 
859 void tcg_gen_sub2_i32(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 al,
860                       TCGv_i32 ah, TCGv_i32 bl, TCGv_i32 bh)
861 {
862     if (TCG_TARGET_HAS_sub2_i32) {
863         tcg_gen_op6_i32(INDEX_op_sub2_i32, rl, rh, al, ah, bl, bh);
864     } else {
865         TCGv_i64 t0 = tcg_temp_new_i64();
866         TCGv_i64 t1 = tcg_temp_new_i64();
867         tcg_gen_concat_i32_i64(t0, al, ah);
868         tcg_gen_concat_i32_i64(t1, bl, bh);
869         tcg_gen_sub_i64(t0, t0, t1);
870         tcg_gen_extr_i64_i32(rl, rh, t0);
871         tcg_temp_free_i64(t0);
872         tcg_temp_free_i64(t1);
873     }
874 }
875 
876 void tcg_gen_mulu2_i32(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 arg1, TCGv_i32 arg2)
877 {
878     if (TCG_TARGET_HAS_mulu2_i32) {
879         tcg_gen_op4_i32(INDEX_op_mulu2_i32, rl, rh, arg1, arg2);
880     } else if (TCG_TARGET_HAS_muluh_i32) {
881         TCGv_i32 t = tcg_temp_new_i32();
882         tcg_gen_op3_i32(INDEX_op_mul_i32, t, arg1, arg2);
883         tcg_gen_op3_i32(INDEX_op_muluh_i32, rh, arg1, arg2);
884         tcg_gen_mov_i32(rl, t);
885         tcg_temp_free_i32(t);
886     } else {
887         TCGv_i64 t0 = tcg_temp_new_i64();
888         TCGv_i64 t1 = tcg_temp_new_i64();
889         tcg_gen_extu_i32_i64(t0, arg1);
890         tcg_gen_extu_i32_i64(t1, arg2);
891         tcg_gen_mul_i64(t0, t0, t1);
892         tcg_gen_extr_i64_i32(rl, rh, t0);
893         tcg_temp_free_i64(t0);
894         tcg_temp_free_i64(t1);
895     }
896 }
897 
898 void tcg_gen_muls2_i32(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 arg1, TCGv_i32 arg2)
899 {
900     if (TCG_TARGET_HAS_muls2_i32) {
901         tcg_gen_op4_i32(INDEX_op_muls2_i32, rl, rh, arg1, arg2);
902     } else if (TCG_TARGET_HAS_mulsh_i32) {
903         TCGv_i32 t = tcg_temp_new_i32();
904         tcg_gen_op3_i32(INDEX_op_mul_i32, t, arg1, arg2);
905         tcg_gen_op3_i32(INDEX_op_mulsh_i32, rh, arg1, arg2);
906         tcg_gen_mov_i32(rl, t);
907         tcg_temp_free_i32(t);
908     } else if (TCG_TARGET_REG_BITS == 32) {
909         TCGv_i32 t0 = tcg_temp_new_i32();
910         TCGv_i32 t1 = tcg_temp_new_i32();
911         TCGv_i32 t2 = tcg_temp_new_i32();
912         TCGv_i32 t3 = tcg_temp_new_i32();
913         tcg_gen_mulu2_i32(t0, t1, arg1, arg2);
914         /* Adjust for negative inputs.  */
915         tcg_gen_sari_i32(t2, arg1, 31);
916         tcg_gen_sari_i32(t3, arg2, 31);
917         tcg_gen_and_i32(t2, t2, arg2);
918         tcg_gen_and_i32(t3, t3, arg1);
919         tcg_gen_sub_i32(rh, t1, t2);
920         tcg_gen_sub_i32(rh, rh, t3);
921         tcg_gen_mov_i32(rl, t0);
922         tcg_temp_free_i32(t0);
923         tcg_temp_free_i32(t1);
924         tcg_temp_free_i32(t2);
925         tcg_temp_free_i32(t3);
926     } else {
927         TCGv_i64 t0 = tcg_temp_new_i64();
928         TCGv_i64 t1 = tcg_temp_new_i64();
929         tcg_gen_ext_i32_i64(t0, arg1);
930         tcg_gen_ext_i32_i64(t1, arg2);
931         tcg_gen_mul_i64(t0, t0, t1);
932         tcg_gen_extr_i64_i32(rl, rh, t0);
933         tcg_temp_free_i64(t0);
934         tcg_temp_free_i64(t1);
935     }
936 }
937 
938 void tcg_gen_mulsu2_i32(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 arg1, TCGv_i32 arg2)
939 {
940     if (TCG_TARGET_REG_BITS == 32) {
941         TCGv_i32 t0 = tcg_temp_new_i32();
942         TCGv_i32 t1 = tcg_temp_new_i32();
943         TCGv_i32 t2 = tcg_temp_new_i32();
944         tcg_gen_mulu2_i32(t0, t1, arg1, arg2);
945         /* Adjust for negative input for the signed arg1.  */
946         tcg_gen_sari_i32(t2, arg1, 31);
947         tcg_gen_and_i32(t2, t2, arg2);
948         tcg_gen_sub_i32(rh, t1, t2);
949         tcg_gen_mov_i32(rl, t0);
950         tcg_temp_free_i32(t0);
951         tcg_temp_free_i32(t1);
952         tcg_temp_free_i32(t2);
953     } else {
954         TCGv_i64 t0 = tcg_temp_new_i64();
955         TCGv_i64 t1 = tcg_temp_new_i64();
956         tcg_gen_ext_i32_i64(t0, arg1);
957         tcg_gen_extu_i32_i64(t1, arg2);
958         tcg_gen_mul_i64(t0, t0, t1);
959         tcg_gen_extr_i64_i32(rl, rh, t0);
960         tcg_temp_free_i64(t0);
961         tcg_temp_free_i64(t1);
962     }
963 }
964 
965 void tcg_gen_ext8s_i32(TCGv_i32 ret, TCGv_i32 arg)
966 {
967     if (TCG_TARGET_HAS_ext8s_i32) {
968         tcg_gen_op2_i32(INDEX_op_ext8s_i32, ret, arg);
969     } else {
970         tcg_gen_shli_i32(ret, arg, 24);
971         tcg_gen_sari_i32(ret, ret, 24);
972     }
973 }
974 
975 void tcg_gen_ext16s_i32(TCGv_i32 ret, TCGv_i32 arg)
976 {
977     if (TCG_TARGET_HAS_ext16s_i32) {
978         tcg_gen_op2_i32(INDEX_op_ext16s_i32, ret, arg);
979     } else {
980         tcg_gen_shli_i32(ret, arg, 16);
981         tcg_gen_sari_i32(ret, ret, 16);
982     }
983 }
984 
985 void tcg_gen_ext8u_i32(TCGv_i32 ret, TCGv_i32 arg)
986 {
987     if (TCG_TARGET_HAS_ext8u_i32) {
988         tcg_gen_op2_i32(INDEX_op_ext8u_i32, ret, arg);
989     } else {
990         tcg_gen_andi_i32(ret, arg, 0xffu);
991     }
992 }
993 
994 void tcg_gen_ext16u_i32(TCGv_i32 ret, TCGv_i32 arg)
995 {
996     if (TCG_TARGET_HAS_ext16u_i32) {
997         tcg_gen_op2_i32(INDEX_op_ext16u_i32, ret, arg);
998     } else {
999         tcg_gen_andi_i32(ret, arg, 0xffffu);
1000     }
1001 }
1002 
1003 void tcg_gen_bswap16_i32(TCGv_i32 ret, TCGv_i32 arg, int flags)
1004 {
1005     /* Only one extension flag may be present. */
1006     tcg_debug_assert(!(flags & TCG_BSWAP_OS) || !(flags & TCG_BSWAP_OZ));
1007 
1008     if (TCG_TARGET_HAS_bswap16_i32) {
1009         tcg_gen_op3i_i32(INDEX_op_bswap16_i32, ret, arg, flags);
1010     } else {
1011         TCGv_i32 t0 = tcg_temp_new_i32();
1012         TCGv_i32 t1 = tcg_temp_new_i32();
1013 
1014         tcg_gen_shri_i32(t0, arg, 8);
1015         if (!(flags & TCG_BSWAP_IZ)) {
1016             tcg_gen_ext8u_i32(t0, t0);
1017         }
1018 
1019         if (flags & TCG_BSWAP_OS) {
1020             tcg_gen_shli_i32(t1, arg, 24);
1021             tcg_gen_sari_i32(t1, t1, 16);
1022         } else if (flags & TCG_BSWAP_OZ) {
1023             tcg_gen_ext8u_i32(t1, arg);
1024             tcg_gen_shli_i32(t1, t1, 8);
1025         } else {
1026             tcg_gen_shli_i32(t1, arg, 8);
1027         }
1028 
1029         tcg_gen_or_i32(ret, t0, t1);
1030         tcg_temp_free_i32(t0);
1031         tcg_temp_free_i32(t1);
1032     }
1033 }
1034 
1035 void tcg_gen_bswap32_i32(TCGv_i32 ret, TCGv_i32 arg)
1036 {
1037     if (TCG_TARGET_HAS_bswap32_i32) {
1038         tcg_gen_op3i_i32(INDEX_op_bswap32_i32, ret, arg, 0);
1039     } else {
1040         TCGv_i32 t0 = tcg_temp_new_i32();
1041         TCGv_i32 t1 = tcg_temp_new_i32();
1042         TCGv_i32 t2 = tcg_constant_i32(0x00ff00ff);
1043 
1044                                         /* arg = abcd */
1045         tcg_gen_shri_i32(t0, arg, 8);   /*  t0 = .abc */
1046         tcg_gen_and_i32(t1, arg, t2);   /*  t1 = .b.d */
1047         tcg_gen_and_i32(t0, t0, t2);    /*  t0 = .a.c */
1048         tcg_gen_shli_i32(t1, t1, 8);    /*  t1 = b.d. */
1049         tcg_gen_or_i32(ret, t0, t1);    /* ret = badc */
1050 
1051         tcg_gen_shri_i32(t0, ret, 16);  /*  t0 = ..ba */
1052         tcg_gen_shli_i32(t1, ret, 16);  /*  t1 = dc.. */
1053         tcg_gen_or_i32(ret, t0, t1);    /* ret = dcba */
1054 
1055         tcg_temp_free_i32(t0);
1056         tcg_temp_free_i32(t1);
1057     }
1058 }
1059 
1060 void tcg_gen_smin_i32(TCGv_i32 ret, TCGv_i32 a, TCGv_i32 b)
1061 {
1062     tcg_gen_movcond_i32(TCG_COND_LT, ret, a, b, a, b);
1063 }
1064 
1065 void tcg_gen_umin_i32(TCGv_i32 ret, TCGv_i32 a, TCGv_i32 b)
1066 {
1067     tcg_gen_movcond_i32(TCG_COND_LTU, ret, a, b, a, b);
1068 }
1069 
1070 void tcg_gen_smax_i32(TCGv_i32 ret, TCGv_i32 a, TCGv_i32 b)
1071 {
1072     tcg_gen_movcond_i32(TCG_COND_LT, ret, a, b, b, a);
1073 }
1074 
1075 void tcg_gen_umax_i32(TCGv_i32 ret, TCGv_i32 a, TCGv_i32 b)
1076 {
1077     tcg_gen_movcond_i32(TCG_COND_LTU, ret, a, b, b, a);
1078 }
1079 
1080 void tcg_gen_abs_i32(TCGv_i32 ret, TCGv_i32 a)
1081 {
1082     TCGv_i32 t = tcg_temp_new_i32();
1083 
1084     tcg_gen_sari_i32(t, a, 31);
1085     tcg_gen_xor_i32(ret, a, t);
1086     tcg_gen_sub_i32(ret, ret, t);
1087     tcg_temp_free_i32(t);
1088 }
1089 
1090 /* 64-bit ops */
1091 
1092 #if TCG_TARGET_REG_BITS == 32
1093 /* These are all inline for TCG_TARGET_REG_BITS == 64.  */
1094 
1095 void tcg_gen_discard_i64(TCGv_i64 arg)
1096 {
1097     tcg_gen_discard_i32(TCGV_LOW(arg));
1098     tcg_gen_discard_i32(TCGV_HIGH(arg));
1099 }
1100 
1101 void tcg_gen_mov_i64(TCGv_i64 ret, TCGv_i64 arg)
1102 {
1103     TCGTemp *ts = tcgv_i64_temp(arg);
1104 
1105     /* Canonicalize TCGv_i64 TEMP_CONST into TCGv_i32 TEMP_CONST. */
1106     if (ts->kind == TEMP_CONST) {
1107         tcg_gen_movi_i64(ret, ts->val);
1108     } else {
1109         tcg_gen_mov_i32(TCGV_LOW(ret), TCGV_LOW(arg));
1110         tcg_gen_mov_i32(TCGV_HIGH(ret), TCGV_HIGH(arg));
1111     }
1112 }
1113 
1114 void tcg_gen_movi_i64(TCGv_i64 ret, int64_t arg)
1115 {
1116     tcg_gen_movi_i32(TCGV_LOW(ret), arg);
1117     tcg_gen_movi_i32(TCGV_HIGH(ret), arg >> 32);
1118 }
1119 
1120 void tcg_gen_ld8u_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset)
1121 {
1122     tcg_gen_ld8u_i32(TCGV_LOW(ret), arg2, offset);
1123     tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
1124 }
1125 
1126 void tcg_gen_ld8s_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset)
1127 {
1128     tcg_gen_ld8s_i32(TCGV_LOW(ret), arg2, offset);
1129     tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31);
1130 }
1131 
1132 void tcg_gen_ld16u_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset)
1133 {
1134     tcg_gen_ld16u_i32(TCGV_LOW(ret), arg2, offset);
1135     tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
1136 }
1137 
1138 void tcg_gen_ld16s_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset)
1139 {
1140     tcg_gen_ld16s_i32(TCGV_LOW(ret), arg2, offset);
1141     tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31);
1142 }
1143 
1144 void tcg_gen_ld32u_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset)
1145 {
1146     tcg_gen_ld_i32(TCGV_LOW(ret), arg2, offset);
1147     tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
1148 }
1149 
1150 void tcg_gen_ld32s_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset)
1151 {
1152     tcg_gen_ld_i32(TCGV_LOW(ret), arg2, offset);
1153     tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31);
1154 }
1155 
1156 void tcg_gen_ld_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset)
1157 {
1158     /* Since arg2 and ret have different types,
1159        they cannot be the same temporary */
1160 #ifdef HOST_WORDS_BIGENDIAN
1161     tcg_gen_ld_i32(TCGV_HIGH(ret), arg2, offset);
1162     tcg_gen_ld_i32(TCGV_LOW(ret), arg2, offset + 4);
1163 #else
1164     tcg_gen_ld_i32(TCGV_LOW(ret), arg2, offset);
1165     tcg_gen_ld_i32(TCGV_HIGH(ret), arg2, offset + 4);
1166 #endif
1167 }
1168 
1169 void tcg_gen_st_i64(TCGv_i64 arg1, TCGv_ptr arg2, tcg_target_long offset)
1170 {
1171 #ifdef HOST_WORDS_BIGENDIAN
1172     tcg_gen_st_i32(TCGV_HIGH(arg1), arg2, offset);
1173     tcg_gen_st_i32(TCGV_LOW(arg1), arg2, offset + 4);
1174 #else
1175     tcg_gen_st_i32(TCGV_LOW(arg1), arg2, offset);
1176     tcg_gen_st_i32(TCGV_HIGH(arg1), arg2, offset + 4);
1177 #endif
1178 }
1179 
1180 void tcg_gen_and_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
1181 {
1182     tcg_gen_and_i32(TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_LOW(arg2));
1183     tcg_gen_and_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), TCGV_HIGH(arg2));
1184 }
1185 
1186 void tcg_gen_or_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
1187 {
1188     tcg_gen_or_i32(TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_LOW(arg2));
1189     tcg_gen_or_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), TCGV_HIGH(arg2));
1190 }
1191 
1192 void tcg_gen_xor_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
1193 {
1194     tcg_gen_xor_i32(TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_LOW(arg2));
1195     tcg_gen_xor_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), TCGV_HIGH(arg2));
1196 }
1197 
1198 void tcg_gen_shl_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
1199 {
1200     gen_helper_shl_i64(ret, arg1, arg2);
1201 }
1202 
1203 void tcg_gen_shr_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
1204 {
1205     gen_helper_shr_i64(ret, arg1, arg2);
1206 }
1207 
1208 void tcg_gen_sar_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
1209 {
1210     gen_helper_sar_i64(ret, arg1, arg2);
1211 }
1212 
1213 void tcg_gen_mul_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
1214 {
1215     TCGv_i64 t0;
1216     TCGv_i32 t1;
1217 
1218     t0 = tcg_temp_new_i64();
1219     t1 = tcg_temp_new_i32();
1220 
1221     tcg_gen_mulu2_i32(TCGV_LOW(t0), TCGV_HIGH(t0),
1222                       TCGV_LOW(arg1), TCGV_LOW(arg2));
1223 
1224     tcg_gen_mul_i32(t1, TCGV_LOW(arg1), TCGV_HIGH(arg2));
1225     tcg_gen_add_i32(TCGV_HIGH(t0), TCGV_HIGH(t0), t1);
1226     tcg_gen_mul_i32(t1, TCGV_HIGH(arg1), TCGV_LOW(arg2));
1227     tcg_gen_add_i32(TCGV_HIGH(t0), TCGV_HIGH(t0), t1);
1228 
1229     tcg_gen_mov_i64(ret, t0);
1230     tcg_temp_free_i64(t0);
1231     tcg_temp_free_i32(t1);
1232 }
1233 
1234 #else
1235 
1236 void tcg_gen_movi_i64(TCGv_i64 ret, int64_t arg)
1237 {
1238     tcg_gen_mov_i64(ret, tcg_constant_i64(arg));
1239 }
1240 
#endif /* TCG_TARGET_REG_BITS == 32 */
1242 
1243 void tcg_gen_addi_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
1244 {
1245     /* some cases can be optimized here */
1246     if (arg2 == 0) {
1247         tcg_gen_mov_i64(ret, arg1);
1248     } else if (TCG_TARGET_REG_BITS == 64) {
1249         tcg_gen_add_i64(ret, arg1, tcg_constant_i64(arg2));
1250     } else {
1251         tcg_gen_add2_i32(TCGV_LOW(ret), TCGV_HIGH(ret),
1252                          TCGV_LOW(arg1), TCGV_HIGH(arg1),
1253                          tcg_constant_i32(arg2), tcg_constant_i32(arg2 >> 32));
1254     }
1255 }
1256 
1257 void tcg_gen_subfi_i64(TCGv_i64 ret, int64_t arg1, TCGv_i64 arg2)
1258 {
1259     if (arg1 == 0 && TCG_TARGET_HAS_neg_i64) {
1260         /* Don't recurse with tcg_gen_neg_i64.  */
1261         tcg_gen_op2_i64(INDEX_op_neg_i64, ret, arg2);
1262     } else if (TCG_TARGET_REG_BITS == 64) {
1263         tcg_gen_sub_i64(ret, tcg_constant_i64(arg1), arg2);
1264     } else {
1265         tcg_gen_sub2_i32(TCGV_LOW(ret), TCGV_HIGH(ret),
1266                          tcg_constant_i32(arg1), tcg_constant_i32(arg1 >> 32),
1267                          TCGV_LOW(arg2), TCGV_HIGH(arg2));
1268     }
1269 }
1270 
1271 void tcg_gen_subi_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
1272 {
1273     /* some cases can be optimized here */
1274     if (arg2 == 0) {
1275         tcg_gen_mov_i64(ret, arg1);
1276     } else if (TCG_TARGET_REG_BITS == 64) {
1277         tcg_gen_sub_i64(ret, arg1, tcg_constant_i64(arg2));
1278     } else {
1279         tcg_gen_sub2_i32(TCGV_LOW(ret), TCGV_HIGH(ret),
1280                          TCGV_LOW(arg1), TCGV_HIGH(arg1),
1281                          tcg_constant_i32(arg2), tcg_constant_i32(arg2 >> 32));
1282     }
1283 }
1284 
1285 void tcg_gen_andi_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
1286 {
1287     if (TCG_TARGET_REG_BITS == 32) {
1288         tcg_gen_andi_i32(TCGV_LOW(ret), TCGV_LOW(arg1), arg2);
1289         tcg_gen_andi_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), arg2 >> 32);
1290         return;
1291     }
1292 
1293     /* Some cases can be optimized here.  */
1294     switch (arg2) {
1295     case 0:
1296         tcg_gen_movi_i64(ret, 0);
1297         return;
1298     case -1:
1299         tcg_gen_mov_i64(ret, arg1);
1300         return;
1301     case 0xff:
1302         /* Don't recurse with tcg_gen_ext8u_i64.  */
1303         if (TCG_TARGET_HAS_ext8u_i64) {
1304             tcg_gen_op2_i64(INDEX_op_ext8u_i64, ret, arg1);
1305             return;
1306         }
1307         break;
1308     case 0xffff:
1309         if (TCG_TARGET_HAS_ext16u_i64) {
1310             tcg_gen_op2_i64(INDEX_op_ext16u_i64, ret, arg1);
1311             return;
1312         }
1313         break;
1314     case 0xffffffffu:
1315         if (TCG_TARGET_HAS_ext32u_i64) {
1316             tcg_gen_op2_i64(INDEX_op_ext32u_i64, ret, arg1);
1317             return;
1318         }
1319         break;
1320     }
1321 
1322     tcg_gen_and_i64(ret, arg1, tcg_constant_i64(arg2));
1323 }
1324 
1325 void tcg_gen_ori_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
1326 {
1327     if (TCG_TARGET_REG_BITS == 32) {
1328         tcg_gen_ori_i32(TCGV_LOW(ret), TCGV_LOW(arg1), arg2);
1329         tcg_gen_ori_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), arg2 >> 32);
1330         return;
1331     }
1332     /* Some cases can be optimized here.  */
1333     if (arg2 == -1) {
1334         tcg_gen_movi_i64(ret, -1);
1335     } else if (arg2 == 0) {
1336         tcg_gen_mov_i64(ret, arg1);
1337     } else {
1338         tcg_gen_or_i64(ret, arg1, tcg_constant_i64(arg2));
1339     }
1340 }
1341 
1342 void tcg_gen_xori_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
1343 {
1344     if (TCG_TARGET_REG_BITS == 32) {
1345         tcg_gen_xori_i32(TCGV_LOW(ret), TCGV_LOW(arg1), arg2);
1346         tcg_gen_xori_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), arg2 >> 32);
1347         return;
1348     }
1349     /* Some cases can be optimized here.  */
1350     if (arg2 == 0) {
1351         tcg_gen_mov_i64(ret, arg1);
1352     } else if (arg2 == -1 && TCG_TARGET_HAS_not_i64) {
1353         /* Don't recurse with tcg_gen_not_i64.  */
1354         tcg_gen_op2_i64(INDEX_op_not_i64, ret, arg1);
1355     } else {
1356         tcg_gen_xor_i64(ret, arg1, tcg_constant_i64(arg2));
1357     }
1358 }
1359 
1360 static inline void tcg_gen_shifti_i64(TCGv_i64 ret, TCGv_i64 arg1,
1361                                       unsigned c, bool right, bool arith)
1362 {
1363     tcg_debug_assert(c < 64);
1364     if (c == 0) {
1365         tcg_gen_mov_i32(TCGV_LOW(ret), TCGV_LOW(arg1));
1366         tcg_gen_mov_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1));
1367     } else if (c >= 32) {
1368         c -= 32;
1369         if (right) {
1370             if (arith) {
1371                 tcg_gen_sari_i32(TCGV_LOW(ret), TCGV_HIGH(arg1), c);
1372                 tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), 31);
1373             } else {
1374                 tcg_gen_shri_i32(TCGV_LOW(ret), TCGV_HIGH(arg1), c);
1375                 tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
1376             }
1377         } else {
1378             tcg_gen_shli_i32(TCGV_HIGH(ret), TCGV_LOW(arg1), c);
1379             tcg_gen_movi_i32(TCGV_LOW(ret), 0);
1380         }
1381     } else if (right) {
1382         if (TCG_TARGET_HAS_extract2_i32) {
1383             tcg_gen_extract2_i32(TCGV_LOW(ret),
1384                                  TCGV_LOW(arg1), TCGV_HIGH(arg1), c);
1385         } else {
1386             tcg_gen_shri_i32(TCGV_LOW(ret), TCGV_LOW(arg1), c);
1387             tcg_gen_deposit_i32(TCGV_LOW(ret), TCGV_LOW(ret),
1388                                 TCGV_HIGH(arg1), 32 - c, c);
1389         }
1390         if (arith) {
1391             tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), c);
1392         } else {
1393             tcg_gen_shri_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), c);
1394         }
1395     } else {
1396         if (TCG_TARGET_HAS_extract2_i32) {
1397             tcg_gen_extract2_i32(TCGV_HIGH(ret),
1398                                  TCGV_LOW(arg1), TCGV_HIGH(arg1), 32 - c);
1399         } else {
1400             TCGv_i32 t0 = tcg_temp_new_i32();
1401             tcg_gen_shri_i32(t0, TCGV_LOW(arg1), 32 - c);
1402             tcg_gen_deposit_i32(TCGV_HIGH(ret), t0,
1403                                 TCGV_HIGH(arg1), c, 32 - c);
1404             tcg_temp_free_i32(t0);
1405         }
1406         tcg_gen_shli_i32(TCGV_LOW(ret), TCGV_LOW(arg1), c);
1407     }
1408 }
1409 
1410 void tcg_gen_shli_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
1411 {
1412     tcg_debug_assert(arg2 >= 0 && arg2 < 64);
1413     if (TCG_TARGET_REG_BITS == 32) {
1414         tcg_gen_shifti_i64(ret, arg1, arg2, 0, 0);
1415     } else if (arg2 == 0) {
1416         tcg_gen_mov_i64(ret, arg1);
1417     } else {
1418         tcg_gen_shl_i64(ret, arg1, tcg_constant_i64(arg2));
1419     }
1420 }
1421 
1422 void tcg_gen_shri_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
1423 {
1424     tcg_debug_assert(arg2 >= 0 && arg2 < 64);
1425     if (TCG_TARGET_REG_BITS == 32) {
1426         tcg_gen_shifti_i64(ret, arg1, arg2, 1, 0);
1427     } else if (arg2 == 0) {
1428         tcg_gen_mov_i64(ret, arg1);
1429     } else {
1430         tcg_gen_shr_i64(ret, arg1, tcg_constant_i64(arg2));
1431     }
1432 }
1433 
1434 void tcg_gen_sari_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
1435 {
1436     tcg_debug_assert(arg2 >= 0 && arg2 < 64);
1437     if (TCG_TARGET_REG_BITS == 32) {
1438         tcg_gen_shifti_i64(ret, arg1, arg2, 1, 1);
1439     } else if (arg2 == 0) {
1440         tcg_gen_mov_i64(ret, arg1);
1441     } else {
1442         tcg_gen_sar_i64(ret, arg1, tcg_constant_i64(arg2));
1443     }
1444 }
1445 
1446 void tcg_gen_brcond_i64(TCGCond cond, TCGv_i64 arg1, TCGv_i64 arg2, TCGLabel *l)
1447 {
1448     if (cond == TCG_COND_ALWAYS) {
1449         tcg_gen_br(l);
1450     } else if (cond != TCG_COND_NEVER) {
1451         l->refs++;
1452         if (TCG_TARGET_REG_BITS == 32) {
1453             tcg_gen_op6ii_i32(INDEX_op_brcond2_i32, TCGV_LOW(arg1),
1454                               TCGV_HIGH(arg1), TCGV_LOW(arg2),
1455                               TCGV_HIGH(arg2), cond, label_arg(l));
1456         } else {
1457             tcg_gen_op4ii_i64(INDEX_op_brcond_i64, arg1, arg2, cond,
1458                               label_arg(l));
1459         }
1460     }
1461 }
1462 
1463 void tcg_gen_brcondi_i64(TCGCond cond, TCGv_i64 arg1, int64_t arg2, TCGLabel *l)
1464 {
1465     if (TCG_TARGET_REG_BITS == 64) {
1466         tcg_gen_brcond_i64(cond, arg1, tcg_constant_i64(arg2), l);
1467     } else if (cond == TCG_COND_ALWAYS) {
1468         tcg_gen_br(l);
1469     } else if (cond != TCG_COND_NEVER) {
1470         l->refs++;
1471         tcg_gen_op6ii_i32(INDEX_op_brcond2_i32,
1472                           TCGV_LOW(arg1), TCGV_HIGH(arg1),
1473                           tcg_constant_i32(arg2),
1474                           tcg_constant_i32(arg2 >> 32),
1475                           cond, label_arg(l));
1476     }
1477 }
1478 
1479 void tcg_gen_setcond_i64(TCGCond cond, TCGv_i64 ret,
1480                          TCGv_i64 arg1, TCGv_i64 arg2)
1481 {
1482     if (cond == TCG_COND_ALWAYS) {
1483         tcg_gen_movi_i64(ret, 1);
1484     } else if (cond == TCG_COND_NEVER) {
1485         tcg_gen_movi_i64(ret, 0);
1486     } else {
1487         if (TCG_TARGET_REG_BITS == 32) {
1488             tcg_gen_op6i_i32(INDEX_op_setcond2_i32, TCGV_LOW(ret),
1489                              TCGV_LOW(arg1), TCGV_HIGH(arg1),
1490                              TCGV_LOW(arg2), TCGV_HIGH(arg2), cond);
1491             tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
1492         } else {
1493             tcg_gen_op4i_i64(INDEX_op_setcond_i64, ret, arg1, arg2, cond);
1494         }
1495     }
1496 }
1497 
1498 void tcg_gen_setcondi_i64(TCGCond cond, TCGv_i64 ret,
1499                           TCGv_i64 arg1, int64_t arg2)
1500 {
1501     if (TCG_TARGET_REG_BITS == 64) {
1502         tcg_gen_setcond_i64(cond, ret, arg1, tcg_constant_i64(arg2));
1503     } else if (cond == TCG_COND_ALWAYS) {
1504         tcg_gen_movi_i64(ret, 1);
1505     } else if (cond == TCG_COND_NEVER) {
1506         tcg_gen_movi_i64(ret, 0);
1507     } else {
1508         tcg_gen_op6i_i32(INDEX_op_setcond2_i32, TCGV_LOW(ret),
1509                          TCGV_LOW(arg1), TCGV_HIGH(arg1),
1510                          tcg_constant_i32(arg2),
1511                          tcg_constant_i32(arg2 >> 32), cond);
1512         tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
1513     }
1514 }
1515 
1516 void tcg_gen_muli_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
1517 {
1518     if (arg2 == 0) {
1519         tcg_gen_movi_i64(ret, 0);
1520     } else if (is_power_of_2(arg2)) {
1521         tcg_gen_shli_i64(ret, arg1, ctz64(arg2));
1522     } else {
1523         TCGv_i64 t0 = tcg_const_i64(arg2);
1524         tcg_gen_mul_i64(ret, arg1, t0);
1525         tcg_temp_free_i64(t0);
1526     }
1527 }
1528 
1529 void tcg_gen_div_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
1530 {
1531     if (TCG_TARGET_HAS_div_i64) {
1532         tcg_gen_op3_i64(INDEX_op_div_i64, ret, arg1, arg2);
1533     } else if (TCG_TARGET_HAS_div2_i64) {
1534         TCGv_i64 t0 = tcg_temp_new_i64();
1535         tcg_gen_sari_i64(t0, arg1, 63);
1536         tcg_gen_op5_i64(INDEX_op_div2_i64, ret, t0, arg1, t0, arg2);
1537         tcg_temp_free_i64(t0);
1538     } else {
1539         gen_helper_div_i64(ret, arg1, arg2);
1540     }
1541 }
1542 
1543 void tcg_gen_rem_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
1544 {
1545     if (TCG_TARGET_HAS_rem_i64) {
1546         tcg_gen_op3_i64(INDEX_op_rem_i64, ret, arg1, arg2);
1547     } else if (TCG_TARGET_HAS_div_i64) {
1548         TCGv_i64 t0 = tcg_temp_new_i64();
1549         tcg_gen_op3_i64(INDEX_op_div_i64, t0, arg1, arg2);
1550         tcg_gen_mul_i64(t0, t0, arg2);
1551         tcg_gen_sub_i64(ret, arg1, t0);
1552         tcg_temp_free_i64(t0);
1553     } else if (TCG_TARGET_HAS_div2_i64) {
1554         TCGv_i64 t0 = tcg_temp_new_i64();
1555         tcg_gen_sari_i64(t0, arg1, 63);
1556         tcg_gen_op5_i64(INDEX_op_div2_i64, t0, ret, arg1, t0, arg2);
1557         tcg_temp_free_i64(t0);
1558     } else {
1559         gen_helper_rem_i64(ret, arg1, arg2);
1560     }
1561 }
1562 
1563 void tcg_gen_divu_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
1564 {
1565     if (TCG_TARGET_HAS_div_i64) {
1566         tcg_gen_op3_i64(INDEX_op_divu_i64, ret, arg1, arg2);
1567     } else if (TCG_TARGET_HAS_div2_i64) {
1568         TCGv_i64 t0 = tcg_temp_new_i64();
1569         tcg_gen_movi_i64(t0, 0);
1570         tcg_gen_op5_i64(INDEX_op_divu2_i64, ret, t0, arg1, t0, arg2);
1571         tcg_temp_free_i64(t0);
1572     } else {
1573         gen_helper_divu_i64(ret, arg1, arg2);
1574     }
1575 }
1576 
1577 void tcg_gen_remu_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
1578 {
1579     if (TCG_TARGET_HAS_rem_i64) {
1580         tcg_gen_op3_i64(INDEX_op_remu_i64, ret, arg1, arg2);
1581     } else if (TCG_TARGET_HAS_div_i64) {
1582         TCGv_i64 t0 = tcg_temp_new_i64();
1583         tcg_gen_op3_i64(INDEX_op_divu_i64, t0, arg1, arg2);
1584         tcg_gen_mul_i64(t0, t0, arg2);
1585         tcg_gen_sub_i64(ret, arg1, t0);
1586         tcg_temp_free_i64(t0);
1587     } else if (TCG_TARGET_HAS_div2_i64) {
1588         TCGv_i64 t0 = tcg_temp_new_i64();
1589         tcg_gen_movi_i64(t0, 0);
1590         tcg_gen_op5_i64(INDEX_op_divu2_i64, t0, ret, arg1, t0, arg2);
1591         tcg_temp_free_i64(t0);
1592     } else {
1593         gen_helper_remu_i64(ret, arg1, arg2);
1594     }
1595 }
1596 
1597 void tcg_gen_ext8s_i64(TCGv_i64 ret, TCGv_i64 arg)
1598 {
1599     if (TCG_TARGET_REG_BITS == 32) {
1600         tcg_gen_ext8s_i32(TCGV_LOW(ret), TCGV_LOW(arg));
1601         tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31);
1602     } else if (TCG_TARGET_HAS_ext8s_i64) {
1603         tcg_gen_op2_i64(INDEX_op_ext8s_i64, ret, arg);
1604     } else {
1605         tcg_gen_shli_i64(ret, arg, 56);
1606         tcg_gen_sari_i64(ret, ret, 56);
1607     }
1608 }
1609 
1610 void tcg_gen_ext16s_i64(TCGv_i64 ret, TCGv_i64 arg)
1611 {
1612     if (TCG_TARGET_REG_BITS == 32) {
1613         tcg_gen_ext16s_i32(TCGV_LOW(ret), TCGV_LOW(arg));
1614         tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31);
1615     } else if (TCG_TARGET_HAS_ext16s_i64) {
1616         tcg_gen_op2_i64(INDEX_op_ext16s_i64, ret, arg);
1617     } else {
1618         tcg_gen_shli_i64(ret, arg, 48);
1619         tcg_gen_sari_i64(ret, ret, 48);
1620     }
1621 }
1622 
1623 void tcg_gen_ext32s_i64(TCGv_i64 ret, TCGv_i64 arg)
1624 {
1625     if (TCG_TARGET_REG_BITS == 32) {
1626         tcg_gen_mov_i32(TCGV_LOW(ret), TCGV_LOW(arg));
1627         tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31);
1628     } else if (TCG_TARGET_HAS_ext32s_i64) {
1629         tcg_gen_op2_i64(INDEX_op_ext32s_i64, ret, arg);
1630     } else {
1631         tcg_gen_shli_i64(ret, arg, 32);
1632         tcg_gen_sari_i64(ret, ret, 32);
1633     }
1634 }
1635 
1636 void tcg_gen_ext8u_i64(TCGv_i64 ret, TCGv_i64 arg)
1637 {
1638     if (TCG_TARGET_REG_BITS == 32) {
1639         tcg_gen_ext8u_i32(TCGV_LOW(ret), TCGV_LOW(arg));
1640         tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
1641     } else if (TCG_TARGET_HAS_ext8u_i64) {
1642         tcg_gen_op2_i64(INDEX_op_ext8u_i64, ret, arg);
1643     } else {
1644         tcg_gen_andi_i64(ret, arg, 0xffu);
1645     }
1646 }
1647 
1648 void tcg_gen_ext16u_i64(TCGv_i64 ret, TCGv_i64 arg)
1649 {
1650     if (TCG_TARGET_REG_BITS == 32) {
1651         tcg_gen_ext16u_i32(TCGV_LOW(ret), TCGV_LOW(arg));
1652         tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
1653     } else if (TCG_TARGET_HAS_ext16u_i64) {
1654         tcg_gen_op2_i64(INDEX_op_ext16u_i64, ret, arg);
1655     } else {
1656         tcg_gen_andi_i64(ret, arg, 0xffffu);
1657     }
1658 }
1659 
1660 void tcg_gen_ext32u_i64(TCGv_i64 ret, TCGv_i64 arg)
1661 {
1662     if (TCG_TARGET_REG_BITS == 32) {
1663         tcg_gen_mov_i32(TCGV_LOW(ret), TCGV_LOW(arg));
1664         tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
1665     } else if (TCG_TARGET_HAS_ext32u_i64) {
1666         tcg_gen_op2_i64(INDEX_op_ext32u_i64, ret, arg);
1667     } else {
1668         tcg_gen_andi_i64(ret, arg, 0xffffffffu);
1669     }
1670 }
1671 
1672 void tcg_gen_bswap16_i64(TCGv_i64 ret, TCGv_i64 arg, int flags)
1673 {
1674     /* Only one extension flag may be present. */
1675     tcg_debug_assert(!(flags & TCG_BSWAP_OS) || !(flags & TCG_BSWAP_OZ));
1676 
1677     if (TCG_TARGET_REG_BITS == 32) {
1678         tcg_gen_bswap16_i32(TCGV_LOW(ret), TCGV_LOW(arg), flags);
1679         if (flags & TCG_BSWAP_OS) {
1680             tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31);
1681         } else {
1682             tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
1683         }
1684     } else if (TCG_TARGET_HAS_bswap16_i64) {
1685         tcg_gen_op3i_i64(INDEX_op_bswap16_i64, ret, arg, flags);
1686     } else {
1687         TCGv_i64 t0 = tcg_temp_new_i64();
1688         TCGv_i64 t1 = tcg_temp_new_i64();
1689 
1690         tcg_gen_shri_i64(t0, arg, 8);
1691         if (!(flags & TCG_BSWAP_IZ)) {
1692             tcg_gen_ext8u_i64(t0, t0);
1693         }
1694 
1695         if (flags & TCG_BSWAP_OS) {
1696             tcg_gen_shli_i64(t1, arg, 56);
1697             tcg_gen_sari_i64(t1, t1, 48);
1698         } else if (flags & TCG_BSWAP_OZ) {
1699             tcg_gen_ext8u_i64(t1, arg);
1700             tcg_gen_shli_i64(t1, t1, 8);
1701         } else {
1702             tcg_gen_shli_i64(t1, arg, 8);
1703         }
1704 
1705         tcg_gen_or_i64(ret, t0, t1);
1706         tcg_temp_free_i64(t0);
1707         tcg_temp_free_i64(t1);
1708     }
1709 }
1710 
1711 void tcg_gen_bswap32_i64(TCGv_i64 ret, TCGv_i64 arg, int flags)
1712 {
1713     /* Only one extension flag may be present. */
1714     tcg_debug_assert(!(flags & TCG_BSWAP_OS) || !(flags & TCG_BSWAP_OZ));
1715 
1716     if (TCG_TARGET_REG_BITS == 32) {
1717         tcg_gen_bswap32_i32(TCGV_LOW(ret), TCGV_LOW(arg));
1718         if (flags & TCG_BSWAP_OS) {
1719             tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31);
1720         } else {
1721             tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
1722         }
1723     } else if (TCG_TARGET_HAS_bswap32_i64) {
1724         tcg_gen_op3i_i64(INDEX_op_bswap32_i64, ret, arg, flags);
1725     } else {
1726         TCGv_i64 t0 = tcg_temp_new_i64();
1727         TCGv_i64 t1 = tcg_temp_new_i64();
1728         TCGv_i64 t2 = tcg_constant_i64(0x00ff00ff);
1729 
1730                                             /* arg = xxxxabcd */
1731         tcg_gen_shri_i64(t0, arg, 8);       /*  t0 = .xxxxabc */
1732         tcg_gen_and_i64(t1, arg, t2);       /*  t1 = .....b.d */
1733         tcg_gen_and_i64(t0, t0, t2);        /*  t0 = .....a.c */
1734         tcg_gen_shli_i64(t1, t1, 8);        /*  t1 = ....b.d. */
1735         tcg_gen_or_i64(ret, t0, t1);        /* ret = ....badc */
1736 
1737         tcg_gen_shli_i64(t1, ret, 48);      /*  t1 = dc...... */
1738         tcg_gen_shri_i64(t0, ret, 16);      /*  t0 = ......ba */
1739         if (flags & TCG_BSWAP_OS) {
1740             tcg_gen_sari_i64(t1, t1, 32);   /*  t1 = ssssdc.. */
1741         } else {
1742             tcg_gen_shri_i64(t1, t1, 32);   /*  t1 = ....dc.. */
1743         }
1744         tcg_gen_or_i64(ret, t0, t1);        /* ret = ssssdcba */
1745 
1746         tcg_temp_free_i64(t0);
1747         tcg_temp_free_i64(t1);
1748     }
1749 }
1750 
1751 void tcg_gen_bswap64_i64(TCGv_i64 ret, TCGv_i64 arg)
1752 {
1753     if (TCG_TARGET_REG_BITS == 32) {
1754         TCGv_i32 t0, t1;
1755         t0 = tcg_temp_new_i32();
1756         t1 = tcg_temp_new_i32();
1757 
1758         tcg_gen_bswap32_i32(t0, TCGV_LOW(arg));
1759         tcg_gen_bswap32_i32(t1, TCGV_HIGH(arg));
1760         tcg_gen_mov_i32(TCGV_LOW(ret), t1);
1761         tcg_gen_mov_i32(TCGV_HIGH(ret), t0);
1762         tcg_temp_free_i32(t0);
1763         tcg_temp_free_i32(t1);
1764     } else if (TCG_TARGET_HAS_bswap64_i64) {
1765         tcg_gen_op3i_i64(INDEX_op_bswap64_i64, ret, arg, 0);
1766     } else {
1767         TCGv_i64 t0 = tcg_temp_new_i64();
1768         TCGv_i64 t1 = tcg_temp_new_i64();
1769         TCGv_i64 t2 = tcg_temp_new_i64();
1770 
1771                                         /* arg = abcdefgh */
1772         tcg_gen_movi_i64(t2, 0x00ff00ff00ff00ffull);
1773         tcg_gen_shri_i64(t0, arg, 8);   /*  t0 = .abcdefg */
1774         tcg_gen_and_i64(t1, arg, t2);   /*  t1 = .b.d.f.h */
1775         tcg_gen_and_i64(t0, t0, t2);    /*  t0 = .a.c.e.g */
1776         tcg_gen_shli_i64(t1, t1, 8);    /*  t1 = b.d.f.h. */
1777         tcg_gen_or_i64(ret, t0, t1);    /* ret = badcfehg */
1778 
1779         tcg_gen_movi_i64(t2, 0x0000ffff0000ffffull);
1780         tcg_gen_shri_i64(t0, ret, 16);  /*  t0 = ..badcfe */
1781         tcg_gen_and_i64(t1, ret, t2);   /*  t1 = ..dc..hg */
1782         tcg_gen_and_i64(t0, t0, t2);    /*  t0 = ..ba..fe */
1783         tcg_gen_shli_i64(t1, t1, 16);   /*  t1 = dc..hg.. */
1784         tcg_gen_or_i64(ret, t0, t1);    /* ret = dcbahgfe */
1785 
1786         tcg_gen_shri_i64(t0, ret, 32);  /*  t0 = ....dcba */
1787         tcg_gen_shli_i64(t1, ret, 32);  /*  t1 = hgfe.... */
1788         tcg_gen_or_i64(ret, t0, t1);    /* ret = hgfedcba */
1789 
1790         tcg_temp_free_i64(t0);
1791         tcg_temp_free_i64(t1);
1792         tcg_temp_free_i64(t2);
1793     }
1794 }
1795 
1796 void tcg_gen_not_i64(TCGv_i64 ret, TCGv_i64 arg)
1797 {
1798     if (TCG_TARGET_REG_BITS == 32) {
1799         tcg_gen_not_i32(TCGV_LOW(ret), TCGV_LOW(arg));
1800         tcg_gen_not_i32(TCGV_HIGH(ret), TCGV_HIGH(arg));
1801     } else if (TCG_TARGET_HAS_not_i64) {
1802         tcg_gen_op2_i64(INDEX_op_not_i64, ret, arg);
1803     } else {
1804         tcg_gen_xori_i64(ret, arg, -1);
1805     }
1806 }
1807 
1808 void tcg_gen_andc_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
1809 {
1810     if (TCG_TARGET_REG_BITS == 32) {
1811         tcg_gen_andc_i32(TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_LOW(arg2));
1812         tcg_gen_andc_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), TCGV_HIGH(arg2));
1813     } else if (TCG_TARGET_HAS_andc_i64) {
1814         tcg_gen_op3_i64(INDEX_op_andc_i64, ret, arg1, arg2);
1815     } else {
1816         TCGv_i64 t0 = tcg_temp_new_i64();
1817         tcg_gen_not_i64(t0, arg2);
1818         tcg_gen_and_i64(ret, arg1, t0);
1819         tcg_temp_free_i64(t0);
1820     }
1821 }
1822 
1823 void tcg_gen_eqv_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
1824 {
1825     if (TCG_TARGET_REG_BITS == 32) {
1826         tcg_gen_eqv_i32(TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_LOW(arg2));
1827         tcg_gen_eqv_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), TCGV_HIGH(arg2));
1828     } else if (TCG_TARGET_HAS_eqv_i64) {
1829         tcg_gen_op3_i64(INDEX_op_eqv_i64, ret, arg1, arg2);
1830     } else {
1831         tcg_gen_xor_i64(ret, arg1, arg2);
1832         tcg_gen_not_i64(ret, ret);
1833     }
1834 }
1835 
1836 void tcg_gen_nand_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
1837 {
1838     if (TCG_TARGET_REG_BITS == 32) {
1839         tcg_gen_nand_i32(TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_LOW(arg2));
1840         tcg_gen_nand_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), TCGV_HIGH(arg2));
1841     } else if (TCG_TARGET_HAS_nand_i64) {
1842         tcg_gen_op3_i64(INDEX_op_nand_i64, ret, arg1, arg2);
1843     } else {
1844         tcg_gen_and_i64(ret, arg1, arg2);
1845         tcg_gen_not_i64(ret, ret);
1846     }
1847 }
1848 
1849 void tcg_gen_nor_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
1850 {
1851     if (TCG_TARGET_REG_BITS == 32) {
1852         tcg_gen_nor_i32(TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_LOW(arg2));
1853         tcg_gen_nor_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), TCGV_HIGH(arg2));
1854     } else if (TCG_TARGET_HAS_nor_i64) {
1855         tcg_gen_op3_i64(INDEX_op_nor_i64, ret, arg1, arg2);
1856     } else {
1857         tcg_gen_or_i64(ret, arg1, arg2);
1858         tcg_gen_not_i64(ret, ret);
1859     }
1860 }
1861 
1862 void tcg_gen_orc_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
1863 {
1864     if (TCG_TARGET_REG_BITS == 32) {
1865         tcg_gen_orc_i32(TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_LOW(arg2));
1866         tcg_gen_orc_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), TCGV_HIGH(arg2));
1867     } else if (TCG_TARGET_HAS_orc_i64) {
1868         tcg_gen_op3_i64(INDEX_op_orc_i64, ret, arg1, arg2);
1869     } else {
1870         TCGv_i64 t0 = tcg_temp_new_i64();
1871         tcg_gen_not_i64(t0, arg2);
1872         tcg_gen_or_i64(ret, arg1, t0);
1873         tcg_temp_free_i64(t0);
1874     }
1875 }
1876 
1877 void tcg_gen_clz_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
1878 {
1879     if (TCG_TARGET_HAS_clz_i64) {
1880         tcg_gen_op3_i64(INDEX_op_clz_i64, ret, arg1, arg2);
1881     } else {
1882         gen_helper_clz_i64(ret, arg1, arg2);
1883     }
1884 }
1885 
1886 void tcg_gen_clzi_i64(TCGv_i64 ret, TCGv_i64 arg1, uint64_t arg2)
1887 {
1888     if (TCG_TARGET_REG_BITS == 32
1889         && TCG_TARGET_HAS_clz_i32
1890         && arg2 <= 0xffffffffu) {
1891         TCGv_i32 t = tcg_temp_new_i32();
1892         tcg_gen_clzi_i32(t, TCGV_LOW(arg1), arg2 - 32);
1893         tcg_gen_addi_i32(t, t, 32);
1894         tcg_gen_clz_i32(TCGV_LOW(ret), TCGV_HIGH(arg1), t);
1895         tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
1896         tcg_temp_free_i32(t);
1897     } else {
1898         TCGv_i64 t0 = tcg_const_i64(arg2);
1899         tcg_gen_clz_i64(ret, arg1, t0);
1900         tcg_temp_free_i64(t0);
1901     }
1902 }
1903 
1904 void tcg_gen_ctz_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
1905 {
1906     if (TCG_TARGET_HAS_ctz_i64) {
1907         tcg_gen_op3_i64(INDEX_op_ctz_i64, ret, arg1, arg2);
1908     } else if (TCG_TARGET_HAS_ctpop_i64 || TCG_TARGET_HAS_clz_i64) {
1909         TCGv_i64 z, t = tcg_temp_new_i64();
1910 
1911         if (TCG_TARGET_HAS_ctpop_i64) {
1912             tcg_gen_subi_i64(t, arg1, 1);
1913             tcg_gen_andc_i64(t, t, arg1);
1914             tcg_gen_ctpop_i64(t, t);
1915         } else {
1916             /* Since all non-x86 hosts have clz(0) == 64, don't fight it.  */
1917             tcg_gen_neg_i64(t, arg1);
1918             tcg_gen_and_i64(t, t, arg1);
1919             tcg_gen_clzi_i64(t, t, 64);
1920             tcg_gen_xori_i64(t, t, 63);
1921         }
1922         z = tcg_constant_i64(0);
1923         tcg_gen_movcond_i64(TCG_COND_EQ, ret, arg1, z, arg2, t);
1924         tcg_temp_free_i64(t);
1925         tcg_temp_free_i64(z);
1926     } else {
1927         gen_helper_ctz_i64(ret, arg1, arg2);
1928     }
1929 }
1930 
1931 void tcg_gen_ctzi_i64(TCGv_i64 ret, TCGv_i64 arg1, uint64_t arg2)
1932 {
1933     if (TCG_TARGET_REG_BITS == 32
1934         && TCG_TARGET_HAS_ctz_i32
1935         && arg2 <= 0xffffffffu) {
1936         TCGv_i32 t32 = tcg_temp_new_i32();
1937         tcg_gen_ctzi_i32(t32, TCGV_HIGH(arg1), arg2 - 32);
1938         tcg_gen_addi_i32(t32, t32, 32);
1939         tcg_gen_ctz_i32(TCGV_LOW(ret), TCGV_LOW(arg1), t32);
1940         tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
1941         tcg_temp_free_i32(t32);
1942     } else if (!TCG_TARGET_HAS_ctz_i64
1943                && TCG_TARGET_HAS_ctpop_i64
1944                && arg2 == 64) {
1945         /* This equivalence has the advantage of not requiring a fixup.  */
1946         TCGv_i64 t = tcg_temp_new_i64();
1947         tcg_gen_subi_i64(t, arg1, 1);
1948         tcg_gen_andc_i64(t, t, arg1);
1949         tcg_gen_ctpop_i64(ret, t);
1950         tcg_temp_free_i64(t);
1951     } else {
1952         TCGv_i64 t0 = tcg_const_i64(arg2);
1953         tcg_gen_ctz_i64(ret, arg1, t0);
1954         tcg_temp_free_i64(t0);
1955     }
1956 }
1957 
1958 void tcg_gen_clrsb_i64(TCGv_i64 ret, TCGv_i64 arg)
1959 {
1960     if (TCG_TARGET_HAS_clz_i64 || TCG_TARGET_HAS_clz_i32) {
1961         TCGv_i64 t = tcg_temp_new_i64();
1962         tcg_gen_sari_i64(t, arg, 63);
1963         tcg_gen_xor_i64(t, t, arg);
1964         tcg_gen_clzi_i64(t, t, 64);
1965         tcg_gen_subi_i64(ret, t, 1);
1966         tcg_temp_free_i64(t);
1967     } else {
1968         gen_helper_clrsb_i64(ret, arg);
1969     }
1970 }
1971 
1972 void tcg_gen_ctpop_i64(TCGv_i64 ret, TCGv_i64 arg1)
1973 {
1974     if (TCG_TARGET_HAS_ctpop_i64) {
1975         tcg_gen_op2_i64(INDEX_op_ctpop_i64, ret, arg1);
1976     } else if (TCG_TARGET_REG_BITS == 32 && TCG_TARGET_HAS_ctpop_i32) {
1977         tcg_gen_ctpop_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1));
1978         tcg_gen_ctpop_i32(TCGV_LOW(ret), TCGV_LOW(arg1));
1979         tcg_gen_add_i32(TCGV_LOW(ret), TCGV_LOW(ret), TCGV_HIGH(ret));
1980         tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
1981     } else {
1982         gen_helper_ctpop_i64(ret, arg1);
1983     }
1984 }
1985 
1986 void tcg_gen_rotl_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
1987 {
1988     if (TCG_TARGET_HAS_rot_i64) {
1989         tcg_gen_op3_i64(INDEX_op_rotl_i64, ret, arg1, arg2);
1990     } else {
1991         TCGv_i64 t0, t1;
1992         t0 = tcg_temp_new_i64();
1993         t1 = tcg_temp_new_i64();
1994         tcg_gen_shl_i64(t0, arg1, arg2);
1995         tcg_gen_subfi_i64(t1, 64, arg2);
1996         tcg_gen_shr_i64(t1, arg1, t1);
1997         tcg_gen_or_i64(ret, t0, t1);
1998         tcg_temp_free_i64(t0);
1999         tcg_temp_free_i64(t1);
2000     }
2001 }
2002 
2003 void tcg_gen_rotli_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
2004 {
2005     tcg_debug_assert(arg2 >= 0 && arg2 < 64);
2006     /* some cases can be optimized here */
2007     if (arg2 == 0) {
2008         tcg_gen_mov_i64(ret, arg1);
2009     } else if (TCG_TARGET_HAS_rot_i64) {
2010         tcg_gen_rotl_i64(ret, arg1, tcg_constant_i64(arg2));
2011     } else {
2012         TCGv_i64 t0, t1;
2013         t0 = tcg_temp_new_i64();
2014         t1 = tcg_temp_new_i64();
2015         tcg_gen_shli_i64(t0, arg1, arg2);
2016         tcg_gen_shri_i64(t1, arg1, 64 - arg2);
2017         tcg_gen_or_i64(ret, t0, t1);
2018         tcg_temp_free_i64(t0);
2019         tcg_temp_free_i64(t1);
2020     }
2021 }
2022 
2023 void tcg_gen_rotr_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
2024 {
2025     if (TCG_TARGET_HAS_rot_i64) {
2026         tcg_gen_op3_i64(INDEX_op_rotr_i64, ret, arg1, arg2);
2027     } else {
2028         TCGv_i64 t0, t1;
2029         t0 = tcg_temp_new_i64();
2030         t1 = tcg_temp_new_i64();
2031         tcg_gen_shr_i64(t0, arg1, arg2);
2032         tcg_gen_subfi_i64(t1, 64, arg2);
2033         tcg_gen_shl_i64(t1, arg1, t1);
2034         tcg_gen_or_i64(ret, t0, t1);
2035         tcg_temp_free_i64(t0);
2036         tcg_temp_free_i64(t1);
2037     }
2038 }
2039 
2040 void tcg_gen_rotri_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
2041 {
2042     tcg_debug_assert(arg2 >= 0 && arg2 < 64);
2043     /* some cases can be optimized here */
2044     if (arg2 == 0) {
2045         tcg_gen_mov_i64(ret, arg1);
2046     } else {
2047         tcg_gen_rotli_i64(ret, arg1, 64 - arg2);
2048     }
2049 }
2050 
2051 void tcg_gen_deposit_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2,
2052                          unsigned int ofs, unsigned int len)
2053 {
2054     uint64_t mask;
2055     TCGv_i64 t1;
2056 
2057     tcg_debug_assert(ofs < 64);
2058     tcg_debug_assert(len > 0);
2059     tcg_debug_assert(len <= 64);
2060     tcg_debug_assert(ofs + len <= 64);
2061 
2062     if (len == 64) {
2063         tcg_gen_mov_i64(ret, arg2);
2064         return;
2065     }
2066     if (TCG_TARGET_HAS_deposit_i64 && TCG_TARGET_deposit_i64_valid(ofs, len)) {
2067         tcg_gen_op5ii_i64(INDEX_op_deposit_i64, ret, arg1, arg2, ofs, len);
2068         return;
2069     }
2070 
2071     if (TCG_TARGET_REG_BITS == 32) {
2072         if (ofs >= 32) {
2073             tcg_gen_deposit_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1),
2074                                 TCGV_LOW(arg2), ofs - 32, len);
2075             tcg_gen_mov_i32(TCGV_LOW(ret), TCGV_LOW(arg1));
2076             return;
2077         }
2078         if (ofs + len <= 32) {
2079             tcg_gen_deposit_i32(TCGV_LOW(ret), TCGV_LOW(arg1),
2080                                 TCGV_LOW(arg2), ofs, len);
2081             tcg_gen_mov_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1));
2082             return;
2083         }
2084     }
2085 
2086     t1 = tcg_temp_new_i64();
2087 
2088     if (TCG_TARGET_HAS_extract2_i64) {
2089         if (ofs + len == 64) {
2090             tcg_gen_shli_i64(t1, arg1, len);
2091             tcg_gen_extract2_i64(ret, t1, arg2, len);
2092             goto done;
2093         }
2094         if (ofs == 0) {
2095             tcg_gen_extract2_i64(ret, arg1, arg2, len);
2096             tcg_gen_rotli_i64(ret, ret, len);
2097             goto done;
2098         }
2099     }
2100 
2101     mask = (1ull << len) - 1;
2102     if (ofs + len < 64) {
2103         tcg_gen_andi_i64(t1, arg2, mask);
2104         tcg_gen_shli_i64(t1, t1, ofs);
2105     } else {
2106         tcg_gen_shli_i64(t1, arg2, ofs);
2107     }
2108     tcg_gen_andi_i64(ret, arg1, ~(mask << ofs));
2109     tcg_gen_or_i64(ret, ret, t1);
2110  done:
2111     tcg_temp_free_i64(t1);
2112 }
2113 
2114 void tcg_gen_deposit_z_i64(TCGv_i64 ret, TCGv_i64 arg,
2115                            unsigned int ofs, unsigned int len)
2116 {
2117     tcg_debug_assert(ofs < 64);
2118     tcg_debug_assert(len > 0);
2119     tcg_debug_assert(len <= 64);
2120     tcg_debug_assert(ofs + len <= 64);
2121 
2122     if (ofs + len == 64) {
2123         tcg_gen_shli_i64(ret, arg, ofs);
2124     } else if (ofs == 0) {
2125         tcg_gen_andi_i64(ret, arg, (1ull << len) - 1);
2126     } else if (TCG_TARGET_HAS_deposit_i64
2127                && TCG_TARGET_deposit_i64_valid(ofs, len)) {
2128         TCGv_i64 zero = tcg_constant_i64(0);
2129         tcg_gen_op5ii_i64(INDEX_op_deposit_i64, ret, zero, arg, ofs, len);
2130     } else {
2131         if (TCG_TARGET_REG_BITS == 32) {
2132             if (ofs >= 32) {
2133                 tcg_gen_deposit_z_i32(TCGV_HIGH(ret), TCGV_LOW(arg),
2134                                       ofs - 32, len);
2135                 tcg_gen_movi_i32(TCGV_LOW(ret), 0);
2136                 return;
2137             }
2138             if (ofs + len <= 32) {
2139                 tcg_gen_deposit_z_i32(TCGV_LOW(ret), TCGV_LOW(arg), ofs, len);
2140                 tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
2141                 return;
2142             }
2143         }
2144         /* To help two-operand hosts we prefer to zero-extend first,
2145            which allows ARG to stay live.  */
2146         switch (len) {
2147         case 32:
2148             if (TCG_TARGET_HAS_ext32u_i64) {
2149                 tcg_gen_ext32u_i64(ret, arg);
2150                 tcg_gen_shli_i64(ret, ret, ofs);
2151                 return;
2152             }
2153             break;
2154         case 16:
2155             if (TCG_TARGET_HAS_ext16u_i64) {
2156                 tcg_gen_ext16u_i64(ret, arg);
2157                 tcg_gen_shli_i64(ret, ret, ofs);
2158                 return;
2159             }
2160             break;
2161         case 8:
2162             if (TCG_TARGET_HAS_ext8u_i64) {
2163                 tcg_gen_ext8u_i64(ret, arg);
2164                 tcg_gen_shli_i64(ret, ret, ofs);
2165                 return;
2166             }
2167             break;
2168         }
2169         /* Otherwise prefer zero-extension over AND for code size.  */
2170         switch (ofs + len) {
2171         case 32:
2172             if (TCG_TARGET_HAS_ext32u_i64) {
2173                 tcg_gen_shli_i64(ret, arg, ofs);
2174                 tcg_gen_ext32u_i64(ret, ret);
2175                 return;
2176             }
2177             break;
2178         case 16:
2179             if (TCG_TARGET_HAS_ext16u_i64) {
2180                 tcg_gen_shli_i64(ret, arg, ofs);
2181                 tcg_gen_ext16u_i64(ret, ret);
2182                 return;
2183             }
2184             break;
2185         case 8:
2186             if (TCG_TARGET_HAS_ext8u_i64) {
2187                 tcg_gen_shli_i64(ret, arg, ofs);
2188                 tcg_gen_ext8u_i64(ret, ret);
2189                 return;
2190             }
2191             break;
2192         }
2193         tcg_gen_andi_i64(ret, arg, (1ull << len) - 1);
2194         tcg_gen_shli_i64(ret, ret, ofs);
2195     }
2196 }
2197 
2198 void tcg_gen_extract_i64(TCGv_i64 ret, TCGv_i64 arg,
2199                          unsigned int ofs, unsigned int len)
2200 {
2201     tcg_debug_assert(ofs < 64);
2202     tcg_debug_assert(len > 0);
2203     tcg_debug_assert(len <= 64);
2204     tcg_debug_assert(ofs + len <= 64);
2205 
2206     /* Canonicalize certain special cases, even if extract is supported.  */
2207     if (ofs + len == 64) {
2208         tcg_gen_shri_i64(ret, arg, 64 - len);
2209         return;
2210     }
2211     if (ofs == 0) {
2212         tcg_gen_andi_i64(ret, arg, (1ull << len) - 1);
2213         return;
2214     }
2215 
2216     if (TCG_TARGET_REG_BITS == 32) {
2217         /* Look for a 32-bit extract within one of the two words.  */
2218         if (ofs >= 32) {
2219             tcg_gen_extract_i32(TCGV_LOW(ret), TCGV_HIGH(arg), ofs - 32, len);
2220             tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
2221             return;
2222         }
2223         if (ofs + len <= 32) {
2224             tcg_gen_extract_i32(TCGV_LOW(ret), TCGV_LOW(arg), ofs, len);
2225             tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
2226             return;
2227         }
2228         /* The field is split across two words.  One double-word
2229            shift is better than two double-word shifts.  */
2230         goto do_shift_and;
2231     }
2232 
2233     if (TCG_TARGET_HAS_extract_i64
2234         && TCG_TARGET_extract_i64_valid(ofs, len)) {
2235         tcg_gen_op4ii_i64(INDEX_op_extract_i64, ret, arg, ofs, len);
2236         return;
2237     }
2238 
2239     /* Assume that zero-extension, if available, is cheaper than a shift.  */
2240     switch (ofs + len) {
2241     case 32:
2242         if (TCG_TARGET_HAS_ext32u_i64) {
2243             tcg_gen_ext32u_i64(ret, arg);
2244             tcg_gen_shri_i64(ret, ret, ofs);
2245             return;
2246         }
2247         break;
2248     case 16:
2249         if (TCG_TARGET_HAS_ext16u_i64) {
2250             tcg_gen_ext16u_i64(ret, arg);
2251             tcg_gen_shri_i64(ret, ret, ofs);
2252             return;
2253         }
2254         break;
2255     case 8:
2256         if (TCG_TARGET_HAS_ext8u_i64) {
2257             tcg_gen_ext8u_i64(ret, arg);
2258             tcg_gen_shri_i64(ret, ret, ofs);
2259             return;
2260         }
2261         break;
2262     }
2263 
2264     /* ??? Ideally we'd know what values are available for immediate AND.
2265        Assume that 8 bits are available, plus the special cases of 16 and 32,
2266        so that we get ext8u, ext16u, and ext32u.  */
2267     switch (len) {
2268     case 1 ... 8: case 16: case 32:
2269     do_shift_and:
2270         tcg_gen_shri_i64(ret, arg, ofs);
2271         tcg_gen_andi_i64(ret, ret, (1ull << len) - 1);
2272         break;
2273     default:
2274         tcg_gen_shli_i64(ret, arg, 64 - len - ofs);
2275         tcg_gen_shri_i64(ret, ret, 64 - len);
2276         break;
2277     }
2278 }
2279 
2280 void tcg_gen_sextract_i64(TCGv_i64 ret, TCGv_i64 arg,
2281                           unsigned int ofs, unsigned int len)
2282 {
2283     tcg_debug_assert(ofs < 64);
2284     tcg_debug_assert(len > 0);
2285     tcg_debug_assert(len <= 64);
2286     tcg_debug_assert(ofs + len <= 64);
2287 
2288     /* Canonicalize certain special cases, even if sextract is supported.  */
2289     if (ofs + len == 64) {
2290         tcg_gen_sari_i64(ret, arg, 64 - len);
2291         return;
2292     }
2293     if (ofs == 0) {
2294         switch (len) {
2295         case 32:
2296             tcg_gen_ext32s_i64(ret, arg);
2297             return;
2298         case 16:
2299             tcg_gen_ext16s_i64(ret, arg);
2300             return;
2301         case 8:
2302             tcg_gen_ext8s_i64(ret, arg);
2303             return;
2304         }
2305     }
2306 
2307     if (TCG_TARGET_REG_BITS == 32) {
2308         /* Look for a 32-bit extract within one of the two words.  */
2309         if (ofs >= 32) {
2310             tcg_gen_sextract_i32(TCGV_LOW(ret), TCGV_HIGH(arg), ofs - 32, len);
2311         } else if (ofs + len <= 32) {
2312             tcg_gen_sextract_i32(TCGV_LOW(ret), TCGV_LOW(arg), ofs, len);
2313         } else if (ofs == 0) {
2314             tcg_gen_mov_i32(TCGV_LOW(ret), TCGV_LOW(arg));
2315             tcg_gen_sextract_i32(TCGV_HIGH(ret), TCGV_HIGH(arg), 0, len - 32);
2316             return;
2317         } else if (len > 32) {
2318             TCGv_i32 t = tcg_temp_new_i32();
2319             /* Extract the bits for the high word normally.  */
2320             tcg_gen_sextract_i32(t, TCGV_HIGH(arg), ofs + 32, len - 32);
2321             /* Shift the field down for the low part.  */
2322             tcg_gen_shri_i64(ret, arg, ofs);
2323             /* Overwrite the shift into the high part.  */
2324             tcg_gen_mov_i32(TCGV_HIGH(ret), t);
2325             tcg_temp_free_i32(t);
2326             return;
2327         } else {
2328             /* Shift the field down for the low part, such that the
2329                field sits at the MSB.  */
2330             tcg_gen_shri_i64(ret, arg, ofs + len - 32);
2331             /* Shift the field down from the MSB, sign extending.  */
2332             tcg_gen_sari_i32(TCGV_LOW(ret), TCGV_LOW(ret), 32 - len);
2333         }
2334         /* Sign-extend the field from 32 bits.  */
2335         tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31);
2336         return;
2337     }
2338 
2339     if (TCG_TARGET_HAS_sextract_i64
2340         && TCG_TARGET_extract_i64_valid(ofs, len)) {
2341         tcg_gen_op4ii_i64(INDEX_op_sextract_i64, ret, arg, ofs, len);
2342         return;
2343     }
2344 
2345     /* Assume that sign-extension, if available, is cheaper than a shift.  */
2346     switch (ofs + len) {
2347     case 32:
2348         if (TCG_TARGET_HAS_ext32s_i64) {
2349             tcg_gen_ext32s_i64(ret, arg);
2350             tcg_gen_sari_i64(ret, ret, ofs);
2351             return;
2352         }
2353         break;
2354     case 16:
2355         if (TCG_TARGET_HAS_ext16s_i64) {
2356             tcg_gen_ext16s_i64(ret, arg);
2357             tcg_gen_sari_i64(ret, ret, ofs);
2358             return;
2359         }
2360         break;
2361     case 8:
2362         if (TCG_TARGET_HAS_ext8s_i64) {
2363             tcg_gen_ext8s_i64(ret, arg);
2364             tcg_gen_sari_i64(ret, ret, ofs);
2365             return;
2366         }
2367         break;
2368     }
2369     switch (len) {
2370     case 32:
2371         if (TCG_TARGET_HAS_ext32s_i64) {
2372             tcg_gen_shri_i64(ret, arg, ofs);
2373             tcg_gen_ext32s_i64(ret, ret);
2374             return;
2375         }
2376         break;
2377     case 16:
2378         if (TCG_TARGET_HAS_ext16s_i64) {
2379             tcg_gen_shri_i64(ret, arg, ofs);
2380             tcg_gen_ext16s_i64(ret, ret);
2381             return;
2382         }
2383         break;
2384     case 8:
2385         if (TCG_TARGET_HAS_ext8s_i64) {
2386             tcg_gen_shri_i64(ret, arg, ofs);
2387             tcg_gen_ext8s_i64(ret, ret);
2388             return;
2389         }
2390         break;
2391     }
2392     tcg_gen_shli_i64(ret, arg, 64 - len - ofs);
2393     tcg_gen_sari_i64(ret, ret, 64 - len);
2394 }
2395 
2396 /*
2397  * Extract 64 bits from a 128-bit input, ah:al, starting from ofs.
2398  * Unlike tcg_gen_extract_i64 above, len is fixed at 64.
2399  */
2400 void tcg_gen_extract2_i64(TCGv_i64 ret, TCGv_i64 al, TCGv_i64 ah,
2401                           unsigned int ofs)
2402 {
2403     tcg_debug_assert(ofs <= 64);
2404     if (ofs == 0) {
2405         tcg_gen_mov_i64(ret, al);
2406     } else if (ofs == 64) {
2407         tcg_gen_mov_i64(ret, ah);
2408     } else if (al == ah) {
2409         tcg_gen_rotri_i64(ret, al, ofs);
2410     } else if (TCG_TARGET_HAS_extract2_i64) {
2411         tcg_gen_op4i_i64(INDEX_op_extract2_i64, ret, al, ah, ofs);
2412     } else {
2413         TCGv_i64 t0 = tcg_temp_new_i64();
2414         tcg_gen_shri_i64(t0, al, ofs);
2415         tcg_gen_deposit_i64(ret, t0, ah, 64 - ofs, ofs);
2416         tcg_temp_free_i64(t0);
2417     }
2418 }
2419 
2420 void tcg_gen_movcond_i64(TCGCond cond, TCGv_i64 ret, TCGv_i64 c1,
2421                          TCGv_i64 c2, TCGv_i64 v1, TCGv_i64 v2)
2422 {
2423     if (cond == TCG_COND_ALWAYS) {
2424         tcg_gen_mov_i64(ret, v1);
2425     } else if (cond == TCG_COND_NEVER) {
2426         tcg_gen_mov_i64(ret, v2);
2427     } else if (TCG_TARGET_REG_BITS == 32) {
2428         TCGv_i32 t0 = tcg_temp_new_i32();
2429         TCGv_i32 t1 = tcg_temp_new_i32();
2430         tcg_gen_op6i_i32(INDEX_op_setcond2_i32, t0,
2431                          TCGV_LOW(c1), TCGV_HIGH(c1),
2432                          TCGV_LOW(c2), TCGV_HIGH(c2), cond);
2433 
2434         if (TCG_TARGET_HAS_movcond_i32) {
2435             tcg_gen_movi_i32(t1, 0);
2436             tcg_gen_movcond_i32(TCG_COND_NE, TCGV_LOW(ret), t0, t1,
2437                                 TCGV_LOW(v1), TCGV_LOW(v2));
2438             tcg_gen_movcond_i32(TCG_COND_NE, TCGV_HIGH(ret), t0, t1,
2439                                 TCGV_HIGH(v1), TCGV_HIGH(v2));
2440         } else {
2441             tcg_gen_neg_i32(t0, t0);
2442 
2443             tcg_gen_and_i32(t1, TCGV_LOW(v1), t0);
2444             tcg_gen_andc_i32(TCGV_LOW(ret), TCGV_LOW(v2), t0);
2445             tcg_gen_or_i32(TCGV_LOW(ret), TCGV_LOW(ret), t1);
2446 
2447             tcg_gen_and_i32(t1, TCGV_HIGH(v1), t0);
2448             tcg_gen_andc_i32(TCGV_HIGH(ret), TCGV_HIGH(v2), t0);
2449             tcg_gen_or_i32(TCGV_HIGH(ret), TCGV_HIGH(ret), t1);
2450         }
2451         tcg_temp_free_i32(t0);
2452         tcg_temp_free_i32(t1);
2453     } else if (TCG_TARGET_HAS_movcond_i64) {
2454         tcg_gen_op6i_i64(INDEX_op_movcond_i64, ret, c1, c2, v1, v2, cond);
2455     } else {
2456         TCGv_i64 t0 = tcg_temp_new_i64();
2457         TCGv_i64 t1 = tcg_temp_new_i64();
2458         tcg_gen_setcond_i64(cond, t0, c1, c2);
2459         tcg_gen_neg_i64(t0, t0);
2460         tcg_gen_and_i64(t1, v1, t0);
2461         tcg_gen_andc_i64(ret, v2, t0);
2462         tcg_gen_or_i64(ret, ret, t1);
2463         tcg_temp_free_i64(t0);
2464         tcg_temp_free_i64(t1);
2465     }
2466 }
2467 
2468 void tcg_gen_add2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 al,
2469                       TCGv_i64 ah, TCGv_i64 bl, TCGv_i64 bh)
2470 {
2471     if (TCG_TARGET_HAS_add2_i64) {
2472         tcg_gen_op6_i64(INDEX_op_add2_i64, rl, rh, al, ah, bl, bh);
2473     } else {
2474         TCGv_i64 t0 = tcg_temp_new_i64();
2475         TCGv_i64 t1 = tcg_temp_new_i64();
2476         tcg_gen_add_i64(t0, al, bl);
2477         tcg_gen_setcond_i64(TCG_COND_LTU, t1, t0, al);
2478         tcg_gen_add_i64(rh, ah, bh);
2479         tcg_gen_add_i64(rh, rh, t1);
2480         tcg_gen_mov_i64(rl, t0);
2481         tcg_temp_free_i64(t0);
2482         tcg_temp_free_i64(t1);
2483     }
2484 }
2485 
2486 void tcg_gen_sub2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 al,
2487                       TCGv_i64 ah, TCGv_i64 bl, TCGv_i64 bh)
2488 {
2489     if (TCG_TARGET_HAS_sub2_i64) {
2490         tcg_gen_op6_i64(INDEX_op_sub2_i64, rl, rh, al, ah, bl, bh);
2491     } else {
2492         TCGv_i64 t0 = tcg_temp_new_i64();
2493         TCGv_i64 t1 = tcg_temp_new_i64();
2494         tcg_gen_sub_i64(t0, al, bl);
2495         tcg_gen_setcond_i64(TCG_COND_LTU, t1, al, bl);
2496         tcg_gen_sub_i64(rh, ah, bh);
2497         tcg_gen_sub_i64(rh, rh, t1);
2498         tcg_gen_mov_i64(rl, t0);
2499         tcg_temp_free_i64(t0);
2500         tcg_temp_free_i64(t1);
2501     }
2502 }
2503 
2504 void tcg_gen_mulu2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 arg1, TCGv_i64 arg2)
2505 {
2506     if (TCG_TARGET_HAS_mulu2_i64) {
2507         tcg_gen_op4_i64(INDEX_op_mulu2_i64, rl, rh, arg1, arg2);
2508     } else if (TCG_TARGET_HAS_muluh_i64) {
2509         TCGv_i64 t = tcg_temp_new_i64();
2510         tcg_gen_op3_i64(INDEX_op_mul_i64, t, arg1, arg2);
2511         tcg_gen_op3_i64(INDEX_op_muluh_i64, rh, arg1, arg2);
2512         tcg_gen_mov_i64(rl, t);
2513         tcg_temp_free_i64(t);
2514     } else {
2515         TCGv_i64 t0 = tcg_temp_new_i64();
2516         tcg_gen_mul_i64(t0, arg1, arg2);
2517         gen_helper_muluh_i64(rh, arg1, arg2);
2518         tcg_gen_mov_i64(rl, t0);
2519         tcg_temp_free_i64(t0);
2520     }
2521 }
2522 
2523 void tcg_gen_muls2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 arg1, TCGv_i64 arg2)
2524 {
2525     if (TCG_TARGET_HAS_muls2_i64) {
2526         tcg_gen_op4_i64(INDEX_op_muls2_i64, rl, rh, arg1, arg2);
2527     } else if (TCG_TARGET_HAS_mulsh_i64) {
2528         TCGv_i64 t = tcg_temp_new_i64();
2529         tcg_gen_op3_i64(INDEX_op_mul_i64, t, arg1, arg2);
2530         tcg_gen_op3_i64(INDEX_op_mulsh_i64, rh, arg1, arg2);
2531         tcg_gen_mov_i64(rl, t);
2532         tcg_temp_free_i64(t);
2533     } else if (TCG_TARGET_HAS_mulu2_i64 || TCG_TARGET_HAS_muluh_i64) {
2534         TCGv_i64 t0 = tcg_temp_new_i64();
2535         TCGv_i64 t1 = tcg_temp_new_i64();
2536         TCGv_i64 t2 = tcg_temp_new_i64();
2537         TCGv_i64 t3 = tcg_temp_new_i64();
2538         tcg_gen_mulu2_i64(t0, t1, arg1, arg2);
2539         /* Adjust for negative inputs.  */
2540         tcg_gen_sari_i64(t2, arg1, 63);
2541         tcg_gen_sari_i64(t3, arg2, 63);
2542         tcg_gen_and_i64(t2, t2, arg2);
2543         tcg_gen_and_i64(t3, t3, arg1);
2544         tcg_gen_sub_i64(rh, t1, t2);
2545         tcg_gen_sub_i64(rh, rh, t3);
2546         tcg_gen_mov_i64(rl, t0);
2547         tcg_temp_free_i64(t0);
2548         tcg_temp_free_i64(t1);
2549         tcg_temp_free_i64(t2);
2550         tcg_temp_free_i64(t3);
2551     } else {
2552         TCGv_i64 t0 = tcg_temp_new_i64();
2553         tcg_gen_mul_i64(t0, arg1, arg2);
2554         gen_helper_mulsh_i64(rh, arg1, arg2);
2555         tcg_gen_mov_i64(rl, t0);
2556         tcg_temp_free_i64(t0);
2557     }
2558 }
2559 
2560 void tcg_gen_mulsu2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 arg1, TCGv_i64 arg2)
2561 {
2562     TCGv_i64 t0 = tcg_temp_new_i64();
2563     TCGv_i64 t1 = tcg_temp_new_i64();
2564     TCGv_i64 t2 = tcg_temp_new_i64();
2565     tcg_gen_mulu2_i64(t0, t1, arg1, arg2);
2566     /* Adjust for negative input for the signed arg1.  */
2567     tcg_gen_sari_i64(t2, arg1, 63);
2568     tcg_gen_and_i64(t2, t2, arg2);
2569     tcg_gen_sub_i64(rh, t1, t2);
2570     tcg_gen_mov_i64(rl, t0);
2571     tcg_temp_free_i64(t0);
2572     tcg_temp_free_i64(t1);
2573     tcg_temp_free_i64(t2);
2574 }
2575 
2576 void tcg_gen_smin_i64(TCGv_i64 ret, TCGv_i64 a, TCGv_i64 b)
2577 {
2578     tcg_gen_movcond_i64(TCG_COND_LT, ret, a, b, a, b);
2579 }
2580 
2581 void tcg_gen_umin_i64(TCGv_i64 ret, TCGv_i64 a, TCGv_i64 b)
2582 {
2583     tcg_gen_movcond_i64(TCG_COND_LTU, ret, a, b, a, b);
2584 }
2585 
2586 void tcg_gen_smax_i64(TCGv_i64 ret, TCGv_i64 a, TCGv_i64 b)
2587 {
2588     tcg_gen_movcond_i64(TCG_COND_LT, ret, a, b, b, a);
2589 }
2590 
2591 void tcg_gen_umax_i64(TCGv_i64 ret, TCGv_i64 a, TCGv_i64 b)
2592 {
2593     tcg_gen_movcond_i64(TCG_COND_LTU, ret, a, b, b, a);
2594 }
2595 
2596 void tcg_gen_abs_i64(TCGv_i64 ret, TCGv_i64 a)
2597 {
2598     TCGv_i64 t = tcg_temp_new_i64();
2599 
2600     tcg_gen_sari_i64(t, a, 63);
2601     tcg_gen_xor_i64(ret, a, t);
2602     tcg_gen_sub_i64(ret, ret, t);
2603     tcg_temp_free_i64(t);
2604 }
2605 
2606 /* Size changing operations.  */
2607 
2608 void tcg_gen_extrl_i64_i32(TCGv_i32 ret, TCGv_i64 arg)
2609 {
2610     if (TCG_TARGET_REG_BITS == 32) {
2611         tcg_gen_mov_i32(ret, TCGV_LOW(arg));
2612     } else if (TCG_TARGET_HAS_extrl_i64_i32) {
2613         tcg_gen_op2(INDEX_op_extrl_i64_i32,
2614                     tcgv_i32_arg(ret), tcgv_i64_arg(arg));
2615     } else {
2616         tcg_gen_mov_i32(ret, (TCGv_i32)arg);
2617     }
2618 }
2619 
2620 void tcg_gen_extrh_i64_i32(TCGv_i32 ret, TCGv_i64 arg)
2621 {
2622     if (TCG_TARGET_REG_BITS == 32) {
2623         tcg_gen_mov_i32(ret, TCGV_HIGH(arg));
2624     } else if (TCG_TARGET_HAS_extrh_i64_i32) {
2625         tcg_gen_op2(INDEX_op_extrh_i64_i32,
2626                     tcgv_i32_arg(ret), tcgv_i64_arg(arg));
2627     } else {
2628         TCGv_i64 t = tcg_temp_new_i64();
2629         tcg_gen_shri_i64(t, arg, 32);
2630         tcg_gen_mov_i32(ret, (TCGv_i32)t);
2631         tcg_temp_free_i64(t);
2632     }
2633 }
2634 
2635 void tcg_gen_extu_i32_i64(TCGv_i64 ret, TCGv_i32 arg)
2636 {
2637     if (TCG_TARGET_REG_BITS == 32) {
2638         tcg_gen_mov_i32(TCGV_LOW(ret), arg);
2639         tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
2640     } else {
2641         tcg_gen_op2(INDEX_op_extu_i32_i64,
2642                     tcgv_i64_arg(ret), tcgv_i32_arg(arg));
2643     }
2644 }
2645 
2646 void tcg_gen_ext_i32_i64(TCGv_i64 ret, TCGv_i32 arg)
2647 {
2648     if (TCG_TARGET_REG_BITS == 32) {
2649         tcg_gen_mov_i32(TCGV_LOW(ret), arg);
2650         tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31);
2651     } else {
2652         tcg_gen_op2(INDEX_op_ext_i32_i64,
2653                     tcgv_i64_arg(ret), tcgv_i32_arg(arg));
2654     }
2655 }
2656 
2657 void tcg_gen_concat_i32_i64(TCGv_i64 dest, TCGv_i32 low, TCGv_i32 high)
2658 {
2659     TCGv_i64 tmp;
2660 
2661     if (TCG_TARGET_REG_BITS == 32) {
2662         tcg_gen_mov_i32(TCGV_LOW(dest), low);
2663         tcg_gen_mov_i32(TCGV_HIGH(dest), high);
2664         return;
2665     }
2666 
2667     tmp = tcg_temp_new_i64();
2668     /* These extensions are only needed for type correctness.
2669        We may be able to do better given target specific information.  */
2670     tcg_gen_extu_i32_i64(tmp, high);
2671     tcg_gen_extu_i32_i64(dest, low);
2672     /* If deposit is available, use it.  Otherwise use the extra
2673        knowledge that we have of the zero-extensions above.  */
2674     if (TCG_TARGET_HAS_deposit_i64 && TCG_TARGET_deposit_i64_valid(32, 32)) {
2675         tcg_gen_deposit_i64(dest, dest, tmp, 32, 32);
2676     } else {
2677         tcg_gen_shli_i64(tmp, tmp, 32);
2678         tcg_gen_or_i64(dest, dest, tmp);
2679     }
2680     tcg_temp_free_i64(tmp);
2681 }
2682 
2683 void tcg_gen_extr_i64_i32(TCGv_i32 lo, TCGv_i32 hi, TCGv_i64 arg)
2684 {
2685     if (TCG_TARGET_REG_BITS == 32) {
2686         tcg_gen_mov_i32(lo, TCGV_LOW(arg));
2687         tcg_gen_mov_i32(hi, TCGV_HIGH(arg));
2688     } else {
2689         tcg_gen_extrl_i64_i32(lo, arg);
2690         tcg_gen_extrh_i64_i32(hi, arg);
2691     }
2692 }
2693 
2694 void tcg_gen_extr32_i64(TCGv_i64 lo, TCGv_i64 hi, TCGv_i64 arg)
2695 {
2696     tcg_gen_ext32u_i64(lo, arg);
2697     tcg_gen_shri_i64(hi, arg, 32);
2698 }
2699 
2700 /* QEMU specific operations.  */
2701 
2702 void tcg_gen_exit_tb(const TranslationBlock *tb, unsigned idx)
2703 {
2704     /*
2705      * Let the jit code return the read-only version of the
2706      * TranslationBlock, so that we minimize the pc-relative
2707      * distance of the address of the exit_tb code to TB.
2708      * This will improve utilization of pc-relative address loads.
2709      *
2710      * TODO: Move this to translator_loop, so that all const
2711      * TranslationBlock pointers refer to read-only memory.
2712      * This requires coordination with targets that do not use
2713      * the translator_loop.
2714      */
2715     uintptr_t val = (uintptr_t)tcg_splitwx_to_rx((void *)tb) + idx;
2716 
2717     if (tb == NULL) {
2718         tcg_debug_assert(idx == 0);
2719     } else if (idx <= TB_EXIT_IDXMAX) {
2720 #ifdef CONFIG_DEBUG_TCG
2721         /* This is an exit following a goto_tb.  Verify that we have
2722            seen this numbered exit before, via tcg_gen_goto_tb.  */
2723         tcg_debug_assert(tcg_ctx->goto_tb_issue_mask & (1 << idx));
2724 #endif
2725     } else {
2726         /* This is an exit via the exitreq label.  */
2727         tcg_debug_assert(idx == TB_EXIT_REQUESTED);
2728     }
2729 
2730     plugin_gen_disable_mem_helpers();
2731     tcg_gen_op1i(INDEX_op_exit_tb, val);
2732 }
2733 
2734 void tcg_gen_goto_tb(unsigned idx)
2735 {
2736     /* We tested CF_NO_GOTO_TB in translator_use_goto_tb. */
2737     tcg_debug_assert(!(tcg_ctx->tb_cflags & CF_NO_GOTO_TB));
2738     /* We only support two chained exits.  */
2739     tcg_debug_assert(idx <= TB_EXIT_IDXMAX);
2740 #ifdef CONFIG_DEBUG_TCG
2741     /* Verify that we haven't seen this numbered exit before.  */
2742     tcg_debug_assert((tcg_ctx->goto_tb_issue_mask & (1 << idx)) == 0);
2743     tcg_ctx->goto_tb_issue_mask |= 1 << idx;
2744 #endif
2745     plugin_gen_disable_mem_helpers();
2746     tcg_gen_op1i(INDEX_op_goto_tb, idx);
2747 }
2748 
2749 void tcg_gen_lookup_and_goto_ptr(void)
2750 {
2751     TCGv_ptr ptr;
2752 
2753     if (tcg_ctx->tb_cflags & CF_NO_GOTO_PTR) {
2754         tcg_gen_exit_tb(NULL, 0);
2755         return;
2756     }
2757 
2758     plugin_gen_disable_mem_helpers();
2759     ptr = tcg_temp_new_ptr();
2760     gen_helper_lookup_tb_ptr(ptr, cpu_env);
2761     tcg_gen_op1i(INDEX_op_goto_ptr, tcgv_ptr_arg(ptr));
2762     tcg_temp_free_ptr(ptr);
2763 }
2764 
2765 static inline MemOp tcg_canonicalize_memop(MemOp op, bool is64, bool st)
2766 {
2767     /* Trigger the asserts within as early as possible.  */
2768     unsigned a_bits = get_alignment_bits(op);
2769 
2770     /* Prefer MO_ALIGN+MO_XX over MO_ALIGN_XX+MO_XX */
2771     if (a_bits == (op & MO_SIZE)) {
2772         op = (op & ~MO_AMASK) | MO_ALIGN;
2773     }
2774 
2775     switch (op & MO_SIZE) {
2776     case MO_8:
2777         op &= ~MO_BSWAP;
2778         break;
2779     case MO_16:
2780         break;
2781     case MO_32:
2782         if (!is64) {
2783             op &= ~MO_SIGN;
2784         }
2785         break;
2786     case MO_64:
2787         if (is64) {
2788             op &= ~MO_SIGN;
2789             break;
2790         }
2791         /* fall through */
2792     default:
2793         g_assert_not_reached();
2794     }
2795     if (st) {
2796         op &= ~MO_SIGN;
2797     }
2798     return op;
2799 }
2800 
2801 static void gen_ldst_i32(TCGOpcode opc, TCGv_i32 val, TCGv addr,
2802                          MemOp memop, TCGArg idx)
2803 {
2804     MemOpIdx oi = make_memop_idx(memop, idx);
2805 #if TARGET_LONG_BITS == 32
2806     tcg_gen_op3i_i32(opc, val, addr, oi);
2807 #else
2808     if (TCG_TARGET_REG_BITS == 32) {
2809         tcg_gen_op4i_i32(opc, val, TCGV_LOW(addr), TCGV_HIGH(addr), oi);
2810     } else {
2811         tcg_gen_op3(opc, tcgv_i32_arg(val), tcgv_i64_arg(addr), oi);
2812     }
2813 #endif
2814 }
2815 
2816 static void gen_ldst_i64(TCGOpcode opc, TCGv_i64 val, TCGv addr,
2817                          MemOp memop, TCGArg idx)
2818 {
2819     MemOpIdx oi = make_memop_idx(memop, idx);
2820 #if TARGET_LONG_BITS == 32
2821     if (TCG_TARGET_REG_BITS == 32) {
2822         tcg_gen_op4i_i32(opc, TCGV_LOW(val), TCGV_HIGH(val), addr, oi);
2823     } else {
2824         tcg_gen_op3(opc, tcgv_i64_arg(val), tcgv_i32_arg(addr), oi);
2825     }
2826 #else
2827     if (TCG_TARGET_REG_BITS == 32) {
2828         tcg_gen_op5i_i32(opc, TCGV_LOW(val), TCGV_HIGH(val),
2829                          TCGV_LOW(addr), TCGV_HIGH(addr), oi);
2830     } else {
2831         tcg_gen_op3i_i64(opc, val, addr, oi);
2832     }
2833 #endif
2834 }
2835 
2836 static void tcg_gen_req_mo(TCGBar type)
2837 {
2838 #ifdef TCG_GUEST_DEFAULT_MO
2839     type &= TCG_GUEST_DEFAULT_MO;
2840 #endif
2841     type &= ~TCG_TARGET_DEFAULT_MO;
2842     if (type) {
2843         tcg_gen_mb(type | TCG_BAR_SC);
2844     }
2845 }
2846 
2847 static inline TCGv plugin_prep_mem_callbacks(TCGv vaddr)
2848 {
2849 #ifdef CONFIG_PLUGIN
2850     if (tcg_ctx->plugin_insn != NULL) {
2851         /* Save a copy of the vaddr for use after a load.  */
2852         TCGv temp = tcg_temp_new();
2853         tcg_gen_mov_tl(temp, vaddr);
2854         return temp;
2855     }
2856 #endif
2857     return vaddr;
2858 }
2859 
2860 static void plugin_gen_mem_callbacks(TCGv vaddr, MemOpIdx oi,
2861                                      enum qemu_plugin_mem_rw rw)
2862 {
2863 #ifdef CONFIG_PLUGIN
2864     if (tcg_ctx->plugin_insn != NULL) {
2865         qemu_plugin_meminfo_t info = make_plugin_meminfo(oi, rw);
2866         plugin_gen_empty_mem_callback(vaddr, info);
2867         tcg_temp_free(vaddr);
2868     }
2869 #endif
2870 }
2871 
2872 void tcg_gen_qemu_ld_i32(TCGv_i32 val, TCGv addr, TCGArg idx, MemOp memop)
2873 {
2874     MemOp orig_memop;
2875     MemOpIdx oi;
2876 
2877     tcg_gen_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
2878     memop = tcg_canonicalize_memop(memop, 0, 0);
2879     oi = make_memop_idx(memop, idx);
2880     trace_guest_ld_before_tcg(tcg_ctx->cpu, cpu_env, addr, oi);
2881 
2882     orig_memop = memop;
2883     if (!TCG_TARGET_HAS_MEMORY_BSWAP && (memop & MO_BSWAP)) {
2884         memop &= ~MO_BSWAP;
2885         /* The bswap primitive benefits from zero-extended input.  */
2886         if ((memop & MO_SSIZE) == MO_SW) {
2887             memop &= ~MO_SIGN;
2888         }
2889     }
2890 
2891     addr = plugin_prep_mem_callbacks(addr);
2892     gen_ldst_i32(INDEX_op_qemu_ld_i32, val, addr, memop, idx);
2893     plugin_gen_mem_callbacks(addr, oi, QEMU_PLUGIN_MEM_R);
2894 
2895     if ((orig_memop ^ memop) & MO_BSWAP) {
2896         switch (orig_memop & MO_SIZE) {
2897         case MO_16:
2898             tcg_gen_bswap16_i32(val, val, (orig_memop & MO_SIGN
2899                                            ? TCG_BSWAP_IZ | TCG_BSWAP_OS
2900                                            : TCG_BSWAP_IZ | TCG_BSWAP_OZ));
2901             break;
2902         case MO_32:
2903             tcg_gen_bswap32_i32(val, val);
2904             break;
2905         default:
2906             g_assert_not_reached();
2907         }
2908     }
2909 }
2910 
2911 void tcg_gen_qemu_st_i32(TCGv_i32 val, TCGv addr, TCGArg idx, MemOp memop)
2912 {
2913     TCGv_i32 swap = NULL;
2914     MemOpIdx oi;
2915 
2916     tcg_gen_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST);
2917     memop = tcg_canonicalize_memop(memop, 0, 1);
2918     oi = make_memop_idx(memop, idx);
2919     trace_guest_st_before_tcg(tcg_ctx->cpu, cpu_env, addr, oi);
2920 
2921     if (!TCG_TARGET_HAS_MEMORY_BSWAP && (memop & MO_BSWAP)) {
2922         swap = tcg_temp_new_i32();
2923         switch (memop & MO_SIZE) {
2924         case MO_16:
2925             tcg_gen_bswap16_i32(swap, val, 0);
2926             break;
2927         case MO_32:
2928             tcg_gen_bswap32_i32(swap, val);
2929             break;
2930         default:
2931             g_assert_not_reached();
2932         }
2933         val = swap;
2934         memop &= ~MO_BSWAP;
2935     }
2936 
2937     addr = plugin_prep_mem_callbacks(addr);
2938     if (TCG_TARGET_HAS_qemu_st8_i32 && (memop & MO_SIZE) == MO_8) {
2939         gen_ldst_i32(INDEX_op_qemu_st8_i32, val, addr, memop, idx);
2940     } else {
2941         gen_ldst_i32(INDEX_op_qemu_st_i32, val, addr, memop, idx);
2942     }
2943     plugin_gen_mem_callbacks(addr, oi, QEMU_PLUGIN_MEM_W);
2944 
2945     if (swap) {
2946         tcg_temp_free_i32(swap);
2947     }
2948 }
2949 
2950 void tcg_gen_qemu_ld_i64(TCGv_i64 val, TCGv addr, TCGArg idx, MemOp memop)
2951 {
2952     MemOp orig_memop;
2953     MemOpIdx oi;
2954 
2955     if (TCG_TARGET_REG_BITS == 32 && (memop & MO_SIZE) < MO_64) {
2956         tcg_gen_qemu_ld_i32(TCGV_LOW(val), addr, idx, memop);
2957         if (memop & MO_SIGN) {
2958             tcg_gen_sari_i32(TCGV_HIGH(val), TCGV_LOW(val), 31);
2959         } else {
2960             tcg_gen_movi_i32(TCGV_HIGH(val), 0);
2961         }
2962         return;
2963     }
2964 
2965     tcg_gen_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
2966     memop = tcg_canonicalize_memop(memop, 1, 0);
2967     oi = make_memop_idx(memop, idx);
2968     trace_guest_ld_before_tcg(tcg_ctx->cpu, cpu_env, addr, oi);
2969 
2970     orig_memop = memop;
2971     if (!TCG_TARGET_HAS_MEMORY_BSWAP && (memop & MO_BSWAP)) {
2972         memop &= ~MO_BSWAP;
2973         /* The bswap primitive benefits from zero-extended input.  */
2974         if ((memop & MO_SIGN) && (memop & MO_SIZE) < MO_64) {
2975             memop &= ~MO_SIGN;
2976         }
2977     }
2978 
2979     addr = plugin_prep_mem_callbacks(addr);
2980     gen_ldst_i64(INDEX_op_qemu_ld_i64, val, addr, memop, idx);
2981     plugin_gen_mem_callbacks(addr, oi, QEMU_PLUGIN_MEM_R);
2982 
2983     if ((orig_memop ^ memop) & MO_BSWAP) {
2984         int flags = (orig_memop & MO_SIGN
2985                      ? TCG_BSWAP_IZ | TCG_BSWAP_OS
2986                      : TCG_BSWAP_IZ | TCG_BSWAP_OZ);
2987         switch (orig_memop & MO_SIZE) {
2988         case MO_16:
2989             tcg_gen_bswap16_i64(val, val, flags);
2990             break;
2991         case MO_32:
2992             tcg_gen_bswap32_i64(val, val, flags);
2993             break;
2994         case MO_64:
2995             tcg_gen_bswap64_i64(val, val);
2996             break;
2997         default:
2998             g_assert_not_reached();
2999         }
3000     }
3001 }
3002 
3003 void tcg_gen_qemu_st_i64(TCGv_i64 val, TCGv addr, TCGArg idx, MemOp memop)
3004 {
3005     TCGv_i64 swap = NULL;
3006     MemOpIdx oi;
3007 
3008     if (TCG_TARGET_REG_BITS == 32 && (memop & MO_SIZE) < MO_64) {
3009         tcg_gen_qemu_st_i32(TCGV_LOW(val), addr, idx, memop);
3010         return;
3011     }
3012 
3013     tcg_gen_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST);
3014     memop = tcg_canonicalize_memop(memop, 1, 1);
3015     oi = make_memop_idx(memop, idx);
3016     trace_guest_st_before_tcg(tcg_ctx->cpu, cpu_env, addr, oi);
3017 
3018     if (!TCG_TARGET_HAS_MEMORY_BSWAP && (memop & MO_BSWAP)) {
3019         swap = tcg_temp_new_i64();
3020         switch (memop & MO_SIZE) {
3021         case MO_16:
3022             tcg_gen_bswap16_i64(swap, val, 0);
3023             break;
3024         case MO_32:
3025             tcg_gen_bswap32_i64(swap, val, 0);
3026             break;
3027         case MO_64:
3028             tcg_gen_bswap64_i64(swap, val);
3029             break;
3030         default:
3031             g_assert_not_reached();
3032         }
3033         val = swap;
3034         memop &= ~MO_BSWAP;
3035     }
3036 
3037     addr = plugin_prep_mem_callbacks(addr);
3038     gen_ldst_i64(INDEX_op_qemu_st_i64, val, addr, memop, idx);
3039     plugin_gen_mem_callbacks(addr, oi, QEMU_PLUGIN_MEM_W);
3040 
3041     if (swap) {
3042         tcg_temp_free_i64(swap);
3043     }
3044 }
3045 
3046 static void tcg_gen_ext_i32(TCGv_i32 ret, TCGv_i32 val, MemOp opc)
3047 {
3048     switch (opc & MO_SSIZE) {
3049     case MO_SB:
3050         tcg_gen_ext8s_i32(ret, val);
3051         break;
3052     case MO_UB:
3053         tcg_gen_ext8u_i32(ret, val);
3054         break;
3055     case MO_SW:
3056         tcg_gen_ext16s_i32(ret, val);
3057         break;
3058     case MO_UW:
3059         tcg_gen_ext16u_i32(ret, val);
3060         break;
3061     default:
3062         tcg_gen_mov_i32(ret, val);
3063         break;
3064     }
3065 }
3066 
3067 static void tcg_gen_ext_i64(TCGv_i64 ret, TCGv_i64 val, MemOp opc)
3068 {
3069     switch (opc & MO_SSIZE) {
3070     case MO_SB:
3071         tcg_gen_ext8s_i64(ret, val);
3072         break;
3073     case MO_UB:
3074         tcg_gen_ext8u_i64(ret, val);
3075         break;
3076     case MO_SW:
3077         tcg_gen_ext16s_i64(ret, val);
3078         break;
3079     case MO_UW:
3080         tcg_gen_ext16u_i64(ret, val);
3081         break;
3082     case MO_SL:
3083         tcg_gen_ext32s_i64(ret, val);
3084         break;
3085     case MO_UL:
3086         tcg_gen_ext32u_i64(ret, val);
3087         break;
3088     default:
3089         tcg_gen_mov_i64(ret, val);
3090         break;
3091     }
3092 }
3093 
/*
 * Function-pointer types used to call entries of the atomic helper
 * tables below.
 *
 * gen_atomic_cx_*: compare-and-exchange generators, called as
 *     (result, env, address, compare value, new value, memop index).
 * gen_atomic_op_*: read-modify-write generators, called as
 *     (result, env, address, operand value, memop index).
 *
 * The memop index is always passed as a 32-bit constant, including for
 * the 64-bit variants.
 */
typedef void (*gen_atomic_cx_i32)(TCGv_i32, TCGv_env, TCGv,
                                  TCGv_i32, TCGv_i32, TCGv_i32);
typedef void (*gen_atomic_cx_i64)(TCGv_i64, TCGv_env, TCGv,
                                  TCGv_i64, TCGv_i64, TCGv_i32);
typedef void (*gen_atomic_op_i32)(TCGv_i32, TCGv_env, TCGv,
                                  TCGv_i32, TCGv_i32);
typedef void (*gen_atomic_op_i64)(TCGv_i64, TCGv_env, TCGv,
                                  TCGv_i64, TCGv_i32);
3102 
/*
 * Table-initializer helper: expands to "X," when CONFIG_ATOMIC64 is
 * defined and to nothing otherwise, so 64-bit entries are simply absent
 * (NULL) from the helper tables in builds without CONFIG_ATOMIC64.
 */
#ifdef CONFIG_ATOMIC64
# define WITH_ATOMIC64(X) X,
#else
# define WITH_ATOMIC64(X)
#endif
3108 
/*
 * Compare-and-exchange helper generators, indexed by the size and
 * byte-swap bits of a MemOp (memop & (MO_SIZE | MO_BSWAP)).  Entries
 * are stored as void * and cast to gen_atomic_cx_i32/i64 by the callers;
 * slots with no initializer are NULL.
 */
static void * const table_cmpxchg[(MO_SIZE | MO_BSWAP) + 1] = {
    [MO_8] = gen_helper_atomic_cmpxchgb,
    [MO_16 | MO_LE] = gen_helper_atomic_cmpxchgw_le,
    [MO_16 | MO_BE] = gen_helper_atomic_cmpxchgw_be,
    [MO_32 | MO_LE] = gen_helper_atomic_cmpxchgl_le,
    [MO_32 | MO_BE] = gen_helper_atomic_cmpxchgl_be,
    WITH_ATOMIC64([MO_64 | MO_LE] = gen_helper_atomic_cmpxchgq_le)
    WITH_ATOMIC64([MO_64 | MO_BE] = gen_helper_atomic_cmpxchgq_be)
};
3118 
3119 void tcg_gen_atomic_cmpxchg_i32(TCGv_i32 retv, TCGv addr, TCGv_i32 cmpv,
3120                                 TCGv_i32 newv, TCGArg idx, MemOp memop)
3121 {
3122     memop = tcg_canonicalize_memop(memop, 0, 0);
3123 
3124     if (!(tcg_ctx->tb_cflags & CF_PARALLEL)) {
3125         TCGv_i32 t1 = tcg_temp_new_i32();
3126         TCGv_i32 t2 = tcg_temp_new_i32();
3127 
3128         tcg_gen_ext_i32(t2, cmpv, memop & MO_SIZE);
3129 
3130         tcg_gen_qemu_ld_i32(t1, addr, idx, memop & ~MO_SIGN);
3131         tcg_gen_movcond_i32(TCG_COND_EQ, t2, t1, t2, newv, t1);
3132         tcg_gen_qemu_st_i32(t2, addr, idx, memop);
3133         tcg_temp_free_i32(t2);
3134 
3135         if (memop & MO_SIGN) {
3136             tcg_gen_ext_i32(retv, t1, memop);
3137         } else {
3138             tcg_gen_mov_i32(retv, t1);
3139         }
3140         tcg_temp_free_i32(t1);
3141     } else {
3142         gen_atomic_cx_i32 gen;
3143         MemOpIdx oi;
3144 
3145         gen = table_cmpxchg[memop & (MO_SIZE | MO_BSWAP)];
3146         tcg_debug_assert(gen != NULL);
3147 
3148         oi = make_memop_idx(memop & ~MO_SIGN, idx);
3149         gen(retv, cpu_env, addr, cmpv, newv, tcg_constant_i32(oi));
3150 
3151         if (memop & MO_SIGN) {
3152             tcg_gen_ext_i32(retv, retv, memop);
3153         }
3154     }
3155 }
3156 
3157 void tcg_gen_atomic_cmpxchg_i64(TCGv_i64 retv, TCGv addr, TCGv_i64 cmpv,
3158                                 TCGv_i64 newv, TCGArg idx, MemOp memop)
3159 {
3160     memop = tcg_canonicalize_memop(memop, 1, 0);
3161 
3162     if (!(tcg_ctx->tb_cflags & CF_PARALLEL)) {
3163         TCGv_i64 t1 = tcg_temp_new_i64();
3164         TCGv_i64 t2 = tcg_temp_new_i64();
3165 
3166         tcg_gen_ext_i64(t2, cmpv, memop & MO_SIZE);
3167 
3168         tcg_gen_qemu_ld_i64(t1, addr, idx, memop & ~MO_SIGN);
3169         tcg_gen_movcond_i64(TCG_COND_EQ, t2, t1, t2, newv, t1);
3170         tcg_gen_qemu_st_i64(t2, addr, idx, memop);
3171         tcg_temp_free_i64(t2);
3172 
3173         if (memop & MO_SIGN) {
3174             tcg_gen_ext_i64(retv, t1, memop);
3175         } else {
3176             tcg_gen_mov_i64(retv, t1);
3177         }
3178         tcg_temp_free_i64(t1);
3179     } else if ((memop & MO_SIZE) == MO_64) {
3180 #ifdef CONFIG_ATOMIC64
3181         gen_atomic_cx_i64 gen;
3182         MemOpIdx oi;
3183 
3184         gen = table_cmpxchg[memop & (MO_SIZE | MO_BSWAP)];
3185         tcg_debug_assert(gen != NULL);
3186 
3187         oi = make_memop_idx(memop, idx);
3188         gen(retv, cpu_env, addr, cmpv, newv, tcg_constant_i32(oi));
3189 #else
3190         gen_helper_exit_atomic(cpu_env);
3191         /* Produce a result, so that we have a well-formed opcode stream
3192            with respect to uses of the result in the (dead) code following.  */
3193         tcg_gen_movi_i64(retv, 0);
3194 #endif /* CONFIG_ATOMIC64 */
3195     } else {
3196         TCGv_i32 c32 = tcg_temp_new_i32();
3197         TCGv_i32 n32 = tcg_temp_new_i32();
3198         TCGv_i32 r32 = tcg_temp_new_i32();
3199 
3200         tcg_gen_extrl_i64_i32(c32, cmpv);
3201         tcg_gen_extrl_i64_i32(n32, newv);
3202         tcg_gen_atomic_cmpxchg_i32(r32, addr, c32, n32, idx, memop & ~MO_SIGN);
3203         tcg_temp_free_i32(c32);
3204         tcg_temp_free_i32(n32);
3205 
3206         tcg_gen_extu_i32_i64(retv, r32);
3207         tcg_temp_free_i32(r32);
3208 
3209         if (memop & MO_SIGN) {
3210             tcg_gen_ext_i64(retv, retv, memop);
3211         }
3212     }
3213 }
3214 
3215 static void do_nonatomic_op_i32(TCGv_i32 ret, TCGv addr, TCGv_i32 val,
3216                                 TCGArg idx, MemOp memop, bool new_val,
3217                                 void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32))
3218 {
3219     TCGv_i32 t1 = tcg_temp_new_i32();
3220     TCGv_i32 t2 = tcg_temp_new_i32();
3221 
3222     memop = tcg_canonicalize_memop(memop, 0, 0);
3223 
3224     tcg_gen_qemu_ld_i32(t1, addr, idx, memop);
3225     tcg_gen_ext_i32(t2, val, memop);
3226     gen(t2, t1, t2);
3227     tcg_gen_qemu_st_i32(t2, addr, idx, memop);
3228 
3229     tcg_gen_ext_i32(ret, (new_val ? t2 : t1), memop);
3230     tcg_temp_free_i32(t1);
3231     tcg_temp_free_i32(t2);
3232 }
3233 
3234 static void do_atomic_op_i32(TCGv_i32 ret, TCGv addr, TCGv_i32 val,
3235                              TCGArg idx, MemOp memop, void * const table[])
3236 {
3237     gen_atomic_op_i32 gen;
3238     MemOpIdx oi;
3239 
3240     memop = tcg_canonicalize_memop(memop, 0, 0);
3241 
3242     gen = table[memop & (MO_SIZE | MO_BSWAP)];
3243     tcg_debug_assert(gen != NULL);
3244 
3245     oi = make_memop_idx(memop & ~MO_SIGN, idx);
3246     gen(ret, cpu_env, addr, val, tcg_constant_i32(oi));
3247 
3248     if (memop & MO_SIGN) {
3249         tcg_gen_ext_i32(ret, ret, memop);
3250     }
3251 }
3252 
3253 static void do_nonatomic_op_i64(TCGv_i64 ret, TCGv addr, TCGv_i64 val,
3254                                 TCGArg idx, MemOp memop, bool new_val,
3255                                 void (*gen)(TCGv_i64, TCGv_i64, TCGv_i64))
3256 {
3257     TCGv_i64 t1 = tcg_temp_new_i64();
3258     TCGv_i64 t2 = tcg_temp_new_i64();
3259 
3260     memop = tcg_canonicalize_memop(memop, 1, 0);
3261 
3262     tcg_gen_qemu_ld_i64(t1, addr, idx, memop);
3263     tcg_gen_ext_i64(t2, val, memop);
3264     gen(t2, t1, t2);
3265     tcg_gen_qemu_st_i64(t2, addr, idx, memop);
3266 
3267     tcg_gen_ext_i64(ret, (new_val ? t2 : t1), memop);
3268     tcg_temp_free_i64(t1);
3269     tcg_temp_free_i64(t2);
3270 }
3271 
3272 static void do_atomic_op_i64(TCGv_i64 ret, TCGv addr, TCGv_i64 val,
3273                              TCGArg idx, MemOp memop, void * const table[])
3274 {
3275     memop = tcg_canonicalize_memop(memop, 1, 0);
3276 
3277     if ((memop & MO_SIZE) == MO_64) {
3278 #ifdef CONFIG_ATOMIC64
3279         gen_atomic_op_i64 gen;
3280         MemOpIdx oi;
3281 
3282         gen = table[memop & (MO_SIZE | MO_BSWAP)];
3283         tcg_debug_assert(gen != NULL);
3284 
3285         oi = make_memop_idx(memop & ~MO_SIGN, idx);
3286         gen(ret, cpu_env, addr, val, tcg_constant_i32(oi));
3287 #else
3288         gen_helper_exit_atomic(cpu_env);
3289         /* Produce a result, so that we have a well-formed opcode stream
3290            with respect to uses of the result in the (dead) code following.  */
3291         tcg_gen_movi_i64(ret, 0);
3292 #endif /* CONFIG_ATOMIC64 */
3293     } else {
3294         TCGv_i32 v32 = tcg_temp_new_i32();
3295         TCGv_i32 r32 = tcg_temp_new_i32();
3296 
3297         tcg_gen_extrl_i64_i32(v32, val);
3298         do_atomic_op_i32(r32, addr, v32, idx, memop & ~MO_SIGN, table);
3299         tcg_temp_free_i32(v32);
3300 
3301         tcg_gen_extu_i32_i64(ret, r32);
3302         tcg_temp_free_i32(r32);
3303 
3304         if (memop & MO_SIGN) {
3305             tcg_gen_ext_i64(ret, ret, memop);
3306         }
3307     }
3308 }
3309 
/*
 * GEN_ATOMIC_HELPER(NAME, OP, NEW) defines, for one atomic operation:
 *
 *   - table_NAME: the gen_helper_atomic_NAME* generators indexed by the
 *     size and byte-swap bits of a MemOp (64-bit entries only with
 *     CONFIG_ATOMIC64);
 *   - tcg_gen_atomic_NAME_i32() and tcg_gen_atomic_NAME_i64(): the
 *     public entry points.
 *
 * Each entry point uses the helper table (do_atomic_op_*) when
 * CF_PARALLEL is set in the current cflags, and otherwise open-codes
 * the operation with tcg_gen_OP_i32/_i64 (do_nonatomic_op_*).  NEW is
 * forwarded as the new_val argument of do_nonatomic_op_*: non-zero
 * returns the updated value, zero returns the value loaded from memory.
 */
#define GEN_ATOMIC_HELPER(NAME, OP, NEW)                                \
static void * const table_##NAME[(MO_SIZE | MO_BSWAP) + 1] = {          \
    [MO_8] = gen_helper_atomic_##NAME##b,                               \
    [MO_16 | MO_LE] = gen_helper_atomic_##NAME##w_le,                   \
    [MO_16 | MO_BE] = gen_helper_atomic_##NAME##w_be,                   \
    [MO_32 | MO_LE] = gen_helper_atomic_##NAME##l_le,                   \
    [MO_32 | MO_BE] = gen_helper_atomic_##NAME##l_be,                   \
    WITH_ATOMIC64([MO_64 | MO_LE] = gen_helper_atomic_##NAME##q_le)     \
    WITH_ATOMIC64([MO_64 | MO_BE] = gen_helper_atomic_##NAME##q_be)     \
};                                                                      \
void tcg_gen_atomic_##NAME##_i32                                        \
    (TCGv_i32 ret, TCGv addr, TCGv_i32 val, TCGArg idx, MemOp memop)    \
{                                                                       \
    if (tcg_ctx->tb_cflags & CF_PARALLEL) {                             \
        do_atomic_op_i32(ret, addr, val, idx, memop, table_##NAME);     \
    } else {                                                            \
        do_nonatomic_op_i32(ret, addr, val, idx, memop, NEW,            \
                            tcg_gen_##OP##_i32);                        \
    }                                                                   \
}                                                                       \
void tcg_gen_atomic_##NAME##_i64                                        \
    (TCGv_i64 ret, TCGv addr, TCGv_i64 val, TCGArg idx, MemOp memop)    \
{                                                                       \
    if (tcg_ctx->tb_cflags & CF_PARALLEL) {                             \
        do_atomic_op_i64(ret, addr, val, idx, memop, table_##NAME);     \
    } else {                                                            \
        do_nonatomic_op_i64(ret, addr, val, idx, memop, NEW,            \
                            tcg_gen_##OP##_i64);                        \
    }                                                                   \
}
3340 
/*
 * fetch_OP variants: NEW = 0, so the non-atomic path returns the value
 * loaded from memory before the operation.
 */
GEN_ATOMIC_HELPER(fetch_add, add, 0)
GEN_ATOMIC_HELPER(fetch_and, and, 0)
GEN_ATOMIC_HELPER(fetch_or, or, 0)
GEN_ATOMIC_HELPER(fetch_xor, xor, 0)
GEN_ATOMIC_HELPER(fetch_smin, smin, 0)
GEN_ATOMIC_HELPER(fetch_umin, umin, 0)
GEN_ATOMIC_HELPER(fetch_smax, smax, 0)
GEN_ATOMIC_HELPER(fetch_umax, umax, 0)

/*
 * OP_fetch variants: NEW = 1, so the non-atomic path returns the value
 * that was stored back to memory.
 */
GEN_ATOMIC_HELPER(add_fetch, add, 1)
GEN_ATOMIC_HELPER(and_fetch, and, 1)
GEN_ATOMIC_HELPER(or_fetch, or, 1)
GEN_ATOMIC_HELPER(xor_fetch, xor, 1)
GEN_ATOMIC_HELPER(smin_fetch, smin, 1)
GEN_ATOMIC_HELPER(umin_fetch, umin, 1)
GEN_ATOMIC_HELPER(smax_fetch, smax, 1)
GEN_ATOMIC_HELPER(umax_fetch, umax, 1)
3358 
/*
 * Three-operand wrapper around a 32-bit move: copies B into R and
 * ignores A.  It has the binary-operation signature expected by
 * do_nonatomic_op_i32, where it serves as the OP for xchg.
 */
static void tcg_gen_mov2_i32(TCGv_i32 r, TCGv_i32 a, TCGv_i32 b)
{
    tcg_gen_mov_i32(r, b);
}
3363 
/*
 * Three-operand wrapper around a 64-bit move: copies B into R and
 * ignores A.  It has the binary-operation signature expected by
 * do_nonatomic_op_i64, where it serves as the OP for xchg.
 */
static void tcg_gen_mov2_i64(TCGv_i64 r, TCGv_i64 a, TCGv_i64 b)
{
    tcg_gen_mov_i64(r, b);
}
3368 
/*
 * Exchange: the "operation" is mov2, so the non-atomic path stores VAL
 * unchanged; NEW = 0 returns the value previously in memory.
 */
GEN_ATOMIC_HELPER(xchg, mov2, 0)

/* The generator macro is not needed past this point.  */
#undef GEN_ATOMIC_HELPER
3372