xref: /openbmc/qemu/tcg/tcg-op.c (revision 7a5951f651ad5f158631a826070b24631e733763)
1 /*
2  * Tiny Code Generator for QEMU
3  *
4  * Copyright (c) 2008 Fabrice Bellard
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24 
25 #include "qemu/osdep.h"
26 #include "exec/exec-all.h"
27 #include "tcg/tcg.h"
28 #include "tcg/tcg-op.h"
29 #include "tcg/tcg-mo.h"
30 #include "exec/plugin-gen.h"
31 #include "tcg-internal.h"
32 
33 
34 void tcg_gen_op1(TCGOpcode opc, TCGArg a1)
35 {
36     TCGOp *op = tcg_emit_op(opc, 1);
37     op->args[0] = a1;
38 }
39 
40 void tcg_gen_op2(TCGOpcode opc, TCGArg a1, TCGArg a2)
41 {
42     TCGOp *op = tcg_emit_op(opc, 2);
43     op->args[0] = a1;
44     op->args[1] = a2;
45 }
46 
47 void tcg_gen_op3(TCGOpcode opc, TCGArg a1, TCGArg a2, TCGArg a3)
48 {
49     TCGOp *op = tcg_emit_op(opc, 3);
50     op->args[0] = a1;
51     op->args[1] = a2;
52     op->args[2] = a3;
53 }
54 
55 void tcg_gen_op4(TCGOpcode opc, TCGArg a1, TCGArg a2, TCGArg a3, TCGArg a4)
56 {
57     TCGOp *op = tcg_emit_op(opc, 4);
58     op->args[0] = a1;
59     op->args[1] = a2;
60     op->args[2] = a3;
61     op->args[3] = a4;
62 }
63 
64 void tcg_gen_op5(TCGOpcode opc, TCGArg a1, TCGArg a2, TCGArg a3,
65                  TCGArg a4, TCGArg a5)
66 {
67     TCGOp *op = tcg_emit_op(opc, 5);
68     op->args[0] = a1;
69     op->args[1] = a2;
70     op->args[2] = a3;
71     op->args[3] = a4;
72     op->args[4] = a5;
73 }
74 
75 void tcg_gen_op6(TCGOpcode opc, TCGArg a1, TCGArg a2, TCGArg a3,
76                  TCGArg a4, TCGArg a5, TCGArg a6)
77 {
78     TCGOp *op = tcg_emit_op(opc, 6);
79     op->args[0] = a1;
80     op->args[1] = a2;
81     op->args[2] = a3;
82     op->args[3] = a4;
83     op->args[4] = a5;
84     op->args[5] = a6;
85 }
86 
87 void tcg_gen_mb(TCGBar mb_type)
88 {
89     if (tcg_ctx->tb_cflags & CF_PARALLEL) {
90         tcg_gen_op1(INDEX_op_mb, mb_type);
91     }
92 }
93 
94 /* 32 bit ops */
95 
96 void tcg_gen_movi_i32(TCGv_i32 ret, int32_t arg)
97 {
98     tcg_gen_mov_i32(ret, tcg_constant_i32(arg));
99 }
100 
101 void tcg_gen_addi_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
102 {
103     /* some cases can be optimized here */
104     if (arg2 == 0) {
105         tcg_gen_mov_i32(ret, arg1);
106     } else {
107         tcg_gen_add_i32(ret, arg1, tcg_constant_i32(arg2));
108     }
109 }
110 
111 void tcg_gen_subfi_i32(TCGv_i32 ret, int32_t arg1, TCGv_i32 arg2)
112 {
113     if (arg1 == 0 && TCG_TARGET_HAS_neg_i32) {
114         /* Don't recurse with tcg_gen_neg_i32.  */
115         tcg_gen_op2_i32(INDEX_op_neg_i32, ret, arg2);
116     } else {
117         tcg_gen_sub_i32(ret, tcg_constant_i32(arg1), arg2);
118     }
119 }
120 
121 void tcg_gen_subi_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
122 {
123     /* some cases can be optimized here */
124     if (arg2 == 0) {
125         tcg_gen_mov_i32(ret, arg1);
126     } else {
127         tcg_gen_sub_i32(ret, arg1, tcg_constant_i32(arg2));
128     }
129 }
130 
131 void tcg_gen_andi_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
132 {
133     /* Some cases can be optimized here.  */
134     switch (arg2) {
135     case 0:
136         tcg_gen_movi_i32(ret, 0);
137         return;
138     case -1:
139         tcg_gen_mov_i32(ret, arg1);
140         return;
141     case 0xff:
142         /* Don't recurse with tcg_gen_ext8u_i32.  */
143         if (TCG_TARGET_HAS_ext8u_i32) {
144             tcg_gen_op2_i32(INDEX_op_ext8u_i32, ret, arg1);
145             return;
146         }
147         break;
148     case 0xffff:
149         if (TCG_TARGET_HAS_ext16u_i32) {
150             tcg_gen_op2_i32(INDEX_op_ext16u_i32, ret, arg1);
151             return;
152         }
153         break;
154     }
155 
156     tcg_gen_and_i32(ret, arg1, tcg_constant_i32(arg2));
157 }
158 
159 void tcg_gen_ori_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
160 {
161     /* Some cases can be optimized here.  */
162     if (arg2 == -1) {
163         tcg_gen_movi_i32(ret, -1);
164     } else if (arg2 == 0) {
165         tcg_gen_mov_i32(ret, arg1);
166     } else {
167         tcg_gen_or_i32(ret, arg1, tcg_constant_i32(arg2));
168     }
169 }
170 
171 void tcg_gen_xori_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
172 {
173     /* Some cases can be optimized here.  */
174     if (arg2 == 0) {
175         tcg_gen_mov_i32(ret, arg1);
176     } else if (arg2 == -1 && TCG_TARGET_HAS_not_i32) {
177         /* Don't recurse with tcg_gen_not_i32.  */
178         tcg_gen_op2_i32(INDEX_op_not_i32, ret, arg1);
179     } else {
180         tcg_gen_xor_i32(ret, arg1, tcg_constant_i32(arg2));
181     }
182 }
183 
184 void tcg_gen_shli_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
185 {
186     tcg_debug_assert(arg2 >= 0 && arg2 < 32);
187     if (arg2 == 0) {
188         tcg_gen_mov_i32(ret, arg1);
189     } else {
190         tcg_gen_shl_i32(ret, arg1, tcg_constant_i32(arg2));
191     }
192 }
193 
194 void tcg_gen_shri_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
195 {
196     tcg_debug_assert(arg2 >= 0 && arg2 < 32);
197     if (arg2 == 0) {
198         tcg_gen_mov_i32(ret, arg1);
199     } else {
200         tcg_gen_shr_i32(ret, arg1, tcg_constant_i32(arg2));
201     }
202 }
203 
204 void tcg_gen_sari_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
205 {
206     tcg_debug_assert(arg2 >= 0 && arg2 < 32);
207     if (arg2 == 0) {
208         tcg_gen_mov_i32(ret, arg1);
209     } else {
210         tcg_gen_sar_i32(ret, arg1, tcg_constant_i32(arg2));
211     }
212 }
213 
214 void tcg_gen_brcond_i32(TCGCond cond, TCGv_i32 arg1, TCGv_i32 arg2, TCGLabel *l)
215 {
216     if (cond == TCG_COND_ALWAYS) {
217         tcg_gen_br(l);
218     } else if (cond != TCG_COND_NEVER) {
219         l->refs++;
220         tcg_gen_op4ii_i32(INDEX_op_brcond_i32, arg1, arg2, cond, label_arg(l));
221     }
222 }
223 
224 void tcg_gen_brcondi_i32(TCGCond cond, TCGv_i32 arg1, int32_t arg2, TCGLabel *l)
225 {
226     if (cond == TCG_COND_ALWAYS) {
227         tcg_gen_br(l);
228     } else if (cond != TCG_COND_NEVER) {
229         tcg_gen_brcond_i32(cond, arg1, tcg_constant_i32(arg2), l);
230     }
231 }
232 
233 void tcg_gen_setcond_i32(TCGCond cond, TCGv_i32 ret,
234                          TCGv_i32 arg1, TCGv_i32 arg2)
235 {
236     if (cond == TCG_COND_ALWAYS) {
237         tcg_gen_movi_i32(ret, 1);
238     } else if (cond == TCG_COND_NEVER) {
239         tcg_gen_movi_i32(ret, 0);
240     } else {
241         tcg_gen_op4i_i32(INDEX_op_setcond_i32, ret, arg1, arg2, cond);
242     }
243 }
244 
245 void tcg_gen_setcondi_i32(TCGCond cond, TCGv_i32 ret,
246                           TCGv_i32 arg1, int32_t arg2)
247 {
248     tcg_gen_setcond_i32(cond, ret, arg1, tcg_constant_i32(arg2));
249 }
250 
251 void tcg_gen_muli_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
252 {
253     if (arg2 == 0) {
254         tcg_gen_movi_i32(ret, 0);
255     } else if (is_power_of_2(arg2)) {
256         tcg_gen_shli_i32(ret, arg1, ctz32(arg2));
257     } else {
258         tcg_gen_mul_i32(ret, arg1, tcg_constant_i32(arg2));
259     }
260 }
261 
262 void tcg_gen_div_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
263 {
264     if (TCG_TARGET_HAS_div_i32) {
265         tcg_gen_op3_i32(INDEX_op_div_i32, ret, arg1, arg2);
266     } else if (TCG_TARGET_HAS_div2_i32) {
267         TCGv_i32 t0 = tcg_temp_new_i32();
268         tcg_gen_sari_i32(t0, arg1, 31);
269         tcg_gen_op5_i32(INDEX_op_div2_i32, ret, t0, arg1, t0, arg2);
270         tcg_temp_free_i32(t0);
271     } else {
272         gen_helper_div_i32(ret, arg1, arg2);
273     }
274 }
275 
276 void tcg_gen_rem_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
277 {
278     if (TCG_TARGET_HAS_rem_i32) {
279         tcg_gen_op3_i32(INDEX_op_rem_i32, ret, arg1, arg2);
280     } else if (TCG_TARGET_HAS_div_i32) {
281         TCGv_i32 t0 = tcg_temp_new_i32();
282         tcg_gen_op3_i32(INDEX_op_div_i32, t0, arg1, arg2);
283         tcg_gen_mul_i32(t0, t0, arg2);
284         tcg_gen_sub_i32(ret, arg1, t0);
285         tcg_temp_free_i32(t0);
286     } else if (TCG_TARGET_HAS_div2_i32) {
287         TCGv_i32 t0 = tcg_temp_new_i32();
288         tcg_gen_sari_i32(t0, arg1, 31);
289         tcg_gen_op5_i32(INDEX_op_div2_i32, t0, ret, arg1, t0, arg2);
290         tcg_temp_free_i32(t0);
291     } else {
292         gen_helper_rem_i32(ret, arg1, arg2);
293     }
294 }
295 
296 void tcg_gen_divu_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
297 {
298     if (TCG_TARGET_HAS_div_i32) {
299         tcg_gen_op3_i32(INDEX_op_divu_i32, ret, arg1, arg2);
300     } else if (TCG_TARGET_HAS_div2_i32) {
301         TCGv_i32 t0 = tcg_temp_new_i32();
302         tcg_gen_movi_i32(t0, 0);
303         tcg_gen_op5_i32(INDEX_op_divu2_i32, ret, t0, arg1, t0, arg2);
304         tcg_temp_free_i32(t0);
305     } else {
306         gen_helper_divu_i32(ret, arg1, arg2);
307     }
308 }
309 
310 void tcg_gen_remu_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
311 {
312     if (TCG_TARGET_HAS_rem_i32) {
313         tcg_gen_op3_i32(INDEX_op_remu_i32, ret, arg1, arg2);
314     } else if (TCG_TARGET_HAS_div_i32) {
315         TCGv_i32 t0 = tcg_temp_new_i32();
316         tcg_gen_op3_i32(INDEX_op_divu_i32, t0, arg1, arg2);
317         tcg_gen_mul_i32(t0, t0, arg2);
318         tcg_gen_sub_i32(ret, arg1, t0);
319         tcg_temp_free_i32(t0);
320     } else if (TCG_TARGET_HAS_div2_i32) {
321         TCGv_i32 t0 = tcg_temp_new_i32();
322         tcg_gen_movi_i32(t0, 0);
323         tcg_gen_op5_i32(INDEX_op_divu2_i32, t0, ret, arg1, t0, arg2);
324         tcg_temp_free_i32(t0);
325     } else {
326         gen_helper_remu_i32(ret, arg1, arg2);
327     }
328 }
329 
330 void tcg_gen_andc_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
331 {
332     if (TCG_TARGET_HAS_andc_i32) {
333         tcg_gen_op3_i32(INDEX_op_andc_i32, ret, arg1, arg2);
334     } else {
335         TCGv_i32 t0 = tcg_temp_new_i32();
336         tcg_gen_not_i32(t0, arg2);
337         tcg_gen_and_i32(ret, arg1, t0);
338         tcg_temp_free_i32(t0);
339     }
340 }
341 
342 void tcg_gen_eqv_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
343 {
344     if (TCG_TARGET_HAS_eqv_i32) {
345         tcg_gen_op3_i32(INDEX_op_eqv_i32, ret, arg1, arg2);
346     } else {
347         tcg_gen_xor_i32(ret, arg1, arg2);
348         tcg_gen_not_i32(ret, ret);
349     }
350 }
351 
352 void tcg_gen_nand_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
353 {
354     if (TCG_TARGET_HAS_nand_i32) {
355         tcg_gen_op3_i32(INDEX_op_nand_i32, ret, arg1, arg2);
356     } else {
357         tcg_gen_and_i32(ret, arg1, arg2);
358         tcg_gen_not_i32(ret, ret);
359     }
360 }
361 
362 void tcg_gen_nor_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
363 {
364     if (TCG_TARGET_HAS_nor_i32) {
365         tcg_gen_op3_i32(INDEX_op_nor_i32, ret, arg1, arg2);
366     } else {
367         tcg_gen_or_i32(ret, arg1, arg2);
368         tcg_gen_not_i32(ret, ret);
369     }
370 }
371 
372 void tcg_gen_orc_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
373 {
374     if (TCG_TARGET_HAS_orc_i32) {
375         tcg_gen_op3_i32(INDEX_op_orc_i32, ret, arg1, arg2);
376     } else {
377         TCGv_i32 t0 = tcg_temp_new_i32();
378         tcg_gen_not_i32(t0, arg2);
379         tcg_gen_or_i32(ret, arg1, t0);
380         tcg_temp_free_i32(t0);
381     }
382 }
383 
384 void tcg_gen_clz_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
385 {
386     if (TCG_TARGET_HAS_clz_i32) {
387         tcg_gen_op3_i32(INDEX_op_clz_i32, ret, arg1, arg2);
388     } else if (TCG_TARGET_HAS_clz_i64) {
389         TCGv_i64 t1 = tcg_temp_new_i64();
390         TCGv_i64 t2 = tcg_temp_new_i64();
391         tcg_gen_extu_i32_i64(t1, arg1);
392         tcg_gen_extu_i32_i64(t2, arg2);
393         tcg_gen_addi_i64(t2, t2, 32);
394         tcg_gen_clz_i64(t1, t1, t2);
395         tcg_gen_extrl_i64_i32(ret, t1);
396         tcg_temp_free_i64(t1);
397         tcg_temp_free_i64(t2);
398         tcg_gen_subi_i32(ret, ret, 32);
399     } else {
400         gen_helper_clz_i32(ret, arg1, arg2);
401     }
402 }
403 
404 void tcg_gen_clzi_i32(TCGv_i32 ret, TCGv_i32 arg1, uint32_t arg2)
405 {
406     tcg_gen_clz_i32(ret, arg1, tcg_constant_i32(arg2));
407 }
408 
/*
 * Count trailing zeros of ARG1 into RET.  In the ctpop/clz fallback
 * below, ARG2 is the value selected when ARG1 is zero (see the final
 * movcond); the native op and helper receive ARG2 as their third operand.
 *
 * Strategy, in order of preference:
 *   1. the native 32-bit ctz op;
 *   2. the 64-bit ctz applied to zero-extended inputs;
 *   3. a bit trick built on ctpop or clz, fixed up with a movcond;
 *   4. the out-of-line helper.
 */
void tcg_gen_ctz_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
{
    if (TCG_TARGET_HAS_ctz_i32) {
        tcg_gen_op3_i32(INDEX_op_ctz_i32, ret, arg1, arg2);
    } else if (TCG_TARGET_HAS_ctz_i64) {
        /* Widen both operands, count in 64 bits, keep the low half. */
        TCGv_i64 t1 = tcg_temp_new_i64();
        TCGv_i64 t2 = tcg_temp_new_i64();
        tcg_gen_extu_i32_i64(t1, arg1);
        tcg_gen_extu_i32_i64(t2, arg2);
        tcg_gen_ctz_i64(t1, t1, t2);
        tcg_gen_extrl_i64_i32(ret, t1);
        tcg_temp_free_i64(t1);
        tcg_temp_free_i64(t2);
    } else if (TCG_TARGET_HAS_ctpop_i32
               || TCG_TARGET_HAS_ctpop_i64
               || TCG_TARGET_HAS_clz_i32
               || TCG_TARGET_HAS_clz_i64) {
        TCGv_i32 z, t = tcg_temp_new_i32();

        if (TCG_TARGET_HAS_ctpop_i32 || TCG_TARGET_HAS_ctpop_i64) {
            /* (arg1 - 1) & ~arg1 has a 1 for each trailing zero of arg1. */
            tcg_gen_subi_i32(t, arg1, 1);
            tcg_gen_andc_i32(t, t, arg1);
            tcg_gen_ctpop_i32(t, t);
        } else {
            /* Since all non-x86 hosts have clz(0) == 32, don't fight it.  */
            /* -arg1 & arg1 isolates the lowest set bit; 31 ^ clz is its index. */
            tcg_gen_neg_i32(t, arg1);
            tcg_gen_and_i32(t, t, arg1);
            tcg_gen_clzi_i32(t, t, 32);
            tcg_gen_xori_i32(t, t, 31);
        }
        /* Select arg2 when arg1 == 0, otherwise the computed count. */
        z = tcg_constant_i32(0);
        tcg_gen_movcond_i32(TCG_COND_EQ, ret, arg1, z, arg2, t);
        tcg_temp_free_i32(t);
    } else {
        gen_helper_ctz_i32(ret, arg1, arg2);
    }
}
446 
447 void tcg_gen_ctzi_i32(TCGv_i32 ret, TCGv_i32 arg1, uint32_t arg2)
448 {
449     if (!TCG_TARGET_HAS_ctz_i32 && TCG_TARGET_HAS_ctpop_i32 && arg2 == 32) {
450         /* This equivalence has the advantage of not requiring a fixup.  */
451         TCGv_i32 t = tcg_temp_new_i32();
452         tcg_gen_subi_i32(t, arg1, 1);
453         tcg_gen_andc_i32(t, t, arg1);
454         tcg_gen_ctpop_i32(ret, t);
455         tcg_temp_free_i32(t);
456     } else {
457         tcg_gen_ctz_i32(ret, arg1, tcg_constant_i32(arg2));
458     }
459 }
460 
461 void tcg_gen_clrsb_i32(TCGv_i32 ret, TCGv_i32 arg)
462 {
463     if (TCG_TARGET_HAS_clz_i32) {
464         TCGv_i32 t = tcg_temp_new_i32();
465         tcg_gen_sari_i32(t, arg, 31);
466         tcg_gen_xor_i32(t, t, arg);
467         tcg_gen_clzi_i32(t, t, 32);
468         tcg_gen_subi_i32(ret, t, 1);
469         tcg_temp_free_i32(t);
470     } else {
471         gen_helper_clrsb_i32(ret, arg);
472     }
473 }
474 
475 void tcg_gen_ctpop_i32(TCGv_i32 ret, TCGv_i32 arg1)
476 {
477     if (TCG_TARGET_HAS_ctpop_i32) {
478         tcg_gen_op2_i32(INDEX_op_ctpop_i32, ret, arg1);
479     } else if (TCG_TARGET_HAS_ctpop_i64) {
480         TCGv_i64 t = tcg_temp_new_i64();
481         tcg_gen_extu_i32_i64(t, arg1);
482         tcg_gen_ctpop_i64(t, t);
483         tcg_gen_extrl_i64_i32(ret, t);
484         tcg_temp_free_i64(t);
485     } else {
486         gen_helper_ctpop_i32(ret, arg1);
487     }
488 }
489 
490 void tcg_gen_rotl_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
491 {
492     if (TCG_TARGET_HAS_rot_i32) {
493         tcg_gen_op3_i32(INDEX_op_rotl_i32, ret, arg1, arg2);
494     } else {
495         TCGv_i32 t0, t1;
496 
497         t0 = tcg_temp_new_i32();
498         t1 = tcg_temp_new_i32();
499         tcg_gen_shl_i32(t0, arg1, arg2);
500         tcg_gen_subfi_i32(t1, 32, arg2);
501         tcg_gen_shr_i32(t1, arg1, t1);
502         tcg_gen_or_i32(ret, t0, t1);
503         tcg_temp_free_i32(t0);
504         tcg_temp_free_i32(t1);
505     }
506 }
507 
508 void tcg_gen_rotli_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
509 {
510     tcg_debug_assert(arg2 >= 0 && arg2 < 32);
511     /* some cases can be optimized here */
512     if (arg2 == 0) {
513         tcg_gen_mov_i32(ret, arg1);
514     } else if (TCG_TARGET_HAS_rot_i32) {
515         tcg_gen_rotl_i32(ret, arg1, tcg_constant_i32(arg2));
516     } else {
517         TCGv_i32 t0, t1;
518         t0 = tcg_temp_new_i32();
519         t1 = tcg_temp_new_i32();
520         tcg_gen_shli_i32(t0, arg1, arg2);
521         tcg_gen_shri_i32(t1, arg1, 32 - arg2);
522         tcg_gen_or_i32(ret, t0, t1);
523         tcg_temp_free_i32(t0);
524         tcg_temp_free_i32(t1);
525     }
526 }
527 
528 void tcg_gen_rotr_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
529 {
530     if (TCG_TARGET_HAS_rot_i32) {
531         tcg_gen_op3_i32(INDEX_op_rotr_i32, ret, arg1, arg2);
532     } else {
533         TCGv_i32 t0, t1;
534 
535         t0 = tcg_temp_new_i32();
536         t1 = tcg_temp_new_i32();
537         tcg_gen_shr_i32(t0, arg1, arg2);
538         tcg_gen_subfi_i32(t1, 32, arg2);
539         tcg_gen_shl_i32(t1, arg1, t1);
540         tcg_gen_or_i32(ret, t0, t1);
541         tcg_temp_free_i32(t0);
542         tcg_temp_free_i32(t1);
543     }
544 }
545 
546 void tcg_gen_rotri_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
547 {
548     tcg_debug_assert(arg2 >= 0 && arg2 < 32);
549     /* some cases can be optimized here */
550     if (arg2 == 0) {
551         tcg_gen_mov_i32(ret, arg1);
552     } else {
553         tcg_gen_rotli_i32(ret, arg1, 32 - arg2);
554     }
555 }
556 
/*
 * Deposit the low LEN bits of ARG2 into ARG1 at bit position OFS and
 * write the combined value to RET (this is what the generic mask/shift/or
 * fallback at the end computes).
 *
 * Requirements (checked with debug assertions): OFS < 32, 0 < LEN <= 32
 * and OFS + LEN <= 32.
 *
 * Strategy, in order of preference:
 *   - LEN == 32: the field is the whole word, so just move ARG2;
 *   - the native deposit op, if the backend accepts this OFS/LEN;
 *   - extract2-based sequences for fields touching either end of the word;
 *   - the generic mask, shift and OR sequence.
 */
void tcg_gen_deposit_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2,
                         unsigned int ofs, unsigned int len)
{
    uint32_t mask;
    TCGv_i32 t1;

    tcg_debug_assert(ofs < 32);
    tcg_debug_assert(len > 0);
    tcg_debug_assert(len <= 32);
    tcg_debug_assert(ofs + len <= 32);

    if (len == 32) {
        tcg_gen_mov_i32(ret, arg2);
        return;
    }
    if (TCG_TARGET_HAS_deposit_i32 && TCG_TARGET_deposit_i32_valid(ofs, len)) {
        tcg_gen_op5ii_i32(INDEX_op_deposit_i32, ret, arg1, arg2, ofs, len);
        return;
    }

    t1 = tcg_temp_new_i32();

    if (TCG_TARGET_HAS_extract2_i32) {
        /* Field occupies the top LEN bits of the word. */
        if (ofs + len == 32) {
            tcg_gen_shli_i32(t1, arg1, len);
            tcg_gen_extract2_i32(ret, t1, arg2, len);
            goto done;
        }
        /* Field occupies the bottom LEN bits of the word. */
        if (ofs == 0) {
            tcg_gen_extract2_i32(ret, arg1, arg2, len);
            tcg_gen_rotli_i32(ret, ret, len);
            goto done;
        }
    }

    /* Generic path: mask has the low LEN bits set (LEN < 32 here). */
    mask = (1u << len) - 1;
    if (ofs + len < 32) {
        tcg_gen_andi_i32(t1, arg2, mask);
        tcg_gen_shli_i32(t1, t1, ofs);
    } else {
        /* The shift itself discards the bits of arg2 above the field. */
        tcg_gen_shli_i32(t1, arg2, ofs);
    }
    /* Clear the destination field in arg1, then merge the new bits. */
    tcg_gen_andi_i32(ret, arg1, ~(mask << ofs));
    tcg_gen_or_i32(ret, ret, t1);
 done:
    tcg_temp_free_i32(t1);
}
604 
/*
 * Deposit the low LEN bits of ARG into zero at bit position OFS:
 * RET = (ARG & ((1 << LEN) - 1)) << OFS, as computed by the final
 * fallback sequence.
 *
 * Requirements (checked with debug assertions): OFS < 32, 0 < LEN <= 32
 * and OFS + LEN <= 32.
 */
void tcg_gen_deposit_z_i32(TCGv_i32 ret, TCGv_i32 arg,
                           unsigned int ofs, unsigned int len)
{
    tcg_debug_assert(ofs < 32);
    tcg_debug_assert(len > 0);
    tcg_debug_assert(len <= 32);
    tcg_debug_assert(ofs + len <= 32);

    if (ofs + len == 32) {
        /* Field reaches the top bit: the shift alone drops the excess. */
        tcg_gen_shli_i32(ret, arg, ofs);
    } else if (ofs == 0) {
        /* Field starts at bit 0: a mask is all that is needed. */
        tcg_gen_andi_i32(ret, arg, (1u << len) - 1);
    } else if (TCG_TARGET_HAS_deposit_i32
               && TCG_TARGET_deposit_i32_valid(ofs, len)) {
        /* Native deposit into a constant zero. */
        TCGv_i32 zero = tcg_constant_i32(0);
        tcg_gen_op5ii_i32(INDEX_op_deposit_i32, ret, zero, arg, ofs, len);
    } else {
        /* To help two-operand hosts we prefer to zero-extend first,
           which allows ARG to stay live.  */
        switch (len) {
        case 16:
            if (TCG_TARGET_HAS_ext16u_i32) {
                tcg_gen_ext16u_i32(ret, arg);
                tcg_gen_shli_i32(ret, ret, ofs);
                return;
            }
            break;
        case 8:
            if (TCG_TARGET_HAS_ext8u_i32) {
                tcg_gen_ext8u_i32(ret, arg);
                tcg_gen_shli_i32(ret, ret, ofs);
                return;
            }
            break;
        }
        /* Otherwise prefer zero-extension over AND for code size.  */
        switch (ofs + len) {
        case 16:
            if (TCG_TARGET_HAS_ext16u_i32) {
                tcg_gen_shli_i32(ret, arg, ofs);
                tcg_gen_ext16u_i32(ret, ret);
                return;
            }
            break;
        case 8:
            if (TCG_TARGET_HAS_ext8u_i32) {
                tcg_gen_shli_i32(ret, arg, ofs);
                tcg_gen_ext8u_i32(ret, ret);
                return;
            }
            break;
        }
        /* Last resort: explicit mask followed by the shift. */
        tcg_gen_andi_i32(ret, arg, (1u << len) - 1);
        tcg_gen_shli_i32(ret, ret, ofs);
    }
}
661 
/*
 * Extract the LEN-bit field of ARG starting at bit OFS into RET, with
 * the bits above the field cleared (every fallback below ends in a
 * logical right shift, a mask, or a zero-extension).
 *
 * Requirements (checked with debug assertions): OFS < 32, 0 < LEN <= 32
 * and OFS + LEN <= 32.
 */
void tcg_gen_extract_i32(TCGv_i32 ret, TCGv_i32 arg,
                         unsigned int ofs, unsigned int len)
{
    tcg_debug_assert(ofs < 32);
    tcg_debug_assert(len > 0);
    tcg_debug_assert(len <= 32);
    tcg_debug_assert(ofs + len <= 32);

    /* Canonicalize certain special cases, even if extract is supported.  */
    if (ofs + len == 32) {
        /* Field ends at the top bit: a single logical right shift. */
        tcg_gen_shri_i32(ret, arg, 32 - len);
        return;
    }
    if (ofs == 0) {
        /* Field starts at bit 0: a single mask. */
        tcg_gen_andi_i32(ret, arg, (1u << len) - 1);
        return;
    }

    if (TCG_TARGET_HAS_extract_i32
        && TCG_TARGET_extract_i32_valid(ofs, len)) {
        tcg_gen_op4ii_i32(INDEX_op_extract_i32, ret, arg, ofs, len);
        return;
    }

    /* Assume that zero-extension, if available, is cheaper than a shift.  */
    switch (ofs + len) {
    case 16:
        if (TCG_TARGET_HAS_ext16u_i32) {
            tcg_gen_ext16u_i32(ret, arg);
            tcg_gen_shri_i32(ret, ret, ofs);
            return;
        }
        break;
    case 8:
        if (TCG_TARGET_HAS_ext8u_i32) {
            tcg_gen_ext8u_i32(ret, arg);
            tcg_gen_shri_i32(ret, ret, ofs);
            return;
        }
        break;
    }

    /* ??? Ideally we'd know what values are available for immediate AND.
       Assume that 8 bits are available, plus the special case of 16,
       so that we get ext8u, ext16u.  */
    switch (len) {
    case 1 ... 8: case 16:
        /* Shift the field down to bit 0, then mask it. */
        tcg_gen_shri_i32(ret, arg, ofs);
        tcg_gen_andi_i32(ret, ret, (1u << len) - 1);
        break;
    default:
        /* Shift the field up to the top of the word, then back down. */
        tcg_gen_shli_i32(ret, arg, 32 - len - ofs);
        tcg_gen_shri_i32(ret, ret, 32 - len);
        break;
    }
}
718 
/*
 * Extract the LEN-bit field of ARG starting at bit OFS into RET and
 * sign-extend it (every fallback below ends in an arithmetic right
 * shift or a sign-extension).
 *
 * Requirements (checked with debug assertions): OFS < 32, 0 < LEN <= 32
 * and OFS + LEN <= 32.
 */
void tcg_gen_sextract_i32(TCGv_i32 ret, TCGv_i32 arg,
                          unsigned int ofs, unsigned int len)
{
    tcg_debug_assert(ofs < 32);
    tcg_debug_assert(len > 0);
    tcg_debug_assert(len <= 32);
    tcg_debug_assert(ofs + len <= 32);

    /* Canonicalize certain special cases, even if extract is supported.  */
    if (ofs + len == 32) {
        /* Field ends at the top bit: a single arithmetic right shift. */
        tcg_gen_sari_i32(ret, arg, 32 - len);
        return;
    }
    if (ofs == 0) {
        /* Byte/halfword at bit 0: plain sign-extension. */
        switch (len) {
        case 16:
            tcg_gen_ext16s_i32(ret, arg);
            return;
        case 8:
            tcg_gen_ext8s_i32(ret, arg);
            return;
        }
    }

    /* NOTE(review): validity is queried via the *extract* predicate
       (TCG_TARGET_extract_i32_valid), not a sextract-specific one.  */
    if (TCG_TARGET_HAS_sextract_i32
        && TCG_TARGET_extract_i32_valid(ofs, len)) {
        tcg_gen_op4ii_i32(INDEX_op_sextract_i32, ret, arg, ofs, len);
        return;
    }

    /* Assume that sign-extension, if available, is cheaper than a shift.  */
    switch (ofs + len) {
    case 16:
        if (TCG_TARGET_HAS_ext16s_i32) {
            tcg_gen_ext16s_i32(ret, arg);
            tcg_gen_sari_i32(ret, ret, ofs);
            return;
        }
        break;
    case 8:
        if (TCG_TARGET_HAS_ext8s_i32) {
            tcg_gen_ext8s_i32(ret, arg);
            tcg_gen_sari_i32(ret, ret, ofs);
            return;
        }
        break;
    }
    /* Alternatively shift the field down to bit 0, then sign-extend it. */
    switch (len) {
    case 16:
        if (TCG_TARGET_HAS_ext16s_i32) {
            tcg_gen_shri_i32(ret, arg, ofs);
            tcg_gen_ext16s_i32(ret, ret);
            return;
        }
        break;
    case 8:
        if (TCG_TARGET_HAS_ext8s_i32) {
            tcg_gen_shri_i32(ret, arg, ofs);
            tcg_gen_ext8s_i32(ret, ret);
            return;
        }
        break;
    }

    /* Generic: move the field to the top of the word, then shift back
       down arithmetically to replicate its sign bit.  */
    tcg_gen_shli_i32(ret, arg, 32 - len - ofs);
    tcg_gen_sari_i32(ret, ret, 32 - len);
}
786 
787 /*
788  * Extract 32-bits from a 64-bit input, ah:al, starting from ofs.
789  * Unlike tcg_gen_extract_i32 above, len is fixed at 32.
790  */
791 void tcg_gen_extract2_i32(TCGv_i32 ret, TCGv_i32 al, TCGv_i32 ah,
792                           unsigned int ofs)
793 {
794     tcg_debug_assert(ofs <= 32);
795     if (ofs == 0) {
796         tcg_gen_mov_i32(ret, al);
797     } else if (ofs == 32) {
798         tcg_gen_mov_i32(ret, ah);
799     } else if (al == ah) {
800         tcg_gen_rotri_i32(ret, al, ofs);
801     } else if (TCG_TARGET_HAS_extract2_i32) {
802         tcg_gen_op4i_i32(INDEX_op_extract2_i32, ret, al, ah, ofs);
803     } else {
804         TCGv_i32 t0 = tcg_temp_new_i32();
805         tcg_gen_shri_i32(t0, al, ofs);
806         tcg_gen_deposit_i32(ret, t0, ah, 32 - ofs, ofs);
807         tcg_temp_free_i32(t0);
808     }
809 }
810 
811 void tcg_gen_movcond_i32(TCGCond cond, TCGv_i32 ret, TCGv_i32 c1,
812                          TCGv_i32 c2, TCGv_i32 v1, TCGv_i32 v2)
813 {
814     if (cond == TCG_COND_ALWAYS) {
815         tcg_gen_mov_i32(ret, v1);
816     } else if (cond == TCG_COND_NEVER) {
817         tcg_gen_mov_i32(ret, v2);
818     } else if (TCG_TARGET_HAS_movcond_i32) {
819         tcg_gen_op6i_i32(INDEX_op_movcond_i32, ret, c1, c2, v1, v2, cond);
820     } else {
821         TCGv_i32 t0 = tcg_temp_new_i32();
822         TCGv_i32 t1 = tcg_temp_new_i32();
823         tcg_gen_setcond_i32(cond, t0, c1, c2);
824         tcg_gen_neg_i32(t0, t0);
825         tcg_gen_and_i32(t1, v1, t0);
826         tcg_gen_andc_i32(ret, v2, t0);
827         tcg_gen_or_i32(ret, ret, t1);
828         tcg_temp_free_i32(t0);
829         tcg_temp_free_i32(t1);
830     }
831 }
832 
833 void tcg_gen_add2_i32(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 al,
834                       TCGv_i32 ah, TCGv_i32 bl, TCGv_i32 bh)
835 {
836     if (TCG_TARGET_HAS_add2_i32) {
837         tcg_gen_op6_i32(INDEX_op_add2_i32, rl, rh, al, ah, bl, bh);
838     } else {
839         TCGv_i64 t0 = tcg_temp_new_i64();
840         TCGv_i64 t1 = tcg_temp_new_i64();
841         tcg_gen_concat_i32_i64(t0, al, ah);
842         tcg_gen_concat_i32_i64(t1, bl, bh);
843         tcg_gen_add_i64(t0, t0, t1);
844         tcg_gen_extr_i64_i32(rl, rh, t0);
845         tcg_temp_free_i64(t0);
846         tcg_temp_free_i64(t1);
847     }
848 }
849 
850 void tcg_gen_sub2_i32(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 al,
851                       TCGv_i32 ah, TCGv_i32 bl, TCGv_i32 bh)
852 {
853     if (TCG_TARGET_HAS_sub2_i32) {
854         tcg_gen_op6_i32(INDEX_op_sub2_i32, rl, rh, al, ah, bl, bh);
855     } else {
856         TCGv_i64 t0 = tcg_temp_new_i64();
857         TCGv_i64 t1 = tcg_temp_new_i64();
858         tcg_gen_concat_i32_i64(t0, al, ah);
859         tcg_gen_concat_i32_i64(t1, bl, bh);
860         tcg_gen_sub_i64(t0, t0, t1);
861         tcg_gen_extr_i64_i32(rl, rh, t0);
862         tcg_temp_free_i64(t0);
863         tcg_temp_free_i64(t1);
864     }
865 }
866 
867 void tcg_gen_mulu2_i32(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 arg1, TCGv_i32 arg2)
868 {
869     if (TCG_TARGET_HAS_mulu2_i32) {
870         tcg_gen_op4_i32(INDEX_op_mulu2_i32, rl, rh, arg1, arg2);
871     } else if (TCG_TARGET_HAS_muluh_i32) {
872         TCGv_i32 t = tcg_temp_new_i32();
873         tcg_gen_op3_i32(INDEX_op_mul_i32, t, arg1, arg2);
874         tcg_gen_op3_i32(INDEX_op_muluh_i32, rh, arg1, arg2);
875         tcg_gen_mov_i32(rl, t);
876         tcg_temp_free_i32(t);
877     } else {
878         TCGv_i64 t0 = tcg_temp_new_i64();
879         TCGv_i64 t1 = tcg_temp_new_i64();
880         tcg_gen_extu_i32_i64(t0, arg1);
881         tcg_gen_extu_i32_i64(t1, arg2);
882         tcg_gen_mul_i64(t0, t0, t1);
883         tcg_gen_extr_i64_i32(rl, rh, t0);
884         tcg_temp_free_i64(t0);
885         tcg_temp_free_i64(t1);
886     }
887 }
888 
889 void tcg_gen_muls2_i32(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 arg1, TCGv_i32 arg2)
890 {
891     if (TCG_TARGET_HAS_muls2_i32) {
892         tcg_gen_op4_i32(INDEX_op_muls2_i32, rl, rh, arg1, arg2);
893     } else if (TCG_TARGET_HAS_mulsh_i32) {
894         TCGv_i32 t = tcg_temp_new_i32();
895         tcg_gen_op3_i32(INDEX_op_mul_i32, t, arg1, arg2);
896         tcg_gen_op3_i32(INDEX_op_mulsh_i32, rh, arg1, arg2);
897         tcg_gen_mov_i32(rl, t);
898         tcg_temp_free_i32(t);
899     } else if (TCG_TARGET_REG_BITS == 32) {
900         TCGv_i32 t0 = tcg_temp_new_i32();
901         TCGv_i32 t1 = tcg_temp_new_i32();
902         TCGv_i32 t2 = tcg_temp_new_i32();
903         TCGv_i32 t3 = tcg_temp_new_i32();
904         tcg_gen_mulu2_i32(t0, t1, arg1, arg2);
905         /* Adjust for negative inputs.  */
906         tcg_gen_sari_i32(t2, arg1, 31);
907         tcg_gen_sari_i32(t3, arg2, 31);
908         tcg_gen_and_i32(t2, t2, arg2);
909         tcg_gen_and_i32(t3, t3, arg1);
910         tcg_gen_sub_i32(rh, t1, t2);
911         tcg_gen_sub_i32(rh, rh, t3);
912         tcg_gen_mov_i32(rl, t0);
913         tcg_temp_free_i32(t0);
914         tcg_temp_free_i32(t1);
915         tcg_temp_free_i32(t2);
916         tcg_temp_free_i32(t3);
917     } else {
918         TCGv_i64 t0 = tcg_temp_new_i64();
919         TCGv_i64 t1 = tcg_temp_new_i64();
920         tcg_gen_ext_i32_i64(t0, arg1);
921         tcg_gen_ext_i32_i64(t1, arg2);
922         tcg_gen_mul_i64(t0, t0, t1);
923         tcg_gen_extr_i64_i32(rl, rh, t0);
924         tcg_temp_free_i64(t0);
925         tcg_temp_free_i64(t1);
926     }
927 }
928 
929 void tcg_gen_mulsu2_i32(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 arg1, TCGv_i32 arg2)
930 {
931     if (TCG_TARGET_REG_BITS == 32) {
932         TCGv_i32 t0 = tcg_temp_new_i32();
933         TCGv_i32 t1 = tcg_temp_new_i32();
934         TCGv_i32 t2 = tcg_temp_new_i32();
935         tcg_gen_mulu2_i32(t0, t1, arg1, arg2);
936         /* Adjust for negative input for the signed arg1.  */
937         tcg_gen_sari_i32(t2, arg1, 31);
938         tcg_gen_and_i32(t2, t2, arg2);
939         tcg_gen_sub_i32(rh, t1, t2);
940         tcg_gen_mov_i32(rl, t0);
941         tcg_temp_free_i32(t0);
942         tcg_temp_free_i32(t1);
943         tcg_temp_free_i32(t2);
944     } else {
945         TCGv_i64 t0 = tcg_temp_new_i64();
946         TCGv_i64 t1 = tcg_temp_new_i64();
947         tcg_gen_ext_i32_i64(t0, arg1);
948         tcg_gen_extu_i32_i64(t1, arg2);
949         tcg_gen_mul_i64(t0, t0, t1);
950         tcg_gen_extr_i64_i32(rl, rh, t0);
951         tcg_temp_free_i64(t0);
952         tcg_temp_free_i64(t1);
953     }
954 }
955 
956 void tcg_gen_ext8s_i32(TCGv_i32 ret, TCGv_i32 arg)
957 {
958     if (TCG_TARGET_HAS_ext8s_i32) {
959         tcg_gen_op2_i32(INDEX_op_ext8s_i32, ret, arg);
960     } else {
961         tcg_gen_shli_i32(ret, arg, 24);
962         tcg_gen_sari_i32(ret, ret, 24);
963     }
964 }
965 
966 void tcg_gen_ext16s_i32(TCGv_i32 ret, TCGv_i32 arg)
967 {
968     if (TCG_TARGET_HAS_ext16s_i32) {
969         tcg_gen_op2_i32(INDEX_op_ext16s_i32, ret, arg);
970     } else {
971         tcg_gen_shli_i32(ret, arg, 16);
972         tcg_gen_sari_i32(ret, ret, 16);
973     }
974 }
975 
976 void tcg_gen_ext8u_i32(TCGv_i32 ret, TCGv_i32 arg)
977 {
978     if (TCG_TARGET_HAS_ext8u_i32) {
979         tcg_gen_op2_i32(INDEX_op_ext8u_i32, ret, arg);
980     } else {
981         tcg_gen_andi_i32(ret, arg, 0xffu);
982     }
983 }
984 
985 void tcg_gen_ext16u_i32(TCGv_i32 ret, TCGv_i32 arg)
986 {
987     if (TCG_TARGET_HAS_ext16u_i32) {
988         tcg_gen_op2_i32(INDEX_op_ext16u_i32, ret, arg);
989     } else {
990         tcg_gen_andi_i32(ret, arg, 0xffffu);
991     }
992 }
993 
994 void tcg_gen_bswap16_i32(TCGv_i32 ret, TCGv_i32 arg, int flags)
995 {
996     /* Only one extension flag may be present. */
997     tcg_debug_assert(!(flags & TCG_BSWAP_OS) || !(flags & TCG_BSWAP_OZ));
998 
999     if (TCG_TARGET_HAS_bswap16_i32) {
1000         tcg_gen_op3i_i32(INDEX_op_bswap16_i32, ret, arg, flags);
1001     } else {
1002         TCGv_i32 t0 = tcg_temp_new_i32();
1003         TCGv_i32 t1 = tcg_temp_new_i32();
1004 
1005         tcg_gen_shri_i32(t0, arg, 8);
1006         if (!(flags & TCG_BSWAP_IZ)) {
1007             tcg_gen_ext8u_i32(t0, t0);
1008         }
1009 
1010         if (flags & TCG_BSWAP_OS) {
1011             tcg_gen_shli_i32(t1, arg, 24);
1012             tcg_gen_sari_i32(t1, t1, 16);
1013         } else if (flags & TCG_BSWAP_OZ) {
1014             tcg_gen_ext8u_i32(t1, arg);
1015             tcg_gen_shli_i32(t1, t1, 8);
1016         } else {
1017             tcg_gen_shli_i32(t1, arg, 8);
1018         }
1019 
1020         tcg_gen_or_i32(ret, t0, t1);
1021         tcg_temp_free_i32(t0);
1022         tcg_temp_free_i32(t1);
1023     }
1024 }
1025 
1026 void tcg_gen_bswap32_i32(TCGv_i32 ret, TCGv_i32 arg)
1027 {
1028     if (TCG_TARGET_HAS_bswap32_i32) {
1029         tcg_gen_op3i_i32(INDEX_op_bswap32_i32, ret, arg, 0);
1030     } else {
1031         TCGv_i32 t0 = tcg_temp_new_i32();
1032         TCGv_i32 t1 = tcg_temp_new_i32();
1033         TCGv_i32 t2 = tcg_constant_i32(0x00ff00ff);
1034 
1035                                         /* arg = abcd */
1036         tcg_gen_shri_i32(t0, arg, 8);   /*  t0 = .abc */
1037         tcg_gen_and_i32(t1, arg, t2);   /*  t1 = .b.d */
1038         tcg_gen_and_i32(t0, t0, t2);    /*  t0 = .a.c */
1039         tcg_gen_shli_i32(t1, t1, 8);    /*  t1 = b.d. */
1040         tcg_gen_or_i32(ret, t0, t1);    /* ret = badc */
1041 
1042         tcg_gen_shri_i32(t0, ret, 16);  /*  t0 = ..ba */
1043         tcg_gen_shli_i32(t1, ret, 16);  /*  t1 = dc.. */
1044         tcg_gen_or_i32(ret, t0, t1);    /* ret = dcba */
1045 
1046         tcg_temp_free_i32(t0);
1047         tcg_temp_free_i32(t1);
1048     }
1049 }
1050 
1051 void tcg_gen_hswap_i32(TCGv_i32 ret, TCGv_i32 arg)
1052 {
1053     /* Swapping 2 16-bit elements is a rotate. */
1054     tcg_gen_rotli_i32(ret, arg, 16);
1055 }
1056 
1057 void tcg_gen_smin_i32(TCGv_i32 ret, TCGv_i32 a, TCGv_i32 b)
1058 {
1059     tcg_gen_movcond_i32(TCG_COND_LT, ret, a, b, a, b);
1060 }
1061 
1062 void tcg_gen_umin_i32(TCGv_i32 ret, TCGv_i32 a, TCGv_i32 b)
1063 {
1064     tcg_gen_movcond_i32(TCG_COND_LTU, ret, a, b, a, b);
1065 }
1066 
1067 void tcg_gen_smax_i32(TCGv_i32 ret, TCGv_i32 a, TCGv_i32 b)
1068 {
1069     tcg_gen_movcond_i32(TCG_COND_LT, ret, a, b, b, a);
1070 }
1071 
1072 void tcg_gen_umax_i32(TCGv_i32 ret, TCGv_i32 a, TCGv_i32 b)
1073 {
1074     tcg_gen_movcond_i32(TCG_COND_LTU, ret, a, b, b, a);
1075 }
1076 
1077 void tcg_gen_abs_i32(TCGv_i32 ret, TCGv_i32 a)
1078 {
1079     TCGv_i32 t = tcg_temp_new_i32();
1080 
1081     tcg_gen_sari_i32(t, a, 31);
1082     tcg_gen_xor_i32(ret, a, t);
1083     tcg_gen_sub_i32(ret, ret, t);
1084     tcg_temp_free_i32(t);
1085 }
1086 
1087 /* 64-bit ops */
1088 
1089 #if TCG_TARGET_REG_BITS == 32
1090 /* These are all inline for TCG_TARGET_REG_BITS == 64.  */
1091 
1092 void tcg_gen_discard_i64(TCGv_i64 arg)
1093 {
1094     tcg_gen_discard_i32(TCGV_LOW(arg));
1095     tcg_gen_discard_i32(TCGV_HIGH(arg));
1096 }
1097 
1098 void tcg_gen_mov_i64(TCGv_i64 ret, TCGv_i64 arg)
1099 {
1100     TCGTemp *ts = tcgv_i64_temp(arg);
1101 
1102     /* Canonicalize TCGv_i64 TEMP_CONST into TCGv_i32 TEMP_CONST. */
1103     if (ts->kind == TEMP_CONST) {
1104         tcg_gen_movi_i64(ret, ts->val);
1105     } else {
1106         tcg_gen_mov_i32(TCGV_LOW(ret), TCGV_LOW(arg));
1107         tcg_gen_mov_i32(TCGV_HIGH(ret), TCGV_HIGH(arg));
1108     }
1109 }
1110 
1111 void tcg_gen_movi_i64(TCGv_i64 ret, int64_t arg)
1112 {
1113     tcg_gen_movi_i32(TCGV_LOW(ret), arg);
1114     tcg_gen_movi_i32(TCGV_HIGH(ret), arg >> 32);
1115 }
1116 
1117 void tcg_gen_ld8u_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset)
1118 {
1119     tcg_gen_ld8u_i32(TCGV_LOW(ret), arg2, offset);
1120     tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
1121 }
1122 
1123 void tcg_gen_ld8s_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset)
1124 {
1125     tcg_gen_ld8s_i32(TCGV_LOW(ret), arg2, offset);
1126     tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31);
1127 }
1128 
1129 void tcg_gen_ld16u_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset)
1130 {
1131     tcg_gen_ld16u_i32(TCGV_LOW(ret), arg2, offset);
1132     tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
1133 }
1134 
1135 void tcg_gen_ld16s_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset)
1136 {
1137     tcg_gen_ld16s_i32(TCGV_LOW(ret), arg2, offset);
1138     tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31);
1139 }
1140 
1141 void tcg_gen_ld32u_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset)
1142 {
1143     tcg_gen_ld_i32(TCGV_LOW(ret), arg2, offset);
1144     tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
1145 }
1146 
1147 void tcg_gen_ld32s_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset)
1148 {
1149     tcg_gen_ld_i32(TCGV_LOW(ret), arg2, offset);
1150     tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31);
1151 }
1152 
1153 void tcg_gen_ld_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset)
1154 {
1155     /* Since arg2 and ret have different types,
1156        they cannot be the same temporary */
1157 #if HOST_BIG_ENDIAN
1158     tcg_gen_ld_i32(TCGV_HIGH(ret), arg2, offset);
1159     tcg_gen_ld_i32(TCGV_LOW(ret), arg2, offset + 4);
1160 #else
1161     tcg_gen_ld_i32(TCGV_LOW(ret), arg2, offset);
1162     tcg_gen_ld_i32(TCGV_HIGH(ret), arg2, offset + 4);
1163 #endif
1164 }
1165 
1166 void tcg_gen_st8_i64(TCGv_i64 arg1, TCGv_ptr arg2, tcg_target_long offset)
1167 {
1168     tcg_gen_st8_i32(TCGV_LOW(arg1), arg2, offset);
1169 }
1170 
1171 void tcg_gen_st16_i64(TCGv_i64 arg1, TCGv_ptr arg2, tcg_target_long offset)
1172 {
1173     tcg_gen_st16_i32(TCGV_LOW(arg1), arg2, offset);
1174 }
1175 
1176 void tcg_gen_st32_i64(TCGv_i64 arg1, TCGv_ptr arg2, tcg_target_long offset)
1177 {
1178     tcg_gen_st_i32(TCGV_LOW(arg1), arg2, offset);
1179 }
1180 
1181 void tcg_gen_st_i64(TCGv_i64 arg1, TCGv_ptr arg2, tcg_target_long offset)
1182 {
1183 #if HOST_BIG_ENDIAN
1184     tcg_gen_st_i32(TCGV_HIGH(arg1), arg2, offset);
1185     tcg_gen_st_i32(TCGV_LOW(arg1), arg2, offset + 4);
1186 #else
1187     tcg_gen_st_i32(TCGV_LOW(arg1), arg2, offset);
1188     tcg_gen_st_i32(TCGV_HIGH(arg1), arg2, offset + 4);
1189 #endif
1190 }
1191 
1192 void tcg_gen_add_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
1193 {
1194     tcg_gen_add2_i32(TCGV_LOW(ret), TCGV_HIGH(ret), TCGV_LOW(arg1),
1195                      TCGV_HIGH(arg1), TCGV_LOW(arg2), TCGV_HIGH(arg2));
1196 }
1197 
1198 void tcg_gen_sub_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
1199 {
1200     tcg_gen_sub2_i32(TCGV_LOW(ret), TCGV_HIGH(ret), TCGV_LOW(arg1),
1201                      TCGV_HIGH(arg1), TCGV_LOW(arg2), TCGV_HIGH(arg2));
1202 }
1203 
1204 void tcg_gen_and_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
1205 {
1206     tcg_gen_and_i32(TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_LOW(arg2));
1207     tcg_gen_and_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), TCGV_HIGH(arg2));
1208 }
1209 
1210 void tcg_gen_or_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
1211 {
1212     tcg_gen_or_i32(TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_LOW(arg2));
1213     tcg_gen_or_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), TCGV_HIGH(arg2));
1214 }
1215 
1216 void tcg_gen_xor_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
1217 {
1218     tcg_gen_xor_i32(TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_LOW(arg2));
1219     tcg_gen_xor_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), TCGV_HIGH(arg2));
1220 }
1221 
1222 void tcg_gen_shl_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
1223 {
1224     gen_helper_shl_i64(ret, arg1, arg2);
1225 }
1226 
1227 void tcg_gen_shr_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
1228 {
1229     gen_helper_shr_i64(ret, arg1, arg2);
1230 }
1231 
1232 void tcg_gen_sar_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
1233 {
1234     gen_helper_sar_i64(ret, arg1, arg2);
1235 }
1236 
1237 void tcg_gen_mul_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
1238 {
1239     TCGv_i64 t0;
1240     TCGv_i32 t1;
1241 
1242     t0 = tcg_temp_new_i64();
1243     t1 = tcg_temp_new_i32();
1244 
1245     tcg_gen_mulu2_i32(TCGV_LOW(t0), TCGV_HIGH(t0),
1246                       TCGV_LOW(arg1), TCGV_LOW(arg2));
1247 
1248     tcg_gen_mul_i32(t1, TCGV_LOW(arg1), TCGV_HIGH(arg2));
1249     tcg_gen_add_i32(TCGV_HIGH(t0), TCGV_HIGH(t0), t1);
1250     tcg_gen_mul_i32(t1, TCGV_HIGH(arg1), TCGV_LOW(arg2));
1251     tcg_gen_add_i32(TCGV_HIGH(t0), TCGV_HIGH(t0), t1);
1252 
1253     tcg_gen_mov_i64(ret, t0);
1254     tcg_temp_free_i64(t0);
1255     tcg_temp_free_i32(t1);
1256 }
1257 
1258 #else
1259 
1260 void tcg_gen_movi_i64(TCGv_i64 ret, int64_t arg)
1261 {
1262     tcg_gen_mov_i64(ret, tcg_constant_i64(arg));
1263 }
1264 
#endif /* TCG_TARGET_REG_BITS == 32 */
1266 
1267 void tcg_gen_addi_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
1268 {
1269     /* some cases can be optimized here */
1270     if (arg2 == 0) {
1271         tcg_gen_mov_i64(ret, arg1);
1272     } else if (TCG_TARGET_REG_BITS == 64) {
1273         tcg_gen_add_i64(ret, arg1, tcg_constant_i64(arg2));
1274     } else {
1275         tcg_gen_add2_i32(TCGV_LOW(ret), TCGV_HIGH(ret),
1276                          TCGV_LOW(arg1), TCGV_HIGH(arg1),
1277                          tcg_constant_i32(arg2), tcg_constant_i32(arg2 >> 32));
1278     }
1279 }
1280 
1281 void tcg_gen_subfi_i64(TCGv_i64 ret, int64_t arg1, TCGv_i64 arg2)
1282 {
1283     if (arg1 == 0 && TCG_TARGET_HAS_neg_i64) {
1284         /* Don't recurse with tcg_gen_neg_i64.  */
1285         tcg_gen_op2_i64(INDEX_op_neg_i64, ret, arg2);
1286     } else if (TCG_TARGET_REG_BITS == 64) {
1287         tcg_gen_sub_i64(ret, tcg_constant_i64(arg1), arg2);
1288     } else {
1289         tcg_gen_sub2_i32(TCGV_LOW(ret), TCGV_HIGH(ret),
1290                          tcg_constant_i32(arg1), tcg_constant_i32(arg1 >> 32),
1291                          TCGV_LOW(arg2), TCGV_HIGH(arg2));
1292     }
1293 }
1294 
1295 void tcg_gen_subi_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
1296 {
1297     /* some cases can be optimized here */
1298     if (arg2 == 0) {
1299         tcg_gen_mov_i64(ret, arg1);
1300     } else if (TCG_TARGET_REG_BITS == 64) {
1301         tcg_gen_sub_i64(ret, arg1, tcg_constant_i64(arg2));
1302     } else {
1303         tcg_gen_sub2_i32(TCGV_LOW(ret), TCGV_HIGH(ret),
1304                          TCGV_LOW(arg1), TCGV_HIGH(arg1),
1305                          tcg_constant_i32(arg2), tcg_constant_i32(arg2 >> 32));
1306     }
1307 }
1308 
1309 void tcg_gen_andi_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
1310 {
1311     if (TCG_TARGET_REG_BITS == 32) {
1312         tcg_gen_andi_i32(TCGV_LOW(ret), TCGV_LOW(arg1), arg2);
1313         tcg_gen_andi_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), arg2 >> 32);
1314         return;
1315     }
1316 
1317     /* Some cases can be optimized here.  */
1318     switch (arg2) {
1319     case 0:
1320         tcg_gen_movi_i64(ret, 0);
1321         return;
1322     case -1:
1323         tcg_gen_mov_i64(ret, arg1);
1324         return;
1325     case 0xff:
1326         /* Don't recurse with tcg_gen_ext8u_i64.  */
1327         if (TCG_TARGET_HAS_ext8u_i64) {
1328             tcg_gen_op2_i64(INDEX_op_ext8u_i64, ret, arg1);
1329             return;
1330         }
1331         break;
1332     case 0xffff:
1333         if (TCG_TARGET_HAS_ext16u_i64) {
1334             tcg_gen_op2_i64(INDEX_op_ext16u_i64, ret, arg1);
1335             return;
1336         }
1337         break;
1338     case 0xffffffffu:
1339         if (TCG_TARGET_HAS_ext32u_i64) {
1340             tcg_gen_op2_i64(INDEX_op_ext32u_i64, ret, arg1);
1341             return;
1342         }
1343         break;
1344     }
1345 
1346     tcg_gen_and_i64(ret, arg1, tcg_constant_i64(arg2));
1347 }
1348 
1349 void tcg_gen_ori_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
1350 {
1351     if (TCG_TARGET_REG_BITS == 32) {
1352         tcg_gen_ori_i32(TCGV_LOW(ret), TCGV_LOW(arg1), arg2);
1353         tcg_gen_ori_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), arg2 >> 32);
1354         return;
1355     }
1356     /* Some cases can be optimized here.  */
1357     if (arg2 == -1) {
1358         tcg_gen_movi_i64(ret, -1);
1359     } else if (arg2 == 0) {
1360         tcg_gen_mov_i64(ret, arg1);
1361     } else {
1362         tcg_gen_or_i64(ret, arg1, tcg_constant_i64(arg2));
1363     }
1364 }
1365 
1366 void tcg_gen_xori_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
1367 {
1368     if (TCG_TARGET_REG_BITS == 32) {
1369         tcg_gen_xori_i32(TCGV_LOW(ret), TCGV_LOW(arg1), arg2);
1370         tcg_gen_xori_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), arg2 >> 32);
1371         return;
1372     }
1373     /* Some cases can be optimized here.  */
1374     if (arg2 == 0) {
1375         tcg_gen_mov_i64(ret, arg1);
1376     } else if (arg2 == -1 && TCG_TARGET_HAS_not_i64) {
1377         /* Don't recurse with tcg_gen_not_i64.  */
1378         tcg_gen_op2_i64(INDEX_op_not_i64, ret, arg1);
1379     } else {
1380         tcg_gen_xor_i64(ret, arg1, tcg_constant_i64(arg2));
1381     }
1382 }
1383 
1384 static inline void tcg_gen_shifti_i64(TCGv_i64 ret, TCGv_i64 arg1,
1385                                       unsigned c, bool right, bool arith)
1386 {
1387     tcg_debug_assert(c < 64);
1388     if (c == 0) {
1389         tcg_gen_mov_i32(TCGV_LOW(ret), TCGV_LOW(arg1));
1390         tcg_gen_mov_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1));
1391     } else if (c >= 32) {
1392         c -= 32;
1393         if (right) {
1394             if (arith) {
1395                 tcg_gen_sari_i32(TCGV_LOW(ret), TCGV_HIGH(arg1), c);
1396                 tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), 31);
1397             } else {
1398                 tcg_gen_shri_i32(TCGV_LOW(ret), TCGV_HIGH(arg1), c);
1399                 tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
1400             }
1401         } else {
1402             tcg_gen_shli_i32(TCGV_HIGH(ret), TCGV_LOW(arg1), c);
1403             tcg_gen_movi_i32(TCGV_LOW(ret), 0);
1404         }
1405     } else if (right) {
1406         if (TCG_TARGET_HAS_extract2_i32) {
1407             tcg_gen_extract2_i32(TCGV_LOW(ret),
1408                                  TCGV_LOW(arg1), TCGV_HIGH(arg1), c);
1409         } else {
1410             tcg_gen_shri_i32(TCGV_LOW(ret), TCGV_LOW(arg1), c);
1411             tcg_gen_deposit_i32(TCGV_LOW(ret), TCGV_LOW(ret),
1412                                 TCGV_HIGH(arg1), 32 - c, c);
1413         }
1414         if (arith) {
1415             tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), c);
1416         } else {
1417             tcg_gen_shri_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), c);
1418         }
1419     } else {
1420         if (TCG_TARGET_HAS_extract2_i32) {
1421             tcg_gen_extract2_i32(TCGV_HIGH(ret),
1422                                  TCGV_LOW(arg1), TCGV_HIGH(arg1), 32 - c);
1423         } else {
1424             TCGv_i32 t0 = tcg_temp_new_i32();
1425             tcg_gen_shri_i32(t0, TCGV_LOW(arg1), 32 - c);
1426             tcg_gen_deposit_i32(TCGV_HIGH(ret), t0,
1427                                 TCGV_HIGH(arg1), c, 32 - c);
1428             tcg_temp_free_i32(t0);
1429         }
1430         tcg_gen_shli_i32(TCGV_LOW(ret), TCGV_LOW(arg1), c);
1431     }
1432 }
1433 
1434 void tcg_gen_shli_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
1435 {
1436     tcg_debug_assert(arg2 >= 0 && arg2 < 64);
1437     if (TCG_TARGET_REG_BITS == 32) {
1438         tcg_gen_shifti_i64(ret, arg1, arg2, 0, 0);
1439     } else if (arg2 == 0) {
1440         tcg_gen_mov_i64(ret, arg1);
1441     } else {
1442         tcg_gen_shl_i64(ret, arg1, tcg_constant_i64(arg2));
1443     }
1444 }
1445 
1446 void tcg_gen_shri_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
1447 {
1448     tcg_debug_assert(arg2 >= 0 && arg2 < 64);
1449     if (TCG_TARGET_REG_BITS == 32) {
1450         tcg_gen_shifti_i64(ret, arg1, arg2, 1, 0);
1451     } else if (arg2 == 0) {
1452         tcg_gen_mov_i64(ret, arg1);
1453     } else {
1454         tcg_gen_shr_i64(ret, arg1, tcg_constant_i64(arg2));
1455     }
1456 }
1457 
1458 void tcg_gen_sari_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
1459 {
1460     tcg_debug_assert(arg2 >= 0 && arg2 < 64);
1461     if (TCG_TARGET_REG_BITS == 32) {
1462         tcg_gen_shifti_i64(ret, arg1, arg2, 1, 1);
1463     } else if (arg2 == 0) {
1464         tcg_gen_mov_i64(ret, arg1);
1465     } else {
1466         tcg_gen_sar_i64(ret, arg1, tcg_constant_i64(arg2));
1467     }
1468 }
1469 
1470 void tcg_gen_brcond_i64(TCGCond cond, TCGv_i64 arg1, TCGv_i64 arg2, TCGLabel *l)
1471 {
1472     if (cond == TCG_COND_ALWAYS) {
1473         tcg_gen_br(l);
1474     } else if (cond != TCG_COND_NEVER) {
1475         l->refs++;
1476         if (TCG_TARGET_REG_BITS == 32) {
1477             tcg_gen_op6ii_i32(INDEX_op_brcond2_i32, TCGV_LOW(arg1),
1478                               TCGV_HIGH(arg1), TCGV_LOW(arg2),
1479                               TCGV_HIGH(arg2), cond, label_arg(l));
1480         } else {
1481             tcg_gen_op4ii_i64(INDEX_op_brcond_i64, arg1, arg2, cond,
1482                               label_arg(l));
1483         }
1484     }
1485 }
1486 
1487 void tcg_gen_brcondi_i64(TCGCond cond, TCGv_i64 arg1, int64_t arg2, TCGLabel *l)
1488 {
1489     if (TCG_TARGET_REG_BITS == 64) {
1490         tcg_gen_brcond_i64(cond, arg1, tcg_constant_i64(arg2), l);
1491     } else if (cond == TCG_COND_ALWAYS) {
1492         tcg_gen_br(l);
1493     } else if (cond != TCG_COND_NEVER) {
1494         l->refs++;
1495         tcg_gen_op6ii_i32(INDEX_op_brcond2_i32,
1496                           TCGV_LOW(arg1), TCGV_HIGH(arg1),
1497                           tcg_constant_i32(arg2),
1498                           tcg_constant_i32(arg2 >> 32),
1499                           cond, label_arg(l));
1500     }
1501 }
1502 
1503 void tcg_gen_setcond_i64(TCGCond cond, TCGv_i64 ret,
1504                          TCGv_i64 arg1, TCGv_i64 arg2)
1505 {
1506     if (cond == TCG_COND_ALWAYS) {
1507         tcg_gen_movi_i64(ret, 1);
1508     } else if (cond == TCG_COND_NEVER) {
1509         tcg_gen_movi_i64(ret, 0);
1510     } else {
1511         if (TCG_TARGET_REG_BITS == 32) {
1512             tcg_gen_op6i_i32(INDEX_op_setcond2_i32, TCGV_LOW(ret),
1513                              TCGV_LOW(arg1), TCGV_HIGH(arg1),
1514                              TCGV_LOW(arg2), TCGV_HIGH(arg2), cond);
1515             tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
1516         } else {
1517             tcg_gen_op4i_i64(INDEX_op_setcond_i64, ret, arg1, arg2, cond);
1518         }
1519     }
1520 }
1521 
1522 void tcg_gen_setcondi_i64(TCGCond cond, TCGv_i64 ret,
1523                           TCGv_i64 arg1, int64_t arg2)
1524 {
1525     if (TCG_TARGET_REG_BITS == 64) {
1526         tcg_gen_setcond_i64(cond, ret, arg1, tcg_constant_i64(arg2));
1527     } else if (cond == TCG_COND_ALWAYS) {
1528         tcg_gen_movi_i64(ret, 1);
1529     } else if (cond == TCG_COND_NEVER) {
1530         tcg_gen_movi_i64(ret, 0);
1531     } else {
1532         tcg_gen_op6i_i32(INDEX_op_setcond2_i32, TCGV_LOW(ret),
1533                          TCGV_LOW(arg1), TCGV_HIGH(arg1),
1534                          tcg_constant_i32(arg2),
1535                          tcg_constant_i32(arg2 >> 32), cond);
1536         tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
1537     }
1538 }
1539 
1540 void tcg_gen_muli_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
1541 {
1542     if (arg2 == 0) {
1543         tcg_gen_movi_i64(ret, 0);
1544     } else if (is_power_of_2(arg2)) {
1545         tcg_gen_shli_i64(ret, arg1, ctz64(arg2));
1546     } else {
1547         TCGv_i64 t0 = tcg_const_i64(arg2);
1548         tcg_gen_mul_i64(ret, arg1, t0);
1549         tcg_temp_free_i64(t0);
1550     }
1551 }
1552 
1553 void tcg_gen_div_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
1554 {
1555     if (TCG_TARGET_HAS_div_i64) {
1556         tcg_gen_op3_i64(INDEX_op_div_i64, ret, arg1, arg2);
1557     } else if (TCG_TARGET_HAS_div2_i64) {
1558         TCGv_i64 t0 = tcg_temp_new_i64();
1559         tcg_gen_sari_i64(t0, arg1, 63);
1560         tcg_gen_op5_i64(INDEX_op_div2_i64, ret, t0, arg1, t0, arg2);
1561         tcg_temp_free_i64(t0);
1562     } else {
1563         gen_helper_div_i64(ret, arg1, arg2);
1564     }
1565 }
1566 
1567 void tcg_gen_rem_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
1568 {
1569     if (TCG_TARGET_HAS_rem_i64) {
1570         tcg_gen_op3_i64(INDEX_op_rem_i64, ret, arg1, arg2);
1571     } else if (TCG_TARGET_HAS_div_i64) {
1572         TCGv_i64 t0 = tcg_temp_new_i64();
1573         tcg_gen_op3_i64(INDEX_op_div_i64, t0, arg1, arg2);
1574         tcg_gen_mul_i64(t0, t0, arg2);
1575         tcg_gen_sub_i64(ret, arg1, t0);
1576         tcg_temp_free_i64(t0);
1577     } else if (TCG_TARGET_HAS_div2_i64) {
1578         TCGv_i64 t0 = tcg_temp_new_i64();
1579         tcg_gen_sari_i64(t0, arg1, 63);
1580         tcg_gen_op5_i64(INDEX_op_div2_i64, t0, ret, arg1, t0, arg2);
1581         tcg_temp_free_i64(t0);
1582     } else {
1583         gen_helper_rem_i64(ret, arg1, arg2);
1584     }
1585 }
1586 
1587 void tcg_gen_divu_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
1588 {
1589     if (TCG_TARGET_HAS_div_i64) {
1590         tcg_gen_op3_i64(INDEX_op_divu_i64, ret, arg1, arg2);
1591     } else if (TCG_TARGET_HAS_div2_i64) {
1592         TCGv_i64 t0 = tcg_temp_new_i64();
1593         tcg_gen_movi_i64(t0, 0);
1594         tcg_gen_op5_i64(INDEX_op_divu2_i64, ret, t0, arg1, t0, arg2);
1595         tcg_temp_free_i64(t0);
1596     } else {
1597         gen_helper_divu_i64(ret, arg1, arg2);
1598     }
1599 }
1600 
1601 void tcg_gen_remu_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
1602 {
1603     if (TCG_TARGET_HAS_rem_i64) {
1604         tcg_gen_op3_i64(INDEX_op_remu_i64, ret, arg1, arg2);
1605     } else if (TCG_TARGET_HAS_div_i64) {
1606         TCGv_i64 t0 = tcg_temp_new_i64();
1607         tcg_gen_op3_i64(INDEX_op_divu_i64, t0, arg1, arg2);
1608         tcg_gen_mul_i64(t0, t0, arg2);
1609         tcg_gen_sub_i64(ret, arg1, t0);
1610         tcg_temp_free_i64(t0);
1611     } else if (TCG_TARGET_HAS_div2_i64) {
1612         TCGv_i64 t0 = tcg_temp_new_i64();
1613         tcg_gen_movi_i64(t0, 0);
1614         tcg_gen_op5_i64(INDEX_op_divu2_i64, t0, ret, arg1, t0, arg2);
1615         tcg_temp_free_i64(t0);
1616     } else {
1617         gen_helper_remu_i64(ret, arg1, arg2);
1618     }
1619 }
1620 
1621 void tcg_gen_ext8s_i64(TCGv_i64 ret, TCGv_i64 arg)
1622 {
1623     if (TCG_TARGET_REG_BITS == 32) {
1624         tcg_gen_ext8s_i32(TCGV_LOW(ret), TCGV_LOW(arg));
1625         tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31);
1626     } else if (TCG_TARGET_HAS_ext8s_i64) {
1627         tcg_gen_op2_i64(INDEX_op_ext8s_i64, ret, arg);
1628     } else {
1629         tcg_gen_shli_i64(ret, arg, 56);
1630         tcg_gen_sari_i64(ret, ret, 56);
1631     }
1632 }
1633 
1634 void tcg_gen_ext16s_i64(TCGv_i64 ret, TCGv_i64 arg)
1635 {
1636     if (TCG_TARGET_REG_BITS == 32) {
1637         tcg_gen_ext16s_i32(TCGV_LOW(ret), TCGV_LOW(arg));
1638         tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31);
1639     } else if (TCG_TARGET_HAS_ext16s_i64) {
1640         tcg_gen_op2_i64(INDEX_op_ext16s_i64, ret, arg);
1641     } else {
1642         tcg_gen_shli_i64(ret, arg, 48);
1643         tcg_gen_sari_i64(ret, ret, 48);
1644     }
1645 }
1646 
1647 void tcg_gen_ext32s_i64(TCGv_i64 ret, TCGv_i64 arg)
1648 {
1649     if (TCG_TARGET_REG_BITS == 32) {
1650         tcg_gen_mov_i32(TCGV_LOW(ret), TCGV_LOW(arg));
1651         tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31);
1652     } else if (TCG_TARGET_HAS_ext32s_i64) {
1653         tcg_gen_op2_i64(INDEX_op_ext32s_i64, ret, arg);
1654     } else {
1655         tcg_gen_shli_i64(ret, arg, 32);
1656         tcg_gen_sari_i64(ret, ret, 32);
1657     }
1658 }
1659 
1660 void tcg_gen_ext8u_i64(TCGv_i64 ret, TCGv_i64 arg)
1661 {
1662     if (TCG_TARGET_REG_BITS == 32) {
1663         tcg_gen_ext8u_i32(TCGV_LOW(ret), TCGV_LOW(arg));
1664         tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
1665     } else if (TCG_TARGET_HAS_ext8u_i64) {
1666         tcg_gen_op2_i64(INDEX_op_ext8u_i64, ret, arg);
1667     } else {
1668         tcg_gen_andi_i64(ret, arg, 0xffu);
1669     }
1670 }
1671 
1672 void tcg_gen_ext16u_i64(TCGv_i64 ret, TCGv_i64 arg)
1673 {
1674     if (TCG_TARGET_REG_BITS == 32) {
1675         tcg_gen_ext16u_i32(TCGV_LOW(ret), TCGV_LOW(arg));
1676         tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
1677     } else if (TCG_TARGET_HAS_ext16u_i64) {
1678         tcg_gen_op2_i64(INDEX_op_ext16u_i64, ret, arg);
1679     } else {
1680         tcg_gen_andi_i64(ret, arg, 0xffffu);
1681     }
1682 }
1683 
1684 void tcg_gen_ext32u_i64(TCGv_i64 ret, TCGv_i64 arg)
1685 {
1686     if (TCG_TARGET_REG_BITS == 32) {
1687         tcg_gen_mov_i32(TCGV_LOW(ret), TCGV_LOW(arg));
1688         tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
1689     } else if (TCG_TARGET_HAS_ext32u_i64) {
1690         tcg_gen_op2_i64(INDEX_op_ext32u_i64, ret, arg);
1691     } else {
1692         tcg_gen_andi_i64(ret, arg, 0xffffffffu);
1693     }
1694 }
1695 
1696 void tcg_gen_bswap16_i64(TCGv_i64 ret, TCGv_i64 arg, int flags)
1697 {
1698     /* Only one extension flag may be present. */
1699     tcg_debug_assert(!(flags & TCG_BSWAP_OS) || !(flags & TCG_BSWAP_OZ));
1700 
1701     if (TCG_TARGET_REG_BITS == 32) {
1702         tcg_gen_bswap16_i32(TCGV_LOW(ret), TCGV_LOW(arg), flags);
1703         if (flags & TCG_BSWAP_OS) {
1704             tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31);
1705         } else {
1706             tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
1707         }
1708     } else if (TCG_TARGET_HAS_bswap16_i64) {
1709         tcg_gen_op3i_i64(INDEX_op_bswap16_i64, ret, arg, flags);
1710     } else {
1711         TCGv_i64 t0 = tcg_temp_new_i64();
1712         TCGv_i64 t1 = tcg_temp_new_i64();
1713 
1714         tcg_gen_shri_i64(t0, arg, 8);
1715         if (!(flags & TCG_BSWAP_IZ)) {
1716             tcg_gen_ext8u_i64(t0, t0);
1717         }
1718 
1719         if (flags & TCG_BSWAP_OS) {
1720             tcg_gen_shli_i64(t1, arg, 56);
1721             tcg_gen_sari_i64(t1, t1, 48);
1722         } else if (flags & TCG_BSWAP_OZ) {
1723             tcg_gen_ext8u_i64(t1, arg);
1724             tcg_gen_shli_i64(t1, t1, 8);
1725         } else {
1726             tcg_gen_shli_i64(t1, arg, 8);
1727         }
1728 
1729         tcg_gen_or_i64(ret, t0, t1);
1730         tcg_temp_free_i64(t0);
1731         tcg_temp_free_i64(t1);
1732     }
1733 }
1734 
1735 void tcg_gen_bswap32_i64(TCGv_i64 ret, TCGv_i64 arg, int flags)
1736 {
1737     /* Only one extension flag may be present. */
1738     tcg_debug_assert(!(flags & TCG_BSWAP_OS) || !(flags & TCG_BSWAP_OZ));
1739 
1740     if (TCG_TARGET_REG_BITS == 32) {
1741         tcg_gen_bswap32_i32(TCGV_LOW(ret), TCGV_LOW(arg));
1742         if (flags & TCG_BSWAP_OS) {
1743             tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31);
1744         } else {
1745             tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
1746         }
1747     } else if (TCG_TARGET_HAS_bswap32_i64) {
1748         tcg_gen_op3i_i64(INDEX_op_bswap32_i64, ret, arg, flags);
1749     } else {
1750         TCGv_i64 t0 = tcg_temp_new_i64();
1751         TCGv_i64 t1 = tcg_temp_new_i64();
1752         TCGv_i64 t2 = tcg_constant_i64(0x00ff00ff);
1753 
1754                                             /* arg = xxxxabcd */
1755         tcg_gen_shri_i64(t0, arg, 8);       /*  t0 = .xxxxabc */
1756         tcg_gen_and_i64(t1, arg, t2);       /*  t1 = .....b.d */
1757         tcg_gen_and_i64(t0, t0, t2);        /*  t0 = .....a.c */
1758         tcg_gen_shli_i64(t1, t1, 8);        /*  t1 = ....b.d. */
1759         tcg_gen_or_i64(ret, t0, t1);        /* ret = ....badc */
1760 
1761         tcg_gen_shli_i64(t1, ret, 48);      /*  t1 = dc...... */
1762         tcg_gen_shri_i64(t0, ret, 16);      /*  t0 = ......ba */
1763         if (flags & TCG_BSWAP_OS) {
1764             tcg_gen_sari_i64(t1, t1, 32);   /*  t1 = ssssdc.. */
1765         } else {
1766             tcg_gen_shri_i64(t1, t1, 32);   /*  t1 = ....dc.. */
1767         }
1768         tcg_gen_or_i64(ret, t0, t1);        /* ret = ssssdcba */
1769 
1770         tcg_temp_free_i64(t0);
1771         tcg_temp_free_i64(t1);
1772     }
1773 }
1774 
1775 void tcg_gen_bswap64_i64(TCGv_i64 ret, TCGv_i64 arg)
1776 {
1777     if (TCG_TARGET_REG_BITS == 32) {
1778         TCGv_i32 t0, t1;
1779         t0 = tcg_temp_new_i32();
1780         t1 = tcg_temp_new_i32();
1781 
1782         tcg_gen_bswap32_i32(t0, TCGV_LOW(arg));
1783         tcg_gen_bswap32_i32(t1, TCGV_HIGH(arg));
1784         tcg_gen_mov_i32(TCGV_LOW(ret), t1);
1785         tcg_gen_mov_i32(TCGV_HIGH(ret), t0);
1786         tcg_temp_free_i32(t0);
1787         tcg_temp_free_i32(t1);
1788     } else if (TCG_TARGET_HAS_bswap64_i64) {
1789         tcg_gen_op3i_i64(INDEX_op_bswap64_i64, ret, arg, 0);
1790     } else {
1791         TCGv_i64 t0 = tcg_temp_new_i64();
1792         TCGv_i64 t1 = tcg_temp_new_i64();
1793         TCGv_i64 t2 = tcg_temp_new_i64();
1794 
1795                                         /* arg = abcdefgh */
1796         tcg_gen_movi_i64(t2, 0x00ff00ff00ff00ffull);
1797         tcg_gen_shri_i64(t0, arg, 8);   /*  t0 = .abcdefg */
1798         tcg_gen_and_i64(t1, arg, t2);   /*  t1 = .b.d.f.h */
1799         tcg_gen_and_i64(t0, t0, t2);    /*  t0 = .a.c.e.g */
1800         tcg_gen_shli_i64(t1, t1, 8);    /*  t1 = b.d.f.h. */
1801         tcg_gen_or_i64(ret, t0, t1);    /* ret = badcfehg */
1802 
1803         tcg_gen_movi_i64(t2, 0x0000ffff0000ffffull);
1804         tcg_gen_shri_i64(t0, ret, 16);  /*  t0 = ..badcfe */
1805         tcg_gen_and_i64(t1, ret, t2);   /*  t1 = ..dc..hg */
1806         tcg_gen_and_i64(t0, t0, t2);    /*  t0 = ..ba..fe */
1807         tcg_gen_shli_i64(t1, t1, 16);   /*  t1 = dc..hg.. */
1808         tcg_gen_or_i64(ret, t0, t1);    /* ret = dcbahgfe */
1809 
1810         tcg_gen_shri_i64(t0, ret, 32);  /*  t0 = ....dcba */
1811         tcg_gen_shli_i64(t1, ret, 32);  /*  t1 = hgfe.... */
1812         tcg_gen_or_i64(ret, t0, t1);    /* ret = hgfedcba */
1813 
1814         tcg_temp_free_i64(t0);
1815         tcg_temp_free_i64(t1);
1816         tcg_temp_free_i64(t2);
1817     }
1818 }
1819 
1820 void tcg_gen_hswap_i64(TCGv_i64 ret, TCGv_i64 arg)
1821 {
1822     uint64_t m = 0x0000ffff0000ffffull;
1823     TCGv_i64 t0 = tcg_temp_new_i64();
1824     TCGv_i64 t1 = tcg_temp_new_i64();
1825 
1826     /* See include/qemu/bitops.h, hswap64. */
1827     tcg_gen_rotli_i64(t1, arg, 32);
1828     tcg_gen_andi_i64(t0, t1, m);
1829     tcg_gen_shli_i64(t0, t0, 16);
1830     tcg_gen_shri_i64(t1, t1, 16);
1831     tcg_gen_andi_i64(t1, t1, m);
1832     tcg_gen_or_i64(ret, t0, t1);
1833 
1834     tcg_temp_free_i64(t0);
1835     tcg_temp_free_i64(t1);
1836 }
1837 
1838 void tcg_gen_wswap_i64(TCGv_i64 ret, TCGv_i64 arg)
1839 {
1840     /* Swapping 2 32-bit elements is a rotate. */
1841     tcg_gen_rotli_i64(ret, arg, 32);
1842 }
1843 
1844 void tcg_gen_not_i64(TCGv_i64 ret, TCGv_i64 arg)
1845 {
1846     if (TCG_TARGET_REG_BITS == 32) {
1847         tcg_gen_not_i32(TCGV_LOW(ret), TCGV_LOW(arg));
1848         tcg_gen_not_i32(TCGV_HIGH(ret), TCGV_HIGH(arg));
1849     } else if (TCG_TARGET_HAS_not_i64) {
1850         tcg_gen_op2_i64(INDEX_op_not_i64, ret, arg);
1851     } else {
1852         tcg_gen_xori_i64(ret, arg, -1);
1853     }
1854 }
1855 
1856 void tcg_gen_andc_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
1857 {
1858     if (TCG_TARGET_REG_BITS == 32) {
1859         tcg_gen_andc_i32(TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_LOW(arg2));
1860         tcg_gen_andc_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), TCGV_HIGH(arg2));
1861     } else if (TCG_TARGET_HAS_andc_i64) {
1862         tcg_gen_op3_i64(INDEX_op_andc_i64, ret, arg1, arg2);
1863     } else {
1864         TCGv_i64 t0 = tcg_temp_new_i64();
1865         tcg_gen_not_i64(t0, arg2);
1866         tcg_gen_and_i64(ret, arg1, t0);
1867         tcg_temp_free_i64(t0);
1868     }
1869 }
1870 
1871 void tcg_gen_eqv_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
1872 {
1873     if (TCG_TARGET_REG_BITS == 32) {
1874         tcg_gen_eqv_i32(TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_LOW(arg2));
1875         tcg_gen_eqv_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), TCGV_HIGH(arg2));
1876     } else if (TCG_TARGET_HAS_eqv_i64) {
1877         tcg_gen_op3_i64(INDEX_op_eqv_i64, ret, arg1, arg2);
1878     } else {
1879         tcg_gen_xor_i64(ret, arg1, arg2);
1880         tcg_gen_not_i64(ret, ret);
1881     }
1882 }
1883 
1884 void tcg_gen_nand_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
1885 {
1886     if (TCG_TARGET_REG_BITS == 32) {
1887         tcg_gen_nand_i32(TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_LOW(arg2));
1888         tcg_gen_nand_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), TCGV_HIGH(arg2));
1889     } else if (TCG_TARGET_HAS_nand_i64) {
1890         tcg_gen_op3_i64(INDEX_op_nand_i64, ret, arg1, arg2);
1891     } else {
1892         tcg_gen_and_i64(ret, arg1, arg2);
1893         tcg_gen_not_i64(ret, ret);
1894     }
1895 }
1896 
1897 void tcg_gen_nor_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
1898 {
1899     if (TCG_TARGET_REG_BITS == 32) {
1900         tcg_gen_nor_i32(TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_LOW(arg2));
1901         tcg_gen_nor_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), TCGV_HIGH(arg2));
1902     } else if (TCG_TARGET_HAS_nor_i64) {
1903         tcg_gen_op3_i64(INDEX_op_nor_i64, ret, arg1, arg2);
1904     } else {
1905         tcg_gen_or_i64(ret, arg1, arg2);
1906         tcg_gen_not_i64(ret, ret);
1907     }
1908 }
1909 
1910 void tcg_gen_orc_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
1911 {
1912     if (TCG_TARGET_REG_BITS == 32) {
1913         tcg_gen_orc_i32(TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_LOW(arg2));
1914         tcg_gen_orc_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), TCGV_HIGH(arg2));
1915     } else if (TCG_TARGET_HAS_orc_i64) {
1916         tcg_gen_op3_i64(INDEX_op_orc_i64, ret, arg1, arg2);
1917     } else {
1918         TCGv_i64 t0 = tcg_temp_new_i64();
1919         tcg_gen_not_i64(t0, arg2);
1920         tcg_gen_or_i64(ret, arg1, t0);
1921         tcg_temp_free_i64(t0);
1922     }
1923 }
1924 
1925 void tcg_gen_clz_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
1926 {
1927     if (TCG_TARGET_HAS_clz_i64) {
1928         tcg_gen_op3_i64(INDEX_op_clz_i64, ret, arg1, arg2);
1929     } else {
1930         gen_helper_clz_i64(ret, arg1, arg2);
1931     }
1932 }
1933 
1934 void tcg_gen_clzi_i64(TCGv_i64 ret, TCGv_i64 arg1, uint64_t arg2)
1935 {
1936     if (TCG_TARGET_REG_BITS == 32
1937         && TCG_TARGET_HAS_clz_i32
1938         && arg2 <= 0xffffffffu) {
1939         TCGv_i32 t = tcg_temp_new_i32();
1940         tcg_gen_clzi_i32(t, TCGV_LOW(arg1), arg2 - 32);
1941         tcg_gen_addi_i32(t, t, 32);
1942         tcg_gen_clz_i32(TCGV_LOW(ret), TCGV_HIGH(arg1), t);
1943         tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
1944         tcg_temp_free_i32(t);
1945     } else {
1946         TCGv_i64 t0 = tcg_const_i64(arg2);
1947         tcg_gen_clz_i64(ret, arg1, t0);
1948         tcg_temp_free_i64(t0);
1949     }
1950 }
1951 
1952 void tcg_gen_ctz_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
1953 {
1954     if (TCG_TARGET_HAS_ctz_i64) {
1955         tcg_gen_op3_i64(INDEX_op_ctz_i64, ret, arg1, arg2);
1956     } else if (TCG_TARGET_HAS_ctpop_i64 || TCG_TARGET_HAS_clz_i64) {
1957         TCGv_i64 z, t = tcg_temp_new_i64();
1958 
1959         if (TCG_TARGET_HAS_ctpop_i64) {
1960             tcg_gen_subi_i64(t, arg1, 1);
1961             tcg_gen_andc_i64(t, t, arg1);
1962             tcg_gen_ctpop_i64(t, t);
1963         } else {
1964             /* Since all non-x86 hosts have clz(0) == 64, don't fight it.  */
1965             tcg_gen_neg_i64(t, arg1);
1966             tcg_gen_and_i64(t, t, arg1);
1967             tcg_gen_clzi_i64(t, t, 64);
1968             tcg_gen_xori_i64(t, t, 63);
1969         }
1970         z = tcg_constant_i64(0);
1971         tcg_gen_movcond_i64(TCG_COND_EQ, ret, arg1, z, arg2, t);
1972         tcg_temp_free_i64(t);
1973         tcg_temp_free_i64(z);
1974     } else {
1975         gen_helper_ctz_i64(ret, arg1, arg2);
1976     }
1977 }
1978 
1979 void tcg_gen_ctzi_i64(TCGv_i64 ret, TCGv_i64 arg1, uint64_t arg2)
1980 {
1981     if (TCG_TARGET_REG_BITS == 32
1982         && TCG_TARGET_HAS_ctz_i32
1983         && arg2 <= 0xffffffffu) {
1984         TCGv_i32 t32 = tcg_temp_new_i32();
1985         tcg_gen_ctzi_i32(t32, TCGV_HIGH(arg1), arg2 - 32);
1986         tcg_gen_addi_i32(t32, t32, 32);
1987         tcg_gen_ctz_i32(TCGV_LOW(ret), TCGV_LOW(arg1), t32);
1988         tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
1989         tcg_temp_free_i32(t32);
1990     } else if (!TCG_TARGET_HAS_ctz_i64
1991                && TCG_TARGET_HAS_ctpop_i64
1992                && arg2 == 64) {
1993         /* This equivalence has the advantage of not requiring a fixup.  */
1994         TCGv_i64 t = tcg_temp_new_i64();
1995         tcg_gen_subi_i64(t, arg1, 1);
1996         tcg_gen_andc_i64(t, t, arg1);
1997         tcg_gen_ctpop_i64(ret, t);
1998         tcg_temp_free_i64(t);
1999     } else {
2000         TCGv_i64 t0 = tcg_const_i64(arg2);
2001         tcg_gen_ctz_i64(ret, arg1, t0);
2002         tcg_temp_free_i64(t0);
2003     }
2004 }
2005 
2006 void tcg_gen_clrsb_i64(TCGv_i64 ret, TCGv_i64 arg)
2007 {
2008     if (TCG_TARGET_HAS_clz_i64 || TCG_TARGET_HAS_clz_i32) {
2009         TCGv_i64 t = tcg_temp_new_i64();
2010         tcg_gen_sari_i64(t, arg, 63);
2011         tcg_gen_xor_i64(t, t, arg);
2012         tcg_gen_clzi_i64(t, t, 64);
2013         tcg_gen_subi_i64(ret, t, 1);
2014         tcg_temp_free_i64(t);
2015     } else {
2016         gen_helper_clrsb_i64(ret, arg);
2017     }
2018 }
2019 
2020 void tcg_gen_ctpop_i64(TCGv_i64 ret, TCGv_i64 arg1)
2021 {
2022     if (TCG_TARGET_HAS_ctpop_i64) {
2023         tcg_gen_op2_i64(INDEX_op_ctpop_i64, ret, arg1);
2024     } else if (TCG_TARGET_REG_BITS == 32 && TCG_TARGET_HAS_ctpop_i32) {
2025         tcg_gen_ctpop_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1));
2026         tcg_gen_ctpop_i32(TCGV_LOW(ret), TCGV_LOW(arg1));
2027         tcg_gen_add_i32(TCGV_LOW(ret), TCGV_LOW(ret), TCGV_HIGH(ret));
2028         tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
2029     } else {
2030         gen_helper_ctpop_i64(ret, arg1);
2031     }
2032 }
2033 
2034 void tcg_gen_rotl_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
2035 {
2036     if (TCG_TARGET_HAS_rot_i64) {
2037         tcg_gen_op3_i64(INDEX_op_rotl_i64, ret, arg1, arg2);
2038     } else {
2039         TCGv_i64 t0, t1;
2040         t0 = tcg_temp_new_i64();
2041         t1 = tcg_temp_new_i64();
2042         tcg_gen_shl_i64(t0, arg1, arg2);
2043         tcg_gen_subfi_i64(t1, 64, arg2);
2044         tcg_gen_shr_i64(t1, arg1, t1);
2045         tcg_gen_or_i64(ret, t0, t1);
2046         tcg_temp_free_i64(t0);
2047         tcg_temp_free_i64(t1);
2048     }
2049 }
2050 
2051 void tcg_gen_rotli_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
2052 {
2053     tcg_debug_assert(arg2 >= 0 && arg2 < 64);
2054     /* some cases can be optimized here */
2055     if (arg2 == 0) {
2056         tcg_gen_mov_i64(ret, arg1);
2057     } else if (TCG_TARGET_HAS_rot_i64) {
2058         tcg_gen_rotl_i64(ret, arg1, tcg_constant_i64(arg2));
2059     } else {
2060         TCGv_i64 t0, t1;
2061         t0 = tcg_temp_new_i64();
2062         t1 = tcg_temp_new_i64();
2063         tcg_gen_shli_i64(t0, arg1, arg2);
2064         tcg_gen_shri_i64(t1, arg1, 64 - arg2);
2065         tcg_gen_or_i64(ret, t0, t1);
2066         tcg_temp_free_i64(t0);
2067         tcg_temp_free_i64(t1);
2068     }
2069 }
2070 
2071 void tcg_gen_rotr_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
2072 {
2073     if (TCG_TARGET_HAS_rot_i64) {
2074         tcg_gen_op3_i64(INDEX_op_rotr_i64, ret, arg1, arg2);
2075     } else {
2076         TCGv_i64 t0, t1;
2077         t0 = tcg_temp_new_i64();
2078         t1 = tcg_temp_new_i64();
2079         tcg_gen_shr_i64(t0, arg1, arg2);
2080         tcg_gen_subfi_i64(t1, 64, arg2);
2081         tcg_gen_shl_i64(t1, arg1, t1);
2082         tcg_gen_or_i64(ret, t0, t1);
2083         tcg_temp_free_i64(t0);
2084         tcg_temp_free_i64(t1);
2085     }
2086 }
2087 
2088 void tcg_gen_rotri_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
2089 {
2090     tcg_debug_assert(arg2 >= 0 && arg2 < 64);
2091     /* some cases can be optimized here */
2092     if (arg2 == 0) {
2093         tcg_gen_mov_i64(ret, arg1);
2094     } else {
2095         tcg_gen_rotli_i64(ret, arg1, 64 - arg2);
2096     }
2097 }
2098 
2099 void tcg_gen_deposit_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2,
2100                          unsigned int ofs, unsigned int len)
2101 {
2102     uint64_t mask;
2103     TCGv_i64 t1;
2104 
2105     tcg_debug_assert(ofs < 64);
2106     tcg_debug_assert(len > 0);
2107     tcg_debug_assert(len <= 64);
2108     tcg_debug_assert(ofs + len <= 64);
2109 
2110     if (len == 64) {
2111         tcg_gen_mov_i64(ret, arg2);
2112         return;
2113     }
2114     if (TCG_TARGET_HAS_deposit_i64 && TCG_TARGET_deposit_i64_valid(ofs, len)) {
2115         tcg_gen_op5ii_i64(INDEX_op_deposit_i64, ret, arg1, arg2, ofs, len);
2116         return;
2117     }
2118 
2119     if (TCG_TARGET_REG_BITS == 32) {
2120         if (ofs >= 32) {
2121             tcg_gen_deposit_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1),
2122                                 TCGV_LOW(arg2), ofs - 32, len);
2123             tcg_gen_mov_i32(TCGV_LOW(ret), TCGV_LOW(arg1));
2124             return;
2125         }
2126         if (ofs + len <= 32) {
2127             tcg_gen_deposit_i32(TCGV_LOW(ret), TCGV_LOW(arg1),
2128                                 TCGV_LOW(arg2), ofs, len);
2129             tcg_gen_mov_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1));
2130             return;
2131         }
2132     }
2133 
2134     t1 = tcg_temp_new_i64();
2135 
2136     if (TCG_TARGET_HAS_extract2_i64) {
2137         if (ofs + len == 64) {
2138             tcg_gen_shli_i64(t1, arg1, len);
2139             tcg_gen_extract2_i64(ret, t1, arg2, len);
2140             goto done;
2141         }
2142         if (ofs == 0) {
2143             tcg_gen_extract2_i64(ret, arg1, arg2, len);
2144             tcg_gen_rotli_i64(ret, ret, len);
2145             goto done;
2146         }
2147     }
2148 
2149     mask = (1ull << len) - 1;
2150     if (ofs + len < 64) {
2151         tcg_gen_andi_i64(t1, arg2, mask);
2152         tcg_gen_shli_i64(t1, t1, ofs);
2153     } else {
2154         tcg_gen_shli_i64(t1, arg2, ofs);
2155     }
2156     tcg_gen_andi_i64(ret, arg1, ~(mask << ofs));
2157     tcg_gen_or_i64(ret, ret, t1);
2158  done:
2159     tcg_temp_free_i64(t1);
2160 }
2161 
2162 void tcg_gen_deposit_z_i64(TCGv_i64 ret, TCGv_i64 arg,
2163                            unsigned int ofs, unsigned int len)
2164 {
2165     tcg_debug_assert(ofs < 64);
2166     tcg_debug_assert(len > 0);
2167     tcg_debug_assert(len <= 64);
2168     tcg_debug_assert(ofs + len <= 64);
2169 
2170     if (ofs + len == 64) {
2171         tcg_gen_shli_i64(ret, arg, ofs);
2172     } else if (ofs == 0) {
2173         tcg_gen_andi_i64(ret, arg, (1ull << len) - 1);
2174     } else if (TCG_TARGET_HAS_deposit_i64
2175                && TCG_TARGET_deposit_i64_valid(ofs, len)) {
2176         TCGv_i64 zero = tcg_constant_i64(0);
2177         tcg_gen_op5ii_i64(INDEX_op_deposit_i64, ret, zero, arg, ofs, len);
2178     } else {
2179         if (TCG_TARGET_REG_BITS == 32) {
2180             if (ofs >= 32) {
2181                 tcg_gen_deposit_z_i32(TCGV_HIGH(ret), TCGV_LOW(arg),
2182                                       ofs - 32, len);
2183                 tcg_gen_movi_i32(TCGV_LOW(ret), 0);
2184                 return;
2185             }
2186             if (ofs + len <= 32) {
2187                 tcg_gen_deposit_z_i32(TCGV_LOW(ret), TCGV_LOW(arg), ofs, len);
2188                 tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
2189                 return;
2190             }
2191         }
2192         /* To help two-operand hosts we prefer to zero-extend first,
2193            which allows ARG to stay live.  */
2194         switch (len) {
2195         case 32:
2196             if (TCG_TARGET_HAS_ext32u_i64) {
2197                 tcg_gen_ext32u_i64(ret, arg);
2198                 tcg_gen_shli_i64(ret, ret, ofs);
2199                 return;
2200             }
2201             break;
2202         case 16:
2203             if (TCG_TARGET_HAS_ext16u_i64) {
2204                 tcg_gen_ext16u_i64(ret, arg);
2205                 tcg_gen_shli_i64(ret, ret, ofs);
2206                 return;
2207             }
2208             break;
2209         case 8:
2210             if (TCG_TARGET_HAS_ext8u_i64) {
2211                 tcg_gen_ext8u_i64(ret, arg);
2212                 tcg_gen_shli_i64(ret, ret, ofs);
2213                 return;
2214             }
2215             break;
2216         }
2217         /* Otherwise prefer zero-extension over AND for code size.  */
2218         switch (ofs + len) {
2219         case 32:
2220             if (TCG_TARGET_HAS_ext32u_i64) {
2221                 tcg_gen_shli_i64(ret, arg, ofs);
2222                 tcg_gen_ext32u_i64(ret, ret);
2223                 return;
2224             }
2225             break;
2226         case 16:
2227             if (TCG_TARGET_HAS_ext16u_i64) {
2228                 tcg_gen_shli_i64(ret, arg, ofs);
2229                 tcg_gen_ext16u_i64(ret, ret);
2230                 return;
2231             }
2232             break;
2233         case 8:
2234             if (TCG_TARGET_HAS_ext8u_i64) {
2235                 tcg_gen_shli_i64(ret, arg, ofs);
2236                 tcg_gen_ext8u_i64(ret, ret);
2237                 return;
2238             }
2239             break;
2240         }
2241         tcg_gen_andi_i64(ret, arg, (1ull << len) - 1);
2242         tcg_gen_shli_i64(ret, ret, ofs);
2243     }
2244 }
2245 
2246 void tcg_gen_extract_i64(TCGv_i64 ret, TCGv_i64 arg,
2247                          unsigned int ofs, unsigned int len)
2248 {
2249     tcg_debug_assert(ofs < 64);
2250     tcg_debug_assert(len > 0);
2251     tcg_debug_assert(len <= 64);
2252     tcg_debug_assert(ofs + len <= 64);
2253 
2254     /* Canonicalize certain special cases, even if extract is supported.  */
2255     if (ofs + len == 64) {
2256         tcg_gen_shri_i64(ret, arg, 64 - len);
2257         return;
2258     }
2259     if (ofs == 0) {
2260         tcg_gen_andi_i64(ret, arg, (1ull << len) - 1);
2261         return;
2262     }
2263 
2264     if (TCG_TARGET_REG_BITS == 32) {
2265         /* Look for a 32-bit extract within one of the two words.  */
2266         if (ofs >= 32) {
2267             tcg_gen_extract_i32(TCGV_LOW(ret), TCGV_HIGH(arg), ofs - 32, len);
2268             tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
2269             return;
2270         }
2271         if (ofs + len <= 32) {
2272             tcg_gen_extract_i32(TCGV_LOW(ret), TCGV_LOW(arg), ofs, len);
2273             tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
2274             return;
2275         }
2276         /* The field is split across two words.  One double-word
2277            shift is better than two double-word shifts.  */
2278         goto do_shift_and;
2279     }
2280 
2281     if (TCG_TARGET_HAS_extract_i64
2282         && TCG_TARGET_extract_i64_valid(ofs, len)) {
2283         tcg_gen_op4ii_i64(INDEX_op_extract_i64, ret, arg, ofs, len);
2284         return;
2285     }
2286 
2287     /* Assume that zero-extension, if available, is cheaper than a shift.  */
2288     switch (ofs + len) {
2289     case 32:
2290         if (TCG_TARGET_HAS_ext32u_i64) {
2291             tcg_gen_ext32u_i64(ret, arg);
2292             tcg_gen_shri_i64(ret, ret, ofs);
2293             return;
2294         }
2295         break;
2296     case 16:
2297         if (TCG_TARGET_HAS_ext16u_i64) {
2298             tcg_gen_ext16u_i64(ret, arg);
2299             tcg_gen_shri_i64(ret, ret, ofs);
2300             return;
2301         }
2302         break;
2303     case 8:
2304         if (TCG_TARGET_HAS_ext8u_i64) {
2305             tcg_gen_ext8u_i64(ret, arg);
2306             tcg_gen_shri_i64(ret, ret, ofs);
2307             return;
2308         }
2309         break;
2310     }
2311 
2312     /* ??? Ideally we'd know what values are available for immediate AND.
2313        Assume that 8 bits are available, plus the special cases of 16 and 32,
2314        so that we get ext8u, ext16u, and ext32u.  */
2315     switch (len) {
2316     case 1 ... 8: case 16: case 32:
2317     do_shift_and:
2318         tcg_gen_shri_i64(ret, arg, ofs);
2319         tcg_gen_andi_i64(ret, ret, (1ull << len) - 1);
2320         break;
2321     default:
2322         tcg_gen_shli_i64(ret, arg, 64 - len - ofs);
2323         tcg_gen_shri_i64(ret, ret, 64 - len);
2324         break;
2325     }
2326 }
2327 
2328 void tcg_gen_sextract_i64(TCGv_i64 ret, TCGv_i64 arg,
2329                           unsigned int ofs, unsigned int len)
2330 {
2331     tcg_debug_assert(ofs < 64);
2332     tcg_debug_assert(len > 0);
2333     tcg_debug_assert(len <= 64);
2334     tcg_debug_assert(ofs + len <= 64);
2335 
2336     /* Canonicalize certain special cases, even if sextract is supported.  */
2337     if (ofs + len == 64) {
2338         tcg_gen_sari_i64(ret, arg, 64 - len);
2339         return;
2340     }
2341     if (ofs == 0) {
2342         switch (len) {
2343         case 32:
2344             tcg_gen_ext32s_i64(ret, arg);
2345             return;
2346         case 16:
2347             tcg_gen_ext16s_i64(ret, arg);
2348             return;
2349         case 8:
2350             tcg_gen_ext8s_i64(ret, arg);
2351             return;
2352         }
2353     }
2354 
2355     if (TCG_TARGET_REG_BITS == 32) {
2356         /* Look for a 32-bit extract within one of the two words.  */
2357         if (ofs >= 32) {
2358             tcg_gen_sextract_i32(TCGV_LOW(ret), TCGV_HIGH(arg), ofs - 32, len);
2359         } else if (ofs + len <= 32) {
2360             tcg_gen_sextract_i32(TCGV_LOW(ret), TCGV_LOW(arg), ofs, len);
2361         } else if (ofs == 0) {
2362             tcg_gen_mov_i32(TCGV_LOW(ret), TCGV_LOW(arg));
2363             tcg_gen_sextract_i32(TCGV_HIGH(ret), TCGV_HIGH(arg), 0, len - 32);
2364             return;
2365         } else if (len > 32) {
2366             TCGv_i32 t = tcg_temp_new_i32();
2367             /* Extract the bits for the high word normally.  */
2368             tcg_gen_sextract_i32(t, TCGV_HIGH(arg), ofs + 32, len - 32);
2369             /* Shift the field down for the low part.  */
2370             tcg_gen_shri_i64(ret, arg, ofs);
2371             /* Overwrite the shift into the high part.  */
2372             tcg_gen_mov_i32(TCGV_HIGH(ret), t);
2373             tcg_temp_free_i32(t);
2374             return;
2375         } else {
2376             /* Shift the field down for the low part, such that the
2377                field sits at the MSB.  */
2378             tcg_gen_shri_i64(ret, arg, ofs + len - 32);
2379             /* Shift the field down from the MSB, sign extending.  */
2380             tcg_gen_sari_i32(TCGV_LOW(ret), TCGV_LOW(ret), 32 - len);
2381         }
2382         /* Sign-extend the field from 32 bits.  */
2383         tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31);
2384         return;
2385     }
2386 
2387     if (TCG_TARGET_HAS_sextract_i64
2388         && TCG_TARGET_extract_i64_valid(ofs, len)) {
2389         tcg_gen_op4ii_i64(INDEX_op_sextract_i64, ret, arg, ofs, len);
2390         return;
2391     }
2392 
2393     /* Assume that sign-extension, if available, is cheaper than a shift.  */
2394     switch (ofs + len) {
2395     case 32:
2396         if (TCG_TARGET_HAS_ext32s_i64) {
2397             tcg_gen_ext32s_i64(ret, arg);
2398             tcg_gen_sari_i64(ret, ret, ofs);
2399             return;
2400         }
2401         break;
2402     case 16:
2403         if (TCG_TARGET_HAS_ext16s_i64) {
2404             tcg_gen_ext16s_i64(ret, arg);
2405             tcg_gen_sari_i64(ret, ret, ofs);
2406             return;
2407         }
2408         break;
2409     case 8:
2410         if (TCG_TARGET_HAS_ext8s_i64) {
2411             tcg_gen_ext8s_i64(ret, arg);
2412             tcg_gen_sari_i64(ret, ret, ofs);
2413             return;
2414         }
2415         break;
2416     }
2417     switch (len) {
2418     case 32:
2419         if (TCG_TARGET_HAS_ext32s_i64) {
2420             tcg_gen_shri_i64(ret, arg, ofs);
2421             tcg_gen_ext32s_i64(ret, ret);
2422             return;
2423         }
2424         break;
2425     case 16:
2426         if (TCG_TARGET_HAS_ext16s_i64) {
2427             tcg_gen_shri_i64(ret, arg, ofs);
2428             tcg_gen_ext16s_i64(ret, ret);
2429             return;
2430         }
2431         break;
2432     case 8:
2433         if (TCG_TARGET_HAS_ext8s_i64) {
2434             tcg_gen_shri_i64(ret, arg, ofs);
2435             tcg_gen_ext8s_i64(ret, ret);
2436             return;
2437         }
2438         break;
2439     }
2440     tcg_gen_shli_i64(ret, arg, 64 - len - ofs);
2441     tcg_gen_sari_i64(ret, ret, 64 - len);
2442 }
2443 
2444 /*
2445  * Extract 64 bits from a 128-bit input, ah:al, starting from ofs.
2446  * Unlike tcg_gen_extract_i64 above, len is fixed at 64.
2447  */
2448 void tcg_gen_extract2_i64(TCGv_i64 ret, TCGv_i64 al, TCGv_i64 ah,
2449                           unsigned int ofs)
2450 {
2451     tcg_debug_assert(ofs <= 64);
2452     if (ofs == 0) {
2453         tcg_gen_mov_i64(ret, al);
2454     } else if (ofs == 64) {
2455         tcg_gen_mov_i64(ret, ah);
2456     } else if (al == ah) {
2457         tcg_gen_rotri_i64(ret, al, ofs);
2458     } else if (TCG_TARGET_HAS_extract2_i64) {
2459         tcg_gen_op4i_i64(INDEX_op_extract2_i64, ret, al, ah, ofs);
2460     } else {
2461         TCGv_i64 t0 = tcg_temp_new_i64();
2462         tcg_gen_shri_i64(t0, al, ofs);
2463         tcg_gen_deposit_i64(ret, t0, ah, 64 - ofs, ofs);
2464         tcg_temp_free_i64(t0);
2465     }
2466 }
2467 
2468 void tcg_gen_movcond_i64(TCGCond cond, TCGv_i64 ret, TCGv_i64 c1,
2469                          TCGv_i64 c2, TCGv_i64 v1, TCGv_i64 v2)
2470 {
2471     if (cond == TCG_COND_ALWAYS) {
2472         tcg_gen_mov_i64(ret, v1);
2473     } else if (cond == TCG_COND_NEVER) {
2474         tcg_gen_mov_i64(ret, v2);
2475     } else if (TCG_TARGET_REG_BITS == 32) {
2476         TCGv_i32 t0 = tcg_temp_new_i32();
2477         TCGv_i32 t1 = tcg_temp_new_i32();
2478         tcg_gen_op6i_i32(INDEX_op_setcond2_i32, t0,
2479                          TCGV_LOW(c1), TCGV_HIGH(c1),
2480                          TCGV_LOW(c2), TCGV_HIGH(c2), cond);
2481 
2482         if (TCG_TARGET_HAS_movcond_i32) {
2483             tcg_gen_movi_i32(t1, 0);
2484             tcg_gen_movcond_i32(TCG_COND_NE, TCGV_LOW(ret), t0, t1,
2485                                 TCGV_LOW(v1), TCGV_LOW(v2));
2486             tcg_gen_movcond_i32(TCG_COND_NE, TCGV_HIGH(ret), t0, t1,
2487                                 TCGV_HIGH(v1), TCGV_HIGH(v2));
2488         } else {
2489             tcg_gen_neg_i32(t0, t0);
2490 
2491             tcg_gen_and_i32(t1, TCGV_LOW(v1), t0);
2492             tcg_gen_andc_i32(TCGV_LOW(ret), TCGV_LOW(v2), t0);
2493             tcg_gen_or_i32(TCGV_LOW(ret), TCGV_LOW(ret), t1);
2494 
2495             tcg_gen_and_i32(t1, TCGV_HIGH(v1), t0);
2496             tcg_gen_andc_i32(TCGV_HIGH(ret), TCGV_HIGH(v2), t0);
2497             tcg_gen_or_i32(TCGV_HIGH(ret), TCGV_HIGH(ret), t1);
2498         }
2499         tcg_temp_free_i32(t0);
2500         tcg_temp_free_i32(t1);
2501     } else if (TCG_TARGET_HAS_movcond_i64) {
2502         tcg_gen_op6i_i64(INDEX_op_movcond_i64, ret, c1, c2, v1, v2, cond);
2503     } else {
2504         TCGv_i64 t0 = tcg_temp_new_i64();
2505         TCGv_i64 t1 = tcg_temp_new_i64();
2506         tcg_gen_setcond_i64(cond, t0, c1, c2);
2507         tcg_gen_neg_i64(t0, t0);
2508         tcg_gen_and_i64(t1, v1, t0);
2509         tcg_gen_andc_i64(ret, v2, t0);
2510         tcg_gen_or_i64(ret, ret, t1);
2511         tcg_temp_free_i64(t0);
2512         tcg_temp_free_i64(t1);
2513     }
2514 }
2515 
2516 void tcg_gen_add2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 al,
2517                       TCGv_i64 ah, TCGv_i64 bl, TCGv_i64 bh)
2518 {
2519     if (TCG_TARGET_HAS_add2_i64) {
2520         tcg_gen_op6_i64(INDEX_op_add2_i64, rl, rh, al, ah, bl, bh);
2521     } else {
2522         TCGv_i64 t0 = tcg_temp_new_i64();
2523         TCGv_i64 t1 = tcg_temp_new_i64();
2524         tcg_gen_add_i64(t0, al, bl);
2525         tcg_gen_setcond_i64(TCG_COND_LTU, t1, t0, al);
2526         tcg_gen_add_i64(rh, ah, bh);
2527         tcg_gen_add_i64(rh, rh, t1);
2528         tcg_gen_mov_i64(rl, t0);
2529         tcg_temp_free_i64(t0);
2530         tcg_temp_free_i64(t1);
2531     }
2532 }
2533 
2534 void tcg_gen_sub2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 al,
2535                       TCGv_i64 ah, TCGv_i64 bl, TCGv_i64 bh)
2536 {
2537     if (TCG_TARGET_HAS_sub2_i64) {
2538         tcg_gen_op6_i64(INDEX_op_sub2_i64, rl, rh, al, ah, bl, bh);
2539     } else {
2540         TCGv_i64 t0 = tcg_temp_new_i64();
2541         TCGv_i64 t1 = tcg_temp_new_i64();
2542         tcg_gen_sub_i64(t0, al, bl);
2543         tcg_gen_setcond_i64(TCG_COND_LTU, t1, al, bl);
2544         tcg_gen_sub_i64(rh, ah, bh);
2545         tcg_gen_sub_i64(rh, rh, t1);
2546         tcg_gen_mov_i64(rl, t0);
2547         tcg_temp_free_i64(t0);
2548         tcg_temp_free_i64(t1);
2549     }
2550 }
2551 
2552 void tcg_gen_mulu2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 arg1, TCGv_i64 arg2)
2553 {
2554     if (TCG_TARGET_HAS_mulu2_i64) {
2555         tcg_gen_op4_i64(INDEX_op_mulu2_i64, rl, rh, arg1, arg2);
2556     } else if (TCG_TARGET_HAS_muluh_i64) {
2557         TCGv_i64 t = tcg_temp_new_i64();
2558         tcg_gen_op3_i64(INDEX_op_mul_i64, t, arg1, arg2);
2559         tcg_gen_op3_i64(INDEX_op_muluh_i64, rh, arg1, arg2);
2560         tcg_gen_mov_i64(rl, t);
2561         tcg_temp_free_i64(t);
2562     } else {
2563         TCGv_i64 t0 = tcg_temp_new_i64();
2564         tcg_gen_mul_i64(t0, arg1, arg2);
2565         gen_helper_muluh_i64(rh, arg1, arg2);
2566         tcg_gen_mov_i64(rl, t0);
2567         tcg_temp_free_i64(t0);
2568     }
2569 }
2570 
2571 void tcg_gen_muls2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 arg1, TCGv_i64 arg2)
2572 {
2573     if (TCG_TARGET_HAS_muls2_i64) {
2574         tcg_gen_op4_i64(INDEX_op_muls2_i64, rl, rh, arg1, arg2);
2575     } else if (TCG_TARGET_HAS_mulsh_i64) {
2576         TCGv_i64 t = tcg_temp_new_i64();
2577         tcg_gen_op3_i64(INDEX_op_mul_i64, t, arg1, arg2);
2578         tcg_gen_op3_i64(INDEX_op_mulsh_i64, rh, arg1, arg2);
2579         tcg_gen_mov_i64(rl, t);
2580         tcg_temp_free_i64(t);
2581     } else if (TCG_TARGET_HAS_mulu2_i64 || TCG_TARGET_HAS_muluh_i64) {
2582         TCGv_i64 t0 = tcg_temp_new_i64();
2583         TCGv_i64 t1 = tcg_temp_new_i64();
2584         TCGv_i64 t2 = tcg_temp_new_i64();
2585         TCGv_i64 t3 = tcg_temp_new_i64();
2586         tcg_gen_mulu2_i64(t0, t1, arg1, arg2);
2587         /* Adjust for negative inputs.  */
2588         tcg_gen_sari_i64(t2, arg1, 63);
2589         tcg_gen_sari_i64(t3, arg2, 63);
2590         tcg_gen_and_i64(t2, t2, arg2);
2591         tcg_gen_and_i64(t3, t3, arg1);
2592         tcg_gen_sub_i64(rh, t1, t2);
2593         tcg_gen_sub_i64(rh, rh, t3);
2594         tcg_gen_mov_i64(rl, t0);
2595         tcg_temp_free_i64(t0);
2596         tcg_temp_free_i64(t1);
2597         tcg_temp_free_i64(t2);
2598         tcg_temp_free_i64(t3);
2599     } else {
2600         TCGv_i64 t0 = tcg_temp_new_i64();
2601         tcg_gen_mul_i64(t0, arg1, arg2);
2602         gen_helper_mulsh_i64(rh, arg1, arg2);
2603         tcg_gen_mov_i64(rl, t0);
2604         tcg_temp_free_i64(t0);
2605     }
2606 }
2607 
2608 void tcg_gen_mulsu2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 arg1, TCGv_i64 arg2)
2609 {
2610     TCGv_i64 t0 = tcg_temp_new_i64();
2611     TCGv_i64 t1 = tcg_temp_new_i64();
2612     TCGv_i64 t2 = tcg_temp_new_i64();
2613     tcg_gen_mulu2_i64(t0, t1, arg1, arg2);
2614     /* Adjust for negative input for the signed arg1.  */
2615     tcg_gen_sari_i64(t2, arg1, 63);
2616     tcg_gen_and_i64(t2, t2, arg2);
2617     tcg_gen_sub_i64(rh, t1, t2);
2618     tcg_gen_mov_i64(rl, t0);
2619     tcg_temp_free_i64(t0);
2620     tcg_temp_free_i64(t1);
2621     tcg_temp_free_i64(t2);
2622 }
2623 
2624 void tcg_gen_smin_i64(TCGv_i64 ret, TCGv_i64 a, TCGv_i64 b)
2625 {
2626     tcg_gen_movcond_i64(TCG_COND_LT, ret, a, b, a, b);
2627 }
2628 
2629 void tcg_gen_umin_i64(TCGv_i64 ret, TCGv_i64 a, TCGv_i64 b)
2630 {
2631     tcg_gen_movcond_i64(TCG_COND_LTU, ret, a, b, a, b);
2632 }
2633 
2634 void tcg_gen_smax_i64(TCGv_i64 ret, TCGv_i64 a, TCGv_i64 b)
2635 {
2636     tcg_gen_movcond_i64(TCG_COND_LT, ret, a, b, b, a);
2637 }
2638 
2639 void tcg_gen_umax_i64(TCGv_i64 ret, TCGv_i64 a, TCGv_i64 b)
2640 {
2641     tcg_gen_movcond_i64(TCG_COND_LTU, ret, a, b, b, a);
2642 }
2643 
2644 void tcg_gen_abs_i64(TCGv_i64 ret, TCGv_i64 a)
2645 {
2646     TCGv_i64 t = tcg_temp_new_i64();
2647 
2648     tcg_gen_sari_i64(t, a, 63);
2649     tcg_gen_xor_i64(ret, a, t);
2650     tcg_gen_sub_i64(ret, ret, t);
2651     tcg_temp_free_i64(t);
2652 }
2653 
2654 /* Size changing operations.  */
2655 
2656 void tcg_gen_extrl_i64_i32(TCGv_i32 ret, TCGv_i64 arg)
2657 {
2658     if (TCG_TARGET_REG_BITS == 32) {
2659         tcg_gen_mov_i32(ret, TCGV_LOW(arg));
2660     } else if (TCG_TARGET_HAS_extrl_i64_i32) {
2661         tcg_gen_op2(INDEX_op_extrl_i64_i32,
2662                     tcgv_i32_arg(ret), tcgv_i64_arg(arg));
2663     } else {
2664         tcg_gen_mov_i32(ret, (TCGv_i32)arg);
2665     }
2666 }
2667 
2668 void tcg_gen_extrh_i64_i32(TCGv_i32 ret, TCGv_i64 arg)
2669 {
2670     if (TCG_TARGET_REG_BITS == 32) {
2671         tcg_gen_mov_i32(ret, TCGV_HIGH(arg));
2672     } else if (TCG_TARGET_HAS_extrh_i64_i32) {
2673         tcg_gen_op2(INDEX_op_extrh_i64_i32,
2674                     tcgv_i32_arg(ret), tcgv_i64_arg(arg));
2675     } else {
2676         TCGv_i64 t = tcg_temp_new_i64();
2677         tcg_gen_shri_i64(t, arg, 32);
2678         tcg_gen_mov_i32(ret, (TCGv_i32)t);
2679         tcg_temp_free_i64(t);
2680     }
2681 }
2682 
2683 void tcg_gen_extu_i32_i64(TCGv_i64 ret, TCGv_i32 arg)
2684 {
2685     if (TCG_TARGET_REG_BITS == 32) {
2686         tcg_gen_mov_i32(TCGV_LOW(ret), arg);
2687         tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
2688     } else {
2689         tcg_gen_op2(INDEX_op_extu_i32_i64,
2690                     tcgv_i64_arg(ret), tcgv_i32_arg(arg));
2691     }
2692 }
2693 
2694 void tcg_gen_ext_i32_i64(TCGv_i64 ret, TCGv_i32 arg)
2695 {
2696     if (TCG_TARGET_REG_BITS == 32) {
2697         tcg_gen_mov_i32(TCGV_LOW(ret), arg);
2698         tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31);
2699     } else {
2700         tcg_gen_op2(INDEX_op_ext_i32_i64,
2701                     tcgv_i64_arg(ret), tcgv_i32_arg(arg));
2702     }
2703 }
2704 
2705 void tcg_gen_concat_i32_i64(TCGv_i64 dest, TCGv_i32 low, TCGv_i32 high)
2706 {
2707     TCGv_i64 tmp;
2708 
2709     if (TCG_TARGET_REG_BITS == 32) {
2710         tcg_gen_mov_i32(TCGV_LOW(dest), low);
2711         tcg_gen_mov_i32(TCGV_HIGH(dest), high);
2712         return;
2713     }
2714 
2715     tmp = tcg_temp_new_i64();
2716     /* These extensions are only needed for type correctness.
2717        We may be able to do better given target specific information.  */
2718     tcg_gen_extu_i32_i64(tmp, high);
2719     tcg_gen_extu_i32_i64(dest, low);
2720     /* If deposit is available, use it.  Otherwise use the extra
2721        knowledge that we have of the zero-extensions above.  */
2722     if (TCG_TARGET_HAS_deposit_i64 && TCG_TARGET_deposit_i64_valid(32, 32)) {
2723         tcg_gen_deposit_i64(dest, dest, tmp, 32, 32);
2724     } else {
2725         tcg_gen_shli_i64(tmp, tmp, 32);
2726         tcg_gen_or_i64(dest, dest, tmp);
2727     }
2728     tcg_temp_free_i64(tmp);
2729 }
2730 
2731 void tcg_gen_extr_i64_i32(TCGv_i32 lo, TCGv_i32 hi, TCGv_i64 arg)
2732 {
2733     if (TCG_TARGET_REG_BITS == 32) {
2734         tcg_gen_mov_i32(lo, TCGV_LOW(arg));
2735         tcg_gen_mov_i32(hi, TCGV_HIGH(arg));
2736     } else {
2737         tcg_gen_extrl_i64_i32(lo, arg);
2738         tcg_gen_extrh_i64_i32(hi, arg);
2739     }
2740 }
2741 
2742 void tcg_gen_extr32_i64(TCGv_i64 lo, TCGv_i64 hi, TCGv_i64 arg)
2743 {
2744     tcg_gen_ext32u_i64(lo, arg);
2745     tcg_gen_shri_i64(hi, arg, 32);
2746 }
2747 
2748 /* QEMU specific operations.  */
2749 
2750 void tcg_gen_exit_tb(const TranslationBlock *tb, unsigned idx)
2751 {
2752     /*
2753      * Let the jit code return the read-only version of the
2754      * TranslationBlock, so that we minimize the pc-relative
2755      * distance of the address of the exit_tb code to TB.
2756      * This will improve utilization of pc-relative address loads.
2757      *
2758      * TODO: Move this to translator_loop, so that all const
2759      * TranslationBlock pointers refer to read-only memory.
2760      * This requires coordination with targets that do not use
2761      * the translator_loop.
2762      */
2763     uintptr_t val = (uintptr_t)tcg_splitwx_to_rx((void *)tb) + idx;
2764 
2765     if (tb == NULL) {
2766         tcg_debug_assert(idx == 0);
2767     } else if (idx <= TB_EXIT_IDXMAX) {
2768 #ifdef CONFIG_DEBUG_TCG
2769         /* This is an exit following a goto_tb.  Verify that we have
2770            seen this numbered exit before, via tcg_gen_goto_tb.  */
2771         tcg_debug_assert(tcg_ctx->goto_tb_issue_mask & (1 << idx));
2772 #endif
2773     } else {
2774         /* This is an exit via the exitreq label.  */
2775         tcg_debug_assert(idx == TB_EXIT_REQUESTED);
2776     }
2777 
2778     plugin_gen_disable_mem_helpers();
2779     tcg_gen_op1i(INDEX_op_exit_tb, val);
2780 }
2781 
2782 void tcg_gen_goto_tb(unsigned idx)
2783 {
2784     /* We tested CF_NO_GOTO_TB in translator_use_goto_tb. */
2785     tcg_debug_assert(!(tcg_ctx->tb_cflags & CF_NO_GOTO_TB));
2786     /* We only support two chained exits.  */
2787     tcg_debug_assert(idx <= TB_EXIT_IDXMAX);
2788 #ifdef CONFIG_DEBUG_TCG
2789     /* Verify that we haven't seen this numbered exit before.  */
2790     tcg_debug_assert((tcg_ctx->goto_tb_issue_mask & (1 << idx)) == 0);
2791     tcg_ctx->goto_tb_issue_mask |= 1 << idx;
2792 #endif
2793     plugin_gen_disable_mem_helpers();
2794     tcg_gen_op1i(INDEX_op_goto_tb, idx);
2795 }
2796 
2797 void tcg_gen_lookup_and_goto_ptr(void)
2798 {
2799     TCGv_ptr ptr;
2800 
2801     if (tcg_ctx->tb_cflags & CF_NO_GOTO_PTR) {
2802         tcg_gen_exit_tb(NULL, 0);
2803         return;
2804     }
2805 
2806     plugin_gen_disable_mem_helpers();
2807     ptr = tcg_temp_new_ptr();
2808     gen_helper_lookup_tb_ptr(ptr, cpu_env);
2809     tcg_gen_op1i(INDEX_op_goto_ptr, tcgv_ptr_arg(ptr));
2810     tcg_temp_free_ptr(ptr);
2811 }
2812 
2813 static inline MemOp tcg_canonicalize_memop(MemOp op, bool is64, bool st)
2814 {
2815     /* Trigger the asserts within as early as possible.  */
2816     unsigned a_bits = get_alignment_bits(op);
2817 
2818     /* Prefer MO_ALIGN+MO_XX over MO_ALIGN_XX+MO_XX */
2819     if (a_bits == (op & MO_SIZE)) {
2820         op = (op & ~MO_AMASK) | MO_ALIGN;
2821     }
2822 
2823     switch (op & MO_SIZE) {
2824     case MO_8:
2825         op &= ~MO_BSWAP;
2826         break;
2827     case MO_16:
2828         break;
2829     case MO_32:
2830         if (!is64) {
2831             op &= ~MO_SIGN;
2832         }
2833         break;
2834     case MO_64:
2835         if (is64) {
2836             op &= ~MO_SIGN;
2837             break;
2838         }
2839         /* fall through */
2840     default:
2841         g_assert_not_reached();
2842     }
2843     if (st) {
2844         op &= ~MO_SIGN;
2845     }
2846     return op;
2847 }
2848 
2849 static void gen_ldst_i32(TCGOpcode opc, TCGv_i32 val, TCGv addr,
2850                          MemOp memop, TCGArg idx)
2851 {
2852     MemOpIdx oi = make_memop_idx(memop, idx);
2853 #if TARGET_LONG_BITS == 32
2854     tcg_gen_op3i_i32(opc, val, addr, oi);
2855 #else
2856     if (TCG_TARGET_REG_BITS == 32) {
2857         tcg_gen_op4i_i32(opc, val, TCGV_LOW(addr), TCGV_HIGH(addr), oi);
2858     } else {
2859         tcg_gen_op3(opc, tcgv_i32_arg(val), tcgv_i64_arg(addr), oi);
2860     }
2861 #endif
2862 }
2863 
2864 static void gen_ldst_i64(TCGOpcode opc, TCGv_i64 val, TCGv addr,
2865                          MemOp memop, TCGArg idx)
2866 {
2867     MemOpIdx oi = make_memop_idx(memop, idx);
2868 #if TARGET_LONG_BITS == 32
2869     if (TCG_TARGET_REG_BITS == 32) {
2870         tcg_gen_op4i_i32(opc, TCGV_LOW(val), TCGV_HIGH(val), addr, oi);
2871     } else {
2872         tcg_gen_op3(opc, tcgv_i64_arg(val), tcgv_i32_arg(addr), oi);
2873     }
2874 #else
2875     if (TCG_TARGET_REG_BITS == 32) {
2876         tcg_gen_op5i_i32(opc, TCGV_LOW(val), TCGV_HIGH(val),
2877                          TCGV_LOW(addr), TCGV_HIGH(addr), oi);
2878     } else {
2879         tcg_gen_op3i_i64(opc, val, addr, oi);
2880     }
2881 #endif
2882 }
2883 
2884 static void tcg_gen_req_mo(TCGBar type)
2885 {
2886 #ifdef TCG_GUEST_DEFAULT_MO
2887     type &= TCG_GUEST_DEFAULT_MO;
2888 #endif
2889     type &= ~TCG_TARGET_DEFAULT_MO;
2890     if (type) {
2891         tcg_gen_mb(type | TCG_BAR_SC);
2892     }
2893 }
2894 
2895 static inline TCGv plugin_prep_mem_callbacks(TCGv vaddr)
2896 {
2897 #ifdef CONFIG_PLUGIN
2898     if (tcg_ctx->plugin_insn != NULL) {
2899         /* Save a copy of the vaddr for use after a load.  */
2900         TCGv temp = tcg_temp_new();
2901         tcg_gen_mov_tl(temp, vaddr);
2902         return temp;
2903     }
2904 #endif
2905     return vaddr;
2906 }
2907 
2908 static void plugin_gen_mem_callbacks(TCGv vaddr, MemOpIdx oi,
2909                                      enum qemu_plugin_mem_rw rw)
2910 {
2911 #ifdef CONFIG_PLUGIN
2912     if (tcg_ctx->plugin_insn != NULL) {
2913         qemu_plugin_meminfo_t info = make_plugin_meminfo(oi, rw);
2914         plugin_gen_empty_mem_callback(vaddr, info);
2915         tcg_temp_free(vaddr);
2916     }
2917 #endif
2918 }
2919 
2920 void tcg_gen_qemu_ld_i32(TCGv_i32 val, TCGv addr, TCGArg idx, MemOp memop)
2921 {
2922     MemOp orig_memop;
2923     MemOpIdx oi;
2924 
2925     tcg_gen_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
2926     memop = tcg_canonicalize_memop(memop, 0, 0);
2927     oi = make_memop_idx(memop, idx);
2928 
2929     orig_memop = memop;
2930     if (!TCG_TARGET_HAS_MEMORY_BSWAP && (memop & MO_BSWAP)) {
2931         memop &= ~MO_BSWAP;
2932         /* The bswap primitive benefits from zero-extended input.  */
2933         if ((memop & MO_SSIZE) == MO_SW) {
2934             memop &= ~MO_SIGN;
2935         }
2936     }
2937 
2938     addr = plugin_prep_mem_callbacks(addr);
2939     gen_ldst_i32(INDEX_op_qemu_ld_i32, val, addr, memop, idx);
2940     plugin_gen_mem_callbacks(addr, oi, QEMU_PLUGIN_MEM_R);
2941 
2942     if ((orig_memop ^ memop) & MO_BSWAP) {
2943         switch (orig_memop & MO_SIZE) {
2944         case MO_16:
2945             tcg_gen_bswap16_i32(val, val, (orig_memop & MO_SIGN
2946                                            ? TCG_BSWAP_IZ | TCG_BSWAP_OS
2947                                            : TCG_BSWAP_IZ | TCG_BSWAP_OZ));
2948             break;
2949         case MO_32:
2950             tcg_gen_bswap32_i32(val, val);
2951             break;
2952         default:
2953             g_assert_not_reached();
2954         }
2955     }
2956 }
2957 
2958 void tcg_gen_qemu_st_i32(TCGv_i32 val, TCGv addr, TCGArg idx, MemOp memop)
2959 {
2960     TCGv_i32 swap = NULL;
2961     MemOpIdx oi;
2962 
2963     tcg_gen_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST);
2964     memop = tcg_canonicalize_memop(memop, 0, 1);
2965     oi = make_memop_idx(memop, idx);
2966 
2967     if (!TCG_TARGET_HAS_MEMORY_BSWAP && (memop & MO_BSWAP)) {
2968         swap = tcg_temp_new_i32();
2969         switch (memop & MO_SIZE) {
2970         case MO_16:
2971             tcg_gen_bswap16_i32(swap, val, 0);
2972             break;
2973         case MO_32:
2974             tcg_gen_bswap32_i32(swap, val);
2975             break;
2976         default:
2977             g_assert_not_reached();
2978         }
2979         val = swap;
2980         memop &= ~MO_BSWAP;
2981     }
2982 
2983     addr = plugin_prep_mem_callbacks(addr);
2984     if (TCG_TARGET_HAS_qemu_st8_i32 && (memop & MO_SIZE) == MO_8) {
2985         gen_ldst_i32(INDEX_op_qemu_st8_i32, val, addr, memop, idx);
2986     } else {
2987         gen_ldst_i32(INDEX_op_qemu_st_i32, val, addr, memop, idx);
2988     }
2989     plugin_gen_mem_callbacks(addr, oi, QEMU_PLUGIN_MEM_W);
2990 
2991     if (swap) {
2992         tcg_temp_free_i32(swap);
2993     }
2994 }
2995 
2996 void tcg_gen_qemu_ld_i64(TCGv_i64 val, TCGv addr, TCGArg idx, MemOp memop)
2997 {
2998     MemOp orig_memop;
2999     MemOpIdx oi;
3000 
3001     if (TCG_TARGET_REG_BITS == 32 && (memop & MO_SIZE) < MO_64) {
3002         tcg_gen_qemu_ld_i32(TCGV_LOW(val), addr, idx, memop);
3003         if (memop & MO_SIGN) {
3004             tcg_gen_sari_i32(TCGV_HIGH(val), TCGV_LOW(val), 31);
3005         } else {
3006             tcg_gen_movi_i32(TCGV_HIGH(val), 0);
3007         }
3008         return;
3009     }
3010 
3011     tcg_gen_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
3012     memop = tcg_canonicalize_memop(memop, 1, 0);
3013     oi = make_memop_idx(memop, idx);
3014 
3015     orig_memop = memop;
3016     if (!TCG_TARGET_HAS_MEMORY_BSWAP && (memop & MO_BSWAP)) {
3017         memop &= ~MO_BSWAP;
3018         /* The bswap primitive benefits from zero-extended input.  */
3019         if ((memop & MO_SIGN) && (memop & MO_SIZE) < MO_64) {
3020             memop &= ~MO_SIGN;
3021         }
3022     }
3023 
3024     addr = plugin_prep_mem_callbacks(addr);
3025     gen_ldst_i64(INDEX_op_qemu_ld_i64, val, addr, memop, idx);
3026     plugin_gen_mem_callbacks(addr, oi, QEMU_PLUGIN_MEM_R);
3027 
3028     if ((orig_memop ^ memop) & MO_BSWAP) {
3029         int flags = (orig_memop & MO_SIGN
3030                      ? TCG_BSWAP_IZ | TCG_BSWAP_OS
3031                      : TCG_BSWAP_IZ | TCG_BSWAP_OZ);
3032         switch (orig_memop & MO_SIZE) {
3033         case MO_16:
3034             tcg_gen_bswap16_i64(val, val, flags);
3035             break;
3036         case MO_32:
3037             tcg_gen_bswap32_i64(val, val, flags);
3038             break;
3039         case MO_64:
3040             tcg_gen_bswap64_i64(val, val);
3041             break;
3042         default:
3043             g_assert_not_reached();
3044         }
3045     }
3046 }
3047 
3048 void tcg_gen_qemu_st_i64(TCGv_i64 val, TCGv addr, TCGArg idx, MemOp memop)
3049 {
3050     TCGv_i64 swap = NULL;
3051     MemOpIdx oi;
3052 
3053     if (TCG_TARGET_REG_BITS == 32 && (memop & MO_SIZE) < MO_64) {
3054         tcg_gen_qemu_st_i32(TCGV_LOW(val), addr, idx, memop);
3055         return;
3056     }
3057 
3058     tcg_gen_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST);
3059     memop = tcg_canonicalize_memop(memop, 1, 1);
3060     oi = make_memop_idx(memop, idx);
3061 
3062     if (!TCG_TARGET_HAS_MEMORY_BSWAP && (memop & MO_BSWAP)) {
3063         swap = tcg_temp_new_i64();
3064         switch (memop & MO_SIZE) {
3065         case MO_16:
3066             tcg_gen_bswap16_i64(swap, val, 0);
3067             break;
3068         case MO_32:
3069             tcg_gen_bswap32_i64(swap, val, 0);
3070             break;
3071         case MO_64:
3072             tcg_gen_bswap64_i64(swap, val);
3073             break;
3074         default:
3075             g_assert_not_reached();
3076         }
3077         val = swap;
3078         memop &= ~MO_BSWAP;
3079     }
3080 
3081     addr = plugin_prep_mem_callbacks(addr);
3082     gen_ldst_i64(INDEX_op_qemu_st_i64, val, addr, memop, idx);
3083     plugin_gen_mem_callbacks(addr, oi, QEMU_PLUGIN_MEM_W);
3084 
3085     if (swap) {
3086         tcg_temp_free_i64(swap);
3087     }
3088 }
3089 
3090 static void tcg_gen_ext_i32(TCGv_i32 ret, TCGv_i32 val, MemOp opc)
3091 {
3092     switch (opc & MO_SSIZE) {
3093     case MO_SB:
3094         tcg_gen_ext8s_i32(ret, val);
3095         break;
3096     case MO_UB:
3097         tcg_gen_ext8u_i32(ret, val);
3098         break;
3099     case MO_SW:
3100         tcg_gen_ext16s_i32(ret, val);
3101         break;
3102     case MO_UW:
3103         tcg_gen_ext16u_i32(ret, val);
3104         break;
3105     default:
3106         tcg_gen_mov_i32(ret, val);
3107         break;
3108     }
3109 }
3110 
3111 static void tcg_gen_ext_i64(TCGv_i64 ret, TCGv_i64 val, MemOp opc)
3112 {
3113     switch (opc & MO_SSIZE) {
3114     case MO_SB:
3115         tcg_gen_ext8s_i64(ret, val);
3116         break;
3117     case MO_UB:
3118         tcg_gen_ext8u_i64(ret, val);
3119         break;
3120     case MO_SW:
3121         tcg_gen_ext16s_i64(ret, val);
3122         break;
3123     case MO_UW:
3124         tcg_gen_ext16u_i64(ret, val);
3125         break;
3126     case MO_SL:
3127         tcg_gen_ext32s_i64(ret, val);
3128         break;
3129     case MO_UL:
3130         tcg_gen_ext32u_i64(ret, val);
3131         break;
3132     default:
3133         tcg_gen_mov_i64(ret, val);
3134         break;
3135     }
3136 }
3137 
3138 typedef void (*gen_atomic_cx_i32)(TCGv_i32, TCGv_env, TCGv,
3139                                   TCGv_i32, TCGv_i32, TCGv_i32);
3140 typedef void (*gen_atomic_cx_i64)(TCGv_i64, TCGv_env, TCGv,
3141                                   TCGv_i64, TCGv_i64, TCGv_i32);
3142 typedef void (*gen_atomic_op_i32)(TCGv_i32, TCGv_env, TCGv,
3143                                   TCGv_i32, TCGv_i32);
3144 typedef void (*gen_atomic_op_i64)(TCGv_i64, TCGv_env, TCGv,
3145                                   TCGv_i64, TCGv_i32);
3146 
3147 #ifdef CONFIG_ATOMIC64
3148 # define WITH_ATOMIC64(X) X,
3149 #else
3150 # define WITH_ATOMIC64(X)
3151 #endif
3152 
3153 static void * const table_cmpxchg[(MO_SIZE | MO_BSWAP) + 1] = {
3154     [MO_8] = gen_helper_atomic_cmpxchgb,
3155     [MO_16 | MO_LE] = gen_helper_atomic_cmpxchgw_le,
3156     [MO_16 | MO_BE] = gen_helper_atomic_cmpxchgw_be,
3157     [MO_32 | MO_LE] = gen_helper_atomic_cmpxchgl_le,
3158     [MO_32 | MO_BE] = gen_helper_atomic_cmpxchgl_be,
3159     WITH_ATOMIC64([MO_64 | MO_LE] = gen_helper_atomic_cmpxchgq_le)
3160     WITH_ATOMIC64([MO_64 | MO_BE] = gen_helper_atomic_cmpxchgq_be)
3161 };
3162 
3163 void tcg_gen_atomic_cmpxchg_i32(TCGv_i32 retv, TCGv addr, TCGv_i32 cmpv,
3164                                 TCGv_i32 newv, TCGArg idx, MemOp memop)
3165 {
3166     memop = tcg_canonicalize_memop(memop, 0, 0);
3167 
3168     if (!(tcg_ctx->tb_cflags & CF_PARALLEL)) {
3169         TCGv_i32 t1 = tcg_temp_new_i32();
3170         TCGv_i32 t2 = tcg_temp_new_i32();
3171 
3172         tcg_gen_ext_i32(t2, cmpv, memop & MO_SIZE);
3173 
3174         tcg_gen_qemu_ld_i32(t1, addr, idx, memop & ~MO_SIGN);
3175         tcg_gen_movcond_i32(TCG_COND_EQ, t2, t1, t2, newv, t1);
3176         tcg_gen_qemu_st_i32(t2, addr, idx, memop);
3177         tcg_temp_free_i32(t2);
3178 
3179         if (memop & MO_SIGN) {
3180             tcg_gen_ext_i32(retv, t1, memop);
3181         } else {
3182             tcg_gen_mov_i32(retv, t1);
3183         }
3184         tcg_temp_free_i32(t1);
3185     } else {
3186         gen_atomic_cx_i32 gen;
3187         MemOpIdx oi;
3188 
3189         gen = table_cmpxchg[memop & (MO_SIZE | MO_BSWAP)];
3190         tcg_debug_assert(gen != NULL);
3191 
3192         oi = make_memop_idx(memop & ~MO_SIGN, idx);
3193         gen(retv, cpu_env, addr, cmpv, newv, tcg_constant_i32(oi));
3194 
3195         if (memop & MO_SIGN) {
3196             tcg_gen_ext_i32(retv, retv, memop);
3197         }
3198     }
3199 }
3200 
3201 void tcg_gen_atomic_cmpxchg_i64(TCGv_i64 retv, TCGv addr, TCGv_i64 cmpv,
3202                                 TCGv_i64 newv, TCGArg idx, MemOp memop)
3203 {
3204     memop = tcg_canonicalize_memop(memop, 1, 0);
3205 
3206     if (!(tcg_ctx->tb_cflags & CF_PARALLEL)) {
3207         TCGv_i64 t1 = tcg_temp_new_i64();
3208         TCGv_i64 t2 = tcg_temp_new_i64();
3209 
3210         tcg_gen_ext_i64(t2, cmpv, memop & MO_SIZE);
3211 
3212         tcg_gen_qemu_ld_i64(t1, addr, idx, memop & ~MO_SIGN);
3213         tcg_gen_movcond_i64(TCG_COND_EQ, t2, t1, t2, newv, t1);
3214         tcg_gen_qemu_st_i64(t2, addr, idx, memop);
3215         tcg_temp_free_i64(t2);
3216 
3217         if (memop & MO_SIGN) {
3218             tcg_gen_ext_i64(retv, t1, memop);
3219         } else {
3220             tcg_gen_mov_i64(retv, t1);
3221         }
3222         tcg_temp_free_i64(t1);
3223     } else if ((memop & MO_SIZE) == MO_64) {
3224 #ifdef CONFIG_ATOMIC64
3225         gen_atomic_cx_i64 gen;
3226         MemOpIdx oi;
3227 
3228         gen = table_cmpxchg[memop & (MO_SIZE | MO_BSWAP)];
3229         tcg_debug_assert(gen != NULL);
3230 
3231         oi = make_memop_idx(memop, idx);
3232         gen(retv, cpu_env, addr, cmpv, newv, tcg_constant_i32(oi));
3233 #else
3234         gen_helper_exit_atomic(cpu_env);
3235         /* Produce a result, so that we have a well-formed opcode stream
3236            with respect to uses of the result in the (dead) code following.  */
3237         tcg_gen_movi_i64(retv, 0);
3238 #endif /* CONFIG_ATOMIC64 */
3239     } else {
3240         TCGv_i32 c32 = tcg_temp_new_i32();
3241         TCGv_i32 n32 = tcg_temp_new_i32();
3242         TCGv_i32 r32 = tcg_temp_new_i32();
3243 
3244         tcg_gen_extrl_i64_i32(c32, cmpv);
3245         tcg_gen_extrl_i64_i32(n32, newv);
3246         tcg_gen_atomic_cmpxchg_i32(r32, addr, c32, n32, idx, memop & ~MO_SIGN);
3247         tcg_temp_free_i32(c32);
3248         tcg_temp_free_i32(n32);
3249 
3250         tcg_gen_extu_i32_i64(retv, r32);
3251         tcg_temp_free_i32(r32);
3252 
3253         if (memop & MO_SIGN) {
3254             tcg_gen_ext_i64(retv, retv, memop);
3255         }
3256     }
3257 }
3258 
3259 static void do_nonatomic_op_i32(TCGv_i32 ret, TCGv addr, TCGv_i32 val,
3260                                 TCGArg idx, MemOp memop, bool new_val,
3261                                 void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32))
3262 {
3263     TCGv_i32 t1 = tcg_temp_new_i32();
3264     TCGv_i32 t2 = tcg_temp_new_i32();
3265 
3266     memop = tcg_canonicalize_memop(memop, 0, 0);
3267 
3268     tcg_gen_qemu_ld_i32(t1, addr, idx, memop);
3269     tcg_gen_ext_i32(t2, val, memop);
3270     gen(t2, t1, t2);
3271     tcg_gen_qemu_st_i32(t2, addr, idx, memop);
3272 
3273     tcg_gen_ext_i32(ret, (new_val ? t2 : t1), memop);
3274     tcg_temp_free_i32(t1);
3275     tcg_temp_free_i32(t2);
3276 }
3277 
3278 static void do_atomic_op_i32(TCGv_i32 ret, TCGv addr, TCGv_i32 val,
3279                              TCGArg idx, MemOp memop, void * const table[])
3280 {
3281     gen_atomic_op_i32 gen;
3282     MemOpIdx oi;
3283 
3284     memop = tcg_canonicalize_memop(memop, 0, 0);
3285 
3286     gen = table[memop & (MO_SIZE | MO_BSWAP)];
3287     tcg_debug_assert(gen != NULL);
3288 
3289     oi = make_memop_idx(memop & ~MO_SIGN, idx);
3290     gen(ret, cpu_env, addr, val, tcg_constant_i32(oi));
3291 
3292     if (memop & MO_SIGN) {
3293         tcg_gen_ext_i32(ret, ret, memop);
3294     }
3295 }
3296 
3297 static void do_nonatomic_op_i64(TCGv_i64 ret, TCGv addr, TCGv_i64 val,
3298                                 TCGArg idx, MemOp memop, bool new_val,
3299                                 void (*gen)(TCGv_i64, TCGv_i64, TCGv_i64))
3300 {
3301     TCGv_i64 t1 = tcg_temp_new_i64();
3302     TCGv_i64 t2 = tcg_temp_new_i64();
3303 
3304     memop = tcg_canonicalize_memop(memop, 1, 0);
3305 
3306     tcg_gen_qemu_ld_i64(t1, addr, idx, memop);
3307     tcg_gen_ext_i64(t2, val, memop);
3308     gen(t2, t1, t2);
3309     tcg_gen_qemu_st_i64(t2, addr, idx, memop);
3310 
3311     tcg_gen_ext_i64(ret, (new_val ? t2 : t1), memop);
3312     tcg_temp_free_i64(t1);
3313     tcg_temp_free_i64(t2);
3314 }
3315 
3316 static void do_atomic_op_i64(TCGv_i64 ret, TCGv addr, TCGv_i64 val,
3317                              TCGArg idx, MemOp memop, void * const table[])
3318 {
3319     memop = tcg_canonicalize_memop(memop, 1, 0);
3320 
3321     if ((memop & MO_SIZE) == MO_64) {
3322 #ifdef CONFIG_ATOMIC64
3323         gen_atomic_op_i64 gen;
3324         MemOpIdx oi;
3325 
3326         gen = table[memop & (MO_SIZE | MO_BSWAP)];
3327         tcg_debug_assert(gen != NULL);
3328 
3329         oi = make_memop_idx(memop & ~MO_SIGN, idx);
3330         gen(ret, cpu_env, addr, val, tcg_constant_i32(oi));
3331 #else
3332         gen_helper_exit_atomic(cpu_env);
3333         /* Produce a result, so that we have a well-formed opcode stream
3334            with respect to uses of the result in the (dead) code following.  */
3335         tcg_gen_movi_i64(ret, 0);
3336 #endif /* CONFIG_ATOMIC64 */
3337     } else {
3338         TCGv_i32 v32 = tcg_temp_new_i32();
3339         TCGv_i32 r32 = tcg_temp_new_i32();
3340 
3341         tcg_gen_extrl_i64_i32(v32, val);
3342         do_atomic_op_i32(r32, addr, v32, idx, memop & ~MO_SIGN, table);
3343         tcg_temp_free_i32(v32);
3344 
3345         tcg_gen_extu_i32_i64(ret, r32);
3346         tcg_temp_free_i32(r32);
3347 
3348         if (memop & MO_SIGN) {
3349             tcg_gen_ext_i64(ret, ret, memop);
3350         }
3351     }
3352 }
3353 
/*
 * GEN_ATOMIC_HELPER(NAME, OP, NEW) defines, for one atomic operation:
 *  - table_NAME: the atomic helpers indexed by
 *    (memop & (MO_SIZE | MO_BSWAP)), with 64-bit entries present only
 *    when CONFIG_ATOMIC64 is defined;
 *  - tcg_gen_atomic_NAME_i32 and tcg_gen_atomic_NAME_i64: the public
 *    entry points.  With CF_PARALLEL they call the helper table;
 *    otherwise they open-code the operation using tcg_gen_OP_i32/i64,
 *    passing NEW as the new_val selector.
 */
#define GEN_ATOMIC_HELPER(NAME, OP, NEW)                                \
static void * const table_##NAME[(MO_SIZE | MO_BSWAP) + 1] = {          \
    [MO_8] = gen_helper_atomic_##NAME##b,                               \
    [MO_16 | MO_LE] = gen_helper_atomic_##NAME##w_le,                   \
    [MO_16 | MO_BE] = gen_helper_atomic_##NAME##w_be,                   \
    [MO_32 | MO_LE] = gen_helper_atomic_##NAME##l_le,                   \
    [MO_32 | MO_BE] = gen_helper_atomic_##NAME##l_be,                   \
    WITH_ATOMIC64([MO_64 | MO_LE] = gen_helper_atomic_##NAME##q_le)     \
    WITH_ATOMIC64([MO_64 | MO_BE] = gen_helper_atomic_##NAME##q_be)     \
};                                                                      \
void tcg_gen_atomic_##NAME##_i32                                        \
    (TCGv_i32 ret, TCGv addr, TCGv_i32 val, TCGArg idx, MemOp memop)    \
{                                                                       \
    if (!(tcg_ctx->tb_cflags & CF_PARALLEL)) {                          \
        do_nonatomic_op_i32(ret, addr, val, idx, memop, NEW,            \
                            tcg_gen_##OP##_i32);                        \
    } else {                                                            \
        do_atomic_op_i32(ret, addr, val, idx, memop, table_##NAME);     \
    }                                                                   \
}                                                                       \
void tcg_gen_atomic_##NAME##_i64                                        \
    (TCGv_i64 ret, TCGv addr, TCGv_i64 val, TCGArg idx, MemOp memop)    \
{                                                                       \
    if (!(tcg_ctx->tb_cflags & CF_PARALLEL)) {                          \
        do_nonatomic_op_i64(ret, addr, val, idx, memop, NEW,            \
                            tcg_gen_##OP##_i64);                        \
    } else {                                                            \
        do_atomic_op_i64(ret, addr, val, idx, memop, table_##NAME);     \
    }                                                                   \
}
3384 
3385 GEN_ATOMIC_HELPER(fetch_add, add, 0)
3386 GEN_ATOMIC_HELPER(fetch_and, and, 0)
3387 GEN_ATOMIC_HELPER(fetch_or, or, 0)
3388 GEN_ATOMIC_HELPER(fetch_xor, xor, 0)
3389 GEN_ATOMIC_HELPER(fetch_smin, smin, 0)
3390 GEN_ATOMIC_HELPER(fetch_umin, umin, 0)
3391 GEN_ATOMIC_HELPER(fetch_smax, smax, 0)
3392 GEN_ATOMIC_HELPER(fetch_umax, umax, 0)
3393 
3394 GEN_ATOMIC_HELPER(add_fetch, add, 1)
3395 GEN_ATOMIC_HELPER(and_fetch, and, 1)
3396 GEN_ATOMIC_HELPER(or_fetch, or, 1)
3397 GEN_ATOMIC_HELPER(xor_fetch, xor, 1)
3398 GEN_ATOMIC_HELPER(smin_fetch, smin, 1)
3399 GEN_ATOMIC_HELPER(umin_fetch, umin, 1)
3400 GEN_ATOMIC_HELPER(smax_fetch, smax, 1)
3401 GEN_ATOMIC_HELPER(umax_fetch, umax, 1)
3402 
3403 static void tcg_gen_mov2_i32(TCGv_i32 r, TCGv_i32 a, TCGv_i32 b)
3404 {
3405     tcg_gen_mov_i32(r, b);
3406 }
3407 
3408 static void tcg_gen_mov2_i64(TCGv_i64 r, TCGv_i64 a, TCGv_i64 b)
3409 {
3410     tcg_gen_mov_i64(r, b);
3411 }
3412 
3413 GEN_ATOMIC_HELPER(xchg, mov2, 0)
3414 
3415 #undef GEN_ATOMIC_HELPER
3416