/*
 * Tiny Code Generator for QEMU
 *
 * Copyright (c) 2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "qemu/osdep.h"
#include "exec/exec-all.h"
#include "tcg/tcg.h"
#include "tcg/tcg-temp-internal.h"
#include "tcg/tcg-op.h"
#include "tcg/tcg-mo.h"
#include "exec/plugin-gen.h"
#include "tcg-internal.h"


void tcg_gen_op1(TCGOpcode opc, TCGArg a1)
{
    TCGOp *op = tcg_emit_op(opc, 1);
    op->args[0] = a1;
}

void tcg_gen_op2(TCGOpcode opc, TCGArg a1, TCGArg a2)
{
    TCGOp *op = tcg_emit_op(opc, 2);
    op->args[0] = a1;
    op->args[1] = a2;
}

void tcg_gen_op3(TCGOpcode opc, TCGArg a1, TCGArg a2, TCGArg a3)
{
    TCGOp *op = tcg_emit_op(opc, 3);
    op->args[0] = a1;
    op->args[1] = a2;
    op->args[2] = a3;
}

void tcg_gen_op4(TCGOpcode opc, TCGArg a1, TCGArg a2, TCGArg a3, TCGArg a4)
{
    TCGOp *op = tcg_emit_op(opc, 4);
    op->args[0] = a1;
    op->args[1] = a2;
    op->args[2] = a3;
    op->args[3] = a4;
}

void tcg_gen_op5(TCGOpcode opc, TCGArg a1, TCGArg a2, TCGArg a3,
                 TCGArg a4, TCGArg a5)
{
    TCGOp *op = tcg_emit_op(opc, 5);
    op->args[0] = a1;
    op->args[1] = a2;
    op->args[2] = a3;
    op->args[3] = a4;
    op->args[4] = a5;
}

void tcg_gen_op6(TCGOpcode opc, TCGArg a1, TCGArg a2, TCGArg a3,
                 TCGArg a4, TCGArg a5, TCGArg a6)
{
    TCGOp *op = tcg_emit_op(opc, 6);
    op->args[0] = a1;
    op->args[1] = a2;
    op->args[2] = a3;
    op->args[3] = a4;
    op->args[4] = a5;
    op->args[5] = a6;
}

/* Generic ops.  */

static void add_last_as_label_use(TCGLabel *l)
{
    TCGLabelUse *u = tcg_malloc(sizeof(TCGLabelUse));

    u->op = tcg_last_op();
    QSIMPLEQ_INSERT_TAIL(&l->branches, u, next);
}

void tcg_gen_br(TCGLabel *l)
{
    tcg_gen_op1(INDEX_op_br, label_arg(l));
    add_last_as_label_use(l);
}

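/*
 * A host memory barrier is only required when the TB might run in
 * parallel with other vCPU threads (CF_PARALLEL); otherwise the op
 * would be dead and is not emitted at all.
 */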
void tcg_gen_mb(TCGBar mb_type)
{
    if (tcg_ctx->gen_tb->cflags & CF_PARALLEL) {
        tcg_gen_op1(INDEX_op_mb, mb_type);
    }
}

/* 32-bit ops */

void tcg_gen_movi_i32(TCGv_i32 ret, int32_t arg)
{
    tcg_gen_mov_i32(ret, tcg_constant_i32(arg));
}

void tcg_gen_addi_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
{
    /* some cases can be optimized here */
    if (arg2 == 0) {
        tcg_gen_mov_i32(ret, arg1);
    } else {
        tcg_gen_add_i32(ret, arg1, tcg_constant_i32(arg2));
    }
}

void tcg_gen_subfi_i32(TCGv_i32 ret, int32_t arg1, TCGv_i32 arg2)
{
    if (arg1 == 0 && TCG_TARGET_HAS_neg_i32) {
        /* Don't recurse with tcg_gen_neg_i32.  */
        tcg_gen_op2_i32(INDEX_op_neg_i32, ret, arg2);
    } else {
        tcg_gen_sub_i32(ret, tcg_constant_i32(arg1), arg2);
    }
}

void tcg_gen_subi_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
{
    /* some cases can be optimized here */
    if (arg2 == 0) {
        tcg_gen_mov_i32(ret, arg1);
    } else {
        tcg_gen_sub_i32(ret, arg1, tcg_constant_i32(arg2));
    }
}

void tcg_gen_andi_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
{
    /* Some cases can be optimized here.  */
    switch (arg2) {
    case 0:
        tcg_gen_movi_i32(ret, 0);
        return;
    case -1:
        tcg_gen_mov_i32(ret, arg1);
        return;
    case 0xff:
        /* Don't recurse with tcg_gen_ext8u_i32.  */
        if (TCG_TARGET_HAS_ext8u_i32) {
            tcg_gen_op2_i32(INDEX_op_ext8u_i32, ret, arg1);
            return;
        }
        break;
    case 0xffff:
        if (TCG_TARGET_HAS_ext16u_i32) {
            tcg_gen_op2_i32(INDEX_op_ext16u_i32, ret, arg1);
            return;
        }
        break;
    }

    tcg_gen_and_i32(ret, arg1, tcg_constant_i32(arg2));
}

void tcg_gen_ori_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
{
    /* Some cases can be optimized here.  */
    if (arg2 == -1) {
        tcg_gen_movi_i32(ret, -1);
    } else if (arg2 == 0) {
        tcg_gen_mov_i32(ret, arg1);
    } else {
        tcg_gen_or_i32(ret, arg1, tcg_constant_i32(arg2));
    }
}

void tcg_gen_xori_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
{
    /* Some cases can be optimized here.  */
    if (arg2 == 0) {
        tcg_gen_mov_i32(ret, arg1);
    } else if (arg2 == -1 && TCG_TARGET_HAS_not_i32) {
        /* Don't recurse with tcg_gen_not_i32.  */
        tcg_gen_op2_i32(INDEX_op_not_i32, ret, arg1);
    } else {
        tcg_gen_xor_i32(ret, arg1, tcg_constant_i32(arg2));
    }
}

void tcg_gen_shli_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
{
    tcg_debug_assert(arg2 >= 0 && arg2 < 32);
    if (arg2 == 0) {
        tcg_gen_mov_i32(ret, arg1);
    } else {
        tcg_gen_shl_i32(ret, arg1, tcg_constant_i32(arg2));
    }
}

void tcg_gen_shri_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
{
    tcg_debug_assert(arg2 >= 0 && arg2 < 32);
    if (arg2 == 0) {
        tcg_gen_mov_i32(ret, arg1);
    } else {
        tcg_gen_shr_i32(ret, arg1, tcg_constant_i32(arg2));
    }
}

void tcg_gen_sari_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
{
    tcg_debug_assert(arg2 >= 0 && arg2 < 32);
    if (arg2 == 0) {
        tcg_gen_mov_i32(ret, arg1);
    } else {
        tcg_gen_sar_i32(ret, arg1, tcg_constant_i32(arg2));
    }
}

void tcg_gen_brcond_i32(TCGCond cond, TCGv_i32 arg1, TCGv_i32 arg2, TCGLabel *l)
{
    if (cond == TCG_COND_ALWAYS) {
        tcg_gen_br(l);
    } else if (cond != TCG_COND_NEVER) {
        tcg_gen_op4ii_i32(INDEX_op_brcond_i32, arg1, arg2, cond, label_arg(l));
        add_last_as_label_use(l);
    }
}

void tcg_gen_brcondi_i32(TCGCond cond, TCGv_i32 arg1, int32_t arg2, TCGLabel *l)
{
    if (cond == TCG_COND_ALWAYS) {
        tcg_gen_br(l);
    } else if (cond != TCG_COND_NEVER) {
        tcg_gen_brcond_i32(cond, arg1, tcg_constant_i32(arg2), l);
    }
}

void tcg_gen_setcond_i32(TCGCond cond, TCGv_i32 ret,
                         TCGv_i32 arg1, TCGv_i32 arg2)
{
    if (cond == TCG_COND_ALWAYS) {
        tcg_gen_movi_i32(ret, 1);
    } else if (cond == TCG_COND_NEVER) {
        tcg_gen_movi_i32(ret, 0);
    } else {
        tcg_gen_op4i_i32(INDEX_op_setcond_i32, ret, arg1, arg2, cond);
    }
}

void tcg_gen_setcondi_i32(TCGCond cond, TCGv_i32 ret,
                          TCGv_i32 arg1, int32_t arg2)
{
    tcg_gen_setcond_i32(cond, ret, arg1, tcg_constant_i32(arg2));
}

void tcg_gen_muli_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
{
    if (arg2 == 0) {
        tcg_gen_movi_i32(ret, 0);
    } else if (is_power_of_2(arg2)) {
        tcg_gen_shli_i32(ret, arg1, ctz32(arg2));
    } else {
        tcg_gen_mul_i32(ret, arg1, tcg_constant_i32(arg2));
    }
}

void tcg_gen_div_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
{
    if (TCG_TARGET_HAS_div_i32) {
        tcg_gen_op3_i32(INDEX_op_div_i32, ret, arg1, arg2);
    } else if (TCG_TARGET_HAS_div2_i32) {
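        /*
         * The div2 opcode takes the 64-bit dividend as a (low, high)
         * register pair; replicate the sign of arg1 into t0 to form
         * the sign-extended high half.
         */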
        TCGv_i32 t0 = tcg_temp_ebb_new_i32();
        tcg_gen_sari_i32(t0, arg1, 31);
        tcg_gen_op5_i32(INDEX_op_div2_i32, ret, t0, arg1, t0, arg2);
        tcg_temp_free_i32(t0);
    } else {
        gen_helper_div_i32(ret, arg1, arg2);
    }
}

void tcg_gen_rem_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
{
    if (TCG_TARGET_HAS_rem_i32) {
        tcg_gen_op3_i32(INDEX_op_rem_i32, ret, arg1, arg2);
    } else if (TCG_TARGET_HAS_div_i32) {
        TCGv_i32 t0 = tcg_temp_ebb_new_i32();
        tcg_gen_op3_i32(INDEX_op_div_i32, t0, arg1, arg2);
        tcg_gen_mul_i32(t0, t0, arg2);
        tcg_gen_sub_i32(ret, arg1, t0);
        tcg_temp_free_i32(t0);
    } else if (TCG_TARGET_HAS_div2_i32) {
        TCGv_i32 t0 = tcg_temp_ebb_new_i32();
        tcg_gen_sari_i32(t0, arg1, 31);
        tcg_gen_op5_i32(INDEX_op_div2_i32, t0, ret, arg1, t0, arg2);
        tcg_temp_free_i32(t0);
    } else {
        gen_helper_rem_i32(ret, arg1, arg2);
    }
}

void tcg_gen_divu_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
{
    if (TCG_TARGET_HAS_div_i32) {
        tcg_gen_op3_i32(INDEX_op_divu_i32, ret, arg1, arg2);
    } else if (TCG_TARGET_HAS_div2_i32) {
        TCGv_i32 t0 = tcg_temp_ebb_new_i32();
        tcg_gen_movi_i32(t0, 0);
        tcg_gen_op5_i32(INDEX_op_divu2_i32, ret, t0, arg1, t0, arg2);
        tcg_temp_free_i32(t0);
    } else {
        gen_helper_divu_i32(ret, arg1, arg2);
    }
}

void tcg_gen_remu_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
{
    if (TCG_TARGET_HAS_rem_i32) {
        tcg_gen_op3_i32(INDEX_op_remu_i32, ret, arg1, arg2);
    } else if (TCG_TARGET_HAS_div_i32) {
        TCGv_i32 t0 = tcg_temp_ebb_new_i32();
        tcg_gen_op3_i32(INDEX_op_divu_i32, t0, arg1, arg2);
        tcg_gen_mul_i32(t0, t0, arg2);
        tcg_gen_sub_i32(ret, arg1, t0);
        tcg_temp_free_i32(t0);
    } else if (TCG_TARGET_HAS_div2_i32) {
        TCGv_i32 t0 = tcg_temp_ebb_new_i32();
        tcg_gen_movi_i32(t0, 0);
        tcg_gen_op5_i32(INDEX_op_divu2_i32, t0, ret, arg1, t0, arg2);
        tcg_temp_free_i32(t0);
    } else {
        gen_helper_remu_i32(ret, arg1, arg2);
    }
}

void tcg_gen_andc_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
{
    if (TCG_TARGET_HAS_andc_i32) {
        tcg_gen_op3_i32(INDEX_op_andc_i32, ret, arg1, arg2);
    } else {
        TCGv_i32 t0 = tcg_temp_ebb_new_i32();
        tcg_gen_not_i32(t0, arg2);
        tcg_gen_and_i32(ret, arg1, t0);
        tcg_temp_free_i32(t0);
    }
}

void tcg_gen_eqv_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
{
    if (TCG_TARGET_HAS_eqv_i32) {
        tcg_gen_op3_i32(INDEX_op_eqv_i32, ret, arg1, arg2);
    } else {
        tcg_gen_xor_i32(ret, arg1, arg2);
        tcg_gen_not_i32(ret, ret);
    }
}

void tcg_gen_nand_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
{
    if (TCG_TARGET_HAS_nand_i32) {
        tcg_gen_op3_i32(INDEX_op_nand_i32, ret, arg1, arg2);
    } else {
        tcg_gen_and_i32(ret, arg1, arg2);
        tcg_gen_not_i32(ret, ret);
    }
}

void tcg_gen_nor_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
{
    if (TCG_TARGET_HAS_nor_i32) {
        tcg_gen_op3_i32(INDEX_op_nor_i32, ret, arg1, arg2);
    } else {
        tcg_gen_or_i32(ret, arg1, arg2);
        tcg_gen_not_i32(ret, ret);
    }
}

void tcg_gen_orc_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
{
    if (TCG_TARGET_HAS_orc_i32) {
        tcg_gen_op3_i32(INDEX_op_orc_i32, ret, arg1, arg2);
    } else {
        TCGv_i32 t0 = tcg_temp_ebb_new_i32();
        tcg_gen_not_i32(t0, arg2);
        tcg_gen_or_i32(ret, arg1, t0);
        tcg_temp_free_i32(t0);
    }
}

void tcg_gen_clz_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
{
    if (TCG_TARGET_HAS_clz_i32) {
        tcg_gen_op3_i32(INDEX_op_clz_i32, ret, arg1, arg2);
    } else if (TCG_TARGET_HAS_clz_i64) {
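        /*
         * Zero-extending into 64 bits adds 32 leading zeros, so bias
         * the "input is zero" result in t2 by 32 as well, then
         * subtract 32 from the final count.
         */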
        TCGv_i64 t1 = tcg_temp_ebb_new_i64();
        TCGv_i64 t2 = tcg_temp_ebb_new_i64();
        tcg_gen_extu_i32_i64(t1, arg1);
        tcg_gen_extu_i32_i64(t2, arg2);
        tcg_gen_addi_i64(t2, t2, 32);
        tcg_gen_clz_i64(t1, t1, t2);
        tcg_gen_extrl_i64_i32(ret, t1);
        tcg_temp_free_i64(t1);
        tcg_temp_free_i64(t2);
        tcg_gen_subi_i32(ret, ret, 32);
    } else {
        gen_helper_clz_i32(ret, arg1, arg2);
    }
}

void tcg_gen_clzi_i32(TCGv_i32 ret, TCGv_i32 arg1, uint32_t arg2)
{
    tcg_gen_clz_i32(ret, arg1, tcg_constant_i32(arg2));
}

void tcg_gen_ctz_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
{
    if (TCG_TARGET_HAS_ctz_i32) {
        tcg_gen_op3_i32(INDEX_op_ctz_i32, ret, arg1, arg2);
    } else if (TCG_TARGET_HAS_ctz_i64) {
        TCGv_i64 t1 = tcg_temp_ebb_new_i64();
        TCGv_i64 t2 = tcg_temp_ebb_new_i64();
        tcg_gen_extu_i32_i64(t1, arg1);
        tcg_gen_extu_i32_i64(t2, arg2);
        tcg_gen_ctz_i64(t1, t1, t2);
        tcg_gen_extrl_i64_i32(ret, t1);
        tcg_temp_free_i64(t1);
        tcg_temp_free_i64(t2);
    } else if (TCG_TARGET_HAS_ctpop_i32
               || TCG_TARGET_HAS_ctpop_i64
               || TCG_TARGET_HAS_clz_i32
               || TCG_TARGET_HAS_clz_i64) {
        TCGv_i32 z, t = tcg_temp_ebb_new_i32();

        if (TCG_TARGET_HAS_ctpop_i32 || TCG_TARGET_HAS_ctpop_i64) {
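            /*
             * (arg1 - 1) & ~arg1 sets exactly the bits below the least
             * significant set bit, so its population count equals
             * ctz(arg1) for arg1 != 0; the zero case is fixed up by the
             * movcond below.
             */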
            tcg_gen_subi_i32(t, arg1, 1);
            tcg_gen_andc_i32(t, t, arg1);
            tcg_gen_ctpop_i32(t, t);
        } else {
            /* Since all non-x86 hosts have clz(0) == 32, don't fight it.  */
            tcg_gen_neg_i32(t, arg1);
            tcg_gen_and_i32(t, t, arg1);
            tcg_gen_clzi_i32(t, t, 32);
            tcg_gen_xori_i32(t, t, 31);
        }
        z = tcg_constant_i32(0);
        tcg_gen_movcond_i32(TCG_COND_EQ, ret, arg1, z, arg2, t);
        tcg_temp_free_i32(t);
    } else {
        gen_helper_ctz_i32(ret, arg1, arg2);
    }
}

void tcg_gen_ctzi_i32(TCGv_i32 ret, TCGv_i32 arg1, uint32_t arg2)
{
    if (!TCG_TARGET_HAS_ctz_i32 && TCG_TARGET_HAS_ctpop_i32 && arg2 == 32) {
        /* This equivalence has the advantage of not requiring a fixup.  */
        TCGv_i32 t = tcg_temp_ebb_new_i32();
        tcg_gen_subi_i32(t, arg1, 1);
        tcg_gen_andc_i32(t, t, arg1);
        tcg_gen_ctpop_i32(ret, t);
        tcg_temp_free_i32(t);
    } else {
        tcg_gen_ctz_i32(ret, arg1, tcg_constant_i32(arg2));
    }
}

void tcg_gen_clrsb_i32(TCGv_i32 ret, TCGv_i32 arg)
{
    if (TCG_TARGET_HAS_clz_i32) {
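        /*
         * XOR with the replicated sign bit turns redundant sign bits
         * into leading zeros; clz with a "zero input" result of 32
         * counts them, and subtracting 1 excludes the sign bit itself.
         */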
        TCGv_i32 t = tcg_temp_ebb_new_i32();
        tcg_gen_sari_i32(t, arg, 31);
        tcg_gen_xor_i32(t, t, arg);
        tcg_gen_clzi_i32(t, t, 32);
        tcg_gen_subi_i32(ret, t, 1);
        tcg_temp_free_i32(t);
    } else {
        gen_helper_clrsb_i32(ret, arg);
    }
}

void tcg_gen_ctpop_i32(TCGv_i32 ret, TCGv_i32 arg1)
{
    if (TCG_TARGET_HAS_ctpop_i32) {
        tcg_gen_op2_i32(INDEX_op_ctpop_i32, ret, arg1);
    } else if (TCG_TARGET_HAS_ctpop_i64) {
        TCGv_i64 t = tcg_temp_ebb_new_i64();
        tcg_gen_extu_i32_i64(t, arg1);
        tcg_gen_ctpop_i64(t, t);
        tcg_gen_extrl_i64_i32(ret, t);
        tcg_temp_free_i64(t);
    } else {
        gen_helper_ctpop_i32(ret, arg1);
    }
}

void tcg_gen_rotl_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
{
    if (TCG_TARGET_HAS_rot_i32) {
        tcg_gen_op3_i32(INDEX_op_rotl_i32, ret, arg1, arg2);
    } else {
        TCGv_i32 t0, t1;

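        /* Expand as (arg1 << arg2) | (arg1 >> (32 - arg2)). */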
        t0 = tcg_temp_ebb_new_i32();
        t1 = tcg_temp_ebb_new_i32();
        tcg_gen_shl_i32(t0, arg1, arg2);
        tcg_gen_subfi_i32(t1, 32, arg2);
        tcg_gen_shr_i32(t1, arg1, t1);
        tcg_gen_or_i32(ret, t0, t1);
        tcg_temp_free_i32(t0);
        tcg_temp_free_i32(t1);
    }
}

void tcg_gen_rotli_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
{
    tcg_debug_assert(arg2 >= 0 && arg2 < 32);
    /* some cases can be optimized here */
    if (arg2 == 0) {
        tcg_gen_mov_i32(ret, arg1);
    } else if (TCG_TARGET_HAS_rot_i32) {
        tcg_gen_rotl_i32(ret, arg1, tcg_constant_i32(arg2));
    } else {
        TCGv_i32 t0, t1;
        t0 = tcg_temp_ebb_new_i32();
        t1 = tcg_temp_ebb_new_i32();
        tcg_gen_shli_i32(t0, arg1, arg2);
        tcg_gen_shri_i32(t1, arg1, 32 - arg2);
        tcg_gen_or_i32(ret, t0, t1);
        tcg_temp_free_i32(t0);
        tcg_temp_free_i32(t1);
    }
}

void tcg_gen_rotr_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
{
    if (TCG_TARGET_HAS_rot_i32) {
        tcg_gen_op3_i32(INDEX_op_rotr_i32, ret, arg1, arg2);
    } else {
        TCGv_i32 t0, t1;

        t0 = tcg_temp_ebb_new_i32();
        t1 = tcg_temp_ebb_new_i32();
        tcg_gen_shr_i32(t0, arg1, arg2);
        tcg_gen_subfi_i32(t1, 32, arg2);
        tcg_gen_shl_i32(t1, arg1, t1);
        tcg_gen_or_i32(ret, t0, t1);
        tcg_temp_free_i32(t0);
        tcg_temp_free_i32(t1);
    }
}

void tcg_gen_rotri_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
{
    tcg_debug_assert(arg2 >= 0 && arg2 < 32);
    /* some cases can be optimized here */
    if (arg2 == 0) {
        tcg_gen_mov_i32(ret, arg1);
    } else {
        tcg_gen_rotli_i32(ret, arg1, 32 - arg2);
    }
}

void tcg_gen_deposit_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2,
                         unsigned int ofs, unsigned int len)
{
    uint32_t mask;
    TCGv_i32 t1;

    tcg_debug_assert(ofs < 32);
    tcg_debug_assert(len > 0);
    tcg_debug_assert(len <= 32);
    tcg_debug_assert(ofs + len <= 32);

    if (len == 32) {
        tcg_gen_mov_i32(ret, arg2);
        return;
    }
    if (TCG_TARGET_HAS_deposit_i32 && TCG_TARGET_deposit_i32_valid(ofs, len)) {
        tcg_gen_op5ii_i32(INDEX_op_deposit_i32, ret, arg1, arg2, ofs, len);
        return;
    }

    t1 = tcg_temp_ebb_new_i32();

    if (TCG_TARGET_HAS_extract2_i32) {
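        /*
         * extract2(lo, hi, n) produces (lo >> n) | (hi << (32 - n)),
         * which can synthesize a deposit whose field abuts either end
         * of the word.
         */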
        if (ofs + len == 32) {
            tcg_gen_shli_i32(t1, arg1, len);
            tcg_gen_extract2_i32(ret, t1, arg2, len);
            goto done;
        }
        if (ofs == 0) {
            tcg_gen_extract2_i32(ret, arg1, arg2, len);
            tcg_gen_rotli_i32(ret, ret, len);
            goto done;
        }
    }

    mask = (1u << len) - 1;
    if (ofs + len < 32) {
        tcg_gen_andi_i32(t1, arg2, mask);
        tcg_gen_shli_i32(t1, t1, ofs);
    } else {
        tcg_gen_shli_i32(t1, arg2, ofs);
    }
    tcg_gen_andi_i32(ret, arg1, ~(mask << ofs));
    tcg_gen_or_i32(ret, ret, t1);
 done:
    tcg_temp_free_i32(t1);
}

void tcg_gen_deposit_z_i32(TCGv_i32 ret, TCGv_i32 arg,
                           unsigned int ofs, unsigned int len)
{
    tcg_debug_assert(ofs < 32);
    tcg_debug_assert(len > 0);
    tcg_debug_assert(len <= 32);
    tcg_debug_assert(ofs + len <= 32);

    if (ofs + len == 32) {
        tcg_gen_shli_i32(ret, arg, ofs);
    } else if (ofs == 0) {
        tcg_gen_andi_i32(ret, arg, (1u << len) - 1);
    } else if (TCG_TARGET_HAS_deposit_i32
               && TCG_TARGET_deposit_i32_valid(ofs, len)) {
        TCGv_i32 zero = tcg_constant_i32(0);
        tcg_gen_op5ii_i32(INDEX_op_deposit_i32, ret, zero, arg, ofs, len);
    } else {
        /* To help two-operand hosts we prefer to zero-extend first,
           which allows ARG to stay live.  */
        switch (len) {
        case 16:
            if (TCG_TARGET_HAS_ext16u_i32) {
                tcg_gen_ext16u_i32(ret, arg);
                tcg_gen_shli_i32(ret, ret, ofs);
                return;
            }
            break;
        case 8:
            if (TCG_TARGET_HAS_ext8u_i32) {
                tcg_gen_ext8u_i32(ret, arg);
                tcg_gen_shli_i32(ret, ret, ofs);
                return;
            }
            break;
        }
        /* Otherwise prefer zero-extension over AND for code size.  */
        switch (ofs + len) {
        case 16:
            if (TCG_TARGET_HAS_ext16u_i32) {
                tcg_gen_shli_i32(ret, arg, ofs);
                tcg_gen_ext16u_i32(ret, ret);
                return;
            }
            break;
        case 8:
            if (TCG_TARGET_HAS_ext8u_i32) {
                tcg_gen_shli_i32(ret, arg, ofs);
                tcg_gen_ext8u_i32(ret, ret);
                return;
            }
            break;
        }
        tcg_gen_andi_i32(ret, arg, (1u << len) - 1);
        tcg_gen_shli_i32(ret, ret, ofs);
    }
}

void tcg_gen_extract_i32(TCGv_i32 ret, TCGv_i32 arg,
                         unsigned int ofs, unsigned int len)
{
    tcg_debug_assert(ofs < 32);
    tcg_debug_assert(len > 0);
    tcg_debug_assert(len <= 32);
    tcg_debug_assert(ofs + len <= 32);

    /* Canonicalize certain special cases, even if extract is supported.  */
    if (ofs + len == 32) {
        tcg_gen_shri_i32(ret, arg, 32 - len);
        return;
    }
    if (ofs == 0) {
        tcg_gen_andi_i32(ret, arg, (1u << len) - 1);
        return;
    }

    if (TCG_TARGET_HAS_extract_i32
        && TCG_TARGET_extract_i32_valid(ofs, len)) {
        tcg_gen_op4ii_i32(INDEX_op_extract_i32, ret, arg, ofs, len);
        return;
    }

    /* Assume that zero-extension, if available, is cheaper than a shift.  */
    switch (ofs + len) {
    case 16:
        if (TCG_TARGET_HAS_ext16u_i32) {
            tcg_gen_ext16u_i32(ret, arg);
            tcg_gen_shri_i32(ret, ret, ofs);
            return;
        }
        break;
    case 8:
        if (TCG_TARGET_HAS_ext8u_i32) {
            tcg_gen_ext8u_i32(ret, arg);
            tcg_gen_shri_i32(ret, ret, ofs);
            return;
        }
        break;
    }

    /* ??? Ideally we'd know what values are available for immediate AND.
       Assume that 8 bits are available, plus the special case of 16,
       so that we get ext8u, ext16u.  */
    switch (len) {
    case 1 ... 8: case 16:
        tcg_gen_shri_i32(ret, arg, ofs);
        tcg_gen_andi_i32(ret, ret, (1u << len) - 1);
        break;
    default:
        tcg_gen_shli_i32(ret, arg, 32 - len - ofs);
        tcg_gen_shri_i32(ret, ret, 32 - len);
        break;
    }
}

void tcg_gen_sextract_i32(TCGv_i32 ret, TCGv_i32 arg,
                          unsigned int ofs, unsigned int len)
{
    tcg_debug_assert(ofs < 32);
    tcg_debug_assert(len > 0);
    tcg_debug_assert(len <= 32);
    tcg_debug_assert(ofs + len <= 32);

    /* Canonicalize certain special cases, even if extract is supported.  */
    if (ofs + len == 32) {
        tcg_gen_sari_i32(ret, arg, 32 - len);
        return;
    }
    if (ofs == 0) {
        switch (len) {
        case 16:
            tcg_gen_ext16s_i32(ret, arg);
            return;
        case 8:
            tcg_gen_ext8s_i32(ret, arg);
            return;
        }
    }

    if (TCG_TARGET_HAS_sextract_i32
        && TCG_TARGET_extract_i32_valid(ofs, len)) {
        tcg_gen_op4ii_i32(INDEX_op_sextract_i32, ret, arg, ofs, len);
        return;
    }

    /* Assume that sign-extension, if available, is cheaper than a shift.  */
    switch (ofs + len) {
    case 16:
        if (TCG_TARGET_HAS_ext16s_i32) {
            tcg_gen_ext16s_i32(ret, arg);
            tcg_gen_sari_i32(ret, ret, ofs);
            return;
        }
        break;
    case 8:
        if (TCG_TARGET_HAS_ext8s_i32) {
            tcg_gen_ext8s_i32(ret, arg);
            tcg_gen_sari_i32(ret, ret, ofs);
            return;
        }
        break;
    }
    switch (len) {
    case 16:
        if (TCG_TARGET_HAS_ext16s_i32) {
            tcg_gen_shri_i32(ret, arg, ofs);
            tcg_gen_ext16s_i32(ret, ret);
            return;
        }
        break;
    case 8:
        if (TCG_TARGET_HAS_ext8s_i32) {
            tcg_gen_shri_i32(ret, arg, ofs);
            tcg_gen_ext8s_i32(ret, ret);
            return;
        }
        break;
    }

    tcg_gen_shli_i32(ret, arg, 32 - len - ofs);
    tcg_gen_sari_i32(ret, ret, 32 - len);
}

/*
 * Extract 32 bits from a 64-bit input, ah:al, starting from ofs.
 * Unlike tcg_gen_extract_i32 above, len is fixed at 32.
 */
void tcg_gen_extract2_i32(TCGv_i32 ret, TCGv_i32 al, TCGv_i32 ah,
                          unsigned int ofs)
{
    tcg_debug_assert(ofs <= 32);
    if (ofs == 0) {
        tcg_gen_mov_i32(ret, al);
    } else if (ofs == 32) {
        tcg_gen_mov_i32(ret, ah);
    } else if (al == ah) {
        tcg_gen_rotri_i32(ret, al, ofs);
    } else if (TCG_TARGET_HAS_extract2_i32) {
        tcg_gen_op4i_i32(INDEX_op_extract2_i32, ret, al, ah, ofs);
    } else {
        TCGv_i32 t0 = tcg_temp_ebb_new_i32();
        tcg_gen_shri_i32(t0, al, ofs);
        tcg_gen_deposit_i32(ret, t0, ah, 32 - ofs, ofs);
        tcg_temp_free_i32(t0);
    }
}

void tcg_gen_movcond_i32(TCGCond cond, TCGv_i32 ret, TCGv_i32 c1,
                         TCGv_i32 c2, TCGv_i32 v1, TCGv_i32 v2)
{
    if (cond == TCG_COND_ALWAYS) {
        tcg_gen_mov_i32(ret, v1);
    } else if (cond == TCG_COND_NEVER) {
        tcg_gen_mov_i32(ret, v2);
    } else if (TCG_TARGET_HAS_movcond_i32) {
        tcg_gen_op6i_i32(INDEX_op_movcond_i32, ret, c1, c2, v1, v2, cond);
    } else {
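        /*
         * Turn the 0/1 setcond result into a mask of all ones or all
         * zeros, then blend: ret = (v1 & mask) | (v2 & ~mask).
         */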
        TCGv_i32 t0 = tcg_temp_ebb_new_i32();
        TCGv_i32 t1 = tcg_temp_ebb_new_i32();
        tcg_gen_setcond_i32(cond, t0, c1, c2);
        tcg_gen_neg_i32(t0, t0);
        tcg_gen_and_i32(t1, v1, t0);
        tcg_gen_andc_i32(ret, v2, t0);
        tcg_gen_or_i32(ret, ret, t1);
        tcg_temp_free_i32(t0);
        tcg_temp_free_i32(t1);
    }
}

void tcg_gen_add2_i32(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 al,
                      TCGv_i32 ah, TCGv_i32 bl, TCGv_i32 bh)
{
    if (TCG_TARGET_HAS_add2_i32) {
        tcg_gen_op6_i32(INDEX_op_add2_i32, rl, rh, al, ah, bl, bh);
    } else {
        TCGv_i64 t0 = tcg_temp_ebb_new_i64();
        TCGv_i64 t1 = tcg_temp_ebb_new_i64();
        tcg_gen_concat_i32_i64(t0, al, ah);
        tcg_gen_concat_i32_i64(t1, bl, bh);
        tcg_gen_add_i64(t0, t0, t1);
        tcg_gen_extr_i64_i32(rl, rh, t0);
        tcg_temp_free_i64(t0);
        tcg_temp_free_i64(t1);
    }
}

void tcg_gen_sub2_i32(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 al,
                      TCGv_i32 ah, TCGv_i32 bl, TCGv_i32 bh)
{
    if (TCG_TARGET_HAS_sub2_i32) {
        tcg_gen_op6_i32(INDEX_op_sub2_i32, rl, rh, al, ah, bl, bh);
    } else {
        TCGv_i64 t0 = tcg_temp_ebb_new_i64();
        TCGv_i64 t1 = tcg_temp_ebb_new_i64();
        tcg_gen_concat_i32_i64(t0, al, ah);
        tcg_gen_concat_i32_i64(t1, bl, bh);
        tcg_gen_sub_i64(t0, t0, t1);
        tcg_gen_extr_i64_i32(rl, rh, t0);
        tcg_temp_free_i64(t0);
        tcg_temp_free_i64(t1);
    }
}

void tcg_gen_mulu2_i32(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 arg1, TCGv_i32 arg2)
{
    if (TCG_TARGET_HAS_mulu2_i32) {
        tcg_gen_op4_i32(INDEX_op_mulu2_i32, rl, rh, arg1, arg2);
    } else if (TCG_TARGET_HAS_muluh_i32) {
        TCGv_i32 t = tcg_temp_ebb_new_i32();
        tcg_gen_op3_i32(INDEX_op_mul_i32, t, arg1, arg2);
        tcg_gen_op3_i32(INDEX_op_muluh_i32, rh, arg1, arg2);
        tcg_gen_mov_i32(rl, t);
        tcg_temp_free_i32(t);
    } else if (TCG_TARGET_REG_BITS == 64) {
        TCGv_i64 t0 = tcg_temp_ebb_new_i64();
        TCGv_i64 t1 = tcg_temp_ebb_new_i64();
        tcg_gen_extu_i32_i64(t0, arg1);
        tcg_gen_extu_i32_i64(t1, arg2);
        tcg_gen_mul_i64(t0, t0, t1);
        tcg_gen_extr_i64_i32(rl, rh, t0);
        tcg_temp_free_i64(t0);
        tcg_temp_free_i64(t1);
    } else {
        qemu_build_not_reached();
    }
}

void tcg_gen_muls2_i32(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 arg1, TCGv_i32 arg2)
{
    if (TCG_TARGET_HAS_muls2_i32) {
        tcg_gen_op4_i32(INDEX_op_muls2_i32, rl, rh, arg1, arg2);
    } else if (TCG_TARGET_HAS_mulsh_i32) {
        TCGv_i32 t = tcg_temp_ebb_new_i32();
        tcg_gen_op3_i32(INDEX_op_mul_i32, t, arg1, arg2);
        tcg_gen_op3_i32(INDEX_op_mulsh_i32, rh, arg1, arg2);
        tcg_gen_mov_i32(rl, t);
        tcg_temp_free_i32(t);
    } else if (TCG_TARGET_REG_BITS == 32) {
        TCGv_i32 t0 = tcg_temp_ebb_new_i32();
        TCGv_i32 t1 = tcg_temp_ebb_new_i32();
        TCGv_i32 t2 = tcg_temp_ebb_new_i32();
        TCGv_i32 t3 = tcg_temp_ebb_new_i32();
        tcg_gen_mulu2_i32(t0, t1, arg1, arg2);
        /* Adjust for negative inputs.  */
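        /*
         * A negative input read as unsigned is too large by 2**32, so
         * muls2_hi = mulu2_hi - (arg1 < 0 ? arg2 : 0)
         *                     - (arg2 < 0 ? arg1 : 0).
         */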
        tcg_gen_sari_i32(t2, arg1, 31);
        tcg_gen_sari_i32(t3, arg2, 31);
        tcg_gen_and_i32(t2, t2, arg2);
        tcg_gen_and_i32(t3, t3, arg1);
        tcg_gen_sub_i32(rh, t1, t2);
        tcg_gen_sub_i32(rh, rh, t3);
        tcg_gen_mov_i32(rl, t0);
        tcg_temp_free_i32(t0);
        tcg_temp_free_i32(t1);
        tcg_temp_free_i32(t2);
        tcg_temp_free_i32(t3);
    } else {
        TCGv_i64 t0 = tcg_temp_ebb_new_i64();
        TCGv_i64 t1 = tcg_temp_ebb_new_i64();
        tcg_gen_ext_i32_i64(t0, arg1);
        tcg_gen_ext_i32_i64(t1, arg2);
        tcg_gen_mul_i64(t0, t0, t1);
        tcg_gen_extr_i64_i32(rl, rh, t0);
        tcg_temp_free_i64(t0);
        tcg_temp_free_i64(t1);
    }
}

void tcg_gen_mulsu2_i32(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 arg1, TCGv_i32 arg2)
{
    if (TCG_TARGET_REG_BITS == 32) {
        TCGv_i32 t0 = tcg_temp_ebb_new_i32();
        TCGv_i32 t1 = tcg_temp_ebb_new_i32();
        TCGv_i32 t2 = tcg_temp_ebb_new_i32();
        tcg_gen_mulu2_i32(t0, t1, arg1, arg2);
        /* Adjust for negative input for the signed arg1.  */
        tcg_gen_sari_i32(t2, arg1, 31);
        tcg_gen_and_i32(t2, t2, arg2);
        tcg_gen_sub_i32(rh, t1, t2);
        tcg_gen_mov_i32(rl, t0);
        tcg_temp_free_i32(t0);
        tcg_temp_free_i32(t1);
        tcg_temp_free_i32(t2);
    } else {
        TCGv_i64 t0 = tcg_temp_ebb_new_i64();
        TCGv_i64 t1 = tcg_temp_ebb_new_i64();
        tcg_gen_ext_i32_i64(t0, arg1);
        tcg_gen_extu_i32_i64(t1, arg2);
        tcg_gen_mul_i64(t0, t0, t1);
        tcg_gen_extr_i64_i32(rl, rh, t0);
        tcg_temp_free_i64(t0);
        tcg_temp_free_i64(t1);
    }
}

void tcg_gen_ext8s_i32(TCGv_i32 ret, TCGv_i32 arg)
{
    if (TCG_TARGET_HAS_ext8s_i32) {
        tcg_gen_op2_i32(INDEX_op_ext8s_i32, ret, arg);
    } else {
        tcg_gen_shli_i32(ret, arg, 24);
        tcg_gen_sari_i32(ret, ret, 24);
    }
}

void tcg_gen_ext16s_i32(TCGv_i32 ret, TCGv_i32 arg)
{
    if (TCG_TARGET_HAS_ext16s_i32) {
        tcg_gen_op2_i32(INDEX_op_ext16s_i32, ret, arg);
    } else {
        tcg_gen_shli_i32(ret, arg, 16);
        tcg_gen_sari_i32(ret, ret, 16);
    }
}

void tcg_gen_ext8u_i32(TCGv_i32 ret, TCGv_i32 arg)
{
    if (TCG_TARGET_HAS_ext8u_i32) {
        tcg_gen_op2_i32(INDEX_op_ext8u_i32, ret, arg);
    } else {
        tcg_gen_andi_i32(ret, arg, 0xffu);
    }
}

void tcg_gen_ext16u_i32(TCGv_i32 ret, TCGv_i32 arg)
{
    if (TCG_TARGET_HAS_ext16u_i32) {
        tcg_gen_op2_i32(INDEX_op_ext16u_i32, ret, arg);
    } else {
        tcg_gen_andi_i32(ret, arg, 0xffffu);
    }
}

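/*
 * bswap16 swaps the two low bytes.  TCG_BSWAP_IZ asserts that the high
 * bits of the input are already zero; TCG_BSWAP_OZ and TCG_BSWAP_OS
 * request the result zero- or sign-extended from bit 15.
 */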
void tcg_gen_bswap16_i32(TCGv_i32 ret, TCGv_i32 arg, int flags)
{
    /* Only one extension flag may be present. */
    tcg_debug_assert(!(flags & TCG_BSWAP_OS) || !(flags & TCG_BSWAP_OZ));

    if (TCG_TARGET_HAS_bswap16_i32) {
        tcg_gen_op3i_i32(INDEX_op_bswap16_i32, ret, arg, flags);
    } else {
        TCGv_i32 t0 = tcg_temp_ebb_new_i32();
        TCGv_i32 t1 = tcg_temp_ebb_new_i32();

        tcg_gen_shri_i32(t0, arg, 8);
        if (!(flags & TCG_BSWAP_IZ)) {
            tcg_gen_ext8u_i32(t0, t0);
        }

        if (flags & TCG_BSWAP_OS) {
            tcg_gen_shli_i32(t1, arg, 24);
            tcg_gen_sari_i32(t1, t1, 16);
        } else if (flags & TCG_BSWAP_OZ) {
            tcg_gen_ext8u_i32(t1, arg);
            tcg_gen_shli_i32(t1, t1, 8);
        } else {
            tcg_gen_shli_i32(t1, arg, 8);
        }

        tcg_gen_or_i32(ret, t0, t1);
        tcg_temp_free_i32(t0);
        tcg_temp_free_i32(t1);
    }
}

void tcg_gen_bswap32_i32(TCGv_i32 ret, TCGv_i32 arg)
{
    if (TCG_TARGET_HAS_bswap32_i32) {
        tcg_gen_op3i_i32(INDEX_op_bswap32_i32, ret, arg, 0);
    } else {
        TCGv_i32 t0 = tcg_temp_ebb_new_i32();
        TCGv_i32 t1 = tcg_temp_ebb_new_i32();
        TCGv_i32 t2 = tcg_constant_i32(0x00ff00ff);

                                        /* arg = abcd */
        tcg_gen_shri_i32(t0, arg, 8);   /*  t0 = .abc */
        tcg_gen_and_i32(t1, arg, t2);   /*  t1 = .b.d */
        tcg_gen_and_i32(t0, t0, t2);    /*  t0 = .a.c */
        tcg_gen_shli_i32(t1, t1, 8);    /*  t1 = b.d. */
        tcg_gen_or_i32(ret, t0, t1);    /* ret = badc */

        tcg_gen_shri_i32(t0, ret, 16);  /*  t0 = ..ba */
        tcg_gen_shli_i32(t1, ret, 16);  /*  t1 = dc.. */
        tcg_gen_or_i32(ret, t0, t1);    /* ret = dcba */

        tcg_temp_free_i32(t0);
        tcg_temp_free_i32(t1);
    }
}

void tcg_gen_hswap_i32(TCGv_i32 ret, TCGv_i32 arg)
{
    /* Swapping 2 16-bit elements is a rotate. */
    tcg_gen_rotli_i32(ret, arg, 16);
}

void tcg_gen_smin_i32(TCGv_i32 ret, TCGv_i32 a, TCGv_i32 b)
{
    tcg_gen_movcond_i32(TCG_COND_LT, ret, a, b, a, b);
}

void tcg_gen_umin_i32(TCGv_i32 ret, TCGv_i32 a, TCGv_i32 b)
{
    tcg_gen_movcond_i32(TCG_COND_LTU, ret, a, b, a, b);
}

void tcg_gen_smax_i32(TCGv_i32 ret, TCGv_i32 a, TCGv_i32 b)
{
    tcg_gen_movcond_i32(TCG_COND_LT, ret, a, b, b, a);
}

void tcg_gen_umax_i32(TCGv_i32 ret, TCGv_i32 a, TCGv_i32 b)
{
    tcg_gen_movcond_i32(TCG_COND_LTU, ret, a, b, b, a);
}

void tcg_gen_abs_i32(TCGv_i32 ret, TCGv_i32 a)
{
    TCGv_i32 t = tcg_temp_ebb_new_i32();

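    /* t is 0 or -1; (a ^ t) - t negates a exactly when it is negative. */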
    tcg_gen_sari_i32(t, a, 31);
    tcg_gen_xor_i32(ret, a, t);
    tcg_gen_sub_i32(ret, ret, t);
    tcg_temp_free_i32(t);
}

/* 64-bit ops */

#if TCG_TARGET_REG_BITS == 32
/* These are all inline for TCG_TARGET_REG_BITS == 64.  */

void tcg_gen_discard_i64(TCGv_i64 arg)
{
    tcg_gen_discard_i32(TCGV_LOW(arg));
    tcg_gen_discard_i32(TCGV_HIGH(arg));
}

void tcg_gen_mov_i64(TCGv_i64 ret, TCGv_i64 arg)
{
    TCGTemp *ts = tcgv_i64_temp(arg);

    /* Canonicalize TCGv_i64 TEMP_CONST into TCGv_i32 TEMP_CONST. */
    if (ts->kind == TEMP_CONST) {
        tcg_gen_movi_i64(ret, ts->val);
    } else {
        tcg_gen_mov_i32(TCGV_LOW(ret), TCGV_LOW(arg));
        tcg_gen_mov_i32(TCGV_HIGH(ret), TCGV_HIGH(arg));
    }
}

void tcg_gen_movi_i64(TCGv_i64 ret, int64_t arg)
{
    tcg_gen_movi_i32(TCGV_LOW(ret), arg);
    tcg_gen_movi_i32(TCGV_HIGH(ret), arg >> 32);
}

void tcg_gen_ld8u_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset)
{
    tcg_gen_ld8u_i32(TCGV_LOW(ret), arg2, offset);
    tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
}

void tcg_gen_ld8s_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset)
{
    tcg_gen_ld8s_i32(TCGV_LOW(ret), arg2, offset);
    tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31);
}

void tcg_gen_ld16u_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset)
{
    tcg_gen_ld16u_i32(TCGV_LOW(ret), arg2, offset);
    tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
}

void tcg_gen_ld16s_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset)
{
    tcg_gen_ld16s_i32(TCGV_LOW(ret), arg2, offset);
    tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31);
}

void tcg_gen_ld32u_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset)
{
    tcg_gen_ld_i32(TCGV_LOW(ret), arg2, offset);
    tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
}

void tcg_gen_ld32s_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset)
{
    tcg_gen_ld_i32(TCGV_LOW(ret), arg2, offset);
    tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31);
}

void tcg_gen_ld_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset)
{
    /* Since arg2 and ret have different types,
       they cannot be the same temporary */
#if HOST_BIG_ENDIAN
    tcg_gen_ld_i32(TCGV_HIGH(ret), arg2, offset);
    tcg_gen_ld_i32(TCGV_LOW(ret), arg2, offset + 4);
#else
    tcg_gen_ld_i32(TCGV_LOW(ret), arg2, offset);
    tcg_gen_ld_i32(TCGV_HIGH(ret), arg2, offset + 4);
#endif
}

void tcg_gen_st8_i64(TCGv_i64 arg1, TCGv_ptr arg2, tcg_target_long offset)
{
    tcg_gen_st8_i32(TCGV_LOW(arg1), arg2, offset);
}

void tcg_gen_st16_i64(TCGv_i64 arg1, TCGv_ptr arg2, tcg_target_long offset)
{
    tcg_gen_st16_i32(TCGV_LOW(arg1), arg2, offset);
}

void tcg_gen_st32_i64(TCGv_i64 arg1, TCGv_ptr arg2, tcg_target_long offset)
{
    tcg_gen_st_i32(TCGV_LOW(arg1), arg2, offset);
}

void tcg_gen_st_i64(TCGv_i64 arg1, TCGv_ptr arg2, tcg_target_long offset)
{
#if HOST_BIG_ENDIAN
    tcg_gen_st_i32(TCGV_HIGH(arg1), arg2, offset);
    tcg_gen_st_i32(TCGV_LOW(arg1), arg2, offset + 4);
#else
    tcg_gen_st_i32(TCGV_LOW(arg1), arg2, offset);
    tcg_gen_st_i32(TCGV_HIGH(arg1), arg2, offset + 4);
#endif
}

void tcg_gen_add_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
{
    tcg_gen_add2_i32(TCGV_LOW(ret), TCGV_HIGH(ret), TCGV_LOW(arg1),
                     TCGV_HIGH(arg1), TCGV_LOW(arg2), TCGV_HIGH(arg2));
}

void tcg_gen_sub_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
{
    tcg_gen_sub2_i32(TCGV_LOW(ret), TCGV_HIGH(ret), TCGV_LOW(arg1),
                     TCGV_HIGH(arg1), TCGV_LOW(arg2), TCGV_HIGH(arg2));
}

void tcg_gen_and_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
{
    tcg_gen_and_i32(TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_LOW(arg2));
    tcg_gen_and_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), TCGV_HIGH(arg2));
}

void tcg_gen_or_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
{
    tcg_gen_or_i32(TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_LOW(arg2));
    tcg_gen_or_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), TCGV_HIGH(arg2));
}

void tcg_gen_xor_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
{
    tcg_gen_xor_i32(TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_LOW(arg2));
    tcg_gen_xor_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), TCGV_HIGH(arg2));
}

void tcg_gen_shl_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
{
    gen_helper_shl_i64(ret, arg1, arg2);
}

void tcg_gen_shr_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
{
    gen_helper_shr_i64(ret, arg1, arg2);
}

void tcg_gen_sar_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
{
    gen_helper_sar_i64(ret, arg1, arg2);
}

void tcg_gen_mul_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
{
    TCGv_i64 t0;
    TCGv_i32 t1;

    t0 = tcg_temp_ebb_new_i64();
    t1 = tcg_temp_ebb_new_i32();

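    /*
     * Form the 64-bit product from 32-bit halves: the full product of
     * al * bl, with al * bh + ah * bl folded into the high word.  The
     * ah * bh term affects only bits above 64 and is dropped.
     */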
    tcg_gen_mulu2_i32(TCGV_LOW(t0), TCGV_HIGH(t0),
                      TCGV_LOW(arg1), TCGV_LOW(arg2));

    tcg_gen_mul_i32(t1, TCGV_LOW(arg1), TCGV_HIGH(arg2));
    tcg_gen_add_i32(TCGV_HIGH(t0), TCGV_HIGH(t0), t1);
    tcg_gen_mul_i32(t1, TCGV_HIGH(arg1), TCGV_LOW(arg2));
    tcg_gen_add_i32(TCGV_HIGH(t0), TCGV_HIGH(t0), t1);

    tcg_gen_mov_i64(ret, t0);
    tcg_temp_free_i64(t0);
    tcg_temp_free_i32(t1);
}

#else

void tcg_gen_movi_i64(TCGv_i64 ret, int64_t arg)
{
    tcg_gen_mov_i64(ret, tcg_constant_i64(arg));
}

#endif /* TCG_TARGET_REG_BITS == 32 */

void tcg_gen_addi_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
{
    /* some cases can be optimized here */
    if (arg2 == 0) {
        tcg_gen_mov_i64(ret, arg1);
    } else if (TCG_TARGET_REG_BITS == 64) {
        tcg_gen_add_i64(ret, arg1, tcg_constant_i64(arg2));
    } else {
        tcg_gen_add2_i32(TCGV_LOW(ret), TCGV_HIGH(ret),
                         TCGV_LOW(arg1), TCGV_HIGH(arg1),
                         tcg_constant_i32(arg2), tcg_constant_i32(arg2 >> 32));
    }
}

void tcg_gen_subfi_i64(TCGv_i64 ret, int64_t arg1, TCGv_i64 arg2)
{
    if (arg1 == 0 && TCG_TARGET_HAS_neg_i64) {
        /* Don't recurse with tcg_gen_neg_i64.  */
        tcg_gen_op2_i64(INDEX_op_neg_i64, ret, arg2);
    } else if (TCG_TARGET_REG_BITS == 64) {
        tcg_gen_sub_i64(ret, tcg_constant_i64(arg1), arg2);
    } else {
        tcg_gen_sub2_i32(TCGV_LOW(ret), TCGV_HIGH(ret),
                         tcg_constant_i32(arg1), tcg_constant_i32(arg1 >> 32),
                         TCGV_LOW(arg2), TCGV_HIGH(arg2));
    }
}

void tcg_gen_subi_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
{
    /* some cases can be optimized here */
    if (arg2 == 0) {
        tcg_gen_mov_i64(ret, arg1);
    } else if (TCG_TARGET_REG_BITS == 64) {
        tcg_gen_sub_i64(ret, arg1, tcg_constant_i64(arg2));
    } else {
        tcg_gen_sub2_i32(TCGV_LOW(ret), TCGV_HIGH(ret),
                         TCGV_LOW(arg1), TCGV_HIGH(arg1),
                         tcg_constant_i32(arg2), tcg_constant_i32(arg2 >> 32));
    }
}

void tcg_gen_andi_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
{
    if (TCG_TARGET_REG_BITS == 32) {
        tcg_gen_andi_i32(TCGV_LOW(ret), TCGV_LOW(arg1), arg2);
        tcg_gen_andi_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), arg2 >> 32);
        return;
    }

    /* Some cases can be optimized here.  */
    switch (arg2) {
    case 0:
        tcg_gen_movi_i64(ret, 0);
        return;
    case -1:
        tcg_gen_mov_i64(ret, arg1);
        return;
    case 0xff:
        /* Don't recurse with tcg_gen_ext8u_i64.  */
        if (TCG_TARGET_HAS_ext8u_i64) {
            tcg_gen_op2_i64(INDEX_op_ext8u_i64, ret, arg1);
            return;
        }
        break;
    case 0xffff:
        if (TCG_TARGET_HAS_ext16u_i64) {
            tcg_gen_op2_i64(INDEX_op_ext16u_i64, ret, arg1);
            return;
        }
        break;
    case 0xffffffffu:
        if (TCG_TARGET_HAS_ext32u_i64) {
            tcg_gen_op2_i64(INDEX_op_ext32u_i64, ret, arg1);
            return;
        }
        break;
    }

    tcg_gen_and_i64(ret, arg1, tcg_constant_i64(arg2));
}

void tcg_gen_ori_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
{
    if (TCG_TARGET_REG_BITS == 32) {
        tcg_gen_ori_i32(TCGV_LOW(ret), TCGV_LOW(arg1), arg2);
        tcg_gen_ori_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), arg2 >> 32);
        return;
    }
    /* Some cases can be optimized here.  */
    if (arg2 == -1) {
        tcg_gen_movi_i64(ret, -1);
    } else if (arg2 == 0) {
        tcg_gen_mov_i64(ret, arg1);
    } else {
        tcg_gen_or_i64(ret, arg1, tcg_constant_i64(arg2));
    }
}

void tcg_gen_xori_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
{
    if (TCG_TARGET_REG_BITS == 32) {
        tcg_gen_xori_i32(TCGV_LOW(ret), TCGV_LOW(arg1), arg2);
        tcg_gen_xori_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), arg2 >> 32);
        return;
    }
    /* Some cases can be optimized here.  */
    if (arg2 == 0) {
        tcg_gen_mov_i64(ret, arg1);
    } else if (arg2 == -1 && TCG_TARGET_HAS_not_i64) {
        /* Don't recurse with tcg_gen_not_i64.  */
        tcg_gen_op2_i64(INDEX_op_not_i64, ret, arg1);
    } else {
        tcg_gen_xor_i64(ret, arg1, tcg_constant_i64(arg2));
    }
}

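/*
 * Expand a 64-bit constant shift in terms of 32-bit operations on the
 * two halves: a whole-word move for c >= 32, otherwise a shift of each
 * half with the bits crossing the boundary carried via extract2 or
 * deposit.
 */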
static inline void tcg_gen_shifti_i64(TCGv_i64 ret, TCGv_i64 arg1,
                                      unsigned c, bool right, bool arith)
{
    tcg_debug_assert(c < 64);
    if (c == 0) {
        tcg_gen_mov_i32(TCGV_LOW(ret), TCGV_LOW(arg1));
        tcg_gen_mov_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1));
    } else if (c >= 32) {
        c -= 32;
        if (right) {
            if (arith) {
                tcg_gen_sari_i32(TCGV_LOW(ret), TCGV_HIGH(arg1), c);
                tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), 31);
            } else {
                tcg_gen_shri_i32(TCGV_LOW(ret), TCGV_HIGH(arg1), c);
                tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
            }
        } else {
            tcg_gen_shli_i32(TCGV_HIGH(ret), TCGV_LOW(arg1), c);
            tcg_gen_movi_i32(TCGV_LOW(ret), 0);
        }
    } else if (right) {
        if (TCG_TARGET_HAS_extract2_i32) {
            tcg_gen_extract2_i32(TCGV_LOW(ret),
                                 TCGV_LOW(arg1), TCGV_HIGH(arg1), c);
        } else {
            tcg_gen_shri_i32(TCGV_LOW(ret), TCGV_LOW(arg1), c);
            tcg_gen_deposit_i32(TCGV_LOW(ret), TCGV_LOW(ret),
                                TCGV_HIGH(arg1), 32 - c, c);
        }
        if (arith) {
            tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), c);
        } else {
            tcg_gen_shri_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), c);
        }
    } else {
        if (TCG_TARGET_HAS_extract2_i32) {
            tcg_gen_extract2_i32(TCGV_HIGH(ret),
                                 TCGV_LOW(arg1), TCGV_HIGH(arg1), 32 - c);
        } else {
            TCGv_i32 t0 = tcg_temp_ebb_new_i32();
            tcg_gen_shri_i32(t0, TCGV_LOW(arg1), 32 - c);
            tcg_gen_deposit_i32(TCGV_HIGH(ret), t0,
                                TCGV_HIGH(arg1), c, 32 - c);
            tcg_temp_free_i32(t0);
        }
        tcg_gen_shli_i32(TCGV_LOW(ret), TCGV_LOW(arg1), c);
    }
}

void tcg_gen_shli_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
{
    tcg_debug_assert(arg2 >= 0 && arg2 < 64);
    if (TCG_TARGET_REG_BITS == 32) {
        tcg_gen_shifti_i64(ret, arg1, arg2, 0, 0);
    } else if (arg2 == 0) {
        tcg_gen_mov_i64(ret, arg1);
    } else {
        tcg_gen_shl_i64(ret, arg1, tcg_constant_i64(arg2));
    }
}

void tcg_gen_shri_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
{
    tcg_debug_assert(arg2 >= 0 && arg2 < 64);
    if (TCG_TARGET_REG_BITS == 32) {
        tcg_gen_shifti_i64(ret, arg1, arg2, 1, 0);
    } else if (arg2 == 0) {
        tcg_gen_mov_i64(ret, arg1);
    } else {
        tcg_gen_shr_i64(ret, arg1, tcg_constant_i64(arg2));
    }
}

void tcg_gen_sari_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
{
    tcg_debug_assert(arg2 >= 0 && arg2 < 64);
    if (TCG_TARGET_REG_BITS == 32) {
        tcg_gen_shifti_i64(ret, arg1, arg2, 1, 1);
    } else if (arg2 == 0) {
        tcg_gen_mov_i64(ret, arg1);
    } else {
        tcg_gen_sar_i64(ret, arg1, tcg_constant_i64(arg2));
    }
}

void tcg_gen_brcond_i64(TCGCond cond, TCGv_i64 arg1, TCGv_i64 arg2, TCGLabel *l)
{
    if (cond == TCG_COND_ALWAYS) {
        tcg_gen_br(l);
    } else if (cond != TCG_COND_NEVER) {
        if (TCG_TARGET_REG_BITS == 32) {
            tcg_gen_op6ii_i32(INDEX_op_brcond2_i32, TCGV_LOW(arg1),
                              TCGV_HIGH(arg1), TCGV_LOW(arg2),
                              TCGV_HIGH(arg2), cond, label_arg(l));
        } else {
            tcg_gen_op4ii_i64(INDEX_op_brcond_i64, arg1, arg2, cond,
                              label_arg(l));
        }
        add_last_as_label_use(l);
    }
}

void tcg_gen_brcondi_i64(TCGCond cond, TCGv_i64 arg1, int64_t arg2, TCGLabel *l)
{
    if (TCG_TARGET_REG_BITS == 64) {
        tcg_gen_brcond_i64(cond, arg1, tcg_constant_i64(arg2), l);
    } else if (cond == TCG_COND_ALWAYS) {
        tcg_gen_br(l);
    } else if (cond != TCG_COND_NEVER) {
        tcg_gen_op6ii_i32(INDEX_op_brcond2_i32,
                          TCGV_LOW(arg1), TCGV_HIGH(arg1),
                          tcg_constant_i32(arg2),
                          tcg_constant_i32(arg2 >> 32),
                          cond, label_arg(l));
        add_last_as_label_use(l);
    }
}

void tcg_gen_setcond_i64(TCGCond cond, TCGv_i64 ret,
                         TCGv_i64 arg1, TCGv_i64 arg2)
{
    if (cond == TCG_COND_ALWAYS) {
        tcg_gen_movi_i64(ret, 1);
    } else if (cond == TCG_COND_NEVER) {
        tcg_gen_movi_i64(ret, 0);
    } else {
        if (TCG_TARGET_REG_BITS == 32) {
            tcg_gen_op6i_i32(INDEX_op_setcond2_i32, TCGV_LOW(ret),
                             TCGV_LOW(arg1), TCGV_HIGH(arg1),
                             TCGV_LOW(arg2), TCGV_HIGH(arg2), cond);
            tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
        } else {
            tcg_gen_op4i_i64(INDEX_op_setcond_i64, ret, arg1, arg2, cond);
        }
    }
}

void tcg_gen_setcondi_i64(TCGCond cond, TCGv_i64 ret,
                          TCGv_i64 arg1, int64_t arg2)
{
    if (TCG_TARGET_REG_BITS == 64) {
        tcg_gen_setcond_i64(cond, ret, arg1, tcg_constant_i64(arg2));
    } else if (cond == TCG_COND_ALWAYS) {
        tcg_gen_movi_i64(ret, 1);
    } else if (cond == TCG_COND_NEVER) {
        tcg_gen_movi_i64(ret, 0);
    } else {
        tcg_gen_op6i_i32(INDEX_op_setcond2_i32, TCGV_LOW(ret),
                         TCGV_LOW(arg1), TCGV_HIGH(arg1),
                         tcg_constant_i32(arg2),
                         tcg_constant_i32(arg2 >> 32), cond);
        tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
    }
}

void tcg_gen_muli_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
{
    if (arg2 == 0) {
        tcg_gen_movi_i64(ret, 0);
    } else if (is_power_of_2(arg2)) {
        tcg_gen_shli_i64(ret, arg1, ctz64(arg2));
    } else {
        tcg_gen_mul_i64(ret, arg1, tcg_constant_i64(arg2));
    }
}

void tcg_gen_div_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
{
    if (TCG_TARGET_HAS_div_i64) {
        tcg_gen_op3_i64(INDEX_op_div_i64, ret, arg1, arg2);
    } else if (TCG_TARGET_HAS_div2_i64) {
        TCGv_i64 t0 = tcg_temp_ebb_new_i64();
        tcg_gen_sari_i64(t0, arg1, 63);
        tcg_gen_op5_i64(INDEX_op_div2_i64, ret, t0, arg1, t0, arg2);
        tcg_temp_free_i64(t0);
    } else {
        gen_helper_div_i64(ret, arg1, arg2);
    }
}

void tcg_gen_rem_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
{
    if (TCG_TARGET_HAS_rem_i64) {
        tcg_gen_op3_i64(INDEX_op_rem_i64, ret, arg1, arg2);
    } else if (TCG_TARGET_HAS_div_i64) {
        TCGv_i64 t0 = tcg_temp_ebb_new_i64();
        tcg_gen_op3_i64(INDEX_op_div_i64, t0, arg1, arg2);
        tcg_gen_mul_i64(t0, t0, arg2);
        tcg_gen_sub_i64(ret, arg1, t0);
        tcg_temp_free_i64(t0);
    } else if (TCG_TARGET_HAS_div2_i64) {
        TCGv_i64 t0 = tcg_temp_ebb_new_i64();
        tcg_gen_sari_i64(t0, arg1, 63);
        tcg_gen_op5_i64(INDEX_op_div2_i64, t0, ret, arg1, t0, arg2);
        tcg_temp_free_i64(t0);
    } else {
        gen_helper_rem_i64(ret, arg1, arg2);
    }
}

void tcg_gen_divu_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
{
    if (TCG_TARGET_HAS_div_i64) {
        tcg_gen_op3_i64(INDEX_op_divu_i64, ret, arg1, arg2);
    } else if (TCG_TARGET_HAS_div2_i64) {
        TCGv_i64 t0 = tcg_temp_ebb_new_i64();
        tcg_gen_movi_i64(t0, 0);
        tcg_gen_op5_i64(INDEX_op_divu2_i64, ret, t0, arg1, t0, arg2);
        tcg_temp_free_i64(t0);
    } else {
        gen_helper_divu_i64(ret, arg1, arg2);
    }
}

void tcg_gen_remu_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
{
    if (TCG_TARGET_HAS_rem_i64) {
        tcg_gen_op3_i64(INDEX_op_remu_i64, ret, arg1, arg2);
    } else if (TCG_TARGET_HAS_div_i64) {
        TCGv_i64 t0 = tcg_temp_ebb_new_i64();
        tcg_gen_op3_i64(INDEX_op_divu_i64, t0, arg1, arg2);
        tcg_gen_mul_i64(t0, t0, arg2);
        tcg_gen_sub_i64(ret, arg1, t0);
        tcg_temp_free_i64(t0);
    } else if (TCG_TARGET_HAS_div2_i64) {
        TCGv_i64 t0 = tcg_temp_ebb_new_i64();
        tcg_gen_movi_i64(t0, 0);
        tcg_gen_op5_i64(INDEX_op_divu2_i64, t0, ret, arg1, t0, arg2);
        tcg_temp_free_i64(t0);
    } else {
        gen_helper_remu_i64(ret, arg1, arg2);
    }
}

void tcg_gen_ext8s_i64(TCGv_i64 ret, TCGv_i64 arg)
{
    if (TCG_TARGET_REG_BITS == 32) {
        tcg_gen_ext8s_i32(TCGV_LOW(ret), TCGV_LOW(arg));
        tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31);
    } else if (TCG_TARGET_HAS_ext8s_i64) {
        tcg_gen_op2_i64(INDEX_op_ext8s_i64, ret, arg);
    } else {
        tcg_gen_shli_i64(ret, arg, 56);
        tcg_gen_sari_i64(ret, ret, 56);
    }
}

void tcg_gen_ext16s_i64(TCGv_i64 ret, TCGv_i64 arg)
{
    if (TCG_TARGET_REG_BITS == 32) {
        tcg_gen_ext16s_i32(TCGV_LOW(ret), TCGV_LOW(arg));
        tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31);
    } else if (TCG_TARGET_HAS_ext16s_i64) {
        tcg_gen_op2_i64(INDEX_op_ext16s_i64, ret, arg);
    } else {
        tcg_gen_shli_i64(ret, arg, 48);
        tcg_gen_sari_i64(ret, ret, 48);
    }
}

void tcg_gen_ext32s_i64(TCGv_i64 ret, TCGv_i64 arg)
{
    if (TCG_TARGET_REG_BITS == 32) {
        tcg_gen_mov_i32(TCGV_LOW(ret), TCGV_LOW(arg));
        tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31);
    } else if (TCG_TARGET_HAS_ext32s_i64) {
        tcg_gen_op2_i64(INDEX_op_ext32s_i64, ret, arg);
    } else {
        tcg_gen_shli_i64(ret, arg, 32);
        tcg_gen_sari_i64(ret, ret, 32);
    }
}

void tcg_gen_ext8u_i64(TCGv_i64 ret, TCGv_i64 arg)
{
    if (TCG_TARGET_REG_BITS == 32) {
        tcg_gen_ext8u_i32(TCGV_LOW(ret), TCGV_LOW(arg));
        tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
    } else if (TCG_TARGET_HAS_ext8u_i64) {
        tcg_gen_op2_i64(INDEX_op_ext8u_i64, ret, arg);
    } else {
        tcg_gen_andi_i64(ret, arg, 0xffu);
    }
}

void tcg_gen_ext16u_i64(TCGv_i64 ret, TCGv_i64 arg)
{
    if (TCG_TARGET_REG_BITS == 32) {
        tcg_gen_ext16u_i32(TCGV_LOW(ret), TCGV_LOW(arg));
        tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
    } else if (TCG_TARGET_HAS_ext16u_i64) {
        tcg_gen_op2_i64(INDEX_op_ext16u_i64, ret, arg);
    } else {
        tcg_gen_andi_i64(ret, arg, 0xffffu);
    }
}

void tcg_gen_ext32u_i64(TCGv_i64 ret, TCGv_i64 arg)
{
    if (TCG_TARGET_REG_BITS == 32) {
        tcg_gen_mov_i32(TCGV_LOW(ret), TCGV_LOW(arg));
        tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
    } else if (TCG_TARGET_HAS_ext32u_i64) {
        tcg_gen_op2_i64(INDEX_op_ext32u_i64, ret, arg);
    } else {
        tcg_gen_andi_i64(ret, arg, 0xffffffffu);
    }
}

void tcg_gen_bswap16_i64(TCGv_i64 ret, TCGv_i64 arg, int flags)
{
    /* Only one extension flag may be present. */
    tcg_debug_assert(!(flags & TCG_BSWAP_OS) || !(flags & TCG_BSWAP_OZ));

    if (TCG_TARGET_REG_BITS == 32) {
        tcg_gen_bswap16_i32(TCGV_LOW(ret), TCGV_LOW(arg), flags);
        if (flags & TCG_BSWAP_OS) {
            tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31);
        } else {
            tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
        }
    } else if (TCG_TARGET_HAS_bswap16_i64) {
        tcg_gen_op3i_i64(INDEX_op_bswap16_i64, ret, arg, flags);
    } else {
        TCGv_i64 t0 = tcg_temp_ebb_new_i64();
        TCGv_i64 t1 = tcg_temp_ebb_new_i64();

        tcg_gen_shri_i64(t0, arg, 8);
        if (!(flags & TCG_BSWAP_IZ)) {
            tcg_gen_ext8u_i64(t0, t0);
        }

        if (flags & TCG_BSWAP_OS) {
            tcg_gen_shli_i64(t1, arg, 56);
1738             tcg_gen_sari_i64(t1, t1, 48);
1739         } else if (flags & TCG_BSWAP_OZ) {
1740             tcg_gen_ext8u_i64(t1, arg);
1741             tcg_gen_shli_i64(t1, t1, 8);
1742         } else {
1743             tcg_gen_shli_i64(t1, arg, 8);
1744         }
1745 
1746         tcg_gen_or_i64(ret, t0, t1);
1747         tcg_temp_free_i64(t0);
1748         tcg_temp_free_i64(t1);
1749     }
1750 }
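
/*
 * A reference for the flag handling above, as plain C (illustrative
 * sketch only, not part of the build; the helper names are made up):
 *
 *     uint64_t bswap16_oz(uint64_t x)    // TCG_BSWAP_OZ
 *     {
 *         return ((x >> 8) & 0xff) | ((x & 0xff) << 8);
 *     }
 *     int64_t bswap16_os(uint64_t x)     // TCG_BSWAP_OS
 *     {
 *         return (int16_t)bswap16_oz(x); // sign-extend from bit 15
 *     }
 *
 * TCG_BSWAP_IZ merely promises that bits 16..63 of the input are
 * already zero, which lets the expansion skip the ext8u of t0.
 */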
1751 
1752 void tcg_gen_bswap32_i64(TCGv_i64 ret, TCGv_i64 arg, int flags)
1753 {
1754     /* Only one extension flag may be present. */
1755     tcg_debug_assert(!(flags & TCG_BSWAP_OS) || !(flags & TCG_BSWAP_OZ));
1756 
1757     if (TCG_TARGET_REG_BITS == 32) {
1758         tcg_gen_bswap32_i32(TCGV_LOW(ret), TCGV_LOW(arg));
1759         if (flags & TCG_BSWAP_OS) {
1760             tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31);
1761         } else {
1762             tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
1763         }
1764     } else if (TCG_TARGET_HAS_bswap32_i64) {
1765         tcg_gen_op3i_i64(INDEX_op_bswap32_i64, ret, arg, flags);
1766     } else {
1767         TCGv_i64 t0 = tcg_temp_ebb_new_i64();
1768         TCGv_i64 t1 = tcg_temp_ebb_new_i64();
1769         TCGv_i64 t2 = tcg_constant_i64(0x00ff00ff);
1770 
1771                                             /* arg = xxxxabcd */
1772         tcg_gen_shri_i64(t0, arg, 8);       /*  t0 = .xxxxabc */
1773         tcg_gen_and_i64(t1, arg, t2);       /*  t1 = .....b.d */
1774         tcg_gen_and_i64(t0, t0, t2);        /*  t0 = .....a.c */
1775         tcg_gen_shli_i64(t1, t1, 8);        /*  t1 = ....b.d. */
1776         tcg_gen_or_i64(ret, t0, t1);        /* ret = ....badc */
1777 
1778         tcg_gen_shli_i64(t1, ret, 48);      /*  t1 = dc...... */
1779         tcg_gen_shri_i64(t0, ret, 16);      /*  t0 = ......ba */
1780         if (flags & TCG_BSWAP_OS) {
1781             tcg_gen_sari_i64(t1, t1, 32);   /*  t1 = ssssdc.. */
1782         } else {
1783             tcg_gen_shri_i64(t1, t1, 32);   /*  t1 = ....dc.. */
1784         }
1785         tcg_gen_or_i64(ret, t0, t1);        /* ret = ssssdcba */
1786 
1787         tcg_temp_free_i64(t0);
1788         tcg_temp_free_i64(t1);
1789     }
1790 }
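
/*
 * The fallback above is the usual mask-and-shift byte reversal; a
 * reference for a plain 32-bit value (illustrative only):
 *
 *     uint32_t bswap32_ref(uint32_t x)
 *     {
 *         x = ((x >> 8) & 0x00ff00ff) | ((x & 0x00ff00ff) << 8);
 *         return (x >> 16) | (x << 16);       // rotate by 16
 *     }
 *
 * The TCG version performs the final 16-bit rotate as a shift pair so
 * that the OS/OZ extension falls out of the choice of sari vs shri.
 */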
1791 
1792 void tcg_gen_bswap64_i64(TCGv_i64 ret, TCGv_i64 arg)
1793 {
1794     if (TCG_TARGET_REG_BITS == 32) {
1795         TCGv_i32 t0, t1;
1796         t0 = tcg_temp_ebb_new_i32();
1797         t1 = tcg_temp_ebb_new_i32();
1798 
1799         tcg_gen_bswap32_i32(t0, TCGV_LOW(arg));
1800         tcg_gen_bswap32_i32(t1, TCGV_HIGH(arg));
1801         tcg_gen_mov_i32(TCGV_LOW(ret), t1);
1802         tcg_gen_mov_i32(TCGV_HIGH(ret), t0);
1803         tcg_temp_free_i32(t0);
1804         tcg_temp_free_i32(t1);
1805     } else if (TCG_TARGET_HAS_bswap64_i64) {
1806         tcg_gen_op3i_i64(INDEX_op_bswap64_i64, ret, arg, 0);
1807     } else {
1808         TCGv_i64 t0 = tcg_temp_ebb_new_i64();
1809         TCGv_i64 t1 = tcg_temp_ebb_new_i64();
1810         TCGv_i64 t2 = tcg_temp_ebb_new_i64();
1811 
1812                                         /* arg = abcdefgh */
1813         tcg_gen_movi_i64(t2, 0x00ff00ff00ff00ffull);
1814         tcg_gen_shri_i64(t0, arg, 8);   /*  t0 = .abcdefg */
1815         tcg_gen_and_i64(t1, arg, t2);   /*  t1 = .b.d.f.h */
1816         tcg_gen_and_i64(t0, t0, t2);    /*  t0 = .a.c.e.g */
1817         tcg_gen_shli_i64(t1, t1, 8);    /*  t1 = b.d.f.h. */
1818         tcg_gen_or_i64(ret, t0, t1);    /* ret = badcfehg */
1819 
1820         tcg_gen_movi_i64(t2, 0x0000ffff0000ffffull);
1821         tcg_gen_shri_i64(t0, ret, 16);  /*  t0 = ..badcfe */
1822         tcg_gen_and_i64(t1, ret, t2);   /*  t1 = ..dc..hg */
1823         tcg_gen_and_i64(t0, t0, t2);    /*  t0 = ..ba..fe */
1824         tcg_gen_shli_i64(t1, t1, 16);   /*  t1 = dc..hg.. */
1825         tcg_gen_or_i64(ret, t0, t1);    /* ret = dcbahgfe */
1826 
1827         tcg_gen_shri_i64(t0, ret, 32);  /*  t0 = ....dcba */
1828         tcg_gen_shli_i64(t1, ret, 32);  /*  t1 = hgfe.... */
1829         tcg_gen_or_i64(ret, t0, t1);    /* ret = hgfedcba */
1830 
1831         tcg_temp_free_i64(t0);
1832         tcg_temp_free_i64(t1);
1833         tcg_temp_free_i64(t2);
1834     }
1835 }
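
/*
 * The fallback is the standard three-round butterfly: swap adjacent
 * bytes, then adjacent 16-bit units, then the two 32-bit halves.
 * Reference in plain C (illustrative only):
 *
 *     uint64_t bswap64_ref(uint64_t x)
 *     {
 *         x = ((x >> 8) & 0x00ff00ff00ff00ffull)
 *           | ((x & 0x00ff00ff00ff00ffull) << 8);
 *         x = ((x >> 16) & 0x0000ffff0000ffffull)
 *           | ((x & 0x0000ffff0000ffffull) << 16);
 *         return (x >> 32) | (x << 32);
 *     }
 */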
1836 
1837 void tcg_gen_hswap_i64(TCGv_i64 ret, TCGv_i64 arg)
1838 {
1839     uint64_t m = 0x0000ffff0000ffffull;
1840     TCGv_i64 t0 = tcg_temp_ebb_new_i64();
1841     TCGv_i64 t1 = tcg_temp_ebb_new_i64();
1842 
1843     /* See include/qemu/bitops.h, hswap64. */
1844     tcg_gen_rotli_i64(t1, arg, 32);
1845     tcg_gen_andi_i64(t0, t1, m);
1846     tcg_gen_shli_i64(t0, t0, 16);
1847     tcg_gen_shri_i64(t1, t1, 16);
1848     tcg_gen_andi_i64(t1, t1, m);
1849     tcg_gen_or_i64(ret, t0, t1);
1850 
1851     tcg_temp_free_i64(t0);
1852     tcg_temp_free_i64(t1);
1853 }
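
/*
 * Paraphrasing hswap64 from include/qemu/bitops.h: rotating by 32
 * first swaps the two 32-bit halves, after which exchanging the
 * 16-bit units within each half yields the full half-word reversal:
 *
 *     h = rol64(h, 32);
 *     h = ((h & m) << 16) | ((h >> 16) & m);  // m = 0x0000ffff0000ffff
 */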
1854 
1855 void tcg_gen_wswap_i64(TCGv_i64 ret, TCGv_i64 arg)
1856 {
1857     /* Swapping two 32-bit elements is a rotate. */
1858     tcg_gen_rotli_i64(ret, arg, 32);
1859 }
1860 
1861 void tcg_gen_not_i64(TCGv_i64 ret, TCGv_i64 arg)
1862 {
1863     if (TCG_TARGET_REG_BITS == 32) {
1864         tcg_gen_not_i32(TCGV_LOW(ret), TCGV_LOW(arg));
1865         tcg_gen_not_i32(TCGV_HIGH(ret), TCGV_HIGH(arg));
1866     } else if (TCG_TARGET_HAS_not_i64) {
1867         tcg_gen_op2_i64(INDEX_op_not_i64, ret, arg);
1868     } else {
1869         tcg_gen_xori_i64(ret, arg, -1);
1870     }
1871 }
1872 
1873 void tcg_gen_andc_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
1874 {
1875     if (TCG_TARGET_REG_BITS == 32) {
1876         tcg_gen_andc_i32(TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_LOW(arg2));
1877         tcg_gen_andc_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), TCGV_HIGH(arg2));
1878     } else if (TCG_TARGET_HAS_andc_i64) {
1879         tcg_gen_op3_i64(INDEX_op_andc_i64, ret, arg1, arg2);
1880     } else {
1881         TCGv_i64 t0 = tcg_temp_ebb_new_i64();
1882         tcg_gen_not_i64(t0, arg2);
1883         tcg_gen_and_i64(ret, arg1, t0);
1884         tcg_temp_free_i64(t0);
1885     }
1886 }
1887 
1888 void tcg_gen_eqv_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
1889 {
1890     if (TCG_TARGET_REG_BITS == 32) {
1891         tcg_gen_eqv_i32(TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_LOW(arg2));
1892         tcg_gen_eqv_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), TCGV_HIGH(arg2));
1893     } else if (TCG_TARGET_HAS_eqv_i64) {
1894         tcg_gen_op3_i64(INDEX_op_eqv_i64, ret, arg1, arg2);
1895     } else {
1896         tcg_gen_xor_i64(ret, arg1, arg2);
1897         tcg_gen_not_i64(ret, ret);
1898     }
1899 }
1900 
1901 void tcg_gen_nand_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
1902 {
1903     if (TCG_TARGET_REG_BITS == 32) {
1904         tcg_gen_nand_i32(TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_LOW(arg2));
1905         tcg_gen_nand_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), TCGV_HIGH(arg2));
1906     } else if (TCG_TARGET_HAS_nand_i64) {
1907         tcg_gen_op3_i64(INDEX_op_nand_i64, ret, arg1, arg2);
1908     } else {
1909         tcg_gen_and_i64(ret, arg1, arg2);
1910         tcg_gen_not_i64(ret, ret);
1911     }
1912 }
1913 
1914 void tcg_gen_nor_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
1915 {
1916     if (TCG_TARGET_REG_BITS == 32) {
1917         tcg_gen_nor_i32(TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_LOW(arg2));
1918         tcg_gen_nor_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), TCGV_HIGH(arg2));
1919     } else if (TCG_TARGET_HAS_nor_i64) {
1920         tcg_gen_op3_i64(INDEX_op_nor_i64, ret, arg1, arg2);
1921     } else {
1922         tcg_gen_or_i64(ret, arg1, arg2);
1923         tcg_gen_not_i64(ret, ret);
1924     }
1925 }
1926 
1927 void tcg_gen_orc_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
1928 {
1929     if (TCG_TARGET_REG_BITS == 32) {
1930         tcg_gen_orc_i32(TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_LOW(arg2));
1931         tcg_gen_orc_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), TCGV_HIGH(arg2));
1932     } else if (TCG_TARGET_HAS_orc_i64) {
1933         tcg_gen_op3_i64(INDEX_op_orc_i64, ret, arg1, arg2);
1934     } else {
1935         TCGv_i64 t0 = tcg_temp_ebb_new_i64();
1936         tcg_gen_not_i64(t0, arg2);
1937         tcg_gen_or_i64(ret, arg1, t0);
1938         tcg_temp_free_i64(t0);
1939     }
1940 }
1941 
1942 void tcg_gen_clz_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
1943 {
1944     if (TCG_TARGET_HAS_clz_i64) {
1945         tcg_gen_op3_i64(INDEX_op_clz_i64, ret, arg1, arg2);
1946     } else {
1947         gen_helper_clz_i64(ret, arg1, arg2);
1948     }
1949 }
1950 
1951 void tcg_gen_clzi_i64(TCGv_i64 ret, TCGv_i64 arg1, uint64_t arg2)
1952 {
1953     if (TCG_TARGET_REG_BITS == 32
1954         && TCG_TARGET_HAS_clz_i32
1955         && arg2 <= 0xffffffffu) {
1956         TCGv_i32 t = tcg_temp_ebb_new_i32();
1957         tcg_gen_clzi_i32(t, TCGV_LOW(arg1), arg2 - 32);
1958         tcg_gen_addi_i32(t, t, 32);
1959         tcg_gen_clz_i32(TCGV_LOW(ret), TCGV_HIGH(arg1), t);
1960         tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
1961         tcg_temp_free_i32(t);
1962     } else {
1963         tcg_gen_clz_i64(ret, arg1, tcg_constant_i64(arg2));
1964     }
1965 }
1966 
1967 void tcg_gen_ctz_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
1968 {
1969     if (TCG_TARGET_HAS_ctz_i64) {
1970         tcg_gen_op3_i64(INDEX_op_ctz_i64, ret, arg1, arg2);
1971     } else if (TCG_TARGET_HAS_ctpop_i64 || TCG_TARGET_HAS_clz_i64) {
1972         TCGv_i64 z, t = tcg_temp_ebb_new_i64();
1973 
1974         if (TCG_TARGET_HAS_ctpop_i64) {
1975             tcg_gen_subi_i64(t, arg1, 1);
1976             tcg_gen_andc_i64(t, t, arg1);
1977             tcg_gen_ctpop_i64(t, t);
1978         } else {
1979             /* Since all non-x86 hosts have clz(0) == 64, don't fight it.  */
1980             tcg_gen_neg_i64(t, arg1);
1981             tcg_gen_and_i64(t, t, arg1);
1982             tcg_gen_clzi_i64(t, t, 64);
1983             tcg_gen_xori_i64(t, t, 63);
1984         }
1985         z = tcg_constant_i64(0);
1986         tcg_gen_movcond_i64(TCG_COND_EQ, ret, arg1, z, arg2, t);
1987         tcg_temp_free_i64(t);
1988         tcg_temp_free_i64(z);
1989     } else {
1990         gen_helper_ctz_i64(ret, arg1, arg2);
1991     }
1992 }
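
/*
 * Both expansions above rely on isolating the lowest set bit.  For
 * x != 0 (the x == 0 case is patched up by the movcond):
 *
 *     ctz64(x) == ctpop64((x - 1) & ~x)   // ones below the lowest set bit
 *     ctz64(x) == clz64(x & -x) ^ 63      // lowest set bit is 1 << ctz
 *
 * The xor with 63 is clz(1 << k) == 63 - k, rewritten without a
 * subtraction.
 */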
1993 
1994 void tcg_gen_ctzi_i64(TCGv_i64 ret, TCGv_i64 arg1, uint64_t arg2)
1995 {
1996     if (TCG_TARGET_REG_BITS == 32
1997         && TCG_TARGET_HAS_ctz_i32
1998         && arg2 <= 0xffffffffu) {
1999         TCGv_i32 t32 = tcg_temp_ebb_new_i32();
2000         tcg_gen_ctzi_i32(t32, TCGV_HIGH(arg1), arg2 - 32);
2001         tcg_gen_addi_i32(t32, t32, 32);
2002         tcg_gen_ctz_i32(TCGV_LOW(ret), TCGV_LOW(arg1), t32);
2003         tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
2004         tcg_temp_free_i32(t32);
2005     } else if (!TCG_TARGET_HAS_ctz_i64
2006                && TCG_TARGET_HAS_ctpop_i64
2007                && arg2 == 64) {
2008         /* This equivalence has the advantage of not requiring a fixup.  */
2009         TCGv_i64 t = tcg_temp_ebb_new_i64();
2010         tcg_gen_subi_i64(t, arg1, 1);
2011         tcg_gen_andc_i64(t, t, arg1);
2012         tcg_gen_ctpop_i64(ret, t);
2013         tcg_temp_free_i64(t);
2014     } else {
2015         tcg_gen_ctz_i64(ret, arg1, tcg_constant_i64(arg2));
2016     }
2017 }
2018 
2019 void tcg_gen_clrsb_i64(TCGv_i64 ret, TCGv_i64 arg)
2020 {
2021     if (TCG_TARGET_HAS_clz_i64 || TCG_TARGET_HAS_clz_i32) {
2022         TCGv_i64 t = tcg_temp_ebb_new_i64();
2023         tcg_gen_sari_i64(t, arg, 63);
2024         tcg_gen_xor_i64(t, t, arg);
2025         tcg_gen_clzi_i64(t, t, 64);
2026         tcg_gen_subi_i64(ret, t, 1);
2027         tcg_temp_free_i64(t);
2028     } else {
2029         gen_helper_clrsb_i64(ret, arg);
2030     }
2031 }
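
/*
 * The expansion computes "count leading redundant sign bits" via
 * clrsb64(x) == clz64(x ^ (x >> 63)) - 1: the xor folds negative
 * values onto their complement, so the sign bit itself always
 * survives as a leading zero and is discounted by the final
 * subtraction.  Note that clzi_i64(t, t, 64) makes the x == 0 case
 * come out as 64 - 1 == 63, matching __builtin_clrsbll(0).
 */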
2032 
2033 void tcg_gen_ctpop_i64(TCGv_i64 ret, TCGv_i64 arg1)
2034 {
2035     if (TCG_TARGET_HAS_ctpop_i64) {
2036         tcg_gen_op2_i64(INDEX_op_ctpop_i64, ret, arg1);
2037     } else if (TCG_TARGET_REG_BITS == 32 && TCG_TARGET_HAS_ctpop_i32) {
2038         tcg_gen_ctpop_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1));
2039         tcg_gen_ctpop_i32(TCGV_LOW(ret), TCGV_LOW(arg1));
2040         tcg_gen_add_i32(TCGV_LOW(ret), TCGV_LOW(ret), TCGV_HIGH(ret));
2041         tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
2042     } else {
2043         gen_helper_ctpop_i64(ret, arg1);
2044     }
2045 }
2046 
2047 void tcg_gen_rotl_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
2048 {
2049     if (TCG_TARGET_HAS_rot_i64) {
2050         tcg_gen_op3_i64(INDEX_op_rotl_i64, ret, arg1, arg2);
2051     } else {
2052         TCGv_i64 t0, t1;
2053         t0 = tcg_temp_ebb_new_i64();
2054         t1 = tcg_temp_ebb_new_i64();
2055         tcg_gen_shl_i64(t0, arg1, arg2);
2056         tcg_gen_subfi_i64(t1, 64, arg2);
2057         tcg_gen_shr_i64(t1, arg1, t1);
2058         tcg_gen_or_i64(ret, t0, t1);
2059         tcg_temp_free_i64(t0);
2060         tcg_temp_free_i64(t1);
2061     }
2062 }
2063 
2064 void tcg_gen_rotli_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
2065 {
2066     tcg_debug_assert(arg2 >= 0 && arg2 < 64);
2067     /* some cases can be optimized here */
2068     if (arg2 == 0) {
2069         tcg_gen_mov_i64(ret, arg1);
2070     } else if (TCG_TARGET_HAS_rot_i64) {
2071         tcg_gen_rotl_i64(ret, arg1, tcg_constant_i64(arg2));
2072     } else {
2073         TCGv_i64 t0, t1;
2074         t0 = tcg_temp_ebb_new_i64();
2075         t1 = tcg_temp_ebb_new_i64();
2076         tcg_gen_shli_i64(t0, arg1, arg2);
2077         tcg_gen_shri_i64(t1, arg1, 64 - arg2);
2078         tcg_gen_or_i64(ret, t0, t1);
2079         tcg_temp_free_i64(t0);
2080         tcg_temp_free_i64(t1);
2081     }
2082 }
2083 
2084 void tcg_gen_rotr_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
2085 {
2086     if (TCG_TARGET_HAS_rot_i64) {
2087         tcg_gen_op3_i64(INDEX_op_rotr_i64, ret, arg1, arg2);
2088     } else {
2089         TCGv_i64 t0, t1;
2090         t0 = tcg_temp_ebb_new_i64();
2091         t1 = tcg_temp_ebb_new_i64();
2092         tcg_gen_shr_i64(t0, arg1, arg2);
2093         tcg_gen_subfi_i64(t1, 64, arg2);
2094         tcg_gen_shl_i64(t1, arg1, t1);
2095         tcg_gen_or_i64(ret, t0, t1);
2096         tcg_temp_free_i64(t0);
2097         tcg_temp_free_i64(t1);
2098     }
2099 }
2100 
2101 void tcg_gen_rotri_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
2102 {
2103     tcg_debug_assert(arg2 >= 0 && arg2 < 64);
2104     /* some cases can be optimized here */
2105     if (arg2 == 0) {
2106         tcg_gen_mov_i64(ret, arg1);
2107     } else {
2108         tcg_gen_rotli_i64(ret, arg1, 64 - arg2);
2109     }
2110 }
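
/*
 * A right rotate is just the complementary left rotate, so for
 * 0 < n < 64:
 *
 *     rotr64(x, n) == rotl64(x, 64 - n) == (x >> n) | (x << (64 - n));
 *
 * The n == 0 special case matters because 64 - 0 == 64 is not a valid
 * count for tcg_gen_rotli_i64.
 */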
2111 
2112 void tcg_gen_deposit_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2,
2113                          unsigned int ofs, unsigned int len)
2114 {
2115     uint64_t mask;
2116     TCGv_i64 t1;
2117 
2118     tcg_debug_assert(ofs < 64);
2119     tcg_debug_assert(len > 0);
2120     tcg_debug_assert(len <= 64);
2121     tcg_debug_assert(ofs + len <= 64);
2122 
2123     if (len == 64) {
2124         tcg_gen_mov_i64(ret, arg2);
2125         return;
2126     }
2127     if (TCG_TARGET_HAS_deposit_i64 && TCG_TARGET_deposit_i64_valid(ofs, len)) {
2128         tcg_gen_op5ii_i64(INDEX_op_deposit_i64, ret, arg1, arg2, ofs, len);
2129         return;
2130     }
2131 
2132     if (TCG_TARGET_REG_BITS == 32) {
2133         if (ofs >= 32) {
2134             tcg_gen_deposit_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1),
2135                                 TCGV_LOW(arg2), ofs - 32, len);
2136             tcg_gen_mov_i32(TCGV_LOW(ret), TCGV_LOW(arg1));
2137             return;
2138         }
2139         if (ofs + len <= 32) {
2140             tcg_gen_deposit_i32(TCGV_LOW(ret), TCGV_LOW(arg1),
2141                                 TCGV_LOW(arg2), ofs, len);
2142             tcg_gen_mov_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1));
2143             return;
2144         }
2145     }
2146 
2147     t1 = tcg_temp_ebb_new_i64();
2148 
2149     if (TCG_TARGET_HAS_extract2_i64) {
2150         if (ofs + len == 64) {
2151             tcg_gen_shli_i64(t1, arg1, len);
2152             tcg_gen_extract2_i64(ret, t1, arg2, len);
2153             goto done;
2154         }
2155         if (ofs == 0) {
2156             tcg_gen_extract2_i64(ret, arg1, arg2, len);
2157             tcg_gen_rotli_i64(ret, ret, len);
2158             goto done;
2159         }
2160     }
2161 
2162     mask = (1ull << len) - 1;
2163     if (ofs + len < 64) {
2164         tcg_gen_andi_i64(t1, arg2, mask);
2165         tcg_gen_shli_i64(t1, t1, ofs);
2166     } else {
2167         tcg_gen_shli_i64(t1, arg2, ofs);
2168     }
2169     tcg_gen_andi_i64(ret, arg1, ~(mask << ofs));
2170     tcg_gen_or_i64(ret, ret, t1);
2171  done:
2172     tcg_temp_free_i64(t1);
2173 }
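
/*
 * The mask-based fallback matches deposit64 from include/qemu/bitops.h
 * (illustrative paraphrase; len < 64 is guaranteed at this point):
 *
 *     uint64_t deposit64_ref(uint64_t dst, unsigned ofs, unsigned len,
 *                            uint64_t src)
 *     {
 *         uint64_t mask = ((1ull << len) - 1) << ofs;
 *         return (dst & ~mask) | ((src << ofs) & mask);
 *     }
 *
 * The extract2 paths instead build the result from double-word shifts,
 * which is cheaper when the field abuts bit 0 or bit 63.
 */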
2174 
2175 void tcg_gen_deposit_z_i64(TCGv_i64 ret, TCGv_i64 arg,
2176                            unsigned int ofs, unsigned int len)
2177 {
2178     tcg_debug_assert(ofs < 64);
2179     tcg_debug_assert(len > 0);
2180     tcg_debug_assert(len <= 64);
2181     tcg_debug_assert(ofs + len <= 64);
2182 
2183     if (ofs + len == 64) {
2184         tcg_gen_shli_i64(ret, arg, ofs);
2185     } else if (ofs == 0) {
2186         tcg_gen_andi_i64(ret, arg, (1ull << len) - 1);
2187     } else if (TCG_TARGET_HAS_deposit_i64
2188                && TCG_TARGET_deposit_i64_valid(ofs, len)) {
2189         TCGv_i64 zero = tcg_constant_i64(0);
2190         tcg_gen_op5ii_i64(INDEX_op_deposit_i64, ret, zero, arg, ofs, len);
2191     } else {
2192         if (TCG_TARGET_REG_BITS == 32) {
2193             if (ofs >= 32) {
2194                 tcg_gen_deposit_z_i32(TCGV_HIGH(ret), TCGV_LOW(arg),
2195                                       ofs - 32, len);
2196                 tcg_gen_movi_i32(TCGV_LOW(ret), 0);
2197                 return;
2198             }
2199             if (ofs + len <= 32) {
2200                 tcg_gen_deposit_z_i32(TCGV_LOW(ret), TCGV_LOW(arg), ofs, len);
2201                 tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
2202                 return;
2203             }
2204         }
2205         /* To help two-operand hosts, we prefer to zero-extend first,
2206            which allows ARG to stay live.  */
2207         switch (len) {
2208         case 32:
2209             if (TCG_TARGET_HAS_ext32u_i64) {
2210                 tcg_gen_ext32u_i64(ret, arg);
2211                 tcg_gen_shli_i64(ret, ret, ofs);
2212                 return;
2213             }
2214             break;
2215         case 16:
2216             if (TCG_TARGET_HAS_ext16u_i64) {
2217                 tcg_gen_ext16u_i64(ret, arg);
2218                 tcg_gen_shli_i64(ret, ret, ofs);
2219                 return;
2220             }
2221             break;
2222         case 8:
2223             if (TCG_TARGET_HAS_ext8u_i64) {
2224                 tcg_gen_ext8u_i64(ret, arg);
2225                 tcg_gen_shli_i64(ret, ret, ofs);
2226                 return;
2227             }
2228             break;
2229         }
2230         /* Otherwise prefer zero-extension over AND for code size.  */
2231         switch (ofs + len) {
2232         case 32:
2233             if (TCG_TARGET_HAS_ext32u_i64) {
2234                 tcg_gen_shli_i64(ret, arg, ofs);
2235                 tcg_gen_ext32u_i64(ret, ret);
2236                 return;
2237             }
2238             break;
2239         case 16:
2240             if (TCG_TARGET_HAS_ext16u_i64) {
2241                 tcg_gen_shli_i64(ret, arg, ofs);
2242                 tcg_gen_ext16u_i64(ret, ret);
2243                 return;
2244             }
2245             break;
2246         case 8:
2247             if (TCG_TARGET_HAS_ext8u_i64) {
2248                 tcg_gen_shli_i64(ret, arg, ofs);
2249                 tcg_gen_ext8u_i64(ret, ret);
2250                 return;
2251             }
2252             break;
2253         }
2254         tcg_gen_andi_i64(ret, arg, (1ull << len) - 1);
2255         tcg_gen_shli_i64(ret, ret, ofs);
2256     }
2257 }
2258 
2259 void tcg_gen_extract_i64(TCGv_i64 ret, TCGv_i64 arg,
2260                          unsigned int ofs, unsigned int len)
2261 {
2262     tcg_debug_assert(ofs < 64);
2263     tcg_debug_assert(len > 0);
2264     tcg_debug_assert(len <= 64);
2265     tcg_debug_assert(ofs + len <= 64);
2266 
2267     /* Canonicalize certain special cases, even if extract is supported.  */
2268     if (ofs + len == 64) {
2269         tcg_gen_shri_i64(ret, arg, 64 - len);
2270         return;
2271     }
2272     if (ofs == 0) {
2273         tcg_gen_andi_i64(ret, arg, (1ull << len) - 1);
2274         return;
2275     }
2276 
2277     if (TCG_TARGET_REG_BITS == 32) {
2278         /* Look for a 32-bit extract within one of the two words.  */
2279         if (ofs >= 32) {
2280             tcg_gen_extract_i32(TCGV_LOW(ret), TCGV_HIGH(arg), ofs - 32, len);
2281             tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
2282             return;
2283         }
2284         if (ofs + len <= 32) {
2285             tcg_gen_extract_i32(TCGV_LOW(ret), TCGV_LOW(arg), ofs, len);
2286             tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
2287             return;
2288         }
2289         /* The field is split across two words.  One double-word
2290            shift is better than two double-word shifts.  */
2291         goto do_shift_and;
2292     }
2293 
2294     if (TCG_TARGET_HAS_extract_i64
2295         && TCG_TARGET_extract_i64_valid(ofs, len)) {
2296         tcg_gen_op4ii_i64(INDEX_op_extract_i64, ret, arg, ofs, len);
2297         return;
2298     }
2299 
2300     /* Assume that zero-extension, if available, is cheaper than a shift.  */
2301     switch (ofs + len) {
2302     case 32:
2303         if (TCG_TARGET_HAS_ext32u_i64) {
2304             tcg_gen_ext32u_i64(ret, arg);
2305             tcg_gen_shri_i64(ret, ret, ofs);
2306             return;
2307         }
2308         break;
2309     case 16:
2310         if (TCG_TARGET_HAS_ext16u_i64) {
2311             tcg_gen_ext16u_i64(ret, arg);
2312             tcg_gen_shri_i64(ret, ret, ofs);
2313             return;
2314         }
2315         break;
2316     case 8:
2317         if (TCG_TARGET_HAS_ext8u_i64) {
2318             tcg_gen_ext8u_i64(ret, arg);
2319             tcg_gen_shri_i64(ret, ret, ofs);
2320             return;
2321         }
2322         break;
2323     }
2324 
2325     /* ??? Ideally we'd know what values are available for immediate AND.
2326        Assume that 8 bits are available, plus the special cases of 16 and 32,
2327        so that we get ext8u, ext16u, and ext32u.  */
2328     switch (len) {
2329     case 1 ... 8: case 16: case 32:
2330     do_shift_and:
2331         tcg_gen_shri_i64(ret, arg, ofs);
2332         tcg_gen_andi_i64(ret, ret, (1ull << len) - 1);
2333         break;
2334     default:
2335         tcg_gen_shli_i64(ret, arg, 64 - len - ofs);
2336         tcg_gen_shri_i64(ret, ret, 64 - len);
2337         break;
2338     }
2339 }
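
/*
 * The semantics being implemented, per extract64 in
 * include/qemu/bitops.h:
 *
 *     ret = (arg >> ofs) & (~0ull >> (64 - len));
 *
 * The expansions above only choose between doing the AND as an
 * explicit mask, as a zero-extension, or as a left/right shift pair.
 */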
2340 
2341 void tcg_gen_sextract_i64(TCGv_i64 ret, TCGv_i64 arg,
2342                           unsigned int ofs, unsigned int len)
2343 {
2344     tcg_debug_assert(ofs < 64);
2345     tcg_debug_assert(len > 0);
2346     tcg_debug_assert(len <= 64);
2347     tcg_debug_assert(ofs + len <= 64);
2348 
2349     /* Canonicalize certain special cases, even if sextract is supported.  */
2350     if (ofs + len == 64) {
2351         tcg_gen_sari_i64(ret, arg, 64 - len);
2352         return;
2353     }
2354     if (ofs == 0) {
2355         switch (len) {
2356         case 32:
2357             tcg_gen_ext32s_i64(ret, arg);
2358             return;
2359         case 16:
2360             tcg_gen_ext16s_i64(ret, arg);
2361             return;
2362         case 8:
2363             tcg_gen_ext8s_i64(ret, arg);
2364             return;
2365         }
2366     }
2367 
2368     if (TCG_TARGET_REG_BITS == 32) {
2369         /* Look for a 32-bit extract within one of the two words.  */
2370         if (ofs >= 32) {
2371             tcg_gen_sextract_i32(TCGV_LOW(ret), TCGV_HIGH(arg), ofs - 32, len);
2372         } else if (ofs + len <= 32) {
2373             tcg_gen_sextract_i32(TCGV_LOW(ret), TCGV_LOW(arg), ofs, len);
2374         } else if (ofs == 0) {
2375             tcg_gen_mov_i32(TCGV_LOW(ret), TCGV_LOW(arg));
2376             tcg_gen_sextract_i32(TCGV_HIGH(ret), TCGV_HIGH(arg), 0, len - 32);
2377             return;
2378         } else if (len > 32) {
2379             TCGv_i32 t = tcg_temp_ebb_new_i32();
2380             /* Extract the bits for the high word normally.  */
2381             tcg_gen_sextract_i32(t, TCGV_HIGH(arg), ofs + 32, len - 32);
2382             /* Shift the field down for the low part.  */
2383             tcg_gen_shri_i64(ret, arg, ofs);
2384             /* Overwrite the shift into the high part.  */
2385             tcg_gen_mov_i32(TCGV_HIGH(ret), t);
2386             tcg_temp_free_i32(t);
2387             return;
2388         } else {
2389             /* Shift the field down for the low part, such that the
2390                field sits at the MSB.  */
2391             tcg_gen_shri_i64(ret, arg, ofs + len - 32);
2392             /* Shift the field down from the MSB, sign extending.  */
2393             tcg_gen_sari_i32(TCGV_LOW(ret), TCGV_LOW(ret), 32 - len);
2394         }
2395         /* Sign-extend the field from 32 bits.  */
2396         tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31);
2397         return;
2398     }
2399 
2400     if (TCG_TARGET_HAS_sextract_i64
2401         && TCG_TARGET_extract_i64_valid(ofs, len)) {
2402         tcg_gen_op4ii_i64(INDEX_op_sextract_i64, ret, arg, ofs, len);
2403         return;
2404     }
2405 
2406     /* Assume that sign-extension, if available, is cheaper than a shift.  */
2407     switch (ofs + len) {
2408     case 32:
2409         if (TCG_TARGET_HAS_ext32s_i64) {
2410             tcg_gen_ext32s_i64(ret, arg);
2411             tcg_gen_sari_i64(ret, ret, ofs);
2412             return;
2413         }
2414         break;
2415     case 16:
2416         if (TCG_TARGET_HAS_ext16s_i64) {
2417             tcg_gen_ext16s_i64(ret, arg);
2418             tcg_gen_sari_i64(ret, ret, ofs);
2419             return;
2420         }
2421         break;
2422     case 8:
2423         if (TCG_TARGET_HAS_ext8s_i64) {
2424             tcg_gen_ext8s_i64(ret, arg);
2425             tcg_gen_sari_i64(ret, ret, ofs);
2426             return;
2427         }
2428         break;
2429     }
2430     switch (len) {
2431     case 32:
2432         if (TCG_TARGET_HAS_ext32s_i64) {
2433             tcg_gen_shri_i64(ret, arg, ofs);
2434             tcg_gen_ext32s_i64(ret, ret);
2435             return;
2436         }
2437         break;
2438     case 16:
2439         if (TCG_TARGET_HAS_ext16s_i64) {
2440             tcg_gen_shri_i64(ret, arg, ofs);
2441             tcg_gen_ext16s_i64(ret, ret);
2442             return;
2443         }
2444         break;
2445     case 8:
2446         if (TCG_TARGET_HAS_ext8s_i64) {
2447             tcg_gen_shri_i64(ret, arg, ofs);
2448             tcg_gen_ext8s_i64(ret, ret);
2449             return;
2450         }
2451         break;
2452     }
2453     tcg_gen_shli_i64(ret, arg, 64 - len - ofs);
2454     tcg_gen_sari_i64(ret, ret, 64 - len);
2455 }
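
/*
 * The semantics per sextract64 in include/qemu/bitops.h, which relies
 * on signed right shifts being arithmetic:
 *
 *     ret = ((int64_t)(arg << (64 - len - ofs))) >> (64 - len);
 *
 * i.e. left-justify the field, then shift it back down with sign
 * replication -- exactly the final shli/sari fallback above.
 */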
2456 
2457 /*
2458  * Extract 64 bits from a 128-bit input, ah:al, starting from ofs.
2459  * Unlike tcg_gen_extract_i64 above, len is fixed at 64.
2460  */
2461 void tcg_gen_extract2_i64(TCGv_i64 ret, TCGv_i64 al, TCGv_i64 ah,
2462                           unsigned int ofs)
2463 {
2464     tcg_debug_assert(ofs <= 64);
2465     if (ofs == 0) {
2466         tcg_gen_mov_i64(ret, al);
2467     } else if (ofs == 64) {
2468         tcg_gen_mov_i64(ret, ah);
2469     } else if (al == ah) {
2470         tcg_gen_rotri_i64(ret, al, ofs);
2471     } else if (TCG_TARGET_HAS_extract2_i64) {
2472         tcg_gen_op4i_i64(INDEX_op_extract2_i64, ret, al, ah, ofs);
2473     } else {
2474         TCGv_i64 t0 = tcg_temp_ebb_new_i64();
2475         tcg_gen_shri_i64(t0, al, ofs);
2476         tcg_gen_deposit_i64(ret, t0, ah, 64 - ofs, ofs);
2477         tcg_temp_free_i64(t0);
2478     }
2479 }
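
/*
 * For 0 < ofs < 64, the result is the 64-bit window at ofs of the
 * 128-bit concatenation ah:al, i.e.
 *
 *     ret = (al >> ofs) | (ah << (64 - ofs));
 *
 * which is what the deposit-based fallback computes: the low 64 - ofs
 * bits come from al and the top ofs bits from ah.
 */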
2480 
2481 void tcg_gen_movcond_i64(TCGCond cond, TCGv_i64 ret, TCGv_i64 c1,
2482                          TCGv_i64 c2, TCGv_i64 v1, TCGv_i64 v2)
2483 {
2484     if (cond == TCG_COND_ALWAYS) {
2485         tcg_gen_mov_i64(ret, v1);
2486     } else if (cond == TCG_COND_NEVER) {
2487         tcg_gen_mov_i64(ret, v2);
2488     } else if (TCG_TARGET_REG_BITS == 32) {
2489         TCGv_i32 t0 = tcg_temp_ebb_new_i32();
2490         TCGv_i32 t1 = tcg_temp_ebb_new_i32();
2491         tcg_gen_op6i_i32(INDEX_op_setcond2_i32, t0,
2492                          TCGV_LOW(c1), TCGV_HIGH(c1),
2493                          TCGV_LOW(c2), TCGV_HIGH(c2), cond);
2494 
2495         if (TCG_TARGET_HAS_movcond_i32) {
2496             tcg_gen_movi_i32(t1, 0);
2497             tcg_gen_movcond_i32(TCG_COND_NE, TCGV_LOW(ret), t0, t1,
2498                                 TCGV_LOW(v1), TCGV_LOW(v2));
2499             tcg_gen_movcond_i32(TCG_COND_NE, TCGV_HIGH(ret), t0, t1,
2500                                 TCGV_HIGH(v1), TCGV_HIGH(v2));
2501         } else {
2502             tcg_gen_neg_i32(t0, t0);
2503 
2504             tcg_gen_and_i32(t1, TCGV_LOW(v1), t0);
2505             tcg_gen_andc_i32(TCGV_LOW(ret), TCGV_LOW(v2), t0);
2506             tcg_gen_or_i32(TCGV_LOW(ret), TCGV_LOW(ret), t1);
2507 
2508             tcg_gen_and_i32(t1, TCGV_HIGH(v1), t0);
2509             tcg_gen_andc_i32(TCGV_HIGH(ret), TCGV_HIGH(v2), t0);
2510             tcg_gen_or_i32(TCGV_HIGH(ret), TCGV_HIGH(ret), t1);
2511         }
2512         tcg_temp_free_i32(t0);
2513         tcg_temp_free_i32(t1);
2514     } else if (TCG_TARGET_HAS_movcond_i64) {
2515         tcg_gen_op6i_i64(INDEX_op_movcond_i64, ret, c1, c2, v1, v2, cond);
2516     } else {
2517         TCGv_i64 t0 = tcg_temp_ebb_new_i64();
2518         TCGv_i64 t1 = tcg_temp_ebb_new_i64();
2519         tcg_gen_setcond_i64(cond, t0, c1, c2);
2520         tcg_gen_neg_i64(t0, t0);
2521         tcg_gen_and_i64(t1, v1, t0);
2522         tcg_gen_andc_i64(ret, v2, t0);
2523         tcg_gen_or_i64(ret, ret, t1);
2524         tcg_temp_free_i64(t0);
2525         tcg_temp_free_i64(t1);
2526     }
2527 }
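
/*
 * The final fallback is the classic branchless select: setcond yields
 * 0 or 1, negation turns that into an all-zeros or all-ones mask m,
 * and then
 *
 *     ret = (v1 & m) | (v2 & ~m);
 *
 * picks v1 when the condition held and v2 otherwise.  The 32-bit path
 * applies the same mask to both halves of the register pair.
 */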
2528 
2529 void tcg_gen_add2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 al,
2530                       TCGv_i64 ah, TCGv_i64 bl, TCGv_i64 bh)
2531 {
2532     if (TCG_TARGET_HAS_add2_i64) {
2533         tcg_gen_op6_i64(INDEX_op_add2_i64, rl, rh, al, ah, bl, bh);
2534     } else {
2535         TCGv_i64 t0 = tcg_temp_ebb_new_i64();
2536         TCGv_i64 t1 = tcg_temp_ebb_new_i64();
2537         tcg_gen_add_i64(t0, al, bl);
2538         tcg_gen_setcond_i64(TCG_COND_LTU, t1, t0, al);
2539         tcg_gen_add_i64(rh, ah, bh);
2540         tcg_gen_add_i64(rh, rh, t1);
2541         tcg_gen_mov_i64(rl, t0);
2542         tcg_temp_free_i64(t0);
2543         tcg_temp_free_i64(t1);
2544     }
2545 }
2546 
2547 void tcg_gen_sub2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 al,
2548                       TCGv_i64 ah, TCGv_i64 bl, TCGv_i64 bh)
2549 {
2550     if (TCG_TARGET_HAS_sub2_i64) {
2551         tcg_gen_op6_i64(INDEX_op_sub2_i64, rl, rh, al, ah, bl, bh);
2552     } else {
2553         TCGv_i64 t0 = tcg_temp_ebb_new_i64();
2554         TCGv_i64 t1 = tcg_temp_ebb_new_i64();
2555         tcg_gen_sub_i64(t0, al, bl);
2556         tcg_gen_setcond_i64(TCG_COND_LTU, t1, al, bl);
2557         tcg_gen_sub_i64(rh, ah, bh);
2558         tcg_gen_sub_i64(rh, rh, t1);
2559         tcg_gen_mov_i64(rl, t0);
2560         tcg_temp_free_i64(t0);
2561         tcg_temp_free_i64(t1);
2562     }
2563 }
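
/*
 * Without a host add2/sub2, the carry and borrow out of the low half
 * are recovered with an unsigned comparison, as in plain C:
 *
 *     uint64_t lo = al + bl;
 *     uint64_t carry = lo < al;      // add2: propagate into the high half
 *     uint64_t borrow = al < bl;     // sub2: likewise
 */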
2564 
2565 void tcg_gen_mulu2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 arg1, TCGv_i64 arg2)
2566 {
2567     if (TCG_TARGET_HAS_mulu2_i64) {
2568         tcg_gen_op4_i64(INDEX_op_mulu2_i64, rl, rh, arg1, arg2);
2569     } else if (TCG_TARGET_HAS_muluh_i64) {
2570         TCGv_i64 t = tcg_temp_ebb_new_i64();
2571         tcg_gen_op3_i64(INDEX_op_mul_i64, t, arg1, arg2);
2572         tcg_gen_op3_i64(INDEX_op_muluh_i64, rh, arg1, arg2);
2573         tcg_gen_mov_i64(rl, t);
2574         tcg_temp_free_i64(t);
2575     } else {
2576         TCGv_i64 t0 = tcg_temp_ebb_new_i64();
2577         tcg_gen_mul_i64(t0, arg1, arg2);
2578         gen_helper_muluh_i64(rh, arg1, arg2);
2579         tcg_gen_mov_i64(rl, t0);
2580         tcg_temp_free_i64(t0);
2581     }
2582 }
2583 
2584 void tcg_gen_muls2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 arg1, TCGv_i64 arg2)
2585 {
2586     if (TCG_TARGET_HAS_muls2_i64) {
2587         tcg_gen_op4_i64(INDEX_op_muls2_i64, rl, rh, arg1, arg2);
2588     } else if (TCG_TARGET_HAS_mulsh_i64) {
2589         TCGv_i64 t = tcg_temp_ebb_new_i64();
2590         tcg_gen_op3_i64(INDEX_op_mul_i64, t, arg1, arg2);
2591         tcg_gen_op3_i64(INDEX_op_mulsh_i64, rh, arg1, arg2);
2592         tcg_gen_mov_i64(rl, t);
2593         tcg_temp_free_i64(t);
2594     } else if (TCG_TARGET_HAS_mulu2_i64 || TCG_TARGET_HAS_muluh_i64) {
2595         TCGv_i64 t0 = tcg_temp_ebb_new_i64();
2596         TCGv_i64 t1 = tcg_temp_ebb_new_i64();
2597         TCGv_i64 t2 = tcg_temp_ebb_new_i64();
2598         TCGv_i64 t3 = tcg_temp_ebb_new_i64();
2599         tcg_gen_mulu2_i64(t0, t1, arg1, arg2);
2600         /* Adjust for negative inputs.  */
2601         tcg_gen_sari_i64(t2, arg1, 63);
2602         tcg_gen_sari_i64(t3, arg2, 63);
2603         tcg_gen_and_i64(t2, t2, arg2);
2604         tcg_gen_and_i64(t3, t3, arg1);
2605         tcg_gen_sub_i64(rh, t1, t2);
2606         tcg_gen_sub_i64(rh, rh, t3);
2607         tcg_gen_mov_i64(rl, t0);
2608         tcg_temp_free_i64(t0);
2609         tcg_temp_free_i64(t1);
2610         tcg_temp_free_i64(t2);
2611         tcg_temp_free_i64(t3);
2612     } else {
2613         TCGv_i64 t0 = tcg_temp_ebb_new_i64();
2614         tcg_gen_mul_i64(t0, arg1, arg2);
2615         gen_helper_mulsh_i64(rh, arg1, arg2);
2616         tcg_gen_mov_i64(rl, t0);
2617         tcg_temp_free_i64(t0);
2618     }
2619 }
2620 
2621 void tcg_gen_mulsu2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 arg1, TCGv_i64 arg2)
2622 {
2623     TCGv_i64 t0 = tcg_temp_ebb_new_i64();
2624     TCGv_i64 t1 = tcg_temp_ebb_new_i64();
2625     TCGv_i64 t2 = tcg_temp_ebb_new_i64();
2626     tcg_gen_mulu2_i64(t0, t1, arg1, arg2);
2627     /* Adjust for negative input for the signed arg1.  */
2628     tcg_gen_sari_i64(t2, arg1, 63);
2629     tcg_gen_and_i64(t2, t2, arg2);
2630     tcg_gen_sub_i64(rh, t1, t2);
2631     tcg_gen_mov_i64(rl, t0);
2632     tcg_temp_free_i64(t0);
2633     tcg_temp_free_i64(t1);
2634     tcg_temp_free_i64(t2);
2635 }
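
/*
 * The "adjust for negative inputs" steps come from reinterpreting an
 * unsigned full multiply: if a is viewed as signed, its value is
 * a - 2^64 when the sign bit is set, so modulo 2^64
 *
 *     mulsh(a, b)  == muluh(a, b) - (a < 0 ? b : 0) - (b < 0 ? a : 0)
 *     mulsuh(a, b) == muluh(a, b) - (a < 0 ? b : 0)
 *
 * and sari(x, 63) & y is a branchless form of (x < 0 ? y : 0).  The
 * low 64 bits of the product are unaffected.
 */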
2636 
2637 void tcg_gen_smin_i64(TCGv_i64 ret, TCGv_i64 a, TCGv_i64 b)
2638 {
2639     tcg_gen_movcond_i64(TCG_COND_LT, ret, a, b, a, b);
2640 }
2641 
2642 void tcg_gen_umin_i64(TCGv_i64 ret, TCGv_i64 a, TCGv_i64 b)
2643 {
2644     tcg_gen_movcond_i64(TCG_COND_LTU, ret, a, b, a, b);
2645 }
2646 
2647 void tcg_gen_smax_i64(TCGv_i64 ret, TCGv_i64 a, TCGv_i64 b)
2648 {
2649     tcg_gen_movcond_i64(TCG_COND_LT, ret, a, b, b, a);
2650 }
2651 
2652 void tcg_gen_umax_i64(TCGv_i64 ret, TCGv_i64 a, TCGv_i64 b)
2653 {
2654     tcg_gen_movcond_i64(TCG_COND_LTU, ret, a, b, b, a);
2655 }
2656 
2657 void tcg_gen_abs_i64(TCGv_i64 ret, TCGv_i64 a)
2658 {
2659     TCGv_i64 t = tcg_temp_ebb_new_i64();
2660 
2661     tcg_gen_sari_i64(t, a, 63);
2662     tcg_gen_xor_i64(ret, a, t);
2663     tcg_gen_sub_i64(ret, ret, t);
2664     tcg_temp_free_i64(t);
2665 }
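
/*
 * Branchless absolute value: with s = a >> 63 (arithmetic, so s is 0
 * or -1), (a ^ s) - s leaves non-negative a unchanged and computes
 * ~a + 1 == -a for negative a.  As usual, INT64_MIN maps to itself.
 */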
2666 
2667 /* Size changing operations.  */
2668 
2669 void tcg_gen_extrl_i64_i32(TCGv_i32 ret, TCGv_i64 arg)
2670 {
2671     if (TCG_TARGET_REG_BITS == 32) {
2672         tcg_gen_mov_i32(ret, TCGV_LOW(arg));
2673     } else if (TCG_TARGET_HAS_extrl_i64_i32) {
2674         tcg_gen_op2(INDEX_op_extrl_i64_i32,
2675                     tcgv_i32_arg(ret), tcgv_i64_arg(arg));
2676     } else {
2677         tcg_gen_mov_i32(ret, (TCGv_i32)arg);
2678     }
2679 }
2680 
2681 void tcg_gen_extrh_i64_i32(TCGv_i32 ret, TCGv_i64 arg)
2682 {
2683     if (TCG_TARGET_REG_BITS == 32) {
2684         tcg_gen_mov_i32(ret, TCGV_HIGH(arg));
2685     } else if (TCG_TARGET_HAS_extrh_i64_i32) {
2686         tcg_gen_op2(INDEX_op_extrh_i64_i32,
2687                     tcgv_i32_arg(ret), tcgv_i64_arg(arg));
2688     } else {
2689         TCGv_i64 t = tcg_temp_ebb_new_i64();
2690         tcg_gen_shri_i64(t, arg, 32);
2691         tcg_gen_mov_i32(ret, (TCGv_i32)t);
2692         tcg_temp_free_i64(t);
2693     }
2694 }
2695 
2696 void tcg_gen_extu_i32_i64(TCGv_i64 ret, TCGv_i32 arg)
2697 {
2698     if (TCG_TARGET_REG_BITS == 32) {
2699         tcg_gen_mov_i32(TCGV_LOW(ret), arg);
2700         tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
2701     } else {
2702         tcg_gen_op2(INDEX_op_extu_i32_i64,
2703                     tcgv_i64_arg(ret), tcgv_i32_arg(arg));
2704     }
2705 }
2706 
2707 void tcg_gen_ext_i32_i64(TCGv_i64 ret, TCGv_i32 arg)
2708 {
2709     if (TCG_TARGET_REG_BITS == 32) {
2710         tcg_gen_mov_i32(TCGV_LOW(ret), arg);
2711         tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31);
2712     } else {
2713         tcg_gen_op2(INDEX_op_ext_i32_i64,
2714                     tcgv_i64_arg(ret), tcgv_i32_arg(arg));
2715     }
2716 }
2717 
2718 void tcg_gen_concat_i32_i64(TCGv_i64 dest, TCGv_i32 low, TCGv_i32 high)
2719 {
2720     TCGv_i64 tmp;
2721 
2722     if (TCG_TARGET_REG_BITS == 32) {
2723         tcg_gen_mov_i32(TCGV_LOW(dest), low);
2724         tcg_gen_mov_i32(TCGV_HIGH(dest), high);
2725         return;
2726     }
2727 
2728     tmp = tcg_temp_ebb_new_i64();
2729     /* These extensions are only needed for type correctness.
2730        We may be able to do better given target-specific information.  */
2731     tcg_gen_extu_i32_i64(tmp, high);
2732     tcg_gen_extu_i32_i64(dest, low);
2733     /* If deposit is available, use it.  Otherwise use the extra
2734        knowledge that we have of the zero-extensions above.  */
2735     if (TCG_TARGET_HAS_deposit_i64 && TCG_TARGET_deposit_i64_valid(32, 32)) {
2736         tcg_gen_deposit_i64(dest, dest, tmp, 32, 32);
2737     } else {
2738         tcg_gen_shli_i64(tmp, tmp, 32);
2739         tcg_gen_or_i64(dest, dest, tmp);
2740     }
2741     tcg_temp_free_i64(tmp);
2742 }
2743 
2744 void tcg_gen_extr_i64_i32(TCGv_i32 lo, TCGv_i32 hi, TCGv_i64 arg)
2745 {
2746     if (TCG_TARGET_REG_BITS == 32) {
2747         tcg_gen_mov_i32(lo, TCGV_LOW(arg));
2748         tcg_gen_mov_i32(hi, TCGV_HIGH(arg));
2749     } else {
2750         tcg_gen_extrl_i64_i32(lo, arg);
2751         tcg_gen_extrh_i64_i32(hi, arg);
2752     }
2753 }
2754 
2755 void tcg_gen_extr32_i64(TCGv_i64 lo, TCGv_i64 hi, TCGv_i64 arg)
2756 {
2757     tcg_gen_ext32u_i64(lo, arg);
2758     tcg_gen_shri_i64(hi, arg, 32);
2759 }
2760 
2761 void tcg_gen_extr_i128_i64(TCGv_i64 lo, TCGv_i64 hi, TCGv_i128 arg)
2762 {
2763     tcg_gen_mov_i64(lo, TCGV128_LOW(arg));
2764     tcg_gen_mov_i64(hi, TCGV128_HIGH(arg));
2765 }
2766 
2767 void tcg_gen_concat_i64_i128(TCGv_i128 ret, TCGv_i64 lo, TCGv_i64 hi)
2768 {
2769     tcg_gen_mov_i64(TCGV128_LOW(ret), lo);
2770     tcg_gen_mov_i64(TCGV128_HIGH(ret), hi);
2771 }
2772 
2773 void tcg_gen_mov_i128(TCGv_i128 dst, TCGv_i128 src)
2774 {
2775     if (dst != src) {
2776         tcg_gen_mov_i64(TCGV128_LOW(dst), TCGV128_LOW(src));
2777         tcg_gen_mov_i64(TCGV128_HIGH(dst), TCGV128_HIGH(src));
2778     }
2779 }
2780 
2781 /* QEMU specific operations.  */
2782 
2783 void tcg_gen_exit_tb(const TranslationBlock *tb, unsigned idx)
2784 {
2785     /*
2786      * Let the jit code return the read-only version of the
2787      * TranslationBlock, so that we minimize the pc-relative
2788      * distance of the address of the exit_tb code to TB.
2789      * This will improve utilization of pc-relative address loads.
2790      *
2791      * TODO: Move this to translator_loop, so that all const
2792      * TranslationBlock pointers refer to read-only memory.
2793      * This requires coordination with targets that do not use
2794      * the translator_loop.
2795      */
2796     uintptr_t val = (uintptr_t)tcg_splitwx_to_rx((void *)tb) + idx;
2797 
2798     if (tb == NULL) {
2799         tcg_debug_assert(idx == 0);
2800     } else if (idx <= TB_EXIT_IDXMAX) {
2801 #ifdef CONFIG_DEBUG_TCG
2802         /* This is an exit following a goto_tb.  Verify that we have
2803            seen this numbered exit before, via tcg_gen_goto_tb.  */
2804         tcg_debug_assert(tcg_ctx->goto_tb_issue_mask & (1 << idx));
2805 #endif
2806     } else {
2807         /* This is an exit via the exitreq label.  */
2808         tcg_debug_assert(idx == TB_EXIT_REQUESTED);
2809     }
2810 
2811     tcg_gen_op1i(INDEX_op_exit_tb, val);
2812 }
2813 
2814 void tcg_gen_goto_tb(unsigned idx)
2815 {
2816     /* We tested CF_NO_GOTO_TB in translator_use_goto_tb. */
2817     tcg_debug_assert(!(tcg_ctx->gen_tb->cflags & CF_NO_GOTO_TB));
2818     /* We only support two chained exits.  */
2819     tcg_debug_assert(idx <= TB_EXIT_IDXMAX);
2820 #ifdef CONFIG_DEBUG_TCG
2821     /* Verify that we haven't seen this numbered exit before.  */
2822     tcg_debug_assert((tcg_ctx->goto_tb_issue_mask & (1 << idx)) == 0);
2823     tcg_ctx->goto_tb_issue_mask |= 1 << idx;
2824 #endif
2825     plugin_gen_disable_mem_helpers();
2826     tcg_gen_op1i(INDEX_op_goto_tb, idx);
2827 }
2828 
2829 void tcg_gen_lookup_and_goto_ptr(void)
2830 {
2831     TCGv_ptr ptr;
2832 
2833     if (tcg_ctx->gen_tb->cflags & CF_NO_GOTO_PTR) {
2834         tcg_gen_exit_tb(NULL, 0);
2835         return;
2836     }
2837 
2838     plugin_gen_disable_mem_helpers();
2839     ptr = tcg_temp_ebb_new_ptr();
2840     gen_helper_lookup_tb_ptr(ptr, cpu_env);
2841     tcg_gen_op1i(INDEX_op_goto_ptr, tcgv_ptr_arg(ptr));
2842     tcg_temp_free_ptr(ptr);
2843 }
2844 
2845 static inline MemOp tcg_canonicalize_memop(MemOp op, bool is64, bool st)
2846 {
2847     /* Trigger the asserts within get_alignment_bits as early as possible.  */
2848     unsigned a_bits = get_alignment_bits(op);
2849 
2850     /* Prefer MO_ALIGN+MO_XX over MO_ALIGN_XX+MO_XX */
2851     if (a_bits == (op & MO_SIZE)) {
2852         op = (op & ~MO_AMASK) | MO_ALIGN;
2853     }
2854 
2855     switch (op & MO_SIZE) {
2856     case MO_8:
2857         op &= ~MO_BSWAP;
2858         break;
2859     case MO_16:
2860         break;
2861     case MO_32:
2862         if (!is64) {
2863             op &= ~MO_SIGN;
2864         }
2865         break;
2866     case MO_64:
2867         if (is64) {
2868             op &= ~MO_SIGN;
2869             break;
2870         }
2871         /* fall through */
2872     default:
2873         g_assert_not_reached();
2874     }
2875     if (st) {
2876         op &= ~MO_SIGN;
2877     }
2878     return op;
2879 }
2880 
2881 static void gen_ldst_i32(TCGOpcode opc, TCGv_i32 val, TCGv addr,
2882                          MemOp memop, TCGArg idx)
2883 {
2884     MemOpIdx oi = make_memop_idx(memop, idx);
2885 #if TARGET_LONG_BITS == 32
2886     tcg_gen_op3i_i32(opc, val, addr, oi);
2887 #else
2888     if (TCG_TARGET_REG_BITS == 32) {
2889         tcg_gen_op4i_i32(opc, val, TCGV_LOW(addr), TCGV_HIGH(addr), oi);
2890     } else {
2891         tcg_gen_op3(opc, tcgv_i32_arg(val), tcgv_i64_arg(addr), oi);
2892     }
2893 #endif
2894 }
2895 
2896 static void gen_ldst_i64(TCGOpcode opc, TCGv_i64 val, TCGv addr,
2897                          MemOp memop, TCGArg idx)
2898 {
2899     MemOpIdx oi = make_memop_idx(memop, idx);
2900 #if TARGET_LONG_BITS == 32
2901     if (TCG_TARGET_REG_BITS == 32) {
2902         tcg_gen_op4i_i32(opc, TCGV_LOW(val), TCGV_HIGH(val), addr, oi);
2903     } else {
2904         tcg_gen_op3(opc, tcgv_i64_arg(val), tcgv_i32_arg(addr), oi);
2905     }
2906 #else
2907     if (TCG_TARGET_REG_BITS == 32) {
2908         tcg_gen_op5i_i32(opc, TCGV_LOW(val), TCGV_HIGH(val),
2909                          TCGV_LOW(addr), TCGV_HIGH(addr), oi);
2910     } else {
2911         tcg_gen_op3i_i64(opc, val, addr, oi);
2912     }
2913 #endif
2914 }
2915 
2916 static void tcg_gen_req_mo(TCGBar type)
2917 {
2918 #ifdef TCG_GUEST_DEFAULT_MO
2919     type &= TCG_GUEST_DEFAULT_MO;
2920 #endif
2921     type &= ~TCG_TARGET_DEFAULT_MO;
2922     if (type) {
2923         tcg_gen_mb(type | TCG_BAR_SC);
2924     }
2925 }
2926 
2927 static inline TCGv plugin_prep_mem_callbacks(TCGv vaddr)
2928 {
2929 #ifdef CONFIG_PLUGIN
2930     if (tcg_ctx->plugin_insn != NULL) {
2931         /* Save a copy of the vaddr for use after a load.  */
2932         TCGv temp = tcg_temp_new();
2933         tcg_gen_mov_tl(temp, vaddr);
2934         return temp;
2935     }
2936 #endif
2937     return vaddr;
2938 }
2939 
2940 static void plugin_gen_mem_callbacks(TCGv vaddr, MemOpIdx oi,
2941                                      enum qemu_plugin_mem_rw rw)
2942 {
2943 #ifdef CONFIG_PLUGIN
2944     if (tcg_ctx->plugin_insn != NULL) {
2945         qemu_plugin_meminfo_t info = make_plugin_meminfo(oi, rw);
2946         plugin_gen_empty_mem_callback(vaddr, info);
2947         tcg_temp_free(vaddr);
2948     }
2949 #endif
2950 }
2951 
2952 void tcg_gen_qemu_ld_i32(TCGv_i32 val, TCGv addr, TCGArg idx, MemOp memop)
2953 {
2954     MemOp orig_memop;
2955     MemOpIdx oi;
2956 
2957     tcg_gen_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
2958     memop = tcg_canonicalize_memop(memop, 0, 0);
2959     oi = make_memop_idx(memop, idx);
2960 
2961     orig_memop = memop;
2962     if (!TCG_TARGET_HAS_MEMORY_BSWAP && (memop & MO_BSWAP)) {
2963         memop &= ~MO_BSWAP;
2964         /* The bswap primitive benefits from zero-extended input.  */
2965         if ((memop & MO_SSIZE) == MO_SW) {
2966             memop &= ~MO_SIGN;
2967         }
2968     }
2969 
2970     addr = plugin_prep_mem_callbacks(addr);
2971     gen_ldst_i32(INDEX_op_qemu_ld_i32, val, addr, memop, idx);
2972     plugin_gen_mem_callbacks(addr, oi, QEMU_PLUGIN_MEM_R);
2973 
2974     if ((orig_memop ^ memop) & MO_BSWAP) {
2975         switch (orig_memop & MO_SIZE) {
2976         case MO_16:
2977             tcg_gen_bswap16_i32(val, val, (orig_memop & MO_SIGN
2978                                            ? TCG_BSWAP_IZ | TCG_BSWAP_OS
2979                                            : TCG_BSWAP_IZ | TCG_BSWAP_OZ));
2980             break;
2981         case MO_32:
2982             tcg_gen_bswap32_i32(val, val);
2983             break;
2984         default:
2985             g_assert_not_reached();
2986         }
2987     }
2988 }
2989 
2990 void tcg_gen_qemu_st_i32(TCGv_i32 val, TCGv addr, TCGArg idx, MemOp memop)
2991 {
2992     TCGv_i32 swap = NULL;
2993     MemOpIdx oi;
2994 
2995     tcg_gen_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST);
2996     memop = tcg_canonicalize_memop(memop, 0, 1);
2997     oi = make_memop_idx(memop, idx);
2998 
2999     if (!TCG_TARGET_HAS_MEMORY_BSWAP && (memop & MO_BSWAP)) {
3000         swap = tcg_temp_ebb_new_i32();
3001         switch (memop & MO_SIZE) {
3002         case MO_16:
3003             tcg_gen_bswap16_i32(swap, val, 0);
3004             break;
3005         case MO_32:
3006             tcg_gen_bswap32_i32(swap, val);
3007             break;
3008         default:
3009             g_assert_not_reached();
3010         }
3011         val = swap;
3012         memop &= ~MO_BSWAP;
3013     }
3014 
3015     addr = plugin_prep_mem_callbacks(addr);
3016     if (TCG_TARGET_HAS_qemu_st8_i32 && (memop & MO_SIZE) == MO_8) {
3017         gen_ldst_i32(INDEX_op_qemu_st8_i32, val, addr, memop, idx);
3018     } else {
3019         gen_ldst_i32(INDEX_op_qemu_st_i32, val, addr, memop, idx);
3020     }
3021     plugin_gen_mem_callbacks(addr, oi, QEMU_PLUGIN_MEM_W);
3022 
3023     if (swap) {
3024         tcg_temp_free_i32(swap);
3025     }
3026 }
3027 
3028 void tcg_gen_qemu_ld_i64(TCGv_i64 val, TCGv addr, TCGArg idx, MemOp memop)
3029 {
3030     MemOp orig_memop;
3031     MemOpIdx oi;
3032 
3033     if (TCG_TARGET_REG_BITS == 32 && (memop & MO_SIZE) < MO_64) {
3034         tcg_gen_qemu_ld_i32(TCGV_LOW(val), addr, idx, memop);
3035         if (memop & MO_SIGN) {
3036             tcg_gen_sari_i32(TCGV_HIGH(val), TCGV_LOW(val), 31);
3037         } else {
3038             tcg_gen_movi_i32(TCGV_HIGH(val), 0);
3039         }
3040         return;
3041     }
3042 
3043     tcg_gen_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
3044     memop = tcg_canonicalize_memop(memop, 1, 0);
3045     oi = make_memop_idx(memop, idx);
3046 
3047     orig_memop = memop;
3048     if (!TCG_TARGET_HAS_MEMORY_BSWAP && (memop & MO_BSWAP)) {
3049         memop &= ~MO_BSWAP;
3050         /* The bswap primitive benefits from zero-extended input.  */
3051         if ((memop & MO_SIGN) && (memop & MO_SIZE) < MO_64) {
3052             memop &= ~MO_SIGN;
3053         }
3054     }
3055 
3056     addr = plugin_prep_mem_callbacks(addr);
3057     gen_ldst_i64(INDEX_op_qemu_ld_i64, val, addr, memop, idx);
3058     plugin_gen_mem_callbacks(addr, oi, QEMU_PLUGIN_MEM_R);
3059 
3060     if ((orig_memop ^ memop) & MO_BSWAP) {
3061         int flags = (orig_memop & MO_SIGN
3062                      ? TCG_BSWAP_IZ | TCG_BSWAP_OS
3063                      : TCG_BSWAP_IZ | TCG_BSWAP_OZ);
3064         switch (orig_memop & MO_SIZE) {
3065         case MO_16:
3066             tcg_gen_bswap16_i64(val, val, flags);
3067             break;
3068         case MO_32:
3069             tcg_gen_bswap32_i64(val, val, flags);
3070             break;
3071         case MO_64:
3072             tcg_gen_bswap64_i64(val, val);
3073             break;
3074         default:
3075             g_assert_not_reached();
3076         }
3077     }
3078 }
3079 
3080 void tcg_gen_qemu_st_i64(TCGv_i64 val, TCGv addr, TCGArg idx, MemOp memop)
3081 {
3082     TCGv_i64 swap = NULL;
3083     MemOpIdx oi;
3084 
3085     if (TCG_TARGET_REG_BITS == 32 && (memop & MO_SIZE) < MO_64) {
3086         tcg_gen_qemu_st_i32(TCGV_LOW(val), addr, idx, memop);
3087         return;
3088     }
3089 
3090     tcg_gen_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST);
3091     memop = tcg_canonicalize_memop(memop, 1, 1);
3092     oi = make_memop_idx(memop, idx);
3093 
3094     if (!TCG_TARGET_HAS_MEMORY_BSWAP && (memop & MO_BSWAP)) {
3095         swap = tcg_temp_ebb_new_i64();
3096         switch (memop & MO_SIZE) {
3097         case MO_16:
3098             tcg_gen_bswap16_i64(swap, val, 0);
3099             break;
3100         case MO_32:
3101             tcg_gen_bswap32_i64(swap, val, 0);
3102             break;
3103         case MO_64:
3104             tcg_gen_bswap64_i64(swap, val);
3105             break;
3106         default:
3107             g_assert_not_reached();
3108         }
3109         val = swap;
3110         memop &= ~MO_BSWAP;
3111     }
3112 
3113     addr = plugin_prep_mem_callbacks(addr);
3114     gen_ldst_i64(INDEX_op_qemu_st_i64, val, addr, memop, idx);
3115     plugin_gen_mem_callbacks(addr, oi, QEMU_PLUGIN_MEM_W);
3116 
3117     if (swap) {
3118         tcg_temp_free_i64(swap);
3119     }
3120 }
3121 
3122 static void canonicalize_memop_i128_as_i64(MemOp ret[2], MemOp orig)
3123 {
3124     MemOp mop_1 = orig, mop_2;
3125 
3126     tcg_debug_assert((orig & MO_SIZE) == MO_128);
3127     tcg_debug_assert((orig & MO_SIGN) == 0);
3128 
3129     /* Use a byte ordering implemented by the host. */
3130     if (!TCG_TARGET_HAS_MEMORY_BSWAP && (orig & MO_BSWAP)) {
3131         mop_1 &= ~MO_BSWAP;
3132     }
3133 
3134     /* Reduce the size to 64-bit. */
3135     mop_1 = (mop_1 & ~MO_SIZE) | MO_64;
3136 
3137     /* Retain the alignment constraints of the original. */
3138     switch (orig & MO_AMASK) {
3139     case MO_UNALN:
3140     case MO_ALIGN_2:
3141     case MO_ALIGN_4:
3142         mop_2 = mop_1;
3143         break;
3144     case MO_ALIGN_8:
3145         /* Prefer MO_ALIGN+MO_64 to MO_ALIGN_8+MO_64. */
3146         mop_1 = (mop_1 & ~MO_AMASK) | MO_ALIGN;
3147         mop_2 = mop_1;
3148         break;
3149     case MO_ALIGN:
3150         /* Second has 8-byte alignment; first has 16-byte alignment. */
3151         mop_2 = mop_1;
3152         mop_1 = (mop_1 & ~MO_AMASK) | MO_ALIGN_16;
3153         break;
3154     case MO_ALIGN_16:
3155     case MO_ALIGN_32:
3156     case MO_ALIGN_64:
3157         /* Second has 8-byte alignment; first retains original. */
3158         mop_2 = (mop_1 & ~MO_AMASK) | MO_ALIGN;
3159         break;
3160     default:
3161         g_assert_not_reached();
3162     }
3163     ret[0] = mop_1;
3164     ret[1] = mop_2;
3165 }
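
/*
 * Worked example: a naturally aligned 16-byte access, MO_128 | MO_ALIGN,
 * splits into mop[0] = MO_64 | MO_ALIGN_16 for the first half (which
 * carries the full 16-byte alignment check) and mop[1] = MO_64 | MO_ALIGN
 * for the second, 8-byte aligned half at addr + 8.
 */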
3166 
void tcg_gen_qemu_ld_i128(TCGv_i128 val, TCGv addr, TCGArg idx, MemOp memop)
{
    MemOp mop[2];
    TCGv addr_p8;
    TCGv_i64 x, y;

    canonicalize_memop_i128_as_i64(mop, memop);

    tcg_gen_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
    addr = plugin_prep_mem_callbacks(addr);

    /* TODO: respect atomicity of the operation. */
    /* TODO: allow the tcg backend to see the whole operation. */

    /*
     * Since there are no global TCGv_i128, there is no visible state
     * changed if the second load faults.  Load directly into the two
     * subwords.
     */
    if ((memop & MO_BSWAP) == MO_LE) {
        x = TCGV128_LOW(val);
        y = TCGV128_HIGH(val);
    } else {
        x = TCGV128_HIGH(val);
        y = TCGV128_LOW(val);
    }

    gen_ldst_i64(INDEX_op_qemu_ld_i64, x, addr, mop[0], idx);

    if ((mop[0] ^ memop) & MO_BSWAP) {
        tcg_gen_bswap64_i64(x, x);
    }

    addr_p8 = tcg_temp_new();
    tcg_gen_addi_tl(addr_p8, addr, 8);
    gen_ldst_i64(INDEX_op_qemu_ld_i64, y, addr_p8, mop[1], idx);
    tcg_temp_free(addr_p8);

    if ((mop[0] ^ memop) & MO_BSWAP) {
        tcg_gen_bswap64_i64(y, y);
    }

    plugin_gen_mem_callbacks(addr, make_memop_idx(memop, idx),
                             QEMU_PLUGIN_MEM_R);
}

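/*
 * Store a 128-bit value as two 64-bit stores, mirroring the load above.
 * When the host cannot perform the byte-swapped access directly, each
 * half is swapped through a scratch temporary before being stored.
 */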
void tcg_gen_qemu_st_i128(TCGv_i128 val, TCGv addr, TCGArg idx, MemOp memop)
{
    MemOp mop[2];
    TCGv addr_p8;
    TCGv_i64 x, y;

    canonicalize_memop_i128_as_i64(mop, memop);

    tcg_gen_req_mo(TCG_MO_ST_LD | TCG_MO_ST_ST);
    addr = plugin_prep_mem_callbacks(addr);

    /* TODO: respect atomicity of the operation. */
    /* TODO: allow the tcg backend to see the whole operation. */

    if ((memop & MO_BSWAP) == MO_LE) {
        x = TCGV128_LOW(val);
        y = TCGV128_HIGH(val);
    } else {
        x = TCGV128_HIGH(val);
        y = TCGV128_LOW(val);
    }

    addr_p8 = tcg_temp_new();
    if ((mop[0] ^ memop) & MO_BSWAP) {
        TCGv_i64 t = tcg_temp_ebb_new_i64();

        tcg_gen_bswap64_i64(t, x);
        gen_ldst_i64(INDEX_op_qemu_st_i64, t, addr, mop[0], idx);
        tcg_gen_bswap64_i64(t, y);
        tcg_gen_addi_tl(addr_p8, addr, 8);
        gen_ldst_i64(INDEX_op_qemu_st_i64, t, addr_p8, mop[1], idx);
        tcg_temp_free_i64(t);
    } else {
        gen_ldst_i64(INDEX_op_qemu_st_i64, x, addr, mop[0], idx);
        tcg_gen_addi_tl(addr_p8, addr, 8);
        gen_ldst_i64(INDEX_op_qemu_st_i64, y, addr_p8, mop[1], idx);
    }
    tcg_temp_free(addr_p8);

    plugin_gen_mem_callbacks(addr, make_memop_idx(memop, idx),
                             QEMU_PLUGIN_MEM_W);
}

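/*
 * Sign- or zero-extend the low bits of a value according to the size
 * and signedness encoded in a MemOp; sizes at or above the type width
 * reduce to a plain move.  The i64 variant below is analogous.
 */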
static void tcg_gen_ext_i32(TCGv_i32 ret, TCGv_i32 val, MemOp opc)
{
    switch (opc & MO_SSIZE) {
    case MO_SB:
        tcg_gen_ext8s_i32(ret, val);
        break;
    case MO_UB:
        tcg_gen_ext8u_i32(ret, val);
        break;
    case MO_SW:
        tcg_gen_ext16s_i32(ret, val);
        break;
    case MO_UW:
        tcg_gen_ext16u_i32(ret, val);
        break;
    default:
        tcg_gen_mov_i32(ret, val);
        break;
    }
}

static void tcg_gen_ext_i64(TCGv_i64 ret, TCGv_i64 val, MemOp opc)
{
    switch (opc & MO_SSIZE) {
    case MO_SB:
        tcg_gen_ext8s_i64(ret, val);
        break;
    case MO_UB:
        tcg_gen_ext8u_i64(ret, val);
        break;
    case MO_SW:
        tcg_gen_ext16s_i64(ret, val);
        break;
    case MO_UW:
        tcg_gen_ext16u_i64(ret, val);
        break;
    case MO_SL:
        tcg_gen_ext32s_i64(ret, val);
        break;
    case MO_UL:
        tcg_gen_ext32u_i64(ret, val);
        break;
    default:
        tcg_gen_mov_i64(ret, val);
        break;
    }
}

typedef void (*gen_atomic_cx_i32)(TCGv_i32, TCGv_env, TCGv,
                                  TCGv_i32, TCGv_i32, TCGv_i32);
typedef void (*gen_atomic_cx_i64)(TCGv_i64, TCGv_env, TCGv,
                                  TCGv_i64, TCGv_i64, TCGv_i32);
typedef void (*gen_atomic_cx_i128)(TCGv_i128, TCGv_env, TCGv,
                                   TCGv_i128, TCGv_i128, TCGv_i32);
typedef void (*gen_atomic_op_i32)(TCGv_i32, TCGv_env, TCGv,
                                  TCGv_i32, TCGv_i32);
typedef void (*gen_atomic_op_i64)(TCGv_i64, TCGv_env, TCGv,
                                  TCGv_i64, TCGv_i32);

#ifdef CONFIG_ATOMIC64
# define WITH_ATOMIC64(X) X,
#else
# define WITH_ATOMIC64(X)
#endif
#ifdef CONFIG_CMPXCHG128
# define WITH_ATOMIC128(X) X,
#else
# define WITH_ATOMIC128(X)
#endif

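/*
 * Helpers for atomic cmpxchg, indexed by operand size and endianness.
 * The 64-bit and 128-bit entries are present only when the host
 * provides the corresponding atomic primitive; callers that may hit
 * those sizes must cope with a NULL entry.
 */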
static void * const table_cmpxchg[(MO_SIZE | MO_BSWAP) + 1] = {
    [MO_8] = gen_helper_atomic_cmpxchgb,
    [MO_16 | MO_LE] = gen_helper_atomic_cmpxchgw_le,
    [MO_16 | MO_BE] = gen_helper_atomic_cmpxchgw_be,
    [MO_32 | MO_LE] = gen_helper_atomic_cmpxchgl_le,
    [MO_32 | MO_BE] = gen_helper_atomic_cmpxchgl_be,
    WITH_ATOMIC64([MO_64 | MO_LE] = gen_helper_atomic_cmpxchgq_le)
    WITH_ATOMIC64([MO_64 | MO_BE] = gen_helper_atomic_cmpxchgq_be)
    WITH_ATOMIC128([MO_128 | MO_LE] = gen_helper_atomic_cmpxchgo_le)
    WITH_ATOMIC128([MO_128 | MO_BE] = gen_helper_atomic_cmpxchgo_be)
};

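/*
 * Non-atomic cmpxchg: load the old value, select between the new and
 * old value with movcond on equality, and store the selection back
 * unconditionally.  Correct only when no other vCPU runs in parallel.
 */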
void tcg_gen_nonatomic_cmpxchg_i32(TCGv_i32 retv, TCGv addr, TCGv_i32 cmpv,
                                   TCGv_i32 newv, TCGArg idx, MemOp memop)
{
    TCGv_i32 t1 = tcg_temp_ebb_new_i32();
    TCGv_i32 t2 = tcg_temp_ebb_new_i32();

    tcg_gen_ext_i32(t2, cmpv, memop & MO_SIZE);

    tcg_gen_qemu_ld_i32(t1, addr, idx, memop & ~MO_SIGN);
    tcg_gen_movcond_i32(TCG_COND_EQ, t2, t1, t2, newv, t1);
    tcg_gen_qemu_st_i32(t2, addr, idx, memop);
    tcg_temp_free_i32(t2);

    if (memop & MO_SIGN) {
        tcg_gen_ext_i32(retv, t1, memop);
    } else {
        tcg_gen_mov_i32(retv, t1);
    }
    tcg_temp_free_i32(t1);
}

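/*
 * Atomic cmpxchg: call out to a host helper when this TB may run in
 * parallel with other vCPUs; otherwise the non-atomic expansion above
 * is sufficient.
 */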
void tcg_gen_atomic_cmpxchg_i32(TCGv_i32 retv, TCGv addr, TCGv_i32 cmpv,
                                TCGv_i32 newv, TCGArg idx, MemOp memop)
{
    gen_atomic_cx_i32 gen;
    MemOpIdx oi;

    if (!(tcg_ctx->gen_tb->cflags & CF_PARALLEL)) {
        tcg_gen_nonatomic_cmpxchg_i32(retv, addr, cmpv, newv, idx, memop);
        return;
    }

    memop = tcg_canonicalize_memop(memop, 0, 0);
    gen = table_cmpxchg[memop & (MO_SIZE | MO_BSWAP)];
    tcg_debug_assert(gen != NULL);

    oi = make_memop_idx(memop & ~MO_SIGN, idx);
    gen(retv, cpu_env, addr, cmpv, newv, tcg_constant_i32(oi));

    if (memop & MO_SIGN) {
        tcg_gen_ext_i32(retv, retv, memop);
    }
}

void tcg_gen_nonatomic_cmpxchg_i64(TCGv_i64 retv, TCGv addr, TCGv_i64 cmpv,
                                   TCGv_i64 newv, TCGArg idx, MemOp memop)
{
    TCGv_i64 t1, t2;

    if (TCG_TARGET_REG_BITS == 32 && (memop & MO_SIZE) < MO_64) {
        tcg_gen_nonatomic_cmpxchg_i32(TCGV_LOW(retv), addr, TCGV_LOW(cmpv),
                                      TCGV_LOW(newv), idx, memop);
        if (memop & MO_SIGN) {
            tcg_gen_sari_i32(TCGV_HIGH(retv), TCGV_LOW(retv), 31);
        } else {
            tcg_gen_movi_i32(TCGV_HIGH(retv), 0);
        }
        return;
    }

    t1 = tcg_temp_ebb_new_i64();
    t2 = tcg_temp_ebb_new_i64();

    tcg_gen_ext_i64(t2, cmpv, memop & MO_SIZE);

    tcg_gen_qemu_ld_i64(t1, addr, idx, memop & ~MO_SIGN);
    tcg_gen_movcond_i64(TCG_COND_EQ, t2, t1, t2, newv, t1);
    tcg_gen_qemu_st_i64(t2, addr, idx, memop);
    tcg_temp_free_i64(t2);

    if (memop & MO_SIGN) {
        tcg_gen_ext_i64(retv, t1, memop);
    } else {
        tcg_gen_mov_i64(retv, t1);
    }
    tcg_temp_free_i64(t1);
}

void tcg_gen_atomic_cmpxchg_i64(TCGv_i64 retv, TCGv addr, TCGv_i64 cmpv,
                                TCGv_i64 newv, TCGArg idx, MemOp memop)
{
    if (!(tcg_ctx->gen_tb->cflags & CF_PARALLEL)) {
        tcg_gen_nonatomic_cmpxchg_i64(retv, addr, cmpv, newv, idx, memop);
        return;
    }

    if ((memop & MO_SIZE) == MO_64) {
        gen_atomic_cx_i64 gen;

        memop = tcg_canonicalize_memop(memop, 1, 0);
        gen = table_cmpxchg[memop & (MO_SIZE | MO_BSWAP)];
        if (gen) {
            MemOpIdx oi = make_memop_idx(memop, idx);
            gen(retv, cpu_env, addr, cmpv, newv, tcg_constant_i32(oi));
            return;
        }

        gen_helper_exit_atomic(cpu_env);

        /*
         * Produce a result for a well-formed opcode stream.  This satisfies
         * liveness for set before used, which happens before this dead code
         * is removed.
         */
        tcg_gen_movi_i64(retv, 0);
        return;
    }

    if (TCG_TARGET_REG_BITS == 32) {
        tcg_gen_atomic_cmpxchg_i32(TCGV_LOW(retv), addr, TCGV_LOW(cmpv),
                                   TCGV_LOW(newv), idx, memop);
        if (memop & MO_SIGN) {
            tcg_gen_sari_i32(TCGV_HIGH(retv), TCGV_LOW(retv), 31);
        } else {
            tcg_gen_movi_i32(TCGV_HIGH(retv), 0);
        }
    } else {
        TCGv_i32 c32 = tcg_temp_ebb_new_i32();
        TCGv_i32 n32 = tcg_temp_ebb_new_i32();
        TCGv_i32 r32 = tcg_temp_ebb_new_i32();

        tcg_gen_extrl_i64_i32(c32, cmpv);
        tcg_gen_extrl_i64_i32(n32, newv);
        tcg_gen_atomic_cmpxchg_i32(r32, addr, c32, n32, idx, memop & ~MO_SIGN);
        tcg_temp_free_i32(c32);
        tcg_temp_free_i32(n32);

        tcg_gen_extu_i32_i64(retv, r32);
        tcg_temp_free_i32(r32);

        if (memop & MO_SIGN) {
            tcg_gen_ext_i64(retv, retv, memop);
        }
    }
}

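/*
 * 128-bit cmpxchg: 32-bit hosts call out to a helper; 64-bit hosts
 * expand inline as a 128-bit load, a two-word comparison, and an
 * unconditional writeback of either the new or the old value.
 */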
void tcg_gen_nonatomic_cmpxchg_i128(TCGv_i128 retv, TCGv addr, TCGv_i128 cmpv,
                                    TCGv_i128 newv, TCGArg idx, MemOp memop)
{
    if (TCG_TARGET_REG_BITS == 32) {
        /* Inline expansion below is simply too large for 32-bit hosts. */
        gen_atomic_cx_i128 gen = ((memop & MO_BSWAP) == MO_LE
                                  ? gen_helper_nonatomic_cmpxchgo_le
                                  : gen_helper_nonatomic_cmpxchgo_be);
        MemOpIdx oi = make_memop_idx(memop, idx);

        tcg_debug_assert((memop & MO_SIZE) == MO_128);
        tcg_debug_assert((memop & MO_SIGN) == 0);

        gen(retv, cpu_env, addr, cmpv, newv, tcg_constant_i32(oi));
    } else {
        TCGv_i128 oldv = tcg_temp_ebb_new_i128();
        TCGv_i128 tmpv = tcg_temp_ebb_new_i128();
        TCGv_i64 t0 = tcg_temp_ebb_new_i64();
        TCGv_i64 t1 = tcg_temp_ebb_new_i64();
        TCGv_i64 z = tcg_constant_i64(0);

        tcg_gen_qemu_ld_i128(oldv, addr, idx, memop);

        /* Compare i128 */
        tcg_gen_xor_i64(t0, TCGV128_LOW(oldv), TCGV128_LOW(cmpv));
        tcg_gen_xor_i64(t1, TCGV128_HIGH(oldv), TCGV128_HIGH(cmpv));
        tcg_gen_or_i64(t0, t0, t1);

        /* tmpv = equal ? newv : oldv */
        tcg_gen_movcond_i64(TCG_COND_EQ, TCGV128_LOW(tmpv), t0, z,
                            TCGV128_LOW(newv), TCGV128_LOW(oldv));
        tcg_gen_movcond_i64(TCG_COND_EQ, TCGV128_HIGH(tmpv), t0, z,
                            TCGV128_HIGH(newv), TCGV128_HIGH(oldv));

        /* Unconditional writeback. */
        tcg_gen_qemu_st_i128(tmpv, addr, idx, memop);
        tcg_gen_mov_i128(retv, oldv);

        tcg_temp_free_i64(t0);
        tcg_temp_free_i64(t1);
        tcg_temp_free_i128(tmpv);
        tcg_temp_free_i128(oldv);
    }
}

void tcg_gen_atomic_cmpxchg_i128(TCGv_i128 retv, TCGv addr, TCGv_i128 cmpv,
                                 TCGv_i128 newv, TCGArg idx, MemOp memop)
{
    gen_atomic_cx_i128 gen;

    if (!(tcg_ctx->gen_tb->cflags & CF_PARALLEL)) {
        tcg_gen_nonatomic_cmpxchg_i128(retv, addr, cmpv, newv, idx, memop);
        return;
    }

    tcg_debug_assert((memop & MO_SIZE) == MO_128);
    tcg_debug_assert((memop & MO_SIGN) == 0);
    gen = table_cmpxchg[memop & (MO_SIZE | MO_BSWAP)];

    if (gen) {
        MemOpIdx oi = make_memop_idx(memop, idx);
        gen(retv, cpu_env, addr, cmpv, newv, tcg_constant_i32(oi));
        return;
    }

    gen_helper_exit_atomic(cpu_env);

    /*
     * Produce a result for a well-formed opcode stream.  This satisfies
     * liveness for set before used, which happens before this dead code
     * is removed.
     */
    tcg_gen_movi_i64(TCGV128_LOW(retv), 0);
    tcg_gen_movi_i64(TCGV128_HIGH(retv), 0);
}

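/*
 * Non-atomic read-modify-write: load the old value, apply the operation
 * against the (extended) operand, store the result, and return either
 * the old or the new value as selected by new_val.
 */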
static void do_nonatomic_op_i32(TCGv_i32 ret, TCGv addr, TCGv_i32 val,
                                TCGArg idx, MemOp memop, bool new_val,
                                void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32))
{
    TCGv_i32 t1 = tcg_temp_ebb_new_i32();
    TCGv_i32 t2 = tcg_temp_ebb_new_i32();

    memop = tcg_canonicalize_memop(memop, 0, 0);

    tcg_gen_qemu_ld_i32(t1, addr, idx, memop);
    tcg_gen_ext_i32(t2, val, memop);
    gen(t2, t1, t2);
    tcg_gen_qemu_st_i32(t2, addr, idx, memop);

    tcg_gen_ext_i32(ret, (new_val ? t2 : t1), memop);
    tcg_temp_free_i32(t1);
    tcg_temp_free_i32(t2);
}

static void do_atomic_op_i32(TCGv_i32 ret, TCGv addr, TCGv_i32 val,
                             TCGArg idx, MemOp memop, void * const table[])
{
    gen_atomic_op_i32 gen;
    MemOpIdx oi;

    memop = tcg_canonicalize_memop(memop, 0, 0);

    gen = table[memop & (MO_SIZE | MO_BSWAP)];
    tcg_debug_assert(gen != NULL);

    oi = make_memop_idx(memop & ~MO_SIGN, idx);
    gen(ret, cpu_env, addr, val, tcg_constant_i32(oi));

    if (memop & MO_SIGN) {
        tcg_gen_ext_i32(ret, ret, memop);
    }
}

static void do_nonatomic_op_i64(TCGv_i64 ret, TCGv addr, TCGv_i64 val,
                                TCGArg idx, MemOp memop, bool new_val,
                                void (*gen)(TCGv_i64, TCGv_i64, TCGv_i64))
{
    TCGv_i64 t1 = tcg_temp_ebb_new_i64();
    TCGv_i64 t2 = tcg_temp_ebb_new_i64();

    memop = tcg_canonicalize_memop(memop, 1, 0);

    tcg_gen_qemu_ld_i64(t1, addr, idx, memop);
    tcg_gen_ext_i64(t2, val, memop);
    gen(t2, t1, t2);
    tcg_gen_qemu_st_i64(t2, addr, idx, memop);

    tcg_gen_ext_i64(ret, (new_val ? t2 : t1), memop);
    tcg_temp_free_i64(t1);
    tcg_temp_free_i64(t2);
}

static void do_atomic_op_i64(TCGv_i64 ret, TCGv addr, TCGv_i64 val,
                             TCGArg idx, MemOp memop, void * const table[])
{
    memop = tcg_canonicalize_memop(memop, 1, 0);

    if ((memop & MO_SIZE) == MO_64) {
#ifdef CONFIG_ATOMIC64
        gen_atomic_op_i64 gen;
        MemOpIdx oi;

        gen = table[memop & (MO_SIZE | MO_BSWAP)];
        tcg_debug_assert(gen != NULL);

        oi = make_memop_idx(memop & ~MO_SIGN, idx);
        gen(ret, cpu_env, addr, val, tcg_constant_i32(oi));
#else
        gen_helper_exit_atomic(cpu_env);
        /*
         * Produce a result, so that we have a well-formed opcode stream
         * with respect to uses of the result in the (dead) code following.
         */
        tcg_gen_movi_i64(ret, 0);
#endif /* CONFIG_ATOMIC64 */
    } else {
        TCGv_i32 v32 = tcg_temp_ebb_new_i32();
        TCGv_i32 r32 = tcg_temp_ebb_new_i32();

        tcg_gen_extrl_i64_i32(v32, val);
        do_atomic_op_i32(r32, addr, v32, idx, memop & ~MO_SIGN, table);
        tcg_temp_free_i32(v32);

        tcg_gen_extu_i32_i64(ret, r32);
        tcg_temp_free_i32(r32);

        if (memop & MO_SIGN) {
            tcg_gen_ext_i64(ret, ret, memop);
        }
    }
}

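/*
 * Define tcg_gen_atomic_NAME_i32/_i64 for one read-modify-write
 * operation: a helper table indexed by size and endianness plus the
 * two entry points.  NEW selects the returned value: 0 yields the
 * value before the operation (fetch_op), 1 the value after (op_fetch).
 */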
#define GEN_ATOMIC_HELPER(NAME, OP, NEW)                                \
static void * const table_##NAME[(MO_SIZE | MO_BSWAP) + 1] = {          \
    [MO_8] = gen_helper_atomic_##NAME##b,                               \
    [MO_16 | MO_LE] = gen_helper_atomic_##NAME##w_le,                   \
    [MO_16 | MO_BE] = gen_helper_atomic_##NAME##w_be,                   \
    [MO_32 | MO_LE] = gen_helper_atomic_##NAME##l_le,                   \
    [MO_32 | MO_BE] = gen_helper_atomic_##NAME##l_be,                   \
    WITH_ATOMIC64([MO_64 | MO_LE] = gen_helper_atomic_##NAME##q_le)     \
    WITH_ATOMIC64([MO_64 | MO_BE] = gen_helper_atomic_##NAME##q_be)     \
};                                                                      \
void tcg_gen_atomic_##NAME##_i32                                        \
    (TCGv_i32 ret, TCGv addr, TCGv_i32 val, TCGArg idx, MemOp memop)    \
{                                                                       \
    if (tcg_ctx->gen_tb->cflags & CF_PARALLEL) {                        \
        do_atomic_op_i32(ret, addr, val, idx, memop, table_##NAME);     \
    } else {                                                            \
        do_nonatomic_op_i32(ret, addr, val, idx, memop, NEW,            \
                            tcg_gen_##OP##_i32);                        \
    }                                                                   \
}                                                                       \
void tcg_gen_atomic_##NAME##_i64                                        \
    (TCGv_i64 ret, TCGv addr, TCGv_i64 val, TCGArg idx, MemOp memop)    \
{                                                                       \
    if (tcg_ctx->gen_tb->cflags & CF_PARALLEL) {                        \
        do_atomic_op_i64(ret, addr, val, idx, memop, table_##NAME);     \
    } else {                                                            \
        do_nonatomic_op_i64(ret, addr, val, idx, memop, NEW,            \
                            tcg_gen_##OP##_i64);                        \
    }                                                                   \
}

GEN_ATOMIC_HELPER(fetch_add, add, 0)
GEN_ATOMIC_HELPER(fetch_and, and, 0)
GEN_ATOMIC_HELPER(fetch_or, or, 0)
GEN_ATOMIC_HELPER(fetch_xor, xor, 0)
GEN_ATOMIC_HELPER(fetch_smin, smin, 0)
GEN_ATOMIC_HELPER(fetch_umin, umin, 0)
GEN_ATOMIC_HELPER(fetch_smax, smax, 0)
GEN_ATOMIC_HELPER(fetch_umax, umax, 0)

GEN_ATOMIC_HELPER(add_fetch, add, 1)
GEN_ATOMIC_HELPER(and_fetch, and, 1)
GEN_ATOMIC_HELPER(or_fetch, or, 1)
GEN_ATOMIC_HELPER(xor_fetch, xor, 1)
GEN_ATOMIC_HELPER(smin_fetch, smin, 1)
GEN_ATOMIC_HELPER(umin_fetch, umin, 1)
GEN_ATOMIC_HELPER(smax_fetch, smax, 1)
GEN_ATOMIC_HELPER(umax_fetch, umax, 1)

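/*
 * xchg is expressed as an atomic operation whose "op" discards the
 * loaded value and produces the stored operand unchanged.
 */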
static void tcg_gen_mov2_i32(TCGv_i32 r, TCGv_i32 a, TCGv_i32 b)
{
    tcg_gen_mov_i32(r, b);
}

static void tcg_gen_mov2_i64(TCGv_i64 r, TCGv_i64 a, TCGv_i64 b)
{
    tcg_gen_mov_i64(r, b);
}

GEN_ATOMIC_HELPER(xchg, mov2, 0)

#undef GEN_ATOMIC_HELPER