xref: /openbmc/qemu/tcg/tcg-op.c (revision b14df228)
1 /*
2  * Tiny Code Generator for QEMU
3  *
4  * Copyright (c) 2008 Fabrice Bellard
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24 
25 #include "qemu/osdep.h"
26 #include "exec/exec-all.h"
27 #include "tcg/tcg.h"
28 #include "tcg/tcg-op.h"
29 #include "tcg/tcg-mo.h"
30 #include "exec/plugin-gen.h"
31 
/*
 * Cut down on the number of ifdefs further below.  The assumption is
 * that every use of TCGV_HIGH and TCGV_LOW sits behind a condition the
 * compiler can eliminate.  On a 64-bit host the two names are mapped to
 * functions that are only declared here, so a use that survives is
 * presumably reported as an unresolved symbol at link time.
 */
#if TCG_TARGET_REG_BITS == 64
TCGv_i32 TCGV_HIGH_link_error(TCGv_i64);
TCGv_i32 TCGV_LOW_link_error(TCGv_i64);
#define TCGV_HIGH TCGV_HIGH_link_error
#define TCGV_LOW  TCGV_LOW_link_error
#endif
41 
42 void tcg_gen_op1(TCGOpcode opc, TCGArg a1)
43 {
44     TCGOp *op = tcg_emit_op(opc);
45     op->args[0] = a1;
46 }
47 
48 void tcg_gen_op2(TCGOpcode opc, TCGArg a1, TCGArg a2)
49 {
50     TCGOp *op = tcg_emit_op(opc);
51     op->args[0] = a1;
52     op->args[1] = a2;
53 }
54 
55 void tcg_gen_op3(TCGOpcode opc, TCGArg a1, TCGArg a2, TCGArg a3)
56 {
57     TCGOp *op = tcg_emit_op(opc);
58     op->args[0] = a1;
59     op->args[1] = a2;
60     op->args[2] = a3;
61 }
62 
63 void tcg_gen_op4(TCGOpcode opc, TCGArg a1, TCGArg a2, TCGArg a3, TCGArg a4)
64 {
65     TCGOp *op = tcg_emit_op(opc);
66     op->args[0] = a1;
67     op->args[1] = a2;
68     op->args[2] = a3;
69     op->args[3] = a4;
70 }
71 
72 void tcg_gen_op5(TCGOpcode opc, TCGArg a1, TCGArg a2, TCGArg a3,
73                  TCGArg a4, TCGArg a5)
74 {
75     TCGOp *op = tcg_emit_op(opc);
76     op->args[0] = a1;
77     op->args[1] = a2;
78     op->args[2] = a3;
79     op->args[3] = a4;
80     op->args[4] = a5;
81 }
82 
83 void tcg_gen_op6(TCGOpcode opc, TCGArg a1, TCGArg a2, TCGArg a3,
84                  TCGArg a4, TCGArg a5, TCGArg a6)
85 {
86     TCGOp *op = tcg_emit_op(opc);
87     op->args[0] = a1;
88     op->args[1] = a2;
89     op->args[2] = a3;
90     op->args[3] = a4;
91     op->args[4] = a5;
92     op->args[5] = a6;
93 }
94 
95 void tcg_gen_mb(TCGBar mb_type)
96 {
97     if (tcg_ctx->tb_cflags & CF_PARALLEL) {
98         tcg_gen_op1(INDEX_op_mb, mb_type);
99     }
100 }
101 
102 /* 32 bit ops */
103 
104 void tcg_gen_movi_i32(TCGv_i32 ret, int32_t arg)
105 {
106     tcg_gen_mov_i32(ret, tcg_constant_i32(arg));
107 }
108 
109 void tcg_gen_addi_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
110 {
111     /* some cases can be optimized here */
112     if (arg2 == 0) {
113         tcg_gen_mov_i32(ret, arg1);
114     } else {
115         tcg_gen_add_i32(ret, arg1, tcg_constant_i32(arg2));
116     }
117 }
118 
119 void tcg_gen_subfi_i32(TCGv_i32 ret, int32_t arg1, TCGv_i32 arg2)
120 {
121     if (arg1 == 0 && TCG_TARGET_HAS_neg_i32) {
122         /* Don't recurse with tcg_gen_neg_i32.  */
123         tcg_gen_op2_i32(INDEX_op_neg_i32, ret, arg2);
124     } else {
125         tcg_gen_sub_i32(ret, tcg_constant_i32(arg1), arg2);
126     }
127 }
128 
129 void tcg_gen_subi_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
130 {
131     /* some cases can be optimized here */
132     if (arg2 == 0) {
133         tcg_gen_mov_i32(ret, arg1);
134     } else {
135         tcg_gen_sub_i32(ret, arg1, tcg_constant_i32(arg2));
136     }
137 }
138 
139 void tcg_gen_andi_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
140 {
141     /* Some cases can be optimized here.  */
142     switch (arg2) {
143     case 0:
144         tcg_gen_movi_i32(ret, 0);
145         return;
146     case -1:
147         tcg_gen_mov_i32(ret, arg1);
148         return;
149     case 0xff:
150         /* Don't recurse with tcg_gen_ext8u_i32.  */
151         if (TCG_TARGET_HAS_ext8u_i32) {
152             tcg_gen_op2_i32(INDEX_op_ext8u_i32, ret, arg1);
153             return;
154         }
155         break;
156     case 0xffff:
157         if (TCG_TARGET_HAS_ext16u_i32) {
158             tcg_gen_op2_i32(INDEX_op_ext16u_i32, ret, arg1);
159             return;
160         }
161         break;
162     }
163 
164     tcg_gen_and_i32(ret, arg1, tcg_constant_i32(arg2));
165 }
166 
167 void tcg_gen_ori_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
168 {
169     /* Some cases can be optimized here.  */
170     if (arg2 == -1) {
171         tcg_gen_movi_i32(ret, -1);
172     } else if (arg2 == 0) {
173         tcg_gen_mov_i32(ret, arg1);
174     } else {
175         tcg_gen_or_i32(ret, arg1, tcg_constant_i32(arg2));
176     }
177 }
178 
179 void tcg_gen_xori_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
180 {
181     /* Some cases can be optimized here.  */
182     if (arg2 == 0) {
183         tcg_gen_mov_i32(ret, arg1);
184     } else if (arg2 == -1 && TCG_TARGET_HAS_not_i32) {
185         /* Don't recurse with tcg_gen_not_i32.  */
186         tcg_gen_op2_i32(INDEX_op_not_i32, ret, arg1);
187     } else {
188         tcg_gen_xor_i32(ret, arg1, tcg_constant_i32(arg2));
189     }
190 }
191 
192 void tcg_gen_shli_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
193 {
194     tcg_debug_assert(arg2 >= 0 && arg2 < 32);
195     if (arg2 == 0) {
196         tcg_gen_mov_i32(ret, arg1);
197     } else {
198         tcg_gen_shl_i32(ret, arg1, tcg_constant_i32(arg2));
199     }
200 }
201 
202 void tcg_gen_shri_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
203 {
204     tcg_debug_assert(arg2 >= 0 && arg2 < 32);
205     if (arg2 == 0) {
206         tcg_gen_mov_i32(ret, arg1);
207     } else {
208         tcg_gen_shr_i32(ret, arg1, tcg_constant_i32(arg2));
209     }
210 }
211 
212 void tcg_gen_sari_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
213 {
214     tcg_debug_assert(arg2 >= 0 && arg2 < 32);
215     if (arg2 == 0) {
216         tcg_gen_mov_i32(ret, arg1);
217     } else {
218         tcg_gen_sar_i32(ret, arg1, tcg_constant_i32(arg2));
219     }
220 }
221 
222 void tcg_gen_brcond_i32(TCGCond cond, TCGv_i32 arg1, TCGv_i32 arg2, TCGLabel *l)
223 {
224     if (cond == TCG_COND_ALWAYS) {
225         tcg_gen_br(l);
226     } else if (cond != TCG_COND_NEVER) {
227         l->refs++;
228         tcg_gen_op4ii_i32(INDEX_op_brcond_i32, arg1, arg2, cond, label_arg(l));
229     }
230 }
231 
232 void tcg_gen_brcondi_i32(TCGCond cond, TCGv_i32 arg1, int32_t arg2, TCGLabel *l)
233 {
234     if (cond == TCG_COND_ALWAYS) {
235         tcg_gen_br(l);
236     } else if (cond != TCG_COND_NEVER) {
237         tcg_gen_brcond_i32(cond, arg1, tcg_constant_i32(arg2), l);
238     }
239 }
240 
241 void tcg_gen_setcond_i32(TCGCond cond, TCGv_i32 ret,
242                          TCGv_i32 arg1, TCGv_i32 arg2)
243 {
244     if (cond == TCG_COND_ALWAYS) {
245         tcg_gen_movi_i32(ret, 1);
246     } else if (cond == TCG_COND_NEVER) {
247         tcg_gen_movi_i32(ret, 0);
248     } else {
249         tcg_gen_op4i_i32(INDEX_op_setcond_i32, ret, arg1, arg2, cond);
250     }
251 }
252 
253 void tcg_gen_setcondi_i32(TCGCond cond, TCGv_i32 ret,
254                           TCGv_i32 arg1, int32_t arg2)
255 {
256     tcg_gen_setcond_i32(cond, ret, arg1, tcg_constant_i32(arg2));
257 }
258 
259 void tcg_gen_muli_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
260 {
261     if (arg2 == 0) {
262         tcg_gen_movi_i32(ret, 0);
263     } else if (is_power_of_2(arg2)) {
264         tcg_gen_shli_i32(ret, arg1, ctz32(arg2));
265     } else {
266         tcg_gen_mul_i32(ret, arg1, tcg_constant_i32(arg2));
267     }
268 }
269 
270 void tcg_gen_div_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
271 {
272     if (TCG_TARGET_HAS_div_i32) {
273         tcg_gen_op3_i32(INDEX_op_div_i32, ret, arg1, arg2);
274     } else if (TCG_TARGET_HAS_div2_i32) {
275         TCGv_i32 t0 = tcg_temp_new_i32();
276         tcg_gen_sari_i32(t0, arg1, 31);
277         tcg_gen_op5_i32(INDEX_op_div2_i32, ret, t0, arg1, t0, arg2);
278         tcg_temp_free_i32(t0);
279     } else {
280         gen_helper_div_i32(ret, arg1, arg2);
281     }
282 }
283 
284 void tcg_gen_rem_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
285 {
286     if (TCG_TARGET_HAS_rem_i32) {
287         tcg_gen_op3_i32(INDEX_op_rem_i32, ret, arg1, arg2);
288     } else if (TCG_TARGET_HAS_div_i32) {
289         TCGv_i32 t0 = tcg_temp_new_i32();
290         tcg_gen_op3_i32(INDEX_op_div_i32, t0, arg1, arg2);
291         tcg_gen_mul_i32(t0, t0, arg2);
292         tcg_gen_sub_i32(ret, arg1, t0);
293         tcg_temp_free_i32(t0);
294     } else if (TCG_TARGET_HAS_div2_i32) {
295         TCGv_i32 t0 = tcg_temp_new_i32();
296         tcg_gen_sari_i32(t0, arg1, 31);
297         tcg_gen_op5_i32(INDEX_op_div2_i32, t0, ret, arg1, t0, arg2);
298         tcg_temp_free_i32(t0);
299     } else {
300         gen_helper_rem_i32(ret, arg1, arg2);
301     }
302 }
303 
304 void tcg_gen_divu_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
305 {
306     if (TCG_TARGET_HAS_div_i32) {
307         tcg_gen_op3_i32(INDEX_op_divu_i32, ret, arg1, arg2);
308     } else if (TCG_TARGET_HAS_div2_i32) {
309         TCGv_i32 t0 = tcg_temp_new_i32();
310         tcg_gen_movi_i32(t0, 0);
311         tcg_gen_op5_i32(INDEX_op_divu2_i32, ret, t0, arg1, t0, arg2);
312         tcg_temp_free_i32(t0);
313     } else {
314         gen_helper_divu_i32(ret, arg1, arg2);
315     }
316 }
317 
318 void tcg_gen_remu_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
319 {
320     if (TCG_TARGET_HAS_rem_i32) {
321         tcg_gen_op3_i32(INDEX_op_remu_i32, ret, arg1, arg2);
322     } else if (TCG_TARGET_HAS_div_i32) {
323         TCGv_i32 t0 = tcg_temp_new_i32();
324         tcg_gen_op3_i32(INDEX_op_divu_i32, t0, arg1, arg2);
325         tcg_gen_mul_i32(t0, t0, arg2);
326         tcg_gen_sub_i32(ret, arg1, t0);
327         tcg_temp_free_i32(t0);
328     } else if (TCG_TARGET_HAS_div2_i32) {
329         TCGv_i32 t0 = tcg_temp_new_i32();
330         tcg_gen_movi_i32(t0, 0);
331         tcg_gen_op5_i32(INDEX_op_divu2_i32, t0, ret, arg1, t0, arg2);
332         tcg_temp_free_i32(t0);
333     } else {
334         gen_helper_remu_i32(ret, arg1, arg2);
335     }
336 }
337 
338 void tcg_gen_andc_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
339 {
340     if (TCG_TARGET_HAS_andc_i32) {
341         tcg_gen_op3_i32(INDEX_op_andc_i32, ret, arg1, arg2);
342     } else {
343         TCGv_i32 t0 = tcg_temp_new_i32();
344         tcg_gen_not_i32(t0, arg2);
345         tcg_gen_and_i32(ret, arg1, t0);
346         tcg_temp_free_i32(t0);
347     }
348 }
349 
350 void tcg_gen_eqv_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
351 {
352     if (TCG_TARGET_HAS_eqv_i32) {
353         tcg_gen_op3_i32(INDEX_op_eqv_i32, ret, arg1, arg2);
354     } else {
355         tcg_gen_xor_i32(ret, arg1, arg2);
356         tcg_gen_not_i32(ret, ret);
357     }
358 }
359 
360 void tcg_gen_nand_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
361 {
362     if (TCG_TARGET_HAS_nand_i32) {
363         tcg_gen_op3_i32(INDEX_op_nand_i32, ret, arg1, arg2);
364     } else {
365         tcg_gen_and_i32(ret, arg1, arg2);
366         tcg_gen_not_i32(ret, ret);
367     }
368 }
369 
370 void tcg_gen_nor_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
371 {
372     if (TCG_TARGET_HAS_nor_i32) {
373         tcg_gen_op3_i32(INDEX_op_nor_i32, ret, arg1, arg2);
374     } else {
375         tcg_gen_or_i32(ret, arg1, arg2);
376         tcg_gen_not_i32(ret, ret);
377     }
378 }
379 
380 void tcg_gen_orc_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
381 {
382     if (TCG_TARGET_HAS_orc_i32) {
383         tcg_gen_op3_i32(INDEX_op_orc_i32, ret, arg1, arg2);
384     } else {
385         TCGv_i32 t0 = tcg_temp_new_i32();
386         tcg_gen_not_i32(t0, arg2);
387         tcg_gen_or_i32(ret, arg1, t0);
388         tcg_temp_free_i32(t0);
389     }
390 }
391 
392 void tcg_gen_clz_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
393 {
394     if (TCG_TARGET_HAS_clz_i32) {
395         tcg_gen_op3_i32(INDEX_op_clz_i32, ret, arg1, arg2);
396     } else if (TCG_TARGET_HAS_clz_i64) {
397         TCGv_i64 t1 = tcg_temp_new_i64();
398         TCGv_i64 t2 = tcg_temp_new_i64();
399         tcg_gen_extu_i32_i64(t1, arg1);
400         tcg_gen_extu_i32_i64(t2, arg2);
401         tcg_gen_addi_i64(t2, t2, 32);
402         tcg_gen_clz_i64(t1, t1, t2);
403         tcg_gen_extrl_i64_i32(ret, t1);
404         tcg_temp_free_i64(t1);
405         tcg_temp_free_i64(t2);
406         tcg_gen_subi_i32(ret, ret, 32);
407     } else {
408         gen_helper_clz_i32(ret, arg1, arg2);
409     }
410 }
411 
412 void tcg_gen_clzi_i32(TCGv_i32 ret, TCGv_i32 arg1, uint32_t arg2)
413 {
414     tcg_gen_clz_i32(ret, arg1, tcg_constant_i32(arg2));
415 }
416 
417 void tcg_gen_ctz_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
418 {
419     if (TCG_TARGET_HAS_ctz_i32) {
420         tcg_gen_op3_i32(INDEX_op_ctz_i32, ret, arg1, arg2);
421     } else if (TCG_TARGET_HAS_ctz_i64) {
422         TCGv_i64 t1 = tcg_temp_new_i64();
423         TCGv_i64 t2 = tcg_temp_new_i64();
424         tcg_gen_extu_i32_i64(t1, arg1);
425         tcg_gen_extu_i32_i64(t2, arg2);
426         tcg_gen_ctz_i64(t1, t1, t2);
427         tcg_gen_extrl_i64_i32(ret, t1);
428         tcg_temp_free_i64(t1);
429         tcg_temp_free_i64(t2);
430     } else if (TCG_TARGET_HAS_ctpop_i32
431                || TCG_TARGET_HAS_ctpop_i64
432                || TCG_TARGET_HAS_clz_i32
433                || TCG_TARGET_HAS_clz_i64) {
434         TCGv_i32 z, t = tcg_temp_new_i32();
435 
436         if (TCG_TARGET_HAS_ctpop_i32 || TCG_TARGET_HAS_ctpop_i64) {
437             tcg_gen_subi_i32(t, arg1, 1);
438             tcg_gen_andc_i32(t, t, arg1);
439             tcg_gen_ctpop_i32(t, t);
440         } else {
441             /* Since all non-x86 hosts have clz(0) == 32, don't fight it.  */
442             tcg_gen_neg_i32(t, arg1);
443             tcg_gen_and_i32(t, t, arg1);
444             tcg_gen_clzi_i32(t, t, 32);
445             tcg_gen_xori_i32(t, t, 31);
446         }
447         z = tcg_constant_i32(0);
448         tcg_gen_movcond_i32(TCG_COND_EQ, ret, arg1, z, arg2, t);
449         tcg_temp_free_i32(t);
450     } else {
451         gen_helper_ctz_i32(ret, arg1, arg2);
452     }
453 }
454 
455 void tcg_gen_ctzi_i32(TCGv_i32 ret, TCGv_i32 arg1, uint32_t arg2)
456 {
457     if (!TCG_TARGET_HAS_ctz_i32 && TCG_TARGET_HAS_ctpop_i32 && arg2 == 32) {
458         /* This equivalence has the advantage of not requiring a fixup.  */
459         TCGv_i32 t = tcg_temp_new_i32();
460         tcg_gen_subi_i32(t, arg1, 1);
461         tcg_gen_andc_i32(t, t, arg1);
462         tcg_gen_ctpop_i32(ret, t);
463         tcg_temp_free_i32(t);
464     } else {
465         tcg_gen_ctz_i32(ret, arg1, tcg_constant_i32(arg2));
466     }
467 }
468 
469 void tcg_gen_clrsb_i32(TCGv_i32 ret, TCGv_i32 arg)
470 {
471     if (TCG_TARGET_HAS_clz_i32) {
472         TCGv_i32 t = tcg_temp_new_i32();
473         tcg_gen_sari_i32(t, arg, 31);
474         tcg_gen_xor_i32(t, t, arg);
475         tcg_gen_clzi_i32(t, t, 32);
476         tcg_gen_subi_i32(ret, t, 1);
477         tcg_temp_free_i32(t);
478     } else {
479         gen_helper_clrsb_i32(ret, arg);
480     }
481 }
482 
483 void tcg_gen_ctpop_i32(TCGv_i32 ret, TCGv_i32 arg1)
484 {
485     if (TCG_TARGET_HAS_ctpop_i32) {
486         tcg_gen_op2_i32(INDEX_op_ctpop_i32, ret, arg1);
487     } else if (TCG_TARGET_HAS_ctpop_i64) {
488         TCGv_i64 t = tcg_temp_new_i64();
489         tcg_gen_extu_i32_i64(t, arg1);
490         tcg_gen_ctpop_i64(t, t);
491         tcg_gen_extrl_i64_i32(ret, t);
492         tcg_temp_free_i64(t);
493     } else {
494         gen_helper_ctpop_i32(ret, arg1);
495     }
496 }
497 
498 void tcg_gen_rotl_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
499 {
500     if (TCG_TARGET_HAS_rot_i32) {
501         tcg_gen_op3_i32(INDEX_op_rotl_i32, ret, arg1, arg2);
502     } else {
503         TCGv_i32 t0, t1;
504 
505         t0 = tcg_temp_new_i32();
506         t1 = tcg_temp_new_i32();
507         tcg_gen_shl_i32(t0, arg1, arg2);
508         tcg_gen_subfi_i32(t1, 32, arg2);
509         tcg_gen_shr_i32(t1, arg1, t1);
510         tcg_gen_or_i32(ret, t0, t1);
511         tcg_temp_free_i32(t0);
512         tcg_temp_free_i32(t1);
513     }
514 }
515 
516 void tcg_gen_rotli_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
517 {
518     tcg_debug_assert(arg2 >= 0 && arg2 < 32);
519     /* some cases can be optimized here */
520     if (arg2 == 0) {
521         tcg_gen_mov_i32(ret, arg1);
522     } else if (TCG_TARGET_HAS_rot_i32) {
523         tcg_gen_rotl_i32(ret, arg1, tcg_constant_i32(arg2));
524     } else {
525         TCGv_i32 t0, t1;
526         t0 = tcg_temp_new_i32();
527         t1 = tcg_temp_new_i32();
528         tcg_gen_shli_i32(t0, arg1, arg2);
529         tcg_gen_shri_i32(t1, arg1, 32 - arg2);
530         tcg_gen_or_i32(ret, t0, t1);
531         tcg_temp_free_i32(t0);
532         tcg_temp_free_i32(t1);
533     }
534 }
535 
536 void tcg_gen_rotr_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
537 {
538     if (TCG_TARGET_HAS_rot_i32) {
539         tcg_gen_op3_i32(INDEX_op_rotr_i32, ret, arg1, arg2);
540     } else {
541         TCGv_i32 t0, t1;
542 
543         t0 = tcg_temp_new_i32();
544         t1 = tcg_temp_new_i32();
545         tcg_gen_shr_i32(t0, arg1, arg2);
546         tcg_gen_subfi_i32(t1, 32, arg2);
547         tcg_gen_shl_i32(t1, arg1, t1);
548         tcg_gen_or_i32(ret, t0, t1);
549         tcg_temp_free_i32(t0);
550         tcg_temp_free_i32(t1);
551     }
552 }
553 
554 void tcg_gen_rotri_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
555 {
556     tcg_debug_assert(arg2 >= 0 && arg2 < 32);
557     /* some cases can be optimized here */
558     if (arg2 == 0) {
559         tcg_gen_mov_i32(ret, arg1);
560     } else {
561         tcg_gen_rotli_i32(ret, arg1, 32 - arg2);
562     }
563 }
564 
565 void tcg_gen_deposit_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2,
566                          unsigned int ofs, unsigned int len)
567 {
568     uint32_t mask;
569     TCGv_i32 t1;
570 
571     tcg_debug_assert(ofs < 32);
572     tcg_debug_assert(len > 0);
573     tcg_debug_assert(len <= 32);
574     tcg_debug_assert(ofs + len <= 32);
575 
576     if (len == 32) {
577         tcg_gen_mov_i32(ret, arg2);
578         return;
579     }
580     if (TCG_TARGET_HAS_deposit_i32 && TCG_TARGET_deposit_i32_valid(ofs, len)) {
581         tcg_gen_op5ii_i32(INDEX_op_deposit_i32, ret, arg1, arg2, ofs, len);
582         return;
583     }
584 
585     t1 = tcg_temp_new_i32();
586 
587     if (TCG_TARGET_HAS_extract2_i32) {
588         if (ofs + len == 32) {
589             tcg_gen_shli_i32(t1, arg1, len);
590             tcg_gen_extract2_i32(ret, t1, arg2, len);
591             goto done;
592         }
593         if (ofs == 0) {
594             tcg_gen_extract2_i32(ret, arg1, arg2, len);
595             tcg_gen_rotli_i32(ret, ret, len);
596             goto done;
597         }
598     }
599 
600     mask = (1u << len) - 1;
601     if (ofs + len < 32) {
602         tcg_gen_andi_i32(t1, arg2, mask);
603         tcg_gen_shli_i32(t1, t1, ofs);
604     } else {
605         tcg_gen_shli_i32(t1, arg2, ofs);
606     }
607     tcg_gen_andi_i32(ret, arg1, ~(mask << ofs));
608     tcg_gen_or_i32(ret, ret, t1);
609  done:
610     tcg_temp_free_i32(t1);
611 }
612 
613 void tcg_gen_deposit_z_i32(TCGv_i32 ret, TCGv_i32 arg,
614                            unsigned int ofs, unsigned int len)
615 {
616     tcg_debug_assert(ofs < 32);
617     tcg_debug_assert(len > 0);
618     tcg_debug_assert(len <= 32);
619     tcg_debug_assert(ofs + len <= 32);
620 
621     if (ofs + len == 32) {
622         tcg_gen_shli_i32(ret, arg, ofs);
623     } else if (ofs == 0) {
624         tcg_gen_andi_i32(ret, arg, (1u << len) - 1);
625     } else if (TCG_TARGET_HAS_deposit_i32
626                && TCG_TARGET_deposit_i32_valid(ofs, len)) {
627         TCGv_i32 zero = tcg_constant_i32(0);
628         tcg_gen_op5ii_i32(INDEX_op_deposit_i32, ret, zero, arg, ofs, len);
629     } else {
630         /* To help two-operand hosts we prefer to zero-extend first,
631            which allows ARG to stay live.  */
632         switch (len) {
633         case 16:
634             if (TCG_TARGET_HAS_ext16u_i32) {
635                 tcg_gen_ext16u_i32(ret, arg);
636                 tcg_gen_shli_i32(ret, ret, ofs);
637                 return;
638             }
639             break;
640         case 8:
641             if (TCG_TARGET_HAS_ext8u_i32) {
642                 tcg_gen_ext8u_i32(ret, arg);
643                 tcg_gen_shli_i32(ret, ret, ofs);
644                 return;
645             }
646             break;
647         }
648         /* Otherwise prefer zero-extension over AND for code size.  */
649         switch (ofs + len) {
650         case 16:
651             if (TCG_TARGET_HAS_ext16u_i32) {
652                 tcg_gen_shli_i32(ret, arg, ofs);
653                 tcg_gen_ext16u_i32(ret, ret);
654                 return;
655             }
656             break;
657         case 8:
658             if (TCG_TARGET_HAS_ext8u_i32) {
659                 tcg_gen_shli_i32(ret, arg, ofs);
660                 tcg_gen_ext8u_i32(ret, ret);
661                 return;
662             }
663             break;
664         }
665         tcg_gen_andi_i32(ret, arg, (1u << len) - 1);
666         tcg_gen_shli_i32(ret, ret, ofs);
667     }
668 }
669 
670 void tcg_gen_extract_i32(TCGv_i32 ret, TCGv_i32 arg,
671                          unsigned int ofs, unsigned int len)
672 {
673     tcg_debug_assert(ofs < 32);
674     tcg_debug_assert(len > 0);
675     tcg_debug_assert(len <= 32);
676     tcg_debug_assert(ofs + len <= 32);
677 
678     /* Canonicalize certain special cases, even if extract is supported.  */
679     if (ofs + len == 32) {
680         tcg_gen_shri_i32(ret, arg, 32 - len);
681         return;
682     }
683     if (ofs == 0) {
684         tcg_gen_andi_i32(ret, arg, (1u << len) - 1);
685         return;
686     }
687 
688     if (TCG_TARGET_HAS_extract_i32
689         && TCG_TARGET_extract_i32_valid(ofs, len)) {
690         tcg_gen_op4ii_i32(INDEX_op_extract_i32, ret, arg, ofs, len);
691         return;
692     }
693 
694     /* Assume that zero-extension, if available, is cheaper than a shift.  */
695     switch (ofs + len) {
696     case 16:
697         if (TCG_TARGET_HAS_ext16u_i32) {
698             tcg_gen_ext16u_i32(ret, arg);
699             tcg_gen_shri_i32(ret, ret, ofs);
700             return;
701         }
702         break;
703     case 8:
704         if (TCG_TARGET_HAS_ext8u_i32) {
705             tcg_gen_ext8u_i32(ret, arg);
706             tcg_gen_shri_i32(ret, ret, ofs);
707             return;
708         }
709         break;
710     }
711 
712     /* ??? Ideally we'd know what values are available for immediate AND.
713        Assume that 8 bits are available, plus the special case of 16,
714        so that we get ext8u, ext16u.  */
715     switch (len) {
716     case 1 ... 8: case 16:
717         tcg_gen_shri_i32(ret, arg, ofs);
718         tcg_gen_andi_i32(ret, ret, (1u << len) - 1);
719         break;
720     default:
721         tcg_gen_shli_i32(ret, arg, 32 - len - ofs);
722         tcg_gen_shri_i32(ret, ret, 32 - len);
723         break;
724     }
725 }
726 
727 void tcg_gen_sextract_i32(TCGv_i32 ret, TCGv_i32 arg,
728                           unsigned int ofs, unsigned int len)
729 {
730     tcg_debug_assert(ofs < 32);
731     tcg_debug_assert(len > 0);
732     tcg_debug_assert(len <= 32);
733     tcg_debug_assert(ofs + len <= 32);
734 
735     /* Canonicalize certain special cases, even if extract is supported.  */
736     if (ofs + len == 32) {
737         tcg_gen_sari_i32(ret, arg, 32 - len);
738         return;
739     }
740     if (ofs == 0) {
741         switch (len) {
742         case 16:
743             tcg_gen_ext16s_i32(ret, arg);
744             return;
745         case 8:
746             tcg_gen_ext8s_i32(ret, arg);
747             return;
748         }
749     }
750 
751     if (TCG_TARGET_HAS_sextract_i32
752         && TCG_TARGET_extract_i32_valid(ofs, len)) {
753         tcg_gen_op4ii_i32(INDEX_op_sextract_i32, ret, arg, ofs, len);
754         return;
755     }
756 
757     /* Assume that sign-extension, if available, is cheaper than a shift.  */
758     switch (ofs + len) {
759     case 16:
760         if (TCG_TARGET_HAS_ext16s_i32) {
761             tcg_gen_ext16s_i32(ret, arg);
762             tcg_gen_sari_i32(ret, ret, ofs);
763             return;
764         }
765         break;
766     case 8:
767         if (TCG_TARGET_HAS_ext8s_i32) {
768             tcg_gen_ext8s_i32(ret, arg);
769             tcg_gen_sari_i32(ret, ret, ofs);
770             return;
771         }
772         break;
773     }
774     switch (len) {
775     case 16:
776         if (TCG_TARGET_HAS_ext16s_i32) {
777             tcg_gen_shri_i32(ret, arg, ofs);
778             tcg_gen_ext16s_i32(ret, ret);
779             return;
780         }
781         break;
782     case 8:
783         if (TCG_TARGET_HAS_ext8s_i32) {
784             tcg_gen_shri_i32(ret, arg, ofs);
785             tcg_gen_ext8s_i32(ret, ret);
786             return;
787         }
788         break;
789     }
790 
791     tcg_gen_shli_i32(ret, arg, 32 - len - ofs);
792     tcg_gen_sari_i32(ret, ret, 32 - len);
793 }
794 
795 /*
796  * Extract 32-bits from a 64-bit input, ah:al, starting from ofs.
797  * Unlike tcg_gen_extract_i32 above, len is fixed at 32.
798  */
799 void tcg_gen_extract2_i32(TCGv_i32 ret, TCGv_i32 al, TCGv_i32 ah,
800                           unsigned int ofs)
801 {
802     tcg_debug_assert(ofs <= 32);
803     if (ofs == 0) {
804         tcg_gen_mov_i32(ret, al);
805     } else if (ofs == 32) {
806         tcg_gen_mov_i32(ret, ah);
807     } else if (al == ah) {
808         tcg_gen_rotri_i32(ret, al, ofs);
809     } else if (TCG_TARGET_HAS_extract2_i32) {
810         tcg_gen_op4i_i32(INDEX_op_extract2_i32, ret, al, ah, ofs);
811     } else {
812         TCGv_i32 t0 = tcg_temp_new_i32();
813         tcg_gen_shri_i32(t0, al, ofs);
814         tcg_gen_deposit_i32(ret, t0, ah, 32 - ofs, ofs);
815         tcg_temp_free_i32(t0);
816     }
817 }
818 
819 void tcg_gen_movcond_i32(TCGCond cond, TCGv_i32 ret, TCGv_i32 c1,
820                          TCGv_i32 c2, TCGv_i32 v1, TCGv_i32 v2)
821 {
822     if (cond == TCG_COND_ALWAYS) {
823         tcg_gen_mov_i32(ret, v1);
824     } else if (cond == TCG_COND_NEVER) {
825         tcg_gen_mov_i32(ret, v2);
826     } else if (TCG_TARGET_HAS_movcond_i32) {
827         tcg_gen_op6i_i32(INDEX_op_movcond_i32, ret, c1, c2, v1, v2, cond);
828     } else {
829         TCGv_i32 t0 = tcg_temp_new_i32();
830         TCGv_i32 t1 = tcg_temp_new_i32();
831         tcg_gen_setcond_i32(cond, t0, c1, c2);
832         tcg_gen_neg_i32(t0, t0);
833         tcg_gen_and_i32(t1, v1, t0);
834         tcg_gen_andc_i32(ret, v2, t0);
835         tcg_gen_or_i32(ret, ret, t1);
836         tcg_temp_free_i32(t0);
837         tcg_temp_free_i32(t1);
838     }
839 }
840 
841 void tcg_gen_add2_i32(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 al,
842                       TCGv_i32 ah, TCGv_i32 bl, TCGv_i32 bh)
843 {
844     if (TCG_TARGET_HAS_add2_i32) {
845         tcg_gen_op6_i32(INDEX_op_add2_i32, rl, rh, al, ah, bl, bh);
846     } else {
847         TCGv_i64 t0 = tcg_temp_new_i64();
848         TCGv_i64 t1 = tcg_temp_new_i64();
849         tcg_gen_concat_i32_i64(t0, al, ah);
850         tcg_gen_concat_i32_i64(t1, bl, bh);
851         tcg_gen_add_i64(t0, t0, t1);
852         tcg_gen_extr_i64_i32(rl, rh, t0);
853         tcg_temp_free_i64(t0);
854         tcg_temp_free_i64(t1);
855     }
856 }
857 
858 void tcg_gen_sub2_i32(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 al,
859                       TCGv_i32 ah, TCGv_i32 bl, TCGv_i32 bh)
860 {
861     if (TCG_TARGET_HAS_sub2_i32) {
862         tcg_gen_op6_i32(INDEX_op_sub2_i32, rl, rh, al, ah, bl, bh);
863     } else {
864         TCGv_i64 t0 = tcg_temp_new_i64();
865         TCGv_i64 t1 = tcg_temp_new_i64();
866         tcg_gen_concat_i32_i64(t0, al, ah);
867         tcg_gen_concat_i32_i64(t1, bl, bh);
868         tcg_gen_sub_i64(t0, t0, t1);
869         tcg_gen_extr_i64_i32(rl, rh, t0);
870         tcg_temp_free_i64(t0);
871         tcg_temp_free_i64(t1);
872     }
873 }
874 
875 void tcg_gen_mulu2_i32(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 arg1, TCGv_i32 arg2)
876 {
877     if (TCG_TARGET_HAS_mulu2_i32) {
878         tcg_gen_op4_i32(INDEX_op_mulu2_i32, rl, rh, arg1, arg2);
879     } else if (TCG_TARGET_HAS_muluh_i32) {
880         TCGv_i32 t = tcg_temp_new_i32();
881         tcg_gen_op3_i32(INDEX_op_mul_i32, t, arg1, arg2);
882         tcg_gen_op3_i32(INDEX_op_muluh_i32, rh, arg1, arg2);
883         tcg_gen_mov_i32(rl, t);
884         tcg_temp_free_i32(t);
885     } else {
886         TCGv_i64 t0 = tcg_temp_new_i64();
887         TCGv_i64 t1 = tcg_temp_new_i64();
888         tcg_gen_extu_i32_i64(t0, arg1);
889         tcg_gen_extu_i32_i64(t1, arg2);
890         tcg_gen_mul_i64(t0, t0, t1);
891         tcg_gen_extr_i64_i32(rl, rh, t0);
892         tcg_temp_free_i64(t0);
893         tcg_temp_free_i64(t1);
894     }
895 }
896 
897 void tcg_gen_muls2_i32(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 arg1, TCGv_i32 arg2)
898 {
899     if (TCG_TARGET_HAS_muls2_i32) {
900         tcg_gen_op4_i32(INDEX_op_muls2_i32, rl, rh, arg1, arg2);
901     } else if (TCG_TARGET_HAS_mulsh_i32) {
902         TCGv_i32 t = tcg_temp_new_i32();
903         tcg_gen_op3_i32(INDEX_op_mul_i32, t, arg1, arg2);
904         tcg_gen_op3_i32(INDEX_op_mulsh_i32, rh, arg1, arg2);
905         tcg_gen_mov_i32(rl, t);
906         tcg_temp_free_i32(t);
907     } else if (TCG_TARGET_REG_BITS == 32) {
908         TCGv_i32 t0 = tcg_temp_new_i32();
909         TCGv_i32 t1 = tcg_temp_new_i32();
910         TCGv_i32 t2 = tcg_temp_new_i32();
911         TCGv_i32 t3 = tcg_temp_new_i32();
912         tcg_gen_mulu2_i32(t0, t1, arg1, arg2);
913         /* Adjust for negative inputs.  */
914         tcg_gen_sari_i32(t2, arg1, 31);
915         tcg_gen_sari_i32(t3, arg2, 31);
916         tcg_gen_and_i32(t2, t2, arg2);
917         tcg_gen_and_i32(t3, t3, arg1);
918         tcg_gen_sub_i32(rh, t1, t2);
919         tcg_gen_sub_i32(rh, rh, t3);
920         tcg_gen_mov_i32(rl, t0);
921         tcg_temp_free_i32(t0);
922         tcg_temp_free_i32(t1);
923         tcg_temp_free_i32(t2);
924         tcg_temp_free_i32(t3);
925     } else {
926         TCGv_i64 t0 = tcg_temp_new_i64();
927         TCGv_i64 t1 = tcg_temp_new_i64();
928         tcg_gen_ext_i32_i64(t0, arg1);
929         tcg_gen_ext_i32_i64(t1, arg2);
930         tcg_gen_mul_i64(t0, t0, t1);
931         tcg_gen_extr_i64_i32(rl, rh, t0);
932         tcg_temp_free_i64(t0);
933         tcg_temp_free_i64(t1);
934     }
935 }
936 
937 void tcg_gen_mulsu2_i32(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 arg1, TCGv_i32 arg2)
938 {
939     if (TCG_TARGET_REG_BITS == 32) {
940         TCGv_i32 t0 = tcg_temp_new_i32();
941         TCGv_i32 t1 = tcg_temp_new_i32();
942         TCGv_i32 t2 = tcg_temp_new_i32();
943         tcg_gen_mulu2_i32(t0, t1, arg1, arg2);
944         /* Adjust for negative input for the signed arg1.  */
945         tcg_gen_sari_i32(t2, arg1, 31);
946         tcg_gen_and_i32(t2, t2, arg2);
947         tcg_gen_sub_i32(rh, t1, t2);
948         tcg_gen_mov_i32(rl, t0);
949         tcg_temp_free_i32(t0);
950         tcg_temp_free_i32(t1);
951         tcg_temp_free_i32(t2);
952     } else {
953         TCGv_i64 t0 = tcg_temp_new_i64();
954         TCGv_i64 t1 = tcg_temp_new_i64();
955         tcg_gen_ext_i32_i64(t0, arg1);
956         tcg_gen_extu_i32_i64(t1, arg2);
957         tcg_gen_mul_i64(t0, t0, t1);
958         tcg_gen_extr_i64_i32(rl, rh, t0);
959         tcg_temp_free_i64(t0);
960         tcg_temp_free_i64(t1);
961     }
962 }
963 
964 void tcg_gen_ext8s_i32(TCGv_i32 ret, TCGv_i32 arg)
965 {
966     if (TCG_TARGET_HAS_ext8s_i32) {
967         tcg_gen_op2_i32(INDEX_op_ext8s_i32, ret, arg);
968     } else {
969         tcg_gen_shli_i32(ret, arg, 24);
970         tcg_gen_sari_i32(ret, ret, 24);
971     }
972 }
973 
974 void tcg_gen_ext16s_i32(TCGv_i32 ret, TCGv_i32 arg)
975 {
976     if (TCG_TARGET_HAS_ext16s_i32) {
977         tcg_gen_op2_i32(INDEX_op_ext16s_i32, ret, arg);
978     } else {
979         tcg_gen_shli_i32(ret, arg, 16);
980         tcg_gen_sari_i32(ret, ret, 16);
981     }
982 }
983 
984 void tcg_gen_ext8u_i32(TCGv_i32 ret, TCGv_i32 arg)
985 {
986     if (TCG_TARGET_HAS_ext8u_i32) {
987         tcg_gen_op2_i32(INDEX_op_ext8u_i32, ret, arg);
988     } else {
989         tcg_gen_andi_i32(ret, arg, 0xffu);
990     }
991 }
992 
993 void tcg_gen_ext16u_i32(TCGv_i32 ret, TCGv_i32 arg)
994 {
995     if (TCG_TARGET_HAS_ext16u_i32) {
996         tcg_gen_op2_i32(INDEX_op_ext16u_i32, ret, arg);
997     } else {
998         tcg_gen_andi_i32(ret, arg, 0xffffu);
999     }
1000 }
1001 
1002 void tcg_gen_bswap16_i32(TCGv_i32 ret, TCGv_i32 arg, int flags)
1003 {
1004     /* Only one extension flag may be present. */
1005     tcg_debug_assert(!(flags & TCG_BSWAP_OS) || !(flags & TCG_BSWAP_OZ));
1006 
1007     if (TCG_TARGET_HAS_bswap16_i32) {
1008         tcg_gen_op3i_i32(INDEX_op_bswap16_i32, ret, arg, flags);
1009     } else {
1010         TCGv_i32 t0 = tcg_temp_new_i32();
1011         TCGv_i32 t1 = tcg_temp_new_i32();
1012 
1013         tcg_gen_shri_i32(t0, arg, 8);
1014         if (!(flags & TCG_BSWAP_IZ)) {
1015             tcg_gen_ext8u_i32(t0, t0);
1016         }
1017 
1018         if (flags & TCG_BSWAP_OS) {
1019             tcg_gen_shli_i32(t1, arg, 24);
1020             tcg_gen_sari_i32(t1, t1, 16);
1021         } else if (flags & TCG_BSWAP_OZ) {
1022             tcg_gen_ext8u_i32(t1, arg);
1023             tcg_gen_shli_i32(t1, t1, 8);
1024         } else {
1025             tcg_gen_shli_i32(t1, arg, 8);
1026         }
1027 
1028         tcg_gen_or_i32(ret, t0, t1);
1029         tcg_temp_free_i32(t0);
1030         tcg_temp_free_i32(t1);
1031     }
1032 }
1033 
1034 void tcg_gen_bswap32_i32(TCGv_i32 ret, TCGv_i32 arg)
1035 {
1036     if (TCG_TARGET_HAS_bswap32_i32) {
1037         tcg_gen_op3i_i32(INDEX_op_bswap32_i32, ret, arg, 0);
1038     } else {
1039         TCGv_i32 t0 = tcg_temp_new_i32();
1040         TCGv_i32 t1 = tcg_temp_new_i32();
1041         TCGv_i32 t2 = tcg_constant_i32(0x00ff00ff);
1042 
1043                                         /* arg = abcd */
1044         tcg_gen_shri_i32(t0, arg, 8);   /*  t0 = .abc */
1045         tcg_gen_and_i32(t1, arg, t2);   /*  t1 = .b.d */
1046         tcg_gen_and_i32(t0, t0, t2);    /*  t0 = .a.c */
1047         tcg_gen_shli_i32(t1, t1, 8);    /*  t1 = b.d. */
1048         tcg_gen_or_i32(ret, t0, t1);    /* ret = badc */
1049 
1050         tcg_gen_shri_i32(t0, ret, 16);  /*  t0 = ..ba */
1051         tcg_gen_shli_i32(t1, ret, 16);  /*  t1 = dc.. */
1052         tcg_gen_or_i32(ret, t0, t1);    /* ret = dcba */
1053 
1054         tcg_temp_free_i32(t0);
1055         tcg_temp_free_i32(t1);
1056     }
1057 }
1058 
1059 void tcg_gen_hswap_i32(TCGv_i32 ret, TCGv_i32 arg)
1060 {
1061     /* Swapping 2 16-bit elements is a rotate. */
1062     tcg_gen_rotli_i32(ret, arg, 16);
1063 }
1064 
1065 void tcg_gen_smin_i32(TCGv_i32 ret, TCGv_i32 a, TCGv_i32 b)
1066 {
1067     tcg_gen_movcond_i32(TCG_COND_LT, ret, a, b, a, b);
1068 }
1069 
1070 void tcg_gen_umin_i32(TCGv_i32 ret, TCGv_i32 a, TCGv_i32 b)
1071 {
1072     tcg_gen_movcond_i32(TCG_COND_LTU, ret, a, b, a, b);
1073 }
1074 
1075 void tcg_gen_smax_i32(TCGv_i32 ret, TCGv_i32 a, TCGv_i32 b)
1076 {
1077     tcg_gen_movcond_i32(TCG_COND_LT, ret, a, b, b, a);
1078 }
1079 
1080 void tcg_gen_umax_i32(TCGv_i32 ret, TCGv_i32 a, TCGv_i32 b)
1081 {
1082     tcg_gen_movcond_i32(TCG_COND_LTU, ret, a, b, b, a);
1083 }
1084 
1085 void tcg_gen_abs_i32(TCGv_i32 ret, TCGv_i32 a)
1086 {
1087     TCGv_i32 t = tcg_temp_new_i32();
1088 
1089     tcg_gen_sari_i32(t, a, 31);
1090     tcg_gen_xor_i32(ret, a, t);
1091     tcg_gen_sub_i32(ret, ret, t);
1092     tcg_temp_free_i32(t);
1093 }
1094 
1095 /* 64-bit ops */
1096 
1097 #if TCG_TARGET_REG_BITS == 32
1098 /* These are all inline for TCG_TARGET_REG_BITS == 64.  */
1099 
1100 void tcg_gen_discard_i64(TCGv_i64 arg)
1101 {
1102     tcg_gen_discard_i32(TCGV_LOW(arg));
1103     tcg_gen_discard_i32(TCGV_HIGH(arg));
1104 }
1105 
1106 void tcg_gen_mov_i64(TCGv_i64 ret, TCGv_i64 arg)
1107 {
1108     TCGTemp *ts = tcgv_i64_temp(arg);
1109 
1110     /* Canonicalize TCGv_i64 TEMP_CONST into TCGv_i32 TEMP_CONST. */
1111     if (ts->kind == TEMP_CONST) {
1112         tcg_gen_movi_i64(ret, ts->val);
1113     } else {
1114         tcg_gen_mov_i32(TCGV_LOW(ret), TCGV_LOW(arg));
1115         tcg_gen_mov_i32(TCGV_HIGH(ret), TCGV_HIGH(arg));
1116     }
1117 }
1118 
1119 void tcg_gen_movi_i64(TCGv_i64 ret, int64_t arg)
1120 {
1121     tcg_gen_movi_i32(TCGV_LOW(ret), arg);
1122     tcg_gen_movi_i32(TCGV_HIGH(ret), arg >> 32);
1123 }
1124 
1125 void tcg_gen_ld8u_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset)
1126 {
1127     tcg_gen_ld8u_i32(TCGV_LOW(ret), arg2, offset);
1128     tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
1129 }
1130 
1131 void tcg_gen_ld8s_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset)
1132 {
1133     tcg_gen_ld8s_i32(TCGV_LOW(ret), arg2, offset);
1134     tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31);
1135 }
1136 
1137 void tcg_gen_ld16u_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset)
1138 {
1139     tcg_gen_ld16u_i32(TCGV_LOW(ret), arg2, offset);
1140     tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
1141 }
1142 
1143 void tcg_gen_ld16s_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset)
1144 {
1145     tcg_gen_ld16s_i32(TCGV_LOW(ret), arg2, offset);
1146     tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31);
1147 }
1148 
1149 void tcg_gen_ld32u_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset)
1150 {
1151     tcg_gen_ld_i32(TCGV_LOW(ret), arg2, offset);
1152     tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
1153 }
1154 
1155 void tcg_gen_ld32s_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset)
1156 {
1157     tcg_gen_ld_i32(TCGV_LOW(ret), arg2, offset);
1158     tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31);
1159 }
1160 
1161 void tcg_gen_ld_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset)
1162 {
1163     /* Since arg2 and ret have different types,
1164        they cannot be the same temporary */
1165 #if HOST_BIG_ENDIAN
1166     tcg_gen_ld_i32(TCGV_HIGH(ret), arg2, offset);
1167     tcg_gen_ld_i32(TCGV_LOW(ret), arg2, offset + 4);
1168 #else
1169     tcg_gen_ld_i32(TCGV_LOW(ret), arg2, offset);
1170     tcg_gen_ld_i32(TCGV_HIGH(ret), arg2, offset + 4);
1171 #endif
1172 }
1173 
1174 void tcg_gen_st_i64(TCGv_i64 arg1, TCGv_ptr arg2, tcg_target_long offset)
1175 {
1176 #if HOST_BIG_ENDIAN
1177     tcg_gen_st_i32(TCGV_HIGH(arg1), arg2, offset);
1178     tcg_gen_st_i32(TCGV_LOW(arg1), arg2, offset + 4);
1179 #else
1180     tcg_gen_st_i32(TCGV_LOW(arg1), arg2, offset);
1181     tcg_gen_st_i32(TCGV_HIGH(arg1), arg2, offset + 4);
1182 #endif
1183 }
1184 
1185 void tcg_gen_and_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
1186 {
1187     tcg_gen_and_i32(TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_LOW(arg2));
1188     tcg_gen_and_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), TCGV_HIGH(arg2));
1189 }
1190 
1191 void tcg_gen_or_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
1192 {
1193     tcg_gen_or_i32(TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_LOW(arg2));
1194     tcg_gen_or_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), TCGV_HIGH(arg2));
1195 }
1196 
1197 void tcg_gen_xor_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
1198 {
1199     tcg_gen_xor_i32(TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_LOW(arg2));
1200     tcg_gen_xor_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), TCGV_HIGH(arg2));
1201 }
1202 
1203 void tcg_gen_shl_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
1204 {
1205     gen_helper_shl_i64(ret, arg1, arg2);
1206 }
1207 
1208 void tcg_gen_shr_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
1209 {
1210     gen_helper_shr_i64(ret, arg1, arg2);
1211 }
1212 
1213 void tcg_gen_sar_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
1214 {
1215     gen_helper_sar_i64(ret, arg1, arg2);
1216 }
1217 
1218 void tcg_gen_mul_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
1219 {
1220     TCGv_i64 t0;
1221     TCGv_i32 t1;
1222 
1223     t0 = tcg_temp_new_i64();
1224     t1 = tcg_temp_new_i32();
1225 
1226     tcg_gen_mulu2_i32(TCGV_LOW(t0), TCGV_HIGH(t0),
1227                       TCGV_LOW(arg1), TCGV_LOW(arg2));
1228 
1229     tcg_gen_mul_i32(t1, TCGV_LOW(arg1), TCGV_HIGH(arg2));
1230     tcg_gen_add_i32(TCGV_HIGH(t0), TCGV_HIGH(t0), t1);
1231     tcg_gen_mul_i32(t1, TCGV_HIGH(arg1), TCGV_LOW(arg2));
1232     tcg_gen_add_i32(TCGV_HIGH(t0), TCGV_HIGH(t0), t1);
1233 
1234     tcg_gen_mov_i64(ret, t0);
1235     tcg_temp_free_i64(t0);
1236     tcg_temp_free_i32(t1);
1237 }
1238 
1239 #else
1240 
1241 void tcg_gen_movi_i64(TCGv_i64 ret, int64_t arg)
1242 {
1243     tcg_gen_mov_i64(ret, tcg_constant_i64(arg));
1244 }
1245 
#endif /* TCG_TARGET_REG_BITS == 32 */
1247 
1248 void tcg_gen_addi_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
1249 {
1250     /* some cases can be optimized here */
1251     if (arg2 == 0) {
1252         tcg_gen_mov_i64(ret, arg1);
1253     } else if (TCG_TARGET_REG_BITS == 64) {
1254         tcg_gen_add_i64(ret, arg1, tcg_constant_i64(arg2));
1255     } else {
1256         tcg_gen_add2_i32(TCGV_LOW(ret), TCGV_HIGH(ret),
1257                          TCGV_LOW(arg1), TCGV_HIGH(arg1),
1258                          tcg_constant_i32(arg2), tcg_constant_i32(arg2 >> 32));
1259     }
1260 }
1261 
1262 void tcg_gen_subfi_i64(TCGv_i64 ret, int64_t arg1, TCGv_i64 arg2)
1263 {
1264     if (arg1 == 0 && TCG_TARGET_HAS_neg_i64) {
1265         /* Don't recurse with tcg_gen_neg_i64.  */
1266         tcg_gen_op2_i64(INDEX_op_neg_i64, ret, arg2);
1267     } else if (TCG_TARGET_REG_BITS == 64) {
1268         tcg_gen_sub_i64(ret, tcg_constant_i64(arg1), arg2);
1269     } else {
1270         tcg_gen_sub2_i32(TCGV_LOW(ret), TCGV_HIGH(ret),
1271                          tcg_constant_i32(arg1), tcg_constant_i32(arg1 >> 32),
1272                          TCGV_LOW(arg2), TCGV_HIGH(arg2));
1273     }
1274 }
1275 
1276 void tcg_gen_subi_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
1277 {
1278     /* some cases can be optimized here */
1279     if (arg2 == 0) {
1280         tcg_gen_mov_i64(ret, arg1);
1281     } else if (TCG_TARGET_REG_BITS == 64) {
1282         tcg_gen_sub_i64(ret, arg1, tcg_constant_i64(arg2));
1283     } else {
1284         tcg_gen_sub2_i32(TCGV_LOW(ret), TCGV_HIGH(ret),
1285                          TCGV_LOW(arg1), TCGV_HIGH(arg1),
1286                          tcg_constant_i32(arg2), tcg_constant_i32(arg2 >> 32));
1287     }
1288 }
1289 
1290 void tcg_gen_andi_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
1291 {
1292     if (TCG_TARGET_REG_BITS == 32) {
1293         tcg_gen_andi_i32(TCGV_LOW(ret), TCGV_LOW(arg1), arg2);
1294         tcg_gen_andi_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), arg2 >> 32);
1295         return;
1296     }
1297 
1298     /* Some cases can be optimized here.  */
1299     switch (arg2) {
1300     case 0:
1301         tcg_gen_movi_i64(ret, 0);
1302         return;
1303     case -1:
1304         tcg_gen_mov_i64(ret, arg1);
1305         return;
1306     case 0xff:
1307         /* Don't recurse with tcg_gen_ext8u_i64.  */
1308         if (TCG_TARGET_HAS_ext8u_i64) {
1309             tcg_gen_op2_i64(INDEX_op_ext8u_i64, ret, arg1);
1310             return;
1311         }
1312         break;
1313     case 0xffff:
1314         if (TCG_TARGET_HAS_ext16u_i64) {
1315             tcg_gen_op2_i64(INDEX_op_ext16u_i64, ret, arg1);
1316             return;
1317         }
1318         break;
1319     case 0xffffffffu:
1320         if (TCG_TARGET_HAS_ext32u_i64) {
1321             tcg_gen_op2_i64(INDEX_op_ext32u_i64, ret, arg1);
1322             return;
1323         }
1324         break;
1325     }
1326 
1327     tcg_gen_and_i64(ret, arg1, tcg_constant_i64(arg2));
1328 }
1329 
1330 void tcg_gen_ori_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
1331 {
1332     if (TCG_TARGET_REG_BITS == 32) {
1333         tcg_gen_ori_i32(TCGV_LOW(ret), TCGV_LOW(arg1), arg2);
1334         tcg_gen_ori_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), arg2 >> 32);
1335         return;
1336     }
1337     /* Some cases can be optimized here.  */
1338     if (arg2 == -1) {
1339         tcg_gen_movi_i64(ret, -1);
1340     } else if (arg2 == 0) {
1341         tcg_gen_mov_i64(ret, arg1);
1342     } else {
1343         tcg_gen_or_i64(ret, arg1, tcg_constant_i64(arg2));
1344     }
1345 }
1346 
1347 void tcg_gen_xori_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
1348 {
1349     if (TCG_TARGET_REG_BITS == 32) {
1350         tcg_gen_xori_i32(TCGV_LOW(ret), TCGV_LOW(arg1), arg2);
1351         tcg_gen_xori_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), arg2 >> 32);
1352         return;
1353     }
1354     /* Some cases can be optimized here.  */
1355     if (arg2 == 0) {
1356         tcg_gen_mov_i64(ret, arg1);
1357     } else if (arg2 == -1 && TCG_TARGET_HAS_not_i64) {
1358         /* Don't recurse with tcg_gen_not_i64.  */
1359         tcg_gen_op2_i64(INDEX_op_not_i64, ret, arg1);
1360     } else {
1361         tcg_gen_xor_i64(ret, arg1, tcg_constant_i64(arg2));
1362     }
1363 }
1364 
1365 static inline void tcg_gen_shifti_i64(TCGv_i64 ret, TCGv_i64 arg1,
1366                                       unsigned c, bool right, bool arith)
1367 {
1368     tcg_debug_assert(c < 64);
1369     if (c == 0) {
1370         tcg_gen_mov_i32(TCGV_LOW(ret), TCGV_LOW(arg1));
1371         tcg_gen_mov_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1));
1372     } else if (c >= 32) {
1373         c -= 32;
1374         if (right) {
1375             if (arith) {
1376                 tcg_gen_sari_i32(TCGV_LOW(ret), TCGV_HIGH(arg1), c);
1377                 tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), 31);
1378             } else {
1379                 tcg_gen_shri_i32(TCGV_LOW(ret), TCGV_HIGH(arg1), c);
1380                 tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
1381             }
1382         } else {
1383             tcg_gen_shli_i32(TCGV_HIGH(ret), TCGV_LOW(arg1), c);
1384             tcg_gen_movi_i32(TCGV_LOW(ret), 0);
1385         }
1386     } else if (right) {
1387         if (TCG_TARGET_HAS_extract2_i32) {
1388             tcg_gen_extract2_i32(TCGV_LOW(ret),
1389                                  TCGV_LOW(arg1), TCGV_HIGH(arg1), c);
1390         } else {
1391             tcg_gen_shri_i32(TCGV_LOW(ret), TCGV_LOW(arg1), c);
1392             tcg_gen_deposit_i32(TCGV_LOW(ret), TCGV_LOW(ret),
1393                                 TCGV_HIGH(arg1), 32 - c, c);
1394         }
1395         if (arith) {
1396             tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), c);
1397         } else {
1398             tcg_gen_shri_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), c);
1399         }
1400     } else {
1401         if (TCG_TARGET_HAS_extract2_i32) {
1402             tcg_gen_extract2_i32(TCGV_HIGH(ret),
1403                                  TCGV_LOW(arg1), TCGV_HIGH(arg1), 32 - c);
1404         } else {
1405             TCGv_i32 t0 = tcg_temp_new_i32();
1406             tcg_gen_shri_i32(t0, TCGV_LOW(arg1), 32 - c);
1407             tcg_gen_deposit_i32(TCGV_HIGH(ret), t0,
1408                                 TCGV_HIGH(arg1), c, 32 - c);
1409             tcg_temp_free_i32(t0);
1410         }
1411         tcg_gen_shli_i32(TCGV_LOW(ret), TCGV_LOW(arg1), c);
1412     }
1413 }
1414 
1415 void tcg_gen_shli_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
1416 {
1417     tcg_debug_assert(arg2 >= 0 && arg2 < 64);
1418     if (TCG_TARGET_REG_BITS == 32) {
1419         tcg_gen_shifti_i64(ret, arg1, arg2, 0, 0);
1420     } else if (arg2 == 0) {
1421         tcg_gen_mov_i64(ret, arg1);
1422     } else {
1423         tcg_gen_shl_i64(ret, arg1, tcg_constant_i64(arg2));
1424     }
1425 }
1426 
1427 void tcg_gen_shri_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
1428 {
1429     tcg_debug_assert(arg2 >= 0 && arg2 < 64);
1430     if (TCG_TARGET_REG_BITS == 32) {
1431         tcg_gen_shifti_i64(ret, arg1, arg2, 1, 0);
1432     } else if (arg2 == 0) {
1433         tcg_gen_mov_i64(ret, arg1);
1434     } else {
1435         tcg_gen_shr_i64(ret, arg1, tcg_constant_i64(arg2));
1436     }
1437 }
1438 
1439 void tcg_gen_sari_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
1440 {
1441     tcg_debug_assert(arg2 >= 0 && arg2 < 64);
1442     if (TCG_TARGET_REG_BITS == 32) {
1443         tcg_gen_shifti_i64(ret, arg1, arg2, 1, 1);
1444     } else if (arg2 == 0) {
1445         tcg_gen_mov_i64(ret, arg1);
1446     } else {
1447         tcg_gen_sar_i64(ret, arg1, tcg_constant_i64(arg2));
1448     }
1449 }
1450 
1451 void tcg_gen_brcond_i64(TCGCond cond, TCGv_i64 arg1, TCGv_i64 arg2, TCGLabel *l)
1452 {
1453     if (cond == TCG_COND_ALWAYS) {
1454         tcg_gen_br(l);
1455     } else if (cond != TCG_COND_NEVER) {
1456         l->refs++;
1457         if (TCG_TARGET_REG_BITS == 32) {
1458             tcg_gen_op6ii_i32(INDEX_op_brcond2_i32, TCGV_LOW(arg1),
1459                               TCGV_HIGH(arg1), TCGV_LOW(arg2),
1460                               TCGV_HIGH(arg2), cond, label_arg(l));
1461         } else {
1462             tcg_gen_op4ii_i64(INDEX_op_brcond_i64, arg1, arg2, cond,
1463                               label_arg(l));
1464         }
1465     }
1466 }
1467 
1468 void tcg_gen_brcondi_i64(TCGCond cond, TCGv_i64 arg1, int64_t arg2, TCGLabel *l)
1469 {
1470     if (TCG_TARGET_REG_BITS == 64) {
1471         tcg_gen_brcond_i64(cond, arg1, tcg_constant_i64(arg2), l);
1472     } else if (cond == TCG_COND_ALWAYS) {
1473         tcg_gen_br(l);
1474     } else if (cond != TCG_COND_NEVER) {
1475         l->refs++;
1476         tcg_gen_op6ii_i32(INDEX_op_brcond2_i32,
1477                           TCGV_LOW(arg1), TCGV_HIGH(arg1),
1478                           tcg_constant_i32(arg2),
1479                           tcg_constant_i32(arg2 >> 32),
1480                           cond, label_arg(l));
1481     }
1482 }
1483 
1484 void tcg_gen_setcond_i64(TCGCond cond, TCGv_i64 ret,
1485                          TCGv_i64 arg1, TCGv_i64 arg2)
1486 {
1487     if (cond == TCG_COND_ALWAYS) {
1488         tcg_gen_movi_i64(ret, 1);
1489     } else if (cond == TCG_COND_NEVER) {
1490         tcg_gen_movi_i64(ret, 0);
1491     } else {
1492         if (TCG_TARGET_REG_BITS == 32) {
1493             tcg_gen_op6i_i32(INDEX_op_setcond2_i32, TCGV_LOW(ret),
1494                              TCGV_LOW(arg1), TCGV_HIGH(arg1),
1495                              TCGV_LOW(arg2), TCGV_HIGH(arg2), cond);
1496             tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
1497         } else {
1498             tcg_gen_op4i_i64(INDEX_op_setcond_i64, ret, arg1, arg2, cond);
1499         }
1500     }
1501 }
1502 
1503 void tcg_gen_setcondi_i64(TCGCond cond, TCGv_i64 ret,
1504                           TCGv_i64 arg1, int64_t arg2)
1505 {
1506     if (TCG_TARGET_REG_BITS == 64) {
1507         tcg_gen_setcond_i64(cond, ret, arg1, tcg_constant_i64(arg2));
1508     } else if (cond == TCG_COND_ALWAYS) {
1509         tcg_gen_movi_i64(ret, 1);
1510     } else if (cond == TCG_COND_NEVER) {
1511         tcg_gen_movi_i64(ret, 0);
1512     } else {
1513         tcg_gen_op6i_i32(INDEX_op_setcond2_i32, TCGV_LOW(ret),
1514                          TCGV_LOW(arg1), TCGV_HIGH(arg1),
1515                          tcg_constant_i32(arg2),
1516                          tcg_constant_i32(arg2 >> 32), cond);
1517         tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
1518     }
1519 }
1520 
1521 void tcg_gen_muli_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
1522 {
1523     if (arg2 == 0) {
1524         tcg_gen_movi_i64(ret, 0);
1525     } else if (is_power_of_2(arg2)) {
1526         tcg_gen_shli_i64(ret, arg1, ctz64(arg2));
1527     } else {
1528         TCGv_i64 t0 = tcg_const_i64(arg2);
1529         tcg_gen_mul_i64(ret, arg1, t0);
1530         tcg_temp_free_i64(t0);
1531     }
1532 }
1533 
1534 void tcg_gen_div_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
1535 {
1536     if (TCG_TARGET_HAS_div_i64) {
1537         tcg_gen_op3_i64(INDEX_op_div_i64, ret, arg1, arg2);
1538     } else if (TCG_TARGET_HAS_div2_i64) {
1539         TCGv_i64 t0 = tcg_temp_new_i64();
1540         tcg_gen_sari_i64(t0, arg1, 63);
1541         tcg_gen_op5_i64(INDEX_op_div2_i64, ret, t0, arg1, t0, arg2);
1542         tcg_temp_free_i64(t0);
1543     } else {
1544         gen_helper_div_i64(ret, arg1, arg2);
1545     }
1546 }
1547 
1548 void tcg_gen_rem_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
1549 {
1550     if (TCG_TARGET_HAS_rem_i64) {
1551         tcg_gen_op3_i64(INDEX_op_rem_i64, ret, arg1, arg2);
1552     } else if (TCG_TARGET_HAS_div_i64) {
1553         TCGv_i64 t0 = tcg_temp_new_i64();
1554         tcg_gen_op3_i64(INDEX_op_div_i64, t0, arg1, arg2);
1555         tcg_gen_mul_i64(t0, t0, arg2);
1556         tcg_gen_sub_i64(ret, arg1, t0);
1557         tcg_temp_free_i64(t0);
1558     } else if (TCG_TARGET_HAS_div2_i64) {
1559         TCGv_i64 t0 = tcg_temp_new_i64();
1560         tcg_gen_sari_i64(t0, arg1, 63);
1561         tcg_gen_op5_i64(INDEX_op_div2_i64, t0, ret, arg1, t0, arg2);
1562         tcg_temp_free_i64(t0);
1563     } else {
1564         gen_helper_rem_i64(ret, arg1, arg2);
1565     }
1566 }
1567 
1568 void tcg_gen_divu_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
1569 {
1570     if (TCG_TARGET_HAS_div_i64) {
1571         tcg_gen_op3_i64(INDEX_op_divu_i64, ret, arg1, arg2);
1572     } else if (TCG_TARGET_HAS_div2_i64) {
1573         TCGv_i64 t0 = tcg_temp_new_i64();
1574         tcg_gen_movi_i64(t0, 0);
1575         tcg_gen_op5_i64(INDEX_op_divu2_i64, ret, t0, arg1, t0, arg2);
1576         tcg_temp_free_i64(t0);
1577     } else {
1578         gen_helper_divu_i64(ret, arg1, arg2);
1579     }
1580 }
1581 
1582 void tcg_gen_remu_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
1583 {
1584     if (TCG_TARGET_HAS_rem_i64) {
1585         tcg_gen_op3_i64(INDEX_op_remu_i64, ret, arg1, arg2);
1586     } else if (TCG_TARGET_HAS_div_i64) {
1587         TCGv_i64 t0 = tcg_temp_new_i64();
1588         tcg_gen_op3_i64(INDEX_op_divu_i64, t0, arg1, arg2);
1589         tcg_gen_mul_i64(t0, t0, arg2);
1590         tcg_gen_sub_i64(ret, arg1, t0);
1591         tcg_temp_free_i64(t0);
1592     } else if (TCG_TARGET_HAS_div2_i64) {
1593         TCGv_i64 t0 = tcg_temp_new_i64();
1594         tcg_gen_movi_i64(t0, 0);
1595         tcg_gen_op5_i64(INDEX_op_divu2_i64, t0, ret, arg1, t0, arg2);
1596         tcg_temp_free_i64(t0);
1597     } else {
1598         gen_helper_remu_i64(ret, arg1, arg2);
1599     }
1600 }
1601 
1602 void tcg_gen_ext8s_i64(TCGv_i64 ret, TCGv_i64 arg)
1603 {
1604     if (TCG_TARGET_REG_BITS == 32) {
1605         tcg_gen_ext8s_i32(TCGV_LOW(ret), TCGV_LOW(arg));
1606         tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31);
1607     } else if (TCG_TARGET_HAS_ext8s_i64) {
1608         tcg_gen_op2_i64(INDEX_op_ext8s_i64, ret, arg);
1609     } else {
1610         tcg_gen_shli_i64(ret, arg, 56);
1611         tcg_gen_sari_i64(ret, ret, 56);
1612     }
1613 }
1614 
1615 void tcg_gen_ext16s_i64(TCGv_i64 ret, TCGv_i64 arg)
1616 {
1617     if (TCG_TARGET_REG_BITS == 32) {
1618         tcg_gen_ext16s_i32(TCGV_LOW(ret), TCGV_LOW(arg));
1619         tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31);
1620     } else if (TCG_TARGET_HAS_ext16s_i64) {
1621         tcg_gen_op2_i64(INDEX_op_ext16s_i64, ret, arg);
1622     } else {
1623         tcg_gen_shli_i64(ret, arg, 48);
1624         tcg_gen_sari_i64(ret, ret, 48);
1625     }
1626 }
1627 
1628 void tcg_gen_ext32s_i64(TCGv_i64 ret, TCGv_i64 arg)
1629 {
1630     if (TCG_TARGET_REG_BITS == 32) {
1631         tcg_gen_mov_i32(TCGV_LOW(ret), TCGV_LOW(arg));
1632         tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31);
1633     } else if (TCG_TARGET_HAS_ext32s_i64) {
1634         tcg_gen_op2_i64(INDEX_op_ext32s_i64, ret, arg);
1635     } else {
1636         tcg_gen_shli_i64(ret, arg, 32);
1637         tcg_gen_sari_i64(ret, ret, 32);
1638     }
1639 }
1640 
1641 void tcg_gen_ext8u_i64(TCGv_i64 ret, TCGv_i64 arg)
1642 {
1643     if (TCG_TARGET_REG_BITS == 32) {
1644         tcg_gen_ext8u_i32(TCGV_LOW(ret), TCGV_LOW(arg));
1645         tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
1646     } else if (TCG_TARGET_HAS_ext8u_i64) {
1647         tcg_gen_op2_i64(INDEX_op_ext8u_i64, ret, arg);
1648     } else {
1649         tcg_gen_andi_i64(ret, arg, 0xffu);
1650     }
1651 }
1652 
1653 void tcg_gen_ext16u_i64(TCGv_i64 ret, TCGv_i64 arg)
1654 {
1655     if (TCG_TARGET_REG_BITS == 32) {
1656         tcg_gen_ext16u_i32(TCGV_LOW(ret), TCGV_LOW(arg));
1657         tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
1658     } else if (TCG_TARGET_HAS_ext16u_i64) {
1659         tcg_gen_op2_i64(INDEX_op_ext16u_i64, ret, arg);
1660     } else {
1661         tcg_gen_andi_i64(ret, arg, 0xffffu);
1662     }
1663 }
1664 
1665 void tcg_gen_ext32u_i64(TCGv_i64 ret, TCGv_i64 arg)
1666 {
1667     if (TCG_TARGET_REG_BITS == 32) {
1668         tcg_gen_mov_i32(TCGV_LOW(ret), TCGV_LOW(arg));
1669         tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
1670     } else if (TCG_TARGET_HAS_ext32u_i64) {
1671         tcg_gen_op2_i64(INDEX_op_ext32u_i64, ret, arg);
1672     } else {
1673         tcg_gen_andi_i64(ret, arg, 0xffffffffu);
1674     }
1675 }
1676 
1677 void tcg_gen_bswap16_i64(TCGv_i64 ret, TCGv_i64 arg, int flags)
1678 {
1679     /* Only one extension flag may be present. */
1680     tcg_debug_assert(!(flags & TCG_BSWAP_OS) || !(flags & TCG_BSWAP_OZ));
1681 
1682     if (TCG_TARGET_REG_BITS == 32) {
1683         tcg_gen_bswap16_i32(TCGV_LOW(ret), TCGV_LOW(arg), flags);
1684         if (flags & TCG_BSWAP_OS) {
1685             tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31);
1686         } else {
1687             tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
1688         }
1689     } else if (TCG_TARGET_HAS_bswap16_i64) {
1690         tcg_gen_op3i_i64(INDEX_op_bswap16_i64, ret, arg, flags);
1691     } else {
1692         TCGv_i64 t0 = tcg_temp_new_i64();
1693         TCGv_i64 t1 = tcg_temp_new_i64();
1694 
1695         tcg_gen_shri_i64(t0, arg, 8);
1696         if (!(flags & TCG_BSWAP_IZ)) {
1697             tcg_gen_ext8u_i64(t0, t0);
1698         }
1699 
1700         if (flags & TCG_BSWAP_OS) {
1701             tcg_gen_shli_i64(t1, arg, 56);
1702             tcg_gen_sari_i64(t1, t1, 48);
1703         } else if (flags & TCG_BSWAP_OZ) {
1704             tcg_gen_ext8u_i64(t1, arg);
1705             tcg_gen_shli_i64(t1, t1, 8);
1706         } else {
1707             tcg_gen_shli_i64(t1, arg, 8);
1708         }
1709 
1710         tcg_gen_or_i64(ret, t0, t1);
1711         tcg_temp_free_i64(t0);
1712         tcg_temp_free_i64(t1);
1713     }
1714 }
1715 
1716 void tcg_gen_bswap32_i64(TCGv_i64 ret, TCGv_i64 arg, int flags)
1717 {
1718     /* Only one extension flag may be present. */
1719     tcg_debug_assert(!(flags & TCG_BSWAP_OS) || !(flags & TCG_BSWAP_OZ));
1720 
1721     if (TCG_TARGET_REG_BITS == 32) {
1722         tcg_gen_bswap32_i32(TCGV_LOW(ret), TCGV_LOW(arg));
1723         if (flags & TCG_BSWAP_OS) {
1724             tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31);
1725         } else {
1726             tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
1727         }
1728     } else if (TCG_TARGET_HAS_bswap32_i64) {
1729         tcg_gen_op3i_i64(INDEX_op_bswap32_i64, ret, arg, flags);
1730     } else {
1731         TCGv_i64 t0 = tcg_temp_new_i64();
1732         TCGv_i64 t1 = tcg_temp_new_i64();
1733         TCGv_i64 t2 = tcg_constant_i64(0x00ff00ff);
1734 
1735                                             /* arg = xxxxabcd */
1736         tcg_gen_shri_i64(t0, arg, 8);       /*  t0 = .xxxxabc */
1737         tcg_gen_and_i64(t1, arg, t2);       /*  t1 = .....b.d */
1738         tcg_gen_and_i64(t0, t0, t2);        /*  t0 = .....a.c */
1739         tcg_gen_shli_i64(t1, t1, 8);        /*  t1 = ....b.d. */
1740         tcg_gen_or_i64(ret, t0, t1);        /* ret = ....badc */
1741 
1742         tcg_gen_shli_i64(t1, ret, 48);      /*  t1 = dc...... */
1743         tcg_gen_shri_i64(t0, ret, 16);      /*  t0 = ......ba */
1744         if (flags & TCG_BSWAP_OS) {
1745             tcg_gen_sari_i64(t1, t1, 32);   /*  t1 = ssssdc.. */
1746         } else {
1747             tcg_gen_shri_i64(t1, t1, 32);   /*  t1 = ....dc.. */
1748         }
1749         tcg_gen_or_i64(ret, t0, t1);        /* ret = ssssdcba */
1750 
1751         tcg_temp_free_i64(t0);
1752         tcg_temp_free_i64(t1);
1753     }
1754 }
1755 
1756 void tcg_gen_bswap64_i64(TCGv_i64 ret, TCGv_i64 arg)
1757 {
1758     if (TCG_TARGET_REG_BITS == 32) {
1759         TCGv_i32 t0, t1;
1760         t0 = tcg_temp_new_i32();
1761         t1 = tcg_temp_new_i32();
1762 
1763         tcg_gen_bswap32_i32(t0, TCGV_LOW(arg));
1764         tcg_gen_bswap32_i32(t1, TCGV_HIGH(arg));
1765         tcg_gen_mov_i32(TCGV_LOW(ret), t1);
1766         tcg_gen_mov_i32(TCGV_HIGH(ret), t0);
1767         tcg_temp_free_i32(t0);
1768         tcg_temp_free_i32(t1);
1769     } else if (TCG_TARGET_HAS_bswap64_i64) {
1770         tcg_gen_op3i_i64(INDEX_op_bswap64_i64, ret, arg, 0);
1771     } else {
1772         TCGv_i64 t0 = tcg_temp_new_i64();
1773         TCGv_i64 t1 = tcg_temp_new_i64();
1774         TCGv_i64 t2 = tcg_temp_new_i64();
1775 
1776                                         /* arg = abcdefgh */
1777         tcg_gen_movi_i64(t2, 0x00ff00ff00ff00ffull);
1778         tcg_gen_shri_i64(t0, arg, 8);   /*  t0 = .abcdefg */
1779         tcg_gen_and_i64(t1, arg, t2);   /*  t1 = .b.d.f.h */
1780         tcg_gen_and_i64(t0, t0, t2);    /*  t0 = .a.c.e.g */
1781         tcg_gen_shli_i64(t1, t1, 8);    /*  t1 = b.d.f.h. */
1782         tcg_gen_or_i64(ret, t0, t1);    /* ret = badcfehg */
1783 
1784         tcg_gen_movi_i64(t2, 0x0000ffff0000ffffull);
1785         tcg_gen_shri_i64(t0, ret, 16);  /*  t0 = ..badcfe */
1786         tcg_gen_and_i64(t1, ret, t2);   /*  t1 = ..dc..hg */
1787         tcg_gen_and_i64(t0, t0, t2);    /*  t0 = ..ba..fe */
1788         tcg_gen_shli_i64(t1, t1, 16);   /*  t1 = dc..hg.. */
1789         tcg_gen_or_i64(ret, t0, t1);    /* ret = dcbahgfe */
1790 
1791         tcg_gen_shri_i64(t0, ret, 32);  /*  t0 = ....dcba */
1792         tcg_gen_shli_i64(t1, ret, 32);  /*  t1 = hgfe.... */
1793         tcg_gen_or_i64(ret, t0, t1);    /* ret = hgfedcba */
1794 
1795         tcg_temp_free_i64(t0);
1796         tcg_temp_free_i64(t1);
1797         tcg_temp_free_i64(t2);
1798     }
1799 }
1800 
1801 void tcg_gen_hswap_i64(TCGv_i64 ret, TCGv_i64 arg)
1802 {
1803     uint64_t m = 0x0000ffff0000ffffull;
1804     TCGv_i64 t0 = tcg_temp_new_i64();
1805     TCGv_i64 t1 = tcg_temp_new_i64();
1806 
1807     /* See include/qemu/bitops.h, hswap64. */
1808     tcg_gen_rotli_i64(t1, arg, 32);
1809     tcg_gen_andi_i64(t0, t1, m);
1810     tcg_gen_shli_i64(t0, t0, 16);
1811     tcg_gen_shri_i64(t1, t1, 16);
1812     tcg_gen_andi_i64(t1, t1, m);
1813     tcg_gen_or_i64(ret, t0, t1);
1814 
1815     tcg_temp_free_i64(t0);
1816     tcg_temp_free_i64(t1);
1817 }
1818 
1819 void tcg_gen_wswap_i64(TCGv_i64 ret, TCGv_i64 arg)
1820 {
1821     /* Swapping 2 32-bit elements is a rotate. */
1822     tcg_gen_rotli_i64(ret, arg, 32);
1823 }
1824 
1825 void tcg_gen_not_i64(TCGv_i64 ret, TCGv_i64 arg)
1826 {
1827     if (TCG_TARGET_REG_BITS == 32) {
1828         tcg_gen_not_i32(TCGV_LOW(ret), TCGV_LOW(arg));
1829         tcg_gen_not_i32(TCGV_HIGH(ret), TCGV_HIGH(arg));
1830     } else if (TCG_TARGET_HAS_not_i64) {
1831         tcg_gen_op2_i64(INDEX_op_not_i64, ret, arg);
1832     } else {
1833         tcg_gen_xori_i64(ret, arg, -1);
1834     }
1835 }
1836 
1837 void tcg_gen_andc_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
1838 {
1839     if (TCG_TARGET_REG_BITS == 32) {
1840         tcg_gen_andc_i32(TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_LOW(arg2));
1841         tcg_gen_andc_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), TCGV_HIGH(arg2));
1842     } else if (TCG_TARGET_HAS_andc_i64) {
1843         tcg_gen_op3_i64(INDEX_op_andc_i64, ret, arg1, arg2);
1844     } else {
1845         TCGv_i64 t0 = tcg_temp_new_i64();
1846         tcg_gen_not_i64(t0, arg2);
1847         tcg_gen_and_i64(ret, arg1, t0);
1848         tcg_temp_free_i64(t0);
1849     }
1850 }
1851 
1852 void tcg_gen_eqv_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
1853 {
1854     if (TCG_TARGET_REG_BITS == 32) {
1855         tcg_gen_eqv_i32(TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_LOW(arg2));
1856         tcg_gen_eqv_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), TCGV_HIGH(arg2));
1857     } else if (TCG_TARGET_HAS_eqv_i64) {
1858         tcg_gen_op3_i64(INDEX_op_eqv_i64, ret, arg1, arg2);
1859     } else {
1860         tcg_gen_xor_i64(ret, arg1, arg2);
1861         tcg_gen_not_i64(ret, ret);
1862     }
1863 }
1864 
1865 void tcg_gen_nand_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
1866 {
1867     if (TCG_TARGET_REG_BITS == 32) {
1868         tcg_gen_nand_i32(TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_LOW(arg2));
1869         tcg_gen_nand_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), TCGV_HIGH(arg2));
1870     } else if (TCG_TARGET_HAS_nand_i64) {
1871         tcg_gen_op3_i64(INDEX_op_nand_i64, ret, arg1, arg2);
1872     } else {
1873         tcg_gen_and_i64(ret, arg1, arg2);
1874         tcg_gen_not_i64(ret, ret);
1875     }
1876 }
1877 
1878 void tcg_gen_nor_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
1879 {
1880     if (TCG_TARGET_REG_BITS == 32) {
1881         tcg_gen_nor_i32(TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_LOW(arg2));
1882         tcg_gen_nor_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), TCGV_HIGH(arg2));
1883     } else if (TCG_TARGET_HAS_nor_i64) {
1884         tcg_gen_op3_i64(INDEX_op_nor_i64, ret, arg1, arg2);
1885     } else {
1886         tcg_gen_or_i64(ret, arg1, arg2);
1887         tcg_gen_not_i64(ret, ret);
1888     }
1889 }
1890 
1891 void tcg_gen_orc_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
1892 {
1893     if (TCG_TARGET_REG_BITS == 32) {
1894         tcg_gen_orc_i32(TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_LOW(arg2));
1895         tcg_gen_orc_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), TCGV_HIGH(arg2));
1896     } else if (TCG_TARGET_HAS_orc_i64) {
1897         tcg_gen_op3_i64(INDEX_op_orc_i64, ret, arg1, arg2);
1898     } else {
1899         TCGv_i64 t0 = tcg_temp_new_i64();
1900         tcg_gen_not_i64(t0, arg2);
1901         tcg_gen_or_i64(ret, arg1, t0);
1902         tcg_temp_free_i64(t0);
1903     }
1904 }
1905 
1906 void tcg_gen_clz_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
1907 {
1908     if (TCG_TARGET_HAS_clz_i64) {
1909         tcg_gen_op3_i64(INDEX_op_clz_i64, ret, arg1, arg2);
1910     } else {
1911         gen_helper_clz_i64(ret, arg1, arg2);
1912     }
1913 }
1914 
1915 void tcg_gen_clzi_i64(TCGv_i64 ret, TCGv_i64 arg1, uint64_t arg2)
1916 {
1917     if (TCG_TARGET_REG_BITS == 32
1918         && TCG_TARGET_HAS_clz_i32
1919         && arg2 <= 0xffffffffu) {
1920         TCGv_i32 t = tcg_temp_new_i32();
1921         tcg_gen_clzi_i32(t, TCGV_LOW(arg1), arg2 - 32);
1922         tcg_gen_addi_i32(t, t, 32);
1923         tcg_gen_clz_i32(TCGV_LOW(ret), TCGV_HIGH(arg1), t);
1924         tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
1925         tcg_temp_free_i32(t);
1926     } else {
1927         TCGv_i64 t0 = tcg_const_i64(arg2);
1928         tcg_gen_clz_i64(ret, arg1, t0);
1929         tcg_temp_free_i64(t0);
1930     }
1931 }
1932 
1933 void tcg_gen_ctz_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
1934 {
1935     if (TCG_TARGET_HAS_ctz_i64) {
1936         tcg_gen_op3_i64(INDEX_op_ctz_i64, ret, arg1, arg2);
1937     } else if (TCG_TARGET_HAS_ctpop_i64 || TCG_TARGET_HAS_clz_i64) {
1938         TCGv_i64 z, t = tcg_temp_new_i64();
1939 
1940         if (TCG_TARGET_HAS_ctpop_i64) {
1941             tcg_gen_subi_i64(t, arg1, 1);
1942             tcg_gen_andc_i64(t, t, arg1);
1943             tcg_gen_ctpop_i64(t, t);
1944         } else {
1945             /* Since all non-x86 hosts have clz(0) == 64, don't fight it.  */
1946             tcg_gen_neg_i64(t, arg1);
1947             tcg_gen_and_i64(t, t, arg1);
1948             tcg_gen_clzi_i64(t, t, 64);
1949             tcg_gen_xori_i64(t, t, 63);
1950         }
1951         z = tcg_constant_i64(0);
1952         tcg_gen_movcond_i64(TCG_COND_EQ, ret, arg1, z, arg2, t);
1953         tcg_temp_free_i64(t);
1954         tcg_temp_free_i64(z);
1955     } else {
1956         gen_helper_ctz_i64(ret, arg1, arg2);
1957     }
1958 }
1959 
1960 void tcg_gen_ctzi_i64(TCGv_i64 ret, TCGv_i64 arg1, uint64_t arg2)
1961 {
1962     if (TCG_TARGET_REG_BITS == 32
1963         && TCG_TARGET_HAS_ctz_i32
1964         && arg2 <= 0xffffffffu) {
1965         TCGv_i32 t32 = tcg_temp_new_i32();
1966         tcg_gen_ctzi_i32(t32, TCGV_HIGH(arg1), arg2 - 32);
1967         tcg_gen_addi_i32(t32, t32, 32);
1968         tcg_gen_ctz_i32(TCGV_LOW(ret), TCGV_LOW(arg1), t32);
1969         tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
1970         tcg_temp_free_i32(t32);
1971     } else if (!TCG_TARGET_HAS_ctz_i64
1972                && TCG_TARGET_HAS_ctpop_i64
1973                && arg2 == 64) {
1974         /* This equivalence has the advantage of not requiring a fixup.  */
1975         TCGv_i64 t = tcg_temp_new_i64();
1976         tcg_gen_subi_i64(t, arg1, 1);
1977         tcg_gen_andc_i64(t, t, arg1);
1978         tcg_gen_ctpop_i64(ret, t);
1979         tcg_temp_free_i64(t);
1980     } else {
1981         TCGv_i64 t0 = tcg_const_i64(arg2);
1982         tcg_gen_ctz_i64(ret, arg1, t0);
1983         tcg_temp_free_i64(t0);
1984     }
1985 }
1986 
1987 void tcg_gen_clrsb_i64(TCGv_i64 ret, TCGv_i64 arg)
1988 {
1989     if (TCG_TARGET_HAS_clz_i64 || TCG_TARGET_HAS_clz_i32) {
1990         TCGv_i64 t = tcg_temp_new_i64();
1991         tcg_gen_sari_i64(t, arg, 63);
1992         tcg_gen_xor_i64(t, t, arg);
1993         tcg_gen_clzi_i64(t, t, 64);
1994         tcg_gen_subi_i64(ret, t, 1);
1995         tcg_temp_free_i64(t);
1996     } else {
1997         gen_helper_clrsb_i64(ret, arg);
1998     }
1999 }
2000 
2001 void tcg_gen_ctpop_i64(TCGv_i64 ret, TCGv_i64 arg1)
2002 {
2003     if (TCG_TARGET_HAS_ctpop_i64) {
2004         tcg_gen_op2_i64(INDEX_op_ctpop_i64, ret, arg1);
2005     } else if (TCG_TARGET_REG_BITS == 32 && TCG_TARGET_HAS_ctpop_i32) {
2006         tcg_gen_ctpop_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1));
2007         tcg_gen_ctpop_i32(TCGV_LOW(ret), TCGV_LOW(arg1));
2008         tcg_gen_add_i32(TCGV_LOW(ret), TCGV_LOW(ret), TCGV_HIGH(ret));
2009         tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
2010     } else {
2011         gen_helper_ctpop_i64(ret, arg1);
2012     }
2013 }
2014 
2015 void tcg_gen_rotl_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
2016 {
2017     if (TCG_TARGET_HAS_rot_i64) {
2018         tcg_gen_op3_i64(INDEX_op_rotl_i64, ret, arg1, arg2);
2019     } else {
2020         TCGv_i64 t0, t1;
2021         t0 = tcg_temp_new_i64();
2022         t1 = tcg_temp_new_i64();
2023         tcg_gen_shl_i64(t0, arg1, arg2);
2024         tcg_gen_subfi_i64(t1, 64, arg2);
2025         tcg_gen_shr_i64(t1, arg1, t1);
2026         tcg_gen_or_i64(ret, t0, t1);
2027         tcg_temp_free_i64(t0);
2028         tcg_temp_free_i64(t1);
2029     }
2030 }
2031 
2032 void tcg_gen_rotli_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
2033 {
2034     tcg_debug_assert(arg2 >= 0 && arg2 < 64);
2035     /* some cases can be optimized here */
2036     if (arg2 == 0) {
2037         tcg_gen_mov_i64(ret, arg1);
2038     } else if (TCG_TARGET_HAS_rot_i64) {
2039         tcg_gen_rotl_i64(ret, arg1, tcg_constant_i64(arg2));
2040     } else {
2041         TCGv_i64 t0, t1;
2042         t0 = tcg_temp_new_i64();
2043         t1 = tcg_temp_new_i64();
2044         tcg_gen_shli_i64(t0, arg1, arg2);
2045         tcg_gen_shri_i64(t1, arg1, 64 - arg2);
2046         tcg_gen_or_i64(ret, t0, t1);
2047         tcg_temp_free_i64(t0);
2048         tcg_temp_free_i64(t1);
2049     }
2050 }
2051 
2052 void tcg_gen_rotr_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
2053 {
2054     if (TCG_TARGET_HAS_rot_i64) {
2055         tcg_gen_op3_i64(INDEX_op_rotr_i64, ret, arg1, arg2);
2056     } else {
2057         TCGv_i64 t0, t1;
2058         t0 = tcg_temp_new_i64();
2059         t1 = tcg_temp_new_i64();
2060         tcg_gen_shr_i64(t0, arg1, arg2);
2061         tcg_gen_subfi_i64(t1, 64, arg2);
2062         tcg_gen_shl_i64(t1, arg1, t1);
2063         tcg_gen_or_i64(ret, t0, t1);
2064         tcg_temp_free_i64(t0);
2065         tcg_temp_free_i64(t1);
2066     }
2067 }
2068 
2069 void tcg_gen_rotri_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
2070 {
2071     tcg_debug_assert(arg2 >= 0 && arg2 < 64);
2072     /* some cases can be optimized here */
2073     if (arg2 == 0) {
2074         tcg_gen_mov_i64(ret, arg1);
2075     } else {
2076         tcg_gen_rotli_i64(ret, arg1, 64 - arg2);
2077     }
2078 }
2079 
/*
 * Emit RET = ARG1 with the LEN-bit field at bit offset OFS replaced by
 * the low LEN bits of ARG2.
 *
 * Requires 0 < LEN <= 64, OFS < 64 and OFS + LEN <= 64 (checked by
 * debug assertions).
 */
void tcg_gen_deposit_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2,
                         unsigned int ofs, unsigned int len)
{
    uint64_t mask;
    TCGv_i64 t1;

    tcg_debug_assert(ofs < 64);
    tcg_debug_assert(len > 0);
    tcg_debug_assert(len <= 64);
    tcg_debug_assert(ofs + len <= 64);

    /* Replacing the whole word is just a move of ARG2. */
    if (len == 64) {
        tcg_gen_mov_i64(ret, arg2);
        return;
    }
    /* Use the host deposit opcode when it accepts this (ofs, len). */
    if (TCG_TARGET_HAS_deposit_i64 && TCG_TARGET_deposit_i64_valid(ofs, len)) {
        tcg_gen_op5ii_i64(INDEX_op_deposit_i64, ret, arg1, arg2, ofs, len);
        return;
    }

    if (TCG_TARGET_REG_BITS == 32) {
        /* Field entirely within the high word: deposit there, copy low. */
        if (ofs >= 32) {
            tcg_gen_deposit_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1),
                                TCGV_LOW(arg2), ofs - 32, len);
            tcg_gen_mov_i32(TCGV_LOW(ret), TCGV_LOW(arg1));
            return;
        }
        /* Field entirely within the low word: deposit there, copy high. */
        if (ofs + len <= 32) {
            tcg_gen_deposit_i32(TCGV_LOW(ret), TCGV_LOW(arg1),
                                TCGV_LOW(arg2), ofs, len);
            tcg_gen_mov_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1));
            return;
        }
    }

    t1 = tcg_temp_new_i64();

    if (TCG_TARGET_HAS_extract2_i64) {
        /* Field reaches the top bit: shift ARG1 up, then funnel ARG2 in. */
        if (ofs + len == 64) {
            tcg_gen_shli_i64(t1, arg1, len);
            tcg_gen_extract2_i64(ret, t1, arg2, len);
            goto done;
        }
        /* Field starts at bit 0: funnel ARG2 in, then rotate back up. */
        if (ofs == 0) {
            tcg_gen_extract2_i64(ret, arg1, arg2, len);
            tcg_gen_rotli_i64(ret, ret, len);
            goto done;
        }
    }

    /* Generic expansion: mask and shift ARG2, clear the field, OR. */
    mask = (1ull << len) - 1;
    if (ofs + len < 64) {
        tcg_gen_andi_i64(t1, arg2, mask);
        tcg_gen_shli_i64(t1, t1, ofs);
    } else {
        /* The shift itself discards the bits above the field. */
        tcg_gen_shli_i64(t1, arg2, ofs);
    }
    tcg_gen_andi_i64(ret, arg1, ~(mask << ofs));
    tcg_gen_or_i64(ret, ret, t1);
 done:
    tcg_temp_free_i64(t1);
}
2142 
/*
 * Emit RET = the low LEN bits of ARG placed at bit offset OFS, with all
 * other bits of RET zero.
 *
 * Requires 0 < LEN <= 64, OFS < 64 and OFS + LEN <= 64 (checked by
 * debug assertions).
 */
void tcg_gen_deposit_z_i64(TCGv_i64 ret, TCGv_i64 arg,
                           unsigned int ofs, unsigned int len)
{
    tcg_debug_assert(ofs < 64);
    tcg_debug_assert(len > 0);
    tcg_debug_assert(len <= 64);
    tcg_debug_assert(ofs + len <= 64);

    if (ofs + len == 64) {
        /* Field reaches the top bit: a left shift is enough. */
        tcg_gen_shli_i64(ret, arg, ofs);
    } else if (ofs == 0) {
        /* Field starts at bit 0: a mask is enough. */
        tcg_gen_andi_i64(ret, arg, (1ull << len) - 1);
    } else if (TCG_TARGET_HAS_deposit_i64
               && TCG_TARGET_deposit_i64_valid(ofs, len)) {
        /* Deposit ARG into a constant zero with the host opcode. */
        TCGv_i64 zero = tcg_constant_i64(0);
        tcg_gen_op5ii_i64(INDEX_op_deposit_i64, ret, zero, arg, ofs, len);
    } else {
        if (TCG_TARGET_REG_BITS == 32) {
            /* Field entirely within the high word. */
            if (ofs >= 32) {
                tcg_gen_deposit_z_i32(TCGV_HIGH(ret), TCGV_LOW(arg),
                                      ofs - 32, len);
                tcg_gen_movi_i32(TCGV_LOW(ret), 0);
                return;
            }
            /* Field entirely within the low word. */
            if (ofs + len <= 32) {
                tcg_gen_deposit_z_i32(TCGV_LOW(ret), TCGV_LOW(arg), ofs, len);
                tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
                return;
            }
        }
        /* To help two-operand hosts we prefer to zero-extend first,
           which allows ARG to stay live.  */
        switch (len) {
        case 32:
            if (TCG_TARGET_HAS_ext32u_i64) {
                tcg_gen_ext32u_i64(ret, arg);
                tcg_gen_shli_i64(ret, ret, ofs);
                return;
            }
            break;
        case 16:
            if (TCG_TARGET_HAS_ext16u_i64) {
                tcg_gen_ext16u_i64(ret, arg);
                tcg_gen_shli_i64(ret, ret, ofs);
                return;
            }
            break;
        case 8:
            if (TCG_TARGET_HAS_ext8u_i64) {
                tcg_gen_ext8u_i64(ret, arg);
                tcg_gen_shli_i64(ret, ret, ofs);
                return;
            }
            break;
        }
        /* Otherwise prefer zero-extension over AND for code size.  */
        switch (ofs + len) {
        case 32:
            if (TCG_TARGET_HAS_ext32u_i64) {
                tcg_gen_shli_i64(ret, arg, ofs);
                tcg_gen_ext32u_i64(ret, ret);
                return;
            }
            break;
        case 16:
            if (TCG_TARGET_HAS_ext16u_i64) {
                tcg_gen_shli_i64(ret, arg, ofs);
                tcg_gen_ext16u_i64(ret, ret);
                return;
            }
            break;
        case 8:
            if (TCG_TARGET_HAS_ext8u_i64) {
                tcg_gen_shli_i64(ret, arg, ofs);
                tcg_gen_ext8u_i64(ret, ret);
                return;
            }
            break;
        }
        /* Last resort: mask to LEN bits, then shift into position. */
        tcg_gen_andi_i64(ret, arg, (1ull << len) - 1);
        tcg_gen_shli_i64(ret, ret, ofs);
    }
}
2226 
/*
 * Emit RET = the LEN-bit field of ARG starting at bit offset OFS,
 * zero-extended to 64 bits.
 *
 * Requires 0 < LEN <= 64, OFS < 64 and OFS + LEN <= 64 (checked by
 * debug assertions).
 */
void tcg_gen_extract_i64(TCGv_i64 ret, TCGv_i64 arg,
                         unsigned int ofs, unsigned int len)
{
    tcg_debug_assert(ofs < 64);
    tcg_debug_assert(len > 0);
    tcg_debug_assert(len <= 64);
    tcg_debug_assert(ofs + len <= 64);

    /* Canonicalize certain special cases, even if extract is supported.  */
    if (ofs + len == 64) {
        /* Field reaches the top bit: a logical right shift suffices. */
        tcg_gen_shri_i64(ret, arg, 64 - len);
        return;
    }
    if (ofs == 0) {
        /* Field starts at bit 0: a mask suffices. */
        tcg_gen_andi_i64(ret, arg, (1ull << len) - 1);
        return;
    }

    if (TCG_TARGET_REG_BITS == 32) {
        /* Look for a 32-bit extract within one of the two words.  */
        if (ofs >= 32) {
            tcg_gen_extract_i32(TCGV_LOW(ret), TCGV_HIGH(arg), ofs - 32, len);
            tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
            return;
        }
        if (ofs + len <= 32) {
            tcg_gen_extract_i32(TCGV_LOW(ret), TCGV_LOW(arg), ofs, len);
            tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
            return;
        }
        /* The field is split across two words.  One double-word
           shift is better than two double-word shifts.  */
        goto do_shift_and;
    }

    /* Use the host extract opcode when it accepts this (ofs, len). */
    if (TCG_TARGET_HAS_extract_i64
        && TCG_TARGET_extract_i64_valid(ofs, len)) {
        tcg_gen_op4ii_i64(INDEX_op_extract_i64, ret, arg, ofs, len);
        return;
    }

    /* Assume that zero-extension, if available, is cheaper than a shift.  */
    switch (ofs + len) {
    case 32:
        if (TCG_TARGET_HAS_ext32u_i64) {
            tcg_gen_ext32u_i64(ret, arg);
            tcg_gen_shri_i64(ret, ret, ofs);
            return;
        }
        break;
    case 16:
        if (TCG_TARGET_HAS_ext16u_i64) {
            tcg_gen_ext16u_i64(ret, arg);
            tcg_gen_shri_i64(ret, ret, ofs);
            return;
        }
        break;
    case 8:
        if (TCG_TARGET_HAS_ext8u_i64) {
            tcg_gen_ext8u_i64(ret, arg);
            tcg_gen_shri_i64(ret, ret, ofs);
            return;
        }
        break;
    }

    /* ??? Ideally we'd know what values are available for immediate AND.
       Assume that 8 bits are available, plus the special cases of 16 and 32,
       so that we get ext8u, ext16u, and ext32u.  */
    switch (len) {
    case 1 ... 8: case 16: case 32:
    /* Also the jump target for split fields on 32-bit hosts (see above). */
    do_shift_and:
        tcg_gen_shri_i64(ret, arg, ofs);
        tcg_gen_andi_i64(ret, ret, (1ull << len) - 1);
        break;
    default:
        /* Move the field to the top of the word, then shift it back down. */
        tcg_gen_shli_i64(ret, arg, 64 - len - ofs);
        tcg_gen_shri_i64(ret, ret, 64 - len);
        break;
    }
}
2308 
2309 void tcg_gen_sextract_i64(TCGv_i64 ret, TCGv_i64 arg,
2310                           unsigned int ofs, unsigned int len)
2311 {
2312     tcg_debug_assert(ofs < 64);
2313     tcg_debug_assert(len > 0);
2314     tcg_debug_assert(len <= 64);
2315     tcg_debug_assert(ofs + len <= 64);
2316 
2317     /* Canonicalize certain special cases, even if sextract is supported.  */
2318     if (ofs + len == 64) {
2319         tcg_gen_sari_i64(ret, arg, 64 - len);
2320         return;
2321     }
2322     if (ofs == 0) {
2323         switch (len) {
2324         case 32:
2325             tcg_gen_ext32s_i64(ret, arg);
2326             return;
2327         case 16:
2328             tcg_gen_ext16s_i64(ret, arg);
2329             return;
2330         case 8:
2331             tcg_gen_ext8s_i64(ret, arg);
2332             return;
2333         }
2334     }
2335 
2336     if (TCG_TARGET_REG_BITS == 32) {
2337         /* Look for a 32-bit extract within one of the two words.  */
2338         if (ofs >= 32) {
2339             tcg_gen_sextract_i32(TCGV_LOW(ret), TCGV_HIGH(arg), ofs - 32, len);
2340         } else if (ofs + len <= 32) {
2341             tcg_gen_sextract_i32(TCGV_LOW(ret), TCGV_LOW(arg), ofs, len);
2342         } else if (ofs == 0) {
2343             tcg_gen_mov_i32(TCGV_LOW(ret), TCGV_LOW(arg));
2344             tcg_gen_sextract_i32(TCGV_HIGH(ret), TCGV_HIGH(arg), 0, len - 32);
2345             return;
2346         } else if (len > 32) {
2347             TCGv_i32 t = tcg_temp_new_i32();
2348             /* Extract the bits for the high word normally.  */
2349             tcg_gen_sextract_i32(t, TCGV_HIGH(arg), ofs + 32, len - 32);
2350             /* Shift the field down for the low part.  */
2351             tcg_gen_shri_i64(ret, arg, ofs);
2352             /* Overwrite the shift into the high part.  */
2353             tcg_gen_mov_i32(TCGV_HIGH(ret), t);
2354             tcg_temp_free_i32(t);
2355             return;
2356         } else {
2357             /* Shift the field down for the low part, such that the
2358                field sits at the MSB.  */
2359             tcg_gen_shri_i64(ret, arg, ofs + len - 32);
2360             /* Shift the field down from the MSB, sign extending.  */
2361             tcg_gen_sari_i32(TCGV_LOW(ret), TCGV_LOW(ret), 32 - len);
2362         }
2363         /* Sign-extend the field from 32 bits.  */
2364         tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31);
2365         return;
2366     }
2367 
2368     if (TCG_TARGET_HAS_sextract_i64
2369         && TCG_TARGET_extract_i64_valid(ofs, len)) {
2370         tcg_gen_op4ii_i64(INDEX_op_sextract_i64, ret, arg, ofs, len);
2371         return;
2372     }
2373 
2374     /* Assume that sign-extension, if available, is cheaper than a shift.  */
2375     switch (ofs + len) {
2376     case 32:
2377         if (TCG_TARGET_HAS_ext32s_i64) {
2378             tcg_gen_ext32s_i64(ret, arg);
2379             tcg_gen_sari_i64(ret, ret, ofs);
2380             return;
2381         }
2382         break;
2383     case 16:
2384         if (TCG_TARGET_HAS_ext16s_i64) {
2385             tcg_gen_ext16s_i64(ret, arg);
2386             tcg_gen_sari_i64(ret, ret, ofs);
2387             return;
2388         }
2389         break;
2390     case 8:
2391         if (TCG_TARGET_HAS_ext8s_i64) {
2392             tcg_gen_ext8s_i64(ret, arg);
2393             tcg_gen_sari_i64(ret, ret, ofs);
2394             return;
2395         }
2396         break;
2397     }
2398     switch (len) {
2399     case 32:
2400         if (TCG_TARGET_HAS_ext32s_i64) {
2401             tcg_gen_shri_i64(ret, arg, ofs);
2402             tcg_gen_ext32s_i64(ret, ret);
2403             return;
2404         }
2405         break;
2406     case 16:
2407         if (TCG_TARGET_HAS_ext16s_i64) {
2408             tcg_gen_shri_i64(ret, arg, ofs);
2409             tcg_gen_ext16s_i64(ret, ret);
2410             return;
2411         }
2412         break;
2413     case 8:
2414         if (TCG_TARGET_HAS_ext8s_i64) {
2415             tcg_gen_shri_i64(ret, arg, ofs);
2416             tcg_gen_ext8s_i64(ret, ret);
2417             return;
2418         }
2419         break;
2420     }
2421     tcg_gen_shli_i64(ret, arg, 64 - len - ofs);
2422     tcg_gen_sari_i64(ret, ret, 64 - len);
2423 }
2424 
2425 /*
2426  * Extract 64 bits from a 128-bit input, ah:al, starting from ofs.
2427  * Unlike tcg_gen_extract_i64 above, len is fixed at 64.
2428  */
2429 void tcg_gen_extract2_i64(TCGv_i64 ret, TCGv_i64 al, TCGv_i64 ah,
2430                           unsigned int ofs)
2431 {
2432     tcg_debug_assert(ofs <= 64);
2433     if (ofs == 0) {
2434         tcg_gen_mov_i64(ret, al);
2435     } else if (ofs == 64) {
2436         tcg_gen_mov_i64(ret, ah);
2437     } else if (al == ah) {
2438         tcg_gen_rotri_i64(ret, al, ofs);
2439     } else if (TCG_TARGET_HAS_extract2_i64) {
2440         tcg_gen_op4i_i64(INDEX_op_extract2_i64, ret, al, ah, ofs);
2441     } else {
2442         TCGv_i64 t0 = tcg_temp_new_i64();
2443         tcg_gen_shri_i64(t0, al, ofs);
2444         tcg_gen_deposit_i64(ret, t0, ah, 64 - ofs, ofs);
2445         tcg_temp_free_i64(t0);
2446     }
2447 }
2448 
2449 void tcg_gen_movcond_i64(TCGCond cond, TCGv_i64 ret, TCGv_i64 c1,
2450                          TCGv_i64 c2, TCGv_i64 v1, TCGv_i64 v2)
2451 {
2452     if (cond == TCG_COND_ALWAYS) {
2453         tcg_gen_mov_i64(ret, v1);
2454     } else if (cond == TCG_COND_NEVER) {
2455         tcg_gen_mov_i64(ret, v2);
2456     } else if (TCG_TARGET_REG_BITS == 32) {
2457         TCGv_i32 t0 = tcg_temp_new_i32();
2458         TCGv_i32 t1 = tcg_temp_new_i32();
2459         tcg_gen_op6i_i32(INDEX_op_setcond2_i32, t0,
2460                          TCGV_LOW(c1), TCGV_HIGH(c1),
2461                          TCGV_LOW(c2), TCGV_HIGH(c2), cond);
2462 
2463         if (TCG_TARGET_HAS_movcond_i32) {
2464             tcg_gen_movi_i32(t1, 0);
2465             tcg_gen_movcond_i32(TCG_COND_NE, TCGV_LOW(ret), t0, t1,
2466                                 TCGV_LOW(v1), TCGV_LOW(v2));
2467             tcg_gen_movcond_i32(TCG_COND_NE, TCGV_HIGH(ret), t0, t1,
2468                                 TCGV_HIGH(v1), TCGV_HIGH(v2));
2469         } else {
2470             tcg_gen_neg_i32(t0, t0);
2471 
2472             tcg_gen_and_i32(t1, TCGV_LOW(v1), t0);
2473             tcg_gen_andc_i32(TCGV_LOW(ret), TCGV_LOW(v2), t0);
2474             tcg_gen_or_i32(TCGV_LOW(ret), TCGV_LOW(ret), t1);
2475 
2476             tcg_gen_and_i32(t1, TCGV_HIGH(v1), t0);
2477             tcg_gen_andc_i32(TCGV_HIGH(ret), TCGV_HIGH(v2), t0);
2478             tcg_gen_or_i32(TCGV_HIGH(ret), TCGV_HIGH(ret), t1);
2479         }
2480         tcg_temp_free_i32(t0);
2481         tcg_temp_free_i32(t1);
2482     } else if (TCG_TARGET_HAS_movcond_i64) {
2483         tcg_gen_op6i_i64(INDEX_op_movcond_i64, ret, c1, c2, v1, v2, cond);
2484     } else {
2485         TCGv_i64 t0 = tcg_temp_new_i64();
2486         TCGv_i64 t1 = tcg_temp_new_i64();
2487         tcg_gen_setcond_i64(cond, t0, c1, c2);
2488         tcg_gen_neg_i64(t0, t0);
2489         tcg_gen_and_i64(t1, v1, t0);
2490         tcg_gen_andc_i64(ret, v2, t0);
2491         tcg_gen_or_i64(ret, ret, t1);
2492         tcg_temp_free_i64(t0);
2493         tcg_temp_free_i64(t1);
2494     }
2495 }
2496 
2497 void tcg_gen_add2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 al,
2498                       TCGv_i64 ah, TCGv_i64 bl, TCGv_i64 bh)
2499 {
2500     if (TCG_TARGET_HAS_add2_i64) {
2501         tcg_gen_op6_i64(INDEX_op_add2_i64, rl, rh, al, ah, bl, bh);
2502     } else {
2503         TCGv_i64 t0 = tcg_temp_new_i64();
2504         TCGv_i64 t1 = tcg_temp_new_i64();
2505         tcg_gen_add_i64(t0, al, bl);
2506         tcg_gen_setcond_i64(TCG_COND_LTU, t1, t0, al);
2507         tcg_gen_add_i64(rh, ah, bh);
2508         tcg_gen_add_i64(rh, rh, t1);
2509         tcg_gen_mov_i64(rl, t0);
2510         tcg_temp_free_i64(t0);
2511         tcg_temp_free_i64(t1);
2512     }
2513 }
2514 
2515 void tcg_gen_sub2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 al,
2516                       TCGv_i64 ah, TCGv_i64 bl, TCGv_i64 bh)
2517 {
2518     if (TCG_TARGET_HAS_sub2_i64) {
2519         tcg_gen_op6_i64(INDEX_op_sub2_i64, rl, rh, al, ah, bl, bh);
2520     } else {
2521         TCGv_i64 t0 = tcg_temp_new_i64();
2522         TCGv_i64 t1 = tcg_temp_new_i64();
2523         tcg_gen_sub_i64(t0, al, bl);
2524         tcg_gen_setcond_i64(TCG_COND_LTU, t1, al, bl);
2525         tcg_gen_sub_i64(rh, ah, bh);
2526         tcg_gen_sub_i64(rh, rh, t1);
2527         tcg_gen_mov_i64(rl, t0);
2528         tcg_temp_free_i64(t0);
2529         tcg_temp_free_i64(t1);
2530     }
2531 }
2532 
2533 void tcg_gen_mulu2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 arg1, TCGv_i64 arg2)
2534 {
2535     if (TCG_TARGET_HAS_mulu2_i64) {
2536         tcg_gen_op4_i64(INDEX_op_mulu2_i64, rl, rh, arg1, arg2);
2537     } else if (TCG_TARGET_HAS_muluh_i64) {
2538         TCGv_i64 t = tcg_temp_new_i64();
2539         tcg_gen_op3_i64(INDEX_op_mul_i64, t, arg1, arg2);
2540         tcg_gen_op3_i64(INDEX_op_muluh_i64, rh, arg1, arg2);
2541         tcg_gen_mov_i64(rl, t);
2542         tcg_temp_free_i64(t);
2543     } else {
2544         TCGv_i64 t0 = tcg_temp_new_i64();
2545         tcg_gen_mul_i64(t0, arg1, arg2);
2546         gen_helper_muluh_i64(rh, arg1, arg2);
2547         tcg_gen_mov_i64(rl, t0);
2548         tcg_temp_free_i64(t0);
2549     }
2550 }
2551 
2552 void tcg_gen_muls2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 arg1, TCGv_i64 arg2)
2553 {
2554     if (TCG_TARGET_HAS_muls2_i64) {
2555         tcg_gen_op4_i64(INDEX_op_muls2_i64, rl, rh, arg1, arg2);
2556     } else if (TCG_TARGET_HAS_mulsh_i64) {
2557         TCGv_i64 t = tcg_temp_new_i64();
2558         tcg_gen_op3_i64(INDEX_op_mul_i64, t, arg1, arg2);
2559         tcg_gen_op3_i64(INDEX_op_mulsh_i64, rh, arg1, arg2);
2560         tcg_gen_mov_i64(rl, t);
2561         tcg_temp_free_i64(t);
2562     } else if (TCG_TARGET_HAS_mulu2_i64 || TCG_TARGET_HAS_muluh_i64) {
2563         TCGv_i64 t0 = tcg_temp_new_i64();
2564         TCGv_i64 t1 = tcg_temp_new_i64();
2565         TCGv_i64 t2 = tcg_temp_new_i64();
2566         TCGv_i64 t3 = tcg_temp_new_i64();
2567         tcg_gen_mulu2_i64(t0, t1, arg1, arg2);
2568         /* Adjust for negative inputs.  */
2569         tcg_gen_sari_i64(t2, arg1, 63);
2570         tcg_gen_sari_i64(t3, arg2, 63);
2571         tcg_gen_and_i64(t2, t2, arg2);
2572         tcg_gen_and_i64(t3, t3, arg1);
2573         tcg_gen_sub_i64(rh, t1, t2);
2574         tcg_gen_sub_i64(rh, rh, t3);
2575         tcg_gen_mov_i64(rl, t0);
2576         tcg_temp_free_i64(t0);
2577         tcg_temp_free_i64(t1);
2578         tcg_temp_free_i64(t2);
2579         tcg_temp_free_i64(t3);
2580     } else {
2581         TCGv_i64 t0 = tcg_temp_new_i64();
2582         tcg_gen_mul_i64(t0, arg1, arg2);
2583         gen_helper_mulsh_i64(rh, arg1, arg2);
2584         tcg_gen_mov_i64(rl, t0);
2585         tcg_temp_free_i64(t0);
2586     }
2587 }
2588 
2589 void tcg_gen_mulsu2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 arg1, TCGv_i64 arg2)
2590 {
2591     TCGv_i64 t0 = tcg_temp_new_i64();
2592     TCGv_i64 t1 = tcg_temp_new_i64();
2593     TCGv_i64 t2 = tcg_temp_new_i64();
2594     tcg_gen_mulu2_i64(t0, t1, arg1, arg2);
2595     /* Adjust for negative input for the signed arg1.  */
2596     tcg_gen_sari_i64(t2, arg1, 63);
2597     tcg_gen_and_i64(t2, t2, arg2);
2598     tcg_gen_sub_i64(rh, t1, t2);
2599     tcg_gen_mov_i64(rl, t0);
2600     tcg_temp_free_i64(t0);
2601     tcg_temp_free_i64(t1);
2602     tcg_temp_free_i64(t2);
2603 }
2604 
2605 void tcg_gen_smin_i64(TCGv_i64 ret, TCGv_i64 a, TCGv_i64 b)
2606 {
2607     tcg_gen_movcond_i64(TCG_COND_LT, ret, a, b, a, b);
2608 }
2609 
2610 void tcg_gen_umin_i64(TCGv_i64 ret, TCGv_i64 a, TCGv_i64 b)
2611 {
2612     tcg_gen_movcond_i64(TCG_COND_LTU, ret, a, b, a, b);
2613 }
2614 
2615 void tcg_gen_smax_i64(TCGv_i64 ret, TCGv_i64 a, TCGv_i64 b)
2616 {
2617     tcg_gen_movcond_i64(TCG_COND_LT, ret, a, b, b, a);
2618 }
2619 
2620 void tcg_gen_umax_i64(TCGv_i64 ret, TCGv_i64 a, TCGv_i64 b)
2621 {
2622     tcg_gen_movcond_i64(TCG_COND_LTU, ret, a, b, b, a);
2623 }
2624 
2625 void tcg_gen_abs_i64(TCGv_i64 ret, TCGv_i64 a)
2626 {
2627     TCGv_i64 t = tcg_temp_new_i64();
2628 
2629     tcg_gen_sari_i64(t, a, 63);
2630     tcg_gen_xor_i64(ret, a, t);
2631     tcg_gen_sub_i64(ret, ret, t);
2632     tcg_temp_free_i64(t);
2633 }
2634 
2635 /* Size changing operations.  */
2636 
2637 void tcg_gen_extrl_i64_i32(TCGv_i32 ret, TCGv_i64 arg)
2638 {
2639     if (TCG_TARGET_REG_BITS == 32) {
2640         tcg_gen_mov_i32(ret, TCGV_LOW(arg));
2641     } else if (TCG_TARGET_HAS_extrl_i64_i32) {
2642         tcg_gen_op2(INDEX_op_extrl_i64_i32,
2643                     tcgv_i32_arg(ret), tcgv_i64_arg(arg));
2644     } else {
2645         tcg_gen_mov_i32(ret, (TCGv_i32)arg);
2646     }
2647 }
2648 
2649 void tcg_gen_extrh_i64_i32(TCGv_i32 ret, TCGv_i64 arg)
2650 {
2651     if (TCG_TARGET_REG_BITS == 32) {
2652         tcg_gen_mov_i32(ret, TCGV_HIGH(arg));
2653     } else if (TCG_TARGET_HAS_extrh_i64_i32) {
2654         tcg_gen_op2(INDEX_op_extrh_i64_i32,
2655                     tcgv_i32_arg(ret), tcgv_i64_arg(arg));
2656     } else {
2657         TCGv_i64 t = tcg_temp_new_i64();
2658         tcg_gen_shri_i64(t, arg, 32);
2659         tcg_gen_mov_i32(ret, (TCGv_i32)t);
2660         tcg_temp_free_i64(t);
2661     }
2662 }
2663 
2664 void tcg_gen_extu_i32_i64(TCGv_i64 ret, TCGv_i32 arg)
2665 {
2666     if (TCG_TARGET_REG_BITS == 32) {
2667         tcg_gen_mov_i32(TCGV_LOW(ret), arg);
2668         tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
2669     } else {
2670         tcg_gen_op2(INDEX_op_extu_i32_i64,
2671                     tcgv_i64_arg(ret), tcgv_i32_arg(arg));
2672     }
2673 }
2674 
2675 void tcg_gen_ext_i32_i64(TCGv_i64 ret, TCGv_i32 arg)
2676 {
2677     if (TCG_TARGET_REG_BITS == 32) {
2678         tcg_gen_mov_i32(TCGV_LOW(ret), arg);
2679         tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31);
2680     } else {
2681         tcg_gen_op2(INDEX_op_ext_i32_i64,
2682                     tcgv_i64_arg(ret), tcgv_i32_arg(arg));
2683     }
2684 }
2685 
2686 void tcg_gen_concat_i32_i64(TCGv_i64 dest, TCGv_i32 low, TCGv_i32 high)
2687 {
2688     TCGv_i64 tmp;
2689 
2690     if (TCG_TARGET_REG_BITS == 32) {
2691         tcg_gen_mov_i32(TCGV_LOW(dest), low);
2692         tcg_gen_mov_i32(TCGV_HIGH(dest), high);
2693         return;
2694     }
2695 
2696     tmp = tcg_temp_new_i64();
2697     /* These extensions are only needed for type correctness.
2698        We may be able to do better given target specific information.  */
2699     tcg_gen_extu_i32_i64(tmp, high);
2700     tcg_gen_extu_i32_i64(dest, low);
2701     /* If deposit is available, use it.  Otherwise use the extra
2702        knowledge that we have of the zero-extensions above.  */
2703     if (TCG_TARGET_HAS_deposit_i64 && TCG_TARGET_deposit_i64_valid(32, 32)) {
2704         tcg_gen_deposit_i64(dest, dest, tmp, 32, 32);
2705     } else {
2706         tcg_gen_shli_i64(tmp, tmp, 32);
2707         tcg_gen_or_i64(dest, dest, tmp);
2708     }
2709     tcg_temp_free_i64(tmp);
2710 }
2711 
2712 void tcg_gen_extr_i64_i32(TCGv_i32 lo, TCGv_i32 hi, TCGv_i64 arg)
2713 {
2714     if (TCG_TARGET_REG_BITS == 32) {
2715         tcg_gen_mov_i32(lo, TCGV_LOW(arg));
2716         tcg_gen_mov_i32(hi, TCGV_HIGH(arg));
2717     } else {
2718         tcg_gen_extrl_i64_i32(lo, arg);
2719         tcg_gen_extrh_i64_i32(hi, arg);
2720     }
2721 }
2722 
2723 void tcg_gen_extr32_i64(TCGv_i64 lo, TCGv_i64 hi, TCGv_i64 arg)
2724 {
2725     tcg_gen_ext32u_i64(lo, arg);
2726     tcg_gen_shri_i64(hi, arg, 32);
2727 }
2728 
2729 /* QEMU specific operations.  */
2730 
2731 void tcg_gen_exit_tb(const TranslationBlock *tb, unsigned idx)
2732 {
2733     /*
2734      * Let the jit code return the read-only version of the
2735      * TranslationBlock, so that we minimize the pc-relative
2736      * distance of the address of the exit_tb code to TB.
2737      * This will improve utilization of pc-relative address loads.
2738      *
2739      * TODO: Move this to translator_loop, so that all const
2740      * TranslationBlock pointers refer to read-only memory.
2741      * This requires coordination with targets that do not use
2742      * the translator_loop.
2743      */
2744     uintptr_t val = (uintptr_t)tcg_splitwx_to_rx((void *)tb) + idx;
2745 
2746     if (tb == NULL) {
2747         tcg_debug_assert(idx == 0);
2748     } else if (idx <= TB_EXIT_IDXMAX) {
2749 #ifdef CONFIG_DEBUG_TCG
2750         /* This is an exit following a goto_tb.  Verify that we have
2751            seen this numbered exit before, via tcg_gen_goto_tb.  */
2752         tcg_debug_assert(tcg_ctx->goto_tb_issue_mask & (1 << idx));
2753 #endif
2754     } else {
2755         /* This is an exit via the exitreq label.  */
2756         tcg_debug_assert(idx == TB_EXIT_REQUESTED);
2757     }
2758 
2759     plugin_gen_disable_mem_helpers();
2760     tcg_gen_op1i(INDEX_op_exit_tb, val);
2761 }
2762 
2763 void tcg_gen_goto_tb(unsigned idx)
2764 {
2765     /* We tested CF_NO_GOTO_TB in translator_use_goto_tb. */
2766     tcg_debug_assert(!(tcg_ctx->tb_cflags & CF_NO_GOTO_TB));
2767     /* We only support two chained exits.  */
2768     tcg_debug_assert(idx <= TB_EXIT_IDXMAX);
2769 #ifdef CONFIG_DEBUG_TCG
2770     /* Verify that we haven't seen this numbered exit before.  */
2771     tcg_debug_assert((tcg_ctx->goto_tb_issue_mask & (1 << idx)) == 0);
2772     tcg_ctx->goto_tb_issue_mask |= 1 << idx;
2773 #endif
2774     plugin_gen_disable_mem_helpers();
2775     tcg_gen_op1i(INDEX_op_goto_tb, idx);
2776 }
2777 
2778 void tcg_gen_lookup_and_goto_ptr(void)
2779 {
2780     TCGv_ptr ptr;
2781 
2782     if (tcg_ctx->tb_cflags & CF_NO_GOTO_PTR) {
2783         tcg_gen_exit_tb(NULL, 0);
2784         return;
2785     }
2786 
2787     plugin_gen_disable_mem_helpers();
2788     ptr = tcg_temp_new_ptr();
2789     gen_helper_lookup_tb_ptr(ptr, cpu_env);
2790     tcg_gen_op1i(INDEX_op_goto_ptr, tcgv_ptr_arg(ptr));
2791     tcg_temp_free_ptr(ptr);
2792 }
2793 
2794 static inline MemOp tcg_canonicalize_memop(MemOp op, bool is64, bool st)
2795 {
2796     /* Trigger the asserts within as early as possible.  */
2797     unsigned a_bits = get_alignment_bits(op);
2798 
2799     /* Prefer MO_ALIGN+MO_XX over MO_ALIGN_XX+MO_XX */
2800     if (a_bits == (op & MO_SIZE)) {
2801         op = (op & ~MO_AMASK) | MO_ALIGN;
2802     }
2803 
2804     switch (op & MO_SIZE) {
2805     case MO_8:
2806         op &= ~MO_BSWAP;
2807         break;
2808     case MO_16:
2809         break;
2810     case MO_32:
2811         if (!is64) {
2812             op &= ~MO_SIGN;
2813         }
2814         break;
2815     case MO_64:
2816         if (is64) {
2817             op &= ~MO_SIGN;
2818             break;
2819         }
2820         /* fall through */
2821     default:
2822         g_assert_not_reached();
2823     }
2824     if (st) {
2825         op &= ~MO_SIGN;
2826     }
2827     return op;
2828 }
2829 
2830 static void gen_ldst_i32(TCGOpcode opc, TCGv_i32 val, TCGv addr,
2831                          MemOp memop, TCGArg idx)
2832 {
2833     MemOpIdx oi = make_memop_idx(memop, idx);
2834 #if TARGET_LONG_BITS == 32
2835     tcg_gen_op3i_i32(opc, val, addr, oi);
2836 #else
2837     if (TCG_TARGET_REG_BITS == 32) {
2838         tcg_gen_op4i_i32(opc, val, TCGV_LOW(addr), TCGV_HIGH(addr), oi);
2839     } else {
2840         tcg_gen_op3(opc, tcgv_i32_arg(val), tcgv_i64_arg(addr), oi);
2841     }
2842 #endif
2843 }
2844 
2845 static void gen_ldst_i64(TCGOpcode opc, TCGv_i64 val, TCGv addr,
2846                          MemOp memop, TCGArg idx)
2847 {
2848     MemOpIdx oi = make_memop_idx(memop, idx);
2849 #if TARGET_LONG_BITS == 32
2850     if (TCG_TARGET_REG_BITS == 32) {
2851         tcg_gen_op4i_i32(opc, TCGV_LOW(val), TCGV_HIGH(val), addr, oi);
2852     } else {
2853         tcg_gen_op3(opc, tcgv_i64_arg(val), tcgv_i32_arg(addr), oi);
2854     }
2855 #else
2856     if (TCG_TARGET_REG_BITS == 32) {
2857         tcg_gen_op5i_i32(opc, TCGV_LOW(val), TCGV_HIGH(val),
2858                          TCGV_LOW(addr), TCGV_HIGH(addr), oi);
2859     } else {
2860         tcg_gen_op3i_i64(opc, val, addr, oi);
2861     }
2862 #endif
2863 }
2864 
2865 static void tcg_gen_req_mo(TCGBar type)
2866 {
2867 #ifdef TCG_GUEST_DEFAULT_MO
2868     type &= TCG_GUEST_DEFAULT_MO;
2869 #endif
2870     type &= ~TCG_TARGET_DEFAULT_MO;
2871     if (type) {
2872         tcg_gen_mb(type | TCG_BAR_SC);
2873     }
2874 }
2875 
2876 static inline TCGv plugin_prep_mem_callbacks(TCGv vaddr)
2877 {
2878 #ifdef CONFIG_PLUGIN
2879     if (tcg_ctx->plugin_insn != NULL) {
2880         /* Save a copy of the vaddr for use after a load.  */
2881         TCGv temp = tcg_temp_new();
2882         tcg_gen_mov_tl(temp, vaddr);
2883         return temp;
2884     }
2885 #endif
2886     return vaddr;
2887 }
2888 
2889 static void plugin_gen_mem_callbacks(TCGv vaddr, MemOpIdx oi,
2890                                      enum qemu_plugin_mem_rw rw)
2891 {
2892 #ifdef CONFIG_PLUGIN
2893     if (tcg_ctx->plugin_insn != NULL) {
2894         qemu_plugin_meminfo_t info = make_plugin_meminfo(oi, rw);
2895         plugin_gen_empty_mem_callback(vaddr, info);
2896         tcg_temp_free(vaddr);
2897     }
2898 #endif
2899 }
2900 
2901 void tcg_gen_qemu_ld_i32(TCGv_i32 val, TCGv addr, TCGArg idx, MemOp memop)
2902 {
2903     MemOp orig_memop;
2904     MemOpIdx oi;
2905 
2906     tcg_gen_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
2907     memop = tcg_canonicalize_memop(memop, 0, 0);
2908     oi = make_memop_idx(memop, idx);
2909 
2910     orig_memop = memop;
2911     if (!TCG_TARGET_HAS_MEMORY_BSWAP && (memop & MO_BSWAP)) {
2912         memop &= ~MO_BSWAP;
2913         /* The bswap primitive benefits from zero-extended input.  */
2914         if ((memop & MO_SSIZE) == MO_SW) {
2915             memop &= ~MO_SIGN;
2916         }
2917     }
2918 
2919     addr = plugin_prep_mem_callbacks(addr);
2920     gen_ldst_i32(INDEX_op_qemu_ld_i32, val, addr, memop, idx);
2921     plugin_gen_mem_callbacks(addr, oi, QEMU_PLUGIN_MEM_R);
2922 
2923     if ((orig_memop ^ memop) & MO_BSWAP) {
2924         switch (orig_memop & MO_SIZE) {
2925         case MO_16:
2926             tcg_gen_bswap16_i32(val, val, (orig_memop & MO_SIGN
2927                                            ? TCG_BSWAP_IZ | TCG_BSWAP_OS
2928                                            : TCG_BSWAP_IZ | TCG_BSWAP_OZ));
2929             break;
2930         case MO_32:
2931             tcg_gen_bswap32_i32(val, val);
2932             break;
2933         default:
2934             g_assert_not_reached();
2935         }
2936     }
2937 }
2938 
2939 void tcg_gen_qemu_st_i32(TCGv_i32 val, TCGv addr, TCGArg idx, MemOp memop)
2940 {
2941     TCGv_i32 swap = NULL;
2942     MemOpIdx oi;
2943 
2944     tcg_gen_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST);
2945     memop = tcg_canonicalize_memop(memop, 0, 1);
2946     oi = make_memop_idx(memop, idx);
2947 
2948     if (!TCG_TARGET_HAS_MEMORY_BSWAP && (memop & MO_BSWAP)) {
2949         swap = tcg_temp_new_i32();
2950         switch (memop & MO_SIZE) {
2951         case MO_16:
2952             tcg_gen_bswap16_i32(swap, val, 0);
2953             break;
2954         case MO_32:
2955             tcg_gen_bswap32_i32(swap, val);
2956             break;
2957         default:
2958             g_assert_not_reached();
2959         }
2960         val = swap;
2961         memop &= ~MO_BSWAP;
2962     }
2963 
2964     addr = plugin_prep_mem_callbacks(addr);
2965     if (TCG_TARGET_HAS_qemu_st8_i32 && (memop & MO_SIZE) == MO_8) {
2966         gen_ldst_i32(INDEX_op_qemu_st8_i32, val, addr, memop, idx);
2967     } else {
2968         gen_ldst_i32(INDEX_op_qemu_st_i32, val, addr, memop, idx);
2969     }
2970     plugin_gen_mem_callbacks(addr, oi, QEMU_PLUGIN_MEM_W);
2971 
2972     if (swap) {
2973         tcg_temp_free_i32(swap);
2974     }
2975 }
2976 
2977 void tcg_gen_qemu_ld_i64(TCGv_i64 val, TCGv addr, TCGArg idx, MemOp memop)
2978 {
2979     MemOp orig_memop;
2980     MemOpIdx oi;
2981 
2982     if (TCG_TARGET_REG_BITS == 32 && (memop & MO_SIZE) < MO_64) {
2983         tcg_gen_qemu_ld_i32(TCGV_LOW(val), addr, idx, memop);
2984         if (memop & MO_SIGN) {
2985             tcg_gen_sari_i32(TCGV_HIGH(val), TCGV_LOW(val), 31);
2986         } else {
2987             tcg_gen_movi_i32(TCGV_HIGH(val), 0);
2988         }
2989         return;
2990     }
2991 
2992     tcg_gen_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
2993     memop = tcg_canonicalize_memop(memop, 1, 0);
2994     oi = make_memop_idx(memop, idx);
2995 
2996     orig_memop = memop;
2997     if (!TCG_TARGET_HAS_MEMORY_BSWAP && (memop & MO_BSWAP)) {
2998         memop &= ~MO_BSWAP;
2999         /* The bswap primitive benefits from zero-extended input.  */
3000         if ((memop & MO_SIGN) && (memop & MO_SIZE) < MO_64) {
3001             memop &= ~MO_SIGN;
3002         }
3003     }
3004 
3005     addr = plugin_prep_mem_callbacks(addr);
3006     gen_ldst_i64(INDEX_op_qemu_ld_i64, val, addr, memop, idx);
3007     plugin_gen_mem_callbacks(addr, oi, QEMU_PLUGIN_MEM_R);
3008 
3009     if ((orig_memop ^ memop) & MO_BSWAP) {
3010         int flags = (orig_memop & MO_SIGN
3011                      ? TCG_BSWAP_IZ | TCG_BSWAP_OS
3012                      : TCG_BSWAP_IZ | TCG_BSWAP_OZ);
3013         switch (orig_memop & MO_SIZE) {
3014         case MO_16:
3015             tcg_gen_bswap16_i64(val, val, flags);
3016             break;
3017         case MO_32:
3018             tcg_gen_bswap32_i64(val, val, flags);
3019             break;
3020         case MO_64:
3021             tcg_gen_bswap64_i64(val, val);
3022             break;
3023         default:
3024             g_assert_not_reached();
3025         }
3026     }
3027 }
3028 
3029 void tcg_gen_qemu_st_i64(TCGv_i64 val, TCGv addr, TCGArg idx, MemOp memop)
3030 {
3031     TCGv_i64 swap = NULL;
3032     MemOpIdx oi;
3033 
3034     if (TCG_TARGET_REG_BITS == 32 && (memop & MO_SIZE) < MO_64) {
3035         tcg_gen_qemu_st_i32(TCGV_LOW(val), addr, idx, memop);
3036         return;
3037     }
3038 
3039     tcg_gen_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST);
3040     memop = tcg_canonicalize_memop(memop, 1, 1);
3041     oi = make_memop_idx(memop, idx);
3042 
3043     if (!TCG_TARGET_HAS_MEMORY_BSWAP && (memop & MO_BSWAP)) {
3044         swap = tcg_temp_new_i64();
3045         switch (memop & MO_SIZE) {
3046         case MO_16:
3047             tcg_gen_bswap16_i64(swap, val, 0);
3048             break;
3049         case MO_32:
3050             tcg_gen_bswap32_i64(swap, val, 0);
3051             break;
3052         case MO_64:
3053             tcg_gen_bswap64_i64(swap, val);
3054             break;
3055         default:
3056             g_assert_not_reached();
3057         }
3058         val = swap;
3059         memop &= ~MO_BSWAP;
3060     }
3061 
3062     addr = plugin_prep_mem_callbacks(addr);
3063     gen_ldst_i64(INDEX_op_qemu_st_i64, val, addr, memop, idx);
3064     plugin_gen_mem_callbacks(addr, oi, QEMU_PLUGIN_MEM_W);
3065 
3066     if (swap) {
3067         tcg_temp_free_i64(swap);
3068     }
3069 }
3070 
3071 static void tcg_gen_ext_i32(TCGv_i32 ret, TCGv_i32 val, MemOp opc)
3072 {
3073     switch (opc & MO_SSIZE) {
3074     case MO_SB:
3075         tcg_gen_ext8s_i32(ret, val);
3076         break;
3077     case MO_UB:
3078         tcg_gen_ext8u_i32(ret, val);
3079         break;
3080     case MO_SW:
3081         tcg_gen_ext16s_i32(ret, val);
3082         break;
3083     case MO_UW:
3084         tcg_gen_ext16u_i32(ret, val);
3085         break;
3086     default:
3087         tcg_gen_mov_i32(ret, val);
3088         break;
3089     }
3090 }
3091 
3092 static void tcg_gen_ext_i64(TCGv_i64 ret, TCGv_i64 val, MemOp opc)
3093 {
3094     switch (opc & MO_SSIZE) {
3095     case MO_SB:
3096         tcg_gen_ext8s_i64(ret, val);
3097         break;
3098     case MO_UB:
3099         tcg_gen_ext8u_i64(ret, val);
3100         break;
3101     case MO_SW:
3102         tcg_gen_ext16s_i64(ret, val);
3103         break;
3104     case MO_UW:
3105         tcg_gen_ext16u_i64(ret, val);
3106         break;
3107     case MO_SL:
3108         tcg_gen_ext32s_i64(ret, val);
3109         break;
3110     case MO_UL:
3111         tcg_gen_ext32u_i64(ret, val);
3112         break;
3113     default:
3114         tcg_gen_mov_i64(ret, val);
3115         break;
3116     }
3117 }
3118 
3119 typedef void (*gen_atomic_cx_i32)(TCGv_i32, TCGv_env, TCGv,
3120                                   TCGv_i32, TCGv_i32, TCGv_i32);
3121 typedef void (*gen_atomic_cx_i64)(TCGv_i64, TCGv_env, TCGv,
3122                                   TCGv_i64, TCGv_i64, TCGv_i32);
3123 typedef void (*gen_atomic_op_i32)(TCGv_i32, TCGv_env, TCGv,
3124                                   TCGv_i32, TCGv_i32);
3125 typedef void (*gen_atomic_op_i64)(TCGv_i64, TCGv_env, TCGv,
3126                                   TCGv_i64, TCGv_i32);
3127 
/*
 * WITH_ATOMIC64(X) expands to "X," when CONFIG_ATOMIC64 is defined and
 * to nothing otherwise, so that 64-bit entries can be listed in the
 * helper tables without a trailing comma of their own.
 */
#ifndef CONFIG_ATOMIC64
# define WITH_ATOMIC64(X)
#else
# define WITH_ATOMIC64(X) X,
#endif
3133 
3134 static void * const table_cmpxchg[(MO_SIZE | MO_BSWAP) + 1] = {
3135     [MO_8] = gen_helper_atomic_cmpxchgb,
3136     [MO_16 | MO_LE] = gen_helper_atomic_cmpxchgw_le,
3137     [MO_16 | MO_BE] = gen_helper_atomic_cmpxchgw_be,
3138     [MO_32 | MO_LE] = gen_helper_atomic_cmpxchgl_le,
3139     [MO_32 | MO_BE] = gen_helper_atomic_cmpxchgl_be,
3140     WITH_ATOMIC64([MO_64 | MO_LE] = gen_helper_atomic_cmpxchgq_le)
3141     WITH_ATOMIC64([MO_64 | MO_BE] = gen_helper_atomic_cmpxchgq_be)
3142 };
3143 
3144 void tcg_gen_atomic_cmpxchg_i32(TCGv_i32 retv, TCGv addr, TCGv_i32 cmpv,
3145                                 TCGv_i32 newv, TCGArg idx, MemOp memop)
3146 {
3147     memop = tcg_canonicalize_memop(memop, 0, 0);
3148 
3149     if (!(tcg_ctx->tb_cflags & CF_PARALLEL)) {
3150         TCGv_i32 t1 = tcg_temp_new_i32();
3151         TCGv_i32 t2 = tcg_temp_new_i32();
3152 
3153         tcg_gen_ext_i32(t2, cmpv, memop & MO_SIZE);
3154 
3155         tcg_gen_qemu_ld_i32(t1, addr, idx, memop & ~MO_SIGN);
3156         tcg_gen_movcond_i32(TCG_COND_EQ, t2, t1, t2, newv, t1);
3157         tcg_gen_qemu_st_i32(t2, addr, idx, memop);
3158         tcg_temp_free_i32(t2);
3159 
3160         if (memop & MO_SIGN) {
3161             tcg_gen_ext_i32(retv, t1, memop);
3162         } else {
3163             tcg_gen_mov_i32(retv, t1);
3164         }
3165         tcg_temp_free_i32(t1);
3166     } else {
3167         gen_atomic_cx_i32 gen;
3168         MemOpIdx oi;
3169 
3170         gen = table_cmpxchg[memop & (MO_SIZE | MO_BSWAP)];
3171         tcg_debug_assert(gen != NULL);
3172 
3173         oi = make_memop_idx(memop & ~MO_SIGN, idx);
3174         gen(retv, cpu_env, addr, cmpv, newv, tcg_constant_i32(oi));
3175 
3176         if (memop & MO_SIGN) {
3177             tcg_gen_ext_i32(retv, retv, memop);
3178         }
3179     }
3180 }
3181 
3182 void tcg_gen_atomic_cmpxchg_i64(TCGv_i64 retv, TCGv addr, TCGv_i64 cmpv,
3183                                 TCGv_i64 newv, TCGArg idx, MemOp memop)
3184 {
3185     memop = tcg_canonicalize_memop(memop, 1, 0);
3186 
3187     if (!(tcg_ctx->tb_cflags & CF_PARALLEL)) {
3188         TCGv_i64 t1 = tcg_temp_new_i64();
3189         TCGv_i64 t2 = tcg_temp_new_i64();
3190 
3191         tcg_gen_ext_i64(t2, cmpv, memop & MO_SIZE);
3192 
3193         tcg_gen_qemu_ld_i64(t1, addr, idx, memop & ~MO_SIGN);
3194         tcg_gen_movcond_i64(TCG_COND_EQ, t2, t1, t2, newv, t1);
3195         tcg_gen_qemu_st_i64(t2, addr, idx, memop);
3196         tcg_temp_free_i64(t2);
3197 
3198         if (memop & MO_SIGN) {
3199             tcg_gen_ext_i64(retv, t1, memop);
3200         } else {
3201             tcg_gen_mov_i64(retv, t1);
3202         }
3203         tcg_temp_free_i64(t1);
3204     } else if ((memop & MO_SIZE) == MO_64) {
3205 #ifdef CONFIG_ATOMIC64
3206         gen_atomic_cx_i64 gen;
3207         MemOpIdx oi;
3208 
3209         gen = table_cmpxchg[memop & (MO_SIZE | MO_BSWAP)];
3210         tcg_debug_assert(gen != NULL);
3211 
3212         oi = make_memop_idx(memop, idx);
3213         gen(retv, cpu_env, addr, cmpv, newv, tcg_constant_i32(oi));
3214 #else
3215         gen_helper_exit_atomic(cpu_env);
3216         /* Produce a result, so that we have a well-formed opcode stream
3217            with respect to uses of the result in the (dead) code following.  */
3218         tcg_gen_movi_i64(retv, 0);
3219 #endif /* CONFIG_ATOMIC64 */
3220     } else {
3221         TCGv_i32 c32 = tcg_temp_new_i32();
3222         TCGv_i32 n32 = tcg_temp_new_i32();
3223         TCGv_i32 r32 = tcg_temp_new_i32();
3224 
3225         tcg_gen_extrl_i64_i32(c32, cmpv);
3226         tcg_gen_extrl_i64_i32(n32, newv);
3227         tcg_gen_atomic_cmpxchg_i32(r32, addr, c32, n32, idx, memop & ~MO_SIGN);
3228         tcg_temp_free_i32(c32);
3229         tcg_temp_free_i32(n32);
3230 
3231         tcg_gen_extu_i32_i64(retv, r32);
3232         tcg_temp_free_i32(r32);
3233 
3234         if (memop & MO_SIGN) {
3235             tcg_gen_ext_i64(retv, retv, memop);
3236         }
3237     }
3238 }
3239 
3240 static void do_nonatomic_op_i32(TCGv_i32 ret, TCGv addr, TCGv_i32 val,
3241                                 TCGArg idx, MemOp memop, bool new_val,
3242                                 void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32))
3243 {
3244     TCGv_i32 t1 = tcg_temp_new_i32();
3245     TCGv_i32 t2 = tcg_temp_new_i32();
3246 
3247     memop = tcg_canonicalize_memop(memop, 0, 0);
3248 
3249     tcg_gen_qemu_ld_i32(t1, addr, idx, memop);
3250     tcg_gen_ext_i32(t2, val, memop);
3251     gen(t2, t1, t2);
3252     tcg_gen_qemu_st_i32(t2, addr, idx, memop);
3253 
3254     tcg_gen_ext_i32(ret, (new_val ? t2 : t1), memop);
3255     tcg_temp_free_i32(t1);
3256     tcg_temp_free_i32(t2);
3257 }
3258 
3259 static void do_atomic_op_i32(TCGv_i32 ret, TCGv addr, TCGv_i32 val,
3260                              TCGArg idx, MemOp memop, void * const table[])
3261 {
3262     gen_atomic_op_i32 gen;
3263     MemOpIdx oi;
3264 
3265     memop = tcg_canonicalize_memop(memop, 0, 0);
3266 
3267     gen = table[memop & (MO_SIZE | MO_BSWAP)];
3268     tcg_debug_assert(gen != NULL);
3269 
3270     oi = make_memop_idx(memop & ~MO_SIGN, idx);
3271     gen(ret, cpu_env, addr, val, tcg_constant_i32(oi));
3272 
3273     if (memop & MO_SIGN) {
3274         tcg_gen_ext_i32(ret, ret, memop);
3275     }
3276 }
3277 
3278 static void do_nonatomic_op_i64(TCGv_i64 ret, TCGv addr, TCGv_i64 val,
3279                                 TCGArg idx, MemOp memop, bool new_val,
3280                                 void (*gen)(TCGv_i64, TCGv_i64, TCGv_i64))
3281 {
3282     TCGv_i64 t1 = tcg_temp_new_i64();
3283     TCGv_i64 t2 = tcg_temp_new_i64();
3284 
3285     memop = tcg_canonicalize_memop(memop, 1, 0);
3286 
3287     tcg_gen_qemu_ld_i64(t1, addr, idx, memop);
3288     tcg_gen_ext_i64(t2, val, memop);
3289     gen(t2, t1, t2);
3290     tcg_gen_qemu_st_i64(t2, addr, idx, memop);
3291 
3292     tcg_gen_ext_i64(ret, (new_val ? t2 : t1), memop);
3293     tcg_temp_free_i64(t1);
3294     tcg_temp_free_i64(t2);
3295 }
3296 
3297 static void do_atomic_op_i64(TCGv_i64 ret, TCGv addr, TCGv_i64 val,
3298                              TCGArg idx, MemOp memop, void * const table[])
3299 {
3300     memop = tcg_canonicalize_memop(memop, 1, 0);
3301 
3302     if ((memop & MO_SIZE) == MO_64) {
3303 #ifdef CONFIG_ATOMIC64
3304         gen_atomic_op_i64 gen;
3305         MemOpIdx oi;
3306 
3307         gen = table[memop & (MO_SIZE | MO_BSWAP)];
3308         tcg_debug_assert(gen != NULL);
3309 
3310         oi = make_memop_idx(memop & ~MO_SIGN, idx);
3311         gen(ret, cpu_env, addr, val, tcg_constant_i32(oi));
3312 #else
3313         gen_helper_exit_atomic(cpu_env);
3314         /* Produce a result, so that we have a well-formed opcode stream
3315            with respect to uses of the result in the (dead) code following.  */
3316         tcg_gen_movi_i64(ret, 0);
3317 #endif /* CONFIG_ATOMIC64 */
3318     } else {
3319         TCGv_i32 v32 = tcg_temp_new_i32();
3320         TCGv_i32 r32 = tcg_temp_new_i32();
3321 
3322         tcg_gen_extrl_i64_i32(v32, val);
3323         do_atomic_op_i32(r32, addr, v32, idx, memop & ~MO_SIGN, table);
3324         tcg_temp_free_i32(v32);
3325 
3326         tcg_gen_extu_i32_i64(ret, r32);
3327         tcg_temp_free_i32(r32);
3328 
3329         if (memop & MO_SIGN) {
3330             tcg_gen_ext_i64(ret, ret, memop);
3331         }
3332     }
3333 }
3334 
/*
 * GEN_ATOMIC_HELPER(NAME, OP, NEW) defines, for one atomic operation:
 *   - table_NAME, the helper generators indexed by the size and
 *     byte-swap bits of the MemOp (64-bit entries only with
 *     CONFIG_ATOMIC64);
 *   - tcg_gen_atomic_NAME_i32 and tcg_gen_atomic_NAME_i64, which use
 *     the table helpers when CF_PARALLEL is set for the translation
 *     block and otherwise expand to load / tcg_gen_OP / store.
 * NEW is passed as the new_val argument of the non-atomic expansion.
 */
#define GEN_ATOMIC_HELPER(NAME, OP, NEW)                                \
static void * const table_##NAME[(MO_SIZE | MO_BSWAP) + 1] = {          \
    [MO_8] = gen_helper_atomic_##NAME##b,                               \
    [MO_16 | MO_LE] = gen_helper_atomic_##NAME##w_le,                   \
    [MO_16 | MO_BE] = gen_helper_atomic_##NAME##w_be,                   \
    [MO_32 | MO_LE] = gen_helper_atomic_##NAME##l_le,                   \
    [MO_32 | MO_BE] = gen_helper_atomic_##NAME##l_be,                   \
    WITH_ATOMIC64([MO_64 | MO_LE] = gen_helper_atomic_##NAME##q_le)     \
    WITH_ATOMIC64([MO_64 | MO_BE] = gen_helper_atomic_##NAME##q_be)     \
};                                                                      \
void tcg_gen_atomic_##NAME##_i32                                        \
    (TCGv_i32 ret, TCGv addr, TCGv_i32 val, TCGArg idx, MemOp memop)    \
{                                                                       \
    if (!(tcg_ctx->tb_cflags & CF_PARALLEL)) {                          \
        do_nonatomic_op_i32(ret, addr, val, idx, memop, NEW,            \
                            tcg_gen_##OP##_i32);                        \
        return;                                                         \
    }                                                                   \
    do_atomic_op_i32(ret, addr, val, idx, memop, table_##NAME);         \
}                                                                       \
void tcg_gen_atomic_##NAME##_i64                                        \
    (TCGv_i64 ret, TCGv addr, TCGv_i64 val, TCGArg idx, MemOp memop)    \
{                                                                       \
    if (!(tcg_ctx->tb_cflags & CF_PARALLEL)) {                          \
        do_nonatomic_op_i64(ret, addr, val, idx, memop, NEW,            \
                            tcg_gen_##OP##_i64);                        \
        return;                                                         \
    }                                                                   \
    do_atomic_op_i64(ret, addr, val, idx, memop, table_##NAME);         \
}
3365 
3366 GEN_ATOMIC_HELPER(fetch_add, add, 0)
3367 GEN_ATOMIC_HELPER(fetch_and, and, 0)
3368 GEN_ATOMIC_HELPER(fetch_or, or, 0)
3369 GEN_ATOMIC_HELPER(fetch_xor, xor, 0)
3370 GEN_ATOMIC_HELPER(fetch_smin, smin, 0)
3371 GEN_ATOMIC_HELPER(fetch_umin, umin, 0)
3372 GEN_ATOMIC_HELPER(fetch_smax, smax, 0)
3373 GEN_ATOMIC_HELPER(fetch_umax, umax, 0)
3374 
3375 GEN_ATOMIC_HELPER(add_fetch, add, 1)
3376 GEN_ATOMIC_HELPER(and_fetch, and, 1)
3377 GEN_ATOMIC_HELPER(or_fetch, or, 1)
3378 GEN_ATOMIC_HELPER(xor_fetch, xor, 1)
3379 GEN_ATOMIC_HELPER(smin_fetch, smin, 1)
3380 GEN_ATOMIC_HELPER(umin_fetch, umin, 1)
3381 GEN_ATOMIC_HELPER(smax_fetch, smax, 1)
3382 GEN_ATOMIC_HELPER(umax_fetch, umax, 1)
3383 
3384 static void tcg_gen_mov2_i32(TCGv_i32 r, TCGv_i32 a, TCGv_i32 b)
3385 {
3386     tcg_gen_mov_i32(r, b);
3387 }
3388 
3389 static void tcg_gen_mov2_i64(TCGv_i64 r, TCGv_i64 a, TCGv_i64 b)
3390 {
3391     tcg_gen_mov_i64(r, b);
3392 }
3393 
3394 GEN_ATOMIC_HELPER(xchg, mov2, 0)
3395 
3396 #undef GEN_ATOMIC_HELPER
3397