xref: /openbmc/qemu/tcg/tcg-op.c (revision b097ba37)
1 /*
2  * Tiny Code Generator for QEMU
3  *
4  * Copyright (c) 2008 Fabrice Bellard
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24 
25 #include "qemu/osdep.h"
26 #include "qemu-common.h"
27 #include "cpu.h"
28 #include "exec/exec-all.h"
29 #include "tcg.h"
30 #include "tcg-op.h"
31 #include "tcg-mo.h"
32 #include "trace-tcg.h"
33 #include "trace/mem.h"
34 
35 /* Reduce the number of ifdefs below.  This assumes that all uses of
36    TCGV_HIGH and TCGV_LOW are properly protected by a conditional that
37    the compiler can eliminate.  */
38 #if TCG_TARGET_REG_BITS == 64
39 extern TCGv_i32 TCGV_LOW_link_error(TCGv_i64);
40 extern TCGv_i32 TCGV_HIGH_link_error(TCGv_i64);
41 #define TCGV_LOW  TCGV_LOW_link_error
42 #define TCGV_HIGH TCGV_HIGH_link_error
43 #endif
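
/*
 * Commentary: with this redirection, any use of TCGV_LOW/TCGV_HIGH on a
 * 64-bit host that the compiler cannot prove dead resolves to an
 * undefined extern function, turning a latent miscompilation into a
 * link-time error.
 */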
44 
45 void tcg_gen_op1(TCGOpcode opc, TCGArg a1)
46 {
47     TCGOp *op = tcg_emit_op(opc);
48     op->args[0] = a1;
49 }
50 
51 void tcg_gen_op2(TCGOpcode opc, TCGArg a1, TCGArg a2)
52 {
53     TCGOp *op = tcg_emit_op(opc);
54     op->args[0] = a1;
55     op->args[1] = a2;
56 }
57 
58 void tcg_gen_op3(TCGOpcode opc, TCGArg a1, TCGArg a2, TCGArg a3)
59 {
60     TCGOp *op = tcg_emit_op(opc);
61     op->args[0] = a1;
62     op->args[1] = a2;
63     op->args[2] = a3;
64 }
65 
66 void tcg_gen_op4(TCGOpcode opc, TCGArg a1, TCGArg a2, TCGArg a3, TCGArg a4)
67 {
68     TCGOp *op = tcg_emit_op(opc);
69     op->args[0] = a1;
70     op->args[1] = a2;
71     op->args[2] = a3;
72     op->args[3] = a4;
73 }
74 
75 void tcg_gen_op5(TCGOpcode opc, TCGArg a1, TCGArg a2, TCGArg a3,
76                  TCGArg a4, TCGArg a5)
77 {
78     TCGOp *op = tcg_emit_op(opc);
79     op->args[0] = a1;
80     op->args[1] = a2;
81     op->args[2] = a3;
82     op->args[3] = a4;
83     op->args[4] = a5;
84 }
85 
86 void tcg_gen_op6(TCGOpcode opc, TCGArg a1, TCGArg a2, TCGArg a3,
87                  TCGArg a4, TCGArg a5, TCGArg a6)
88 {
89     TCGOp *op = tcg_emit_op(opc);
90     op->args[0] = a1;
91     op->args[1] = a2;
92     op->args[2] = a3;
93     op->args[3] = a4;
94     op->args[4] = a5;
95     op->args[5] = a6;
96 }
97 
98 void tcg_gen_mb(TCGBar mb_type)
99 {
100     if (tcg_ctx->tb_cflags & CF_PARALLEL) {
101         tcg_gen_op1(INDEX_op_mb, mb_type);
102     }
103 }
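
/*
 * Usage sketch (illustrative only): a front end wanting a full
 * sequentially-consistent fence would emit
 *
 *     tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
 *
 * When the TB is not compiled for parallel execution (!CF_PARALLEL)
 * it runs exclusively, so the check above elides the barrier entirely.
 */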
104 
105 /* 32-bit ops */
106 
107 void tcg_gen_addi_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
108 {
109     /* some cases can be optimized here */
110     if (arg2 == 0) {
111         tcg_gen_mov_i32(ret, arg1);
112     } else {
113         TCGv_i32 t0 = tcg_const_i32(arg2);
114         tcg_gen_add_i32(ret, arg1, t0);
115         tcg_temp_free_i32(t0);
116     }
117 }
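
/*
 * Commentary: adding an immediate of 0 folds to a register move; any
 * other immediate is materialized with tcg_const_i32, combined, and
 * freed.  Most of the *_i32 immediate helpers below follow this same
 * materialize/operate/free pattern.
 */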
118 
119 void tcg_gen_subfi_i32(TCGv_i32 ret, int32_t arg1, TCGv_i32 arg2)
120 {
121     if (arg1 == 0 && TCG_TARGET_HAS_neg_i32) {
122         /* Don't recurse with tcg_gen_neg_i32.  */
123         tcg_gen_op2_i32(INDEX_op_neg_i32, ret, arg2);
124     } else {
125         TCGv_i32 t0 = tcg_const_i32(arg1);
126         tcg_gen_sub_i32(ret, t0, arg2);
127         tcg_temp_free_i32(t0);
128     }
129 }
130 
131 void tcg_gen_subi_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
132 {
133     /* some cases can be optimized here */
134     if (arg2 == 0) {
135         tcg_gen_mov_i32(ret, arg1);
136     } else {
137         TCGv_i32 t0 = tcg_const_i32(arg2);
138         tcg_gen_sub_i32(ret, arg1, t0);
139         tcg_temp_free_i32(t0);
140     }
141 }
142 
143 void tcg_gen_andi_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
144 {
145     TCGv_i32 t0;
146     /* Some cases can be optimized here.  */
147     switch (arg2) {
148     case 0:
149         tcg_gen_movi_i32(ret, 0);
150         return;
151     case -1:
152         tcg_gen_mov_i32(ret, arg1);
153         return;
154     case 0xff:
155         /* Don't recurse with tcg_gen_ext8u_i32.  */
156         if (TCG_TARGET_HAS_ext8u_i32) {
157             tcg_gen_op2_i32(INDEX_op_ext8u_i32, ret, arg1);
158             return;
159         }
160         break;
161     case 0xffff:
162         if (TCG_TARGET_HAS_ext16u_i32) {
163             tcg_gen_op2_i32(INDEX_op_ext16u_i32, ret, arg1);
164             return;
165         }
166         break;
167     }
168     t0 = tcg_const_i32(arg2);
169     tcg_gen_and_i32(ret, arg1, t0);
170     tcg_temp_free_i32(t0);
171 }
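
/*
 * Commentary: the switch above folds common masks into cheaper ops,
 * for example (assuming the host provides the extension opcodes):
 *
 *     tcg_gen_andi_i32(r, a, 0)      -> movi_i32   r, 0
 *     tcg_gen_andi_i32(r, a, -1)     -> mov_i32    r, a
 *     tcg_gen_andi_i32(r, a, 0xff)   -> ext8u_i32  r, a
 *     tcg_gen_andi_i32(r, a, 0xffff) -> ext16u_i32 r, a
 */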
172 
173 void tcg_gen_ori_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
174 {
175     /* Some cases can be optimized here.  */
176     if (arg2 == -1) {
177         tcg_gen_movi_i32(ret, -1);
178     } else if (arg2 == 0) {
179         tcg_gen_mov_i32(ret, arg1);
180     } else {
181         TCGv_i32 t0 = tcg_const_i32(arg2);
182         tcg_gen_or_i32(ret, arg1, t0);
183         tcg_temp_free_i32(t0);
184     }
185 }
186 
187 void tcg_gen_xori_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
188 {
189     /* Some cases can be optimized here.  */
190     if (arg2 == 0) {
191         tcg_gen_mov_i32(ret, arg1);
192     } else if (arg2 == -1 && TCG_TARGET_HAS_not_i32) {
193         /* Don't recurse with tcg_gen_not_i32.  */
194         tcg_gen_op2_i32(INDEX_op_not_i32, ret, arg1);
195     } else {
196         TCGv_i32 t0 = tcg_const_i32(arg2);
197         tcg_gen_xor_i32(ret, arg1, t0);
198         tcg_temp_free_i32(t0);
199     }
200 }
201 
202 void tcg_gen_shli_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
203 {
204     tcg_debug_assert(arg2 >= 0 && arg2 < 32);
205     if (arg2 == 0) {
206         tcg_gen_mov_i32(ret, arg1);
207     } else {
208         TCGv_i32 t0 = tcg_const_i32(arg2);
209         tcg_gen_shl_i32(ret, arg1, t0);
210         tcg_temp_free_i32(t0);
211     }
212 }
213 
214 void tcg_gen_shri_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
215 {
216     tcg_debug_assert(arg2 >= 0 && arg2 < 32);
217     if (arg2 == 0) {
218         tcg_gen_mov_i32(ret, arg1);
219     } else {
220         TCGv_i32 t0 = tcg_const_i32(arg2);
221         tcg_gen_shr_i32(ret, arg1, t0);
222         tcg_temp_free_i32(t0);
223     }
224 }
225 
226 void tcg_gen_sari_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
227 {
228     tcg_debug_assert(arg2 >= 0 && arg2 < 32);
229     if (arg2 == 0) {
230         tcg_gen_mov_i32(ret, arg1);
231     } else {
232         TCGv_i32 t0 = tcg_const_i32(arg2);
233         tcg_gen_sar_i32(ret, arg1, t0);
234         tcg_temp_free_i32(t0);
235     }
236 }
237 
238 void tcg_gen_brcond_i32(TCGCond cond, TCGv_i32 arg1, TCGv_i32 arg2, TCGLabel *l)
239 {
240     if (cond == TCG_COND_ALWAYS) {
241         tcg_gen_br(l);
242     } else if (cond != TCG_COND_NEVER) {
243         l->refs++;
244         tcg_gen_op4ii_i32(INDEX_op_brcond_i32, arg1, arg2, cond, label_arg(l));
245     }
246 }
247 
248 void tcg_gen_brcondi_i32(TCGCond cond, TCGv_i32 arg1, int32_t arg2, TCGLabel *l)
249 {
250     if (cond == TCG_COND_ALWAYS) {
251         tcg_gen_br(l);
252     } else if (cond != TCG_COND_NEVER) {
253         TCGv_i32 t0 = tcg_const_i32(arg2);
254         tcg_gen_brcond_i32(cond, arg1, t0, l);
255         tcg_temp_free_i32(t0);
256     }
257 }
258 
259 void tcg_gen_setcond_i32(TCGCond cond, TCGv_i32 ret,
260                          TCGv_i32 arg1, TCGv_i32 arg2)
261 {
262     if (cond == TCG_COND_ALWAYS) {
263         tcg_gen_movi_i32(ret, 1);
264     } else if (cond == TCG_COND_NEVER) {
265         tcg_gen_movi_i32(ret, 0);
266     } else {
267         tcg_gen_op4i_i32(INDEX_op_setcond_i32, ret, arg1, arg2, cond);
268     }
269 }
270 
271 void tcg_gen_setcondi_i32(TCGCond cond, TCGv_i32 ret,
272                           TCGv_i32 arg1, int32_t arg2)
273 {
274     TCGv_i32 t0 = tcg_const_i32(arg2);
275     tcg_gen_setcond_i32(cond, ret, arg1, t0);
276     tcg_temp_free_i32(t0);
277 }
278 
279 void tcg_gen_muli_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
280 {
281     if (arg2 == 0) {
282         tcg_gen_movi_i32(ret, 0);
283     } else if (is_power_of_2(arg2)) {
284         tcg_gen_shli_i32(ret, arg1, ctz32(arg2));
285     } else {
286         TCGv_i32 t0 = tcg_const_i32(arg2);
287         tcg_gen_mul_i32(ret, arg1, t0);
288         tcg_temp_free_i32(t0);
289     }
290 }
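
/*
 * Worked example: multiplication by a power of two is strength-reduced
 * to a shift, so tcg_gen_muli_i32(r, a, 8) emits shli_i32 r, a, 3,
 * since ctz32(8) == 3.
 */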
291 
292 void tcg_gen_div_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
293 {
294     if (TCG_TARGET_HAS_div_i32) {
295         tcg_gen_op3_i32(INDEX_op_div_i32, ret, arg1, arg2);
296     } else if (TCG_TARGET_HAS_div2_i32) {
297         TCGv_i32 t0 = tcg_temp_new_i32();
298         tcg_gen_sari_i32(t0, arg1, 31);
299         tcg_gen_op5_i32(INDEX_op_div2_i32, ret, t0, arg1, t0, arg2);
300         tcg_temp_free_i32(t0);
301     } else {
302         gen_helper_div_i32(ret, arg1, arg2);
303     }
304 }
305 
306 void tcg_gen_rem_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
307 {
308     if (TCG_TARGET_HAS_rem_i32) {
309         tcg_gen_op3_i32(INDEX_op_rem_i32, ret, arg1, arg2);
310     } else if (TCG_TARGET_HAS_div_i32) {
311         TCGv_i32 t0 = tcg_temp_new_i32();
312         tcg_gen_op3_i32(INDEX_op_div_i32, t0, arg1, arg2);
313         tcg_gen_mul_i32(t0, t0, arg2);
314         tcg_gen_sub_i32(ret, arg1, t0);
315         tcg_temp_free_i32(t0);
316     } else if (TCG_TARGET_HAS_div2_i32) {
317         TCGv_i32 t0 = tcg_temp_new_i32();
318         tcg_gen_sari_i32(t0, arg1, 31);
319         tcg_gen_op5_i32(INDEX_op_div2_i32, t0, ret, arg1, t0, arg2);
320         tcg_temp_free_i32(t0);
321     } else {
322         gen_helper_rem_i32(ret, arg1, arg2);
323     }
324 }
325 
326 void tcg_gen_divu_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
327 {
328     if (TCG_TARGET_HAS_div_i32) {
329         tcg_gen_op3_i32(INDEX_op_divu_i32, ret, arg1, arg2);
330     } else if (TCG_TARGET_HAS_div2_i32) {
331         TCGv_i32 t0 = tcg_temp_new_i32();
332         tcg_gen_movi_i32(t0, 0);
333         tcg_gen_op5_i32(INDEX_op_divu2_i32, ret, t0, arg1, t0, arg2);
334         tcg_temp_free_i32(t0);
335     } else {
336         gen_helper_divu_i32(ret, arg1, arg2);
337     }
338 }
339 
340 void tcg_gen_remu_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
341 {
342     if (TCG_TARGET_HAS_rem_i32) {
343         tcg_gen_op3_i32(INDEX_op_remu_i32, ret, arg1, arg2);
344     } else if (TCG_TARGET_HAS_div_i32) {
345         TCGv_i32 t0 = tcg_temp_new_i32();
346         tcg_gen_op3_i32(INDEX_op_divu_i32, t0, arg1, arg2);
347         tcg_gen_mul_i32(t0, t0, arg2);
348         tcg_gen_sub_i32(ret, arg1, t0);
349         tcg_temp_free_i32(t0);
350     } else if (TCG_TARGET_HAS_div2_i32) {
351         TCGv_i32 t0 = tcg_temp_new_i32();
352         tcg_gen_movi_i32(t0, 0);
353         tcg_gen_op5_i32(INDEX_op_divu2_i32, t0, ret, arg1, t0, arg2);
354         tcg_temp_free_i32(t0);
355     } else {
356         gen_helper_remu_i32(ret, arg1, arg2);
357     }
358 }
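
/*
 * Commentary on the div2/divu2 fallback used above: the div2 opcodes
 * take a double-width dividend split across two registers plus the
 * divisor, and produce both quotient and remainder.  The signed forms
 * seed the high half with the sign extension of arg1 (sari 31); the
 * unsigned forms seed it with zero.
 */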
359 
360 void tcg_gen_andc_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
361 {
362     if (TCG_TARGET_HAS_andc_i32) {
363         tcg_gen_op3_i32(INDEX_op_andc_i32, ret, arg1, arg2);
364     } else {
365         TCGv_i32 t0 = tcg_temp_new_i32();
366         tcg_gen_not_i32(t0, arg2);
367         tcg_gen_and_i32(ret, arg1, t0);
368         tcg_temp_free_i32(t0);
369     }
370 }
371 
372 void tcg_gen_eqv_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
373 {
374     if (TCG_TARGET_HAS_eqv_i32) {
375         tcg_gen_op3_i32(INDEX_op_eqv_i32, ret, arg1, arg2);
376     } else {
377         tcg_gen_xor_i32(ret, arg1, arg2);
378         tcg_gen_not_i32(ret, ret);
379     }
380 }
381 
382 void tcg_gen_nand_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
383 {
384     if (TCG_TARGET_HAS_nand_i32) {
385         tcg_gen_op3_i32(INDEX_op_nand_i32, ret, arg1, arg2);
386     } else {
387         tcg_gen_and_i32(ret, arg1, arg2);
388         tcg_gen_not_i32(ret, ret);
389     }
390 }
391 
392 void tcg_gen_nor_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
393 {
394     if (TCG_TARGET_HAS_nor_i32) {
395         tcg_gen_op3_i32(INDEX_op_nor_i32, ret, arg1, arg2);
396     } else {
397         tcg_gen_or_i32(ret, arg1, arg2);
398         tcg_gen_not_i32(ret, ret);
399     }
400 }
401 
402 void tcg_gen_orc_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
403 {
404     if (TCG_TARGET_HAS_orc_i32) {
405         tcg_gen_op3_i32(INDEX_op_orc_i32, ret, arg1, arg2);
406     } else {
407         TCGv_i32 t0 = tcg_temp_new_i32();
408         tcg_gen_not_i32(t0, arg2);
409         tcg_gen_or_i32(ret, arg1, t0);
410         tcg_temp_free_i32(t0);
411     }
412 }
413 
414 void tcg_gen_clz_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
415 {
416     if (TCG_TARGET_HAS_clz_i32) {
417         tcg_gen_op3_i32(INDEX_op_clz_i32, ret, arg1, arg2);
418     } else if (TCG_TARGET_HAS_clz_i64) {
419         TCGv_i64 t1 = tcg_temp_new_i64();
420         TCGv_i64 t2 = tcg_temp_new_i64();
421         tcg_gen_extu_i32_i64(t1, arg1);
422         tcg_gen_extu_i32_i64(t2, arg2);
423         tcg_gen_addi_i64(t2, t2, 32);
424         tcg_gen_clz_i64(t1, t1, t2);
425         tcg_gen_extrl_i64_i32(ret, t1);
426         tcg_temp_free_i64(t1);
427         tcg_temp_free_i64(t2);
428         tcg_gen_subi_i32(ret, ret, 32);
429     } else {
430         gen_helper_clz_i32(ret, arg1, arg2);
431     }
432 }
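
/*
 * Commentary on the clz_i64 fallback above: zero-extending arg1 to 64
 * bits contributes exactly 32 extra leading zeros, corrected by the
 * final subi.  Biasing the default value arg2 by +32 beforehand keeps
 * the arg1 == 0 case consistent after that correction.
 */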
433 
434 void tcg_gen_clzi_i32(TCGv_i32 ret, TCGv_i32 arg1, uint32_t arg2)
435 {
436     TCGv_i32 t = tcg_const_i32(arg2);
437     tcg_gen_clz_i32(ret, arg1, t);
438     tcg_temp_free_i32(t);
439 }
440 
441 void tcg_gen_ctz_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
442 {
443     if (TCG_TARGET_HAS_ctz_i32) {
444         tcg_gen_op3_i32(INDEX_op_ctz_i32, ret, arg1, arg2);
445     } else if (TCG_TARGET_HAS_ctz_i64) {
446         TCGv_i64 t1 = tcg_temp_new_i64();
447         TCGv_i64 t2 = tcg_temp_new_i64();
448         tcg_gen_extu_i32_i64(t1, arg1);
449         tcg_gen_extu_i32_i64(t2, arg2);
450         tcg_gen_ctz_i64(t1, t1, t2);
451         tcg_gen_extrl_i64_i32(ret, t1);
452         tcg_temp_free_i64(t1);
453         tcg_temp_free_i64(t2);
454     } else if (TCG_TARGET_HAS_ctpop_i32
455                || TCG_TARGET_HAS_ctpop_i64
456                || TCG_TARGET_HAS_clz_i32
457                || TCG_TARGET_HAS_clz_i64) {
458         TCGv_i32 z, t = tcg_temp_new_i32();
459 
460         if (TCG_TARGET_HAS_ctpop_i32 || TCG_TARGET_HAS_ctpop_i64) {
461             tcg_gen_subi_i32(t, arg1, 1);
462             tcg_gen_andc_i32(t, t, arg1);
463             tcg_gen_ctpop_i32(t, t);
464         } else {
465             /* Since all non-x86 hosts have clz(0) == 32, don't fight it.  */
466             tcg_gen_neg_i32(t, arg1);
467             tcg_gen_and_i32(t, t, arg1);
468             tcg_gen_clzi_i32(t, t, 32);
469             tcg_gen_xori_i32(t, t, 31);
470         }
471         z = tcg_const_i32(0);
472         tcg_gen_movcond_i32(TCG_COND_EQ, ret, arg1, z, arg2, t);
473         tcg_temp_free_i32(t);
474         tcg_temp_free_i32(z);
475     } else {
476         gen_helper_ctz_i32(ret, arg1, arg2);
477     }
478 }
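
/*
 * Worked example for the ctpop-based path above: for arg1 = 12
 * (binary 1100), arg1 - 1 = 1011 and (arg1 - 1) & ~arg1 = 0011, whose
 * population count is 2 == ctz(12).  The final movcond substitutes
 * arg2 for the arg1 == 0 case.
 */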
479 
480 void tcg_gen_ctzi_i32(TCGv_i32 ret, TCGv_i32 arg1, uint32_t arg2)
481 {
482     if (!TCG_TARGET_HAS_ctz_i32 && TCG_TARGET_HAS_ctpop_i32 && arg2 == 32) {
483         /* This equivalence has the advantage of not requiring a fixup.  */
484         TCGv_i32 t = tcg_temp_new_i32();
485         tcg_gen_subi_i32(t, arg1, 1);
486         tcg_gen_andc_i32(t, t, arg1);
487         tcg_gen_ctpop_i32(ret, t);
488         tcg_temp_free_i32(t);
489     } else {
490         TCGv_i32 t = tcg_const_i32(arg2);
491         tcg_gen_ctz_i32(ret, arg1, t);
492         tcg_temp_free_i32(t);
493     }
494 }
495 
496 void tcg_gen_clrsb_i32(TCGv_i32 ret, TCGv_i32 arg)
497 {
498     if (TCG_TARGET_HAS_clz_i32) {
499         TCGv_i32 t = tcg_temp_new_i32();
500         tcg_gen_sari_i32(t, arg, 31);
501         tcg_gen_xor_i32(t, t, arg);
502         tcg_gen_clzi_i32(t, t, 32);
503         tcg_gen_subi_i32(ret, t, 1);
504         tcg_temp_free_i32(t);
505     } else {
506         gen_helper_clrsb_i32(ret, arg);
507     }
508 }
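
/*
 * Worked example: clrsb counts the redundant sign bits below the MSB.
 * For arg = 0, (arg >> 31) ^ arg is 0, clzi(0, 32) yields 32, and the
 * result is 31: all 31 bits below the sign bit match it.
 */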
509 
510 void tcg_gen_ctpop_i32(TCGv_i32 ret, TCGv_i32 arg1)
511 {
512     if (TCG_TARGET_HAS_ctpop_i32) {
513         tcg_gen_op2_i32(INDEX_op_ctpop_i32, ret, arg1);
514     } else if (TCG_TARGET_HAS_ctpop_i64) {
515         TCGv_i64 t = tcg_temp_new_i64();
516         tcg_gen_extu_i32_i64(t, arg1);
517         tcg_gen_ctpop_i64(t, t);
518         tcg_gen_extrl_i64_i32(ret, t);
519         tcg_temp_free_i64(t);
520     } else {
521         gen_helper_ctpop_i32(ret, arg1);
522     }
523 }
524 
525 void tcg_gen_rotl_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
526 {
527     if (TCG_TARGET_HAS_rot_i32) {
528         tcg_gen_op3_i32(INDEX_op_rotl_i32, ret, arg1, arg2);
529     } else {
530         TCGv_i32 t0, t1;
531 
532         t0 = tcg_temp_new_i32();
533         t1 = tcg_temp_new_i32();
534         tcg_gen_shl_i32(t0, arg1, arg2);
535         tcg_gen_subfi_i32(t1, 32, arg2);
536         tcg_gen_shr_i32(t1, arg1, t1);
537         tcg_gen_or_i32(ret, t0, t1);
538         tcg_temp_free_i32(t0);
539         tcg_temp_free_i32(t1);
540     }
541 }
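
/*
 * Commentary: the fallback implements the rotate identity
 * rotl(x, n) = (x << n) | (x >> (32 - n)), with tcg_gen_subfi_i32
 * supplying the 32 - n shift count.
 */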
542 
543 void tcg_gen_rotli_i32(TCGv_i32 ret, TCGv_i32 arg1, unsigned arg2)
544 {
545     tcg_debug_assert(arg2 < 32);
546     /* some cases can be optimized here */
547     if (arg2 == 0) {
548         tcg_gen_mov_i32(ret, arg1);
549     } else if (TCG_TARGET_HAS_rot_i32) {
550         TCGv_i32 t0 = tcg_const_i32(arg2);
551         tcg_gen_rotl_i32(ret, arg1, t0);
552         tcg_temp_free_i32(t0);
553     } else {
554         TCGv_i32 t0, t1;
555         t0 = tcg_temp_new_i32();
556         t1 = tcg_temp_new_i32();
557         tcg_gen_shli_i32(t0, arg1, arg2);
558         tcg_gen_shri_i32(t1, arg1, 32 - arg2);
559         tcg_gen_or_i32(ret, t0, t1);
560         tcg_temp_free_i32(t0);
561         tcg_temp_free_i32(t1);
562     }
563 }
564 
565 void tcg_gen_rotr_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
566 {
567     if (TCG_TARGET_HAS_rot_i32) {
568         tcg_gen_op3_i32(INDEX_op_rotr_i32, ret, arg1, arg2);
569     } else {
570         TCGv_i32 t0, t1;
571 
572         t0 = tcg_temp_new_i32();
573         t1 = tcg_temp_new_i32();
574         tcg_gen_shr_i32(t0, arg1, arg2);
575         tcg_gen_subfi_i32(t1, 32, arg2);
576         tcg_gen_shl_i32(t1, arg1, t1);
577         tcg_gen_or_i32(ret, t0, t1);
578         tcg_temp_free_i32(t0);
579         tcg_temp_free_i32(t1);
580     }
581 }
582 
583 void tcg_gen_rotri_i32(TCGv_i32 ret, TCGv_i32 arg1, unsigned arg2)
584 {
585     tcg_debug_assert(arg2 < 32);
586     /* some cases can be optimized here */
587     if (arg2 == 0) {
588         tcg_gen_mov_i32(ret, arg1);
589     } else {
590         tcg_gen_rotli_i32(ret, arg1, 32 - arg2);
591     }
592 }
593 
594 void tcg_gen_deposit_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2,
595                          unsigned int ofs, unsigned int len)
596 {
597     uint32_t mask;
598     TCGv_i32 t1;
599 
600     tcg_debug_assert(ofs < 32);
601     tcg_debug_assert(len > 0);
602     tcg_debug_assert(len <= 32);
603     tcg_debug_assert(ofs + len <= 32);
604 
605     if (len == 32) {
606         tcg_gen_mov_i32(ret, arg2);
607         return;
608     }
609     if (TCG_TARGET_HAS_deposit_i32 && TCG_TARGET_deposit_i32_valid(ofs, len)) {
610         tcg_gen_op5ii_i32(INDEX_op_deposit_i32, ret, arg1, arg2, ofs, len);
611         return;
612     }
613 
614     t1 = tcg_temp_new_i32();
615 
616     if (TCG_TARGET_HAS_extract2_i32) {
617         if (ofs + len == 32) {
618             tcg_gen_shli_i32(t1, arg1, len);
619             tcg_gen_extract2_i32(ret, t1, arg2, len);
620             goto done;
621         }
622         if (ofs == 0) {
623             tcg_gen_extract2_i32(ret, arg1, arg2, len);
624             tcg_gen_rotli_i32(ret, ret, len);
625             goto done;
626         }
627     }
628 
629     mask = (1u << len) - 1;
630     if (ofs + len < 32) {
631         tcg_gen_andi_i32(t1, arg2, mask);
632         tcg_gen_shli_i32(t1, t1, ofs);
633     } else {
634         tcg_gen_shli_i32(t1, arg2, ofs);
635     }
636     tcg_gen_andi_i32(ret, arg1, ~(mask << ofs));
637     tcg_gen_or_i32(ret, ret, t1);
638  done:
639     tcg_temp_free_i32(t1);
640 }
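
/*
 * Worked example for the generic expansion above: deposit with
 * ofs = 8, len = 8 computes
 *
 *     ret = (arg1 & ~0x0000ff00) | ((arg2 & 0xff) << 8)
 *
 * i.e. byte 1 of arg1 is replaced by the low byte of arg2.
 */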
641 
642 void tcg_gen_deposit_z_i32(TCGv_i32 ret, TCGv_i32 arg,
643                            unsigned int ofs, unsigned int len)
644 {
645     tcg_debug_assert(ofs < 32);
646     tcg_debug_assert(len > 0);
647     tcg_debug_assert(len <= 32);
648     tcg_debug_assert(ofs + len <= 32);
649 
650     if (ofs + len == 32) {
651         tcg_gen_shli_i32(ret, arg, ofs);
652     } else if (ofs == 0) {
653         tcg_gen_andi_i32(ret, arg, (1u << len) - 1);
654     } else if (TCG_TARGET_HAS_deposit_i32
655                && TCG_TARGET_deposit_i32_valid(ofs, len)) {
656         TCGv_i32 zero = tcg_const_i32(0);
657         tcg_gen_op5ii_i32(INDEX_op_deposit_i32, ret, zero, arg, ofs, len);
658         tcg_temp_free_i32(zero);
659     } else {
660         /* To help two-operand hosts we prefer to zero-extend first,
661            which allows ARG to stay live.  */
662         switch (len) {
663         case 16:
664             if (TCG_TARGET_HAS_ext16u_i32) {
665                 tcg_gen_ext16u_i32(ret, arg);
666                 tcg_gen_shli_i32(ret, ret, ofs);
667                 return;
668             }
669             break;
670         case 8:
671             if (TCG_TARGET_HAS_ext8u_i32) {
672                 tcg_gen_ext8u_i32(ret, arg);
673                 tcg_gen_shli_i32(ret, ret, ofs);
674                 return;
675             }
676             break;
677         }
678         /* Otherwise prefer zero-extension over AND for code size.  */
679         switch (ofs + len) {
680         case 16:
681             if (TCG_TARGET_HAS_ext16u_i32) {
682                 tcg_gen_shli_i32(ret, arg, ofs);
683                 tcg_gen_ext16u_i32(ret, ret);
684                 return;
685             }
686             break;
687         case 8:
688             if (TCG_TARGET_HAS_ext8u_i32) {
689                 tcg_gen_shli_i32(ret, arg, ofs);
690                 tcg_gen_ext8u_i32(ret, ret);
691                 return;
692             }
693             break;
694         }
695         tcg_gen_andi_i32(ret, arg, (1u << len) - 1);
696         tcg_gen_shli_i32(ret, ret, ofs);
697     }
698 }
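
/*
 * Commentary: deposit_z is a deposit into an all-zero background, so
 * every branch above is just a different encoding of
 * ((arg & ((1u << len) - 1)) << ofs), chosen by what the host makes
 * cheap.
 */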
699 
700 void tcg_gen_extract_i32(TCGv_i32 ret, TCGv_i32 arg,
701                          unsigned int ofs, unsigned int len)
702 {
703     tcg_debug_assert(ofs < 32);
704     tcg_debug_assert(len > 0);
705     tcg_debug_assert(len <= 32);
706     tcg_debug_assert(ofs + len <= 32);
707 
708     /* Canonicalize certain special cases, even if extract is supported.  */
709     if (ofs + len == 32) {
710         tcg_gen_shri_i32(ret, arg, 32 - len);
711         return;
712     }
713     if (ofs == 0) {
714         tcg_gen_andi_i32(ret, arg, (1u << len) - 1);
715         return;
716     }
717 
718     if (TCG_TARGET_HAS_extract_i32
719         && TCG_TARGET_extract_i32_valid(ofs, len)) {
720         tcg_gen_op4ii_i32(INDEX_op_extract_i32, ret, arg, ofs, len);
721         return;
722     }
723 
724     /* Assume that zero-extension, if available, is cheaper than a shift.  */
725     switch (ofs + len) {
726     case 16:
727         if (TCG_TARGET_HAS_ext16u_i32) {
728             tcg_gen_ext16u_i32(ret, arg);
729             tcg_gen_shri_i32(ret, ret, ofs);
730             return;
731         }
732         break;
733     case 8:
734         if (TCG_TARGET_HAS_ext8u_i32) {
735             tcg_gen_ext8u_i32(ret, arg);
736             tcg_gen_shri_i32(ret, ret, ofs);
737             return;
738         }
739         break;
740     }
741 
742     /* ??? Ideally we'd know what values are available for immediate AND.
743        Assume that 8 bits are available, plus the special case of 16,
744        so that we get ext8u, ext16u.  */
745     switch (len) {
746     case 1 ... 8: case 16:
747         tcg_gen_shri_i32(ret, arg, ofs);
748         tcg_gen_andi_i32(ret, ret, (1u << len) - 1);
749         break;
750     default:
751         tcg_gen_shli_i32(ret, arg, 32 - len - ofs);
752         tcg_gen_shri_i32(ret, ret, 32 - len);
753         break;
754     }
755 }
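
/*
 * Worked example: extract(arg, ofs = 8, len = 8) is (arg >> 8) & 0xff.
 * With ofs + len == 16 and ext16u available, the code instead emits
 * ext16u followed by a shift, on the assumption that zero-extension is
 * no more expensive than an immediate AND.
 */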
756 
757 void tcg_gen_sextract_i32(TCGv_i32 ret, TCGv_i32 arg,
758                           unsigned int ofs, unsigned int len)
759 {
760     tcg_debug_assert(ofs < 32);
761     tcg_debug_assert(len > 0);
762     tcg_debug_assert(len <= 32);
763     tcg_debug_assert(ofs + len <= 32);
764 
765     /* Canonicalize certain special cases, even if extract is supported.  */
766     if (ofs + len == 32) {
767         tcg_gen_sari_i32(ret, arg, 32 - len);
768         return;
769     }
770     if (ofs == 0) {
771         switch (len) {
772         case 16:
773             tcg_gen_ext16s_i32(ret, arg);
774             return;
775         case 8:
776             tcg_gen_ext8s_i32(ret, arg);
777             return;
778         }
779     }
780 
781     if (TCG_TARGET_HAS_sextract_i32
782         && TCG_TARGET_extract_i32_valid(ofs, len)) {
783         tcg_gen_op4ii_i32(INDEX_op_sextract_i32, ret, arg, ofs, len);
784         return;
785     }
786 
787     /* Assume that sign-extension, if available, is cheaper than a shift.  */
788     switch (ofs + len) {
789     case 16:
790         if (TCG_TARGET_HAS_ext16s_i32) {
791             tcg_gen_ext16s_i32(ret, arg);
792             tcg_gen_sari_i32(ret, ret, ofs);
793             return;
794         }
795         break;
796     case 8:
797         if (TCG_TARGET_HAS_ext8s_i32) {
798             tcg_gen_ext8s_i32(ret, arg);
799             tcg_gen_sari_i32(ret, ret, ofs);
800             return;
801         }
802         break;
803     }
804     switch (len) {
805     case 16:
806         if (TCG_TARGET_HAS_ext16s_i32) {
807             tcg_gen_shri_i32(ret, arg, ofs);
808             tcg_gen_ext16s_i32(ret, ret);
809             return;
810         }
811         break;
812     case 8:
813         if (TCG_TARGET_HAS_ext8s_i32) {
814             tcg_gen_shri_i32(ret, arg, ofs);
815             tcg_gen_ext8s_i32(ret, ret);
816             return;
817         }
818         break;
819     }
820 
821     tcg_gen_shli_i32(ret, arg, 32 - len - ofs);
822     tcg_gen_sari_i32(ret, ret, 32 - len);
823 }
824 
825 /*
826  * Extract 32 bits from a 64-bit input, ah:al, starting from ofs.
827  * Unlike tcg_gen_extract_i32 above, len is fixed at 32.
828  */
829 void tcg_gen_extract2_i32(TCGv_i32 ret, TCGv_i32 al, TCGv_i32 ah,
830                           unsigned int ofs)
831 {
832     tcg_debug_assert(ofs <= 32);
833     if (ofs == 0) {
834         tcg_gen_mov_i32(ret, al);
835     } else if (ofs == 32) {
836         tcg_gen_mov_i32(ret, ah);
837     } else if (al == ah) {
838         tcg_gen_rotri_i32(ret, al, ofs);
839     } else if (TCG_TARGET_HAS_extract2_i32) {
840         tcg_gen_op4i_i32(INDEX_op_extract2_i32, ret, al, ah, ofs);
841     } else {
842         TCGv_i32 t0 = tcg_temp_new_i32();
843         tcg_gen_shri_i32(t0, al, ofs);
844         tcg_gen_deposit_i32(ret, t0, ah, 32 - ofs, ofs);
845         tcg_temp_free_i32(t0);
846     }
847 }
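
/*
 * Worked example: extract2(al, ah, ofs) returns bits [ofs, ofs + 31]
 * of the 64-bit value ah:al.  For ofs = 8 the generic path computes
 * al >> 8 and then deposits the low 8 bits of ah into bits [24, 31].
 */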
848 
849 void tcg_gen_movcond_i32(TCGCond cond, TCGv_i32 ret, TCGv_i32 c1,
850                          TCGv_i32 c2, TCGv_i32 v1, TCGv_i32 v2)
851 {
852     if (cond == TCG_COND_ALWAYS) {
853         tcg_gen_mov_i32(ret, v1);
854     } else if (cond == TCG_COND_NEVER) {
855         tcg_gen_mov_i32(ret, v2);
856     } else if (TCG_TARGET_HAS_movcond_i32) {
857         tcg_gen_op6i_i32(INDEX_op_movcond_i32, ret, c1, c2, v1, v2, cond);
858     } else {
859         TCGv_i32 t0 = tcg_temp_new_i32();
860         TCGv_i32 t1 = tcg_temp_new_i32();
861         tcg_gen_setcond_i32(cond, t0, c1, c2);
862         tcg_gen_neg_i32(t0, t0);
863         tcg_gen_and_i32(t1, v1, t0);
864         tcg_gen_andc_i32(ret, v2, t0);
865         tcg_gen_or_i32(ret, ret, t1);
866         tcg_temp_free_i32(t0);
867         tcg_temp_free_i32(t1);
868     }
869 }
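
/*
 * Commentary on the fallback above: setcond produces 0 or 1, and
 * negation turns that into an all-zeros or all-ones mask m, so
 *
 *     ret = (v1 & m) | (v2 & ~m)
 *
 * selects v1 when the condition holds and v2 otherwise, branch-free.
 */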
870 
871 void tcg_gen_add2_i32(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 al,
872                       TCGv_i32 ah, TCGv_i32 bl, TCGv_i32 bh)
873 {
874     if (TCG_TARGET_HAS_add2_i32) {
875         tcg_gen_op6_i32(INDEX_op_add2_i32, rl, rh, al, ah, bl, bh);
876     } else {
877         TCGv_i64 t0 = tcg_temp_new_i64();
878         TCGv_i64 t1 = tcg_temp_new_i64();
879         tcg_gen_concat_i32_i64(t0, al, ah);
880         tcg_gen_concat_i32_i64(t1, bl, bh);
881         tcg_gen_add_i64(t0, t0, t1);
882         tcg_gen_extr_i64_i32(rl, rh, t0);
883         tcg_temp_free_i64(t0);
884         tcg_temp_free_i64(t1);
885     }
886 }
887 
888 void tcg_gen_sub2_i32(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 al,
889                       TCGv_i32 ah, TCGv_i32 bl, TCGv_i32 bh)
890 {
891     if (TCG_TARGET_HAS_sub2_i32) {
892         tcg_gen_op6_i32(INDEX_op_sub2_i32, rl, rh, al, ah, bl, bh);
893     } else {
894         TCGv_i64 t0 = tcg_temp_new_i64();
895         TCGv_i64 t1 = tcg_temp_new_i64();
896         tcg_gen_concat_i32_i64(t0, al, ah);
897         tcg_gen_concat_i32_i64(t1, bl, bh);
898         tcg_gen_sub_i64(t0, t0, t1);
899         tcg_gen_extr_i64_i32(rl, rh, t0);
900         tcg_temp_free_i64(t0);
901         tcg_temp_free_i64(t1);
902     }
903 }
904 
905 void tcg_gen_mulu2_i32(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 arg1, TCGv_i32 arg2)
906 {
907     if (TCG_TARGET_HAS_mulu2_i32) {
908         tcg_gen_op4_i32(INDEX_op_mulu2_i32, rl, rh, arg1, arg2);
909     } else if (TCG_TARGET_HAS_muluh_i32) {
910         TCGv_i32 t = tcg_temp_new_i32();
911         tcg_gen_op3_i32(INDEX_op_mul_i32, t, arg1, arg2);
912         tcg_gen_op3_i32(INDEX_op_muluh_i32, rh, arg1, arg2);
913         tcg_gen_mov_i32(rl, t);
914         tcg_temp_free_i32(t);
915     } else {
916         TCGv_i64 t0 = tcg_temp_new_i64();
917         TCGv_i64 t1 = tcg_temp_new_i64();
918         tcg_gen_extu_i32_i64(t0, arg1);
919         tcg_gen_extu_i32_i64(t1, arg2);
920         tcg_gen_mul_i64(t0, t0, t1);
921         tcg_gen_extr_i64_i32(rl, rh, t0);
922         tcg_temp_free_i64(t0);
923         tcg_temp_free_i64(t1);
924     }
925 }
926 
927 void tcg_gen_muls2_i32(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 arg1, TCGv_i32 arg2)
928 {
929     if (TCG_TARGET_HAS_muls2_i32) {
930         tcg_gen_op4_i32(INDEX_op_muls2_i32, rl, rh, arg1, arg2);
931     } else if (TCG_TARGET_HAS_mulsh_i32) {
932         TCGv_i32 t = tcg_temp_new_i32();
933         tcg_gen_op3_i32(INDEX_op_mul_i32, t, arg1, arg2);
934         tcg_gen_op3_i32(INDEX_op_mulsh_i32, rh, arg1, arg2);
935         tcg_gen_mov_i32(rl, t);
936         tcg_temp_free_i32(t);
937     } else if (TCG_TARGET_REG_BITS == 32) {
938         TCGv_i32 t0 = tcg_temp_new_i32();
939         TCGv_i32 t1 = tcg_temp_new_i32();
940         TCGv_i32 t2 = tcg_temp_new_i32();
941         TCGv_i32 t3 = tcg_temp_new_i32();
942         tcg_gen_mulu2_i32(t0, t1, arg1, arg2);
943         /* Adjust for negative inputs.  */
944         tcg_gen_sari_i32(t2, arg1, 31);
945         tcg_gen_sari_i32(t3, arg2, 31);
946         tcg_gen_and_i32(t2, t2, arg2);
947         tcg_gen_and_i32(t3, t3, arg1);
948         tcg_gen_sub_i32(rh, t1, t2);
949         tcg_gen_sub_i32(rh, rh, t3);
950         tcg_gen_mov_i32(rl, t0);
951         tcg_temp_free_i32(t0);
952         tcg_temp_free_i32(t1);
953         tcg_temp_free_i32(t2);
954         tcg_temp_free_i32(t3);
955     } else {
956         TCGv_i64 t0 = tcg_temp_new_i64();
957         TCGv_i64 t1 = tcg_temp_new_i64();
958         tcg_gen_ext_i32_i64(t0, arg1);
959         tcg_gen_ext_i32_i64(t1, arg2);
960         tcg_gen_mul_i64(t0, t0, t1);
961         tcg_gen_extr_i64_i32(rl, rh, t0);
962         tcg_temp_free_i64(t0);
963         tcg_temp_free_i64(t1);
964     }
965 }
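
/*
 * Commentary on the 32-bit signed fallback above: a 32-bit value a
 * read as signed is a_u - 2^32 * sign(a), so modulo 2^64
 *
 *     a_s * b_s = a_u * b_u - 2^32 * (sign(a) * b_u + sign(b) * a_u)
 *
 * and the high word of the unsigned product is corrected by
 * subtracting (a >> 31) & b and (b >> 31) & a, the t2/t3 terms above.
 * tcg_gen_mulsu2_i32 below applies only the arg1 correction.
 */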
966 
967 void tcg_gen_mulsu2_i32(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 arg1, TCGv_i32 arg2)
968 {
969     if (TCG_TARGET_REG_BITS == 32) {
970         TCGv_i32 t0 = tcg_temp_new_i32();
971         TCGv_i32 t1 = tcg_temp_new_i32();
972         TCGv_i32 t2 = tcg_temp_new_i32();
973         tcg_gen_mulu2_i32(t0, t1, arg1, arg2);
974         /* Adjust for negative input for the signed arg1.  */
975         tcg_gen_sari_i32(t2, arg1, 31);
976         tcg_gen_and_i32(t2, t2, arg2);
977         tcg_gen_sub_i32(rh, t1, t2);
978         tcg_gen_mov_i32(rl, t0);
979         tcg_temp_free_i32(t0);
980         tcg_temp_free_i32(t1);
981         tcg_temp_free_i32(t2);
982     } else {
983         TCGv_i64 t0 = tcg_temp_new_i64();
984         TCGv_i64 t1 = tcg_temp_new_i64();
985         tcg_gen_ext_i32_i64(t0, arg1);
986         tcg_gen_extu_i32_i64(t1, arg2);
987         tcg_gen_mul_i64(t0, t0, t1);
988         tcg_gen_extr_i64_i32(rl, rh, t0);
989         tcg_temp_free_i64(t0);
990         tcg_temp_free_i64(t1);
991     }
992 }
993 
994 void tcg_gen_ext8s_i32(TCGv_i32 ret, TCGv_i32 arg)
995 {
996     if (TCG_TARGET_HAS_ext8s_i32) {
997         tcg_gen_op2_i32(INDEX_op_ext8s_i32, ret, arg);
998     } else {
999         tcg_gen_shli_i32(ret, arg, 24);
1000         tcg_gen_sari_i32(ret, ret, 24);
1001     }
1002 }
1003 
1004 void tcg_gen_ext16s_i32(TCGv_i32 ret, TCGv_i32 arg)
1005 {
1006     if (TCG_TARGET_HAS_ext16s_i32) {
1007         tcg_gen_op2_i32(INDEX_op_ext16s_i32, ret, arg);
1008     } else {
1009         tcg_gen_shli_i32(ret, arg, 16);
1010         tcg_gen_sari_i32(ret, ret, 16);
1011     }
1012 }
1013 
1014 void tcg_gen_ext8u_i32(TCGv_i32 ret, TCGv_i32 arg)
1015 {
1016     if (TCG_TARGET_HAS_ext8u_i32) {
1017         tcg_gen_op2_i32(INDEX_op_ext8u_i32, ret, arg);
1018     } else {
1019         tcg_gen_andi_i32(ret, arg, 0xffu);
1020     }
1021 }
1022 
1023 void tcg_gen_ext16u_i32(TCGv_i32 ret, TCGv_i32 arg)
1024 {
1025     if (TCG_TARGET_HAS_ext16u_i32) {
1026         tcg_gen_op2_i32(INDEX_op_ext16u_i32, ret, arg);
1027     } else {
1028         tcg_gen_andi_i32(ret, arg, 0xffffu);
1029     }
1030 }
1031 
1032 /* Note: we assume the two high bytes are set to zero */
1033 void tcg_gen_bswap16_i32(TCGv_i32 ret, TCGv_i32 arg)
1034 {
1035     if (TCG_TARGET_HAS_bswap16_i32) {
1036         tcg_gen_op2_i32(INDEX_op_bswap16_i32, ret, arg);
1037     } else {
1038         TCGv_i32 t0 = tcg_temp_new_i32();
1039 
1040         tcg_gen_ext8u_i32(t0, arg);
1041         tcg_gen_shli_i32(t0, t0, 8);
1042         tcg_gen_shri_i32(ret, arg, 8);
1043         tcg_gen_or_i32(ret, ret, t0);
1044         tcg_temp_free_i32(t0);
1045     }
1046 }
1047 
1048 void tcg_gen_bswap32_i32(TCGv_i32 ret, TCGv_i32 arg)
1049 {
1050     if (TCG_TARGET_HAS_bswap32_i32) {
1051         tcg_gen_op2_i32(INDEX_op_bswap32_i32, ret, arg);
1052     } else {
1053         TCGv_i32 t0 = tcg_temp_new_i32();
1054         TCGv_i32 t1 = tcg_temp_new_i32();
1055         TCGv_i32 t2 = tcg_const_i32(0x00ff00ff);
1056 
1057                                         /* arg = abcd */
1058         tcg_gen_shri_i32(t0, arg, 8);   /*  t0 = .abc */
1059         tcg_gen_and_i32(t1, arg, t2);   /*  t1 = .b.d */
1060         tcg_gen_and_i32(t0, t0, t2);    /*  t0 = .a.c */
1061         tcg_gen_shli_i32(t1, t1, 8);    /*  t1 = b.d. */
1062         tcg_gen_or_i32(ret, t0, t1);    /* ret = badc */
1063 
1064         tcg_gen_shri_i32(t0, ret, 16);  /*  t0 = ..ba */
1065         tcg_gen_shli_i32(t1, ret, 16);  /*  t1 = dc.. */
1066         tcg_gen_or_i32(ret, t0, t1);    /* ret = dcba */
1067 
1068         tcg_temp_free_i32(t0);
1069         tcg_temp_free_i32(t1);
1070         tcg_temp_free_i32(t2);
1071     }
1072 }
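
/*
 * Worked example for the fallback above: for arg = 0x11223344 the
 * first phase swaps the bytes within each halfword, giving 0x22114433,
 * and the final shri/shli/or pair swaps the halfwords, giving
 * 0x44332211.
 */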
1073 
1074 void tcg_gen_smin_i32(TCGv_i32 ret, TCGv_i32 a, TCGv_i32 b)
1075 {
1076     tcg_gen_movcond_i32(TCG_COND_LT, ret, a, b, a, b);
1077 }
1078 
1079 void tcg_gen_umin_i32(TCGv_i32 ret, TCGv_i32 a, TCGv_i32 b)
1080 {
1081     tcg_gen_movcond_i32(TCG_COND_LTU, ret, a, b, a, b);
1082 }
1083 
1084 void tcg_gen_smax_i32(TCGv_i32 ret, TCGv_i32 a, TCGv_i32 b)
1085 {
1086     tcg_gen_movcond_i32(TCG_COND_LT, ret, a, b, b, a);
1087 }
1088 
1089 void tcg_gen_umax_i32(TCGv_i32 ret, TCGv_i32 a, TCGv_i32 b)
1090 {
1091     tcg_gen_movcond_i32(TCG_COND_LTU, ret, a, b, b, a);
1092 }
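
/*
 * Commentary: the min/max helpers above are thin wrappers around
 * movcond, e.g. smin(a, b) = (a < b ? a : b) becomes
 * movcond(LT, ret, a, b, a, b).
 */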
1093 
1094 /* 64-bit ops */
1095 
1096 #if TCG_TARGET_REG_BITS == 32
1097 /* These are all inline for TCG_TARGET_REG_BITS == 64.  */
1098 
1099 void tcg_gen_discard_i64(TCGv_i64 arg)
1100 {
1101     tcg_gen_discard_i32(TCGV_LOW(arg));
1102     tcg_gen_discard_i32(TCGV_HIGH(arg));
1103 }
1104 
1105 void tcg_gen_mov_i64(TCGv_i64 ret, TCGv_i64 arg)
1106 {
1107     tcg_gen_mov_i32(TCGV_LOW(ret), TCGV_LOW(arg));
1108     tcg_gen_mov_i32(TCGV_HIGH(ret), TCGV_HIGH(arg));
1109 }
1110 
1111 void tcg_gen_movi_i64(TCGv_i64 ret, int64_t arg)
1112 {
1113     tcg_gen_movi_i32(TCGV_LOW(ret), arg);
1114     tcg_gen_movi_i32(TCGV_HIGH(ret), arg >> 32);
1115 }
1116 
1117 void tcg_gen_ld8u_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset)
1118 {
1119     tcg_gen_ld8u_i32(TCGV_LOW(ret), arg2, offset);
1120     tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
1121 }
1122 
1123 void tcg_gen_ld8s_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset)
1124 {
1125     tcg_gen_ld8s_i32(TCGV_LOW(ret), arg2, offset);
1126     tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31);
1127 }
1128 
1129 void tcg_gen_ld16u_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset)
1130 {
1131     tcg_gen_ld16u_i32(TCGV_LOW(ret), arg2, offset);
1132     tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
1133 }
1134 
1135 void tcg_gen_ld16s_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset)
1136 {
1137     tcg_gen_ld16s_i32(TCGV_LOW(ret), arg2, offset);
1138     tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31);
1139 }
1140 
1141 void tcg_gen_ld32u_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset)
1142 {
1143     tcg_gen_ld_i32(TCGV_LOW(ret), arg2, offset);
1144     tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
1145 }
1146 
1147 void tcg_gen_ld32s_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset)
1148 {
1149     tcg_gen_ld_i32(TCGV_LOW(ret), arg2, offset);
1150     tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31);
1151 }
1152 
1153 void tcg_gen_ld_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset)
1154 {
1155     /* Since arg2 and ret have different types,
1156        they cannot be the same temporary */
1157 #ifdef HOST_WORDS_BIGENDIAN
1158     tcg_gen_ld_i32(TCGV_HIGH(ret), arg2, offset);
1159     tcg_gen_ld_i32(TCGV_LOW(ret), arg2, offset + 4);
1160 #else
1161     tcg_gen_ld_i32(TCGV_LOW(ret), arg2, offset);
1162     tcg_gen_ld_i32(TCGV_HIGH(ret), arg2, offset + 4);
1163 #endif
1164 }
1165 
1166 void tcg_gen_st_i64(TCGv_i64 arg1, TCGv_ptr arg2, tcg_target_long offset)
1167 {
1168 #ifdef HOST_WORDS_BIGENDIAN
1169     tcg_gen_st_i32(TCGV_HIGH(arg1), arg2, offset);
1170     tcg_gen_st_i32(TCGV_LOW(arg1), arg2, offset + 4);
1171 #else
1172     tcg_gen_st_i32(TCGV_LOW(arg1), arg2, offset);
1173     tcg_gen_st_i32(TCGV_HIGH(arg1), arg2, offset + 4);
1174 #endif
1175 }
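
/*
 * Commentary: on a 32-bit host an i64 temporary lives in two 32-bit
 * halves, so i64 loads and stores split into two i32 accesses whose
 * offsets depend on host endianness: the high half sits at the lower
 * address on a big-endian host, and at offset + 4 otherwise.
 */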
1176 
1177 void tcg_gen_and_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
1178 {
1179     tcg_gen_and_i32(TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_LOW(arg2));
1180     tcg_gen_and_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), TCGV_HIGH(arg2));
1181 }
1182 
1183 void tcg_gen_or_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
1184 {
1185     tcg_gen_or_i32(TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_LOW(arg2));
1186     tcg_gen_or_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), TCGV_HIGH(arg2));
1187 }
1188 
1189 void tcg_gen_xor_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
1190 {
1191     tcg_gen_xor_i32(TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_LOW(arg2));
1192     tcg_gen_xor_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), TCGV_HIGH(arg2));
1193 }
1194 
1195 void tcg_gen_shl_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
1196 {
1197     gen_helper_shl_i64(ret, arg1, arg2);
1198 }
1199 
1200 void tcg_gen_shr_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
1201 {
1202     gen_helper_shr_i64(ret, arg1, arg2);
1203 }
1204 
1205 void tcg_gen_sar_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
1206 {
1207     gen_helper_sar_i64(ret, arg1, arg2);
1208 }
1209 
1210 void tcg_gen_mul_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
1211 {
1212     TCGv_i64 t0;
1213     TCGv_i32 t1;
1214 
1215     t0 = tcg_temp_new_i64();
1216     t1 = tcg_temp_new_i32();
1217 
1218     tcg_gen_mulu2_i32(TCGV_LOW(t0), TCGV_HIGH(t0),
1219                       TCGV_LOW(arg1), TCGV_LOW(arg2));
1220 
1221     tcg_gen_mul_i32(t1, TCGV_LOW(arg1), TCGV_HIGH(arg2));
1222     tcg_gen_add_i32(TCGV_HIGH(t0), TCGV_HIGH(t0), t1);
1223     tcg_gen_mul_i32(t1, TCGV_HIGH(arg1), TCGV_LOW(arg2));
1224     tcg_gen_add_i32(TCGV_HIGH(t0), TCGV_HIGH(t0), t1);
1225 
1226     tcg_gen_mov_i64(ret, t0);
1227     tcg_temp_free_i64(t0);
1228     tcg_temp_free_i32(t1);
1229 }
1230 #endif /* TCG_TARGET_REG_BITS == 32 */
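
/*
 * Commentary on the 32-bit expansion of mul_i64 above: writing the
 * operands as ah:al and bh:bl, the low 64 bits of the product are
 *
 *     al * bl + 2^32 * (al * bh + ah * bl)
 *
 * mulu2 supplies the full 64-bit al * bl; the cross terms affect only
 * the high word, so plain 32-bit multiplies and adds suffice.
 */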
1231 
1232 void tcg_gen_addi_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
1233 {
1234     /* some cases can be optimized here */
1235     if (arg2 == 0) {
1236         tcg_gen_mov_i64(ret, arg1);
1237     } else {
1238         TCGv_i64 t0 = tcg_const_i64(arg2);
1239         tcg_gen_add_i64(ret, arg1, t0);
1240         tcg_temp_free_i64(t0);
1241     }
1242 }
1243 
1244 void tcg_gen_subfi_i64(TCGv_i64 ret, int64_t arg1, TCGv_i64 arg2)
1245 {
1246     if (arg1 == 0 && TCG_TARGET_HAS_neg_i64) {
1247         /* Don't recurse with tcg_gen_neg_i64.  */
1248         tcg_gen_op2_i64(INDEX_op_neg_i64, ret, arg2);
1249     } else {
1250         TCGv_i64 t0 = tcg_const_i64(arg1);
1251         tcg_gen_sub_i64(ret, t0, arg2);
1252         tcg_temp_free_i64(t0);
1253     }
1254 }
1255 
1256 void tcg_gen_subi_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
1257 {
1258     /* some cases can be optimized here */
1259     if (arg2 == 0) {
1260         tcg_gen_mov_i64(ret, arg1);
1261     } else {
1262         TCGv_i64 t0 = tcg_const_i64(arg2);
1263         tcg_gen_sub_i64(ret, arg1, t0);
1264         tcg_temp_free_i64(t0);
1265     }
1266 }
1267 
1268 void tcg_gen_andi_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
1269 {
1270     TCGv_i64 t0;
1271 
1272     if (TCG_TARGET_REG_BITS == 32) {
1273         tcg_gen_andi_i32(TCGV_LOW(ret), TCGV_LOW(arg1), arg2);
1274         tcg_gen_andi_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), arg2 >> 32);
1275         return;
1276     }
1277 
1278     /* Some cases can be optimized here.  */
1279     switch (arg2) {
1280     case 0:
1281         tcg_gen_movi_i64(ret, 0);
1282         return;
1283     case -1:
1284         tcg_gen_mov_i64(ret, arg1);
1285         return;
1286     case 0xff:
1287         /* Don't recurse with tcg_gen_ext8u_i64.  */
1288         if (TCG_TARGET_HAS_ext8u_i64) {
1289             tcg_gen_op2_i64(INDEX_op_ext8u_i64, ret, arg1);
1290             return;
1291         }
1292         break;
1293     case 0xffff:
1294         if (TCG_TARGET_HAS_ext16u_i64) {
1295             tcg_gen_op2_i64(INDEX_op_ext16u_i64, ret, arg1);
1296             return;
1297         }
1298         break;
1299     case 0xffffffffu:
1300         if (TCG_TARGET_HAS_ext32u_i64) {
1301             tcg_gen_op2_i64(INDEX_op_ext32u_i64, ret, arg1);
1302             return;
1303         }
1304         break;
1305     }
1306     t0 = tcg_const_i64(arg2);
1307     tcg_gen_and_i64(ret, arg1, t0);
1308     tcg_temp_free_i64(t0);
1309 }
1310 
1311 void tcg_gen_ori_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
1312 {
1313     if (TCG_TARGET_REG_BITS == 32) {
1314         tcg_gen_ori_i32(TCGV_LOW(ret), TCGV_LOW(arg1), arg2);
1315         tcg_gen_ori_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), arg2 >> 32);
1316         return;
1317     }
1318     /* Some cases can be optimized here.  */
1319     if (arg2 == -1) {
1320         tcg_gen_movi_i64(ret, -1);
1321     } else if (arg2 == 0) {
1322         tcg_gen_mov_i64(ret, arg1);
1323     } else {
1324         TCGv_i64 t0 = tcg_const_i64(arg2);
1325         tcg_gen_or_i64(ret, arg1, t0);
1326         tcg_temp_free_i64(t0);
1327     }
1328 }
1329 
1330 void tcg_gen_xori_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
1331 {
1332     if (TCG_TARGET_REG_BITS == 32) {
1333         tcg_gen_xori_i32(TCGV_LOW(ret), TCGV_LOW(arg1), arg2);
1334         tcg_gen_xori_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), arg2 >> 32);
1335         return;
1336     }
1337     /* Some cases can be optimized here.  */
1338     if (arg2 == 0) {
1339         tcg_gen_mov_i64(ret, arg1);
1340     } else if (arg2 == -1 && TCG_TARGET_HAS_not_i64) {
1341         /* Don't recurse with tcg_gen_not_i64.  */
1342         tcg_gen_op2_i64(INDEX_op_not_i64, ret, arg1);
1343     } else {
1344         TCGv_i64 t0 = tcg_const_i64(arg2);
1345         tcg_gen_xor_i64(ret, arg1, t0);
1346         tcg_temp_free_i64(t0);
1347     }
1348 }
1349 
1350 static inline void tcg_gen_shifti_i64(TCGv_i64 ret, TCGv_i64 arg1,
1351                                       unsigned c, bool right, bool arith)
1352 {
1353     tcg_debug_assert(c < 64);
1354     if (c == 0) {
1355         tcg_gen_mov_i32(TCGV_LOW(ret), TCGV_LOW(arg1));
1356         tcg_gen_mov_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1));
1357     } else if (c >= 32) {
1358         c -= 32;
1359         if (right) {
1360             if (arith) {
1361                 tcg_gen_sari_i32(TCGV_LOW(ret), TCGV_HIGH(arg1), c);
1362                 tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), 31);
1363             } else {
1364                 tcg_gen_shri_i32(TCGV_LOW(ret), TCGV_HIGH(arg1), c);
1365                 tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
1366             }
1367         } else {
1368             tcg_gen_shli_i32(TCGV_HIGH(ret), TCGV_LOW(arg1), c);
1369             tcg_gen_movi_i32(TCGV_LOW(ret), 0);
1370         }
1371     } else if (right) {
1372         if (TCG_TARGET_HAS_extract2_i32) {
1373             tcg_gen_extract2_i32(TCGV_LOW(ret),
1374                                  TCGV_LOW(arg1), TCGV_HIGH(arg1), c);
1375         } else {
1376             tcg_gen_shri_i32(TCGV_LOW(ret), TCGV_LOW(arg1), c);
1377             tcg_gen_deposit_i32(TCGV_LOW(ret), TCGV_LOW(ret),
1378                                 TCGV_HIGH(arg1), 32 - c, c);
1379         }
1380         if (arith) {
1381             tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), c);
1382         } else {
1383             tcg_gen_shri_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), c);
1384         }
1385     } else {
1386         if (TCG_TARGET_HAS_extract2_i32) {
1387             tcg_gen_extract2_i32(TCGV_HIGH(ret),
1388                                  TCGV_LOW(arg1), TCGV_HIGH(arg1), 32 - c);
1389         } else {
1390             TCGv_i32 t0 = tcg_temp_new_i32();
1391             tcg_gen_shri_i32(t0, TCGV_LOW(arg1), 32 - c);
1392             tcg_gen_deposit_i32(TCGV_HIGH(ret), t0,
1393                                 TCGV_HIGH(arg1), c, 32 - c);
1394             tcg_temp_free_i32(t0);
1395         }
1396         tcg_gen_shli_i32(TCGV_LOW(ret), TCGV_LOW(arg1), c);
1397     }
1398 }
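
/*
 * Commentary: on a 32-bit host, a 64-bit shift by c in [1, 31] moves
 * bits across the word boundary.  For a right shift the new low word
 * is (low >> c) | (high << (32 - c)), which is exactly extract2 of
 * high:low at offset c as used above.
 */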
1399 
1400 void tcg_gen_shli_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
1401 {
1402     tcg_debug_assert(arg2 >= 0 && arg2 < 64);
1403     if (TCG_TARGET_REG_BITS == 32) {
1404         tcg_gen_shifti_i64(ret, arg1, arg2, 0, 0);
1405     } else if (arg2 == 0) {
1406         tcg_gen_mov_i64(ret, arg1);
1407     } else {
1408         TCGv_i64 t0 = tcg_const_i64(arg2);
1409         tcg_gen_shl_i64(ret, arg1, t0);
1410         tcg_temp_free_i64(t0);
1411     }
1412 }
1413 
1414 void tcg_gen_shri_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
1415 {
1416     tcg_debug_assert(arg2 >= 0 && arg2 < 64);
1417     if (TCG_TARGET_REG_BITS == 32) {
1418         tcg_gen_shifti_i64(ret, arg1, arg2, 1, 0);
1419     } else if (arg2 == 0) {
1420         tcg_gen_mov_i64(ret, arg1);
1421     } else {
1422         TCGv_i64 t0 = tcg_const_i64(arg2);
1423         tcg_gen_shr_i64(ret, arg1, t0);
1424         tcg_temp_free_i64(t0);
1425     }
1426 }
1427 
1428 void tcg_gen_sari_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
1429 {
1430     tcg_debug_assert(arg2 >= 0 && arg2 < 64);
1431     if (TCG_TARGET_REG_BITS == 32) {
1432         tcg_gen_shifti_i64(ret, arg1, arg2, 1, 1);
1433     } else if (arg2 == 0) {
1434         tcg_gen_mov_i64(ret, arg1);
1435     } else {
1436         TCGv_i64 t0 = tcg_const_i64(arg2);
1437         tcg_gen_sar_i64(ret, arg1, t0);
1438         tcg_temp_free_i64(t0);
1439     }
1440 }
1441 
1442 void tcg_gen_brcond_i64(TCGCond cond, TCGv_i64 arg1, TCGv_i64 arg2, TCGLabel *l)
1443 {
1444     if (cond == TCG_COND_ALWAYS) {
1445         tcg_gen_br(l);
1446     } else if (cond != TCG_COND_NEVER) {
1447         l->refs++;
1448         if (TCG_TARGET_REG_BITS == 32) {
1449             tcg_gen_op6ii_i32(INDEX_op_brcond2_i32, TCGV_LOW(arg1),
1450                               TCGV_HIGH(arg1), TCGV_LOW(arg2),
1451                               TCGV_HIGH(arg2), cond, label_arg(l));
1452         } else {
1453             tcg_gen_op4ii_i64(INDEX_op_brcond_i64, arg1, arg2, cond,
1454                               label_arg(l));
1455         }
1456     }
1457 }
1458 
1459 void tcg_gen_brcondi_i64(TCGCond cond, TCGv_i64 arg1, int64_t arg2, TCGLabel *l)
1460 {
1461     if (cond == TCG_COND_ALWAYS) {
1462         tcg_gen_br(l);
1463     } else if (cond != TCG_COND_NEVER) {
1464         TCGv_i64 t0 = tcg_const_i64(arg2);
1465         tcg_gen_brcond_i64(cond, arg1, t0, l);
1466         tcg_temp_free_i64(t0);
1467     }
1468 }
1469 
1470 void tcg_gen_setcond_i64(TCGCond cond, TCGv_i64 ret,
1471                          TCGv_i64 arg1, TCGv_i64 arg2)
1472 {
1473     if (cond == TCG_COND_ALWAYS) {
1474         tcg_gen_movi_i64(ret, 1);
1475     } else if (cond == TCG_COND_NEVER) {
1476         tcg_gen_movi_i64(ret, 0);
1477     } else {
1478         if (TCG_TARGET_REG_BITS == 32) {
1479             tcg_gen_op6i_i32(INDEX_op_setcond2_i32, TCGV_LOW(ret),
1480                              TCGV_LOW(arg1), TCGV_HIGH(arg1),
1481                              TCGV_LOW(arg2), TCGV_HIGH(arg2), cond);
1482             tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
1483         } else {
1484             tcg_gen_op4i_i64(INDEX_op_setcond_i64, ret, arg1, arg2, cond);
1485         }
1486     }
1487 }
1488 
1489 void tcg_gen_setcondi_i64(TCGCond cond, TCGv_i64 ret,
1490                           TCGv_i64 arg1, int64_t arg2)
1491 {
1492     TCGv_i64 t0 = tcg_const_i64(arg2);
1493     tcg_gen_setcond_i64(cond, ret, arg1, t0);
1494     tcg_temp_free_i64(t0);
1495 }
1496 
1497 void tcg_gen_muli_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
1498 {
1499     if (arg2 == 0) {
1500         tcg_gen_movi_i64(ret, 0);
1501     } else if (is_power_of_2(arg2)) {
1502         tcg_gen_shli_i64(ret, arg1, ctz64(arg2));
1503     } else {
1504         TCGv_i64 t0 = tcg_const_i64(arg2);
1505         tcg_gen_mul_i64(ret, arg1, t0);
1506         tcg_temp_free_i64(t0);
1507     }
1508 }
1509 
1510 void tcg_gen_div_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
1511 {
1512     if (TCG_TARGET_HAS_div_i64) {
1513         tcg_gen_op3_i64(INDEX_op_div_i64, ret, arg1, arg2);
1514     } else if (TCG_TARGET_HAS_div2_i64) {
1515         TCGv_i64 t0 = tcg_temp_new_i64();
1516         tcg_gen_sari_i64(t0, arg1, 63);
1517         tcg_gen_op5_i64(INDEX_op_div2_i64, ret, t0, arg1, t0, arg2);
1518         tcg_temp_free_i64(t0);
1519     } else {
1520         gen_helper_div_i64(ret, arg1, arg2);
1521     }
1522 }
1523 
1524 void tcg_gen_rem_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
1525 {
1526     if (TCG_TARGET_HAS_rem_i64) {
1527         tcg_gen_op3_i64(INDEX_op_rem_i64, ret, arg1, arg2);
1528     } else if (TCG_TARGET_HAS_div_i64) {
1529         TCGv_i64 t0 = tcg_temp_new_i64();
1530         tcg_gen_op3_i64(INDEX_op_div_i64, t0, arg1, arg2);
1531         tcg_gen_mul_i64(t0, t0, arg2);
1532         tcg_gen_sub_i64(ret, arg1, t0);
1533         tcg_temp_free_i64(t0);
1534     } else if (TCG_TARGET_HAS_div2_i64) {
1535         TCGv_i64 t0 = tcg_temp_new_i64();
1536         tcg_gen_sari_i64(t0, arg1, 63);
1537         tcg_gen_op5_i64(INDEX_op_div2_i64, t0, ret, arg1, t0, arg2);
1538         tcg_temp_free_i64(t0);
1539     } else {
1540         gen_helper_rem_i64(ret, arg1, arg2);
1541     }
1542 }
1543 
1544 void tcg_gen_divu_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
1545 {
1546     if (TCG_TARGET_HAS_div_i64) {
1547         tcg_gen_op3_i64(INDEX_op_divu_i64, ret, arg1, arg2);
1548     } else if (TCG_TARGET_HAS_div2_i64) {
1549         TCGv_i64 t0 = tcg_temp_new_i64();
1550         tcg_gen_movi_i64(t0, 0);
1551         tcg_gen_op5_i64(INDEX_op_divu2_i64, ret, t0, arg1, t0, arg2);
1552         tcg_temp_free_i64(t0);
1553     } else {
1554         gen_helper_divu_i64(ret, arg1, arg2);
1555     }
1556 }
1557 
1558 void tcg_gen_remu_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
1559 {
1560     if (TCG_TARGET_HAS_rem_i64) {
1561         tcg_gen_op3_i64(INDEX_op_remu_i64, ret, arg1, arg2);
1562     } else if (TCG_TARGET_HAS_div_i64) {
1563         TCGv_i64 t0 = tcg_temp_new_i64();
1564         tcg_gen_op3_i64(INDEX_op_divu_i64, t0, arg1, arg2);
1565         tcg_gen_mul_i64(t0, t0, arg2);
1566         tcg_gen_sub_i64(ret, arg1, t0);
1567         tcg_temp_free_i64(t0);
1568     } else if (TCG_TARGET_HAS_div2_i64) {
1569         TCGv_i64 t0 = tcg_temp_new_i64();
1570         tcg_gen_movi_i64(t0, 0);
1571         tcg_gen_op5_i64(INDEX_op_divu2_i64, t0, ret, arg1, t0, arg2);
1572         tcg_temp_free_i64(t0);
1573     } else {
1574         gen_helper_remu_i64(ret, arg1, arg2);
1575     }
1576 }
1577 
1578 void tcg_gen_ext8s_i64(TCGv_i64 ret, TCGv_i64 arg)
1579 {
1580     if (TCG_TARGET_REG_BITS == 32) {
1581         tcg_gen_ext8s_i32(TCGV_LOW(ret), TCGV_LOW(arg));
1582         tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31);
1583     } else if (TCG_TARGET_HAS_ext8s_i64) {
1584         tcg_gen_op2_i64(INDEX_op_ext8s_i64, ret, arg);
1585     } else {
1586         tcg_gen_shli_i64(ret, arg, 56);
1587         tcg_gen_sari_i64(ret, ret, 56);
1588     }
1589 }
1590 
1591 void tcg_gen_ext16s_i64(TCGv_i64 ret, TCGv_i64 arg)
1592 {
1593     if (TCG_TARGET_REG_BITS == 32) {
1594         tcg_gen_ext16s_i32(TCGV_LOW(ret), TCGV_LOW(arg));
1595         tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31);
1596     } else if (TCG_TARGET_HAS_ext16s_i64) {
1597         tcg_gen_op2_i64(INDEX_op_ext16s_i64, ret, arg);
1598     } else {
1599         tcg_gen_shli_i64(ret, arg, 48);
1600         tcg_gen_sari_i64(ret, ret, 48);
1601     }
1602 }
1603 
1604 void tcg_gen_ext32s_i64(TCGv_i64 ret, TCGv_i64 arg)
1605 {
1606     if (TCG_TARGET_REG_BITS == 32) {
1607         tcg_gen_mov_i32(TCGV_LOW(ret), TCGV_LOW(arg));
1608         tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31);
1609     } else if (TCG_TARGET_HAS_ext32s_i64) {
1610         tcg_gen_op2_i64(INDEX_op_ext32s_i64, ret, arg);
1611     } else {
1612         tcg_gen_shli_i64(ret, arg, 32);
1613         tcg_gen_sari_i64(ret, ret, 32);
1614     }
1615 }
1616 
1617 void tcg_gen_ext8u_i64(TCGv_i64 ret, TCGv_i64 arg)
1618 {
1619     if (TCG_TARGET_REG_BITS == 32) {
1620         tcg_gen_ext8u_i32(TCGV_LOW(ret), TCGV_LOW(arg));
1621         tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
1622     } else if (TCG_TARGET_HAS_ext8u_i64) {
1623         tcg_gen_op2_i64(INDEX_op_ext8u_i64, ret, arg);
1624     } else {
1625         tcg_gen_andi_i64(ret, arg, 0xffu);
1626     }
1627 }
1628 
1629 void tcg_gen_ext16u_i64(TCGv_i64 ret, TCGv_i64 arg)
1630 {
1631     if (TCG_TARGET_REG_BITS == 32) {
1632         tcg_gen_ext16u_i32(TCGV_LOW(ret), TCGV_LOW(arg));
1633         tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
1634     } else if (TCG_TARGET_HAS_ext16u_i64) {
1635         tcg_gen_op2_i64(INDEX_op_ext16u_i64, ret, arg);
1636     } else {
1637         tcg_gen_andi_i64(ret, arg, 0xffffu);
1638     }
1639 }
1640 
1641 void tcg_gen_ext32u_i64(TCGv_i64 ret, TCGv_i64 arg)
1642 {
1643     if (TCG_TARGET_REG_BITS == 32) {
1644         tcg_gen_mov_i32(TCGV_LOW(ret), TCGV_LOW(arg));
1645         tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
1646     } else if (TCG_TARGET_HAS_ext32u_i64) {
1647         tcg_gen_op2_i64(INDEX_op_ext32u_i64, ret, arg);
1648     } else {
1649         tcg_gen_andi_i64(ret, arg, 0xffffffffu);
1650     }
1651 }
1652 
1653 /* Note: we assume the six high bytes are set to zero */
1654 void tcg_gen_bswap16_i64(TCGv_i64 ret, TCGv_i64 arg)
1655 {
1656     if (TCG_TARGET_REG_BITS == 32) {
1657         tcg_gen_bswap16_i32(TCGV_LOW(ret), TCGV_LOW(arg));
1658         tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
1659     } else if (TCG_TARGET_HAS_bswap16_i64) {
1660         tcg_gen_op2_i64(INDEX_op_bswap16_i64, ret, arg);
1661     } else {
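        /* With the six high bytes known zero, this is just
           ((arg & 0xff) << 8) | (arg >> 8).  */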
1662         TCGv_i64 t0 = tcg_temp_new_i64();
1663 
1664         tcg_gen_ext8u_i64(t0, arg);
1665         tcg_gen_shli_i64(t0, t0, 8);
1666         tcg_gen_shri_i64(ret, arg, 8);
1667         tcg_gen_or_i64(ret, ret, t0);
1668         tcg_temp_free_i64(t0);
1669     }
1670 }
1671 
1672 /* Note: we assume the four high bytes of arg are set to zero.  */
1673 void tcg_gen_bswap32_i64(TCGv_i64 ret, TCGv_i64 arg)
1674 {
1675     if (TCG_TARGET_REG_BITS == 32) {
1676         tcg_gen_bswap32_i32(TCGV_LOW(ret), TCGV_LOW(arg));
1677         tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
1678     } else if (TCG_TARGET_HAS_bswap32_i64) {
1679         tcg_gen_op2_i64(INDEX_op_bswap32_i64, ret, arg);
1680     } else {
1681         TCGv_i64 t0 = tcg_temp_new_i64();
1682         TCGv_i64 t1 = tcg_temp_new_i64();
1683         TCGv_i64 t2 = tcg_const_i64(0x00ff00ff);
1684 
1685                                         /* arg = ....abcd */
1686         tcg_gen_shri_i64(t0, arg, 8);   /*  t0 = .....abc */
1687         tcg_gen_and_i64(t1, arg, t2);   /*  t1 = .....b.d */
1688         tcg_gen_and_i64(t0, t0, t2);    /*  t0 = .....a.c */
1689         tcg_gen_shli_i64(t1, t1, 8);    /*  t1 = ....b.d. */
1690         tcg_gen_or_i64(ret, t0, t1);    /* ret = ....badc */
1691 
1692         tcg_gen_shli_i64(t1, ret, 48);  /*  t1 = dc...... */
1693         tcg_gen_shri_i64(t0, ret, 16);  /*  t0 = ......ba */
1694         tcg_gen_shri_i64(t1, t1, 32);   /*  t1 = ....dc.. */
1695         tcg_gen_or_i64(ret, t0, t1);    /* ret = ....dcba */
1696 
1697         tcg_temp_free_i64(t0);
1698         tcg_temp_free_i64(t1);
1699         tcg_temp_free_i64(t2);
1700     }
1701 }
1702 
1703 void tcg_gen_bswap64_i64(TCGv_i64 ret, TCGv_i64 arg)
1704 {
1705     if (TCG_TARGET_REG_BITS == 32) {
1706         TCGv_i32 t0, t1;
1707         t0 = tcg_temp_new_i32();
1708         t1 = tcg_temp_new_i32();
1709 
1710         tcg_gen_bswap32_i32(t0, TCGV_LOW(arg));
1711         tcg_gen_bswap32_i32(t1, TCGV_HIGH(arg));
1712         tcg_gen_mov_i32(TCGV_LOW(ret), t1);
1713         tcg_gen_mov_i32(TCGV_HIGH(ret), t0);
1714         tcg_temp_free_i32(t0);
1715         tcg_temp_free_i32(t1);
1716     } else if (TCG_TARGET_HAS_bswap64_i64) {
1717         tcg_gen_op2_i64(INDEX_op_bswap64_i64, ret, arg);
1718     } else {
1719         TCGv_i64 t0 = tcg_temp_new_i64();
1720         TCGv_i64 t1 = tcg_temp_new_i64();
1721         TCGv_i64 t2 = tcg_temp_new_i64();
1722 
1723                                         /* arg = abcdefgh */
1724         tcg_gen_movi_i64(t2, 0x00ff00ff00ff00ffull);
1725         tcg_gen_shri_i64(t0, arg, 8);   /*  t0 = .abcdefg */
1726         tcg_gen_and_i64(t1, arg, t2);   /*  t1 = .b.d.f.h */
1727         tcg_gen_and_i64(t0, t0, t2);    /*  t0 = .a.c.e.g */
1728         tcg_gen_shli_i64(t1, t1, 8);    /*  t1 = b.d.f.h. */
1729         tcg_gen_or_i64(ret, t0, t1);    /* ret = badcfehg */
1730 
1731         tcg_gen_movi_i64(t2, 0x0000ffff0000ffffull);
1732         tcg_gen_shri_i64(t0, ret, 16);  /*  t0 = ..badcfe */
1733         tcg_gen_and_i64(t1, ret, t2);   /*  t1 = ..dc..hg */
1734         tcg_gen_and_i64(t0, t0, t2);    /*  t0 = ..ba..fe */
1735         tcg_gen_shli_i64(t1, t1, 16);   /*  t1 = dc..hg.. */
1736         tcg_gen_or_i64(ret, t0, t1);    /* ret = dcbahgfe */
1737 
1738         tcg_gen_shri_i64(t0, ret, 32);  /*  t0 = ....dcba */
1739         tcg_gen_shli_i64(t1, ret, 32);  /*  t1 = hgfe.... */
1740         tcg_gen_or_i64(ret, t0, t1);    /* ret = hgfedcba */
1741 
1742         tcg_temp_free_i64(t0);
1743         tcg_temp_free_i64(t1);
1744         tcg_temp_free_i64(t2);
1745     }
1746 }
1747 
1748 void tcg_gen_not_i64(TCGv_i64 ret, TCGv_i64 arg)
1749 {
1750     if (TCG_TARGET_REG_BITS == 32) {
1751         tcg_gen_not_i32(TCGV_LOW(ret), TCGV_LOW(arg));
1752         tcg_gen_not_i32(TCGV_HIGH(ret), TCGV_HIGH(arg));
1753     } else if (TCG_TARGET_HAS_not_i64) {
1754         tcg_gen_op2_i64(INDEX_op_not_i64, ret, arg);
1755     } else {
1756         tcg_gen_xori_i64(ret, arg, -1);
1757     }
1758 }
1759 
1760 void tcg_gen_andc_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
1761 {
1762     if (TCG_TARGET_REG_BITS == 32) {
1763         tcg_gen_andc_i32(TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_LOW(arg2));
1764         tcg_gen_andc_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), TCGV_HIGH(arg2));
1765     } else if (TCG_TARGET_HAS_andc_i64) {
1766         tcg_gen_op3_i64(INDEX_op_andc_i64, ret, arg1, arg2);
1767     } else {
1768         TCGv_i64 t0 = tcg_temp_new_i64();
1769         tcg_gen_not_i64(t0, arg2);
1770         tcg_gen_and_i64(ret, arg1, t0);
1771         tcg_temp_free_i64(t0);
1772     }
1773 }
1774 
1775 void tcg_gen_eqv_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
1776 {
1777     if (TCG_TARGET_REG_BITS == 32) {
1778         tcg_gen_eqv_i32(TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_LOW(arg2));
1779         tcg_gen_eqv_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), TCGV_HIGH(arg2));
1780     } else if (TCG_TARGET_HAS_eqv_i64) {
1781         tcg_gen_op3_i64(INDEX_op_eqv_i64, ret, arg1, arg2);
1782     } else {
1783         tcg_gen_xor_i64(ret, arg1, arg2);
1784         tcg_gen_not_i64(ret, ret);
1785     }
1786 }
1787 
1788 void tcg_gen_nand_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
1789 {
1790     if (TCG_TARGET_REG_BITS == 32) {
1791         tcg_gen_nand_i32(TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_LOW(arg2));
1792         tcg_gen_nand_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), TCGV_HIGH(arg2));
1793     } else if (TCG_TARGET_HAS_nand_i64) {
1794         tcg_gen_op3_i64(INDEX_op_nand_i64, ret, arg1, arg2);
1795     } else {
1796         tcg_gen_and_i64(ret, arg1, arg2);
1797         tcg_gen_not_i64(ret, ret);
1798     }
1799 }
1800 
1801 void tcg_gen_nor_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
1802 {
1803     if (TCG_TARGET_REG_BITS == 32) {
1804         tcg_gen_nor_i32(TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_LOW(arg2));
1805         tcg_gen_nor_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), TCGV_HIGH(arg2));
1806     } else if (TCG_TARGET_HAS_nor_i64) {
1807         tcg_gen_op3_i64(INDEX_op_nor_i64, ret, arg1, arg2);
1808     } else {
1809         tcg_gen_or_i64(ret, arg1, arg2);
1810         tcg_gen_not_i64(ret, ret);
1811     }
1812 }
1813 
1814 void tcg_gen_orc_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
1815 {
1816     if (TCG_TARGET_REG_BITS == 32) {
1817         tcg_gen_orc_i32(TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_LOW(arg2));
1818         tcg_gen_orc_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), TCGV_HIGH(arg2));
1819     } else if (TCG_TARGET_HAS_orc_i64) {
1820         tcg_gen_op3_i64(INDEX_op_orc_i64, ret, arg1, arg2);
1821     } else {
1822         TCGv_i64 t0 = tcg_temp_new_i64();
1823         tcg_gen_not_i64(t0, arg2);
1824         tcg_gen_or_i64(ret, arg1, t0);
1825         tcg_temp_free_i64(t0);
1826     }
1827 }
1828 
1829 void tcg_gen_clz_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
1830 {
1831     if (TCG_TARGET_HAS_clz_i64) {
1832         tcg_gen_op3_i64(INDEX_op_clz_i64, ret, arg1, arg2);
1833     } else {
1834         gen_helper_clz_i64(ret, arg1, arg2);
1835     }
1836 }
1837 
1838 void tcg_gen_clzi_i64(TCGv_i64 ret, TCGv_i64 arg1, uint64_t arg2)
1839 {
1840     if (TCG_TARGET_REG_BITS == 32
1841         && TCG_TARGET_HAS_clz_i32
1842         && arg2 <= 0xffffffffu) {
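        /* Chain two 32-bit ops: compute 32 + clz(low) first, with the
           bias folded into the default value, then use that as the
           result returned when the high word is all zeros.  */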
1843         TCGv_i32 t = tcg_const_i32((uint32_t)arg2 - 32);
1844         tcg_gen_clz_i32(t, TCGV_LOW(arg1), t);
1845         tcg_gen_addi_i32(t, t, 32);
1846         tcg_gen_clz_i32(TCGV_LOW(ret), TCGV_HIGH(arg1), t);
1847         tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
1848         tcg_temp_free_i32(t);
1849     } else {
1850         TCGv_i64 t = tcg_const_i64(arg2);
1851         tcg_gen_clz_i64(ret, arg1, t);
1852         tcg_temp_free_i64(t);
1853     }
1854 }
1855 
1856 void tcg_gen_ctz_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
1857 {
1858     if (TCG_TARGET_HAS_ctz_i64) {
1859         tcg_gen_op3_i64(INDEX_op_ctz_i64, ret, arg1, arg2);
1860     } else if (TCG_TARGET_HAS_ctpop_i64 || TCG_TARGET_HAS_clz_i64) {
1861         TCGv_i64 z, t = tcg_temp_new_i64();
1862 
1863         if (TCG_TARGET_HAS_ctpop_i64) {
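            /* (arg1 - 1) & ~arg1 sets exactly the trailing zero bits,
               so their population count is the trailing zero count; the
               arg1 == 0 case is patched up by the movcond below.  */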
1864             tcg_gen_subi_i64(t, arg1, 1);
1865             tcg_gen_andc_i64(t, t, arg1);
1866             tcg_gen_ctpop_i64(t, t);
1867         } else {
1868             /* Since all non-x86 hosts have clz(0) == 64, don't fight it.  */
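            /* arg1 & -arg1 isolates the lowest set bit; its index is
               63 - clz, computed here as clz ^ 63.  */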
1869             tcg_gen_neg_i64(t, arg1);
1870             tcg_gen_and_i64(t, t, arg1);
1871             tcg_gen_clzi_i64(t, t, 64);
1872             tcg_gen_xori_i64(t, t, 63);
1873         }
1874         z = tcg_const_i64(0);
1875         tcg_gen_movcond_i64(TCG_COND_EQ, ret, arg1, z, arg2, t);
1876         tcg_temp_free_i64(t);
1877         tcg_temp_free_i64(z);
1878     } else {
1879         gen_helper_ctz_i64(ret, arg1, arg2);
1880     }
1881 }
1882 
1883 void tcg_gen_ctzi_i64(TCGv_i64 ret, TCGv_i64 arg1, uint64_t arg2)
1884 {
1885     if (TCG_TARGET_REG_BITS == 32
1886         && TCG_TARGET_HAS_ctz_i32
1887         && arg2 <= 0xffffffffu) {
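        /* Mirror of the clzi expansion above: count the low word, with
           32 + ctz(high) as the result when the low word is zero.  */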
1888         TCGv_i32 t32 = tcg_const_i32((uint32_t)arg2 - 32);
1889         tcg_gen_ctz_i32(t32, TCGV_HIGH(arg1), t32);
1890         tcg_gen_addi_i32(t32, t32, 32);
1891         tcg_gen_ctz_i32(TCGV_LOW(ret), TCGV_LOW(arg1), t32);
1892         tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
1893         tcg_temp_free_i32(t32);
1894     } else if (!TCG_TARGET_HAS_ctz_i64
1895                && TCG_TARGET_HAS_ctpop_i64
1896                && arg2 == 64) {
1897         /* This equivalence has the advantage of not requiring a fixup.  */
1898         TCGv_i64 t = tcg_temp_new_i64();
1899         tcg_gen_subi_i64(t, arg1, 1);
1900         tcg_gen_andc_i64(t, t, arg1);
1901         tcg_gen_ctpop_i64(ret, t);
1902         tcg_temp_free_i64(t);
1903     } else {
1904         TCGv_i64 t64 = tcg_const_i64(arg2);
1905         tcg_gen_ctz_i64(ret, arg1, t64);
1906         tcg_temp_free_i64(t64);
1907     }
1908 }
1909 
1910 void tcg_gen_clrsb_i64(TCGv_i64 ret, TCGv_i64 arg)
1911 {
1912     if (TCG_TARGET_HAS_clz_i64 || TCG_TARGET_HAS_clz_i32) {
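        /* XOR with the broadcast sign bit turns the redundant sign bits
           into leading zeros; clz counts them plus the sign bit itself,
           hence the final subtraction of one.  */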
1913         TCGv_i64 t = tcg_temp_new_i64();
1914         tcg_gen_sari_i64(t, arg, 63);
1915         tcg_gen_xor_i64(t, t, arg);
1916         tcg_gen_clzi_i64(t, t, 64);
1917         tcg_gen_subi_i64(ret, t, 1);
1918         tcg_temp_free_i64(t);
1919     } else {
1920         gen_helper_clrsb_i64(ret, arg);
1921     }
1922 }
1923 
1924 void tcg_gen_ctpop_i64(TCGv_i64 ret, TCGv_i64 arg1)
1925 {
1926     if (TCG_TARGET_HAS_ctpop_i64) {
1927         tcg_gen_op2_i64(INDEX_op_ctpop_i64, ret, arg1);
1928     } else if (TCG_TARGET_REG_BITS == 32 && TCG_TARGET_HAS_ctpop_i32) {
1929         tcg_gen_ctpop_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1));
1930         tcg_gen_ctpop_i32(TCGV_LOW(ret), TCGV_LOW(arg1));
1931         tcg_gen_add_i32(TCGV_LOW(ret), TCGV_LOW(ret), TCGV_HIGH(ret));
1932         tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
1933     } else {
1934         gen_helper_ctpop_i64(ret, arg1);
1935     }
1936 }
1937 
1938 void tcg_gen_rotl_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
1939 {
1940     if (TCG_TARGET_HAS_rot_i64) {
1941         tcg_gen_op3_i64(INDEX_op_rotl_i64, ret, arg1, arg2);
1942     } else {
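        /* Expand as (arg1 << arg2) | (arg1 >> (64 - arg2)).  */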
1943         TCGv_i64 t0, t1;
1944         t0 = tcg_temp_new_i64();
1945         t1 = tcg_temp_new_i64();
1946         tcg_gen_shl_i64(t0, arg1, arg2);
1947         tcg_gen_subfi_i64(t1, 64, arg2);
1948         tcg_gen_shr_i64(t1, arg1, t1);
1949         tcg_gen_or_i64(ret, t0, t1);
1950         tcg_temp_free_i64(t0);
1951         tcg_temp_free_i64(t1);
1952     }
1953 }
1954 
1955 void tcg_gen_rotli_i64(TCGv_i64 ret, TCGv_i64 arg1, unsigned arg2)
1956 {
1957     tcg_debug_assert(arg2 < 64);
1958     /* some cases can be optimized here */
1959     if (arg2 == 0) {
1960         tcg_gen_mov_i64(ret, arg1);
1961     } else if (TCG_TARGET_HAS_rot_i64) {
1962         TCGv_i64 t0 = tcg_const_i64(arg2);
1963         tcg_gen_rotl_i64(ret, arg1, t0);
1964         tcg_temp_free_i64(t0);
1965     } else {
1966         TCGv_i64 t0, t1;
1967         t0 = tcg_temp_new_i64();
1968         t1 = tcg_temp_new_i64();
1969         tcg_gen_shli_i64(t0, arg1, arg2);
1970         tcg_gen_shri_i64(t1, arg1, 64 - arg2);
1971         tcg_gen_or_i64(ret, t0, t1);
1972         tcg_temp_free_i64(t0);
1973         tcg_temp_free_i64(t1);
1974     }
1975 }
1976 
1977 void tcg_gen_rotr_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
1978 {
1979     if (TCG_TARGET_HAS_rot_i64) {
1980         tcg_gen_op3_i64(INDEX_op_rotr_i64, ret, arg1, arg2);
1981     } else {
1982         TCGv_i64 t0, t1;
1983         t0 = tcg_temp_new_i64();
1984         t1 = tcg_temp_new_i64();
1985         tcg_gen_shr_i64(t0, arg1, arg2);
1986         tcg_gen_subfi_i64(t1, 64, arg2);
1987         tcg_gen_shl_i64(t1, arg1, t1);
1988         tcg_gen_or_i64(ret, t0, t1);
1989         tcg_temp_free_i64(t0);
1990         tcg_temp_free_i64(t1);
1991     }
1992 }
1993 
1994 void tcg_gen_rotri_i64(TCGv_i64 ret, TCGv_i64 arg1, unsigned arg2)
1995 {
1996     tcg_debug_assert(arg2 < 64);
1997     /* some cases can be optimized here */
1998     if (arg2 == 0) {
1999         tcg_gen_mov_i64(ret, arg1);
2000     } else {
2001         tcg_gen_rotli_i64(ret, arg1, 64 - arg2);
2002     }
2003 }
2004 
2005 void tcg_gen_deposit_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2,
2006                          unsigned int ofs, unsigned int len)
2007 {
2008     uint64_t mask;
2009     TCGv_i64 t1;
2010 
2011     tcg_debug_assert(ofs < 64);
2012     tcg_debug_assert(len > 0);
2013     tcg_debug_assert(len <= 64);
2014     tcg_debug_assert(ofs + len <= 64);
2015 
2016     if (len == 64) {
2017         tcg_gen_mov_i64(ret, arg2);
2018         return;
2019     }
2020     if (TCG_TARGET_HAS_deposit_i64 && TCG_TARGET_deposit_i64_valid(ofs, len)) {
2021         tcg_gen_op5ii_i64(INDEX_op_deposit_i64, ret, arg1, arg2, ofs, len);
2022         return;
2023     }
2024 
2025     if (TCG_TARGET_REG_BITS == 32) {
2026         if (ofs >= 32) {
2027             tcg_gen_deposit_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1),
2028                                 TCGV_LOW(arg2), ofs - 32, len);
2029             tcg_gen_mov_i32(TCGV_LOW(ret), TCGV_LOW(arg1));
2030             return;
2031         }
2032         if (ofs + len <= 32) {
2033             tcg_gen_deposit_i32(TCGV_LOW(ret), TCGV_LOW(arg1),
2034                                 TCGV_LOW(arg2), ofs, len);
2035             tcg_gen_mov_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1));
2036             return;
2037         }
2038     }
2039 
2040     t1 = tcg_temp_new_i64();
2041 
2042     if (TCG_TARGET_HAS_extract2_i64) {
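        /* extract2 extracts 64 bits at an offset from a 128-bit pair,
           so a deposit into the top or the bottom of the word becomes
           one shift plus one funnel operation.  */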
2043         if (ofs + len == 64) {
2044             tcg_gen_shli_i64(t1, arg1, len);
2045             tcg_gen_extract2_i64(ret, t1, arg2, len);
2046             goto done;
2047         }
2048         if (ofs == 0) {
2049             tcg_gen_extract2_i64(ret, arg1, arg2, len);
2050             tcg_gen_rotli_i64(ret, ret, len);
2051             goto done;
2052         }
2053     }
2054 
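    /* Generic expansion: clear the field in arg1 and OR in the low
       len bits of arg2 shifted into place; when the field reaches
       bit 63, the shift itself discards the excess bits of arg2.  */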
2055     mask = (1ull << len) - 1;
2056     if (ofs + len < 64) {
2057         tcg_gen_andi_i64(t1, arg2, mask);
2058         tcg_gen_shli_i64(t1, t1, ofs);
2059     } else {
2060         tcg_gen_shli_i64(t1, arg2, ofs);
2061     }
2062     tcg_gen_andi_i64(ret, arg1, ~(mask << ofs));
2063     tcg_gen_or_i64(ret, ret, t1);
2064  done:
2065     tcg_temp_free_i64(t1);
2066 }
2067 
2068 void tcg_gen_deposit_z_i64(TCGv_i64 ret, TCGv_i64 arg,
2069                            unsigned int ofs, unsigned int len)
2070 {
2071     tcg_debug_assert(ofs < 64);
2072     tcg_debug_assert(len > 0);
2073     tcg_debug_assert(len <= 64);
2074     tcg_debug_assert(ofs + len <= 64);
2075 
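    /* A field that reaches bit 63 needs no masking beyond the shift,
       and a field at bit 0 needs no shift at all.  */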
2076     if (ofs + len == 64) {
2077         tcg_gen_shli_i64(ret, arg, ofs);
2078     } else if (ofs == 0) {
2079         tcg_gen_andi_i64(ret, arg, (1ull << len) - 1);
2080     } else if (TCG_TARGET_HAS_deposit_i64
2081                && TCG_TARGET_deposit_i64_valid(ofs, len)) {
2082         TCGv_i64 zero = tcg_const_i64(0);
2083         tcg_gen_op5ii_i64(INDEX_op_deposit_i64, ret, zero, arg, ofs, len);
2084         tcg_temp_free_i64(zero);
2085     } else {
2086         if (TCG_TARGET_REG_BITS == 32) {
2087             if (ofs >= 32) {
2088                 tcg_gen_deposit_z_i32(TCGV_HIGH(ret), TCGV_LOW(arg),
2089                                       ofs - 32, len);
2090                 tcg_gen_movi_i32(TCGV_LOW(ret), 0);
2091                 return;
2092             }
2093             if (ofs + len <= 32) {
2094                 tcg_gen_deposit_z_i32(TCGV_LOW(ret), TCGV_LOW(arg), ofs, len);
2095                 tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
2096                 return;
2097             }
2098         }
2099         /* To help two-operand hosts we prefer to zero-extend first,
2100            which allows ARG to stay live.  */
2101         switch (len) {
2102         case 32:
2103             if (TCG_TARGET_HAS_ext32u_i64) {
2104                 tcg_gen_ext32u_i64(ret, arg);
2105                 tcg_gen_shli_i64(ret, ret, ofs);
2106                 return;
2107             }
2108             break;
2109         case 16:
2110             if (TCG_TARGET_HAS_ext16u_i64) {
2111                 tcg_gen_ext16u_i64(ret, arg);
2112                 tcg_gen_shli_i64(ret, ret, ofs);
2113                 return;
2114             }
2115             break;
2116         case 8:
2117             if (TCG_TARGET_HAS_ext8u_i64) {
2118                 tcg_gen_ext8u_i64(ret, arg);
2119                 tcg_gen_shli_i64(ret, ret, ofs);
2120                 return;
2121             }
2122             break;
2123         }
2124         /* Otherwise prefer zero-extension over AND for code size.  */
2125         switch (ofs + len) {
2126         case 32:
2127             if (TCG_TARGET_HAS_ext32u_i64) {
2128                 tcg_gen_shli_i64(ret, arg, ofs);
2129                 tcg_gen_ext32u_i64(ret, ret);
2130                 return;
2131             }
2132             break;
2133         case 16:
2134             if (TCG_TARGET_HAS_ext16u_i64) {
2135                 tcg_gen_shli_i64(ret, arg, ofs);
2136                 tcg_gen_ext16u_i64(ret, ret);
2137                 return;
2138             }
2139             break;
2140         case 8:
2141             if (TCG_TARGET_HAS_ext8u_i64) {
2142                 tcg_gen_shli_i64(ret, arg, ofs);
2143                 tcg_gen_ext8u_i64(ret, ret);
2144                 return;
2145             }
2146             break;
2147         }
2148         tcg_gen_andi_i64(ret, arg, (1ull << len) - 1);
2149         tcg_gen_shli_i64(ret, ret, ofs);
2150     }
2151 }
2152 
2153 void tcg_gen_extract_i64(TCGv_i64 ret, TCGv_i64 arg,
2154                          unsigned int ofs, unsigned int len)
2155 {
2156     tcg_debug_assert(ofs < 64);
2157     tcg_debug_assert(len > 0);
2158     tcg_debug_assert(len <= 64);
2159     tcg_debug_assert(ofs + len <= 64);
2160 
2161     /* Canonicalize certain special cases, even if extract is supported.  */
2162     if (ofs + len == 64) {
2163         tcg_gen_shri_i64(ret, arg, 64 - len);
2164         return;
2165     }
2166     if (ofs == 0) {
2167         tcg_gen_andi_i64(ret, arg, (1ull << len) - 1);
2168         return;
2169     }
2170 
2171     if (TCG_TARGET_REG_BITS == 32) {
2172         /* Look for a 32-bit extract within one of the two words.  */
2173         if (ofs >= 32) {
2174             tcg_gen_extract_i32(TCGV_LOW(ret), TCGV_HIGH(arg), ofs - 32, len);
2175             tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
2176             return;
2177         }
2178         if (ofs + len <= 32) {
2179             tcg_gen_extract_i32(TCGV_LOW(ret), TCGV_LOW(arg), ofs, len);
2180             tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
2181             return;
2182         }
2183         /* The field is split across two words.  One double-word
2184            shift is better than two double-word shifts.  */
2185         goto do_shift_and;
2186     }
2187 
2188     if (TCG_TARGET_HAS_extract_i64
2189         && TCG_TARGET_extract_i64_valid(ofs, len)) {
2190         tcg_gen_op4ii_i64(INDEX_op_extract_i64, ret, arg, ofs, len);
2191         return;
2192     }
2193 
2194     /* Assume that zero-extension, if available, is cheaper than a shift.  */
2195     switch (ofs + len) {
2196     case 32:
2197         if (TCG_TARGET_HAS_ext32u_i64) {
2198             tcg_gen_ext32u_i64(ret, arg);
2199             tcg_gen_shri_i64(ret, ret, ofs);
2200             return;
2201         }
2202         break;
2203     case 16:
2204         if (TCG_TARGET_HAS_ext16u_i64) {
2205             tcg_gen_ext16u_i64(ret, arg);
2206             tcg_gen_shri_i64(ret, ret, ofs);
2207             return;
2208         }
2209         break;
2210     case 8:
2211         if (TCG_TARGET_HAS_ext8u_i64) {
2212             tcg_gen_ext8u_i64(ret, arg);
2213             tcg_gen_shri_i64(ret, ret, ofs);
2214             return;
2215         }
2216         break;
2217     }
2218 
2219     /* ??? Ideally we'd know what values are available for immediate AND.
2220        Assume that 8 bits are available, plus the special cases of 16 and 32,
2221        so that we get ext8u, ext16u, and ext32u.  */
2222     switch (len) {
2223     case 1 ... 8: case 16: case 32:
2224     do_shift_and:
2225         tcg_gen_shri_i64(ret, arg, ofs);
2226         tcg_gen_andi_i64(ret, ret, (1ull << len) - 1);
2227         break;
2228     default:
2229         tcg_gen_shli_i64(ret, arg, 64 - len - ofs);
2230         tcg_gen_shri_i64(ret, ret, 64 - len);
2231         break;
2232     }
2233 }
2234 
2235 void tcg_gen_sextract_i64(TCGv_i64 ret, TCGv_i64 arg,
2236                           unsigned int ofs, unsigned int len)
2237 {
2238     tcg_debug_assert(ofs < 64);
2239     tcg_debug_assert(len > 0);
2240     tcg_debug_assert(len <= 64);
2241     tcg_debug_assert(ofs + len <= 64);
2242 
2243     /* Canonicalize certain special cases, even if sextract is supported.  */
2244     if (ofs + len == 64) {
2245         tcg_gen_sari_i64(ret, arg, 64 - len);
2246         return;
2247     }
2248     if (ofs == 0) {
2249         switch (len) {
2250         case 32:
2251             tcg_gen_ext32s_i64(ret, arg);
2252             return;
2253         case 16:
2254             tcg_gen_ext16s_i64(ret, arg);
2255             return;
2256         case 8:
2257             tcg_gen_ext8s_i64(ret, arg);
2258             return;
2259         }
2260     }
2261 
2262     if (TCG_TARGET_REG_BITS == 32) {
2263         /* Look for a 32-bit extract within one of the two words.  */
2264         if (ofs >= 32) {
2265             tcg_gen_sextract_i32(TCGV_LOW(ret), TCGV_HIGH(arg), ofs - 32, len);
2266         } else if (ofs + len <= 32) {
2267             tcg_gen_sextract_i32(TCGV_LOW(ret), TCGV_LOW(arg), ofs, len);
2268         } else if (ofs == 0) {
2269             tcg_gen_mov_i32(TCGV_LOW(ret), TCGV_LOW(arg));
2270             tcg_gen_sextract_i32(TCGV_HIGH(ret), TCGV_HIGH(arg), 0, len - 32);
2271             return;
2272         } else if (len > 32) {
2273             TCGv_i32 t = tcg_temp_new_i32();
2274             /* Extract the bits for the high word normally.  */
2275             tcg_gen_sextract_i32(t, TCGV_HIGH(arg), ofs + 32, len - 32);
2276             /* Shift the field down for the low part.  */
2277             tcg_gen_shri_i64(ret, arg, ofs);
2278             /* Overwrite the shift into the high part.  */
2279             tcg_gen_mov_i32(TCGV_HIGH(ret), t);
2280             tcg_temp_free_i32(t);
2281             return;
2282         } else {
2283             /* Shift the field down for the low part, such that the
2284                field sits at the MSB.  */
2285             tcg_gen_shri_i64(ret, arg, ofs + len - 32);
2286             /* Shift the field down from the MSB, sign extending.  */
2287             tcg_gen_sari_i32(TCGV_LOW(ret), TCGV_LOW(ret), 32 - len);
2288         }
2289         /* Sign-extend the field from 32 bits.  */
2290         tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31);
2291         return;
2292     }
2293 
2294     if (TCG_TARGET_HAS_sextract_i64
2295         && TCG_TARGET_extract_i64_valid(ofs, len)) {
2296         tcg_gen_op4ii_i64(INDEX_op_sextract_i64, ret, arg, ofs, len);
2297         return;
2298     }
2299 
2300     /* Assume that sign-extension, if available, is cheaper than a shift.  */
2301     switch (ofs + len) {
2302     case 32:
2303         if (TCG_TARGET_HAS_ext32s_i64) {
2304             tcg_gen_ext32s_i64(ret, arg);
2305             tcg_gen_sari_i64(ret, ret, ofs);
2306             return;
2307         }
2308         break;
2309     case 16:
2310         if (TCG_TARGET_HAS_ext16s_i64) {
2311             tcg_gen_ext16s_i64(ret, arg);
2312             tcg_gen_sari_i64(ret, ret, ofs);
2313             return;
2314         }
2315         break;
2316     case 8:
2317         if (TCG_TARGET_HAS_ext8s_i64) {
2318             tcg_gen_ext8s_i64(ret, arg);
2319             tcg_gen_sari_i64(ret, ret, ofs);
2320             return;
2321         }
2322         break;
2323     }
2324     switch (len) {
2325     case 32:
2326         if (TCG_TARGET_HAS_ext32s_i64) {
2327             tcg_gen_shri_i64(ret, arg, ofs);
2328             tcg_gen_ext32s_i64(ret, ret);
2329             return;
2330         }
2331         break;
2332     case 16:
2333         if (TCG_TARGET_HAS_ext16s_i64) {
2334             tcg_gen_shri_i64(ret, arg, ofs);
2335             tcg_gen_ext16s_i64(ret, ret);
2336             return;
2337         }
2338         break;
2339     case 8:
2340         if (TCG_TARGET_HAS_ext8s_i64) {
2341             tcg_gen_shri_i64(ret, arg, ofs);
2342             tcg_gen_ext8s_i64(ret, ret);
2343             return;
2344         }
2345         break;
2346     }
2347     tcg_gen_shli_i64(ret, arg, 64 - len - ofs);
2348     tcg_gen_sari_i64(ret, ret, 64 - len);
2349 }
2350 
2351 /*
2352  * Extract 64 bits from a 128-bit input, ah:al, starting from ofs.
2353  * Unlike tcg_gen_extract_i64 above, len is fixed at 64.
2354  */
2355 void tcg_gen_extract2_i64(TCGv_i64 ret, TCGv_i64 al, TCGv_i64 ah,
2356                           unsigned int ofs)
2357 {
2358     tcg_debug_assert(ofs <= 64);
2359     if (ofs == 0) {
2360         tcg_gen_mov_i64(ret, al);
2361     } else if (ofs == 64) {
2362         tcg_gen_mov_i64(ret, ah);
2363     } else if (al == ah) {
2364         tcg_gen_rotri_i64(ret, al, ofs);
2365     } else if (TCG_TARGET_HAS_extract2_i64) {
2366         tcg_gen_op4i_i64(INDEX_op_extract2_i64, ret, al, ah, ofs);
2367     } else {
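        /* Compose by hand: the low 64 - ofs bits come from al shifted
           down, and the low ofs bits of ah are deposited above them.  */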
2368         TCGv_i64 t0 = tcg_temp_new_i64();
2369         tcg_gen_shri_i64(t0, al, ofs);
2370         tcg_gen_deposit_i64(ret, t0, ah, 64 - ofs, ofs);
2371         tcg_temp_free_i64(t0);
2372     }
2373 }
2374 
2375 void tcg_gen_movcond_i64(TCGCond cond, TCGv_i64 ret, TCGv_i64 c1,
2376                          TCGv_i64 c2, TCGv_i64 v1, TCGv_i64 v2)
2377 {
2378     if (cond == TCG_COND_ALWAYS) {
2379         tcg_gen_mov_i64(ret, v1);
2380     } else if (cond == TCG_COND_NEVER) {
2381         tcg_gen_mov_i64(ret, v2);
2382     } else if (TCG_TARGET_REG_BITS == 32) {
2383         TCGv_i32 t0 = tcg_temp_new_i32();
2384         TCGv_i32 t1 = tcg_temp_new_i32();
2385         tcg_gen_op6i_i32(INDEX_op_setcond2_i32, t0,
2386                          TCGV_LOW(c1), TCGV_HIGH(c1),
2387                          TCGV_LOW(c2), TCGV_HIGH(c2), cond);
2388 
2389         if (TCG_TARGET_HAS_movcond_i32) {
2390             tcg_gen_movi_i32(t1, 0);
2391             tcg_gen_movcond_i32(TCG_COND_NE, TCGV_LOW(ret), t0, t1,
2392                                 TCGV_LOW(v1), TCGV_LOW(v2));
2393             tcg_gen_movcond_i32(TCG_COND_NE, TCGV_HIGH(ret), t0, t1,
2394                                 TCGV_HIGH(v1), TCGV_HIGH(v2));
2395         } else {
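            /* Without movcond, turn the 0/1 setcond result into an
               all-zeros/all-ones mask and compute each half as
               (v1 & mask) | (v2 & ~mask).  */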
2396             tcg_gen_neg_i32(t0, t0);
2397 
2398             tcg_gen_and_i32(t1, TCGV_LOW(v1), t0);
2399             tcg_gen_andc_i32(TCGV_LOW(ret), TCGV_LOW(v2), t0);
2400             tcg_gen_or_i32(TCGV_LOW(ret), TCGV_LOW(ret), t1);
2401 
2402             tcg_gen_and_i32(t1, TCGV_HIGH(v1), t0);
2403             tcg_gen_andc_i32(TCGV_HIGH(ret), TCGV_HIGH(v2), t0);
2404             tcg_gen_or_i32(TCGV_HIGH(ret), TCGV_HIGH(ret), t1);
2405         }
2406         tcg_temp_free_i32(t0);
2407         tcg_temp_free_i32(t1);
2408     } else if (TCG_TARGET_HAS_movcond_i64) {
2409         tcg_gen_op6i_i64(INDEX_op_movcond_i64, ret, c1, c2, v1, v2, cond);
2410     } else {
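        /* Same mask trick as the 32-bit expansion above.  */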
2411         TCGv_i64 t0 = tcg_temp_new_i64();
2412         TCGv_i64 t1 = tcg_temp_new_i64();
2413         tcg_gen_setcond_i64(cond, t0, c1, c2);
2414         tcg_gen_neg_i64(t0, t0);
2415         tcg_gen_and_i64(t1, v1, t0);
2416         tcg_gen_andc_i64(ret, v2, t0);
2417         tcg_gen_or_i64(ret, ret, t1);
2418         tcg_temp_free_i64(t0);
2419         tcg_temp_free_i64(t1);
2420     }
2421 }
2422 
2423 void tcg_gen_add2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 al,
2424                       TCGv_i64 ah, TCGv_i64 bl, TCGv_i64 bh)
2425 {
2426     if (TCG_TARGET_HAS_add2_i64) {
2427         tcg_gen_op6_i64(INDEX_op_add2_i64, rl, rh, al, ah, bl, bh);
2428     } else {
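        /* Compute the carry by hand: the low-part sum wrapped around
           exactly when it compares below either addend (unsigned).  */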
2429         TCGv_i64 t0 = tcg_temp_new_i64();
2430         TCGv_i64 t1 = tcg_temp_new_i64();
2431         tcg_gen_add_i64(t0, al, bl);
2432         tcg_gen_setcond_i64(TCG_COND_LTU, t1, t0, al);
2433         tcg_gen_add_i64(rh, ah, bh);
2434         tcg_gen_add_i64(rh, rh, t1);
2435         tcg_gen_mov_i64(rl, t0);
2436         tcg_temp_free_i64(t0);
2437         tcg_temp_free_i64(t1);
2438     }
2439 }
2440 
2441 void tcg_gen_sub2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 al,
2442                       TCGv_i64 ah, TCGv_i64 bl, TCGv_i64 bh)
2443 {
2444     if (TCG_TARGET_HAS_sub2_i64) {
2445         tcg_gen_op6_i64(INDEX_op_sub2_i64, rl, rh, al, ah, bl, bh);
2446     } else {
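        /* Likewise the borrow: it is set exactly when al < bl
           (unsigned).  */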
2447         TCGv_i64 t0 = tcg_temp_new_i64();
2448         TCGv_i64 t1 = tcg_temp_new_i64();
2449         tcg_gen_sub_i64(t0, al, bl);
2450         tcg_gen_setcond_i64(TCG_COND_LTU, t1, al, bl);
2451         tcg_gen_sub_i64(rh, ah, bh);
2452         tcg_gen_sub_i64(rh, rh, t1);
2453         tcg_gen_mov_i64(rl, t0);
2454         tcg_temp_free_i64(t0);
2455         tcg_temp_free_i64(t1);
2456     }
2457 }
2458 
2459 void tcg_gen_mulu2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 arg1, TCGv_i64 arg2)
2460 {
2461     if (TCG_TARGET_HAS_mulu2_i64) {
2462         tcg_gen_op4_i64(INDEX_op_mulu2_i64, rl, rh, arg1, arg2);
2463     } else if (TCG_TARGET_HAS_muluh_i64) {
2464         TCGv_i64 t = tcg_temp_new_i64();
2465         tcg_gen_op3_i64(INDEX_op_mul_i64, t, arg1, arg2);
2466         tcg_gen_op3_i64(INDEX_op_muluh_i64, rh, arg1, arg2);
2467         tcg_gen_mov_i64(rl, t);
2468         tcg_temp_free_i64(t);
2469     } else {
2470         TCGv_i64 t0 = tcg_temp_new_i64();
2471         tcg_gen_mul_i64(t0, arg1, arg2);
2472         gen_helper_muluh_i64(rh, arg1, arg2);
2473         tcg_gen_mov_i64(rl, t0);
2474         tcg_temp_free_i64(t0);
2475     }
2476 }
2477 
2478 void tcg_gen_muls2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 arg1, TCGv_i64 arg2)
2479 {
2480     if (TCG_TARGET_HAS_muls2_i64) {
2481         tcg_gen_op4_i64(INDEX_op_muls2_i64, rl, rh, arg1, arg2);
2482     } else if (TCG_TARGET_HAS_mulsh_i64) {
2483         TCGv_i64 t = tcg_temp_new_i64();
2484         tcg_gen_op3_i64(INDEX_op_mul_i64, t, arg1, arg2);
2485         tcg_gen_op3_i64(INDEX_op_mulsh_i64, rh, arg1, arg2);
2486         tcg_gen_mov_i64(rl, t);
2487         tcg_temp_free_i64(t);
2488     } else if (TCG_TARGET_HAS_mulu2_i64 || TCG_TARGET_HAS_muluh_i64) {
2489         TCGv_i64 t0 = tcg_temp_new_i64();
2490         TCGv_i64 t1 = tcg_temp_new_i64();
2491         TCGv_i64 t2 = tcg_temp_new_i64();
2492         TCGv_i64 t3 = tcg_temp_new_i64();
2493         tcg_gen_mulu2_i64(t0, t1, arg1, arg2);
2494         /* Adjust for negative inputs.  */
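        /* A negative operand read as unsigned is too large by 2^64, so
           the signed high part is the unsigned high part minus arg2
           when arg1 < 0, and minus arg1 when arg2 < 0.  */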
2495         tcg_gen_sari_i64(t2, arg1, 63);
2496         tcg_gen_sari_i64(t3, arg2, 63);
2497         tcg_gen_and_i64(t2, t2, arg2);
2498         tcg_gen_and_i64(t3, t3, arg1);
2499         tcg_gen_sub_i64(rh, t1, t2);
2500         tcg_gen_sub_i64(rh, rh, t3);
2501         tcg_gen_mov_i64(rl, t0);
2502         tcg_temp_free_i64(t0);
2503         tcg_temp_free_i64(t1);
2504         tcg_temp_free_i64(t2);
2505         tcg_temp_free_i64(t3);
2506     } else {
2507         TCGv_i64 t0 = tcg_temp_new_i64();
2508         tcg_gen_mul_i64(t0, arg1, arg2);
2509         gen_helper_mulsh_i64(rh, arg1, arg2);
2510         tcg_gen_mov_i64(rl, t0);
2511         tcg_temp_free_i64(t0);
2512     }
2513 }
2514 
2515 void tcg_gen_mulsu2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 arg1, TCGv_i64 arg2)
2516 {
2517     TCGv_i64 t0 = tcg_temp_new_i64();
2518     TCGv_i64 t1 = tcg_temp_new_i64();
2519     TCGv_i64 t2 = tcg_temp_new_i64();
2520     tcg_gen_mulu2_i64(t0, t1, arg1, arg2);
2521     /* Adjust for negative input for the signed arg1.  */
2522     tcg_gen_sari_i64(t2, arg1, 63);
2523     tcg_gen_and_i64(t2, t2, arg2);
2524     tcg_gen_sub_i64(rh, t1, t2);
2525     tcg_gen_mov_i64(rl, t0);
2526     tcg_temp_free_i64(t0);
2527     tcg_temp_free_i64(t1);
2528     tcg_temp_free_i64(t2);
2529 }
2530 
2531 void tcg_gen_smin_i64(TCGv_i64 ret, TCGv_i64 a, TCGv_i64 b)
2532 {
2533     tcg_gen_movcond_i64(TCG_COND_LT, ret, a, b, a, b);
2534 }
2535 
2536 void tcg_gen_umin_i64(TCGv_i64 ret, TCGv_i64 a, TCGv_i64 b)
2537 {
2538     tcg_gen_movcond_i64(TCG_COND_LTU, ret, a, b, a, b);
2539 }
2540 
2541 void tcg_gen_smax_i64(TCGv_i64 ret, TCGv_i64 a, TCGv_i64 b)
2542 {
2543     tcg_gen_movcond_i64(TCG_COND_LT, ret, a, b, b, a);
2544 }
2545 
2546 void tcg_gen_umax_i64(TCGv_i64 ret, TCGv_i64 a, TCGv_i64 b)
2547 {
2548     tcg_gen_movcond_i64(TCG_COND_LTU, ret, a, b, b, a);
2549 }
2550 
2551 /* Size changing operations.  */
2552 
2553 void tcg_gen_extrl_i64_i32(TCGv_i32 ret, TCGv_i64 arg)
2554 {
2555     if (TCG_TARGET_REG_BITS == 32) {
2556         tcg_gen_mov_i32(ret, TCGV_LOW(arg));
2557     } else if (TCG_TARGET_HAS_extrl_i64_i32) {
2558         tcg_gen_op2(INDEX_op_extrl_i64_i32,
2559                     tcgv_i32_arg(ret), tcgv_i64_arg(arg));
2560     } else {
2561         tcg_gen_mov_i32(ret, (TCGv_i32)arg);
2562     }
2563 }
2564 
2565 void tcg_gen_extrh_i64_i32(TCGv_i32 ret, TCGv_i64 arg)
2566 {
2567     if (TCG_TARGET_REG_BITS == 32) {
2568         tcg_gen_mov_i32(ret, TCGV_HIGH(arg));
2569     } else if (TCG_TARGET_HAS_extrh_i64_i32) {
2570         tcg_gen_op2(INDEX_op_extrh_i64_i32,
2571                     tcgv_i32_arg(ret), tcgv_i64_arg(arg));
2572     } else {
2573         TCGv_i64 t = tcg_temp_new_i64();
2574         tcg_gen_shri_i64(t, arg, 32);
2575         tcg_gen_mov_i32(ret, (TCGv_i32)t);
2576         tcg_temp_free_i64(t);
2577     }
2578 }
2579 
2580 void tcg_gen_extu_i32_i64(TCGv_i64 ret, TCGv_i32 arg)
2581 {
2582     if (TCG_TARGET_REG_BITS == 32) {
2583         tcg_gen_mov_i32(TCGV_LOW(ret), arg);
2584         tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
2585     } else {
2586         tcg_gen_op2(INDEX_op_extu_i32_i64,
2587                     tcgv_i64_arg(ret), tcgv_i32_arg(arg));
2588     }
2589 }
2590 
2591 void tcg_gen_ext_i32_i64(TCGv_i64 ret, TCGv_i32 arg)
2592 {
2593     if (TCG_TARGET_REG_BITS == 32) {
2594         tcg_gen_mov_i32(TCGV_LOW(ret), arg);
2595         tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31);
2596     } else {
2597         tcg_gen_op2(INDEX_op_ext_i32_i64,
2598                     tcgv_i64_arg(ret), tcgv_i32_arg(arg));
2599     }
2600 }
2601 
2602 void tcg_gen_concat_i32_i64(TCGv_i64 dest, TCGv_i32 low, TCGv_i32 high)
2603 {
2604     TCGv_i64 tmp;
2605 
2606     if (TCG_TARGET_REG_BITS == 32) {
2607         tcg_gen_mov_i32(TCGV_LOW(dest), low);
2608         tcg_gen_mov_i32(TCGV_HIGH(dest), high);
2609         return;
2610     }
2611 
2612     tmp = tcg_temp_new_i64();
2613     /* These extensions are only needed for type correctness.
2614        We may be able to do better given target-specific information.  */
2615     tcg_gen_extu_i32_i64(tmp, high);
2616     tcg_gen_extu_i32_i64(dest, low);
2617     /* If deposit is available, use it.  Otherwise, exploit the fact
2618        that both inputs were zero-extended above.  */
2619     if (TCG_TARGET_HAS_deposit_i64 && TCG_TARGET_deposit_i64_valid(32, 32)) {
2620         tcg_gen_deposit_i64(dest, dest, tmp, 32, 32);
2621     } else {
2622         tcg_gen_shli_i64(tmp, tmp, 32);
2623         tcg_gen_or_i64(dest, dest, tmp);
2624     }
2625     tcg_temp_free_i64(tmp);
2626 }
2627 
2628 void tcg_gen_extr_i64_i32(TCGv_i32 lo, TCGv_i32 hi, TCGv_i64 arg)
2629 {
2630     if (TCG_TARGET_REG_BITS == 32) {
2631         tcg_gen_mov_i32(lo, TCGV_LOW(arg));
2632         tcg_gen_mov_i32(hi, TCGV_HIGH(arg));
2633     } else {
2634         tcg_gen_extrl_i64_i32(lo, arg);
2635         tcg_gen_extrh_i64_i32(hi, arg);
2636     }
2637 }
2638 
2639 void tcg_gen_extr32_i64(TCGv_i64 lo, TCGv_i64 hi, TCGv_i64 arg)
2640 {
2641     tcg_gen_ext32u_i64(lo, arg);
2642     tcg_gen_shri_i64(hi, arg, 32);
2643 }
2644 
2645 /* QEMU specific operations.  */
2646 
2647 void tcg_gen_exit_tb(TranslationBlock *tb, unsigned idx)
2648 {
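    /* TranslationBlock structures are sufficiently aligned that the
       exit index fits in the low bits of the pointer.  */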
2649     uintptr_t val = (uintptr_t)tb + idx;
2650 
2651     if (tb == NULL) {
2652         tcg_debug_assert(idx == 0);
2653     } else if (idx <= TB_EXIT_IDXMAX) {
2654 #ifdef CONFIG_DEBUG_TCG
2655         /* This is an exit following a goto_tb.  Verify that we have
2656            seen this numbered exit before, via tcg_gen_goto_tb.  */
2657         tcg_debug_assert(tcg_ctx->goto_tb_issue_mask & (1 << idx));
2658 #endif
2659         /* When not chaining, exit without indicating a link.  */
2660         if (qemu_loglevel_mask(CPU_LOG_TB_NOCHAIN)) {
2661             val = 0;
2662         }
2663     } else {
2664         /* This is an exit via the exitreq label.  */
2665         tcg_debug_assert(idx == TB_EXIT_REQUESTED);
2666     }
2667 
2668     tcg_gen_op1i(INDEX_op_exit_tb, val);
2669 }
2670 
2671 void tcg_gen_goto_tb(unsigned idx)
2672 {
2673     /* We only support two chained exits.  */
2674     tcg_debug_assert(idx <= TB_EXIT_IDXMAX);
2675 #ifdef CONFIG_DEBUG_TCG
2676     /* Verify that we haven't seen this numbered exit before.  */
2677     tcg_debug_assert((tcg_ctx->goto_tb_issue_mask & (1 << idx)) == 0);
2678     tcg_ctx->goto_tb_issue_mask |= 1 << idx;
2679 #endif
2680     /* When not chaining, we simply fall through to the "fallback" exit.  */
2681     if (!qemu_loglevel_mask(CPU_LOG_TB_NOCHAIN)) {
2682         tcg_gen_op1i(INDEX_op_goto_tb, idx);
2683     }
2684 }
2685 
2686 void tcg_gen_lookup_and_goto_ptr(void)
2687 {
2688     if (TCG_TARGET_HAS_goto_ptr && !qemu_loglevel_mask(CPU_LOG_TB_NOCHAIN)) {
2689         TCGv_ptr ptr = tcg_temp_new_ptr();
2690         gen_helper_lookup_tb_ptr(ptr, cpu_env);
2691         tcg_gen_op1i(INDEX_op_goto_ptr, tcgv_ptr_arg(ptr));
2692         tcg_temp_free_ptr(ptr);
2693     } else {
2694         tcg_gen_exit_tb(NULL, 0);
2695     }
2696 }
2697 
2698 static inline TCGMemOp tcg_canonicalize_memop(TCGMemOp op, bool is64, bool st)
2699 {
2700     /* Trigger the asserts within as early as possible.  */
2701     (void)get_alignment_bits(op);
2702 
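    /* A byte access has no byte order, a 32-bit value already fills an
       i32 so MO_SIGN is meaningless there, and a store never
       sign-extends; strip such bits so that each operation has a
       single canonical encoding.  */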
2703     switch (op & MO_SIZE) {
2704     case MO_8:
2705         op &= ~MO_BSWAP;
2706         break;
2707     case MO_16:
2708         break;
2709     case MO_32:
2710         if (!is64) {
2711             op &= ~MO_SIGN;
2712         }
2713         break;
2714     case MO_64:
2715         if (!is64) {
2716             tcg_abort();
2717         }
2718         break;
2719     }
2720     if (st) {
2721         op &= ~MO_SIGN;
2722     }
2723     return op;
2724 }
2725 
2726 static void gen_ldst_i32(TCGOpcode opc, TCGv_i32 val, TCGv addr,
2727                          TCGMemOp memop, TCGArg idx)
2728 {
2729     TCGMemOpIdx oi = make_memop_idx(memop, idx);
2730 #if TARGET_LONG_BITS == 32
2731     tcg_gen_op3i_i32(opc, val, addr, oi);
2732 #else
2733     if (TCG_TARGET_REG_BITS == 32) {
2734         tcg_gen_op4i_i32(opc, val, TCGV_LOW(addr), TCGV_HIGH(addr), oi);
2735     } else {
2736         tcg_gen_op3(opc, tcgv_i32_arg(val), tcgv_i64_arg(addr), oi);
2737     }
2738 #endif
2739 }
2740 
2741 static void gen_ldst_i64(TCGOpcode opc, TCGv_i64 val, TCGv addr,
2742                          TCGMemOp memop, TCGArg idx)
2743 {
2744     TCGMemOpIdx oi = make_memop_idx(memop, idx);
2745 #if TARGET_LONG_BITS == 32
2746     if (TCG_TARGET_REG_BITS == 32) {
2747         tcg_gen_op4i_i32(opc, TCGV_LOW(val), TCGV_HIGH(val), addr, oi);
2748     } else {
2749         tcg_gen_op3(opc, tcgv_i64_arg(val), tcgv_i32_arg(addr), oi);
2750     }
2751 #else
2752     if (TCG_TARGET_REG_BITS == 32) {
2753         tcg_gen_op5i_i32(opc, TCGV_LOW(val), TCGV_HIGH(val),
2754                          TCGV_LOW(addr), TCGV_HIGH(addr), oi);
2755     } else {
2756         tcg_gen_op3i_i64(opc, val, addr, oi);
2757     }
2758 #endif
2759 }
2760 
2761 static void tcg_gen_req_mo(TCGBar type)
2762 {
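    /* Keep only the ordering requirements that the guest memory model
       imposes (when known) and that the host does not already provide;
       emit a barrier only if something remains.  */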
2763 #ifdef TCG_GUEST_DEFAULT_MO
2764     type &= TCG_GUEST_DEFAULT_MO;
2765 #endif
2766     type &= ~TCG_TARGET_DEFAULT_MO;
2767     if (type) {
2768         tcg_gen_mb(type | TCG_BAR_SC);
2769     }
2770 }
2771 
2772 void tcg_gen_qemu_ld_i32(TCGv_i32 val, TCGv addr, TCGArg idx, TCGMemOp memop)
2773 {
2774     TCGMemOp orig_memop;
2775 
2776     tcg_gen_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
2777     memop = tcg_canonicalize_memop(memop, 0, 0);
2778     trace_guest_mem_before_tcg(tcg_ctx->cpu, cpu_env,
2779                                addr, trace_mem_get_info(memop, 0));
2780 
2781     orig_memop = memop;
2782     if (!TCG_TARGET_HAS_MEMORY_BSWAP && (memop & MO_BSWAP)) {
2783         memop &= ~MO_BSWAP;
2784         /* The bswap primitive requires zero-extended input.  */
2785         if ((memop & MO_SSIZE) == MO_SW) {
2786             memop &= ~MO_SIGN;
2787         }
2788     }
2789 
2790     gen_ldst_i32(INDEX_op_qemu_ld_i32, val, addr, memop, idx);
2791 
2792     if ((orig_memop ^ memop) & MO_BSWAP) {
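        /* The data was loaded without MO_BSWAP; perform the swap by
           hand, and redo any sign extension stripped above.  */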
2793         switch (orig_memop & MO_SIZE) {
2794         case MO_16:
2795             tcg_gen_bswap16_i32(val, val);
2796             if (orig_memop & MO_SIGN) {
2797                 tcg_gen_ext16s_i32(val, val);
2798             }
2799             break;
2800         case MO_32:
2801             tcg_gen_bswap32_i32(val, val);
2802             break;
2803         default:
2804             g_assert_not_reached();
2805         }
2806     }
2807 }
2808 
2809 void tcg_gen_qemu_st_i32(TCGv_i32 val, TCGv addr, TCGArg idx, TCGMemOp memop)
2810 {
2811     TCGv_i32 swap = NULL;
2812 
2813     tcg_gen_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST);
2814     memop = tcg_canonicalize_memop(memop, 0, 1);
2815     trace_guest_mem_before_tcg(tcg_ctx->cpu, cpu_env,
2816                                addr, trace_mem_get_info(memop, 1));
2817 
2818     if (!TCG_TARGET_HAS_MEMORY_BSWAP && (memop & MO_BSWAP)) {
2819         swap = tcg_temp_new_i32();
2820         switch (memop & MO_SIZE) {
2821         case MO_16:
2822             tcg_gen_ext16u_i32(swap, val);
2823             tcg_gen_bswap16_i32(swap, swap);
2824             break;
2825         case MO_32:
2826             tcg_gen_bswap32_i32(swap, val);
2827             break;
2828         default:
2829             g_assert_not_reached();
2830         }
2831         val = swap;
2832         memop &= ~MO_BSWAP;
2833     }
2834 
2835     gen_ldst_i32(INDEX_op_qemu_st_i32, val, addr, memop, idx);
2836 
2837     if (swap) {
2838         tcg_temp_free_i32(swap);
2839     }
2840 }
2841 
2842 void tcg_gen_qemu_ld_i64(TCGv_i64 val, TCGv addr, TCGArg idx, TCGMemOp memop)
2843 {
2844     TCGMemOp orig_memop;
2845 
2846     if (TCG_TARGET_REG_BITS == 32 && (memop & MO_SIZE) < MO_64) {
2847         tcg_gen_qemu_ld_i32(TCGV_LOW(val), addr, idx, memop);
2848         if (memop & MO_SIGN) {
2849             tcg_gen_sari_i32(TCGV_HIGH(val), TCGV_LOW(val), 31);
2850         } else {
2851             tcg_gen_movi_i32(TCGV_HIGH(val), 0);
2852         }
2853         return;
2854     }
2855 
2856     tcg_gen_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
2857     memop = tcg_canonicalize_memop(memop, 1, 0);
2858     trace_guest_mem_before_tcg(tcg_ctx->cpu, cpu_env,
2859                                addr, trace_mem_get_info(memop, 0));
2860 
2861     orig_memop = memop;
2862     if (!TCG_TARGET_HAS_MEMORY_BSWAP && (memop & MO_BSWAP)) {
2863         memop &= ~MO_BSWAP;
2864         /* The bswap primitive requires zero-extended input.  */
2865         if ((memop & MO_SIGN) && (memop & MO_SIZE) < MO_64) {
2866             memop &= ~MO_SIGN;
2867         }
2868     }
2869 
2870     gen_ldst_i64(INDEX_op_qemu_ld_i64, val, addr, memop, idx);
2871 
2872     if ((orig_memop ^ memop) & MO_BSWAP) {
2873         switch (orig_memop & MO_SIZE) {
2874         case MO_16:
2875             tcg_gen_bswap16_i64(val, val);
2876             if (orig_memop & MO_SIGN) {
2877                 tcg_gen_ext16s_i64(val, val);
2878             }
2879             break;
2880         case MO_32:
2881             tcg_gen_bswap32_i64(val, val);
2882             if (orig_memop & MO_SIGN) {
2883                 tcg_gen_ext32s_i64(val, val);
2884             }
2885             break;
2886         case MO_64:
2887             tcg_gen_bswap64_i64(val, val);
2888             break;
2889         default:
2890             g_assert_not_reached();
2891         }
2892     }
2893 }
2894 
2895 void tcg_gen_qemu_st_i64(TCGv_i64 val, TCGv addr, TCGArg idx, TCGMemOp memop)
2896 {
2897     TCGv_i64 swap = NULL;
2898 
2899     if (TCG_TARGET_REG_BITS == 32 && (memop & MO_SIZE) < MO_64) {
2900         tcg_gen_qemu_st_i32(TCGV_LOW(val), addr, idx, memop);
2901         return;
2902     }
2903 
2904     tcg_gen_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST);
2905     memop = tcg_canonicalize_memop(memop, 1, 1);
2906     trace_guest_mem_before_tcg(tcg_ctx->cpu, cpu_env,
2907                                addr, trace_mem_get_info(memop, 1));
2908 
2909     if (!TCG_TARGET_HAS_MEMORY_BSWAP && (memop & MO_BSWAP)) {
2910         swap = tcg_temp_new_i64();
2911         switch (memop & MO_SIZE) {
2912         case MO_16:
2913             tcg_gen_ext16u_i64(swap, val);
2914             tcg_gen_bswap16_i64(swap, swap);
2915             break;
2916         case MO_32:
2917             tcg_gen_ext32u_i64(swap, val);
2918             tcg_gen_bswap32_i64(swap, swap);
2919             break;
2920         case MO_64:
2921             tcg_gen_bswap64_i64(swap, val);
2922             break;
2923         default:
2924             g_assert_not_reached();
2925         }
2926         val = swap;
2927         memop &= ~MO_BSWAP;
2928     }
2929 
2930     gen_ldst_i64(INDEX_op_qemu_st_i64, val, addr, memop, idx);
2931 
2932     if (swap) {
2933         tcg_temp_free_i64(swap);
2934     }
2935 }
2936 
2937 static void tcg_gen_ext_i32(TCGv_i32 ret, TCGv_i32 val, TCGMemOp opc)
2938 {
2939     switch (opc & MO_SSIZE) {
2940     case MO_SB:
2941         tcg_gen_ext8s_i32(ret, val);
2942         break;
2943     case MO_UB:
2944         tcg_gen_ext8u_i32(ret, val);
2945         break;
2946     case MO_SW:
2947         tcg_gen_ext16s_i32(ret, val);
2948         break;
2949     case MO_UW:
2950         tcg_gen_ext16u_i32(ret, val);
2951         break;
2952     default:
2953         tcg_gen_mov_i32(ret, val);
2954         break;
2955     }
2956 }
2957 
2958 static void tcg_gen_ext_i64(TCGv_i64 ret, TCGv_i64 val, TCGMemOp opc)
2959 {
2960     switch (opc & MO_SSIZE) {
2961     case MO_SB:
2962         tcg_gen_ext8s_i64(ret, val);
2963         break;
2964     case MO_UB:
2965         tcg_gen_ext8u_i64(ret, val);
2966         break;
2967     case MO_SW:
2968         tcg_gen_ext16s_i64(ret, val);
2969         break;
2970     case MO_UW:
2971         tcg_gen_ext16u_i64(ret, val);
2972         break;
2973     case MO_SL:
2974         tcg_gen_ext32s_i64(ret, val);
2975         break;
2976     case MO_UL:
2977         tcg_gen_ext32u_i64(ret, val);
2978         break;
2979     default:
2980         tcg_gen_mov_i64(ret, val);
2981         break;
2982     }
2983 }
2984 
2985 #ifdef CONFIG_SOFTMMU
2986 typedef void (*gen_atomic_cx_i32)(TCGv_i32, TCGv_env, TCGv,
2987                                   TCGv_i32, TCGv_i32, TCGv_i32);
2988 typedef void (*gen_atomic_cx_i64)(TCGv_i64, TCGv_env, TCGv,
2989                                   TCGv_i64, TCGv_i64, TCGv_i32);
2990 typedef void (*gen_atomic_op_i32)(TCGv_i32, TCGv_env, TCGv,
2991                                   TCGv_i32, TCGv_i32);
2992 typedef void (*gen_atomic_op_i64)(TCGv_i64, TCGv_env, TCGv,
2993                                   TCGv_i64, TCGv_i32);
2994 #else
2995 typedef void (*gen_atomic_cx_i32)(TCGv_i32, TCGv_env, TCGv, TCGv_i32, TCGv_i32);
2996 typedef void (*gen_atomic_cx_i64)(TCGv_i64, TCGv_env, TCGv, TCGv_i64, TCGv_i64);
2997 typedef void (*gen_atomic_op_i32)(TCGv_i32, TCGv_env, TCGv, TCGv_i32);
2998 typedef void (*gen_atomic_op_i64)(TCGv_i64, TCGv_env, TCGv, TCGv_i64);
2999 #endif
3000 
3001 #ifdef CONFIG_ATOMIC64
3002 # define WITH_ATOMIC64(X) X,
3003 #else
3004 # define WITH_ATOMIC64(X)
3005 #endif
3006 
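/* Out-of-line cmpxchg helpers, indexed by MO_SIZE | MO_BSWAP; the
   64-bit entries are only present when CONFIG_ATOMIC64 is set.  */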
3007 static void * const table_cmpxchg[16] = {
3008     [MO_8] = gen_helper_atomic_cmpxchgb,
3009     [MO_16 | MO_LE] = gen_helper_atomic_cmpxchgw_le,
3010     [MO_16 | MO_BE] = gen_helper_atomic_cmpxchgw_be,
3011     [MO_32 | MO_LE] = gen_helper_atomic_cmpxchgl_le,
3012     [MO_32 | MO_BE] = gen_helper_atomic_cmpxchgl_be,
3013     WITH_ATOMIC64([MO_64 | MO_LE] = gen_helper_atomic_cmpxchgq_le)
3014     WITH_ATOMIC64([MO_64 | MO_BE] = gen_helper_atomic_cmpxchgq_be)
3015 };
3016 
3017 void tcg_gen_atomic_cmpxchg_i32(TCGv_i32 retv, TCGv addr, TCGv_i32 cmpv,
3018                                 TCGv_i32 newv, TCGArg idx, TCGMemOp memop)
3019 {
3020     memop = tcg_canonicalize_memop(memop, 0, 0);
3021 
3022     if (!(tcg_ctx->tb_cflags & CF_PARALLEL)) {
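        /* Serial context: no other vCPU can race with this TB, so the
           cmpxchg may be expanded as a plain load, compare-and-select,
           and store.  */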
3023         TCGv_i32 t1 = tcg_temp_new_i32();
3024         TCGv_i32 t2 = tcg_temp_new_i32();
3025 
3026         tcg_gen_ext_i32(t2, cmpv, memop & MO_SIZE);
3027 
3028         tcg_gen_qemu_ld_i32(t1, addr, idx, memop & ~MO_SIGN);
3029         tcg_gen_movcond_i32(TCG_COND_EQ, t2, t1, t2, newv, t1);
3030         tcg_gen_qemu_st_i32(t2, addr, idx, memop);
3031         tcg_temp_free_i32(t2);
3032 
3033         if (memop & MO_SIGN) {
3034             tcg_gen_ext_i32(retv, t1, memop);
3035         } else {
3036             tcg_gen_mov_i32(retv, t1);
3037         }
3038         tcg_temp_free_i32(t1);
3039     } else {
3040         gen_atomic_cx_i32 gen;
3041 
3042         gen = table_cmpxchg[memop & (MO_SIZE | MO_BSWAP)];
3043         tcg_debug_assert(gen != NULL);
3044 
3045 #ifdef CONFIG_SOFTMMU
3046         {
3047             TCGv_i32 oi = tcg_const_i32(make_memop_idx(memop & ~MO_SIGN, idx));
3048             gen(retv, cpu_env, addr, cmpv, newv, oi);
3049             tcg_temp_free_i32(oi);
3050         }
3051 #else
3052         gen(retv, cpu_env, addr, cmpv, newv);
3053 #endif
3054 
3055         if (memop & MO_SIGN) {
3056             tcg_gen_ext_i32(retv, retv, memop);
3057         }
3058     }
3059 }
3060 
3061 void tcg_gen_atomic_cmpxchg_i64(TCGv_i64 retv, TCGv addr, TCGv_i64 cmpv,
3062                                 TCGv_i64 newv, TCGArg idx, TCGMemOp memop)
3063 {
3064     memop = tcg_canonicalize_memop(memop, 1, 0);
3065 
3066     if (!(tcg_ctx->tb_cflags & CF_PARALLEL)) {
3067         TCGv_i64 t1 = tcg_temp_new_i64();
3068         TCGv_i64 t2 = tcg_temp_new_i64();
3069 
3070         tcg_gen_ext_i64(t2, cmpv, memop & MO_SIZE);
3071 
3072         tcg_gen_qemu_ld_i64(t1, addr, idx, memop & ~MO_SIGN);
3073         tcg_gen_movcond_i64(TCG_COND_EQ, t2, t1, t2, newv, t1);
3074         tcg_gen_qemu_st_i64(t2, addr, idx, memop);
3075         tcg_temp_free_i64(t2);
3076 
3077         if (memop & MO_SIGN) {
3078             tcg_gen_ext_i64(retv, t1, memop);
3079         } else {
3080             tcg_gen_mov_i64(retv, t1);
3081         }
3082         tcg_temp_free_i64(t1);
3083     } else if ((memop & MO_SIZE) == MO_64) {
3084 #ifdef CONFIG_ATOMIC64
3085         gen_atomic_cx_i64 gen;
3086 
3087         gen = table_cmpxchg[memop & (MO_SIZE | MO_BSWAP)];
3088         tcg_debug_assert(gen != NULL);
3089 
3090 #ifdef CONFIG_SOFTMMU
3091         {
3092             TCGv_i32 oi = tcg_const_i32(make_memop_idx(memop, idx));
3093             gen(retv, cpu_env, addr, cmpv, newv, oi);
3094             tcg_temp_free_i32(oi);
3095         }
3096 #else
3097         gen(retv, cpu_env, addr, cmpv, newv);
3098 #endif
3099 #else
3100         gen_helper_exit_atomic(cpu_env);
3101         /* Produce a result, so that we have a well-formed opcode stream
3102            with respect to uses of the result in the (dead) code following.  */
3103         tcg_gen_movi_i64(retv, 0);
3104 #endif /* CONFIG_ATOMIC64 */
3105     } else {
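        /* Narrower than 64 bits: go through the 32-bit cmpxchg and
           widen the result back to 64 bits.  */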
3106         TCGv_i32 c32 = tcg_temp_new_i32();
3107         TCGv_i32 n32 = tcg_temp_new_i32();
3108         TCGv_i32 r32 = tcg_temp_new_i32();
3109 
3110         tcg_gen_extrl_i64_i32(c32, cmpv);
3111         tcg_gen_extrl_i64_i32(n32, newv);
3112         tcg_gen_atomic_cmpxchg_i32(r32, addr, c32, n32, idx, memop & ~MO_SIGN);
3113         tcg_temp_free_i32(c32);
3114         tcg_temp_free_i32(n32);
3115 
3116         tcg_gen_extu_i32_i64(retv, r32);
3117         tcg_temp_free_i32(r32);
3118 
3119         if (memop & MO_SIGN) {
3120             tcg_gen_ext_i64(retv, retv, memop);
3121         }
3122     }
3123 }
3124 
3125 static void do_nonatomic_op_i32(TCGv_i32 ret, TCGv addr, TCGv_i32 val,
3126                                 TCGArg idx, TCGMemOp memop, bool new_val,
3127                                 void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32))
3128 {
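    /* Used outside parallel contexts: load the old value, apply the
       operation, store the result, and return either the old or the
       new value as selected by new_val.  */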
3129     TCGv_i32 t1 = tcg_temp_new_i32();
3130     TCGv_i32 t2 = tcg_temp_new_i32();
3131 
3132     memop = tcg_canonicalize_memop(memop, 0, 0);
3133 
3134     tcg_gen_qemu_ld_i32(t1, addr, idx, memop & ~MO_SIGN);
3135     gen(t2, t1, val);
3136     tcg_gen_qemu_st_i32(t2, addr, idx, memop);
3137 
3138     tcg_gen_ext_i32(ret, (new_val ? t2 : t1), memop);
3139     tcg_temp_free_i32(t1);
3140     tcg_temp_free_i32(t2);
3141 }
3142 
3143 static void do_atomic_op_i32(TCGv_i32 ret, TCGv addr, TCGv_i32 val,
3144                              TCGArg idx, TCGMemOp memop, void * const table[])
3145 {
3146     gen_atomic_op_i32 gen;
3147 
3148     memop = tcg_canonicalize_memop(memop, 0, 0);
3149 
3150     gen = table[memop & (MO_SIZE | MO_BSWAP)];
3151     tcg_debug_assert(gen != NULL);
3152 
3153 #ifdef CONFIG_SOFTMMU
3154     {
3155         TCGv_i32 oi = tcg_const_i32(make_memop_idx(memop & ~MO_SIGN, idx));
3156         gen(ret, cpu_env, addr, val, oi);
3157         tcg_temp_free_i32(oi);
3158     }
3159 #else
3160     gen(ret, cpu_env, addr, val);
3161 #endif
3162 
3163     if (memop & MO_SIGN) {
3164         tcg_gen_ext_i32(ret, ret, memop);
3165     }
3166 }
3167 
3168 static void do_nonatomic_op_i64(TCGv_i64 ret, TCGv addr, TCGv_i64 val,
3169                                 TCGArg idx, TCGMemOp memop, bool new_val,
3170                                 void (*gen)(TCGv_i64, TCGv_i64, TCGv_i64))
3171 {
3172     TCGv_i64 t1 = tcg_temp_new_i64();
3173     TCGv_i64 t2 = tcg_temp_new_i64();
3174 
3175     memop = tcg_canonicalize_memop(memop, 1, 0);
3176 
3177     tcg_gen_qemu_ld_i64(t1, addr, idx, memop & ~MO_SIGN);
3178     gen(t2, t1, val);
3179     tcg_gen_qemu_st_i64(t2, addr, idx, memop);
3180 
3181     tcg_gen_ext_i64(ret, (new_val ? t2 : t1), memop);
3182     tcg_temp_free_i64(t1);
3183     tcg_temp_free_i64(t2);
3184 }

/* 64-bit counterpart of do_atomic_op_i32 above.  True 64-bit operations
   require CONFIG_ATOMIC64; without it we punt to gen_helper_exit_atomic,
   which restarts the current instruction in an exclusive (serial)
   context.  Smaller sizes are routed through the 32-bit expander and
   widened afterwards.  */
static void do_atomic_op_i64(TCGv_i64 ret, TCGv addr, TCGv_i64 val,
                             TCGArg idx, TCGMemOp memop, void * const table[])
{
    memop = tcg_canonicalize_memop(memop, 1, 0);

    if ((memop & MO_SIZE) == MO_64) {
#ifdef CONFIG_ATOMIC64
        gen_atomic_op_i64 gen;

        gen = table[memop & (MO_SIZE | MO_BSWAP)];
        tcg_debug_assert(gen != NULL);

#ifdef CONFIG_SOFTMMU
        {
            TCGv_i32 oi = tcg_const_i32(make_memop_idx(memop & ~MO_SIGN, idx));
            gen(ret, cpu_env, addr, val, oi);
            tcg_temp_free_i32(oi);
        }
#else
        gen(ret, cpu_env, addr, val);
#endif
#else
        gen_helper_exit_atomic(cpu_env);
        /* Produce a result, so that we have a well-formed opcode stream
           with respect to uses of the result in the (dead) code following.  */
        tcg_gen_movi_i64(ret, 0);
#endif /* CONFIG_ATOMIC64 */
    } else {
        TCGv_i32 v32 = tcg_temp_new_i32();
        TCGv_i32 r32 = tcg_temp_new_i32();

        tcg_gen_extrl_i64_i32(v32, val);
        do_atomic_op_i32(r32, addr, v32, idx, memop & ~MO_SIGN, table);
        tcg_temp_free_i32(v32);

        tcg_gen_extu_i32_i64(ret, r32);
        tcg_temp_free_i32(r32);

        if (memop & MO_SIGN) {
            tcg_gen_ext_i64(ret, ret, memop);
        }
    }
}
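
/* Worked example for the narrow path above (illustrative): a signed
   32-bit operation through the i64 interface, memop == MO_TESL, runs
   in 32 bits, is zero-extended into RET by tcg_gen_extu_i32_i64, and
   the final tcg_gen_ext_i64 then replaces that with the required sign
   extension.  With 0xffffffff in memory, a fetch-add of 0 therefore
   yields ret == 0xffffffffffffffff, not 0x00000000ffffffff.  */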

/* Define the helper table plus the i32 and i64 front-end expanders
   for one read-modify-write operation: NAME selects the out-of-line
   atomic helpers, OP the inline tcg_gen_* operation used for the
   serial case, and NEW chooses whether the old (0) or the new (1)
   value is returned.  */
#define GEN_ATOMIC_HELPER(NAME, OP, NEW)                                \
static void * const table_##NAME[16] = {                                \
    [MO_8] = gen_helper_atomic_##NAME##b,                               \
    [MO_16 | MO_LE] = gen_helper_atomic_##NAME##w_le,                   \
    [MO_16 | MO_BE] = gen_helper_atomic_##NAME##w_be,                   \
    [MO_32 | MO_LE] = gen_helper_atomic_##NAME##l_le,                   \
    [MO_32 | MO_BE] = gen_helper_atomic_##NAME##l_be,                   \
    WITH_ATOMIC64([MO_64 | MO_LE] = gen_helper_atomic_##NAME##q_le)     \
    WITH_ATOMIC64([MO_64 | MO_BE] = gen_helper_atomic_##NAME##q_be)     \
};                                                                      \
void tcg_gen_atomic_##NAME##_i32                                        \
    (TCGv_i32 ret, TCGv addr, TCGv_i32 val, TCGArg idx, TCGMemOp memop) \
{                                                                       \
    if (tcg_ctx->tb_cflags & CF_PARALLEL) {                             \
        do_atomic_op_i32(ret, addr, val, idx, memop, table_##NAME);     \
    } else {                                                            \
        do_nonatomic_op_i32(ret, addr, val, idx, memop, NEW,            \
                            tcg_gen_##OP##_i32);                        \
    }                                                                   \
}                                                                       \
void tcg_gen_atomic_##NAME##_i64                                        \
    (TCGv_i64 ret, TCGv addr, TCGv_i64 val, TCGArg idx, TCGMemOp memop) \
{                                                                       \
    if (tcg_ctx->tb_cflags & CF_PARALLEL) {                             \
        do_atomic_op_i64(ret, addr, val, idx, memop, table_##NAME);     \
    } else {                                                            \
        do_nonatomic_op_i64(ret, addr, val, idx, memop, NEW,            \
                            tcg_gen_##OP##_i64);                        \
    }                                                                   \
}
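
/* Sketch of one expansion (abridged): GEN_ATOMIC_HELPER(fetch_add, add, 0)
   produces approximately

       static void * const table_fetch_add[16] = {
           [MO_8] = gen_helper_atomic_fetch_addb,
           [MO_32 | MO_LE] = gen_helper_atomic_fetch_addl_le,
           ...
       };
       void tcg_gen_atomic_fetch_add_i32(TCGv_i32 ret, TCGv addr,
                                         TCGv_i32 val, TCGArg idx,
                                         TCGMemOp memop)
       {
           if (tcg_ctx->tb_cflags & CF_PARALLEL) {
               do_atomic_op_i32(ret, addr, val, idx, memop,
                                table_fetch_add);
           } else {
               do_nonatomic_op_i32(ret, addr, val, idx, memop, 0,
                                   tcg_gen_add_i32);
           }
       }

   plus the matching _i64 wrapper.  */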

/* The fetch_<op> forms return the value observed in memory before the
   operation ...  */
GEN_ATOMIC_HELPER(fetch_add, add, 0)
GEN_ATOMIC_HELPER(fetch_and, and, 0)
GEN_ATOMIC_HELPER(fetch_or, or, 0)
GEN_ATOMIC_HELPER(fetch_xor, xor, 0)
GEN_ATOMIC_HELPER(fetch_smin, smin, 0)
GEN_ATOMIC_HELPER(fetch_umin, umin, 0)
GEN_ATOMIC_HELPER(fetch_smax, smax, 0)
GEN_ATOMIC_HELPER(fetch_umax, umax, 0)

/* ... and the <op>_fetch forms return the value after it.  */
GEN_ATOMIC_HELPER(add_fetch, add, 1)
GEN_ATOMIC_HELPER(and_fetch, and, 1)
GEN_ATOMIC_HELPER(or_fetch, or, 1)
GEN_ATOMIC_HELPER(xor_fetch, xor, 1)
GEN_ATOMIC_HELPER(smin_fetch, smin, 1)
GEN_ATOMIC_HELPER(umin_fetch, umin, 1)
GEN_ATOMIC_HELPER(smax_fetch, smax, 1)
GEN_ATOMIC_HELPER(umax_fetch, umax, 1)
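
/* Worked example (illustrative): with 32-bit *addr == 5 and val == 3,
   tcg_gen_atomic_fetch_add_i32 leaves ret == 5 (the old value) while
   tcg_gen_atomic_add_fetch_i32 leaves ret == 8 (the new value); memory
   holds 8 in both cases.  */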

/* "mov2" ignores the value loaded from memory and simply produces the
   new one; combined with NEW == 0 below, the generic expansion becomes
   an exchange: memory receives B and RET the old contents.  */
static void tcg_gen_mov2_i32(TCGv_i32 r, TCGv_i32 a, TCGv_i32 b)
{
    tcg_gen_mov_i32(r, b);
}

static void tcg_gen_mov2_i64(TCGv_i64 r, TCGv_i64 a, TCGv_i64 b)
{
    tcg_gen_mov_i64(r, b);
}

GEN_ATOMIC_HELPER(xchg, mov2, 0)
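
/* Illustrative use (assumed front-end context, with "addr" and
   "mmu_idx" standing in for the caller's values):

       tcg_gen_atomic_xchg_i32(oldv, addr, newv, mmu_idx, MO_TEUL);

   stores newv to memory and returns the previous contents in oldv,
   atomically when the TB executes with CF_PARALLEL set.  */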

#undef GEN_ATOMIC_HELPER