xref: /openbmc/qemu/tcg/optimize.c (revision 8f17a975)
/*
 * Optimizations for Tiny Code Generator for QEMU
 *
 * Copyright (c) 2010 Samsung Electronics.
 * Contributed by Kirill Batuzov <batuzovk@ispras.ru>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "qemu/osdep.h"
#include "tcg/tcg-op.h"

#define CASE_OP_32_64(x)                        \
        glue(glue(case INDEX_op_, x), _i32):    \
        glue(glue(case INDEX_op_, x), _i64)

#define CASE_OP_32_64_VEC(x)                    \
        glue(glue(case INDEX_op_, x), _i32):    \
        glue(glue(case INDEX_op_, x), _i64):    \
        glue(glue(case INDEX_op_, x), _vec)

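/*
 * For example, CASE_OP_32_64(add) expands to
 *     case INDEX_op_add_i32:
 *     case INDEX_op_add_i64:
 * so a single switch arm below can cover both widths of an operation.
 */
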
typedef struct TempOptInfo {
    bool is_const;
    TCGTemp *prev_copy;
    TCGTemp *next_copy;
    uint64_t val;
    uint64_t mask;
} TempOptInfo;

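/*
 * Temps known to hold the same value sit on a circular, doubly linked
 * list threaded through next_copy/prev_copy.  A temp that is not a copy
 * of anything points back at itself, which is what ts_is_copy() below
 * tests for.
 */
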
static inline TempOptInfo *ts_info(TCGTemp *ts)
{
    return ts->state_ptr;
}

static inline TempOptInfo *arg_info(TCGArg arg)
{
    return ts_info(arg_temp(arg));
}

static inline bool ts_is_const(TCGTemp *ts)
{
    return ts_info(ts)->is_const;
}

static inline bool arg_is_const(TCGArg arg)
{
    return ts_is_const(arg_temp(arg));
}

static inline bool ts_is_copy(TCGTemp *ts)
{
    return ts_info(ts)->next_copy != ts;
}

/* Reset TEMP's state, possibly removing the temp from the list of copies.  */
static void reset_ts(TCGTemp *ts)
{
    TempOptInfo *ti = ts_info(ts);
    TempOptInfo *pi = ts_info(ti->prev_copy);
    TempOptInfo *ni = ts_info(ti->next_copy);

    ni->prev_copy = ti->prev_copy;
    pi->next_copy = ti->next_copy;
    ti->next_copy = ts;
    ti->prev_copy = ts;
    ti->is_const = false;
    ti->mask = -1;
}

static void reset_temp(TCGArg arg)
{
    reset_ts(arg_temp(arg));
}

/* Initialize and activate a temporary.  */
static void init_ts_info(TCGTempSet *temps_used, TCGTemp *ts)
{
    size_t idx = temp_idx(ts);
    TempOptInfo *ti;

    if (test_bit(idx, temps_used->l)) {
        return;
    }
    set_bit(idx, temps_used->l);

    ti = ts->state_ptr;
    if (ti == NULL) {
        ti = tcg_malloc(sizeof(TempOptInfo));
        ts->state_ptr = ti;
    }

    ti->next_copy = ts;
    ti->prev_copy = ts;
    if (ts->kind == TEMP_CONST) {
        ti->is_const = true;
        ti->val = ts->val;
        ti->mask = ts->val;
        if (TCG_TARGET_REG_BITS > 32 && ts->type == TCG_TYPE_I32) {
            /* High bits of a 32-bit quantity are garbage.  */
            ti->mask |= ~0xffffffffull;
        }
    } else {
        ti->is_const = false;
        ti->mask = -1;
    }
}

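/*
 * Note that the temps_used bitmap, not state_ptr, decides whether a
 * temp's info is valid for the current pass: state_ptr is cleared for
 * every temp at the top of tcg_optimize(), and the TempOptInfo structs
 * come from tcg_malloc()'s pool, so nothing here is explicitly freed.
 */
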
static void init_arg_info(TCGTempSet *temps_used, TCGArg arg)
{
    init_ts_info(temps_used, arg_temp(arg));
}

static TCGTemp *find_better_copy(TCGContext *s, TCGTemp *ts)
{
    TCGTemp *i, *g, *l;

    /* If this is already readonly, we can't do better. */
    if (temp_readonly(ts)) {
        return ts;
    }

    g = l = NULL;
    for (i = ts_info(ts)->next_copy; i != ts; i = ts_info(i)->next_copy) {
        if (temp_readonly(i)) {
            return i;
        } else if (i->kind > ts->kind) {
            if (i->kind == TEMP_GLOBAL) {
                g = i;
            } else if (i->kind == TEMP_LOCAL) {
                l = i;
            }
        }
    }

    /* If we didn't find a better representation, return the same temp. */
    return g ? g : l ? l : ts;
}

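/*
 * The kind comparison in find_better_copy() relies on TCGTempKind being
 * ordered from shortest-lived to longest-lived, so "i->kind > ts->kind"
 * only ever trades up: a readonly (constant) copy is best, then a
 * global, then a local temp.
 */
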
static bool ts_are_copies(TCGTemp *ts1, TCGTemp *ts2)
{
    TCGTemp *i;

    if (ts1 == ts2) {
        return true;
    }

    if (!ts_is_copy(ts1) || !ts_is_copy(ts2)) {
        return false;
    }

    for (i = ts_info(ts1)->next_copy; i != ts1; i = ts_info(i)->next_copy) {
        if (i == ts2) {
            return true;
        }
    }

    return false;
}

static bool args_are_copies(TCGArg arg1, TCGArg arg2)
{
    return ts_are_copies(arg_temp(arg1), arg_temp(arg2));
}

static void tcg_opt_gen_movi(TCGContext *s, TCGOp *op, TCGArg dst, uint64_t val)
{
    const TCGOpDef *def;
    TCGOpcode new_op;
    uint64_t mask;
    TempOptInfo *di = arg_info(dst);

    def = &tcg_op_defs[op->opc];
    if (def->flags & TCG_OPF_VECTOR) {
        new_op = INDEX_op_dupi_vec;
    } else if (def->flags & TCG_OPF_64BIT) {
        new_op = INDEX_op_movi_i64;
    } else {
        new_op = INDEX_op_movi_i32;
    }
    op->opc = new_op;
    /* TCGOP_VECL and TCGOP_VECE remain unchanged.  */
    op->args[0] = dst;
    op->args[1] = val;

    reset_temp(dst);
    di->is_const = true;
    di->val = val;
    mask = val;
    if (TCG_TARGET_REG_BITS > 32 && new_op == INDEX_op_movi_i32) {
        /* High bits of the destination are now garbage.  */
        mask |= ~0xffffffffull;
    }
    di->mask = mask;
}

static void tcg_opt_gen_mov(TCGContext *s, TCGOp *op, TCGArg dst, TCGArg src)
{
    TCGTemp *dst_ts = arg_temp(dst);
    TCGTemp *src_ts = arg_temp(src);
    const TCGOpDef *def;
    TempOptInfo *di;
    TempOptInfo *si;
    uint64_t mask;
    TCGOpcode new_op;

    if (ts_are_copies(dst_ts, src_ts)) {
        tcg_op_remove(s, op);
        return;
    }

    reset_ts(dst_ts);
    di = ts_info(dst_ts);
    si = ts_info(src_ts);
    def = &tcg_op_defs[op->opc];
    if (def->flags & TCG_OPF_VECTOR) {
        new_op = INDEX_op_mov_vec;
    } else if (def->flags & TCG_OPF_64BIT) {
        new_op = INDEX_op_mov_i64;
    } else {
        new_op = INDEX_op_mov_i32;
    }
    op->opc = new_op;
    /* TCGOP_VECL and TCGOP_VECE remain unchanged.  */
    op->args[0] = dst;
    op->args[1] = src;

    mask = si->mask;
    if (TCG_TARGET_REG_BITS > 32 && new_op == INDEX_op_mov_i32) {
        /* High bits of the destination are now garbage.  */
        mask |= ~0xffffffffull;
    }
    di->mask = mask;

    if (src_ts->type == dst_ts->type) {
        TempOptInfo *ni = ts_info(si->next_copy);

        di->next_copy = si->next_copy;
        di->prev_copy = src_ts;
        ni->prev_copy = dst_ts;
        si->next_copy = dst_ts;
        di->is_const = si->is_const;
        di->val = si->val;
    }
}

static uint64_t do_constant_folding_2(TCGOpcode op, uint64_t x, uint64_t y)
{
    uint64_t l64, h64;

    switch (op) {
    CASE_OP_32_64(add):
        return x + y;

    CASE_OP_32_64(sub):
        return x - y;

    CASE_OP_32_64(mul):
        return x * y;

    CASE_OP_32_64(and):
        return x & y;

    CASE_OP_32_64(or):
        return x | y;

    CASE_OP_32_64(xor):
        return x ^ y;

    case INDEX_op_shl_i32:
        return (uint32_t)x << (y & 31);

    case INDEX_op_shl_i64:
        return (uint64_t)x << (y & 63);

    case INDEX_op_shr_i32:
        return (uint32_t)x >> (y & 31);

    case INDEX_op_shr_i64:
        return (uint64_t)x >> (y & 63);

    case INDEX_op_sar_i32:
        return (int32_t)x >> (y & 31);

    case INDEX_op_sar_i64:
        return (int64_t)x >> (y & 63);

    case INDEX_op_rotr_i32:
        return ror32(x, y & 31);

    case INDEX_op_rotr_i64:
        return ror64(x, y & 63);

    case INDEX_op_rotl_i32:
        return rol32(x, y & 31);

    case INDEX_op_rotl_i64:
        return rol64(x, y & 63);

    CASE_OP_32_64(not):
        return ~x;

    CASE_OP_32_64(neg):
        return -x;

    CASE_OP_32_64(andc):
        return x & ~y;

    CASE_OP_32_64(orc):
        return x | ~y;

    CASE_OP_32_64(eqv):
        return ~(x ^ y);

    CASE_OP_32_64(nand):
        return ~(x & y);

    CASE_OP_32_64(nor):
        return ~(x | y);

    case INDEX_op_clz_i32:
        return (uint32_t)x ? clz32(x) : y;

    case INDEX_op_clz_i64:
        return x ? clz64(x) : y;

    case INDEX_op_ctz_i32:
        return (uint32_t)x ? ctz32(x) : y;

    case INDEX_op_ctz_i64:
        return x ? ctz64(x) : y;

    case INDEX_op_ctpop_i32:
        return ctpop32(x);

    case INDEX_op_ctpop_i64:
        return ctpop64(x);

    CASE_OP_32_64(ext8s):
        return (int8_t)x;

    CASE_OP_32_64(ext16s):
        return (int16_t)x;

    CASE_OP_32_64(ext8u):
        return (uint8_t)x;

    CASE_OP_32_64(ext16u):
        return (uint16_t)x;

    CASE_OP_32_64(bswap16):
        return bswap16(x);

    CASE_OP_32_64(bswap32):
        return bswap32(x);

    case INDEX_op_bswap64_i64:
        return bswap64(x);

    case INDEX_op_ext_i32_i64:
    case INDEX_op_ext32s_i64:
        return (int32_t)x;

    case INDEX_op_extu_i32_i64:
    case INDEX_op_extrl_i64_i32:
    case INDEX_op_ext32u_i64:
        return (uint32_t)x;

    case INDEX_op_extrh_i64_i32:
        return (uint64_t)x >> 32;

    case INDEX_op_muluh_i32:
        return ((uint64_t)(uint32_t)x * (uint32_t)y) >> 32;
    case INDEX_op_mulsh_i32:
        return ((int64_t)(int32_t)x * (int32_t)y) >> 32;

    case INDEX_op_muluh_i64:
        mulu64(&l64, &h64, x, y);
        return h64;
    case INDEX_op_mulsh_i64:
        muls64(&l64, &h64, x, y);
        return h64;

    case INDEX_op_div_i32:
        /* Avoid crashing on divide by zero, otherwise undefined.  */
        return (int32_t)x / ((int32_t)y ? : 1);
    case INDEX_op_divu_i32:
        return (uint32_t)x / ((uint32_t)y ? : 1);
    case INDEX_op_div_i64:
        return (int64_t)x / ((int64_t)y ? : 1);
    case INDEX_op_divu_i64:
        return (uint64_t)x / ((uint64_t)y ? : 1);

    case INDEX_op_rem_i32:
        return (int32_t)x % ((int32_t)y ? : 1);
    case INDEX_op_remu_i32:
        return (uint32_t)x % ((uint32_t)y ? : 1);
    case INDEX_op_rem_i64:
        return (int64_t)x % ((int64_t)y ? : 1);
    case INDEX_op_remu_i64:
        return (uint64_t)x % ((uint64_t)y ? : 1);

    default:
        fprintf(stderr,
                "Unrecognized operation %d in do_constant_folding.\n", op);
        tcg_abort();
    }
}

static uint64_t do_constant_folding(TCGOpcode op, uint64_t x, uint64_t y)
{
    const TCGOpDef *def = &tcg_op_defs[op];
    uint64_t res = do_constant_folding_2(op, x, y);
    if (!(def->flags & TCG_OPF_64BIT)) {
        res = (int32_t)res;
    }
    return res;
}

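/*
 * For example, folding add_i32 with x = 0xffffffff and y = 1 computes
 * 0x100000000 in 64-bit arithmetic; the (int32_t) truncation above then
 * yields 0, matching what the 32-bit opcode produces at runtime.
 */
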
static bool do_constant_folding_cond_32(uint32_t x, uint32_t y, TCGCond c)
{
    switch (c) {
    case TCG_COND_EQ:
        return x == y;
    case TCG_COND_NE:
        return x != y;
    case TCG_COND_LT:
        return (int32_t)x < (int32_t)y;
    case TCG_COND_GE:
        return (int32_t)x >= (int32_t)y;
    case TCG_COND_LE:
        return (int32_t)x <= (int32_t)y;
    case TCG_COND_GT:
        return (int32_t)x > (int32_t)y;
    case TCG_COND_LTU:
        return x < y;
    case TCG_COND_GEU:
        return x >= y;
    case TCG_COND_LEU:
        return x <= y;
    case TCG_COND_GTU:
        return x > y;
    default:
        tcg_abort();
    }
}

static bool do_constant_folding_cond_64(uint64_t x, uint64_t y, TCGCond c)
{
    switch (c) {
    case TCG_COND_EQ:
        return x == y;
    case TCG_COND_NE:
        return x != y;
    case TCG_COND_LT:
        return (int64_t)x < (int64_t)y;
    case TCG_COND_GE:
        return (int64_t)x >= (int64_t)y;
    case TCG_COND_LE:
        return (int64_t)x <= (int64_t)y;
    case TCG_COND_GT:
        return (int64_t)x > (int64_t)y;
    case TCG_COND_LTU:
        return x < y;
    case TCG_COND_GEU:
        return x >= y;
    case TCG_COND_LEU:
        return x <= y;
    case TCG_COND_GTU:
        return x > y;
    default:
        tcg_abort();
    }
}

static bool do_constant_folding_cond_eq(TCGCond c)
{
    switch (c) {
    case TCG_COND_GT:
    case TCG_COND_LTU:
    case TCG_COND_LT:
    case TCG_COND_GTU:
    case TCG_COND_NE:
        return 0;
    case TCG_COND_GE:
    case TCG_COND_GEU:
    case TCG_COND_LE:
    case TCG_COND_LEU:
    case TCG_COND_EQ:
        return 1;
    default:
        tcg_abort();
    }
}

/* Return 2 if the condition can't be simplified, and the result
   of the condition (0 or 1) if it can */
static TCGArg do_constant_folding_cond(TCGOpcode op, TCGArg x,
                                       TCGArg y, TCGCond c)
{
    uint64_t xv = arg_info(x)->val;
    uint64_t yv = arg_info(y)->val;

    if (arg_is_const(x) && arg_is_const(y)) {
        const TCGOpDef *def = &tcg_op_defs[op];
        tcg_debug_assert(!(def->flags & TCG_OPF_VECTOR));
        if (def->flags & TCG_OPF_64BIT) {
            return do_constant_folding_cond_64(xv, yv, c);
        } else {
            return do_constant_folding_cond_32(xv, yv, c);
        }
    } else if (args_are_copies(x, y)) {
        return do_constant_folding_cond_eq(c);
    } else if (arg_is_const(y) && yv == 0) {
        switch (c) {
        case TCG_COND_LTU:
            return 0;
        case TCG_COND_GEU:
            return 1;
        default:
            return 2;
        }
    }
    return 2;
}

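/*
 * Note that the zero special case above needs no constant on the left:
 * "x <u 0" is false and "x >=u 0" is true for every x, which is why
 * only LTU and GEU can be decided from a constant-zero RHS alone.
 */
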
/* Return 2 if the condition can't be simplified, and the result
   of the condition (0 or 1) if it can */
static TCGArg do_constant_folding_cond2(TCGArg *p1, TCGArg *p2, TCGCond c)
{
    TCGArg al = p1[0], ah = p1[1];
    TCGArg bl = p2[0], bh = p2[1];

    if (arg_is_const(bl) && arg_is_const(bh)) {
        tcg_target_ulong blv = arg_info(bl)->val;
        tcg_target_ulong bhv = arg_info(bh)->val;
        uint64_t b = deposit64(blv, 32, 32, bhv);

        if (arg_is_const(al) && arg_is_const(ah)) {
            tcg_target_ulong alv = arg_info(al)->val;
            tcg_target_ulong ahv = arg_info(ah)->val;
            uint64_t a = deposit64(alv, 32, 32, ahv);
            return do_constant_folding_cond_64(a, b, c);
        }
        if (b == 0) {
            switch (c) {
            case TCG_COND_LTU:
                return 0;
            case TCG_COND_GEU:
                return 1;
            default:
                break;
            }
        }
    }
    if (args_are_copies(al, bl) && args_are_copies(ah, bh)) {
        return do_constant_folding_cond_eq(c);
    }
    return 2;
}

static bool swap_commutative(TCGArg dest, TCGArg *p1, TCGArg *p2)
{
    TCGArg a1 = *p1, a2 = *p2;
    int sum = 0;
    sum += arg_is_const(a1);
    sum -= arg_is_const(a2);

    /* Prefer the constant in second argument, and then the form
       op a, a, b, which is better handled on non-RISC hosts. */
    if (sum > 0 || (sum == 0 && dest == a2)) {
        *p1 = a2;
        *p2 = a1;
        return true;
    }
    return false;
}

static bool swap_commutative2(TCGArg *p1, TCGArg *p2)
{
    int sum = 0;
    sum += arg_is_const(p1[0]);
    sum += arg_is_const(p1[1]);
    sum -= arg_is_const(p2[0]);
    sum -= arg_is_const(p2[1]);
    if (sum > 0) {
        TCGArg t;
        t = p1[0], p1[0] = p2[0], p2[0] = t;
        t = p1[1], p1[1] = p2[1], p2[1] = t;
        return true;
    }
    return false;
}

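/*
 * For double-word operands such as brcond2's, p1 and p2 each name a
 * (low, high) pair.  The swap is all-or-nothing so the pairs stay
 * intact, and it happens only when it strictly increases the number of
 * constants on the right-hand side.
 */
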
/* Propagate constants and copies, fold constant expressions. */
void tcg_optimize(TCGContext *s)
{
    int nb_temps, nb_globals, i;
    TCGOp *op, *op_next, *prev_mb = NULL;
    TCGTempSet temps_used;

    /* Each temp has a TempOptInfo attached through its state_ptr.
       If a temp holds a constant, then its value is kept in the info.
       If a temp is a copy of other ones, then the other copies are
       available through the info's doubly linked circular list. */

    nb_temps = s->nb_temps;
    nb_globals = s->nb_globals;

    bitmap_zero(temps_used.l, nb_temps);
    for (i = 0; i < nb_temps; ++i) {
        s->temps[i].state_ptr = NULL;
    }

    QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
        uint64_t mask, partmask, affected, tmp;
        int nb_oargs, nb_iargs;
        TCGOpcode opc = op->opc;
        const TCGOpDef *def = &tcg_op_defs[opc];

        /* Count the arguments, and initialize the temps that are
           going to be used */
        if (opc == INDEX_op_call) {
            nb_oargs = TCGOP_CALLO(op);
            nb_iargs = TCGOP_CALLI(op);
            for (i = 0; i < nb_oargs + nb_iargs; i++) {
                TCGTemp *ts = arg_temp(op->args[i]);
                if (ts) {
                    init_ts_info(&temps_used, ts);
                }
            }
        } else {
            nb_oargs = def->nb_oargs;
            nb_iargs = def->nb_iargs;
            for (i = 0; i < nb_oargs + nb_iargs; i++) {
                init_arg_info(&temps_used, op->args[i]);
            }
        }

        /* Do copy propagation */
        for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
            TCGTemp *ts = arg_temp(op->args[i]);
            if (ts && ts_is_copy(ts)) {
                op->args[i] = temp_arg(find_better_copy(s, ts));
            }
        }

        /* For commutative operations make constant second argument */
        switch (opc) {
        CASE_OP_32_64_VEC(add):
        CASE_OP_32_64_VEC(mul):
        CASE_OP_32_64_VEC(and):
        CASE_OP_32_64_VEC(or):
        CASE_OP_32_64_VEC(xor):
        CASE_OP_32_64(eqv):
        CASE_OP_32_64(nand):
        CASE_OP_32_64(nor):
        CASE_OP_32_64(muluh):
        CASE_OP_32_64(mulsh):
            swap_commutative(op->args[0], &op->args[1], &op->args[2]);
            break;
        CASE_OP_32_64(brcond):
            if (swap_commutative(-1, &op->args[0], &op->args[1])) {
                op->args[2] = tcg_swap_cond(op->args[2]);
            }
            break;
        CASE_OP_32_64(setcond):
            if (swap_commutative(op->args[0], &op->args[1], &op->args[2])) {
                op->args[3] = tcg_swap_cond(op->args[3]);
            }
            break;
        CASE_OP_32_64(movcond):
            if (swap_commutative(-1, &op->args[1], &op->args[2])) {
                op->args[5] = tcg_swap_cond(op->args[5]);
            }
            /* For movcond, we canonicalize the "false" input reg to match
               the destination reg so that the tcg backend can implement
               a "move if true" operation.  */
            if (swap_commutative(op->args[0], &op->args[4], &op->args[3])) {
                op->args[5] = tcg_invert_cond(op->args[5]);
            }
            break;
        CASE_OP_32_64(add2):
            swap_commutative(op->args[0], &op->args[2], &op->args[4]);
            swap_commutative(op->args[1], &op->args[3], &op->args[5]);
            break;
        CASE_OP_32_64(mulu2):
        CASE_OP_32_64(muls2):
            swap_commutative(op->args[0], &op->args[2], &op->args[3]);
            break;
        case INDEX_op_brcond2_i32:
            if (swap_commutative2(&op->args[0], &op->args[2])) {
                op->args[4] = tcg_swap_cond(op->args[4]);
            }
            break;
        case INDEX_op_setcond2_i32:
            if (swap_commutative2(&op->args[1], &op->args[3])) {
                op->args[5] = tcg_swap_cond(op->args[5]);
            }
            break;
        default:
            break;
        }

        /* Simplify expressions for the "shift/rot r, 0, a => movi r, 0"
           and "sub r, 0, a => neg r, a" cases.  */
        switch (opc) {
        CASE_OP_32_64(shl):
        CASE_OP_32_64(shr):
        CASE_OP_32_64(sar):
        CASE_OP_32_64(rotl):
        CASE_OP_32_64(rotr):
            if (arg_is_const(op->args[1])
                && arg_info(op->args[1])->val == 0) {
                tcg_opt_gen_movi(s, op, op->args[0], 0);
                continue;
            }
            break;
        CASE_OP_32_64_VEC(sub):
            {
                TCGOpcode neg_op;
                bool have_neg;

                if (arg_is_const(op->args[2])) {
                    /* Proceed with possible constant folding. */
                    break;
                }
                if (opc == INDEX_op_sub_i32) {
                    neg_op = INDEX_op_neg_i32;
                    have_neg = TCG_TARGET_HAS_neg_i32;
                } else if (opc == INDEX_op_sub_i64) {
                    neg_op = INDEX_op_neg_i64;
                    have_neg = TCG_TARGET_HAS_neg_i64;
                } else if (TCG_TARGET_HAS_neg_vec) {
                    TCGType type = TCGOP_VECL(op) + TCG_TYPE_V64;
                    unsigned vece = TCGOP_VECE(op);
                    neg_op = INDEX_op_neg_vec;
                    have_neg = tcg_can_emit_vec_op(neg_op, type, vece) > 0;
                } else {
                    break;
                }
                if (!have_neg) {
                    break;
                }
                if (arg_is_const(op->args[1])
                    && arg_info(op->args[1])->val == 0) {
                    op->opc = neg_op;
                    reset_temp(op->args[0]);
                    op->args[1] = op->args[2];
                    continue;
                }
            }
            break;
        CASE_OP_32_64_VEC(xor):
        CASE_OP_32_64(nand):
            if (!arg_is_const(op->args[1])
                && arg_is_const(op->args[2])
                && arg_info(op->args[2])->val == -1) {
                i = 1;
                goto try_not;
            }
            break;
        CASE_OP_32_64(nor):
            if (!arg_is_const(op->args[1])
                && arg_is_const(op->args[2])
                && arg_info(op->args[2])->val == 0) {
                i = 1;
                goto try_not;
            }
            break;
        CASE_OP_32_64_VEC(andc):
            if (!arg_is_const(op->args[2])
                && arg_is_const(op->args[1])
                && arg_info(op->args[1])->val == -1) {
                i = 2;
                goto try_not;
            }
            break;
        CASE_OP_32_64_VEC(orc):
        CASE_OP_32_64(eqv):
            if (!arg_is_const(op->args[2])
                && arg_is_const(op->args[1])
                && arg_info(op->args[1])->val == 0) {
                i = 2;
                goto try_not;
            }
            break;
        try_not:
            {
                TCGOpcode not_op;
                bool have_not;

                if (def->flags & TCG_OPF_VECTOR) {
                    not_op = INDEX_op_not_vec;
                    have_not = TCG_TARGET_HAS_not_vec;
                } else if (def->flags & TCG_OPF_64BIT) {
                    not_op = INDEX_op_not_i64;
                    have_not = TCG_TARGET_HAS_not_i64;
                } else {
                    not_op = INDEX_op_not_i32;
                    have_not = TCG_TARGET_HAS_not_i32;
                }
                if (!have_not) {
                    break;
                }
                op->opc = not_op;
                reset_temp(op->args[0]);
                op->args[1] = op->args[i];
                continue;
            }
        default:
            break;
        }

        /* Simplify expression for "op r, a, const => mov r, a" cases */
        switch (opc) {
        CASE_OP_32_64_VEC(add):
        CASE_OP_32_64_VEC(sub):
        CASE_OP_32_64_VEC(or):
        CASE_OP_32_64_VEC(xor):
        CASE_OP_32_64_VEC(andc):
        CASE_OP_32_64(shl):
        CASE_OP_32_64(shr):
        CASE_OP_32_64(sar):
        CASE_OP_32_64(rotl):
        CASE_OP_32_64(rotr):
            if (!arg_is_const(op->args[1])
                && arg_is_const(op->args[2])
                && arg_info(op->args[2])->val == 0) {
                tcg_opt_gen_mov(s, op, op->args[0], op->args[1]);
                continue;
            }
            break;
        CASE_OP_32_64_VEC(and):
        CASE_OP_32_64_VEC(orc):
        CASE_OP_32_64(eqv):
            if (!arg_is_const(op->args[1])
                && arg_is_const(op->args[2])
                && arg_info(op->args[2])->val == -1) {
                tcg_opt_gen_mov(s, op, op->args[0], op->args[1]);
                continue;
            }
            break;
        default:
            break;
        }

        /* Simplify using known-zero bits.  Currently only ops with a single
           output argument are supported.  */
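        /*
         * Throughout this switch, "mask" is the set of bits that may
         * still be nonzero in the result and "affected" the set of
         * input bits that can influence it: affected == 0 turns the op
         * into a plain copy, and (part)mask == 0 into a constant zero.
         */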
        mask = -1;
        affected = -1;
        switch (opc) {
        CASE_OP_32_64(ext8s):
            if ((arg_info(op->args[1])->mask & 0x80) != 0) {
                break;
            }
            QEMU_FALLTHROUGH;
        CASE_OP_32_64(ext8u):
            mask = 0xff;
            goto and_const;
        CASE_OP_32_64(ext16s):
            if ((arg_info(op->args[1])->mask & 0x8000) != 0) {
                break;
            }
            QEMU_FALLTHROUGH;
        CASE_OP_32_64(ext16u):
            mask = 0xffff;
            goto and_const;
        case INDEX_op_ext32s_i64:
            if ((arg_info(op->args[1])->mask & 0x80000000) != 0) {
                break;
            }
            QEMU_FALLTHROUGH;
        case INDEX_op_ext32u_i64:
            mask = 0xffffffffU;
            goto and_const;

        CASE_OP_32_64(and):
            mask = arg_info(op->args[2])->mask;
            if (arg_is_const(op->args[2])) {
        and_const:
                affected = arg_info(op->args[1])->mask & ~mask;
            }
            mask = arg_info(op->args[1])->mask & mask;
            break;

        case INDEX_op_ext_i32_i64:
            if ((arg_info(op->args[1])->mask & 0x80000000) != 0) {
                break;
            }
            QEMU_FALLTHROUGH;
        case INDEX_op_extu_i32_i64:
            /* We do not compute affected as it is a size changing op.  */
            mask = (uint32_t)arg_info(op->args[1])->mask;
            break;

        CASE_OP_32_64(andc):
            /* Known-zeros does not imply known-ones.  Therefore unless
               op->args[2] is constant, we can't infer anything from it.  */
            if (arg_is_const(op->args[2])) {
                mask = ~arg_info(op->args[2])->mask;
                goto and_const;
            }
            /* But we certainly know nothing outside args[1] may be set. */
            mask = arg_info(op->args[1])->mask;
            break;

        case INDEX_op_sar_i32:
            if (arg_is_const(op->args[2])) {
                tmp = arg_info(op->args[2])->val & 31;
                mask = (int32_t)arg_info(op->args[1])->mask >> tmp;
            }
            break;
        case INDEX_op_sar_i64:
            if (arg_is_const(op->args[2])) {
                tmp = arg_info(op->args[2])->val & 63;
                mask = (int64_t)arg_info(op->args[1])->mask >> tmp;
            }
            break;

        case INDEX_op_shr_i32:
            if (arg_is_const(op->args[2])) {
                tmp = arg_info(op->args[2])->val & 31;
                mask = (uint32_t)arg_info(op->args[1])->mask >> tmp;
            }
            break;
        case INDEX_op_shr_i64:
            if (arg_is_const(op->args[2])) {
                tmp = arg_info(op->args[2])->val & 63;
                mask = (uint64_t)arg_info(op->args[1])->mask >> tmp;
            }
            break;

        case INDEX_op_extrl_i64_i32:
            mask = (uint32_t)arg_info(op->args[1])->mask;
            break;
        case INDEX_op_extrh_i64_i32:
            mask = (uint64_t)arg_info(op->args[1])->mask >> 32;
            break;

        CASE_OP_32_64(shl):
            if (arg_is_const(op->args[2])) {
                tmp = arg_info(op->args[2])->val & (TCG_TARGET_REG_BITS - 1);
                mask = arg_info(op->args[1])->mask << tmp;
            }
            break;

        CASE_OP_32_64(neg):
            /* Set to 1 all bits to the left of the rightmost.  */
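            /*
             * -(m & -m) keeps only the lowest possibly-set bit of m and
             * sign-extends it: e.g. an input mask of 0b0110 gives
             * -(0b0010) = ...11110, since negating a value can never
             * set bits below the lowest set bit of its operand.
             */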
            mask = -(arg_info(op->args[1])->mask
                     & -arg_info(op->args[1])->mask);
            break;

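        /*
         * deposit64(a, pos, len, b) replaces the len-bit field of a at
         * bit position pos with the low len bits of b, so the result
         * mask is the two input masks spliced together the same way.
         */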
        CASE_OP_32_64(deposit):
            mask = deposit64(arg_info(op->args[1])->mask,
                             op->args[3], op->args[4],
                             arg_info(op->args[2])->mask);
            break;

        CASE_OP_32_64(extract):
            mask = extract64(arg_info(op->args[1])->mask,
                             op->args[2], op->args[3]);
            if (op->args[2] == 0) {
                affected = arg_info(op->args[1])->mask & ~mask;
            }
            break;
        CASE_OP_32_64(sextract):
            mask = sextract64(arg_info(op->args[1])->mask,
                              op->args[2], op->args[3]);
            if (op->args[2] == 0 && (tcg_target_long)mask >= 0) {
                affected = arg_info(op->args[1])->mask & ~mask;
            }
            break;

        CASE_OP_32_64(or):
        CASE_OP_32_64(xor):
            mask = arg_info(op->args[1])->mask | arg_info(op->args[2])->mask;
            break;

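        /*
         * clz/ctz yield either a bit count, which fits in the low five
         * (respectively six) bits, or the fallback value in args[2]
         * when the input is zero; OR-ing the two covers both outcomes.
         */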
        case INDEX_op_clz_i32:
        case INDEX_op_ctz_i32:
            mask = arg_info(op->args[2])->mask | 31;
            break;

        case INDEX_op_clz_i64:
        case INDEX_op_ctz_i64:
            mask = arg_info(op->args[2])->mask | 63;
            break;

        case INDEX_op_ctpop_i32:
            mask = 32 | 31;
            break;
        case INDEX_op_ctpop_i64:
            mask = 64 | 63;
            break;

        CASE_OP_32_64(setcond):
        case INDEX_op_setcond2_i32:
            mask = 1;
            break;

        CASE_OP_32_64(movcond):
            mask = arg_info(op->args[3])->mask | arg_info(op->args[4])->mask;
            break;

        CASE_OP_32_64(ld8u):
            mask = 0xff;
            break;
        CASE_OP_32_64(ld16u):
            mask = 0xffff;
            break;
        case INDEX_op_ld32u_i64:
            mask = 0xffffffffu;
            break;

        CASE_OP_32_64(qemu_ld):
            {
                TCGMemOpIdx oi = op->args[nb_oargs + nb_iargs];
                MemOp mop = get_memop(oi);
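                /*
                 * An unsigned load can set only as many bits as its
                 * access size: e.g. for a 16-bit load, MO_SIZE gives 1,
                 * so the mask is (2ULL << 15) - 1 = 0xffff.
                 */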
                if (!(mop & MO_SIGN)) {
                    mask = (2ULL << ((8 << (mop & MO_SIZE)) - 1)) - 1;
                }
            }
            break;

        default:
            break;
        }

        /* 32-bit ops generate 32-bit results.  For the "result is zero"
           test below, we can ignore the high bits, but for further
           optimizations we need to record that they contain garbage.  */
        partmask = mask;
        if (!(def->flags & TCG_OPF_64BIT)) {
            mask |= ~(tcg_target_ulong)0xffffffffu;
            partmask &= 0xffffffffu;
            affected &= 0xffffffffu;
        }

        if (partmask == 0) {
            tcg_debug_assert(nb_oargs == 1);
            tcg_opt_gen_movi(s, op, op->args[0], 0);
            continue;
        }
        if (affected == 0) {
            tcg_debug_assert(nb_oargs == 1);
            tcg_opt_gen_mov(s, op, op->args[0], op->args[1]);
            continue;
        }

        /* Simplify expression for "op r, a, 0 => movi r, 0" cases */
        switch (opc) {
        CASE_OP_32_64_VEC(and):
        CASE_OP_32_64_VEC(mul):
        CASE_OP_32_64(muluh):
        CASE_OP_32_64(mulsh):
            if (arg_is_const(op->args[2])
                && arg_info(op->args[2])->val == 0) {
                tcg_opt_gen_movi(s, op, op->args[0], 0);
                continue;
            }
            break;
        default:
            break;
        }

        /* Simplify expression for "op r, a, a => mov r, a" cases */
        switch (opc) {
        CASE_OP_32_64_VEC(or):
        CASE_OP_32_64_VEC(and):
            if (args_are_copies(op->args[1], op->args[2])) {
                tcg_opt_gen_mov(s, op, op->args[0], op->args[1]);
                continue;
            }
            break;
        default:
            break;
        }

        /* Simplify expression for "op r, a, a => movi r, 0" cases */
        switch (opc) {
        CASE_OP_32_64_VEC(andc):
        CASE_OP_32_64_VEC(sub):
        CASE_OP_32_64_VEC(xor):
            if (args_are_copies(op->args[1], op->args[2])) {
                tcg_opt_gen_movi(s, op, op->args[0], 0);
                continue;
            }
            break;
        default:
            break;
        }

        /* Propagate constants through copy operations and do constant
           folding.  Constants will be substituted for arguments by the
           register allocator where needed and possible.  Also detect copies. */
        switch (opc) {
        CASE_OP_32_64_VEC(mov):
            tcg_opt_gen_mov(s, op, op->args[0], op->args[1]);
            break;
        CASE_OP_32_64(movi):
        case INDEX_op_dupi_vec:
            tcg_opt_gen_movi(s, op, op->args[0], op->args[1]);
            break;

        case INDEX_op_dup_vec:
            if (arg_is_const(op->args[1])) {
                tmp = arg_info(op->args[1])->val;
                tmp = dup_const(TCGOP_VECE(op), tmp);
                tcg_opt_gen_movi(s, op, op->args[0], tmp);
                break;
            }
            goto do_default;

        case INDEX_op_dup2_vec:
            assert(TCG_TARGET_REG_BITS == 32);
            if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
                tmp = arg_info(op->args[1])->val;
                if (tmp == arg_info(op->args[2])->val) {
                    tcg_opt_gen_movi(s, op, op->args[0], tmp);
                    break;
                }
            } else if (args_are_copies(op->args[1], op->args[2])) {
                op->opc = INDEX_op_dup_vec;
                TCGOP_VECE(op) = MO_32;
                nb_iargs = 1;
            }
            goto do_default;

        CASE_OP_32_64(not):
        CASE_OP_32_64(neg):
        CASE_OP_32_64(ext8s):
        CASE_OP_32_64(ext8u):
        CASE_OP_32_64(ext16s):
        CASE_OP_32_64(ext16u):
        CASE_OP_32_64(ctpop):
        CASE_OP_32_64(bswap16):
        CASE_OP_32_64(bswap32):
        case INDEX_op_bswap64_i64:
        case INDEX_op_ext32s_i64:
        case INDEX_op_ext32u_i64:
        case INDEX_op_ext_i32_i64:
        case INDEX_op_extu_i32_i64:
        case INDEX_op_extrl_i64_i32:
        case INDEX_op_extrh_i64_i32:
            if (arg_is_const(op->args[1])) {
                tmp = do_constant_folding(opc, arg_info(op->args[1])->val, 0);
                tcg_opt_gen_movi(s, op, op->args[0], tmp);
                break;
            }
            goto do_default;

        CASE_OP_32_64(add):
        CASE_OP_32_64(sub):
        CASE_OP_32_64(mul):
        CASE_OP_32_64(or):
        CASE_OP_32_64(and):
        CASE_OP_32_64(xor):
        CASE_OP_32_64(shl):
        CASE_OP_32_64(shr):
        CASE_OP_32_64(sar):
        CASE_OP_32_64(rotl):
        CASE_OP_32_64(rotr):
        CASE_OP_32_64(andc):
        CASE_OP_32_64(orc):
        CASE_OP_32_64(eqv):
        CASE_OP_32_64(nand):
        CASE_OP_32_64(nor):
        CASE_OP_32_64(muluh):
        CASE_OP_32_64(mulsh):
        CASE_OP_32_64(div):
        CASE_OP_32_64(divu):
        CASE_OP_32_64(rem):
        CASE_OP_32_64(remu):
            if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
                tmp = do_constant_folding(opc, arg_info(op->args[1])->val,
                                          arg_info(op->args[2])->val);
                tcg_opt_gen_movi(s, op, op->args[0], tmp);
                break;
            }
            goto do_default;

        CASE_OP_32_64(clz):
        CASE_OP_32_64(ctz):
            if (arg_is_const(op->args[1])) {
                TCGArg v = arg_info(op->args[1])->val;
                if (v != 0) {
                    tmp = do_constant_folding(opc, v, 0);
                    tcg_opt_gen_movi(s, op, op->args[0], tmp);
                } else {
                    tcg_opt_gen_mov(s, op, op->args[0], op->args[2]);
                }
                break;
            }
            goto do_default;

        CASE_OP_32_64(deposit):
            if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
                tmp = deposit64(arg_info(op->args[1])->val,
                                op->args[3], op->args[4],
                                arg_info(op->args[2])->val);
                tcg_opt_gen_movi(s, op, op->args[0], tmp);
                break;
            }
            goto do_default;

        CASE_OP_32_64(extract):
            if (arg_is_const(op->args[1])) {
                tmp = extract64(arg_info(op->args[1])->val,
                                op->args[2], op->args[3]);
                tcg_opt_gen_movi(s, op, op->args[0], tmp);
                break;
            }
            goto do_default;

        CASE_OP_32_64(sextract):
            if (arg_is_const(op->args[1])) {
                tmp = sextract64(arg_info(op->args[1])->val,
                                 op->args[2], op->args[3]);
                tcg_opt_gen_movi(s, op, op->args[0], tmp);
                break;
            }
            goto do_default;

        CASE_OP_32_64(extract2):
            if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
                uint64_t v1 = arg_info(op->args[1])->val;
                uint64_t v2 = arg_info(op->args[2])->val;
                int shr = op->args[3];

                if (opc == INDEX_op_extract2_i64) {
                    tmp = (v1 >> shr) | (v2 << (64 - shr));
                } else {
                    tmp = (int32_t)(((uint32_t)v1 >> shr) |
                                    ((uint32_t)v2 << (32 - shr)));
                }
                tcg_opt_gen_movi(s, op, op->args[0], tmp);
                break;
            }
            goto do_default;

        CASE_OP_32_64(setcond):
            tmp = do_constant_folding_cond(opc, op->args[1],
                                           op->args[2], op->args[3]);
            if (tmp != 2) {
                tcg_opt_gen_movi(s, op, op->args[0], tmp);
                break;
            }
            goto do_default;

        CASE_OP_32_64(brcond):
            tmp = do_constant_folding_cond(opc, op->args[0],
                                           op->args[1], op->args[2]);
            if (tmp != 2) {
                if (tmp) {
                    bitmap_zero(temps_used.l, nb_temps);
                    op->opc = INDEX_op_br;
                    op->args[0] = op->args[3];
                } else {
                    tcg_op_remove(s, op);
                }
                break;
            }
            goto do_default;

        CASE_OP_32_64(movcond):
            tmp = do_constant_folding_cond(opc, op->args[1],
                                           op->args[2], op->args[5]);
            if (tmp != 2) {
                tcg_opt_gen_mov(s, op, op->args[0], op->args[4-tmp]);
                break;
            }
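            /*
             * A movcond selecting between constants 1 and 0 is just the
             * condition value itself (or its inverse), so rewrite it
             * into the cheaper setcond form.
             */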
            if (arg_is_const(op->args[3]) && arg_is_const(op->args[4])) {
                uint64_t tv = arg_info(op->args[3])->val;
                uint64_t fv = arg_info(op->args[4])->val;
                TCGCond cond = op->args[5];

                if (fv == 1 && tv == 0) {
                    cond = tcg_invert_cond(cond);
                } else if (!(tv == 1 && fv == 0)) {
                    goto do_default;
                }
                op->args[3] = cond;
                op->opc = opc = (opc == INDEX_op_movcond_i32
                                 ? INDEX_op_setcond_i32
                                 : INDEX_op_setcond_i64);
                nb_iargs = 2;
            }
            goto do_default;

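        /*
         * add2/sub2 compute a 64-bit value in two 32-bit halves; with
         * all four inputs constant the whole thing folds to two movi
         * ops, the second of which needs a freshly inserted TCGOp since
         * a single op can only be rewritten into one movi.
         */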
        case INDEX_op_add2_i32:
        case INDEX_op_sub2_i32:
            if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])
                && arg_is_const(op->args[4]) && arg_is_const(op->args[5])) {
                uint32_t al = arg_info(op->args[2])->val;
                uint32_t ah = arg_info(op->args[3])->val;
                uint32_t bl = arg_info(op->args[4])->val;
                uint32_t bh = arg_info(op->args[5])->val;
                uint64_t a = ((uint64_t)ah << 32) | al;
                uint64_t b = ((uint64_t)bh << 32) | bl;
                TCGArg rl, rh;
                TCGOp *op2 = tcg_op_insert_before(s, op, INDEX_op_movi_i32);

                if (opc == INDEX_op_add2_i32) {
                    a += b;
                } else {
                    a -= b;
                }

                rl = op->args[0];
                rh = op->args[1];
                tcg_opt_gen_movi(s, op, rl, (int32_t)a);
                tcg_opt_gen_movi(s, op2, rh, (int32_t)(a >> 32));
                break;
            }
            goto do_default;

        case INDEX_op_mulu2_i32:
            if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])) {
                uint32_t a = arg_info(op->args[2])->val;
                uint32_t b = arg_info(op->args[3])->val;
                uint64_t r = (uint64_t)a * b;
                TCGArg rl, rh;
                TCGOp *op2 = tcg_op_insert_before(s, op, INDEX_op_movi_i32);

                rl = op->args[0];
                rh = op->args[1];
                tcg_opt_gen_movi(s, op, rl, (int32_t)r);
                tcg_opt_gen_movi(s, op2, rh, (int32_t)(r >> 32));
                break;
            }
            goto do_default;

        case INDEX_op_brcond2_i32:
            tmp = do_constant_folding_cond2(&op->args[0], &op->args[2],
                                            op->args[4]);
            if (tmp != 2) {
                if (tmp) {
            do_brcond_true:
                    bitmap_zero(temps_used.l, nb_temps);
                    op->opc = INDEX_op_br;
                    op->args[0] = op->args[5];
                } else {
            do_brcond_false:
                    tcg_op_remove(s, op);
                }
            } else if ((op->args[4] == TCG_COND_LT
                        || op->args[4] == TCG_COND_GE)
                       && arg_is_const(op->args[2])
                       && arg_info(op->args[2])->val == 0
                       && arg_is_const(op->args[3])
                       && arg_info(op->args[3])->val == 0) {
                /* Simplify LT/GE comparisons vs zero to a single compare
                   vs the high word of the input.  */
            do_brcond_high:
                bitmap_zero(temps_used.l, nb_temps);
                op->opc = INDEX_op_brcond_i32;
                op->args[0] = op->args[1];
                op->args[1] = op->args[3];
                op->args[2] = op->args[4];
                op->args[3] = op->args[5];
            } else if (op->args[4] == TCG_COND_EQ) {
                /* Simplify EQ comparisons where one of the pairs
                   can be simplified.  */
                tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
                                               op->args[0], op->args[2],
                                               TCG_COND_EQ);
                if (tmp == 0) {
                    goto do_brcond_false;
                } else if (tmp == 1) {
                    goto do_brcond_high;
                }
                tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
                                               op->args[1], op->args[3],
                                               TCG_COND_EQ);
                if (tmp == 0) {
                    goto do_brcond_false;
                } else if (tmp != 1) {
                    goto do_default;
                }
            do_brcond_low:
                bitmap_zero(temps_used.l, nb_temps);
                op->opc = INDEX_op_brcond_i32;
                op->args[1] = op->args[2];
                op->args[2] = op->args[4];
                op->args[3] = op->args[5];
            } else if (op->args[4] == TCG_COND_NE) {
                /* Simplify NE comparisons where one of the pairs
                   can be simplified.  */
                tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
                                               op->args[0], op->args[2],
                                               TCG_COND_NE);
                if (tmp == 0) {
                    goto do_brcond_high;
                } else if (tmp == 1) {
                    goto do_brcond_true;
                }
                tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
                                               op->args[1], op->args[3],
                                               TCG_COND_NE);
                if (tmp == 0) {
                    goto do_brcond_low;
                } else if (tmp == 1) {
                    goto do_brcond_true;
                }
                goto do_default;
            } else {
                goto do_default;
            }
            break;

        case INDEX_op_setcond2_i32:
            tmp = do_constant_folding_cond2(&op->args[1], &op->args[3],
                                            op->args[5]);
            if (tmp != 2) {
            do_setcond_const:
                tcg_opt_gen_movi(s, op, op->args[0], tmp);
            } else if ((op->args[5] == TCG_COND_LT
                        || op->args[5] == TCG_COND_GE)
                       && arg_is_const(op->args[3])
                       && arg_info(op->args[3])->val == 0
                       && arg_is_const(op->args[4])
                       && arg_info(op->args[4])->val == 0) {
                /* Simplify LT/GE comparisons vs zero to a single compare
                   vs the high word of the input.  */
            do_setcond_high:
                reset_temp(op->args[0]);
                arg_info(op->args[0])->mask = 1;
                op->opc = INDEX_op_setcond_i32;
                op->args[1] = op->args[2];
                op->args[2] = op->args[4];
                op->args[3] = op->args[5];
            } else if (op->args[5] == TCG_COND_EQ) {
                /* Simplify EQ comparisons where one of the pairs
                   can be simplified.  */
                tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
                                               op->args[1], op->args[3],
                                               TCG_COND_EQ);
                if (tmp == 0) {
                    goto do_setcond_const;
                } else if (tmp == 1) {
                    goto do_setcond_high;
                }
                tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
                                               op->args[2], op->args[4],
                                               TCG_COND_EQ);
                if (tmp == 0) {
                    goto do_setcond_high;
                } else if (tmp != 1) {
                    goto do_default;
                }
            do_setcond_low:
                reset_temp(op->args[0]);
                arg_info(op->args[0])->mask = 1;
                op->opc = INDEX_op_setcond_i32;
                op->args[2] = op->args[3];
                op->args[3] = op->args[5];
            } else if (op->args[5] == TCG_COND_NE) {
                /* Simplify NE comparisons where one of the pairs
                   can be simplified.  */
                tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
                                               op->args[1], op->args[3],
                                               TCG_COND_NE);
                if (tmp == 0) {
                    goto do_setcond_high;
                } else if (tmp == 1) {
                    goto do_setcond_const;
                }
                tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
                                               op->args[2], op->args[4],
                                               TCG_COND_NE);
                if (tmp == 0) {
                    goto do_setcond_low;
                } else if (tmp == 1) {
                    goto do_setcond_const;
                }
                goto do_default;
            } else {
                goto do_default;
            }
            break;

        case INDEX_op_call:
            if (!(op->args[nb_oargs + nb_iargs + 1]
                  & (TCG_CALL_NO_READ_GLOBALS | TCG_CALL_NO_WRITE_GLOBALS))) {
                for (i = 0; i < nb_globals; i++) {
                    if (test_bit(i, temps_used.l)) {
                        reset_ts(&s->temps[i]);
                    }
                }
            }
            goto do_reset_output;

        default:
        do_default:
            /* Default case: we know nothing about the operation (or were
               unable to compute its result), so no propagation is done.
               We trash everything if the operation ends a basic block;
               otherwise we only trash the output args.  "mask" is the
               non-zero bits mask for the first output arg.  */
            if (def->flags & TCG_OPF_BB_END) {
                bitmap_zero(temps_used.l, nb_temps);
            } else {
        do_reset_output:
                for (i = 0; i < nb_oargs; i++) {
                    reset_temp(op->args[i]);
                    /* Save the corresponding known-zero bits mask for the
                       first output argument (only one supported so far). */
                    if (i == 0) {
                        arg_info(op->args[i])->mask = mask;
                    }
                }
            }
            break;
        }

        /* Eliminate duplicate and redundant fence instructions.  */
        if (prev_mb) {
            switch (opc) {
            case INDEX_op_mb:
                /* Merge two barriers of the same type into one,
                 * or a weaker barrier into a stronger one,
                 * or two weaker barriers into a stronger one.
                 *   mb X; mb Y => mb X|Y
                 *   mb; strl => mb; st
                 *   ldaq; mb => ld; mb
                 *   ldaq; strl => ld; mb; st
                 * Other combinations are also merged into a strong
                 * barrier.  This is stricter than specified but for
                 * the purposes of TCG is better than not optimizing.
                 */
                prev_mb->args[0] |= op->args[0];
                tcg_op_remove(s, op);
                break;

            default:
                /* Opcodes that end the block stop the optimization.  */
                if ((def->flags & TCG_OPF_BB_END) == 0) {
                    break;
                }
                /* fallthru */
            case INDEX_op_qemu_ld_i32:
            case INDEX_op_qemu_ld_i64:
            case INDEX_op_qemu_st_i32:
            case INDEX_op_qemu_st8_i32:
            case INDEX_op_qemu_st_i64:
            case INDEX_op_call:
                /* Opcodes that touch guest memory stop the optimization.  */
                prev_mb = NULL;
                break;
            }
        } else if (opc == INDEX_op_mb) {
            prev_mb = op;
        }
    }
}