xref: /openbmc/qemu/tcg/optimize.c (revision 2ab6c494)
1 /*
2  * Optimizations for Tiny Code Generator for QEMU
3  *
4  * Copyright (c) 2010 Samsung Electronics.
5  * Contributed by Kirill Batuzov <batuzovk@ispras.ru>
6  *
7  * Permission is hereby granted, free of charge, to any person obtaining a copy
8  * of this software and associated documentation files (the "Software"), to deal
9  * in the Software without restriction, including without limitation the rights
10  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11  * copies of the Software, and to permit persons to whom the Software is
12  * furnished to do so, subject to the following conditions:
13  *
14  * The above copyright notice and this permission notice shall be included in
15  * all copies or substantial portions of the Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23  * THE SOFTWARE.
24  */
25 
26 #include "qemu/osdep.h"
27 #include "tcg/tcg-op.h"
28 
/* Expand to the _i32 and _i64 case labels of opcode X, for switches that
   handle both widths identically.  */
#define CASE_OP_32_64(x)                        \
        glue(glue(case INDEX_op_, x), _i32):    \
        glue(glue(case INDEX_op_, x), _i64)

/* As CASE_OP_32_64, but also covering the _vec variant of opcode X.  */
#define CASE_OP_32_64_VEC(x)                    \
        glue(glue(case INDEX_op_, x), _i32):    \
        glue(glue(case INDEX_op_, x), _i64):    \
        glue(glue(case INDEX_op_, x), _vec)
37 
/* Per-temp optimizer state, attached to TCGTemp.state_ptr by init_ts_info.  */
struct tcg_temp_info {
    bool is_const;          /* the temp holds the constant VAL below */
    TCGTemp *prev_copy;     /* doubly linked circular list of temps that */
    TCGTemp *next_copy;     /* currently hold the same value (self if none) */
    tcg_target_ulong val;   /* constant value; only valid when is_const */
    tcg_target_ulong mask;  /* bits that may be nonzero (a 0 bit is known zero) */
};
45 
/* Return the optimizer state attached to TS (set up by init_ts_info).  */
static inline struct tcg_temp_info *ts_info(TCGTemp *ts)
{
    return ts->state_ptr;
}
50 
/* Return the optimizer state for the temp behind operand ARG.  */
static inline struct tcg_temp_info *arg_info(TCGArg arg)
{
    return ts_info(arg_temp(arg));
}
55 
/* True if TS is known to hold a constant value.  */
static inline bool ts_is_const(TCGTemp *ts)
{
    return ts_info(ts)->is_const;
}
60 
/* True if the temp behind operand ARG is known to hold a constant.  */
static inline bool arg_is_const(TCGArg arg)
{
    return ts_is_const(arg_temp(arg));
}
65 
/* True if TS is linked with at least one other temp, i.e. its copy list
   is not the singleton {TS}.  */
static inline bool ts_is_copy(TCGTemp *ts)
{
    return ts_info(ts)->next_copy != ts;
}
70 
/* Reset TEMP's state, possibly removing the temp from the list of copies.  */
static void reset_ts(TCGTemp *ts)
{
    struct tcg_temp_info *ti = ts_info(ts);
    struct tcg_temp_info *pi = ts_info(ti->prev_copy);
    struct tcg_temp_info *ni = ts_info(ti->next_copy);

    /* Unlink TS from its circular copy list...  */
    ni->prev_copy = ti->prev_copy;
    pi->next_copy = ti->next_copy;
    /* ...and make it a singleton list again.  */
    ti->next_copy = ts;
    ti->prev_copy = ts;
    ti->is_const = false;
    /* All value bits are unknown again.  */
    ti->mask = -1;
}
85 
/* Reset the state of the temp behind operand ARG (see reset_ts).  */
static void reset_temp(TCGArg arg)
{
    reset_ts(arg_temp(arg));
}
90 
/* Initialize and activate a temporary.  */
static void init_ts_info(struct tcg_temp_info *infos,
                         TCGTempSet *temps_used, TCGTemp *ts)
{
    size_t idx = temp_idx(ts);
    /* Initialize each temp only once per optimization pass; TEMPS_USED
       records which entries of INFOS have been set up.  */
    if (!test_bit(idx, temps_used->l)) {
        struct tcg_temp_info *ti = &infos[idx];

        ts->state_ptr = ti;
        /* Singleton copy list: the temp is only a copy of itself.  */
        ti->next_copy = ts;
        ti->prev_copy = ts;
        ti->is_const = false;
        /* All value bits unknown.  */
        ti->mask = -1;
        set_bit(idx, temps_used->l);
    }
}
107 
/* Initialize and activate the temp behind operand ARG (see init_ts_info).  */
static void init_arg_info(struct tcg_temp_info *infos,
                          TCGTempSet *temps_used, TCGArg arg)
{
    init_ts_info(infos, temps_used, arg_temp(arg));
}
113 
114 static TCGTemp *find_better_copy(TCGContext *s, TCGTemp *ts)
115 {
116     TCGTemp *i;
117 
118     /* If this is already a global, we can't do better. */
119     if (ts->temp_global) {
120         return ts;
121     }
122 
123     /* Search for a global first. */
124     for (i = ts_info(ts)->next_copy; i != ts; i = ts_info(i)->next_copy) {
125         if (i->temp_global) {
126             return i;
127         }
128     }
129 
130     /* If it is a temp, search for a temp local. */
131     if (!ts->temp_local) {
132         for (i = ts_info(ts)->next_copy; i != ts; i = ts_info(i)->next_copy) {
133             if (ts->temp_local) {
134                 return i;
135             }
136         }
137     }
138 
139     /* Failure to find a better representation, return the same temp. */
140     return ts;
141 }
142 
143 static bool ts_are_copies(TCGTemp *ts1, TCGTemp *ts2)
144 {
145     TCGTemp *i;
146 
147     if (ts1 == ts2) {
148         return true;
149     }
150 
151     if (!ts_is_copy(ts1) || !ts_is_copy(ts2)) {
152         return false;
153     }
154 
155     for (i = ts_info(ts1)->next_copy; i != ts1; i = ts_info(i)->next_copy) {
156         if (i == ts2) {
157             return true;
158         }
159     }
160 
161     return false;
162 }
163 
/* True if the temps behind operands ARG1 and ARG2 hold the same value.  */
static bool args_are_copies(TCGArg arg1, TCGArg arg2)
{
    return ts_are_copies(arg_temp(arg1), arg_temp(arg2));
}
168 
/* Rewrite OP in place into a "movi DST, VAL" of the width/class implied
   by the original opcode, and record DST as holding the constant VAL.  */
static void tcg_opt_gen_movi(TCGContext *s, TCGOp *op, TCGArg dst, TCGArg val)
{
    const TCGOpDef *def;
    TCGOpcode new_op;
    tcg_target_ulong mask;
    struct tcg_temp_info *di = arg_info(dst);

    /* Pick the movi variant matching the original opcode's class.  */
    def = &tcg_op_defs[op->opc];
    if (def->flags & TCG_OPF_VECTOR) {
        new_op = INDEX_op_dupi_vec;
    } else if (def->flags & TCG_OPF_64BIT) {
        new_op = INDEX_op_movi_i64;
    } else {
        new_op = INDEX_op_movi_i32;
    }
    op->opc = new_op;
    /* TCGOP_VECL and TCGOP_VECE remain unchanged.  */
    op->args[0] = dst;
    op->args[1] = val;

    /* DST stops being a copy of anything; it now holds exactly VAL.  */
    reset_temp(dst);
    di->is_const = true;
    di->val = val;
    mask = val;
    if (TCG_TARGET_REG_BITS > 32 && new_op == INDEX_op_movi_i32) {
        /* High bits of the destination are now garbage.  */
        mask |= ~0xffffffffull;
    }
    di->mask = mask;
}
199 
/* Rewrite OP in place into a "mov DST, SRC" of the width/class implied by
   the original opcode, propagate SRC's known bits/constness to DST, and
   link DST into SRC's copy list when the types allow it.  If DST and SRC
   are already copies the op is dropped entirely.  */
static void tcg_opt_gen_mov(TCGContext *s, TCGOp *op, TCGArg dst, TCGArg src)
{
    TCGTemp *dst_ts = arg_temp(dst);
    TCGTemp *src_ts = arg_temp(src);
    const TCGOpDef *def;
    struct tcg_temp_info *di;
    struct tcg_temp_info *si;
    tcg_target_ulong mask;
    TCGOpcode new_op;

    /* A mov between temps that already hold the same value is dead.  */
    if (ts_are_copies(dst_ts, src_ts)) {
        tcg_op_remove(s, op);
        return;
    }

    reset_ts(dst_ts);
    di = ts_info(dst_ts);
    si = ts_info(src_ts);
    /* Pick the mov variant matching the original opcode's class.  */
    def = &tcg_op_defs[op->opc];
    if (def->flags & TCG_OPF_VECTOR) {
        new_op = INDEX_op_mov_vec;
    } else if (def->flags & TCG_OPF_64BIT) {
        new_op = INDEX_op_mov_i64;
    } else {
        new_op = INDEX_op_mov_i32;
    }
    op->opc = new_op;
    /* TCGOP_VECL and TCGOP_VECE remain unchanged.  */
    op->args[0] = dst;
    op->args[1] = src;

    mask = si->mask;
    if (TCG_TARGET_REG_BITS > 32 && new_op == INDEX_op_mov_i32) {
        /* High bits of the destination are now garbage.  */
        mask |= ~0xffffffffull;
    }
    di->mask = mask;

    /* Only record DST as a copy of SRC when the types match; a
       size-changing mov must not join the copy list.  */
    if (src_ts->type == dst_ts->type) {
        struct tcg_temp_info *ni = ts_info(si->next_copy);

        /* Splice DST into the circular list right after SRC.  */
        di->next_copy = si->next_copy;
        di->prev_copy = src_ts;
        ni->prev_copy = dst_ts;
        si->next_copy = dst_ts;
        di->is_const = si->is_const;
        di->val = si->val;
    }
}
249 
/* Evaluate opcode OP on the constant operands X and Y and return the raw
   result.  The caller (do_constant_folding) truncates for 32-bit opcodes.
   Shift/rotate counts are masked to the operand width, and division by
   zero is replaced by division by one to avoid faulting while folding.  */
static TCGArg do_constant_folding_2(TCGOpcode op, TCGArg x, TCGArg y)
{
    uint64_t l64, h64;

    switch (op) {
    CASE_OP_32_64(add):
        return x + y;

    CASE_OP_32_64(sub):
        return x - y;

    CASE_OP_32_64(mul):
        return x * y;

    CASE_OP_32_64(and):
        return x & y;

    CASE_OP_32_64(or):
        return x | y;

    CASE_OP_32_64(xor):
        return x ^ y;

    /* Shift counts are masked to width-1, matching the TCG semantics
       and avoiding C undefined behavior for oversized shifts.  */
    case INDEX_op_shl_i32:
        return (uint32_t)x << (y & 31);

    case INDEX_op_shl_i64:
        return (uint64_t)x << (y & 63);

    case INDEX_op_shr_i32:
        return (uint32_t)x >> (y & 31);

    case INDEX_op_shr_i64:
        return (uint64_t)x >> (y & 63);

    case INDEX_op_sar_i32:
        return (int32_t)x >> (y & 31);

    case INDEX_op_sar_i64:
        return (int64_t)x >> (y & 63);

    case INDEX_op_rotr_i32:
        return ror32(x, y & 31);

    case INDEX_op_rotr_i64:
        return ror64(x, y & 63);

    case INDEX_op_rotl_i32:
        return rol32(x, y & 31);

    case INDEX_op_rotl_i64:
        return rol64(x, y & 63);

    CASE_OP_32_64(not):
        return ~x;

    CASE_OP_32_64(neg):
        return -x;

    CASE_OP_32_64(andc):
        return x & ~y;

    CASE_OP_32_64(orc):
        return x | ~y;

    CASE_OP_32_64(eqv):
        return ~(x ^ y);

    CASE_OP_32_64(nand):
        return ~(x & y);

    CASE_OP_32_64(nor):
        return ~(x | y);

    /* For clz/ctz, Y supplies the result for a zero input, per the
       TCG definition of these ops.  */
    case INDEX_op_clz_i32:
        return (uint32_t)x ? clz32(x) : y;

    case INDEX_op_clz_i64:
        return x ? clz64(x) : y;

    case INDEX_op_ctz_i32:
        return (uint32_t)x ? ctz32(x) : y;

    case INDEX_op_ctz_i64:
        return x ? ctz64(x) : y;

    case INDEX_op_ctpop_i32:
        return ctpop32(x);

    case INDEX_op_ctpop_i64:
        return ctpop64(x);

    CASE_OP_32_64(ext8s):
        return (int8_t)x;

    CASE_OP_32_64(ext16s):
        return (int16_t)x;

    CASE_OP_32_64(ext8u):
        return (uint8_t)x;

    CASE_OP_32_64(ext16u):
        return (uint16_t)x;

    CASE_OP_32_64(bswap16):
        return bswap16(x);

    CASE_OP_32_64(bswap32):
        return bswap32(x);

    case INDEX_op_bswap64_i64:
        return bswap64(x);

    case INDEX_op_ext_i32_i64:
    case INDEX_op_ext32s_i64:
        return (int32_t)x;

    case INDEX_op_extu_i32_i64:
    case INDEX_op_extrl_i64_i32:
    case INDEX_op_ext32u_i64:
        return (uint32_t)x;

    case INDEX_op_extrh_i64_i32:
        return (uint64_t)x >> 32;

    /* High half of the widened product.  */
    case INDEX_op_muluh_i32:
        return ((uint64_t)(uint32_t)x * (uint32_t)y) >> 32;
    case INDEX_op_mulsh_i32:
        return ((int64_t)(int32_t)x * (int32_t)y) >> 32;

    case INDEX_op_muluh_i64:
        mulu64(&l64, &h64, x, y);
        return h64;
    case INDEX_op_mulsh_i64:
        muls64(&l64, &h64, x, y);
        return h64;

    case INDEX_op_div_i32:
        /* Avoid crashing on divide by zero, otherwise undefined.  */
        return (int32_t)x / ((int32_t)y ? : 1);
    case INDEX_op_divu_i32:
        return (uint32_t)x / ((uint32_t)y ? : 1);
    case INDEX_op_div_i64:
        return (int64_t)x / ((int64_t)y ? : 1);
    case INDEX_op_divu_i64:
        return (uint64_t)x / ((uint64_t)y ? : 1);

    case INDEX_op_rem_i32:
        return (int32_t)x % ((int32_t)y ? : 1);
    case INDEX_op_remu_i32:
        return (uint32_t)x % ((uint32_t)y ? : 1);
    case INDEX_op_rem_i64:
        return (int64_t)x % ((int64_t)y ? : 1);
    case INDEX_op_remu_i64:
        return (uint64_t)x % ((uint64_t)y ? : 1);

    default:
        fprintf(stderr,
                "Unrecognized operation %d in do_constant_folding.\n", op);
        tcg_abort();
    }
}
412 
413 static TCGArg do_constant_folding(TCGOpcode op, TCGArg x, TCGArg y)
414 {
415     const TCGOpDef *def = &tcg_op_defs[op];
416     TCGArg res = do_constant_folding_2(op, x, y);
417     if (!(def->flags & TCG_OPF_64BIT)) {
418         res = (int32_t)res;
419     }
420     return res;
421 }
422 
423 static bool do_constant_folding_cond_32(uint32_t x, uint32_t y, TCGCond c)
424 {
425     switch (c) {
426     case TCG_COND_EQ:
427         return x == y;
428     case TCG_COND_NE:
429         return x != y;
430     case TCG_COND_LT:
431         return (int32_t)x < (int32_t)y;
432     case TCG_COND_GE:
433         return (int32_t)x >= (int32_t)y;
434     case TCG_COND_LE:
435         return (int32_t)x <= (int32_t)y;
436     case TCG_COND_GT:
437         return (int32_t)x > (int32_t)y;
438     case TCG_COND_LTU:
439         return x < y;
440     case TCG_COND_GEU:
441         return x >= y;
442     case TCG_COND_LEU:
443         return x <= y;
444     case TCG_COND_GTU:
445         return x > y;
446     default:
447         tcg_abort();
448     }
449 }
450 
451 static bool do_constant_folding_cond_64(uint64_t x, uint64_t y, TCGCond c)
452 {
453     switch (c) {
454     case TCG_COND_EQ:
455         return x == y;
456     case TCG_COND_NE:
457         return x != y;
458     case TCG_COND_LT:
459         return (int64_t)x < (int64_t)y;
460     case TCG_COND_GE:
461         return (int64_t)x >= (int64_t)y;
462     case TCG_COND_LE:
463         return (int64_t)x <= (int64_t)y;
464     case TCG_COND_GT:
465         return (int64_t)x > (int64_t)y;
466     case TCG_COND_LTU:
467         return x < y;
468     case TCG_COND_GEU:
469         return x >= y;
470     case TCG_COND_LEU:
471         return x <= y;
472     case TCG_COND_GTU:
473         return x > y;
474     default:
475         tcg_abort();
476     }
477 }
478 
479 static bool do_constant_folding_cond_eq(TCGCond c)
480 {
481     switch (c) {
482     case TCG_COND_GT:
483     case TCG_COND_LTU:
484     case TCG_COND_LT:
485     case TCG_COND_GTU:
486     case TCG_COND_NE:
487         return 0;
488     case TCG_COND_GE:
489     case TCG_COND_GEU:
490     case TCG_COND_LE:
491     case TCG_COND_LEU:
492     case TCG_COND_EQ:
493         return 1;
494     default:
495         tcg_abort();
496     }
497 }
498 
499 /* Return 2 if the condition can't be simplified, and the result
500    of the condition (0 or 1) if it can */
501 static TCGArg do_constant_folding_cond(TCGOpcode op, TCGArg x,
502                                        TCGArg y, TCGCond c)
503 {
504     tcg_target_ulong xv = arg_info(x)->val;
505     tcg_target_ulong yv = arg_info(y)->val;
506     if (arg_is_const(x) && arg_is_const(y)) {
507         const TCGOpDef *def = &tcg_op_defs[op];
508         tcg_debug_assert(!(def->flags & TCG_OPF_VECTOR));
509         if (def->flags & TCG_OPF_64BIT) {
510             return do_constant_folding_cond_64(xv, yv, c);
511         } else {
512             return do_constant_folding_cond_32(xv, yv, c);
513         }
514     } else if (args_are_copies(x, y)) {
515         return do_constant_folding_cond_eq(c);
516     } else if (arg_is_const(y) && yv == 0) {
517         switch (c) {
518         case TCG_COND_LTU:
519             return 0;
520         case TCG_COND_GEU:
521             return 1;
522         default:
523             return 2;
524         }
525     }
526     return 2;
527 }
528 
/* Return 2 if the condition can't be simplified, and the result
   of the condition (0 or 1) if it can.  P1 and P2 each point to a
   lo/hi pair of 32-bit operand halves forming one 64-bit value.  */
static TCGArg do_constant_folding_cond2(TCGArg *p1, TCGArg *p2, TCGCond c)
{
    TCGArg al = p1[0], ah = p1[1];
    TCGArg bl = p2[0], bh = p2[1];

    if (arg_is_const(bl) && arg_is_const(bh)) {
        tcg_target_ulong blv = arg_info(bl)->val;
        tcg_target_ulong bhv = arg_info(bh)->val;
        /* Reassemble the 64-bit value from its 32-bit halves.  */
        uint64_t b = deposit64(blv, 32, 32, bhv);

        if (arg_is_const(al) && arg_is_const(ah)) {
            tcg_target_ulong alv = arg_info(al)->val;
            tcg_target_ulong ahv = arg_info(ah)->val;
            uint64_t a = deposit64(alv, 32, 32, ahv);
            return do_constant_folding_cond_64(a, b, c);
        }
        /* Unsigned comparisons against a constant zero are trivial
           even when the other operand is unknown.  */
        if (b == 0) {
            switch (c) {
            case TCG_COND_LTU:
                return 0;
            case TCG_COND_GEU:
                return 1;
            default:
                break;
            }
        }
    }
    /* Both halves pairwise copies implies the 64-bit values are equal.  */
    if (args_are_copies(al, bl) && args_are_copies(ah, bh)) {
        return do_constant_folding_cond_eq(c);
    }
    return 2;
}
563 
564 static bool swap_commutative(TCGArg dest, TCGArg *p1, TCGArg *p2)
565 {
566     TCGArg a1 = *p1, a2 = *p2;
567     int sum = 0;
568     sum += arg_is_const(a1);
569     sum -= arg_is_const(a2);
570 
571     /* Prefer the constant in second argument, and then the form
572        op a, a, b, which is better handled on non-RISC hosts. */
573     if (sum > 0 || (sum == 0 && dest == a2)) {
574         *p1 = a2;
575         *p2 = a1;
576         return true;
577     }
578     return false;
579 }
580 
581 static bool swap_commutative2(TCGArg *p1, TCGArg *p2)
582 {
583     int sum = 0;
584     sum += arg_is_const(p1[0]);
585     sum += arg_is_const(p1[1]);
586     sum -= arg_is_const(p2[0]);
587     sum -= arg_is_const(p2[1]);
588     if (sum > 0) {
589         TCGArg t;
590         t = p1[0], p1[0] = p2[0], p2[0] = t;
591         t = p1[1], p1[1] = p2[1], p2[1] = t;
592         return true;
593     }
594     return false;
595 }
596 
597 /* Propagate constants and copies, fold constant expressions. */
598 void tcg_optimize(TCGContext *s)
599 {
600     int nb_temps, nb_globals;
601     TCGOp *op, *op_next, *prev_mb = NULL;
602     struct tcg_temp_info *infos;
603     TCGTempSet temps_used;
604 
605     /* Array VALS has an element for each temp.
606        If this temp holds a constant then its value is kept in VALS' element.
607        If this temp is a copy of other ones then the other copies are
608        available through the doubly linked circular list. */
609 
610     nb_temps = s->nb_temps;
611     nb_globals = s->nb_globals;
612     bitmap_zero(temps_used.l, nb_temps);
613     infos = tcg_malloc(sizeof(struct tcg_temp_info) * nb_temps);
614 
615     QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
616         tcg_target_ulong mask, partmask, affected;
617         int nb_oargs, nb_iargs, i;
618         TCGArg tmp;
619         TCGOpcode opc = op->opc;
620         const TCGOpDef *def = &tcg_op_defs[opc];
621 
622         /* Count the arguments, and initialize the temps that are
623            going to be used */
624         if (opc == INDEX_op_call) {
625             nb_oargs = TCGOP_CALLO(op);
626             nb_iargs = TCGOP_CALLI(op);
627             for (i = 0; i < nb_oargs + nb_iargs; i++) {
628                 TCGTemp *ts = arg_temp(op->args[i]);
629                 if (ts) {
630                     init_ts_info(infos, &temps_used, ts);
631                 }
632             }
633         } else {
634             nb_oargs = def->nb_oargs;
635             nb_iargs = def->nb_iargs;
636             for (i = 0; i < nb_oargs + nb_iargs; i++) {
637                 init_arg_info(infos, &temps_used, op->args[i]);
638             }
639         }
640 
641         /* Do copy propagation */
642         for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
643             TCGTemp *ts = arg_temp(op->args[i]);
644             if (ts && ts_is_copy(ts)) {
645                 op->args[i] = temp_arg(find_better_copy(s, ts));
646             }
647         }
648 
649         /* For commutative operations make constant second argument */
650         switch (opc) {
651         CASE_OP_32_64_VEC(add):
652         CASE_OP_32_64_VEC(mul):
653         CASE_OP_32_64_VEC(and):
654         CASE_OP_32_64_VEC(or):
655         CASE_OP_32_64_VEC(xor):
656         CASE_OP_32_64(eqv):
657         CASE_OP_32_64(nand):
658         CASE_OP_32_64(nor):
659         CASE_OP_32_64(muluh):
660         CASE_OP_32_64(mulsh):
661             swap_commutative(op->args[0], &op->args[1], &op->args[2]);
662             break;
663         CASE_OP_32_64(brcond):
664             if (swap_commutative(-1, &op->args[0], &op->args[1])) {
665                 op->args[2] = tcg_swap_cond(op->args[2]);
666             }
667             break;
668         CASE_OP_32_64(setcond):
669             if (swap_commutative(op->args[0], &op->args[1], &op->args[2])) {
670                 op->args[3] = tcg_swap_cond(op->args[3]);
671             }
672             break;
673         CASE_OP_32_64(movcond):
674             if (swap_commutative(-1, &op->args[1], &op->args[2])) {
675                 op->args[5] = tcg_swap_cond(op->args[5]);
676             }
677             /* For movcond, we canonicalize the "false" input reg to match
678                the destination reg so that the tcg backend can implement
679                a "move if true" operation.  */
680             if (swap_commutative(op->args[0], &op->args[4], &op->args[3])) {
681                 op->args[5] = tcg_invert_cond(op->args[5]);
682             }
683             break;
684         CASE_OP_32_64(add2):
685             swap_commutative(op->args[0], &op->args[2], &op->args[4]);
686             swap_commutative(op->args[1], &op->args[3], &op->args[5]);
687             break;
688         CASE_OP_32_64(mulu2):
689         CASE_OP_32_64(muls2):
690             swap_commutative(op->args[0], &op->args[2], &op->args[3]);
691             break;
692         case INDEX_op_brcond2_i32:
693             if (swap_commutative2(&op->args[0], &op->args[2])) {
694                 op->args[4] = tcg_swap_cond(op->args[4]);
695             }
696             break;
697         case INDEX_op_setcond2_i32:
698             if (swap_commutative2(&op->args[1], &op->args[3])) {
699                 op->args[5] = tcg_swap_cond(op->args[5]);
700             }
701             break;
702         default:
703             break;
704         }
705 
706         /* Simplify expressions for "shift/rot r, 0, a => movi r, 0",
707            and "sub r, 0, a => neg r, a" case.  */
708         switch (opc) {
709         CASE_OP_32_64(shl):
710         CASE_OP_32_64(shr):
711         CASE_OP_32_64(sar):
712         CASE_OP_32_64(rotl):
713         CASE_OP_32_64(rotr):
714             if (arg_is_const(op->args[1])
715                 && arg_info(op->args[1])->val == 0) {
716                 tcg_opt_gen_movi(s, op, op->args[0], 0);
717                 continue;
718             }
719             break;
720         CASE_OP_32_64_VEC(sub):
721             {
722                 TCGOpcode neg_op;
723                 bool have_neg;
724 
725                 if (arg_is_const(op->args[2])) {
726                     /* Proceed with possible constant folding. */
727                     break;
728                 }
729                 if (opc == INDEX_op_sub_i32) {
730                     neg_op = INDEX_op_neg_i32;
731                     have_neg = TCG_TARGET_HAS_neg_i32;
732                 } else if (opc == INDEX_op_sub_i64) {
733                     neg_op = INDEX_op_neg_i64;
734                     have_neg = TCG_TARGET_HAS_neg_i64;
735                 } else if (TCG_TARGET_HAS_neg_vec) {
736                     TCGType type = TCGOP_VECL(op) + TCG_TYPE_V64;
737                     unsigned vece = TCGOP_VECE(op);
738                     neg_op = INDEX_op_neg_vec;
739                     have_neg = tcg_can_emit_vec_op(neg_op, type, vece) > 0;
740                 } else {
741                     break;
742                 }
743                 if (!have_neg) {
744                     break;
745                 }
746                 if (arg_is_const(op->args[1])
747                     && arg_info(op->args[1])->val == 0) {
748                     op->opc = neg_op;
749                     reset_temp(op->args[0]);
750                     op->args[1] = op->args[2];
751                     continue;
752                 }
753             }
754             break;
755         CASE_OP_32_64_VEC(xor):
756         CASE_OP_32_64(nand):
757             if (!arg_is_const(op->args[1])
758                 && arg_is_const(op->args[2])
759                 && arg_info(op->args[2])->val == -1) {
760                 i = 1;
761                 goto try_not;
762             }
763             break;
764         CASE_OP_32_64(nor):
765             if (!arg_is_const(op->args[1])
766                 && arg_is_const(op->args[2])
767                 && arg_info(op->args[2])->val == 0) {
768                 i = 1;
769                 goto try_not;
770             }
771             break;
772         CASE_OP_32_64_VEC(andc):
773             if (!arg_is_const(op->args[2])
774                 && arg_is_const(op->args[1])
775                 && arg_info(op->args[1])->val == -1) {
776                 i = 2;
777                 goto try_not;
778             }
779             break;
780         CASE_OP_32_64_VEC(orc):
781         CASE_OP_32_64(eqv):
782             if (!arg_is_const(op->args[2])
783                 && arg_is_const(op->args[1])
784                 && arg_info(op->args[1])->val == 0) {
785                 i = 2;
786                 goto try_not;
787             }
788             break;
789         try_not:
790             {
791                 TCGOpcode not_op;
792                 bool have_not;
793 
794                 if (def->flags & TCG_OPF_VECTOR) {
795                     not_op = INDEX_op_not_vec;
796                     have_not = TCG_TARGET_HAS_not_vec;
797                 } else if (def->flags & TCG_OPF_64BIT) {
798                     not_op = INDEX_op_not_i64;
799                     have_not = TCG_TARGET_HAS_not_i64;
800                 } else {
801                     not_op = INDEX_op_not_i32;
802                     have_not = TCG_TARGET_HAS_not_i32;
803                 }
804                 if (!have_not) {
805                     break;
806                 }
807                 op->opc = not_op;
808                 reset_temp(op->args[0]);
809                 op->args[1] = op->args[i];
810                 continue;
811             }
812         default:
813             break;
814         }
815 
816         /* Simplify expression for "op r, a, const => mov r, a" cases */
817         switch (opc) {
818         CASE_OP_32_64_VEC(add):
819         CASE_OP_32_64_VEC(sub):
820         CASE_OP_32_64_VEC(or):
821         CASE_OP_32_64_VEC(xor):
822         CASE_OP_32_64_VEC(andc):
823         CASE_OP_32_64(shl):
824         CASE_OP_32_64(shr):
825         CASE_OP_32_64(sar):
826         CASE_OP_32_64(rotl):
827         CASE_OP_32_64(rotr):
828             if (!arg_is_const(op->args[1])
829                 && arg_is_const(op->args[2])
830                 && arg_info(op->args[2])->val == 0) {
831                 tcg_opt_gen_mov(s, op, op->args[0], op->args[1]);
832                 continue;
833             }
834             break;
835         CASE_OP_32_64_VEC(and):
836         CASE_OP_32_64_VEC(orc):
837         CASE_OP_32_64(eqv):
838             if (!arg_is_const(op->args[1])
839                 && arg_is_const(op->args[2])
840                 && arg_info(op->args[2])->val == -1) {
841                 tcg_opt_gen_mov(s, op, op->args[0], op->args[1]);
842                 continue;
843             }
844             break;
845         default:
846             break;
847         }
848 
849         /* Simplify using known-zero bits. Currently only ops with a single
850            output argument is supported. */
851         mask = -1;
852         affected = -1;
853         switch (opc) {
854         CASE_OP_32_64(ext8s):
855             if ((arg_info(op->args[1])->mask & 0x80) != 0) {
856                 break;
857             }
858         CASE_OP_32_64(ext8u):
859             mask = 0xff;
860             goto and_const;
861         CASE_OP_32_64(ext16s):
862             if ((arg_info(op->args[1])->mask & 0x8000) != 0) {
863                 break;
864             }
865         CASE_OP_32_64(ext16u):
866             mask = 0xffff;
867             goto and_const;
868         case INDEX_op_ext32s_i64:
869             if ((arg_info(op->args[1])->mask & 0x80000000) != 0) {
870                 break;
871             }
872         case INDEX_op_ext32u_i64:
873             mask = 0xffffffffU;
874             goto and_const;
875 
876         CASE_OP_32_64(and):
877             mask = arg_info(op->args[2])->mask;
878             if (arg_is_const(op->args[2])) {
879         and_const:
880                 affected = arg_info(op->args[1])->mask & ~mask;
881             }
882             mask = arg_info(op->args[1])->mask & mask;
883             break;
884 
885         case INDEX_op_ext_i32_i64:
886             if ((arg_info(op->args[1])->mask & 0x80000000) != 0) {
887                 break;
888             }
889         case INDEX_op_extu_i32_i64:
890             /* We do not compute affected as it is a size changing op.  */
891             mask = (uint32_t)arg_info(op->args[1])->mask;
892             break;
893 
894         CASE_OP_32_64(andc):
895             /* Known-zeros does not imply known-ones.  Therefore unless
896                op->args[2] is constant, we can't infer anything from it.  */
897             if (arg_is_const(op->args[2])) {
898                 mask = ~arg_info(op->args[2])->mask;
899                 goto and_const;
900             }
901             /* But we certainly know nothing outside args[1] may be set. */
902             mask = arg_info(op->args[1])->mask;
903             break;
904 
905         case INDEX_op_sar_i32:
906             if (arg_is_const(op->args[2])) {
907                 tmp = arg_info(op->args[2])->val & 31;
908                 mask = (int32_t)arg_info(op->args[1])->mask >> tmp;
909             }
910             break;
911         case INDEX_op_sar_i64:
912             if (arg_is_const(op->args[2])) {
913                 tmp = arg_info(op->args[2])->val & 63;
914                 mask = (int64_t)arg_info(op->args[1])->mask >> tmp;
915             }
916             break;
917 
918         case INDEX_op_shr_i32:
919             if (arg_is_const(op->args[2])) {
920                 tmp = arg_info(op->args[2])->val & 31;
921                 mask = (uint32_t)arg_info(op->args[1])->mask >> tmp;
922             }
923             break;
924         case INDEX_op_shr_i64:
925             if (arg_is_const(op->args[2])) {
926                 tmp = arg_info(op->args[2])->val & 63;
927                 mask = (uint64_t)arg_info(op->args[1])->mask >> tmp;
928             }
929             break;
930 
931         case INDEX_op_extrl_i64_i32:
932             mask = (uint32_t)arg_info(op->args[1])->mask;
933             break;
934         case INDEX_op_extrh_i64_i32:
935             mask = (uint64_t)arg_info(op->args[1])->mask >> 32;
936             break;
937 
938         CASE_OP_32_64(shl):
939             if (arg_is_const(op->args[2])) {
940                 tmp = arg_info(op->args[2])->val & (TCG_TARGET_REG_BITS - 1);
941                 mask = arg_info(op->args[1])->mask << tmp;
942             }
943             break;
944 
945         CASE_OP_32_64(neg):
946             /* Set to 1 all bits to the left of the rightmost.  */
947             mask = -(arg_info(op->args[1])->mask
948                      & -arg_info(op->args[1])->mask);
949             break;
950 
951         CASE_OP_32_64(deposit):
952             mask = deposit64(arg_info(op->args[1])->mask,
953                              op->args[3], op->args[4],
954                              arg_info(op->args[2])->mask);
955             break;
956 
957         CASE_OP_32_64(extract):
958             mask = extract64(arg_info(op->args[1])->mask,
959                              op->args[2], op->args[3]);
960             if (op->args[2] == 0) {
961                 affected = arg_info(op->args[1])->mask & ~mask;
962             }
963             break;
964         CASE_OP_32_64(sextract):
965             mask = sextract64(arg_info(op->args[1])->mask,
966                               op->args[2], op->args[3]);
967             if (op->args[2] == 0 && (tcg_target_long)mask >= 0) {
968                 affected = arg_info(op->args[1])->mask & ~mask;
969             }
970             break;
971 
972         CASE_OP_32_64(or):
973         CASE_OP_32_64(xor):
974             mask = arg_info(op->args[1])->mask | arg_info(op->args[2])->mask;
975             break;
976 
977         case INDEX_op_clz_i32:
978         case INDEX_op_ctz_i32:
979             mask = arg_info(op->args[2])->mask | 31;
980             break;
981 
982         case INDEX_op_clz_i64:
983         case INDEX_op_ctz_i64:
984             mask = arg_info(op->args[2])->mask | 63;
985             break;
986 
987         case INDEX_op_ctpop_i32:
988             mask = 32 | 31;
989             break;
990         case INDEX_op_ctpop_i64:
991             mask = 64 | 63;
992             break;
993 
994         CASE_OP_32_64(setcond):
995         case INDEX_op_setcond2_i32:
996             mask = 1;
997             break;
998 
999         CASE_OP_32_64(movcond):
1000             mask = arg_info(op->args[3])->mask | arg_info(op->args[4])->mask;
1001             break;
1002 
1003         CASE_OP_32_64(ld8u):
1004             mask = 0xff;
1005             break;
1006         CASE_OP_32_64(ld16u):
1007             mask = 0xffff;
1008             break;
1009         case INDEX_op_ld32u_i64:
1010             mask = 0xffffffffu;
1011             break;
1012 
1013         CASE_OP_32_64(qemu_ld):
1014             {
1015                 TCGMemOpIdx oi = op->args[nb_oargs + nb_iargs];
1016                 MemOp mop = get_memop(oi);
1017                 if (!(mop & MO_SIGN)) {
1018                     mask = (2ULL << ((8 << (mop & MO_SIZE)) - 1)) - 1;
1019                 }
1020             }
1021             break;
1022 
1023         default:
1024             break;
1025         }
1026 
1027         /* 32-bit ops generate 32-bit results.  For the result is zero test
1028            below, we can ignore high bits, but for further optimizations we
1029            need to record that the high bits contain garbage.  */
1030         partmask = mask;
1031         if (!(def->flags & TCG_OPF_64BIT)) {
1032             mask |= ~(tcg_target_ulong)0xffffffffu;
1033             partmask &= 0xffffffffu;
1034             affected &= 0xffffffffu;
1035         }
1036 
1037         if (partmask == 0) {
1038             tcg_debug_assert(nb_oargs == 1);
1039             tcg_opt_gen_movi(s, op, op->args[0], 0);
1040             continue;
1041         }
1042         if (affected == 0) {
1043             tcg_debug_assert(nb_oargs == 1);
1044             tcg_opt_gen_mov(s, op, op->args[0], op->args[1]);
1045             continue;
1046         }
1047 
1048         /* Simplify expression for "op r, a, 0 => movi r, 0" cases */
1049         switch (opc) {
1050         CASE_OP_32_64_VEC(and):
1051         CASE_OP_32_64_VEC(mul):
1052         CASE_OP_32_64(muluh):
1053         CASE_OP_32_64(mulsh):
1054             if (arg_is_const(op->args[2])
1055                 && arg_info(op->args[2])->val == 0) {
1056                 tcg_opt_gen_movi(s, op, op->args[0], 0);
1057                 continue;
1058             }
1059             break;
1060         default:
1061             break;
1062         }
1063 
1064         /* Simplify expression for "op r, a, a => mov r, a" cases */
1065         switch (opc) {
1066         CASE_OP_32_64_VEC(or):
1067         CASE_OP_32_64_VEC(and):
1068             if (args_are_copies(op->args[1], op->args[2])) {
1069                 tcg_opt_gen_mov(s, op, op->args[0], op->args[1]);
1070                 continue;
1071             }
1072             break;
1073         default:
1074             break;
1075         }
1076 
1077         /* Simplify expression for "op r, a, a => movi r, 0" cases */
1078         switch (opc) {
1079         CASE_OP_32_64_VEC(andc):
1080         CASE_OP_32_64_VEC(sub):
1081         CASE_OP_32_64_VEC(xor):
1082             if (args_are_copies(op->args[1], op->args[2])) {
1083                 tcg_opt_gen_movi(s, op, op->args[0], 0);
1084                 continue;
1085             }
1086             break;
1087         default:
1088             break;
1089         }
1090 
1091         /* Propagate constants through copy operations and do constant
1092            folding.  Constants will be substituted to arguments by register
1093            allocator where needed and possible.  Also detect copies. */
1094         switch (opc) {
1095         CASE_OP_32_64_VEC(mov):
1096             tcg_opt_gen_mov(s, op, op->args[0], op->args[1]);
1097             break;
1098         CASE_OP_32_64(movi):
1099         case INDEX_op_dupi_vec:
1100             tcg_opt_gen_movi(s, op, op->args[0], op->args[1]);
1101             break;
1102 
1103         case INDEX_op_dup_vec:
1104             if (arg_is_const(op->args[1])) {
1105                 tmp = arg_info(op->args[1])->val;
1106                 tmp = dup_const(TCGOP_VECE(op), tmp);
1107                 tcg_opt_gen_movi(s, op, op->args[0], tmp);
1108                 break;
1109             }
1110             goto do_default;
1111 
1112         case INDEX_op_dup2_vec:
1113             assert(TCG_TARGET_REG_BITS == 32);
1114             if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
1115                 tmp = arg_info(op->args[1])->val;
1116                 if (tmp == arg_info(op->args[2])->val) {
1117                     tcg_opt_gen_movi(s, op, op->args[0], tmp);
1118                     break;
1119                 }
1120             } else if (args_are_copies(op->args[1], op->args[2])) {
1121                 op->opc = INDEX_op_dup_vec;
1122                 TCGOP_VECE(op) = MO_32;
1123                 nb_iargs = 1;
1124             }
1125             goto do_default;
1126 
1127         CASE_OP_32_64(not):
1128         CASE_OP_32_64(neg):
1129         CASE_OP_32_64(ext8s):
1130         CASE_OP_32_64(ext8u):
1131         CASE_OP_32_64(ext16s):
1132         CASE_OP_32_64(ext16u):
1133         CASE_OP_32_64(ctpop):
1134         CASE_OP_32_64(bswap16):
1135         CASE_OP_32_64(bswap32):
1136         case INDEX_op_bswap64_i64:
1137         case INDEX_op_ext32s_i64:
1138         case INDEX_op_ext32u_i64:
1139         case INDEX_op_ext_i32_i64:
1140         case INDEX_op_extu_i32_i64:
1141         case INDEX_op_extrl_i64_i32:
1142         case INDEX_op_extrh_i64_i32:
1143             if (arg_is_const(op->args[1])) {
1144                 tmp = do_constant_folding(opc, arg_info(op->args[1])->val, 0);
1145                 tcg_opt_gen_movi(s, op, op->args[0], tmp);
1146                 break;
1147             }
1148             goto do_default;
1149 
1150         CASE_OP_32_64(add):
1151         CASE_OP_32_64(sub):
1152         CASE_OP_32_64(mul):
1153         CASE_OP_32_64(or):
1154         CASE_OP_32_64(and):
1155         CASE_OP_32_64(xor):
1156         CASE_OP_32_64(shl):
1157         CASE_OP_32_64(shr):
1158         CASE_OP_32_64(sar):
1159         CASE_OP_32_64(rotl):
1160         CASE_OP_32_64(rotr):
1161         CASE_OP_32_64(andc):
1162         CASE_OP_32_64(orc):
1163         CASE_OP_32_64(eqv):
1164         CASE_OP_32_64(nand):
1165         CASE_OP_32_64(nor):
1166         CASE_OP_32_64(muluh):
1167         CASE_OP_32_64(mulsh):
1168         CASE_OP_32_64(div):
1169         CASE_OP_32_64(divu):
1170         CASE_OP_32_64(rem):
1171         CASE_OP_32_64(remu):
1172             if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
1173                 tmp = do_constant_folding(opc, arg_info(op->args[1])->val,
1174                                           arg_info(op->args[2])->val);
1175                 tcg_opt_gen_movi(s, op, op->args[0], tmp);
1176                 break;
1177             }
1178             goto do_default;
1179 
1180         CASE_OP_32_64(clz):
1181         CASE_OP_32_64(ctz):
1182             if (arg_is_const(op->args[1])) {
1183                 TCGArg v = arg_info(op->args[1])->val;
1184                 if (v != 0) {
1185                     tmp = do_constant_folding(opc, v, 0);
1186                     tcg_opt_gen_movi(s, op, op->args[0], tmp);
1187                 } else {
1188                     tcg_opt_gen_mov(s, op, op->args[0], op->args[2]);
1189                 }
1190                 break;
1191             }
1192             goto do_default;
1193 
1194         CASE_OP_32_64(deposit):
1195             if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
1196                 tmp = deposit64(arg_info(op->args[1])->val,
1197                                 op->args[3], op->args[4],
1198                                 arg_info(op->args[2])->val);
1199                 tcg_opt_gen_movi(s, op, op->args[0], tmp);
1200                 break;
1201             }
1202             goto do_default;
1203 
1204         CASE_OP_32_64(extract):
1205             if (arg_is_const(op->args[1])) {
1206                 tmp = extract64(arg_info(op->args[1])->val,
1207                                 op->args[2], op->args[3]);
1208                 tcg_opt_gen_movi(s, op, op->args[0], tmp);
1209                 break;
1210             }
1211             goto do_default;
1212 
1213         CASE_OP_32_64(sextract):
1214             if (arg_is_const(op->args[1])) {
1215                 tmp = sextract64(arg_info(op->args[1])->val,
1216                                  op->args[2], op->args[3]);
1217                 tcg_opt_gen_movi(s, op, op->args[0], tmp);
1218                 break;
1219             }
1220             goto do_default;
1221 
1222         CASE_OP_32_64(extract2):
1223             if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
1224                 TCGArg v1 = arg_info(op->args[1])->val;
1225                 TCGArg v2 = arg_info(op->args[2])->val;
1226 
1227                 if (opc == INDEX_op_extract2_i64) {
1228                     tmp = (v1 >> op->args[3]) | (v2 << (64 - op->args[3]));
1229                 } else {
1230                     tmp = (int32_t)(((uint32_t)v1 >> op->args[3]) |
1231                                     ((uint32_t)v2 << (32 - op->args[3])));
1232                 }
1233                 tcg_opt_gen_movi(s, op, op->args[0], tmp);
1234                 break;
1235             }
1236             goto do_default;
1237 
1238         CASE_OP_32_64(setcond):
1239             tmp = do_constant_folding_cond(opc, op->args[1],
1240                                            op->args[2], op->args[3]);
1241             if (tmp != 2) {
1242                 tcg_opt_gen_movi(s, op, op->args[0], tmp);
1243                 break;
1244             }
1245             goto do_default;
1246 
1247         CASE_OP_32_64(brcond):
1248             tmp = do_constant_folding_cond(opc, op->args[0],
1249                                            op->args[1], op->args[2]);
1250             if (tmp != 2) {
1251                 if (tmp) {
1252                     bitmap_zero(temps_used.l, nb_temps);
1253                     op->opc = INDEX_op_br;
1254                     op->args[0] = op->args[3];
1255                 } else {
1256                     tcg_op_remove(s, op);
1257                 }
1258                 break;
1259             }
1260             goto do_default;
1261 
1262         CASE_OP_32_64(movcond):
1263             tmp = do_constant_folding_cond(opc, op->args[1],
1264                                            op->args[2], op->args[5]);
1265             if (tmp != 2) {
1266                 tcg_opt_gen_mov(s, op, op->args[0], op->args[4-tmp]);
1267                 break;
1268             }
1269             if (arg_is_const(op->args[3]) && arg_is_const(op->args[4])) {
1270                 tcg_target_ulong tv = arg_info(op->args[3])->val;
1271                 tcg_target_ulong fv = arg_info(op->args[4])->val;
1272                 TCGCond cond = op->args[5];
1273                 if (fv == 1 && tv == 0) {
1274                     cond = tcg_invert_cond(cond);
1275                 } else if (!(tv == 1 && fv == 0)) {
1276                     goto do_default;
1277                 }
1278                 op->args[3] = cond;
1279                 op->opc = opc = (opc == INDEX_op_movcond_i32
1280                                  ? INDEX_op_setcond_i32
1281                                  : INDEX_op_setcond_i64);
1282                 nb_iargs = 2;
1283             }
1284             goto do_default;
1285 
1286         case INDEX_op_add2_i32:
1287         case INDEX_op_sub2_i32:
1288             if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])
1289                 && arg_is_const(op->args[4]) && arg_is_const(op->args[5])) {
1290                 uint32_t al = arg_info(op->args[2])->val;
1291                 uint32_t ah = arg_info(op->args[3])->val;
1292                 uint32_t bl = arg_info(op->args[4])->val;
1293                 uint32_t bh = arg_info(op->args[5])->val;
1294                 uint64_t a = ((uint64_t)ah << 32) | al;
1295                 uint64_t b = ((uint64_t)bh << 32) | bl;
1296                 TCGArg rl, rh;
1297                 TCGOp *op2 = tcg_op_insert_before(s, op, INDEX_op_movi_i32);
1298 
1299                 if (opc == INDEX_op_add2_i32) {
1300                     a += b;
1301                 } else {
1302                     a -= b;
1303                 }
1304 
1305                 rl = op->args[0];
1306                 rh = op->args[1];
1307                 tcg_opt_gen_movi(s, op, rl, (int32_t)a);
1308                 tcg_opt_gen_movi(s, op2, rh, (int32_t)(a >> 32));
1309                 break;
1310             }
1311             goto do_default;
1312 
1313         case INDEX_op_mulu2_i32:
1314             if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])) {
1315                 uint32_t a = arg_info(op->args[2])->val;
1316                 uint32_t b = arg_info(op->args[3])->val;
1317                 uint64_t r = (uint64_t)a * b;
1318                 TCGArg rl, rh;
1319                 TCGOp *op2 = tcg_op_insert_before(s, op, INDEX_op_movi_i32);
1320 
1321                 rl = op->args[0];
1322                 rh = op->args[1];
1323                 tcg_opt_gen_movi(s, op, rl, (int32_t)r);
1324                 tcg_opt_gen_movi(s, op2, rh, (int32_t)(r >> 32));
1325                 break;
1326             }
1327             goto do_default;
1328 
1329         case INDEX_op_brcond2_i32:
1330             tmp = do_constant_folding_cond2(&op->args[0], &op->args[2],
1331                                             op->args[4]);
1332             if (tmp != 2) {
1333                 if (tmp) {
1334             do_brcond_true:
1335                     bitmap_zero(temps_used.l, nb_temps);
1336                     op->opc = INDEX_op_br;
1337                     op->args[0] = op->args[5];
1338                 } else {
1339             do_brcond_false:
1340                     tcg_op_remove(s, op);
1341                 }
1342             } else if ((op->args[4] == TCG_COND_LT
1343                         || op->args[4] == TCG_COND_GE)
1344                        && arg_is_const(op->args[2])
1345                        && arg_info(op->args[2])->val == 0
1346                        && arg_is_const(op->args[3])
1347                        && arg_info(op->args[3])->val == 0) {
1348                 /* Simplify LT/GE comparisons vs zero to a single compare
1349                    vs the high word of the input.  */
1350             do_brcond_high:
1351                 bitmap_zero(temps_used.l, nb_temps);
1352                 op->opc = INDEX_op_brcond_i32;
1353                 op->args[0] = op->args[1];
1354                 op->args[1] = op->args[3];
1355                 op->args[2] = op->args[4];
1356                 op->args[3] = op->args[5];
1357             } else if (op->args[4] == TCG_COND_EQ) {
1358                 /* Simplify EQ comparisons where one of the pairs
1359                    can be simplified.  */
1360                 tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
1361                                                op->args[0], op->args[2],
1362                                                TCG_COND_EQ);
1363                 if (tmp == 0) {
1364                     goto do_brcond_false;
1365                 } else if (tmp == 1) {
1366                     goto do_brcond_high;
1367                 }
1368                 tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
1369                                                op->args[1], op->args[3],
1370                                                TCG_COND_EQ);
1371                 if (tmp == 0) {
1372                     goto do_brcond_false;
1373                 } else if (tmp != 1) {
1374                     goto do_default;
1375                 }
1376             do_brcond_low:
1377                 bitmap_zero(temps_used.l, nb_temps);
1378                 op->opc = INDEX_op_brcond_i32;
1379                 op->args[1] = op->args[2];
1380                 op->args[2] = op->args[4];
1381                 op->args[3] = op->args[5];
1382             } else if (op->args[4] == TCG_COND_NE) {
1383                 /* Simplify NE comparisons where one of the pairs
1384                    can be simplified.  */
1385                 tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
1386                                                op->args[0], op->args[2],
1387                                                TCG_COND_NE);
1388                 if (tmp == 0) {
1389                     goto do_brcond_high;
1390                 } else if (tmp == 1) {
1391                     goto do_brcond_true;
1392                 }
1393                 tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
1394                                                op->args[1], op->args[3],
1395                                                TCG_COND_NE);
1396                 if (tmp == 0) {
1397                     goto do_brcond_low;
1398                 } else if (tmp == 1) {
1399                     goto do_brcond_true;
1400                 }
1401                 goto do_default;
1402             } else {
1403                 goto do_default;
1404             }
1405             break;
1406 
1407         case INDEX_op_setcond2_i32:
1408             tmp = do_constant_folding_cond2(&op->args[1], &op->args[3],
1409                                             op->args[5]);
1410             if (tmp != 2) {
1411             do_setcond_const:
1412                 tcg_opt_gen_movi(s, op, op->args[0], tmp);
1413             } else if ((op->args[5] == TCG_COND_LT
1414                         || op->args[5] == TCG_COND_GE)
1415                        && arg_is_const(op->args[3])
1416                        && arg_info(op->args[3])->val == 0
1417                        && arg_is_const(op->args[4])
1418                        && arg_info(op->args[4])->val == 0) {
1419                 /* Simplify LT/GE comparisons vs zero to a single compare
1420                    vs the high word of the input.  */
1421             do_setcond_high:
1422                 reset_temp(op->args[0]);
1423                 arg_info(op->args[0])->mask = 1;
1424                 op->opc = INDEX_op_setcond_i32;
1425                 op->args[1] = op->args[2];
1426                 op->args[2] = op->args[4];
1427                 op->args[3] = op->args[5];
1428             } else if (op->args[5] == TCG_COND_EQ) {
1429                 /* Simplify EQ comparisons where one of the pairs
1430                    can be simplified.  */
1431                 tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
1432                                                op->args[1], op->args[3],
1433                                                TCG_COND_EQ);
1434                 if (tmp == 0) {
1435                     goto do_setcond_const;
1436                 } else if (tmp == 1) {
1437                     goto do_setcond_high;
1438                 }
1439                 tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
1440                                                op->args[2], op->args[4],
1441                                                TCG_COND_EQ);
1442                 if (tmp == 0) {
1443                     goto do_setcond_high;
1444                 } else if (tmp != 1) {
1445                     goto do_default;
1446                 }
1447             do_setcond_low:
1448                 reset_temp(op->args[0]);
1449                 arg_info(op->args[0])->mask = 1;
1450                 op->opc = INDEX_op_setcond_i32;
1451                 op->args[2] = op->args[3];
1452                 op->args[3] = op->args[5];
1453             } else if (op->args[5] == TCG_COND_NE) {
1454                 /* Simplify NE comparisons where one of the pairs
1455                    can be simplified.  */
1456                 tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
1457                                                op->args[1], op->args[3],
1458                                                TCG_COND_NE);
1459                 if (tmp == 0) {
1460                     goto do_setcond_high;
1461                 } else if (tmp == 1) {
1462                     goto do_setcond_const;
1463                 }
1464                 tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
1465                                                op->args[2], op->args[4],
1466                                                TCG_COND_NE);
1467                 if (tmp == 0) {
1468                     goto do_setcond_low;
1469                 } else if (tmp == 1) {
1470                     goto do_setcond_const;
1471                 }
1472                 goto do_default;
1473             } else {
1474                 goto do_default;
1475             }
1476             break;
1477 
1478         case INDEX_op_call:
1479             if (!(op->args[nb_oargs + nb_iargs + 1]
1480                   & (TCG_CALL_NO_READ_GLOBALS | TCG_CALL_NO_WRITE_GLOBALS))) {
1481                 for (i = 0; i < nb_globals; i++) {
1482                     if (test_bit(i, temps_used.l)) {
1483                         reset_ts(&s->temps[i]);
1484                     }
1485                 }
1486             }
1487             /* fall through */
1488 
1489         default:
1490         do_default:
1491             /*
1492              * Default case: we know nothing about operation (or were unable
1493              * to compute the operation result) so no propagation is done.
1494              */
1495             for (i = 0; i < nb_oargs; i++) {
1496                 reset_temp(op->args[i]);
1497                 /*
1498                  * Save the corresponding known-zero bits mask for the
1499                  * first output argument (only one supported so far).
1500                  */
1501                 if (i == 0) {
1502                     arg_info(op->args[i])->mask = mask;
1503                 }
1504             }
1505             break;
1506 
1507         case INDEX_op_set_label:
1508             /* Trash everything at the start of a new extended bb. */
1509             bitmap_zero(temps_used.l, nb_temps);
1510             break;
1511         }
1512 
1513         /* Eliminate duplicate and redundant fence instructions.  */
1514         if (prev_mb) {
1515             switch (opc) {
1516             case INDEX_op_mb:
1517                 /* Merge two barriers of the same type into one,
1518                  * or a weaker barrier into a stronger one,
1519                  * or two weaker barriers into a stronger one.
1520                  *   mb X; mb Y => mb X|Y
1521                  *   mb; strl => mb; st
1522                  *   ldaq; mb => ld; mb
1523                  *   ldaq; strl => ld; mb; st
1524                  * Other combinations are also merged into a strong
1525                  * barrier.  This is stricter than specified but for
1526                  * the purposes of TCG is better than not optimizing.
1527                  */
1528                 prev_mb->args[0] |= op->args[0];
1529                 tcg_op_remove(s, op);
1530                 break;
1531 
1532             default:
1533                 /* Opcodes that end the block stop the optimization.  */
1534                 if ((def->flags & TCG_OPF_BB_END) == 0) {
1535                     break;
1536                 }
1537                 /* fallthru */
1538             case INDEX_op_qemu_ld_i32:
1539             case INDEX_op_qemu_ld_i64:
1540             case INDEX_op_qemu_st_i32:
1541             case INDEX_op_qemu_st_i64:
1542             case INDEX_op_call:
1543                 /* Opcodes that touch guest memory stop the optimization.  */
1544                 prev_mb = NULL;
1545                 break;
1546             }
1547         } else if (opc == INDEX_op_mb) {
1548             prev_mb = op;
1549         }
1550     }
1551 }
1552