xref: /openbmc/qemu/tcg/optimize.c (revision fb37726d)
1 /*
2  * Optimizations for Tiny Code Generator for QEMU
3  *
4  * Copyright (c) 2010 Samsung Electronics.
5  * Contributed by Kirill Batuzov <batuzovk@ispras.ru>
6  *
7  * Permission is hereby granted, free of charge, to any person obtaining a copy
8  * of this software and associated documentation files (the "Software"), to deal
9  * in the Software without restriction, including without limitation the rights
10  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11  * copies of the Software, and to permit persons to whom the Software is
12  * furnished to do so, subject to the following conditions:
13  *
14  * The above copyright notice and this permission notice shall be included in
15  * all copies or substantial portions of the Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23  * THE SOFTWARE.
24  */
25 
26 #include "config.h"
27 
28 #include <stdlib.h>
29 #include <stdio.h>
30 
31 #include "qemu-common.h"
32 #include "tcg-op.h"
33 
/* Expand to the pair of case labels covering both widths of opcode X,
   e.g. CASE_OP_32_64(add) yields
   "case INDEX_op_add_i32: case INDEX_op_add_i64".  */
#define CASE_OP_32_64(x)                        \
        glue(glue(case INDEX_op_, x), _i32):    \
        glue(glue(case INDEX_op_, x), _i64)
37 
/* Per-temp analysis state tracked by the optimizer.  */
struct tcg_temp_info {
    bool is_const;           /* true when VAL holds the temp's known value */
    uint16_t prev_copy;      /* doubly linked circular list of temps */
    uint16_t next_copy;      /*   known to hold the same value       */
    tcg_target_ulong val;    /* constant value; valid only when is_const */
    tcg_target_ulong mask;   /* bits that may be set; a 0 bit is known zero */
};

/* State for every temp, indexed by temp number.  An entry is only
   valid once the matching bit in temps_used has been set by
   init_temp_info().  */
static struct tcg_temp_info temps[TCG_MAX_TEMPS];
static TCGTempSet temps_used;
48 
/* True when ARG is currently known to hold a constant value.  */
static inline bool temp_is_const(TCGArg arg)
{
    return temps[arg].is_const;
}
53 
/* True when ARG is on a non-trivial copy list, i.e. at least one other
   temp is known to hold the same value.  */
static inline bool temp_is_copy(TCGArg arg)
{
    return temps[arg].next_copy != arg;
}
58 
59 /* Reset TEMP's state, possibly removing the temp for the list of copies.  */
60 static void reset_temp(TCGArg temp)
61 {
62     temps[temps[temp].next_copy].prev_copy = temps[temp].prev_copy;
63     temps[temps[temp].prev_copy].next_copy = temps[temp].next_copy;
64     temps[temp].next_copy = temp;
65     temps[temp].prev_copy = temp;
66     temps[temp].is_const = false;
67     temps[temp].mask = -1;
68 }
69 
/* Reset all temporaries, given that there are NB_TEMPS of them.  */
static void reset_all_temps(int nb_temps)
{
    /* Only the used-bitmap is cleared; each temps[] entry is lazily
       reinitialized by init_temp_info() on its next use.  */
    bitmap_zero(temps_used.l, nb_temps);
}
75 
76 /* Initialize and activate a temporary.  */
77 static void init_temp_info(TCGArg temp)
78 {
79     if (!test_bit(temp, temps_used.l)) {
80         temps[temp].next_copy = temp;
81         temps[temp].prev_copy = temp;
82         temps[temp].is_const = false;
83         temps[temp].mask = -1;
84         set_bit(temp, temps_used.l);
85     }
86 }
87 
/* Allocate a new op with NARGS argument slots from the op/parm buffers
   and splice it into the op list immediately before OLD_OP.  Returns
   the new op; its argument slots are left for the caller to fill.  */
static TCGOp *insert_op_before(TCGContext *s, TCGOp *old_op,
                                TCGOpcode opc, int nargs)
{
    int oi = s->gen_next_op_idx;
    int pi = s->gen_next_parm_idx;
    int prev = old_op->prev;
    int next = old_op - s->gen_op_buf;
    TCGOp *new_op;

    tcg_debug_assert(oi < OPC_BUF_SIZE);
    tcg_debug_assert(pi + nargs <= OPPARAM_BUF_SIZE);
    s->gen_next_op_idx = oi + 1;
    s->gen_next_parm_idx = pi + nargs;

    /* The op lives at the end of the buffer but is linked between
       OLD_OP's predecessor and OLD_OP.  */
    new_op = &s->gen_op_buf[oi];
    *new_op = (TCGOp){
        .opc = opc,
        .args = pi,
        .prev = prev,
        .next = next
    };
    if (prev >= 0) {
        s->gen_op_buf[prev].next = oi;
    } else {
        /* OLD_OP was the list head; the new op becomes the head.  */
        s->gen_first_op_idx = oi;
    }
    old_op->prev = oi;

    return new_op;
}
118 
119 static int op_bits(TCGOpcode op)
120 {
121     const TCGOpDef *def = &tcg_op_defs[op];
122     return def->flags & TCG_OPF_64BIT ? 64 : 32;
123 }
124 
125 static TCGOpcode op_to_mov(TCGOpcode op)
126 {
127     switch (op_bits(op)) {
128     case 32:
129         return INDEX_op_mov_i32;
130     case 64:
131         return INDEX_op_mov_i64;
132     default:
133         fprintf(stderr, "op_to_mov: unexpected return value of "
134                 "function op_bits.\n");
135         tcg_abort();
136     }
137 }
138 
139 static TCGOpcode op_to_movi(TCGOpcode op)
140 {
141     switch (op_bits(op)) {
142     case 32:
143         return INDEX_op_movi_i32;
144     case 64:
145         return INDEX_op_movi_i64;
146     default:
147         fprintf(stderr, "op_to_movi: unexpected return value of "
148                 "function op_bits.\n");
149         tcg_abort();
150     }
151 }
152 
153 static TCGArg find_better_copy(TCGContext *s, TCGArg temp)
154 {
155     TCGArg i;
156 
157     /* If this is already a global, we can't do better. */
158     if (temp < s->nb_globals) {
159         return temp;
160     }
161 
162     /* Search for a global first. */
163     for (i = temps[temp].next_copy ; i != temp ; i = temps[i].next_copy) {
164         if (i < s->nb_globals) {
165             return i;
166         }
167     }
168 
169     /* If it is a temp, search for a temp local. */
170     if (!s->temps[temp].temp_local) {
171         for (i = temps[temp].next_copy ; i != temp ; i = temps[i].next_copy) {
172             if (s->temps[i].temp_local) {
173                 return i;
174             }
175         }
176     }
177 
178     /* Failure to find a better representation, return the same temp. */
179     return temp;
180 }
181 
182 static bool temps_are_copies(TCGArg arg1, TCGArg arg2)
183 {
184     TCGArg i;
185 
186     if (arg1 == arg2) {
187         return true;
188     }
189 
190     if (!temp_is_copy(arg1) || !temp_is_copy(arg2)) {
191         return false;
192     }
193 
194     for (i = temps[arg1].next_copy ; i != arg1 ; i = temps[i].next_copy) {
195         if (i == arg2) {
196             return true;
197         }
198     }
199 
200     return false;
201 }
202 
203 static void tcg_opt_gen_movi(TCGContext *s, TCGOp *op, TCGArg *args,
204                              TCGArg dst, TCGArg val)
205 {
206     TCGOpcode new_op = op_to_movi(op->opc);
207     tcg_target_ulong mask;
208 
209     op->opc = new_op;
210 
211     reset_temp(dst);
212     temps[dst].is_const = true;
213     temps[dst].val = val;
214     mask = val;
215     if (TCG_TARGET_REG_BITS > 32 && new_op == INDEX_op_movi_i32) {
216         /* High bits of the destination are now garbage.  */
217         mask |= ~0xffffffffull;
218     }
219     temps[dst].mask = mask;
220 
221     args[0] = dst;
222     args[1] = val;
223 }
224 
/* Rewrite OP in place as "mov DST, SRC", or remove it entirely if the
   two temps are already known copies.  Records DST as a copy of SRC
   when their types match.  */
static void tcg_opt_gen_mov(TCGContext *s, TCGOp *op, TCGArg *args,
                            TCGArg dst, TCGArg src)
{
    if (temps_are_copies(dst, src)) {
        /* The move is a no-op; drop the op from the stream.  */
        tcg_op_remove(s, op);
        return;
    }

    TCGOpcode new_op = op_to_mov(op->opc);
    tcg_target_ulong mask;

    op->opc = new_op;

    reset_temp(dst);
    /* The destination inherits the known-bits mask of the source.  */
    mask = temps[src].mask;
    if (TCG_TARGET_REG_BITS > 32 && new_op == INDEX_op_mov_i32) {
        /* High bits of the destination are now garbage.  */
        mask |= ~0xffffffffull;
    }
    temps[dst].mask = mask;

    if (s->temps[src].type == s->temps[dst].type) {
        /* Same-typed temps become copies: splice DST into SRC's
           circular copy list, immediately after SRC.  */
        temps[dst].next_copy = temps[src].next_copy;
        temps[dst].prev_copy = src;
        temps[temps[dst].next_copy].prev_copy = dst;
        temps[src].next_copy = dst;
        temps[dst].is_const = temps[src].is_const;
        temps[dst].val = temps[src].val;
    }

    args[0] = dst;
    args[1] = src;
}
258 
/* Evaluate operation OP on constant operands X and Y (Y is unused for
   unary ops).  The result is returned at host register width; the
   caller (do_constant_folding) truncates 32-bit results.  */
static TCGArg do_constant_folding_2(TCGOpcode op, TCGArg x, TCGArg y)
{
    uint64_t l64, h64;

    switch (op) {
    CASE_OP_32_64(add):
        return x + y;

    CASE_OP_32_64(sub):
        return x - y;

    CASE_OP_32_64(mul):
        return x * y;

    CASE_OP_32_64(and):
        return x & y;

    CASE_OP_32_64(or):
        return x | y;

    CASE_OP_32_64(xor):
        return x ^ y;

    /* Shift and rotate counts are masked to the operand width.  */
    case INDEX_op_shl_i32:
        return (uint32_t)x << (y & 31);

    case INDEX_op_shl_i64:
        return (uint64_t)x << (y & 63);

    case INDEX_op_shr_i32:
        return (uint32_t)x >> (y & 31);

    case INDEX_op_shr_i64:
        return (uint64_t)x >> (y & 63);

    case INDEX_op_sar_i32:
        return (int32_t)x >> (y & 31);

    case INDEX_op_sar_i64:
        return (int64_t)x >> (y & 63);

    case INDEX_op_rotr_i32:
        return ror32(x, y & 31);

    case INDEX_op_rotr_i64:
        return ror64(x, y & 63);

    case INDEX_op_rotl_i32:
        return rol32(x, y & 31);

    case INDEX_op_rotl_i64:
        return rol64(x, y & 63);

    CASE_OP_32_64(not):
        return ~x;

    CASE_OP_32_64(neg):
        return -x;

    CASE_OP_32_64(andc):
        return x & ~y;

    CASE_OP_32_64(orc):
        return x | ~y;

    CASE_OP_32_64(eqv):
        return ~(x ^ y);

    CASE_OP_32_64(nand):
        return ~(x & y);

    CASE_OP_32_64(nor):
        return ~(x | y);

    /* Sign/zero extensions are performed via C integer casts.  */
    CASE_OP_32_64(ext8s):
        return (int8_t)x;

    CASE_OP_32_64(ext16s):
        return (int16_t)x;

    CASE_OP_32_64(ext8u):
        return (uint8_t)x;

    CASE_OP_32_64(ext16u):
        return (uint16_t)x;

    case INDEX_op_ext_i32_i64:
    case INDEX_op_ext32s_i64:
        return (int32_t)x;

    case INDEX_op_extu_i32_i64:
    case INDEX_op_extrl_i64_i32:
    case INDEX_op_ext32u_i64:
        return (uint32_t)x;

    case INDEX_op_extrh_i64_i32:
        return (uint64_t)x >> 32;

    /* Widening multiplies: return only the high half.  */
    case INDEX_op_muluh_i32:
        return ((uint64_t)(uint32_t)x * (uint32_t)y) >> 32;
    case INDEX_op_mulsh_i32:
        return ((int64_t)(int32_t)x * (int32_t)y) >> 32;

    case INDEX_op_muluh_i64:
        mulu64(&l64, &h64, x, y);
        return h64;
    case INDEX_op_mulsh_i64:
        muls64(&l64, &h64, x, y);
        return h64;

    case INDEX_op_div_i32:
        /* Avoid crashing on divide by zero, otherwise undefined.  */
        return (int32_t)x / ((int32_t)y ? : 1);
    case INDEX_op_divu_i32:
        return (uint32_t)x / ((uint32_t)y ? : 1);
    case INDEX_op_div_i64:
        return (int64_t)x / ((int64_t)y ? : 1);
    case INDEX_op_divu_i64:
        return (uint64_t)x / ((uint64_t)y ? : 1);

    /* Likewise substitute 1 for a zero modulus.  */
    case INDEX_op_rem_i32:
        return (int32_t)x % ((int32_t)y ? : 1);
    case INDEX_op_remu_i32:
        return (uint32_t)x % ((uint32_t)y ? : 1);
    case INDEX_op_rem_i64:
        return (int64_t)x % ((int64_t)y ? : 1);
    case INDEX_op_remu_i64:
        return (uint64_t)x % ((uint64_t)y ? : 1);

    default:
        fprintf(stderr,
                "Unrecognized operation %d in do_constant_folding.\n", op);
        tcg_abort();
    }
}
394 
395 static TCGArg do_constant_folding(TCGOpcode op, TCGArg x, TCGArg y)
396 {
397     TCGArg res = do_constant_folding_2(op, x, y);
398     if (op_bits(op) == 32) {
399         res = (int32_t)res;
400     }
401     return res;
402 }
403 
404 static bool do_constant_folding_cond_32(uint32_t x, uint32_t y, TCGCond c)
405 {
406     switch (c) {
407     case TCG_COND_EQ:
408         return x == y;
409     case TCG_COND_NE:
410         return x != y;
411     case TCG_COND_LT:
412         return (int32_t)x < (int32_t)y;
413     case TCG_COND_GE:
414         return (int32_t)x >= (int32_t)y;
415     case TCG_COND_LE:
416         return (int32_t)x <= (int32_t)y;
417     case TCG_COND_GT:
418         return (int32_t)x > (int32_t)y;
419     case TCG_COND_LTU:
420         return x < y;
421     case TCG_COND_GEU:
422         return x >= y;
423     case TCG_COND_LEU:
424         return x <= y;
425     case TCG_COND_GTU:
426         return x > y;
427     default:
428         tcg_abort();
429     }
430 }
431 
432 static bool do_constant_folding_cond_64(uint64_t x, uint64_t y, TCGCond c)
433 {
434     switch (c) {
435     case TCG_COND_EQ:
436         return x == y;
437     case TCG_COND_NE:
438         return x != y;
439     case TCG_COND_LT:
440         return (int64_t)x < (int64_t)y;
441     case TCG_COND_GE:
442         return (int64_t)x >= (int64_t)y;
443     case TCG_COND_LE:
444         return (int64_t)x <= (int64_t)y;
445     case TCG_COND_GT:
446         return (int64_t)x > (int64_t)y;
447     case TCG_COND_LTU:
448         return x < y;
449     case TCG_COND_GEU:
450         return x >= y;
451     case TCG_COND_LEU:
452         return x <= y;
453     case TCG_COND_GTU:
454         return x > y;
455     default:
456         tcg_abort();
457     }
458 }
459 
460 static bool do_constant_folding_cond_eq(TCGCond c)
461 {
462     switch (c) {
463     case TCG_COND_GT:
464     case TCG_COND_LTU:
465     case TCG_COND_LT:
466     case TCG_COND_GTU:
467     case TCG_COND_NE:
468         return 0;
469     case TCG_COND_GE:
470     case TCG_COND_GEU:
471     case TCG_COND_LE:
472     case TCG_COND_LEU:
473     case TCG_COND_EQ:
474         return 1;
475     default:
476         tcg_abort();
477     }
478 }
479 
480 /* Return 2 if the condition can't be simplified, and the result
481    of the condition (0 or 1) if it can */
482 static TCGArg do_constant_folding_cond(TCGOpcode op, TCGArg x,
483                                        TCGArg y, TCGCond c)
484 {
485     if (temp_is_const(x) && temp_is_const(y)) {
486         switch (op_bits(op)) {
487         case 32:
488             return do_constant_folding_cond_32(temps[x].val, temps[y].val, c);
489         case 64:
490             return do_constant_folding_cond_64(temps[x].val, temps[y].val, c);
491         default:
492             tcg_abort();
493         }
494     } else if (temps_are_copies(x, y)) {
495         return do_constant_folding_cond_eq(c);
496     } else if (temp_is_const(y) && temps[y].val == 0) {
497         switch (c) {
498         case TCG_COND_LTU:
499             return 0;
500         case TCG_COND_GEU:
501             return 1;
502         default:
503             return 2;
504         }
505     } else {
506         return 2;
507     }
508 }
509 
510 /* Return 2 if the condition can't be simplified, and the result
511    of the condition (0 or 1) if it can */
512 static TCGArg do_constant_folding_cond2(TCGArg *p1, TCGArg *p2, TCGCond c)
513 {
514     TCGArg al = p1[0], ah = p1[1];
515     TCGArg bl = p2[0], bh = p2[1];
516 
517     if (temp_is_const(bl) && temp_is_const(bh)) {
518         uint64_t b = ((uint64_t)temps[bh].val << 32) | (uint32_t)temps[bl].val;
519 
520         if (temp_is_const(al) && temp_is_const(ah)) {
521             uint64_t a;
522             a = ((uint64_t)temps[ah].val << 32) | (uint32_t)temps[al].val;
523             return do_constant_folding_cond_64(a, b, c);
524         }
525         if (b == 0) {
526             switch (c) {
527             case TCG_COND_LTU:
528                 return 0;
529             case TCG_COND_GEU:
530                 return 1;
531             default:
532                 break;
533             }
534         }
535     }
536     if (temps_are_copies(al, bl) && temps_are_copies(ah, bh)) {
537         return do_constant_folding_cond_eq(c);
538     }
539     return 2;
540 }
541 
542 static bool swap_commutative(TCGArg dest, TCGArg *p1, TCGArg *p2)
543 {
544     TCGArg a1 = *p1, a2 = *p2;
545     int sum = 0;
546     sum += temp_is_const(a1);
547     sum -= temp_is_const(a2);
548 
549     /* Prefer the constant in second argument, and then the form
550        op a, a, b, which is better handled on non-RISC hosts. */
551     if (sum > 0 || (sum == 0 && dest == a2)) {
552         *p1 = a2;
553         *p2 = a1;
554         return true;
555     }
556     return false;
557 }
558 
559 static bool swap_commutative2(TCGArg *p1, TCGArg *p2)
560 {
561     int sum = 0;
562     sum += temp_is_const(p1[0]);
563     sum += temp_is_const(p1[1]);
564     sum -= temp_is_const(p2[0]);
565     sum -= temp_is_const(p2[1]);
566     if (sum > 0) {
567         TCGArg t;
568         t = p1[0], p1[0] = p2[0], p2[0] = t;
569         t = p1[1], p1[1] = p2[1], p2[1] = t;
570         return true;
571     }
572     return false;
573 }
574 
575 /* Propagate constants and copies, fold constant expressions. */
576 void tcg_optimize(TCGContext *s)
577 {
578     int oi, oi_next, nb_temps, nb_globals;
579 
580     /* Array VALS has an element for each temp.
581        If this temp holds a constant then its value is kept in VALS' element.
582        If this temp is a copy of other ones then the other copies are
583        available through the doubly linked circular list. */
584 
585     nb_temps = s->nb_temps;
586     nb_globals = s->nb_globals;
587     reset_all_temps(nb_temps);
588 
589     for (oi = s->gen_first_op_idx; oi >= 0; oi = oi_next) {
590         tcg_target_ulong mask, partmask, affected;
591         int nb_oargs, nb_iargs, i;
592         TCGArg tmp;
593 
594         TCGOp * const op = &s->gen_op_buf[oi];
595         TCGArg * const args = &s->gen_opparam_buf[op->args];
596         TCGOpcode opc = op->opc;
597         const TCGOpDef *def = &tcg_op_defs[opc];
598 
599         oi_next = op->next;
600 
601         /* Count the arguments, and initialize the temps that are
602            going to be used */
603         if (opc == INDEX_op_call) {
604             nb_oargs = op->callo;
605             nb_iargs = op->calli;
606             for (i = 0; i < nb_oargs + nb_iargs; i++) {
607                 tmp = args[i];
608                 if (tmp != TCG_CALL_DUMMY_ARG) {
609                     init_temp_info(tmp);
610                 }
611             }
612         } else {
613             nb_oargs = def->nb_oargs;
614             nb_iargs = def->nb_iargs;
615             for (i = 0; i < nb_oargs + nb_iargs; i++) {
616                 init_temp_info(args[i]);
617             }
618         }
619 
620         /* Do copy propagation */
621         for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
622             if (temp_is_copy(args[i])) {
623                 args[i] = find_better_copy(s, args[i]);
624             }
625         }
626 
627         /* For commutative operations make constant second argument */
628         switch (opc) {
629         CASE_OP_32_64(add):
630         CASE_OP_32_64(mul):
631         CASE_OP_32_64(and):
632         CASE_OP_32_64(or):
633         CASE_OP_32_64(xor):
634         CASE_OP_32_64(eqv):
635         CASE_OP_32_64(nand):
636         CASE_OP_32_64(nor):
637         CASE_OP_32_64(muluh):
638         CASE_OP_32_64(mulsh):
639             swap_commutative(args[0], &args[1], &args[2]);
640             break;
641         CASE_OP_32_64(brcond):
642             if (swap_commutative(-1, &args[0], &args[1])) {
643                 args[2] = tcg_swap_cond(args[2]);
644             }
645             break;
646         CASE_OP_32_64(setcond):
647             if (swap_commutative(args[0], &args[1], &args[2])) {
648                 args[3] = tcg_swap_cond(args[3]);
649             }
650             break;
651         CASE_OP_32_64(movcond):
652             if (swap_commutative(-1, &args[1], &args[2])) {
653                 args[5] = tcg_swap_cond(args[5]);
654             }
655             /* For movcond, we canonicalize the "false" input reg to match
656                the destination reg so that the tcg backend can implement
657                a "move if true" operation.  */
658             if (swap_commutative(args[0], &args[4], &args[3])) {
659                 args[5] = tcg_invert_cond(args[5]);
660             }
661             break;
662         CASE_OP_32_64(add2):
663             swap_commutative(args[0], &args[2], &args[4]);
664             swap_commutative(args[1], &args[3], &args[5]);
665             break;
666         CASE_OP_32_64(mulu2):
667         CASE_OP_32_64(muls2):
668             swap_commutative(args[0], &args[2], &args[3]);
669             break;
670         case INDEX_op_brcond2_i32:
671             if (swap_commutative2(&args[0], &args[2])) {
672                 args[4] = tcg_swap_cond(args[4]);
673             }
674             break;
675         case INDEX_op_setcond2_i32:
676             if (swap_commutative2(&args[1], &args[3])) {
677                 args[5] = tcg_swap_cond(args[5]);
678             }
679             break;
680         default:
681             break;
682         }
683 
684         /* Simplify expressions for "shift/rot r, 0, a => movi r, 0",
685            and "sub r, 0, a => neg r, a" case.  */
686         switch (opc) {
687         CASE_OP_32_64(shl):
688         CASE_OP_32_64(shr):
689         CASE_OP_32_64(sar):
690         CASE_OP_32_64(rotl):
691         CASE_OP_32_64(rotr):
692             if (temp_is_const(args[1]) && temps[args[1]].val == 0) {
693                 tcg_opt_gen_movi(s, op, args, args[0], 0);
694                 continue;
695             }
696             break;
697         CASE_OP_32_64(sub):
698             {
699                 TCGOpcode neg_op;
700                 bool have_neg;
701 
702                 if (temp_is_const(args[2])) {
703                     /* Proceed with possible constant folding. */
704                     break;
705                 }
706                 if (opc == INDEX_op_sub_i32) {
707                     neg_op = INDEX_op_neg_i32;
708                     have_neg = TCG_TARGET_HAS_neg_i32;
709                 } else {
710                     neg_op = INDEX_op_neg_i64;
711                     have_neg = TCG_TARGET_HAS_neg_i64;
712                 }
713                 if (!have_neg) {
714                     break;
715                 }
716                 if (temp_is_const(args[1]) && temps[args[1]].val == 0) {
717                     op->opc = neg_op;
718                     reset_temp(args[0]);
719                     args[1] = args[2];
720                     continue;
721                 }
722             }
723             break;
724         CASE_OP_32_64(xor):
725         CASE_OP_32_64(nand):
726             if (!temp_is_const(args[1])
727                 && temp_is_const(args[2]) && temps[args[2]].val == -1) {
728                 i = 1;
729                 goto try_not;
730             }
731             break;
732         CASE_OP_32_64(nor):
733             if (!temp_is_const(args[1])
734                 && temp_is_const(args[2]) && temps[args[2]].val == 0) {
735                 i = 1;
736                 goto try_not;
737             }
738             break;
739         CASE_OP_32_64(andc):
740             if (!temp_is_const(args[2])
741                 && temp_is_const(args[1]) && temps[args[1]].val == -1) {
742                 i = 2;
743                 goto try_not;
744             }
745             break;
746         CASE_OP_32_64(orc):
747         CASE_OP_32_64(eqv):
748             if (!temp_is_const(args[2])
749                 && temp_is_const(args[1]) && temps[args[1]].val == 0) {
750                 i = 2;
751                 goto try_not;
752             }
753             break;
754         try_not:
755             {
756                 TCGOpcode not_op;
757                 bool have_not;
758 
759                 if (def->flags & TCG_OPF_64BIT) {
760                     not_op = INDEX_op_not_i64;
761                     have_not = TCG_TARGET_HAS_not_i64;
762                 } else {
763                     not_op = INDEX_op_not_i32;
764                     have_not = TCG_TARGET_HAS_not_i32;
765                 }
766                 if (!have_not) {
767                     break;
768                 }
769                 op->opc = not_op;
770                 reset_temp(args[0]);
771                 args[1] = args[i];
772                 continue;
773             }
774         default:
775             break;
776         }
777 
778         /* Simplify expression for "op r, a, const => mov r, a" cases */
779         switch (opc) {
780         CASE_OP_32_64(add):
781         CASE_OP_32_64(sub):
782         CASE_OP_32_64(shl):
783         CASE_OP_32_64(shr):
784         CASE_OP_32_64(sar):
785         CASE_OP_32_64(rotl):
786         CASE_OP_32_64(rotr):
787         CASE_OP_32_64(or):
788         CASE_OP_32_64(xor):
789         CASE_OP_32_64(andc):
790             if (!temp_is_const(args[1])
791                 && temp_is_const(args[2]) && temps[args[2]].val == 0) {
792                 tcg_opt_gen_mov(s, op, args, args[0], args[1]);
793                 continue;
794             }
795             break;
796         CASE_OP_32_64(and):
797         CASE_OP_32_64(orc):
798         CASE_OP_32_64(eqv):
799             if (!temp_is_const(args[1])
800                 && temp_is_const(args[2]) && temps[args[2]].val == -1) {
801                 tcg_opt_gen_mov(s, op, args, args[0], args[1]);
802                 continue;
803             }
804             break;
805         default:
806             break;
807         }
808 
809         /* Simplify using known-zero bits. Currently only ops with a single
810            output argument is supported. */
811         mask = -1;
812         affected = -1;
813         switch (opc) {
814         CASE_OP_32_64(ext8s):
815             if ((temps[args[1]].mask & 0x80) != 0) {
816                 break;
817             }
818         CASE_OP_32_64(ext8u):
819             mask = 0xff;
820             goto and_const;
821         CASE_OP_32_64(ext16s):
822             if ((temps[args[1]].mask & 0x8000) != 0) {
823                 break;
824             }
825         CASE_OP_32_64(ext16u):
826             mask = 0xffff;
827             goto and_const;
828         case INDEX_op_ext32s_i64:
829             if ((temps[args[1]].mask & 0x80000000) != 0) {
830                 break;
831             }
832         case INDEX_op_ext32u_i64:
833             mask = 0xffffffffU;
834             goto and_const;
835 
836         CASE_OP_32_64(and):
837             mask = temps[args[2]].mask;
838             if (temp_is_const(args[2])) {
839         and_const:
840                 affected = temps[args[1]].mask & ~mask;
841             }
842             mask = temps[args[1]].mask & mask;
843             break;
844 
845         case INDEX_op_ext_i32_i64:
846             if ((temps[args[1]].mask & 0x80000000) != 0) {
847                 break;
848             }
849         case INDEX_op_extu_i32_i64:
850             /* We do not compute affected as it is a size changing op.  */
851             mask = (uint32_t)temps[args[1]].mask;
852             break;
853 
854         CASE_OP_32_64(andc):
855             /* Known-zeros does not imply known-ones.  Therefore unless
856                args[2] is constant, we can't infer anything from it.  */
857             if (temp_is_const(args[2])) {
858                 mask = ~temps[args[2]].mask;
859                 goto and_const;
860             }
861             /* But we certainly know nothing outside args[1] may be set. */
862             mask = temps[args[1]].mask;
863             break;
864 
865         case INDEX_op_sar_i32:
866             if (temp_is_const(args[2])) {
867                 tmp = temps[args[2]].val & 31;
868                 mask = (int32_t)temps[args[1]].mask >> tmp;
869             }
870             break;
871         case INDEX_op_sar_i64:
872             if (temp_is_const(args[2])) {
873                 tmp = temps[args[2]].val & 63;
874                 mask = (int64_t)temps[args[1]].mask >> tmp;
875             }
876             break;
877 
878         case INDEX_op_shr_i32:
879             if (temp_is_const(args[2])) {
880                 tmp = temps[args[2]].val & 31;
881                 mask = (uint32_t)temps[args[1]].mask >> tmp;
882             }
883             break;
884         case INDEX_op_shr_i64:
885             if (temp_is_const(args[2])) {
886                 tmp = temps[args[2]].val & 63;
887                 mask = (uint64_t)temps[args[1]].mask >> tmp;
888             }
889             break;
890 
891         case INDEX_op_extrl_i64_i32:
892             mask = (uint32_t)temps[args[1]].mask;
893             break;
894         case INDEX_op_extrh_i64_i32:
895             mask = (uint64_t)temps[args[1]].mask >> 32;
896             break;
897 
898         CASE_OP_32_64(shl):
899             if (temp_is_const(args[2])) {
900                 tmp = temps[args[2]].val & (TCG_TARGET_REG_BITS - 1);
901                 mask = temps[args[1]].mask << tmp;
902             }
903             break;
904 
905         CASE_OP_32_64(neg):
906             /* Set to 1 all bits to the left of the rightmost.  */
907             mask = -(temps[args[1]].mask & -temps[args[1]].mask);
908             break;
909 
910         CASE_OP_32_64(deposit):
911             mask = deposit64(temps[args[1]].mask, args[3], args[4],
912                              temps[args[2]].mask);
913             break;
914 
915         CASE_OP_32_64(or):
916         CASE_OP_32_64(xor):
917             mask = temps[args[1]].mask | temps[args[2]].mask;
918             break;
919 
920         CASE_OP_32_64(setcond):
921         case INDEX_op_setcond2_i32:
922             mask = 1;
923             break;
924 
925         CASE_OP_32_64(movcond):
926             mask = temps[args[3]].mask | temps[args[4]].mask;
927             break;
928 
929         CASE_OP_32_64(ld8u):
930             mask = 0xff;
931             break;
932         CASE_OP_32_64(ld16u):
933             mask = 0xffff;
934             break;
935         case INDEX_op_ld32u_i64:
936             mask = 0xffffffffu;
937             break;
938 
939         CASE_OP_32_64(qemu_ld):
940             {
941                 TCGMemOpIdx oi = args[nb_oargs + nb_iargs];
942                 TCGMemOp mop = get_memop(oi);
943                 if (!(mop & MO_SIGN)) {
944                     mask = (2ULL << ((8 << (mop & MO_SIZE)) - 1)) - 1;
945                 }
946             }
947             break;
948 
949         default:
950             break;
951         }
952 
953         /* 32-bit ops generate 32-bit results.  For the result is zero test
954            below, we can ignore high bits, but for further optimizations we
955            need to record that the high bits contain garbage.  */
956         partmask = mask;
957         if (!(def->flags & TCG_OPF_64BIT)) {
958             mask |= ~(tcg_target_ulong)0xffffffffu;
959             partmask &= 0xffffffffu;
960             affected &= 0xffffffffu;
961         }
962 
963         if (partmask == 0) {
964             assert(nb_oargs == 1);
965             tcg_opt_gen_movi(s, op, args, args[0], 0);
966             continue;
967         }
968         if (affected == 0) {
969             assert(nb_oargs == 1);
970             tcg_opt_gen_mov(s, op, args, args[0], args[1]);
971             continue;
972         }
973 
974         /* Simplify expression for "op r, a, 0 => movi r, 0" cases */
975         switch (opc) {
976         CASE_OP_32_64(and):
977         CASE_OP_32_64(mul):
978         CASE_OP_32_64(muluh):
979         CASE_OP_32_64(mulsh):
980             if ((temp_is_const(args[2]) && temps[args[2]].val == 0)) {
981                 tcg_opt_gen_movi(s, op, args, args[0], 0);
982                 continue;
983             }
984             break;
985         default:
986             break;
987         }
988 
989         /* Simplify expression for "op r, a, a => mov r, a" cases */
990         switch (opc) {
991         CASE_OP_32_64(or):
992         CASE_OP_32_64(and):
993             if (temps_are_copies(args[1], args[2])) {
994                 tcg_opt_gen_mov(s, op, args, args[0], args[1]);
995                 continue;
996             }
997             break;
998         default:
999             break;
1000         }
1001 
1002         /* Simplify expression for "op r, a, a => movi r, 0" cases */
1003         switch (opc) {
1004         CASE_OP_32_64(andc):
1005         CASE_OP_32_64(sub):
1006         CASE_OP_32_64(xor):
1007             if (temps_are_copies(args[1], args[2])) {
1008                 tcg_opt_gen_movi(s, op, args, args[0], 0);
1009                 continue;
1010             }
1011             break;
1012         default:
1013             break;
1014         }
1015 
1016         /* Propagate constants through copy operations and do constant
1017            folding.  Constants will be substituted to arguments by register
1018            allocator where needed and possible.  Also detect copies. */
1019         switch (opc) {
1020         CASE_OP_32_64(mov):
1021             tcg_opt_gen_mov(s, op, args, args[0], args[1]);
1022             break;
1023         CASE_OP_32_64(movi):
1024             tcg_opt_gen_movi(s, op, args, args[0], args[1]);
1025             break;
1026 
1027         CASE_OP_32_64(not):
1028         CASE_OP_32_64(neg):
1029         CASE_OP_32_64(ext8s):
1030         CASE_OP_32_64(ext8u):
1031         CASE_OP_32_64(ext16s):
1032         CASE_OP_32_64(ext16u):
1033         case INDEX_op_ext32s_i64:
1034         case INDEX_op_ext32u_i64:
1035         case INDEX_op_ext_i32_i64:
1036         case INDEX_op_extu_i32_i64:
1037         case INDEX_op_extrl_i64_i32:
1038         case INDEX_op_extrh_i64_i32:
1039             if (temp_is_const(args[1])) {
1040                 tmp = do_constant_folding(opc, temps[args[1]].val, 0);
1041                 tcg_opt_gen_movi(s, op, args, args[0], tmp);
1042                 break;
1043             }
1044             goto do_default;
1045 
1046         CASE_OP_32_64(add):
1047         CASE_OP_32_64(sub):
1048         CASE_OP_32_64(mul):
1049         CASE_OP_32_64(or):
1050         CASE_OP_32_64(and):
1051         CASE_OP_32_64(xor):
1052         CASE_OP_32_64(shl):
1053         CASE_OP_32_64(shr):
1054         CASE_OP_32_64(sar):
1055         CASE_OP_32_64(rotl):
1056         CASE_OP_32_64(rotr):
1057         CASE_OP_32_64(andc):
1058         CASE_OP_32_64(orc):
1059         CASE_OP_32_64(eqv):
1060         CASE_OP_32_64(nand):
1061         CASE_OP_32_64(nor):
1062         CASE_OP_32_64(muluh):
1063         CASE_OP_32_64(mulsh):
1064         CASE_OP_32_64(div):
1065         CASE_OP_32_64(divu):
1066         CASE_OP_32_64(rem):
1067         CASE_OP_32_64(remu):
1068             if (temp_is_const(args[1]) && temp_is_const(args[2])) {
1069                 tmp = do_constant_folding(opc, temps[args[1]].val,
1070                                           temps[args[2]].val);
1071                 tcg_opt_gen_movi(s, op, args, args[0], tmp);
1072                 break;
1073             }
1074             goto do_default;
1075 
1076         CASE_OP_32_64(deposit):
1077             if (temp_is_const(args[1]) && temp_is_const(args[2])) {
1078                 tmp = deposit64(temps[args[1]].val, args[3], args[4],
1079                                 temps[args[2]].val);
1080                 tcg_opt_gen_movi(s, op, args, args[0], tmp);
1081                 break;
1082             }
1083             goto do_default;
1084 
1085         CASE_OP_32_64(setcond):
1086             tmp = do_constant_folding_cond(opc, args[1], args[2], args[3]);
1087             if (tmp != 2) {
1088                 tcg_opt_gen_movi(s, op, args, args[0], tmp);
1089                 break;
1090             }
1091             goto do_default;
1092 
1093         CASE_OP_32_64(brcond):
1094             tmp = do_constant_folding_cond(opc, args[0], args[1], args[2]);
1095             if (tmp != 2) {
1096                 if (tmp) {
1097                     reset_all_temps(nb_temps);
1098                     op->opc = INDEX_op_br;
1099                     args[0] = args[3];
1100                 } else {
1101                     tcg_op_remove(s, op);
1102                 }
1103                 break;
1104             }
1105             goto do_default;
1106 
1107         CASE_OP_32_64(movcond):
1108             tmp = do_constant_folding_cond(opc, args[1], args[2], args[5]);
1109             if (tmp != 2) {
1110                 tcg_opt_gen_mov(s, op, args, args[0], args[4-tmp]);
1111                 break;
1112             }
1113             goto do_default;
1114 
1115         case INDEX_op_add2_i32:
1116         case INDEX_op_sub2_i32:
1117             if (temp_is_const(args[2]) && temp_is_const(args[3])
1118                 && temp_is_const(args[4]) && temp_is_const(args[5])) {
1119                 uint32_t al = temps[args[2]].val;
1120                 uint32_t ah = temps[args[3]].val;
1121                 uint32_t bl = temps[args[4]].val;
1122                 uint32_t bh = temps[args[5]].val;
1123                 uint64_t a = ((uint64_t)ah << 32) | al;
1124                 uint64_t b = ((uint64_t)bh << 32) | bl;
1125                 TCGArg rl, rh;
1126                 TCGOp *op2 = insert_op_before(s, op, INDEX_op_movi_i32, 2);
1127                 TCGArg *args2 = &s->gen_opparam_buf[op2->args];
1128 
1129                 if (opc == INDEX_op_add2_i32) {
1130                     a += b;
1131                 } else {
1132                     a -= b;
1133                 }
1134 
1135                 rl = args[0];
1136                 rh = args[1];
1137                 tcg_opt_gen_movi(s, op, args, rl, (int32_t)a);
1138                 tcg_opt_gen_movi(s, op2, args2, rh, (int32_t)(a >> 32));
1139 
1140                 /* We've done all we need to do with the movi.  Skip it.  */
1141                 oi_next = op2->next;
1142                 break;
1143             }
1144             goto do_default;
1145 
1146         case INDEX_op_mulu2_i32:
1147             if (temp_is_const(args[2]) && temp_is_const(args[3])) {
1148                 uint32_t a = temps[args[2]].val;
1149                 uint32_t b = temps[args[3]].val;
1150                 uint64_t r = (uint64_t)a * b;
1151                 TCGArg rl, rh;
1152                 TCGOp *op2 = insert_op_before(s, op, INDEX_op_movi_i32, 2);
1153                 TCGArg *args2 = &s->gen_opparam_buf[op2->args];
1154 
1155                 rl = args[0];
1156                 rh = args[1];
1157                 tcg_opt_gen_movi(s, op, args, rl, (int32_t)r);
1158                 tcg_opt_gen_movi(s, op2, args2, rh, (int32_t)(r >> 32));
1159 
1160                 /* We've done all we need to do with the movi.  Skip it.  */
1161                 oi_next = op2->next;
1162                 break;
1163             }
1164             goto do_default;
1165 
1166         case INDEX_op_brcond2_i32:
1167             tmp = do_constant_folding_cond2(&args[0], &args[2], args[4]);
1168             if (tmp != 2) {
1169                 if (tmp) {
1170             do_brcond_true:
1171                     reset_all_temps(nb_temps);
1172                     op->opc = INDEX_op_br;
1173                     args[0] = args[5];
1174                 } else {
1175             do_brcond_false:
1176                     tcg_op_remove(s, op);
1177                 }
1178             } else if ((args[4] == TCG_COND_LT || args[4] == TCG_COND_GE)
1179                        && temp_is_const(args[2]) && temps[args[2]].val == 0
1180                        && temp_is_const(args[3]) && temps[args[3]].val == 0) {
1181                 /* Simplify LT/GE comparisons vs zero to a single compare
1182                    vs the high word of the input.  */
1183             do_brcond_high:
1184                 reset_all_temps(nb_temps);
1185                 op->opc = INDEX_op_brcond_i32;
1186                 args[0] = args[1];
1187                 args[1] = args[3];
1188                 args[2] = args[4];
1189                 args[3] = args[5];
1190             } else if (args[4] == TCG_COND_EQ) {
1191                 /* Simplify EQ comparisons where one of the pairs
1192                    can be simplified.  */
1193                 tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
1194                                                args[0], args[2], TCG_COND_EQ);
1195                 if (tmp == 0) {
1196                     goto do_brcond_false;
1197                 } else if (tmp == 1) {
1198                     goto do_brcond_high;
1199                 }
1200                 tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
1201                                                args[1], args[3], TCG_COND_EQ);
1202                 if (tmp == 0) {
1203                     goto do_brcond_false;
1204                 } else if (tmp != 1) {
1205                     goto do_default;
1206                 }
1207             do_brcond_low:
1208                 reset_all_temps(nb_temps);
1209                 op->opc = INDEX_op_brcond_i32;
1210                 args[1] = args[2];
1211                 args[2] = args[4];
1212                 args[3] = args[5];
1213             } else if (args[4] == TCG_COND_NE) {
1214                 /* Simplify NE comparisons where one of the pairs
1215                    can be simplified.  */
1216                 tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
1217                                                args[0], args[2], TCG_COND_NE);
1218                 if (tmp == 0) {
1219                     goto do_brcond_high;
1220                 } else if (tmp == 1) {
1221                     goto do_brcond_true;
1222                 }
1223                 tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
1224                                                args[1], args[3], TCG_COND_NE);
1225                 if (tmp == 0) {
1226                     goto do_brcond_low;
1227                 } else if (tmp == 1) {
1228                     goto do_brcond_true;
1229                 }
1230                 goto do_default;
1231             } else {
1232                 goto do_default;
1233             }
1234             break;
1235 
1236         case INDEX_op_setcond2_i32:
1237             tmp = do_constant_folding_cond2(&args[1], &args[3], args[5]);
1238             if (tmp != 2) {
1239             do_setcond_const:
1240                 tcg_opt_gen_movi(s, op, args, args[0], tmp);
1241             } else if ((args[5] == TCG_COND_LT || args[5] == TCG_COND_GE)
1242                        && temp_is_const(args[3]) && temps[args[3]].val == 0
1243                        && temp_is_const(args[4]) && temps[args[4]].val == 0) {
1244                 /* Simplify LT/GE comparisons vs zero to a single compare
1245                    vs the high word of the input.  */
1246             do_setcond_high:
1247                 reset_temp(args[0]);
1248                 temps[args[0]].mask = 1;
1249                 op->opc = INDEX_op_setcond_i32;
1250                 args[1] = args[2];
1251                 args[2] = args[4];
1252                 args[3] = args[5];
1253             } else if (args[5] == TCG_COND_EQ) {
1254                 /* Simplify EQ comparisons where one of the pairs
1255                    can be simplified.  */
1256                 tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
1257                                                args[1], args[3], TCG_COND_EQ);
1258                 if (tmp == 0) {
1259                     goto do_setcond_const;
1260                 } else if (tmp == 1) {
1261                     goto do_setcond_high;
1262                 }
1263                 tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
1264                                                args[2], args[4], TCG_COND_EQ);
1265                 if (tmp == 0) {
1266                     goto do_setcond_high;
1267                 } else if (tmp != 1) {
1268                     goto do_default;
1269                 }
1270             do_setcond_low:
1271                 reset_temp(args[0]);
1272                 temps[args[0]].mask = 1;
1273                 op->opc = INDEX_op_setcond_i32;
1274                 args[2] = args[3];
1275                 args[3] = args[5];
1276             } else if (args[5] == TCG_COND_NE) {
1277                 /* Simplify NE comparisons where one of the pairs
1278                    can be simplified.  */
1279                 tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
1280                                                args[1], args[3], TCG_COND_NE);
1281                 if (tmp == 0) {
1282                     goto do_setcond_high;
1283                 } else if (tmp == 1) {
1284                     goto do_setcond_const;
1285                 }
1286                 tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
1287                                                args[2], args[4], TCG_COND_NE);
1288                 if (tmp == 0) {
1289                     goto do_setcond_low;
1290                 } else if (tmp == 1) {
1291                     goto do_setcond_const;
1292                 }
1293                 goto do_default;
1294             } else {
1295                 goto do_default;
1296             }
1297             break;
1298 
1299         case INDEX_op_call:
1300             if (!(args[nb_oargs + nb_iargs + 1]
1301                   & (TCG_CALL_NO_READ_GLOBALS | TCG_CALL_NO_WRITE_GLOBALS))) {
1302                 for (i = 0; i < nb_globals; i++) {
1303                     if (test_bit(i, temps_used.l)) {
1304                         reset_temp(i);
1305                     }
1306                 }
1307             }
1308             goto do_reset_output;
1309 
1310         default:
1311         do_default:
1312             /* Default case: we know nothing about operation (or were unable
1313                to compute the operation result) so no propagation is done.
1314                We trash everything if the operation is the end of a basic
1315                block, otherwise we only trash the output args.  "mask" is
1316                the non-zero bits mask for the first output arg.  */
1317             if (def->flags & TCG_OPF_BB_END) {
1318                 reset_all_temps(nb_temps);
1319             } else {
1320         do_reset_output:
1321                 for (i = 0; i < nb_oargs; i++) {
1322                     reset_temp(args[i]);
1323                     /* Save the corresponding known-zero bits mask for the
1324                        first output argument (only one supported so far). */
1325                     if (i == 0) {
1326                         temps[args[i]].mask = mask;
1327                     }
1328                 }
1329             }
1330             break;
1331         }
1332     }
1333 }
1334