1 /*
2  * Optimizations for Tiny Code Generator for QEMU
3  *
4  * Copyright (c) 2010 Samsung Electronics.
5  * Contributed by Kirill Batuzov <batuzovk@ispras.ru>
6  *
7  * Permission is hereby granted, free of charge, to any person obtaining a copy
8  * of this software and associated documentation files (the "Software"), to deal
9  * in the Software without restriction, including without limitation the rights
10  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11  * copies of the Software, and to permit persons to whom the Software is
12  * furnished to do so, subject to the following conditions:
13  *
14  * The above copyright notice and this permission notice shall be included in
15  * all copies or substantial portions of the Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23  * THE SOFTWARE.
24  */
25 
26 #include "qemu/osdep.h"
27 #include "qemu/int128.h"
28 #include "qemu/interval-tree.h"
29 #include "tcg/tcg-op-common.h"
30 #include "tcg-internal.h"
31 #include "tcg-has.h"
32 
33 
/*
 * One recorded association between an interval and a temp.
 *
 * Entries are filled in by record_mem_copy() and are linked two ways:
 * through @itree into OptContext.mem_copy, and through @next into either
 * the owning temp's TempOptInfo.mem_copy queue or, once released, the
 * OptContext.mem_free list so that the storage can be reused.
 */
typedef struct MemCopyInfo {
    /* Interval [start, last] plus linkage in OptContext.mem_copy. */
    IntervalTreeNode itree;
    /* Linkage in a per-temp queue or in the free list. */
    QSIMPLEQ_ENTRY (MemCopyInfo) next;
    /* Temp whose TempOptInfo.mem_copy queue currently holds this entry. */
    TCGTemp *ts;
    /* Type recorded with the interval; matched by find_mem_copy_for(). */
    TCGType type;
} MemCopyInfo;
40 
/*
 * Per-temp optimizer state, reached through TCGTemp.state_ptr (see ts_info()).
 *
 * @prev_copy and @next_copy form a circular doubly-linked list of temps
 * known to hold the same value; a temp with no copies links to itself.
 * A temp is treated as a constant when z_mask == o_mask (see ti_is_const()).
 * A temp about which nothing is known has z_mask = -1, o_mask = 0 and
 * s_mask = 0.
 */
typedef struct TempOptInfo {
    TCGTemp *prev_copy;
    TCGTemp *next_copy;
    /* MemCopyInfo entries currently attached to this temp. */
    QSIMPLEQ_HEAD(, MemCopyInfo) mem_copy;
    uint64_t z_mask;  /* a 0 bit means the value bit is known to be 0 */
    uint64_t o_mask;  /* a 1 bit means the value bit is known to be 1 */
    uint64_t s_mask;  /* mask bit is 1 if value bit matches msb */
} TempOptInfo;
49 
/* State carried by the optimizer while it walks the ops. */
typedef struct OptContext {
    TCGContext *tcg;
    /* Reset to NULL by finish_bb(). */
    TCGOp *prev_mb;
    /* Bitmap, indexed by temp_idx(), of temps set up by init_ts_info(). */
    TCGTempSet temps_used;

    /* Interval tree holding every live MemCopyInfo entry. */
    IntervalTreeRoot mem_copy;
    /* Released MemCopyInfo entries, reused by record_mem_copy(). */
    QSIMPLEQ_HEAD(, MemCopyInfo) mem_free;

    /* In flight values from optimization. */
    TCGType type;
    int carry_state;  /* -1 = non-constant, {0,1} = constant carry-in */
} OptContext;
62 
ts_info(TCGTemp * ts)63 static inline TempOptInfo *ts_info(TCGTemp *ts)
64 {
65     return ts->state_ptr;
66 }
67 
arg_info(TCGArg arg)68 static inline TempOptInfo *arg_info(TCGArg arg)
69 {
70     return ts_info(arg_temp(arg));
71 }
72 
/* Return true if @ti describes a fully known (constant) value. */
static inline bool ti_is_const(TempOptInfo *ti)
{
    /*
     * Every bit is pinned down once the bits that may be set (z_mask)
     * are exactly the bits that are known to be set (o_mask).
     */
    const uint64_t maybe_set = ti->z_mask;
    const uint64_t known_set = ti->o_mask;

    return maybe_set == known_set;
}
78 
/*
 * Return the constant described by @ti.
 * Only meaningful when ti_is_const(@ti) holds; in that case z_mask and
 * o_mask are equal and both hold the value.
 */
static inline uint64_t ti_const_val(TempOptInfo *ti)
{
    uint64_t value = ti->z_mask;

    return value;
}
84 
/* Return true if @ti describes a constant and that constant equals @val. */
static inline bool ti_is_const_val(TempOptInfo *ti, uint64_t val)
{
    if (!ti_is_const(ti)) {
        return false;
    }
    return ti_const_val(ti) == val;
}
89 
/* Return true if the optimizer state of @ts describes a constant. */
static inline bool ts_is_const(TCGTemp *ts)
{
    TempOptInfo *info = ts_info(ts);

    return ti_is_const(info);
}
94 
/* Return true if @ts is known to hold exactly the constant @val. */
static inline bool ts_is_const_val(TCGTemp *ts, uint64_t val)
{
    TempOptInfo *info = ts_info(ts);

    return ti_is_const_val(info, val);
}
99 
/* Return true if the temp behind @arg is known to hold a constant. */
static inline bool arg_is_const(TCGArg arg)
{
    TCGTemp *ts = arg_temp(arg);

    return ts_is_const(ts);
}
104 
/*
 * Return the constant held by the temp behind @arg.
 * Only meaningful when arg_is_const(@arg) holds.
 */
static inline uint64_t arg_const_val(TCGArg arg)
{
    TempOptInfo *info = arg_info(arg);

    return ti_const_val(info);
}
109 
/* Return true if the temp behind @arg is known to hold the constant @val. */
static inline bool arg_is_const_val(TCGArg arg, uint64_t val)
{
    TCGTemp *ts = arg_temp(arg);

    return ts_is_const_val(ts, val);
}
114 
/*
 * Return true if @ts shares its copy list with at least one other temp.
 * A temp without copies has next_copy pointing back at itself.
 */
static inline bool ts_is_copy(TCGTemp *ts)
{
    TCGTemp *successor = ts_info(ts)->next_copy;

    return successor != ts;
}
119 
/*
 * Choose between two temps by comparing their kind: the one with the
 * strictly greater kind wins, and @a is kept on a tie.
 */
static TCGTemp *cmp_better_copy(TCGTemp *a, TCGTemp *b)
{
    if (a->kind < b->kind) {
        return b;
    }
    return a;
}
124 
125 /* Initialize and activate a temporary.  */
init_ts_info(OptContext * ctx,TCGTemp * ts)126 static void init_ts_info(OptContext *ctx, TCGTemp *ts)
127 {
128     size_t idx = temp_idx(ts);
129     TempOptInfo *ti;
130 
131     if (test_bit(idx, ctx->temps_used.l)) {
132         return;
133     }
134     set_bit(idx, ctx->temps_used.l);
135 
136     ti = ts->state_ptr;
137     if (ti == NULL) {
138         ti = tcg_malloc(sizeof(TempOptInfo));
139         ts->state_ptr = ti;
140     }
141 
142     ti->next_copy = ts;
143     ti->prev_copy = ts;
144     QSIMPLEQ_INIT(&ti->mem_copy);
145     if (ts->kind == TEMP_CONST) {
146         ti->z_mask = ts->val;
147         ti->o_mask = ts->val;
148         ti->s_mask = INT64_MIN >> clrsb64(ts->val);
149     } else {
150         ti->z_mask = -1;
151         ti->o_mask = 0;
152         ti->s_mask = 0;
153     }
154 }
155 
mem_copy_first(OptContext * ctx,intptr_t s,intptr_t l)156 static MemCopyInfo *mem_copy_first(OptContext *ctx, intptr_t s, intptr_t l)
157 {
158     IntervalTreeNode *r = interval_tree_iter_first(&ctx->mem_copy, s, l);
159     return r ? container_of(r, MemCopyInfo, itree) : NULL;
160 }
161 
/*
 * Return the entry that interval_tree_iter_next() reports after @mem for
 * [@s, @l], or NULL if it reports none.
 */
static MemCopyInfo *mem_copy_next(MemCopyInfo *mem, intptr_t s, intptr_t l)
{
    IntervalTreeNode *node = interval_tree_iter_next(&mem->itree, s, l);

    if (!node) {
        return NULL;
    }
    return container_of(node, MemCopyInfo, itree);
}
167 
/*
 * Retire @mc: take it out of the interval tree and out of its owning
 * temp's queue, then put it on ctx->mem_free for later reuse.
 */
static void remove_mem_copy(OptContext *ctx, MemCopyInfo *mc)
{
    TempOptInfo *owner = ts_info(mc->ts);

    interval_tree_remove(&mc->itree, &ctx->mem_copy);
    QSIMPLEQ_REMOVE(&owner->mem_copy, mc, MemCopyInfo, next);
    QSIMPLEQ_INSERT_TAIL(&ctx->mem_free, mc, next);
}
177 
/* Retire every entry that mem_copy_first() reports for [@s, @l]. */
static void remove_mem_copy_in(OptContext *ctx, intptr_t s, intptr_t l)
{
    /*
     * Restart the lookup after each removal rather than iterating,
     * since removal modifies the tree being searched.
     */
    for (MemCopyInfo *mc = mem_copy_first(ctx, s, l);
         mc != NULL;
         mc = mem_copy_first(ctx, s, l)) {
        remove_mem_copy(ctx, mc);
    }
}
188 
/* Retire all recorded memory copies; the tree must be empty afterwards. */
static void remove_mem_copy_all(OptContext *ctx)
{
    const intptr_t first = 0;
    const intptr_t last = -1;

    remove_mem_copy_in(ctx, first, last);
    tcg_debug_assert(interval_tree_is_empty(&ctx->mem_copy));
}
194 
/*
 * Return the preferred representative among @ts and its copies, as ranked
 * by cmp_better_copy().  A readonly @ts is returned unchanged.
 */
static TCGTemp *find_better_copy(TCGTemp *ts)
{
    TCGTemp *best = ts;
    TCGTemp *cur;

    /* If this is already readonly, we can't do better. */
    if (temp_readonly(ts)) {
        return ts;
    }

    /* Walk the circular copy list once, starting after @ts. */
    cur = ts_info(ts)->next_copy;
    while (cur != ts) {
        best = cmp_better_copy(best, cur);
        cur = ts_info(cur)->next_copy;
    }
    return best;
}
210 
/*
 * Hand every memory copy owned by @src_ts over to @dst_ts: each entry is
 * re-pointed at @dst_ts and the source queue is appended to the
 * destination queue.
 */
static void move_mem_copies(TCGTemp *dst_ts, TCGTemp *src_ts)
{
    TempOptInfo *from = ts_info(src_ts);
    TempOptInfo *to = ts_info(dst_ts);
    MemCopyInfo *entry;

    QSIMPLEQ_FOREACH(entry, &from->mem_copy, next) {
        tcg_debug_assert(entry->ts == src_ts);
        entry->ts = dst_ts;
    }
    QSIMPLEQ_CONCAT(&to->mem_copy, &from->mem_copy);
}
223 
224 /* Reset TEMP's state, possibly removing the temp for the list of copies.  */
reset_ts(OptContext * ctx,TCGTemp * ts)225 static void reset_ts(OptContext *ctx, TCGTemp *ts)
226 {
227     TempOptInfo *ti = ts_info(ts);
228     TCGTemp *pts = ti->prev_copy;
229     TCGTemp *nts = ti->next_copy;
230     TempOptInfo *pi = ts_info(pts);
231     TempOptInfo *ni = ts_info(nts);
232 
233     ni->prev_copy = ti->prev_copy;
234     pi->next_copy = ti->next_copy;
235     ti->next_copy = ts;
236     ti->prev_copy = ts;
237     ti->z_mask = -1;
238     ti->o_mask = 0;
239     ti->s_mask = 0;
240 
241     if (!QSIMPLEQ_EMPTY(&ti->mem_copy)) {
242         if (ts == nts) {
243             /* Last temp copy being removed, the mem copies die. */
244             MemCopyInfo *mc;
245             QSIMPLEQ_FOREACH(mc, &ti->mem_copy, next) {
246                 interval_tree_remove(&mc->itree, &ctx->mem_copy);
247             }
248             QSIMPLEQ_CONCAT(&ctx->mem_free, &ti->mem_copy);
249         } else {
250             move_mem_copies(find_better_copy(nts), ts);
251         }
252     }
253 }
254 
/* Convenience wrapper: reset_ts() on the temp that @arg refers to. */
static void reset_temp(OptContext *ctx, TCGArg arg)
{
    TCGTemp *ts = arg_temp(arg);

    reset_ts(ctx, ts);
}
259 
/*
 * Record that the interval [@start, @last] is associated with @ts as a
 * value of type @type.
 *
 * The entry is taken from ctx->mem_free when one is available and is
 * allocated otherwise.  It is attached to the preferred copy of @ts as
 * chosen by find_better_copy().
 */
static void record_mem_copy(OptContext *ctx, TCGType type,
                            TCGTemp *ts, intptr_t start, intptr_t last)
{
    MemCopyInfo *entry = QSIMPLEQ_FIRST(&ctx->mem_free);
    TCGTemp *owner;

    if (entry == NULL) {
        entry = tcg_malloc(sizeof(*entry));
    } else {
        QSIMPLEQ_REMOVE_HEAD(&ctx->mem_free, next);
    }

    /* Recycled entries carry stale links, so always start from zero. */
    memset(entry, 0, sizeof(*entry));
    entry->itree.start = start;
    entry->itree.last = last;
    entry->type = type;
    interval_tree_insert(&entry->itree, &ctx->mem_copy);

    owner = find_better_copy(ts);
    entry->ts = owner;
    QSIMPLEQ_INSERT_TAIL(&ts_info(owner)->mem_copy, entry, next);
}
284 
/*
 * Return true if @ts1 and @ts2 are the same temp or are members of the
 * same copy list.
 */
static bool ts_are_copies(TCGTemp *ts1, TCGTemp *ts2)
{
    TCGTemp *cur;

    if (ts1 == ts2) {
        return true;
    }

    /* Two distinct temps can only match if both belong to some list. */
    if (!(ts_is_copy(ts1) && ts_is_copy(ts2))) {
        return false;
    }

    /* Look for @ts2 on the circular list that starts after @ts1. */
    cur = ts_info(ts1)->next_copy;
    while (cur != ts1) {
        if (cur == ts2) {
            return true;
        }
        cur = ts_info(cur)->next_copy;
    }
    return false;
}
305 
/* ts_are_copies() applied to the temps behind @arg1 and @arg2. */
static bool args_are_copies(TCGArg arg1, TCGArg arg2)
{
    TCGTemp *first = arg_temp(arg1);
    TCGTemp *second = arg_temp(arg2);

    return ts_are_copies(first, second);
}
310 
find_mem_copy_for(OptContext * ctx,TCGType type,intptr_t s)311 static TCGTemp *find_mem_copy_for(OptContext *ctx, TCGType type, intptr_t s)
312 {
313     MemCopyInfo *mc;
314 
315     for (mc = mem_copy_first(ctx, s, s); mc; mc = mem_copy_next(mc, s, s)) {
316         if (mc->itree.start == s && mc->type == type) {
317             return find_better_copy(mc->ts);
318         }
319     }
320     return NULL;
321 }
322 
arg_new_constant(OptContext * ctx,uint64_t val)323 static TCGArg arg_new_constant(OptContext *ctx, uint64_t val)
324 {
325     TCGType type = ctx->type;
326     TCGTemp *ts;
327 
328     if (type == TCG_TYPE_I32) {
329         val = (int32_t)val;
330     }
331 
332     ts = tcg_constant_internal(type, val);
333     init_ts_info(ctx, ts);
334 
335     return temp_arg(ts);
336 }
337 
arg_new_temp(OptContext * ctx)338 static TCGArg arg_new_temp(OptContext *ctx)
339 {
340     TCGTemp *ts = tcg_temp_new_internal(ctx->type, TEMP_EBB);
341     init_ts_info(ctx, ts);
342     return temp_arg(ts);
343 }
344 
/*
 * Insert a new op with opcode @opc and @narg arguments after @op, using
 * ctx->type as its type.  Returns the new op.
 */
static TCGOp *opt_insert_after(OptContext *ctx, TCGOp *op,
                               TCGOpcode opc, unsigned narg)
{
    TCGOp *inserted = tcg_op_insert_after(ctx->tcg, op, opc, ctx->type, narg);

    return inserted;
}
350 
/*
 * Insert a new op with opcode @opc and @narg arguments before @op, using
 * ctx->type as its type.  Returns the new op.
 */
static TCGOp *opt_insert_before(OptContext *ctx, TCGOp *op,
                                TCGOpcode opc, unsigned narg)
{
    TCGOp *inserted = tcg_op_insert_before(ctx->tcg, op, opc, ctx->type, narg);

    return inserted;
}
356 
/*
 * Turn @op into a move of @src to @dst and carry over what is known about
 * @src.  If the two temps are already known copies, @op is removed instead.
 *
 * When both temps have the same type, @dst joins the copy list of @src and
 * may take over its memory copies.  When the types differ, only the masks
 * are transferred, adjusted for the destination width.
 *
 * Always returns true.
 */
static bool tcg_opt_gen_mov(OptContext *ctx, TCGOp *op, TCGArg dst, TCGArg src)
{
    TCGTemp *const to = arg_temp(dst);
    TCGTemp *const from = arg_temp(src);
    TempOptInfo *to_info;
    TempOptInfo *from_info;
    TempOptInfo *succ_info;

    if (ts_are_copies(to, from)) {
        tcg_op_remove(ctx->tcg, op);
        return true;
    }

    reset_ts(ctx, to);
    to_info = ts_info(to);
    from_info = ts_info(from);

    switch (ctx->type) {
    case TCG_TYPE_I32:
    case TCG_TYPE_I64:
        op->opc = INDEX_op_mov;
        break;
    case TCG_TYPE_V64:
    case TCG_TYPE_V128:
    case TCG_TYPE_V256:
        /* TCGOP_TYPE and TCGOP_VECE remain unchanged.  */
        op->opc = INDEX_op_mov_vec;
        break;
    default:
        g_assert_not_reached();
    }
    op->args[0] = dst;
    op->args[1] = src;

    to_info->z_mask = from_info->z_mask;
    to_info->o_mask = from_info->o_mask;
    to_info->s_mask = from_info->s_mask;

    if (from->type != to->type) {
        if (to->type == TCG_TYPE_I32) {
            /* Keep the low half, sign-extended from bit 31. */
            to_info->z_mask = (int32_t)to_info->z_mask;
            to_info->o_mask = (int32_t)to_info->o_mask;
            to_info->s_mask |= INT32_MIN;
        } else {
            /* Nothing is known about the upper 32 bits. */
            to_info->z_mask |= MAKE_64BIT_MASK(32, 32);
            to_info->o_mask = (uint32_t)to_info->o_mask;
            to_info->s_mask = INT64_MIN;
        }
        return true;
    }

    /* Same type: splice @dst into the copy list right after @src. */
    succ_info = ts_info(from_info->next_copy);
    to_info->next_copy = from_info->next_copy;
    to_info->prev_copy = from;
    succ_info->prev_copy = to;
    from_info->next_copy = to;

    if (!QSIMPLEQ_EMPTY(&from_info->mem_copy)
        && cmp_better_copy(from, to) == to) {
        move_mem_copies(to, from);
    }
    return true;
}
419 
/*
 * Turn @op into a move of the constant @val to @dst.
 * This is expressed as a mov from a constant temp.
 */
static bool tcg_opt_gen_movi(OptContext *ctx, TCGOp *op,
                             TCGArg dst, uint64_t val)
{
    TCGArg constant = arg_new_constant(ctx, val);

    return tcg_opt_gen_mov(ctx, op, dst, constant);
}
426 
/*
 * Evaluate opcode @op on the constants @x and @y for operand type @type.
 *
 * Unary opcodes use only @x.  For the bswap opcodes @y carries the
 * TCG_BSWAP_* flags, and for clz/ctz it is the result when @x is zero.
 * The result is not canonicalized for 32-bit types here; see
 * do_constant_folding().  Opcodes not listed are a programming error.
 */
static uint64_t do_constant_folding_2(TCGOpcode op, TCGType type,
                                      uint64_t x, uint64_t y)
{
    const bool is32 = (type == TCG_TYPE_I32);
    uint64_t lo, hi;

    switch (op) {
    /* Plain arithmetic: width independent at this stage. */
    case INDEX_op_add:
        return x + y;
    case INDEX_op_sub:
        return x - y;
    case INDEX_op_mul:
        return x * y;
    case INDEX_op_neg:
        return -x;

    /* Bitwise logic. */
    case INDEX_op_and:
    case INDEX_op_and_vec:
        return x & y;
    case INDEX_op_or:
    case INDEX_op_or_vec:
        return x | y;
    case INDEX_op_xor:
    case INDEX_op_xor_vec:
        return x ^ y;
    case INDEX_op_not:
    case INDEX_op_not_vec:
        return ~x;
    case INDEX_op_andc:
    case INDEX_op_andc_vec:
        return x & ~y;
    case INDEX_op_orc:
    case INDEX_op_orc_vec:
        return x | ~y;
    case INDEX_op_eqv:
    case INDEX_op_eqv_vec:
        return ~(x ^ y);
    case INDEX_op_nand:
    case INDEX_op_nand_vec:
        return ~(x & y);
    case INDEX_op_nor:
    case INDEX_op_nor_vec:
        return ~(x | y);

    /* Shifts and rotates: the count is reduced modulo the width. */
    case INDEX_op_shl:
        return is32 ? (uint32_t)x << (y & 31) : x << (y & 63);
    case INDEX_op_shr:
        return is32 ? (uint32_t)x >> (y & 31) : x >> (y & 63);
    case INDEX_op_sar:
        return is32 ? (int32_t)x >> (y & 31) : (int64_t)x >> (y & 63);
    case INDEX_op_rotr:
        return is32 ? ror32(x, y & 31) : ror64(x, y & 63);
    case INDEX_op_rotl:
        return is32 ? rol32(x, y & 31) : rol64(x, y & 63);

    /* Bit counting: @y is the answer for a zero input. */
    case INDEX_op_clz:
        if ((is32 ? (uint32_t)x : x) == 0) {
            return y;
        }
        return is32 ? clz32(x) : clz64(x);
    case INDEX_op_ctz:
        if ((is32 ? (uint32_t)x : x) == 0) {
            return y;
        }
        return is32 ? ctz32(x) : ctz64(x);
    case INDEX_op_ctpop:
        if (is32) {
            return ctpop32(x);
        }
        return ctpop64(x);

    /* Byte swaps: TCG_BSWAP_OS in @y requests a sign-extended result. */
    case INDEX_op_bswap16:
        x = bswap16(x);
        if (y & TCG_BSWAP_OS) {
            return (int16_t)x;
        }
        return x;
    case INDEX_op_bswap32:
        x = bswap32(x);
        if (y & TCG_BSWAP_OS) {
            return (int32_t)x;
        }
        return x;
    case INDEX_op_bswap64:
        return bswap64(x);

    /* Width conversions. */
    case INDEX_op_ext_i32_i64:
        return (int32_t)x;
    case INDEX_op_extu_i32_i64:
    case INDEX_op_extrl_i64_i32:
        return (uint32_t)x;
    case INDEX_op_extrh_i64_i32:
        return x >> 32;

    /* High half of a widening multiply. */
    case INDEX_op_muluh:
        if (is32) {
            return ((uint64_t)(uint32_t)x * (uint32_t)y) >> 32;
        }
        mulu64(&lo, &hi, x, y);
        return hi;
    case INDEX_op_mulsh:
        if (is32) {
            return ((int64_t)(int32_t)x * (int32_t)y) >> 32;
        }
        muls64(&lo, &hi, x, y);
        return hi;

    /*
     * Division and remainder.
     * Avoid crashing on divide by zero, otherwise undefined.
     */
    case INDEX_op_divs:
        return is32 ? (int32_t)x / ((int32_t)y ? : 1)
                    : (int64_t)x / ((int64_t)y ? : 1);
    case INDEX_op_divu:
        return is32 ? (uint32_t)x / ((uint32_t)y ? : 1)
                    : x / (y ? : 1);
    case INDEX_op_rems:
        return is32 ? (int32_t)x % ((int32_t)y ? : 1)
                    : (int64_t)x % ((int64_t)y ? : 1);
    case INDEX_op_remu:
        return is32 ? (uint32_t)x % ((uint32_t)y ? : 1)
                    : x % (y ? : 1);

    default:
        g_assert_not_reached();
    }
}
590 
/*
 * Evaluate @op on constants via do_constant_folding_2() and, for
 * TCG_TYPE_I32, sign-extend the result from 32 bits.
 */
static uint64_t do_constant_folding(TCGOpcode op, TCGType type,
                                    uint64_t x, uint64_t y)
{
    const uint64_t raw = do_constant_folding_2(op, type, x, y);

    if (type != TCG_TYPE_I32) {
        return raw;
    }
    return (uint64_t)(int32_t)raw;
}
600 
/*
 * Evaluate the comparison "@x @c @y" on 32-bit constants.
 * TCG_COND_ALWAYS and TCG_COND_NEVER must not be passed in.
 */
static bool do_constant_folding_cond_32(uint32_t x, uint32_t y, TCGCond c)
{
    const int32_t sx = (int32_t)x;
    const int32_t sy = (int32_t)y;

    switch (c) {
    /* Equality. */
    case TCG_COND_EQ:
        return x == y;
    case TCG_COND_NE:
        return x != y;

    /* Signed ordering. */
    case TCG_COND_LT:
        return sx < sy;
    case TCG_COND_GE:
        return sx >= sy;
    case TCG_COND_LE:
        return sx <= sy;
    case TCG_COND_GT:
        return sx > sy;

    /* Unsigned ordering. */
    case TCG_COND_LTU:
        return x < y;
    case TCG_COND_GEU:
        return x >= y;
    case TCG_COND_LEU:
        return x <= y;
    case TCG_COND_GTU:
        return x > y;

    /* Bit tests. */
    case TCG_COND_TSTEQ:
        return (x & y) == 0;
    case TCG_COND_TSTNE:
        return (x & y) != 0;

    case TCG_COND_ALWAYS:
    case TCG_COND_NEVER:
        break;
    }
    g_assert_not_reached();
}
634 
/*
 * Evaluate the comparison "@x @c @y" on 64-bit constants.
 * TCG_COND_ALWAYS and TCG_COND_NEVER must not be passed in.
 */
static bool do_constant_folding_cond_64(uint64_t x, uint64_t y, TCGCond c)
{
    const int64_t sx = (int64_t)x;
    const int64_t sy = (int64_t)y;

    switch (c) {
    /* Equality. */
    case TCG_COND_EQ:
        return x == y;
    case TCG_COND_NE:
        return x != y;

    /* Signed ordering. */
    case TCG_COND_LT:
        return sx < sy;
    case TCG_COND_GE:
        return sx >= sy;
    case TCG_COND_LE:
        return sx <= sy;
    case TCG_COND_GT:
        return sx > sy;

    /* Unsigned ordering. */
    case TCG_COND_LTU:
        return x < y;
    case TCG_COND_GEU:
        return x >= y;
    case TCG_COND_LEU:
        return x <= y;
    case TCG_COND_GTU:
        return x > y;

    /* Bit tests. */
    case TCG_COND_TSTEQ:
        return (x & y) == 0;
    case TCG_COND_TSTNE:
        return (x & y) != 0;

    case TCG_COND_ALWAYS:
    case TCG_COND_NEVER:
        break;
    }
    g_assert_not_reached();
}
668 
/*
 * Result of condition @c when both operands are known to be equal:
 * 1 or 0 when equality decides the outcome, -1 for the bit-test
 * conditions, whose outcome also depends on the value itself.
 * TCG_COND_ALWAYS and TCG_COND_NEVER must not be passed in.
 */
static int do_constant_folding_cond_eq(TCGCond c)
{
    switch (c) {
    /* Conditions that include equality. */
    case TCG_COND_EQ:
    case TCG_COND_GE:
    case TCG_COND_LE:
    case TCG_COND_GEU:
    case TCG_COND_LEU:
        return 1;

    /* Conditions that exclude equality. */
    case TCG_COND_NE:
    case TCG_COND_LT:
    case TCG_COND_GT:
    case TCG_COND_LTU:
    case TCG_COND_GTU:
        return 0;

    case TCG_COND_TSTEQ:
    case TCG_COND_TSTNE:
        return -1;

    case TCG_COND_ALWAYS:
    case TCG_COND_NEVER:
        break;
    }
    g_assert_not_reached();
}
693 
694 /*
695  * Return -1 if the condition can't be simplified,
696  * and the result of the condition (0 or 1) if it can.
697  */
do_constant_folding_cond(TCGType type,TCGArg x,TCGArg y,TCGCond c)698 static int do_constant_folding_cond(TCGType type, TCGArg x,
699                                     TCGArg y, TCGCond c)
700 {
701     if (arg_is_const(x) && arg_is_const(y)) {
702         uint64_t xv = arg_const_val(x);
703         uint64_t yv = arg_const_val(y);
704 
705         switch (type) {
706         case TCG_TYPE_I32:
707             return do_constant_folding_cond_32(xv, yv, c);
708         case TCG_TYPE_I64:
709             return do_constant_folding_cond_64(xv, yv, c);
710         default:
711             /* Only scalar comparisons are optimizable */
712             return -1;
713         }
714     } else if (args_are_copies(x, y)) {
715         return do_constant_folding_cond_eq(c);
716     } else if (arg_is_const_val(y, 0)) {
717         switch (c) {
718         case TCG_COND_LTU:
719         case TCG_COND_TSTNE:
720             return 0;
721         case TCG_COND_GEU:
722         case TCG_COND_TSTEQ:
723             return 1;
724         default:
725             return -1;
726         }
727     }
728     return -1;
729 }
730 
731 /**
732  * swap_commutative:
733  * @dest: TCGArg of the destination argument, or NO_DEST.
734  * @p1: first paired argument
735  * @p2: second paired argument
736  *
737  * If *@p1 is a constant and *@p2 is not, swap.
738  * If *@p2 matches @dest, swap.
739  * Return true if a swap was performed.
740  */
741 
/* Value to pass as @dest to swap_commutative() when there is no destination. */
#define NO_DEST  temp_arg(NULL)
743 
/*
 * Rank an operand for placement in a commutative operation: 0 for a
 * non-constant, 2 for constant zero, 3 for any other constant.
 * Higher ranks are preferred in the second position.
 */
static int pref_commutative(TempOptInfo *ti)
{
    if (!ti_is_const(ti)) {
        return 0;
    }
    /* Slight preference for non-zero constants second. */
    if (ti_const_val(ti)) {
        return 3;
    }
    return 2;
}
749 
/*
 * Canonicalize the operand order of a commutative operation.
 *
 * Swap *@p1 and *@p2 if *@p1 ranks higher by pref_commutative(), or if
 * they rank equal and *@p2 is @dest.  Return true if a swap was performed.
 */
static bool swap_commutative(TCGArg dest, TCGArg *p1, TCGArg *p2)
{
    const TCGArg first = *p1;
    const TCGArg second = *p2;
    const int rank_first = pref_commutative(arg_info(first));
    const int rank_second = pref_commutative(arg_info(second));
    const int bias = rank_first - rank_second;

    /*
     * Prefer the constant in second argument, and then the form
     * op a, a, b, which is better handled on non-RISC hosts.
     */
    if (bias < 0 || (bias == 0 && dest != second)) {
        return false;
    }
    *p1 = second;
    *p2 = first;
    return true;
}
766 
/*
 * Canonicalize the order of two operand pairs, @p1[0..1] and @p2[0..1].
 * The pairs are exchanged when the first pair has the higher combined
 * pref_commutative() rank.  Return true if a swap was performed.
 */
static bool swap_commutative2(TCGArg *p1, TCGArg *p2)
{
    const int rank_first = pref_commutative(arg_info(p1[0]))
                         + pref_commutative(arg_info(p1[1]));
    const int rank_second = pref_commutative(arg_info(p2[0]))
                          + pref_commutative(arg_info(p2[1]));

    if (rank_first - rank_second <= 0) {
        return false;
    }

    for (int i = 0; i < 2; i++) {
        TCGArg saved = p1[i];

        p1[i] = p2[i];
        p2[i] = saved;
    }
    return true;
}
782 
static bool fold_and(OptContext *ctx, TCGOp *op);

/*
 * Canonicalize and try to decide a single-word comparison.
 *
 * *@p1, *@p2 and *@pcond are the operands and condition; they may be
 * rewritten in place (operand swap, or a test condition replaced by an
 * equivalent comparison against zero).  @dest is the destination argument
 * passed on to swap_commutative().
 *
 * Return -1 if the condition can't be simplified,
 * and the result of the condition (0 or 1) if it can.
 */
static int do_constant_folding_cond1(OptContext *ctx, TCGOp *op, TCGArg dest,
                                     TCGArg *p1, TCGArg *p2, TCGArg *pcond)
{
    TempOptInfo *lhs;
    TCGCond cond;
    int folded;

    if (swap_commutative(dest, p1, p2)) {
        cond = tcg_swap_cond(*pcond);
        *pcond = cond;
    } else {
        cond = *pcond;
    }

    folded = do_constant_folding_cond(ctx->type, *p1, *p2, cond);
    if (folded >= 0) {
        return folded;
    }

    /* The remaining rewrites apply to the test conditions only. */
    if (!is_tst_cond(cond)) {
        return -1;
    }

    lhs = arg_info(*p1);

    /* TSTNE x,x -> NE x,0 */
    if (args_are_copies(*p1, *p2)) {
        *p2 = arg_new_constant(ctx, 0);
        *pcond = tcg_tst_eqne_cond(cond);
        return -1;
    }

    if (arg_is_const(*p2)) {
        const uint64_t mask = arg_const_val(*p2);

        /* TSTNE x,i -> NE x,0 if i includes all nonzero bits of x */
        if ((lhs->z_mask & ~mask) == 0) {
            *p2 = arg_new_constant(ctx, 0);
            *pcond = tcg_tst_eqne_cond(cond);
            return -1;
        }

        /* TSTNE x,i -> LT x,0 if i only includes sign bit copies */
        if ((mask & ~lhs->s_mask) == 0) {
            *p2 = arg_new_constant(ctx, 0);
            *pcond = tcg_tst_ltge_cond(cond);
            return -1;
        }
    }

    /* Expand to AND with a temporary if no backend support. */
    if (!TCG_TARGET_HAS_tst) {
        TCGOp *and_op = opt_insert_before(ctx, op, INDEX_op_and, 3);
        TCGArg masked = arg_new_temp(ctx);

        and_op->args[0] = masked;
        and_op->args[1] = *p1;
        and_op->args[2] = *p2;
        fold_and(ctx, and_op);

        *p1 = masked;
        *p2 = arg_new_constant(ctx, 0);
        *pcond = tcg_tst_eqne_cond(cond);
    }
    return -1;
}
846 
/*
 * Canonicalize and try to decide a double-word comparison.
 *
 * @args holds five entries: { a_low, a_high, b_low, b_high, cond }.
 * The entries may be rewritten in place: the operand pairs may be swapped
 * (with the condition adjusted to match), and a test condition may be
 * replaced by an equivalent comparison against zero.  When the backend has
 * no test-condition support, two AND ops are inserted before @op and the
 * comparison is redirected to their results.
 *
 * Return -1 if the condition can't be simplified,
 * and the result of the condition (0 or 1) if it can.
 */
static int do_constant_folding_cond2(OptContext *ctx, TCGOp *op, TCGArg *args)
{
    TCGArg al, ah, bl, bh;
    TCGCond c;
    bool swap;
    int r;

    swap = swap_commutative2(args, args + 2);
    c = args[4];
    if (swap) {
        args[4] = c = tcg_swap_cond(c);
    }

    al = args[0];
    ah = args[1];
    bl = args[2];
    bh = args[3];

    if (arg_is_const(bl) && arg_is_const(bh)) {
        tcg_target_ulong blv = arg_const_val(bl);
        tcg_target_ulong bhv = arg_const_val(bh);
        uint64_t b = deposit64(blv, 32, 32, bhv);

        if (arg_is_const(al) && arg_is_const(ah)) {
            tcg_target_ulong alv = arg_const_val(al);
            tcg_target_ulong ahv = arg_const_val(ah);
            uint64_t a = deposit64(alv, 32, 32, ahv);

            r = do_constant_folding_cond_64(a, b, c);
            if (r >= 0) {
                return r;
            }
        }

        /* A few conditions are decided by a zero second operand alone. */
        if (b == 0) {
            switch (c) {
            case TCG_COND_LTU:
            case TCG_COND_TSTNE:
                return 0;
            case TCG_COND_GEU:
            case TCG_COND_TSTEQ:
                return 1;
            default:
                break;
            }
        }

        /* TSTNE x,-1 -> NE x,0 */
        if (b == -1 && is_tst_cond(c)) {
            args[3] = args[2] = arg_new_constant(ctx, 0);
            args[4] = tcg_tst_eqne_cond(c);
            return -1;
        }

        /* TSTNE x,sign -> LT x,0 */
        if (b == INT64_MIN && is_tst_cond(c)) {
            /* bl must be 0, so copy that to bh */
            args[3] = bl;
            args[4] = tcg_tst_ltge_cond(c);
            return -1;
        }
    }

    if (args_are_copies(al, bl) && args_are_copies(ah, bh)) {
        r = do_constant_folding_cond_eq(c);
        if (r >= 0) {
            return r;
        }

        /* TSTNE x,x -> NE x,0 */
        if (is_tst_cond(c)) {
            args[3] = args[2] = arg_new_constant(ctx, 0);
            args[4] = tcg_tst_eqne_cond(c);
            return -1;
        }
    }

    /* Expand to AND with a temporary if no backend support. */
    if (!TCG_TARGET_HAS_tst && is_tst_cond(c)) {
        TCGOp *op1 = opt_insert_before(ctx, op, INDEX_op_and, 3);
        TCGOp *op2 = opt_insert_before(ctx, op, INDEX_op_and, 3);
        TCGArg t1 = arg_new_temp(ctx);
        TCGArg t2 = arg_new_temp(ctx);

        /* Low halves. */
        op1->args[0] = t1;
        op1->args[1] = al;
        op1->args[2] = bl;
        fold_and(ctx, op1);

        /*
         * High halves.  Each inserted op must be folded exactly once:
         * fold op2 here, not op1 a second time.
         */
        op2->args[0] = t2;
        op2->args[1] = ah;
        op2->args[2] = bh;
        fold_and(ctx, op2);

        args[0] = t1;
        args[1] = t2;
        args[3] = args[2] = arg_new_constant(ctx, 0);
        args[4] = tcg_tst_eqne_cond(c);
    }
    return -1;
}
948 
/* Run init_ts_info() on the temps behind the first @nb_args args of @op. */
static void init_arguments(OptContext *ctx, TCGOp *op, int nb_args)
{
    int i = 0;

    while (i < nb_args) {
        init_ts_info(ctx, arg_temp(op->args[i]));
        i++;
    }
}
956 
/*
 * Replace each input argument of @op that has known copies with the
 * preferred copy chosen by find_better_copy().  Inputs are the
 * @nb_iargs arguments that follow the first @nb_oargs outputs.
 */
static void copy_propagate(OptContext *ctx, TCGOp *op,
                           int nb_oargs, int nb_iargs)
{
    const int end = nb_oargs + nb_iargs;

    for (int i = nb_oargs; i < end; i++) {
        TCGTemp *ts = arg_temp(op->args[i]);

        if (!ts_is_copy(ts)) {
            continue;
        }
        op->args[i] = temp_arg(find_better_copy(ts));
    }
}
967 
/* Drop the state that is only valid up to the end of a basic block. */
static void finish_bb(OptContext *ctx)
{
    /*
     * Memory barrier optimization does not reach past a basic block
     * boundary, so forget the previously seen barrier.
     */
    ctx->prev_mb = NULL;
}
973 
/*
 * Drop the state that is only valid up to the end of an extended basic
 * block: everything finish_bb() drops, the set of initialized temps, and
 * all recorded memory copies.
 */
static void finish_ebb(OptContext *ctx)
{
    finish_bb(ctx);

    /* We only optimize across extended basic blocks. */
    memset(&ctx->temps_used, 0, sizeof ctx->temps_used);
    remove_mem_copy_all(ctx);
}
981 
/*
 * Finish processing @op without having learned anything about its
 * results: every output temp is reset to the unknown state.
 * Always returns true.
 */
static bool finish_folding(OptContext *ctx, TCGOp *op)
{
    const int nb_oargs = tcg_op_defs[op->opc].nb_oargs;

    for (int i = 0; i < nb_oargs; i++) {
        reset_ts(ctx, arg_temp(op->args[i]));
    }
    return true;
}
994 
995 /*
996  * The fold_* functions return true when processing is complete,
997  * usually by folding the operation to a constant or to a copy,
998  * and calling tcg_opt_gen_{mov,movi}.  They may do other things,
999  * like collect information about the value produced, for use in
1000  * optimizing a subsequent operation.
1001  *
1002  * These first fold_* functions are all helpers, used by other
1003  * folders for more specific operations.
1004  */
1005 
/*
 * Constant-fold a one-input operation.  When the input is constant the
 * result is computed by do_constant_folding() and handed to
 * tcg_opt_gen_movi(), whose return value is passed on.
 * Returns false when the input is not constant.
 */
static bool fold_const1(OptContext *ctx, TCGOp *op)
{
    uint64_t val;

    if (!arg_is_const(op->args[1])) {
        return false;
    }

    val = arg_const_val(op->args[1]);
    val = do_constant_folding(op->opc, ctx->type, val, 0);
    return tcg_opt_gen_movi(ctx, op, op->args[0], val);
}
1016 
/*
 * Constant-fold a two-input operation.  When both inputs are constant
 * the result is computed by do_constant_folding() and handed to
 * tcg_opt_gen_movi(), whose return value is passed on.
 * Returns false when either input is not constant.
 */
static bool fold_const2(OptContext *ctx, TCGOp *op)
{
    uint64_t lhs, rhs, folded;

    if (!arg_is_const(op->args[1]) || !arg_is_const(op->args[2])) {
        return false;
    }

    lhs = arg_const_val(op->args[1]);
    rhs = arg_const_val(op->args[2]);
    folded = do_constant_folding(op->opc, ctx->type, lhs, rhs);
    return tcg_opt_gen_movi(ctx, op, op->args[0], folded);
}
1028 
/*
 * Give swap_commutative() the chance to exchange the two inputs of @op.
 * Always returns false, so the caller continues processing @op.
 */
static bool fold_commutative(OptContext *ctx, TCGOp *op)
{
    TCGArg *lhs = &op->args[1];
    TCGArg *rhs = &op->args[2];

    swap_commutative(op->args[0], lhs, rhs);
    return false;
}
1034 
/*
 * As fold_const2(), but first give swap_commutative() the chance to
 * exchange the two inputs of @op.
 */
static bool fold_const2_commutative(OptContext *ctx, TCGOp *op)
{
    TCGArg *lhs = &op->args[1];
    TCGArg *rhs = &op->args[2];

    swap_commutative(op->args[0], lhs, rhs);
    return fold_const2(ctx, op);
}
1040 
1041 /*
1042  * Record "zero" and "sign" masks for the single output of @op.
1043  * See TempOptInfo definition of z_mask and s_mask.
1044  * If z_mask allows, fold the output to constant zero.
1045  * The passed s_mask may be augmented by z_mask.
1046  */
fold_masks_zosa_int(OptContext * ctx,TCGOp * op,uint64_t z_mask,uint64_t o_mask,int64_t s_mask,uint64_t a_mask)1047 static bool fold_masks_zosa_int(OptContext *ctx, TCGOp *op,
1048                                 uint64_t z_mask, uint64_t o_mask,
1049                                 int64_t s_mask, uint64_t a_mask)
1050 {
1051     const TCGOpDef *def = &tcg_op_defs[op->opc];
1052     TCGTemp *ts;
1053     TempOptInfo *ti;
1054     int rep;
1055 
1056     /* Only single-output opcodes are supported here. */
1057     tcg_debug_assert(def->nb_oargs == 1);
1058 
1059     /*
1060      * 32-bit ops generate 32-bit results, which for the purpose of
1061      * simplifying tcg are sign-extended.  Certainly that's how we
1062      * represent our constants elsewhere.  Note that the bits will
1063      * be reset properly for a 64-bit value when encountering the
1064      * type changing opcodes.
1065      */
1066     if (ctx->type == TCG_TYPE_I32) {
1067         z_mask = (int32_t)z_mask;
1068         o_mask = (int32_t)o_mask;
1069         s_mask |= INT32_MIN;
1070         a_mask = (uint32_t)a_mask;
1071     }
1072 
1073     /* Bits that are known 1 and bits that are known 0 must not overlap. */
1074     tcg_debug_assert((o_mask & ~z_mask) == 0);
1075 
1076     /* All bits that are not known zero are known one is a constant. */
1077     if (z_mask == o_mask) {
1078         return tcg_opt_gen_movi(ctx, op, op->args[0], o_mask);
1079     }
1080 
1081     /* If no bits are affected, the operation devolves to a copy. */
1082     if (a_mask == 0) {
1083         return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
1084     }
1085 
1086     ts = arg_temp(op->args[0]);
1087     reset_ts(ctx, ts);
1088 
1089     ti = ts_info(ts);
1090     ti->z_mask = z_mask;
1091 
1092     /* Canonicalize s_mask and incorporate data from z_mask. */
1093     rep = clz64(~s_mask);
1094     rep = MAX(rep, clz64(z_mask));
1095     rep = MAX(rep, clz64(~o_mask));
1096     rep = MAX(rep - 1, 0);
1097     ti->s_mask = INT64_MIN >> rep;
1098 
1099     return false;
1100 }
1101 
/*
 * Record the given masks for the single output of @op via
 * fold_masks_zosa_int(), which may instead fold @op to a constant or,
 * when @a_mask is zero, to a copy of its first input.
 *
 * Always returns true: whether or not @op was folded, its processing
 * is complete.
 */
static bool fold_masks_zosa(OptContext *ctx, TCGOp *op, uint64_t z_mask,
                            uint64_t o_mask, int64_t s_mask, uint64_t a_mask)
{
    /* Forward the caller's a_mask so that the copy fold can trigger. */
    fold_masks_zosa_int(ctx, op, z_mask, o_mask, s_mask, a_mask);
    return true;
}
1108 
/* As fold_masks_zosa(), passing an all-ones affected mask. */
static bool fold_masks_zos(OptContext *ctx, TCGOp *op,
                           uint64_t z_mask, uint64_t o_mask, uint64_t s_mask)
{
    const uint64_t a_mask = -1;

    return fold_masks_zosa(ctx, op, z_mask, o_mask, s_mask, a_mask);
}
1114 
/*
 * As fold_masks_zosa(), passing an empty sign mask and an all-ones
 * affected mask.
 */
static bool fold_masks_zo(OptContext *ctx, TCGOp *op,
                          uint64_t z_mask, uint64_t o_mask)
{
    const uint64_t s_mask = 0;
    const uint64_t a_mask = -1;

    return fold_masks_zosa(ctx, op, z_mask, o_mask, s_mask, a_mask);
}
1120 
/*
 * As fold_masks_zosa(), passing an empty known-one mask and an all-ones
 * affected mask.
 */
static bool fold_masks_zs(OptContext *ctx, TCGOp *op,
                          uint64_t z_mask, uint64_t s_mask)
{
    const uint64_t o_mask = 0;
    const uint64_t a_mask = -1;

    return fold_masks_zosa(ctx, op, z_mask, o_mask, s_mask, a_mask);
}
1126 
/*
 * As fold_masks_zosa(), passing only a zero mask: the known-one and
 * sign masks are empty and the affected mask is all-ones.
 */
static bool fold_masks_z(OptContext *ctx, TCGOp *op, uint64_t z_mask)
{
    const uint64_t o_mask = 0;
    const uint64_t s_mask = 0;
    const uint64_t a_mask = -1;

    return fold_masks_zosa(ctx, op, z_mask, o_mask, s_mask, a_mask);
}
1131 
/*
 * As fold_masks_zosa(), passing only a sign mask: the zero mask is
 * all-ones, the known-one mask is empty and the affected mask is all-ones.
 */
static bool fold_masks_s(OptContext *ctx, TCGOp *op, uint64_t s_mask)
{
    const uint64_t z_mask = -1;
    const uint64_t o_mask = 0;
    const uint64_t a_mask = -1;

    return fold_masks_zosa(ctx, op, z_mask, o_mask, s_mask, a_mask);
}
1136 
/*
 * Convert @op to NOT, if NOT is supported by the host.
 * Return true if the conversion is successful, which will still
 * indicate that the processing is complete.
 */
1142 static bool fold_not(OptContext *ctx, TCGOp *op);
static bool fold_to_not(OptContext *ctx, TCGOp *op, int idx)
{
    TCGOpcode opc;
    bool supported;

    /* Pick the integer or vector flavour of NOT from the current type. */
    switch (ctx->type) {
    case TCG_TYPE_I32:
    case TCG_TYPE_I64:
        opc = INDEX_op_not;
        supported = tcg_op_supported(INDEX_op_not, ctx->type, 0);
        break;
    case TCG_TYPE_V64:
    case TCG_TYPE_V128:
    case TCG_TYPE_V256:
        opc = INDEX_op_not_vec;
        supported = TCG_TARGET_HAS_not_vec;
        break;
    default:
        g_assert_not_reached();
    }

    if (!supported) {
        return false;
    }

    /* The operand selected by @idx becomes the sole input of the NOT. */
    op->opc = opc;
    op->args[1] = op->args[idx];
    return fold_not(ctx, op);
}
1170 
1171 /* If the binary operation has first argument @i, fold to @i. */
fold_ix_to_i(OptContext * ctx,TCGOp * op,uint64_t i)1172 static bool fold_ix_to_i(OptContext *ctx, TCGOp *op, uint64_t i)
1173 {
1174     if (arg_is_const_val(op->args[1], i)) {
1175         return tcg_opt_gen_movi(ctx, op, op->args[0], i);
1176     }
1177     return false;
1178 }
1179 
1180 /* If the binary operation has first argument @i, fold to NOT. */
fold_ix_to_not(OptContext * ctx,TCGOp * op,uint64_t i)1181 static bool fold_ix_to_not(OptContext *ctx, TCGOp *op, uint64_t i)
1182 {
1183     if (arg_is_const_val(op->args[1], i)) {
1184         return fold_to_not(ctx, op, 2);
1185     }
1186     return false;
1187 }
1188 
1189 /* If the binary operation has second argument @i, fold to @i. */
fold_xi_to_i(OptContext * ctx,TCGOp * op,uint64_t i)1190 static bool fold_xi_to_i(OptContext *ctx, TCGOp *op, uint64_t i)
1191 {
1192     if (arg_is_const_val(op->args[2], i)) {
1193         return tcg_opt_gen_movi(ctx, op, op->args[0], i);
1194     }
1195     return false;
1196 }
1197 
1198 /* If the binary operation has second argument @i, fold to identity. */
fold_xi_to_x(OptContext * ctx,TCGOp * op,uint64_t i)1199 static bool fold_xi_to_x(OptContext *ctx, TCGOp *op, uint64_t i)
1200 {
1201     if (arg_is_const_val(op->args[2], i)) {
1202         return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
1203     }
1204     return false;
1205 }
1206 
1207 /* If the binary operation has second argument @i, fold to NOT. */
fold_xi_to_not(OptContext * ctx,TCGOp * op,uint64_t i)1208 static bool fold_xi_to_not(OptContext *ctx, TCGOp *op, uint64_t i)
1209 {
1210     if (arg_is_const_val(op->args[2], i)) {
1211         return fold_to_not(ctx, op, 1);
1212     }
1213     return false;
1214 }
1215 
1216 /* If the binary operation has both arguments equal, fold to @i. */
fold_xx_to_i(OptContext * ctx,TCGOp * op,uint64_t i)1217 static bool fold_xx_to_i(OptContext *ctx, TCGOp *op, uint64_t i)
1218 {
1219     if (args_are_copies(op->args[1], op->args[2])) {
1220         return tcg_opt_gen_movi(ctx, op, op->args[0], i);
1221     }
1222     return false;
1223 }
1224 
1225 /* If the binary operation has both arguments equal, fold to identity. */
fold_xx_to_x(OptContext * ctx,TCGOp * op)1226 static bool fold_xx_to_x(OptContext *ctx, TCGOp *op)
1227 {
1228     if (args_are_copies(op->args[1], op->args[2])) {
1229         return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
1230     }
1231     return false;
1232 }
1233 
1234 /*
1235  * These outermost fold_<op> functions are sorted alphabetically.
1236  *
1237  * The ordering of the transformations should be:
1238  *   1) those that produce a constant
1239  *   2) those that produce a copy
1240  *   3) those that produce information about the result value.
1241  */
1242 
1243 static bool fold_addco(OptContext *ctx, TCGOp *op);
1244 static bool fold_or(OptContext *ctx, TCGOp *op);
1245 static bool fold_orc(OptContext *ctx, TCGOp *op);
1246 static bool fold_subbo(OptContext *ctx, TCGOp *op);
1247 static bool fold_xor(OptContext *ctx, TCGOp *op);
1248 
/*
 * Fold an integer add: first try full constant folding (after letting
 * the inputs be swapped), then x + 0 => x.  If neither applies, finish
 * without recording anything about the result.
 */
static bool fold_add(OptContext *ctx, TCGOp *op)
{
    if (fold_const2_commutative(ctx, op)) {
        return true;
    }
    if (fold_xi_to_x(ctx, op, 0)) {
        return true;
    }
    return finish_folding(ctx, op);
}
1257 
1258 /* We cannot as yet do_constant_folding with vectors. */
fold_add_vec(OptContext * ctx,TCGOp * op)1259 static bool fold_add_vec(OptContext *ctx, TCGOp *op)
1260 {
1261     if (fold_commutative(ctx, op) ||
1262         fold_xi_to_x(ctx, op, 0)) {
1263         return true;
1264     }
1265     return finish_folding(ctx, op);
1266 }
1267 
/*
 * Rewrite the op immediately preceding @op, which must be one of
 * addco, addcio or addc1o, into an equivalent that has no carry-out:
 *   addco  -> add
 *   addcio -> addci
 *   addc1o -> add, plus the extra 1 of the known carry-in
 * Any other preceding opcode is a programming error.
 */
static void squash_prev_carryout(OptContext *ctx, TCGOp *op)
{
    TCGOp *prev = QTAILQ_PREV(op, link);

    switch (prev->opc) {
    case INDEX_op_addco:
        prev->opc = INDEX_op_add;
        fold_add(ctx, prev);
        break;

    case INDEX_op_addcio:
        prev->opc = INDEX_op_addci;
        break;

    case INDEX_op_addc1o: {
        TempOptInfo *rhs;

        prev->opc = INDEX_op_add;
        rhs = arg_info(prev->args[2]);
        if (!ti_is_const(rhs)) {
            /* Cannot merge the +1 into an operand: add it separately. */
            TCGArg dest = prev->args[0];
            TCGOp *inc = opt_insert_after(ctx, prev, INDEX_op_add, 3);

            inc->args[0] = dest;
            inc->args[1] = dest;
            inc->args[2] = arg_new_constant(ctx, 1);
        } else {
            /* Merge the +1 into the constant operand. */
            prev->args[2] = arg_new_constant(ctx, ti_const_val(rhs) + 1);
            /* Perform other constant folding, if needed. */
            fold_add(ctx, prev);
        }
        break;
    }

    default:
        g_assert_not_reached();
    }
}
1300 
/*
 * Fold an add with carry-in (and no carry-out).
 *
 * ctx->carry_state describes the incoming carry: negative when unknown,
 * zero when known clear, positive when known set.  With a known carry
 * the operation becomes a plain add, with the carry folded in as +1 when
 * it is set.  The carry state is unknown afterwards.
 */
static bool fold_addci(OptContext *ctx, TCGOp *op)
{
    fold_commutative(ctx, op);

    /* Unknown carry-in: the op must stay as it is. */
    if (ctx->carry_state < 0) {
        return finish_folding(ctx, op);
    }

    /* The producer of the carry no longer needs to emit it. */
    squash_prev_carryout(ctx, op);
    op->opc = INDEX_op_add;

    if (ctx->carry_state > 0) {
        TempOptInfo *rhs = arg_info(op->args[2]);

        if (!ti_is_const(rhs)) {
            /*
             * No constant to absorb the carry: emit the original sum
             * first and turn @op into an increment of that sum.
             */
            TCGOp *sum = opt_insert_before(ctx, op, INDEX_op_add, 3);

            sum->args[0] = op->args[0];
            sum->args[1] = op->args[1];
            sum->args[2] = op->args[2];
            fold_add(ctx, sum);

            op->args[1] = op->args[0];
            op->args[2] = arg_new_constant(ctx, 1);
        } else {
            /* Propagate the known carry-in into the constant. */
            op->args[2] = arg_new_constant(ctx, ti_const_val(rhs) + 1);
        }
    }

    ctx->carry_state = -1;
    return fold_add(ctx, op);
}
1337 
/*
 * Fold an add with both carry-in and carry-out.
 *
 * ctx->carry_state describes the incoming carry: negative when unknown,
 * zero when known clear, positive when known set.  On return it holds,
 * in the same encoding, what is known about the carry-out of @op.
 *
 * With a known carry-in the previous op's carry-out is squashed and @op
 * is rewritten: to addco when the carry is clear or can be absorbed into
 * a constant operand, otherwise to addc1o, which remembers the known
 * carry-in of one.
 */
static bool fold_addcio(OptContext *ctx, TCGOp *op)
{
    TempOptInfo *t1, *t2;
    int carry_out = -1;   /* unknown unless established below */
    uint64_t sum, max;

    fold_commutative(ctx, op);
    t1 = arg_info(op->args[1]);
    t2 = arg_info(op->args[2]);

    /*
     * The z_mask value is >= the maximum value that can be represented
     * with the known zero bits.  So adding the z_mask values will not
     * overflow if and only if the true values cannot overflow.
     * An unknown carry-in is treated like a set one, i.e. 1 is added
     * unless the carry-in is known to be clear.
     */
    if (!uadd64_overflow(t1->z_mask, t2->z_mask, &sum) &&
        !uadd64_overflow(sum, ctx->carry_state != 0, &sum)) {
        carry_out = 0;
    }

    /* Unknown carry-in: keep the op, but record the carry-out state. */
    if (ctx->carry_state < 0) {
        ctx->carry_state = carry_out;
        return finish_folding(ctx, op);
    }

    /* Known carry-in: the previous op need not produce its carry-out. */
    squash_prev_carryout(ctx, op);
    if (ctx->carry_state == 0) {
        goto do_addco;
    }

    /* Propagate the known carry-in into a constant, if possible. */
    max = ctx->type == TCG_TYPE_I32 ? UINT32_MAX : UINT64_MAX;
    if (ti_is_const(t2)) {
        uint64_t v = ti_const_val(t2) & max;
        if (v < max) {
            op->args[2] = arg_new_constant(ctx, v + 1);
            goto do_addco;
        }
        /* max + known carry in produces known carry out. */
        carry_out = 1;
    }
    if (ti_is_const(t1)) {
        uint64_t v = ti_const_val(t1) & max;
        if (v < max) {
            op->args[1] = arg_new_constant(ctx, v + 1);
            goto do_addco;
        }
        /* As above: max + known carry in produces known carry out. */
        carry_out = 1;
    }

    /* Adjust the opcode to remember the known carry-in. */
    op->opc = INDEX_op_addc1o;
    ctx->carry_state = carry_out;
    return finish_folding(ctx, op);

 do_addco:
    /* No carry-in remains; fold_addco() computes the new carry state. */
    op->opc = INDEX_op_addco;
    return fold_addco(ctx, op);
}
1397 
/*
 * Fold an add with carry-out (and no carry-in).
 *
 * The op itself is kept; what is learned about its carry-out is stored
 * in ctx->carry_state: negative when unknown, zero when known clear,
 * positive when known set.
 */
static bool fold_addco(OptContext *ctx, TCGOp *op)
{
    TempOptInfo *lhs, *rhs;
    uint64_t unused;
    int carry = -1;

    fold_commutative(ctx, op);
    lhs = arg_info(op->args[1]);
    rhs = arg_info(op->args[2]);

    if (!ti_is_const(rhs)) {
        /*
         * The z_mask value is >= the maximum value that can be represented
         * with the known zero bits.  So adding the z_mask values will not
         * overflow if and only if the true values cannot overflow.
         */
        if (!uadd64_overflow(lhs->z_mask, rhs->z_mask, &unused)) {
            carry = 0;
        }
    } else {
        uint64_t rv = ti_const_val(rhs);

        if (ti_is_const(lhs)) {
            uint64_t lv = ti_const_val(lhs);

            /* Given sign-extension of z_mask for I32, we need not truncate. */
            carry = uadd64_overflow(lv, rv, &unused);
        } else if (rv == 0) {
            /* Adding zero never carries. */
            carry = 0;
        }
    }

    ctx->carry_state = carry;
    return finish_folding(ctx, op);
}
1431 
/*
 * Fold a bitwise AND (integer or vector).
 *
 * Tries, in order: full constant folding; folding to a constant or to a
 * copy of the first input based on the operand masks; and, when the op
 * survives, either canonicalizing an integer AND with a low-bits
 * constant mask to an extract, or folding x & x to x.
 * Always returns true.
 */
static bool fold_and(OptContext *ctx, TCGOp *op)
{
    TempOptInfo *lhs, *rhs;
    uint64_t z, o, s, affected;
    uint64_t val;

    if (fold_const2_commutative(ctx, op)) {
        return true;
    }

    lhs = arg_info(op->args[1]);
    rhs = arg_info(op->args[2]);

    z = lhs->z_mask & rhs->z_mask;
    o = lhs->o_mask & rhs->o_mask;

    /*
     * Sign repetitions are perforce all identical, whether they are 1 or 0.
     * Bitwise operations preserve the relative quantity of the repetitions.
     */
    s = lhs->s_mask & rhs->s_mask;

    /* Affected bits are those not known zero, masked by those known one. */
    affected = lhs->z_mask & ~rhs->o_mask;

    if (fold_masks_zosa_int(ctx, op, z, o, s, affected)) {
        return true;
    }

    if (op->opc != INDEX_op_and || !ti_is_const(rhs)) {
        fold_xx_to_x(ctx, op);
        return true;
    }

    /*
     * Canonicalize on extract, if valid.  This aids x86 with its
     * 2 operand MOVZBL and 2 operand AND, selecting the TCGOpcode
     * which does not require matching operands.  Other backends can
     * trivially expand the extract to AND during code generation.
     */
    val = ti_const_val(rhs);
    if ((val & (val + 1)) == 0) {
        /* The constant is a run of ones starting at bit 0. */
        unsigned len = ctz64(~val);

        if (TCG_TARGET_extract_valid(ctx->type, 0, len)) {
            op->opc = INDEX_op_extract;
            op->args[2] = 0;
            op->args[3] = len;
        }
    }
    return true;
}
1479 
/*
 * Fold an and-with-complement, x & ~y (integer or vector).
 *
 * Tries, in order: full constant folding; rewriting a constant second
 * operand into a plain AND with the inverted constant; x & ~x => 0;
 * -1 & ~y => not y; and finally recording masks for the result.
 */
static bool fold_andc(OptContext *ctx, TCGOp *op)
{
    TempOptInfo *lhs, *rhs;
    uint64_t z, o, s, affected;

    if (fold_const2(ctx, op)) {
        return true;
    }

    lhs = arg_info(op->args[1]);
    rhs = arg_info(op->args[2]);

    if (ti_is_const(rhs)) {
        TCGOpcode and_opc;

        /* Fold andc r,x,i to and r,x,~i. */
        switch (ctx->type) {
        case TCG_TYPE_I32:
        case TCG_TYPE_I64:
            and_opc = INDEX_op_and;
            break;
        case TCG_TYPE_V64:
        case TCG_TYPE_V128:
        case TCG_TYPE_V256:
            and_opc = INDEX_op_and_vec;
            break;
        default:
            g_assert_not_reached();
        }
        op->opc = and_opc;
        op->args[2] = arg_new_constant(ctx, ~ti_const_val(rhs));
        return fold_and(ctx, op);
    }

    if (fold_xx_to_i(ctx, op, 0)) {
        return true;
    }
    if (fold_ix_to_not(ctx, op, -1)) {
        return true;
    }

    /* Result bits are cleared wherever the second operand is known one. */
    z = lhs->z_mask & ~rhs->o_mask;
    /* Result bits are set where x is known one and y is known zero. */
    o = lhs->o_mask & ~rhs->z_mask;
    s = lhs->s_mask & rhs->s_mask;

    /* Affected bits are those not known zero, masked by those known zero. */
    affected = lhs->z_mask & rhs->z_mask;

    return fold_masks_zosa(ctx, op, z, o, s, affected);
}
1524 
/*
 * Fold a vector bit-select.
 *
 * As used below, args[1] is the selector, args[2] the value taken where
 * the selector bit is set ("true" value) and args[3] the value taken
 * where it is clear ("false" value).  When the true and/or false value
 * is the constant 0 or -1, the select is rewritten to a simpler logical
 * operation on the remaining operands.
 */
static bool fold_bitsel_vec(OptContext *ctx, TCGOp *op)
{
    /* If true and false values are the same, the selector is irrelevant. */
    if (args_are_copies(op->args[2], op->args[3])) {
        return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[2]);
    }

    if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])) {
        uint64_t tv = arg_const_val(op->args[2]);
        uint64_t fv = arg_const_val(op->args[3]);

        /* Select between -1 and 0: the result is the selector itself. */
        if (tv == -1 && fv == 0) {
            return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
        }
        /* Select between 0 and -1: the result is the inverted selector. */
        if (tv == 0 && fv == -1) {
            if (TCG_TARGET_HAS_not_vec) {
                op->opc = INDEX_op_not_vec;
                return fold_not(ctx, op);
            } else {
                /* Without a vector NOT, express it as xor with -1. */
                op->opc = INDEX_op_xor_vec;
                op->args[2] = arg_new_constant(ctx, -1);
                return fold_xor(ctx, op);
            }
        }
    }
    if (arg_is_const(op->args[2])) {
        uint64_t tv = arg_const_val(op->args[2]);
        /* True value -1: selector | false value. */
        if (tv == -1) {
            op->opc = INDEX_op_or_vec;
            op->args[2] = op->args[3];
            return fold_or(ctx, op);
        }
        /* True value 0: false value & ~selector. */
        if (tv == 0 && TCG_TARGET_HAS_andc_vec) {
            op->opc = INDEX_op_andc_vec;
            op->args[2] = op->args[1];
            op->args[1] = op->args[3];
            return fold_andc(ctx, op);
        }
    }
    if (arg_is_const(op->args[3])) {
        uint64_t fv = arg_const_val(op->args[3]);
        /* False value 0: selector & true value (operands already in place). */
        if (fv == 0) {
            op->opc = INDEX_op_and_vec;
            return fold_and(ctx, op);
        }
        /* False value -1: true value | ~selector. */
        if (fv == -1 && TCG_TARGET_HAS_orc_vec) {
            TCGArg ta = op->args[2];
            op->opc = INDEX_op_orc_vec;
            op->args[2] = op->args[1];
            op->args[1] = ta;
            return fold_orc(ctx, op);
        }
    }
    return finish_folding(ctx, op);
}
1580 
/*
 * Fold a conditional branch.  do_constant_folding_cond1() is given the
 * two operands (args[0], args[1]) and the condition (args[2]); its
 * result decides what happens:
 *   0        - the op is removed;
 *   positive - the op becomes an unconditional branch to the label in
 *              args[3], which ends the extended basic block;
 *   negative - the op is kept and ends the basic block.
 * Always returns true.
 */
static bool fold_brcond(OptContext *ctx, TCGOp *op)
{
    int known = do_constant_folding_cond1(ctx, op, NO_DEST, &op->args[0],
                                          &op->args[1], &op->args[2]);

    if (known < 0) {
        finish_bb(ctx);
    } else if (known == 0) {
        tcg_op_remove(ctx->tcg, op);
    } else {
        op->opc = INDEX_op_br;
        op->args[0] = op->args[3];
        finish_ebb(ctx);
    }
    return true;
}
1598 
/*
 * Fold a conditional branch on a double-word comparison.
 *
 * As used below, args[0]/args[1] are the low/high words of the first
 * operand, args[2]/args[3] the low/high words of the second operand,
 * args[4] the condition and args[5] the label.
 *
 * The branch is removed or made unconditional when its outcome is
 * known, or reduced to a single-word brcond on the low or the high
 * words when one half alone decides it.  Always returns true.
 */
static bool fold_brcond2(OptContext *ctx, TCGOp *op)
{
    TCGCond cond;
    TCGArg label;
    int i, inv = 0;

    /*
     * Read cond and label only after this call, which is handed the
     * argument array and may have updated it.
     */
    i = do_constant_folding_cond2(ctx, op, &op->args[0]);
    cond = op->args[4];
    label = op->args[5];
    /* A non-negative result means the outcome is already known. */
    if (i >= 0) {
        goto do_brcond_const;
    }

    switch (cond) {
    case TCG_COND_LT:
    case TCG_COND_GE:
        /*
         * Simplify LT/GE comparisons vs zero to a single compare
         * vs the high word of the input.
         */
        if (arg_is_const_val(op->args[2], 0) &&
            arg_is_const_val(op->args[3], 0)) {
            goto do_brcond_high;
        }
        break;

    case TCG_COND_NE:
        /* inv flips the 0/1 result so NE can share the EQ logic below. */
        inv = 1;
        QEMU_FALLTHROUGH;
    case TCG_COND_EQ:
        /*
         * Simplify EQ/NE comparisons where one of the pairs
         * can be simplified.
         */
        /* Low words: if decided, either all is decided or only high matters. */
        i = do_constant_folding_cond(TCG_TYPE_I32, op->args[0],
                                     op->args[2], cond);
        switch (i ^ inv) {
        case 0:
            goto do_brcond_const;
        case 1:
            goto do_brcond_high;
        }

        /* High words: if decided, either all is decided or only low matters. */
        i = do_constant_folding_cond(TCG_TYPE_I32, op->args[1],
                                     op->args[3], cond);
        switch (i ^ inv) {
        case 0:
            goto do_brcond_const;
        case 1:
            goto do_brcond_low;
        }
        break;

    case TCG_COND_TSTEQ:
    case TCG_COND_TSTNE:
        /* A zero low mask word leaves only the high words to test. */
        if (arg_is_const_val(op->args[2], 0)) {
            goto do_brcond_high;
        }
        /* A zero high mask word leaves only the low words to test. */
        if (arg_is_const_val(op->args[3], 0)) {
            goto do_brcond_low;
        }
        break;

    default:
        break;

    /* Reduce to a single-word brcond on the low words. */
    do_brcond_low:
        op->opc = INDEX_op_brcond;
        op->args[1] = op->args[2];
        op->args[2] = cond;
        op->args[3] = label;
        return fold_brcond(ctx, op);

    /* Reduce to a single-word brcond on the high words. */
    do_brcond_high:
        op->opc = INDEX_op_brcond;
        op->args[0] = op->args[1];
        op->args[1] = op->args[3];
        op->args[2] = cond;
        op->args[3] = label;
        return fold_brcond(ctx, op);

    /* Outcome known: i == 0 removes the branch, otherwise it is always taken. */
    do_brcond_const:
        if (i == 0) {
            tcg_op_remove(ctx->tcg, op);
            return true;
        }
        op->opc = INDEX_op_br;
        op->args[0] = label;
        finish_ebb(ctx);
        return true;
    }

    /* Nothing could be simplified: the branch stays and ends the block. */
    finish_bb(ctx);
    return true;
}
1694 
/*
 * Fold a byte swap (bswap16, bswap32 or bswap64).
 *
 * A constant input is folded outright.  Otherwise the known-zero and
 * known-one masks of the input are byte-swapped the same way as the
 * value, adjusted for the output extension requested by the flags in
 * args[2], and recorded for the result.
 */
static bool fold_bswap(OptContext *ctx, TCGOp *op)
{
    TempOptInfo *src = arg_info(op->args[1]);
    const int flags = op->args[2];
    uint64_t z, o;
    uint64_t s = 0;

    if (ti_is_const(src)) {
        return tcg_opt_gen_movi(ctx, op, op->args[0],
                                do_constant_folding(op->opc, ctx->type,
                                                    ti_const_val(src), flags));
    }

    switch (op->opc) {
    case INDEX_op_bswap16:
        z = bswap16(src->z_mask);
        o = bswap16(src->o_mask);
        if (flags & TCG_BSWAP_OS) {
            /* Sign-extended output: extend the masks from bit 15. */
            z = (int16_t)z;
            o = (int16_t)o;
            s = INT16_MIN;
        } else if (!(flags & TCG_BSWAP_OZ)) {
            /* Neither extension requested: bits 16 and up are unknown. */
            z |= MAKE_64BIT_MASK(16, 48);
        }
        break;

    case INDEX_op_bswap32:
        z = bswap32(src->z_mask);
        o = bswap32(src->o_mask);
        if (flags & TCG_BSWAP_OS) {
            /* Sign-extended output: extend the masks from bit 31. */
            z = (int32_t)z;
            o = (int32_t)o;
            s = INT32_MIN;
        } else if (!(flags & TCG_BSWAP_OZ)) {
            /* Neither extension requested: bits 32 and up are unknown. */
            z |= MAKE_64BIT_MASK(32, 32);
        }
        break;

    case INDEX_op_bswap64:
        z = bswap64(src->z_mask);
        o = bswap64(src->o_mask);
        break;

    default:
        g_assert_not_reached();
    }

    return fold_masks_zos(ctx, op, z, o, s);
}
1744 
/*
 * Process a helper call: prepare and copy-propagate its arguments, then
 * discard whatever optimizer state the call may invalidate, depending on
 * the call flags.  Always returns true.
 */
static bool fold_call(OptContext *ctx, TCGOp *op)
{
    TCGContext *tcg = ctx->tcg;
    const int n_out = TCGOP_CALLO(op);
    const int n_in = TCGOP_CALLI(op);
    int call_flags;
    int k;

    init_arguments(ctx, op, n_out + n_in);
    copy_propagate(ctx, op, n_out, n_in);

    call_flags = tcg_call_flags(op);

    /*
     * Unless the helper is flagged as not reading or not writing globals,
     * reset the data of every global temp that is in use.
     */
    if ((call_flags & (TCG_CALL_NO_READ_GLOBALS |
                       TCG_CALL_NO_WRITE_GLOBALS)) == 0) {
        const int n_globals = tcg->nb_globals;

        for (k = 0; k < n_globals; k++) {
            if (!test_bit(k, ctx->temps_used.l)) {
                continue;
            }
            reset_ts(ctx, &ctx->tcg->temps[k]);
        }
    }

    /* If the function has side effects, reset mem data. */
    if ((call_flags & TCG_CALL_NO_SIDE_EFFECTS) == 0) {
        remove_mem_copy_all(ctx);
    }

    /* Nothing is known about the values the call returns. */
    for (k = 0; k < n_out; k++) {
        reset_temp(ctx, op->args[k]);
    }

    /* Stop optimizing MB across calls. */
    ctx->prev_mb = NULL;
    return true;
}
1781 
/*
 * Fold a vector compare.  Only canonicalization is done: if
 * swap_commutative() exchanges the two operands, the condition in
 * args[3] is swapped to match.
 */
static bool fold_cmp_vec(OptContext *ctx, TCGOp *op)
{
    TCGArg *cond = &op->args[3];

    /* Canonicalize the comparison to put immediate second. */
    if (swap_commutative(NO_DEST, &op->args[1], &op->args[2])) {
        *cond = tcg_swap_cond(*cond);
    }
    return finish_folding(ctx, op);
}
1790 
/*
 * Fold a vector compare-and-select.  As used below, args[1] and args[2]
 * are compared under the condition in args[5]; args[3] is the "true"
 * value and args[4] the "false" value.
 */
static bool fold_cmpsel_vec(OptContext *ctx, TCGOp *op)
{
    TCGArg *cond = &op->args[5];

    /* Identical true and false values make the comparison irrelevant. */
    if (args_are_copies(op->args[3], op->args[4])) {
        return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[3]);
    }

    /* Canonicalize the comparison to put immediate second. */
    if (swap_commutative(NO_DEST, &op->args[1], &op->args[2])) {
        *cond = tcg_swap_cond(*cond);
    }

    /*
     * Canonicalize the "false" input reg to match the destination,
     * so that the tcg backend can implement "move if true".
     * Exchanging the two values requires inverting the condition.
     */
    if (swap_commutative(op->args[0], &op->args[4], &op->args[3])) {
        *cond = tcg_invert_cond(*cond);
    }
    return finish_folding(ctx, op);
}
1811 
/*
 * Fold a count-zeros operation.  args[1] is the value examined and
 * args[2] the value produced when args[1] is zero.
 *
 * A constant nonzero input folds to a constant, a constant zero input
 * folds to a copy of args[2].  Otherwise the result is either a count
 * below the type width or args[2], and the masks are built accordingly.
 */
static bool fold_count_zeros(OptContext *ctx, TCGOp *op)
{
    TempOptInfo *val = arg_info(op->args[1]);
    TempOptInfo *dflt = arg_info(op->args[2]);
    uint64_t count_bits;

    if (ti_is_const(val)) {
        uint64_t c = ti_const_val(val);

        if (c == 0) {
            return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[2]);
        }
        c = do_constant_folding(op->opc, ctx->type, c, 0);
        return tcg_opt_gen_movi(ctx, op, op->args[0], c);
    }

    /* Bits that a count for a nonzero input can occupy. */
    switch (ctx->type) {
    case TCG_TYPE_I32:
        count_bits = 31;
        break;
    case TCG_TYPE_I64:
        count_bits = 63;
        break;
    default:
        g_assert_not_reached();
    }

    /* Combine what holds for a count with what holds for args[2]. */
    return fold_masks_zs(ctx, op,
                         count_bits | dflt->z_mask,
                         ~count_bits & dflt->s_mask);
}
1844 
/*
 * Fold a population count.  A constant input is folded outright;
 * otherwise only the low bits able to hold a count up to the type
 * width are recorded as possibly nonzero.
 */
static bool fold_ctpop(OptContext *ctx, TCGOp *op)
{
    if (fold_const1(ctx, op)) {
        return true;
    }

    switch (ctx->type) {
    case TCG_TYPE_I32:
        /* The result lies in [0, 32]. */
        return fold_masks_z(ctx, op, 32 | 31);
    case TCG_TYPE_I64:
        /* The result lies in [0, 64]. */
        return fold_masks_z(ctx, op, 64 | 63);
    default:
        g_assert_not_reached();
    }
}
1865 
/*
 * Fold a deposit: insert the low args[4] bits of args[2] into args[1]
 * at bit offset args[3].
 *
 * Two constant operands fold to a constant.  Depositing into zero at
 * offset 0, or depositing zero into a value, is rewritten as an AND
 * with a constant mask.  Otherwise the masks of the result are
 * assembled from the masks of the two operands.
 */
static bool fold_deposit(OptContext *ctx, TCGOp *op)
{
    TempOptInfo *base = arg_info(op->args[1]);
    TempOptInfo *field = arg_info(op->args[2]);
    const int pos = op->args[3];
    const int nbits = op->args[4];
    const int width = 8 * tcg_type_size(ctx->type);
    uint64_t z, o, s;

    if (ti_is_const(base) && ti_is_const(field)) {
        uint64_t folded = deposit64(ti_const_val(base), pos, nbits,
                                    ti_const_val(field));

        return tcg_opt_gen_movi(ctx, op, op->args[0], folded);
    }

    /* Inserting a value into zero at offset 0: keep its low bits only. */
    if (ti_is_const_val(base, 0) && pos == 0) {
        uint64_t keep = MAKE_64BIT_MASK(0, nbits);

        op->opc = INDEX_op_and;
        op->args[1] = op->args[2];
        op->args[2] = arg_new_constant(ctx, keep);
        return fold_and(ctx, op);
    }

    /* Inserting zero into a value: clear the field in place. */
    if (ti_is_const_val(field, 0)) {
        uint64_t keep = deposit64(-1, pos, nbits, 0);

        op->opc = INDEX_op_and;
        op->args[2] = arg_new_constant(ctx, keep);
        return fold_and(ctx, op);
    }

    /* The s_mask from the top portion of the deposit is still valid. */
    if (pos + nbits != width) {
        s = base->s_mask & ~MAKE_64BIT_MASK(0, pos + nbits);
    } else {
        s = field->s_mask << pos;
    }

    z = deposit64(base->z_mask, pos, nbits, field->z_mask);
    o = deposit64(base->o_mask, pos, nbits, field->o_mask);

    return fold_masks_zos(ctx, op, z, o, s);
}
1912 
/*
 * Fold a division: try full constant folding first, then the
 * fold_xi_to_x() rule with the constant 1 (presumably x / 1 => x).
 */
static bool fold_divide(OptContext *ctx, TCGOp *op)
{
    if (fold_const2(ctx, op)) {
        return true;
    }
    if (fold_xi_to_x(ctx, op, 1)) {
        return true;
    }
    return finish_folding(ctx, op);
}
1921 
/*
 * Fold a dup whose source is constant into a move of the replicated
 * constant, as computed by dup_const() for the op's element size.
 */
static bool fold_dup(OptContext *ctx, TCGOp *op)
{
    TCGArg src = op->args[1];
    uint64_t val;

    if (!arg_is_const(src)) {
        return finish_folding(ctx, op);
    }

    val = arg_const_val(src);
    val = dup_const(TCGOP_VECE(op), val);
    return tcg_opt_gen_movi(ctx, op, op->args[0], val);
}
1931 
/*
 * Fold dup2, which takes two inputs (args[1] and args[2]).
 * Two constants are merged into one 64-bit constant with args[2]
 * in bits [32, 63]; two copies of the same value become dup_vec
 * with 32-bit elements.
 */
static bool fold_dup2(OptContext *ctx, TCGOp *op)
{
    TCGArg lo = op->args[1];
    TCGArg hi = op->args[2];

    if (arg_is_const(lo) && arg_is_const(hi)) {
        uint64_t val = deposit64(arg_const_val(lo), 32, 32,
                                 arg_const_val(hi));

        return tcg_opt_gen_movi(ctx, op, op->args[0], val);
    }

    if (args_are_copies(lo, hi)) {
        op->opc = INDEX_op_dup_vec;
        TCGOP_VECE(op) = MO_32;
    }
    return finish_folding(ctx, op);
}
1946 
/*
 * Fold eqv.  With a constant second operand the op is rewritten as
 * xor with the inverted constant; otherwise only the known-bit masks
 * of the result are computed.
 */
static bool fold_eqv(OptContext *ctx, TCGOp *op)
{
    TempOptInfo *lhs, *rhs;
    uint64_t z1, z2, o1, o2;
    uint64_t z_mask, o_mask, s_mask;

    if (fold_const2_commutative(ctx, op)) {
        return true;
    }

    rhs = arg_info(op->args[2]);
    if (ti_is_const(rhs)) {
        TCGOpcode xor_opc;

        /* Fold eqv r,x,i to xor r,x,~i. */
        switch (ctx->type) {
        case TCG_TYPE_I32:
        case TCG_TYPE_I64:
            xor_opc = INDEX_op_xor;
            break;
        case TCG_TYPE_V64:
        case TCG_TYPE_V128:
        case TCG_TYPE_V256:
            xor_opc = INDEX_op_xor_vec;
            break;
        default:
            g_assert_not_reached();
        }
        op->opc = xor_opc;
        op->args[2] = arg_new_constant(ctx, ~ti_const_val(rhs));
        return fold_xor(ctx, op);
    }

    lhs = arg_info(op->args[1]);
    z1 = lhs->z_mask;
    o1 = lhs->o_mask;
    z2 = rhs->z_mask;
    o2 = rhs->o_mask;

    /*
     * A result bit can be one unless one input is known one while the
     * other is known zero; it is known one when both inputs are known
     * zero or both are known one.
     */
    z_mask = (z1 | ~o2) & (z2 | ~o1);
    o_mask = (o1 & o2) | ~(z1 | z2);
    s_mask = lhs->s_mask & rhs->s_mask;

    return fold_masks_zos(ctx, op, z_mask, o_mask, s_mask);
}
1984 
/*
 * Fold an unsigned bitfield extract of LEN bits (args[3]) starting
 * at bit POS (args[2]) of args[1].
 */
static bool fold_extract(OptContext *ctx, TCGOp *op)
{
    TempOptInfo *src = arg_info(op->args[1]);
    int pos = op->args[2];
    int len = op->args[3];
    uint64_t z_mask, o_mask, a_mask;

    if (ti_is_const(src)) {
        uint64_t val = extract64(ti_const_val(src), pos, len);

        return tcg_opt_gen_movi(ctx, op, op->args[0], val);
    }

    z_mask = extract64(src->z_mask, pos, len);
    o_mask = extract64(src->o_mask, pos, len);

    /*
     * With a non-zero position every bit counts as affected.  At
     * position 0 only the possibly-set input bits that are dropped
     * by the extract are affected.
     */
    if (pos) {
        a_mask = -1;
    } else {
        a_mask = src->z_mask ^ z_mask;
    }

    return fold_masks_zosa(ctx, op, z_mask, o_mask, 0, a_mask);
}
2003 
/*
 * Compute the known-bit masks of extract2: the masks of args[1] are
 * shifted right by args[3] and combined with the masks of args[2]
 * shifted left by (width - args[3]).
 */
static bool fold_extract2(OptContext *ctx, TCGOp *op)
{
    TempOptInfo *lo = arg_info(op->args[1]);
    TempOptInfo *hi = arg_info(op->args[2]);
    int shr = op->args[3];
    uint64_t lo_z = lo->z_mask;
    uint64_t lo_o = lo->o_mask;
    uint64_t hi_z = hi->z_mask;
    uint64_t hi_o = hi->o_mask;

    if (ctx->type != TCG_TYPE_I32) {
        lo_z >>= shr;
        lo_o >>= shr;
        hi_z <<= 64 - shr;
        hi_o <<= 64 - shr;
    } else {
        /* Work on the low 32 bits; the high part is sign-extended. */
        lo_z = (uint32_t)lo_z >> shr;
        lo_o = (uint32_t)lo_o >> shr;
        hi_z = (uint64_t)((int32_t)hi_z << (32 - shr));
        hi_o = (uint64_t)((int32_t)hi_o << (32 - shr));
    }

    return fold_masks_zo(ctx, op, lo_z | hi_z, lo_o | hi_o);
}
2028 
/*
 * Fold a sign extension.  The only opcode handled is ext_i32_i64;
 * any other opcode trips the assertion.
 */
static bool fold_exts(OptContext *ctx, TCGOp *op)
{
    TempOptInfo *src;
    uint64_t z_mask, o_mask, s_mask;

    if (fold_const1(ctx, op)) {
        return true;
    }

    if (op->opc != INDEX_op_ext_i32_i64) {
        g_assert_not_reached();
    }

    src = arg_info(op->args[1]);

    /* Bits 31 and above of the result all repeat the sign. */
    s_mask = src->s_mask | INT32_MIN;
    /* The known-bit masks are sign-extended from bit 31 too. */
    z_mask = (int32_t)src->z_mask;
    o_mask = (int32_t)src->o_mask;

    return fold_masks_zos(ctx, op, z_mask, o_mask, s_mask);
}
2054 
/*
 * Fold the ops that produce a zero-extended or truncated 32-bit half
 * of their input: extrl_i64_i32, extu_i32_i64 and extrh_i64_i32.
 */
static bool fold_extu(OptContext *ctx, TCGOp *op)
{
    TempOptInfo *src;
    uint64_t z_mask, o_mask;

    if (fold_const1(ctx, op)) {
        return true;
    }

    src = arg_info(op->args[1]);

    switch (op->opc) {
    case INDEX_op_extrh_i64_i32:
        /* The result is the high half of the input. */
        z_mask = src->z_mask >> 32;
        o_mask = src->o_mask >> 32;
        break;
    case INDEX_op_extrl_i64_i32:
    case INDEX_op_extu_i32_i64:
        /* The result is the low half of the input. */
        z_mask = (uint32_t)src->z_mask;
        o_mask = (uint32_t)src->o_mask;
        break;
    default:
        g_assert_not_reached();
    }
    return fold_masks_zo(ctx, op, z_mask, o_mask);
}
2083 
/*
 * Eliminate duplicate and redundant fence instructions by merging
 * this barrier into the one remembered in ctx->prev_mb, if any.
 */
static bool fold_mb(OptContext *ctx, TCGOp *op)
{
    TCGOp *prev = ctx->prev_mb;

    if (!prev) {
        /* No barrier to merge with: remember this one. */
        ctx->prev_mb = op;
        return true;
    }

    /*
     * Merge two barriers of the same type into one,
     * or a weaker barrier into a stronger one,
     * or two weaker barriers into a stronger one.
     *   mb X; mb Y => mb X|Y
     *   mb; strl => mb; st
     *   ldaq; mb => ld; mb
     *   ldaq; strl => ld; mb; st
     * Other combinations are also merged into a strong
     * barrier.  This is stricter than specified but for
     * the purposes of TCG is better than not optimizing.
     */
    prev->args[0] |= op->args[0];
    tcg_op_remove(ctx->tcg, op);
    return true;
}
2107 
/* Fold a move by handing destination and source to tcg_opt_gen_mov(). */
static bool fold_mov(OptContext *ctx, TCGOp *op)
{
    TCGArg dst = op->args[0];
    TCGArg src = op->args[1];

    return tcg_opt_gen_mov(ctx, op, dst, src);
}
2112 
/*
 * Fold movcond.  args[1] and args[2] are compared with the condition
 * in args[5]; args[3] is the "true" value and args[4] the "false" one.
 */
static bool fold_movcond(OptContext *ctx, TCGOp *op)
{
    TempOptInfo *t_info, *f_info;
    int res;

    /* Identical true and false values make the comparison useless. */
    if (args_are_copies(op->args[3], op->args[4])) {
        return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[3]);
    }

    /*
     * Canonicalize the "false" input reg to match the destination reg so
     * that the tcg backend can implement a "move if true" operation.
     * Swapping the two values requires inverting the condition.
     */
    if (swap_commutative(op->args[0], &op->args[4], &op->args[3])) {
        op->args[5] = tcg_invert_cond(op->args[5]);
    }

    /* A comparison with a known outcome selects one of the inputs. */
    res = do_constant_folding_cond1(ctx, op, NO_DEST, &op->args[1],
                                    &op->args[2], &op->args[5]);
    if (res >= 0) {
        return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[4 - res]);
    }

    t_info = arg_info(op->args[3]);
    f_info = arg_info(op->args[4]);

    /* Selecting between 0 and 1 (or 0 and -1) is a [neg]setcond. */
    if (ti_is_const(t_info) && ti_is_const(f_info)) {
        uint64_t tv = ti_const_val(t_info);
        uint64_t fv = ti_const_val(f_info);
        TCGCond cond = op->args[5];

        if (fv == 0 && (tv == 1 || tv == -1)) {
            op->opc = tv == 1 ? INDEX_op_setcond : INDEX_op_negsetcond;
            op->args[3] = cond;
        } else if (tv == 0 && (fv == 1 || fv == -1)) {
            op->opc = fv == 1 ? INDEX_op_setcond : INDEX_op_negsetcond;
            op->args[3] = tcg_invert_cond(cond);
        }
    }

    /* The result is one of the two inputs: merge what is known. */
    return fold_masks_zos(ctx, op,
                          t_info->z_mask | f_info->z_mask,
                          t_info->o_mask & f_info->o_mask,
                          t_info->s_mask & f_info->s_mask);
}
2166 
/*
 * Fold a multiply: constant folding first, then the fold_xi_to_i()
 * rule with constant 0 and the fold_xi_to_x() rule with constant 1.
 */
static bool fold_mul(OptContext *ctx, TCGOp *op)
{
    if (fold_const2(ctx, op)) {
        return true;
    }
    if (fold_xi_to_i(ctx, op, 0)) {
        return true;
    }
    if (fold_xi_to_x(ctx, op, 1)) {
        return true;
    }
    return finish_folding(ctx, op);
}
2176 
/*
 * Fold a high-part multiply: commutative constant folding first,
 * then the fold_xi_to_i() rule with constant 0.
 */
static bool fold_mul_highpart(OptContext *ctx, TCGOp *op)
{
    if (fold_const2_commutative(ctx, op)) {
        return true;
    }
    if (fold_xi_to_i(ctx, op, 0)) {
        return true;
    }
    return finish_folding(ctx, op);
}
2185 
/*
 * Fold mulu2/muls2, which write a double-width product to two
 * outputs: args[0] receives the low part and args[1] the high part.
 * When both inputs are constant the op is replaced by two moves of
 * constants.
 */
static bool fold_multiply2(OptContext *ctx, TCGOp *op)
{
    uint64_t a, b, lo, hi;
    TCGArg ret_lo, ret_hi;
    TCGOp *hi_op;

    swap_commutative(op->args[0], &op->args[2], &op->args[3]);

    if (!arg_is_const(op->args[2]) || !arg_is_const(op->args[3])) {
        return finish_folding(ctx, op);
    }

    a = arg_const_val(op->args[2]);
    b = arg_const_val(op->args[3]);

    switch (op->opc) {
    case INDEX_op_mulu2:
        if (ctx->type == TCG_TYPE_I32) {
            /* 32x32->64 unsigned product, split into two halves. */
            lo = (uint64_t)(uint32_t)a * (uint32_t)b;
            hi = (int32_t)(lo >> 32);
            lo = (int32_t)lo;
        } else {
            mulu64(&lo, &hi, a, b);
        }
        break;
    case INDEX_op_muls2:
        if (ctx->type == TCG_TYPE_I32) {
            /* 32x32->64 signed product, split into two halves. */
            lo = (int64_t)(int32_t)a * (int32_t)b;
            hi = lo >> 32;
            lo = (int32_t)lo;
        } else {
            muls64(&lo, &hi, a, b);
        }
        break;
    default:
        g_assert_not_reached();
    }

    ret_lo = op->args[0];
    ret_hi = op->args[1];

    /* The proper opcode is supplied by tcg_opt_gen_mov. */
    hi_op = opt_insert_before(ctx, op, 0, 2);

    tcg_opt_gen_movi(ctx, op, ret_lo, lo);
    tcg_opt_gen_movi(ctx, hi_op, ret_hi, hi);
    return true;
}
2232 
/*
 * Fold nand: constant folding, the fold_xi_to_not() rule with
 * constant -1, and otherwise the known-bit masks of the result.
 */
static bool fold_nand(OptContext *ctx, TCGOp *op)
{
    TempOptInfo *lhs, *rhs;
    uint64_t z_mask, o_mask, s_mask;

    if (fold_const2_commutative(ctx, op)) {
        return true;
    }
    if (fold_xi_to_not(ctx, op, -1)) {
        return true;
    }

    lhs = arg_info(op->args[1]);
    rhs = arg_info(op->args[2]);

    /* A result bit may be one unless both inputs are known one. */
    z_mask = ~lhs->o_mask | ~rhs->o_mask;
    /* A result bit is known one when either input is known zero. */
    o_mask = ~lhs->z_mask | ~rhs->z_mask;
    s_mask = lhs->s_mask & rhs->s_mask;

    return fold_masks_zos(ctx, op, z_mask, o_mask, s_mask);
}
2252 
/*
 * Compute the z_mask of a negation whose input is not constant:
 * the lowest possibly-set input bit and every bit to its left
 * may be set in the result.
 */
static bool fold_neg_no_const(OptContext *ctx, TCGOp *op)
{
    uint64_t in_z = arg_info(op->args[1])->z_mask;
    /* Isolate the rightmost set bit of the input mask. */
    uint64_t lowest = in_z & -in_z;

    /* Negating it sets that bit and all bits to the left of it. */
    return fold_masks_z(ctx, op, -lowest);
}
2261 
/* Fold neg: constant folding if possible, else only mask tracking. */
static bool fold_neg(OptContext *ctx, TCGOp *op)
{
    if (fold_const1(ctx, op)) {
        return true;
    }
    return fold_neg_no_const(ctx, op);
}
2266 
/*
 * Fold nor: constant folding, the fold_xi_to_not() rule with
 * constant 0, and otherwise the known-bit masks of the result.
 */
static bool fold_nor(OptContext *ctx, TCGOp *op)
{
    TempOptInfo *lhs, *rhs;
    uint64_t z_mask, o_mask, s_mask;

    if (fold_const2_commutative(ctx, op)) {
        return true;
    }
    if (fold_xi_to_not(ctx, op, 0)) {
        return true;
    }

    lhs = arg_info(op->args[1]);
    rhs = arg_info(op->args[2]);

    /* A result bit may be one unless either input is known one. */
    z_mask = ~lhs->o_mask & ~rhs->o_mask;
    /* A result bit is known one when both inputs are known zero. */
    o_mask = ~lhs->z_mask & ~rhs->z_mask;
    s_mask = lhs->s_mask & rhs->s_mask;

    return fold_masks_zos(ctx, op, z_mask, o_mask, s_mask);
}
2286 
/*
 * Fold not.  Inverting the value exchanges the roles of the known-one
 * and known-zero masks; the sign repetitions are passed through.
 */
static bool fold_not(OptContext *ctx, TCGOp *op)
{
    TempOptInfo *src;
    uint64_t z_mask, o_mask;

    if (fold_const1(ctx, op)) {
        return true;
    }

    src = arg_info(op->args[1]);
    z_mask = ~src->o_mask;
    o_mask = ~src->z_mask;

    return fold_masks_zos(ctx, op, z_mask, o_mask, src->s_mask);
}
2298 
/*
 * Fold or: constant folding, the fold_xi_to_x() rule with constant 0,
 * the fold_xx_to_x() rule, and otherwise the masks of the result.
 */
static bool fold_or(OptContext *ctx, TCGOp *op)
{
    TempOptInfo *lhs, *rhs;
    uint64_t z_mask, o_mask, s_mask, a_mask;

    if (fold_const2_commutative(ctx, op)) {
        return true;
    }
    if (fold_xi_to_x(ctx, op, 0)) {
        return true;
    }
    if (fold_xx_to_x(ctx, op)) {
        return true;
    }

    lhs = arg_info(op->args[1]);
    rhs = arg_info(op->args[2]);

    /* Affected bits are those not known one, masked by those known zero. */
    a_mask = rhs->z_mask & ~lhs->o_mask;

    s_mask = lhs->s_mask & rhs->s_mask;
    o_mask = lhs->o_mask | rhs->o_mask;
    z_mask = lhs->z_mask | rhs->z_mask;

    return fold_masks_zosa(ctx, op, z_mask, o_mask, s_mask, a_mask);
}
2322 
/*
 * Fold orc.  With a constant second operand the op is rewritten as
 * or with the inverted constant; otherwise the fold_xx_to_i() and
 * fold_ix_to_not() rules are tried before computing the masks.
 */
static bool fold_orc(OptContext *ctx, TCGOp *op)
{
    TempOptInfo *lhs, *rhs;
    uint64_t z_mask, o_mask, s_mask, a_mask;

    if (fold_const2(ctx, op)) {
        return true;
    }

    rhs = arg_info(op->args[2]);
    if (ti_is_const(rhs)) {
        TCGOpcode or_opc;

        /* Fold orc r,x,i to or r,x,~i. */
        switch (ctx->type) {
        case TCG_TYPE_I32:
        case TCG_TYPE_I64:
            or_opc = INDEX_op_or;
            break;
        case TCG_TYPE_V64:
        case TCG_TYPE_V128:
        case TCG_TYPE_V256:
            or_opc = INDEX_op_or_vec;
            break;
        default:
            g_assert_not_reached();
        }
        op->opc = or_opc;
        op->args[2] = arg_new_constant(ctx, ~ti_const_val(rhs));
        return fold_or(ctx, op);
    }

    if (fold_xx_to_i(ctx, op, -1)) {
        return true;
    }
    if (fold_ix_to_not(ctx, op, 0)) {
        return true;
    }

    lhs = arg_info(op->args[1]);

    /* Affected bits are those not known one, masked by those known one. */
    a_mask = rhs->o_mask & ~lhs->o_mask;

    /* The second operand is inverted, so its masks swap roles. */
    s_mask = lhs->s_mask & rhs->s_mask;
    o_mask = lhs->o_mask | ~rhs->z_mask;
    z_mask = lhs->z_mask | ~rhs->o_mask;

    return fold_masks_zosa(ctx, op, z_mask, o_mask, s_mask, a_mask);
}
2366 
/*
 * Derive result masks for a guest load into a single register from
 * the access size and MO_SIGN flag of its MemOp.
 */
static bool fold_qemu_ld_1reg(OptContext *ctx, TCGOp *op)
{
    const TCGOpDef *def = &tcg_op_defs[op->opc];
    /* The MemOpIdx follows the output and input arguments. */
    MemOpIdx oi = op->args[def->nb_oargs + def->nb_iargs];
    MemOp mop = get_memop(oi);
    int width = 8 * memop_size(mop);
    uint64_t z_mask = -1;
    uint64_t s_mask = 0;

    /* Opcodes that touch guest memory stop the mb optimization.  */
    ctx->prev_mb = NULL;

    if (width < 64) {
        if (!(mop & MO_SIGN)) {
            /* Without MO_SIGN, only the low WIDTH bits can be set. */
            z_mask = MAKE_64BIT_MASK(0, width);
        } else {
            /* With MO_SIGN, bits [width - 1, 63] repeat the sign. */
            s_mask = MAKE_64BIT_MASK(width - 1, 64 - (width - 1));
        }
    }

    return fold_masks_zs(ctx, op, z_mask, s_mask);
}
2388 
/*
 * Fold hook for the 2reg guest load: no simplification is attempted,
 * only the barrier-merging state is reset.
 */
static bool fold_qemu_ld_2reg(OptContext *ctx, TCGOp *op)
{
    /* Opcodes that touch guest memory stop the mb optimization.  */
    ctx->prev_mb = NULL;

    return finish_folding(ctx, op);
}
2395 
/*
 * Fold hook for a guest store: nothing is simplified, only the
 * barrier-merging state is reset.  Always returns true.
 */
static bool fold_qemu_st(OptContext *ctx, TCGOp *op)
{
    /* Opcodes that touch guest memory stop the mb optimization.  */
    ctx->prev_mb = NULL;

    return true;
}
2402 
/*
 * Fold a remainder: constant folding first, then the fold_xx_to_i()
 * rule with constant 0 (presumably x % x => 0).
 */
static bool fold_remainder(OptContext *ctx, TCGOp *op)
{
    if (fold_const2(ctx, op)) {
        return true;
    }
    if (fold_xx_to_i(ctx, op, 0)) {
        return true;
    }
    return finish_folding(ctx, op);
}
2411 
2412 /* Return 1 if finished, -1 if simplified, 0 if unchanged. */
fold_setcond_zmask(OptContext * ctx,TCGOp * op,bool neg)2413 static int fold_setcond_zmask(OptContext *ctx, TCGOp *op, bool neg)
2414 {
2415     uint64_t a_zmask, b_val;
2416     TCGCond cond;
2417 
2418     if (!arg_is_const(op->args[2])) {
2419         return false;
2420     }
2421 
2422     a_zmask = arg_info(op->args[1])->z_mask;
2423     b_val = arg_const_val(op->args[2]);
2424     cond = op->args[3];
2425 
2426     if (ctx->type == TCG_TYPE_I32) {
2427         a_zmask = (uint32_t)a_zmask;
2428         b_val = (uint32_t)b_val;
2429     }
2430 
2431     /*
2432      * A with only low bits set vs B with high bits set means that A < B.
2433      */
2434     if (a_zmask < b_val) {
2435         bool inv = false;
2436 
2437         switch (cond) {
2438         case TCG_COND_NE:
2439         case TCG_COND_LEU:
2440         case TCG_COND_LTU:
2441             inv = true;
2442             /* fall through */
2443         case TCG_COND_GTU:
2444         case TCG_COND_GEU:
2445         case TCG_COND_EQ:
2446             return tcg_opt_gen_movi(ctx, op, op->args[0], neg ? -inv : inv);
2447         default:
2448             break;
2449         }
2450     }
2451 
2452     /*
2453      * A with only lsb set is already boolean.
2454      */
2455     if (a_zmask <= 1) {
2456         bool convert = false;
2457         bool inv = false;
2458 
2459         switch (cond) {
2460         case TCG_COND_EQ:
2461             inv = true;
2462             /* fall through */
2463         case TCG_COND_NE:
2464             convert = (b_val == 0);
2465             break;
2466         case TCG_COND_LTU:
2467         case TCG_COND_TSTEQ:
2468             inv = true;
2469             /* fall through */
2470         case TCG_COND_GEU:
2471         case TCG_COND_TSTNE:
2472             convert = (b_val == 1);
2473             break;
2474         default:
2475             break;
2476         }
2477         if (convert) {
2478             if (!inv && !neg) {
2479                 return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
2480             }
2481 
2482             if (!inv) {
2483                 op->opc = INDEX_op_neg;
2484             } else if (neg) {
2485                 op->opc = INDEX_op_add;
2486                 op->args[2] = arg_new_constant(ctx, -1);
2487             } else {
2488                 op->opc = INDEX_op_xor;
2489                 op->args[2] = arg_new_constant(ctx, 1);
2490             }
2491             return -1;
2492         }
2493     }
2494     return 0;
2495 }
2496 
/*
 * Rewrite a [neg]setcond that uses a TSTEQ/TSTNE condition against a
 * power-of-2 constant as a single-bit extraction, followed by a fixup
 * op when the result must be inverted and/or negated.  NEG selects
 * negsetcond semantics.  The op is left untouched when the pattern
 * does not match.
 */
static void fold_setcond_tst_pow2(OptContext *ctx, TCGOp *op, bool neg)
{
    TCGCond cond = op->args[3];
    TCGArg dst, src;
    TCGOp *extra;
    uint64_t bit;
    int shift;
    bool inv;

    if (!is_tst_cond(cond)) {
        return;
    }
    if (!arg_is_const(op->args[2])) {
        return;
    }

    bit = arg_const_val(op->args[2]);
    if (!is_power_of_2(bit)) {
        return;
    }
    shift = ctz64(bit);

    dst = op->args[0];
    src = op->args[1];
    inv = cond == TCG_COND_TSTEQ;

    if (shift && neg && !inv &&
        TCG_TARGET_sextract_valid(ctx->type, shift, 1)) {
        /* A 1-bit signed extract directly yields 0 or -1. */
        op->opc = INDEX_op_sextract;
        op->args[1] = src;
        op->args[2] = shift;
        op->args[3] = 1;
        return;
    }

    if (shift && TCG_TARGET_extract_valid(ctx->type, shift, 1)) {
        op->opc = INDEX_op_extract;
        op->args[1] = src;
        op->args[2] = shift;
        op->args[3] = 1;
    } else {
        /* Shift the tested bit down to bit 0, then mask it. */
        if (shift) {
            extra = opt_insert_before(ctx, op, INDEX_op_shr, 3);
            extra->args[0] = dst;
            extra->args[1] = src;
            extra->args[2] = arg_new_constant(ctx, shift);
            src = dst;
        }
        op->opc = INDEX_op_and;
        op->args[1] = src;
        op->args[2] = arg_new_constant(ctx, 1);
    }

    /* DST now holds the tested bit as 0/1; fix it up as required. */
    if (inv) {
        if (neg) {
            extra = opt_insert_after(ctx, op, INDEX_op_add, 3);
            extra->args[0] = dst;
            extra->args[1] = dst;
            extra->args[2] = arg_new_constant(ctx, -1);
        } else {
            extra = opt_insert_after(ctx, op, INDEX_op_xor, 3);
            extra->args[0] = dst;
            extra->args[1] = dst;
            extra->args[2] = arg_new_constant(ctx, 1);
        }
    } else if (neg) {
        extra = opt_insert_after(ctx, op, INDEX_op_neg, 2);
        extra->args[0] = dst;
        extra->args[1] = dst;
    }
}
2561 
/*
 * Fold setcond: a comparison with a known outcome becomes a constant,
 * then the z_mask-based and test-power-of-2 rewrites are tried.
 */
static bool fold_setcond(OptContext *ctx, TCGOp *op)
{
    int res;

    res = do_constant_folding_cond1(ctx, op, op->args[0], &op->args[1],
                                    &op->args[2], &op->args[3]);
    if (res >= 0) {
        return tcg_opt_gen_movi(ctx, op, op->args[0], res);
    }

    res = fold_setcond_zmask(ctx, op, false);
    if (res > 0) {
        /* Folding has been completed. */
        return true;
    }
    if (res == 0) {
        /* Unchanged so far: try the single-bit test rewrite. */
        fold_setcond_tst_pow2(ctx, op, false);
    }

    /* Only bit 0 of the result can be set. */
    return fold_masks_z(ctx, op, 1);
}
2580 
/*
 * Fold negsetcond: a comparison with a known outcome becomes the
 * constant 0 or -1, then the z_mask-based and test-power-of-2
 * rewrites are tried.
 */
static bool fold_negsetcond(OptContext *ctx, TCGOp *op)
{
    int res;

    res = do_constant_folding_cond1(ctx, op, op->args[0], &op->args[1],
                                    &op->args[2], &op->args[3]);
    if (res >= 0) {
        return tcg_opt_gen_movi(ctx, op, op->args[0], -res);
    }

    res = fold_setcond_zmask(ctx, op, true);
    if (res > 0) {
        /* Folding has been completed. */
        return true;
    }
    if (res == 0) {
        /* Unchanged so far: try the single-bit test rewrite. */
        fold_setcond_tst_pow2(ctx, op, true);
    }

    /* Value is {0,-1} so all bits are repetitions of the sign. */
    return fold_masks_s(ctx, op, -1);
}
2600 
/*
 * Fold setcond2, a comparison of two double-word values:
 * args[1]/args[2] are the low/high parts of the first operand,
 * args[3]/args[4] those of the second, args[5] is the condition.
 * Where possible it is reduced to a constant or to a setcond on
 * a single pair of words.
 */
static bool fold_setcond2(OptContext *ctx, TCGOp *op)
{
    TCGCond cond;
    int inv = 0;
    int i;

    i = do_constant_folding_cond2(ctx, op, &op->args[1]);
    cond = op->args[5];
    if (i >= 0) {
        goto do_setcond_const;
    }

    switch (cond) {
    case TCG_COND_LT:
    case TCG_COND_GE:
        /*
         * Simplify LT/GE comparisons vs zero to a single compare
         * vs the high word of the input.
         */
        if (arg_is_const_val(op->args[3], 0) &&
            arg_is_const_val(op->args[4], 0)) {
            goto do_setcond_high;
        }
        break;

    case TCG_COND_NE:
        inv = 1;
        QEMU_FALLTHROUGH;
    case TCG_COND_EQ:
        /*
         * Simplify EQ/NE comparisons where one of the pairs
         * can be simplified: a decided pair either settles the
         * whole comparison or leaves only the other pair.
         */
        i = do_constant_folding_cond(TCG_TYPE_I32, op->args[1],
                                     op->args[3], cond);
        if ((i ^ inv) == 0) {
            goto do_setcond_const;
        }
        if ((i ^ inv) == 1) {
            goto do_setcond_high;
        }

        i = do_constant_folding_cond(TCG_TYPE_I32, op->args[2],
                                     op->args[4], cond);
        if ((i ^ inv) == 0) {
            goto do_setcond_const;
        }
        if ((i ^ inv) == 1) {
            goto do_setcond_low;
        }
        break;

    case TCG_COND_TSTEQ:
    case TCG_COND_TSTNE:
        /* Testing against a zero word leaves only the other word. */
        if (arg_is_const_val(op->args[3], 0)) {
            goto do_setcond_high;
        }
        if (arg_is_const_val(op->args[4], 0)) {
            goto do_setcond_low;
        }
        break;

    default:
        break;
    }

    /* Only bit 0 of the result can be set. */
    return fold_masks_z(ctx, op, 1);

 do_setcond_low:
    /* Compare the low words only: args[1] vs args[3]. */
    op->args[2] = op->args[3];
    op->args[3] = cond;
    op->opc = INDEX_op_setcond;
    return fold_setcond(ctx, op);

 do_setcond_high:
    /* Compare the high words only: args[2] vs args[4]. */
    op->args[1] = op->args[2];
    op->args[2] = op->args[4];
    op->args[3] = cond;
    op->opc = INDEX_op_setcond;
    return fold_setcond(ctx, op);

 do_setcond_const:
    return tcg_opt_gen_movi(ctx, op, op->args[0], i);
}
2684 
/*
 * Fold a signed bitfield extract of LEN bits (args[3]) starting at
 * bit POS (args[2]) of args[1].
 */
static bool fold_sextract(OptContext *ctx, TCGOp *op)
{
    TempOptInfo *src = arg_info(op->args[1]);
    int pos = op->args[2];
    int len = op->args[3];
    uint64_t z_mask, o_mask, s_mask, a_mask;

    if (ti_is_const(src)) {
        uint64_t val = sextract64(ti_const_val(src), pos, len);

        return tcg_opt_gen_movi(ctx, op, op->args[0], val);
    }

    /*
     * Input sign repetitions move down by POS, and every bit from
     * LEN - 1 upward repeats the sign of the extracted field.
     */
    s_mask = (src->s_mask >> pos) | (-1ull << (len - 1));

    /*
     * With a non-zero position every bit counts as affected.  At
     * position 0 only the sign bits not already known for the input
     * are affected.
     */
    if (pos) {
        a_mask = -1;
    } else {
        a_mask = s_mask & ~src->s_mask;
    }

    z_mask = sextract64(src->z_mask, pos, len);
    o_mask = sextract64(src->o_mask, pos, len);

    return fold_masks_zosa(ctx, op, z_mask, o_mask, s_mask, a_mask);
}
2706 
/*
 * Fold a shift or rotate.  With a constant shift count the masks
 * are transformed by the operation itself; with a variable count
 * only sign-repetition information may survive, and only for
 * right shifts.
 */
static bool fold_shift(OptContext *ctx, TCGOp *op)
{
    TempOptInfo *val, *cnt;
    uint64_t z_mask, o_mask, s_mask;

    if (fold_const2(ctx, op)) {
        return true;
    }
    if (fold_ix_to_i(ctx, op, 0)) {
        return true;
    }
    if (fold_xi_to_x(ctx, op, 0)) {
        return true;
    }

    val = arg_info(op->args[1]);
    cnt = arg_info(op->args[2]);
    z_mask = val->z_mask;
    o_mask = val->o_mask;
    s_mask = val->s_mask;

    if (ti_is_const(cnt)) {
        int sh = ti_const_val(cnt);

        /* Apply the very same operation to each of the masks. */
        z_mask = do_constant_folding(op->opc, ctx->type, z_mask, sh);
        o_mask = do_constant_folding(op->opc, ctx->type, o_mask, sh);
        s_mask = do_constant_folding(op->opc, ctx->type, s_mask, sh);

        return fold_masks_zos(ctx, op, z_mask, o_mask, s_mask);
    }

    /*
     * Arithmetic right shift will not reduce the number of
     * input sign repetitions.
     */
    if (op->opc == INDEX_op_sar) {
        return fold_masks_s(ctx, op, s_mask);
    }

    /*
     * If the sign bit is known zero, then logical right shift
     * will not reduce the number of input sign repetitions.
     */
    if (op->opc == INDEX_op_shr && (~z_mask & -s_mask)) {
        return fold_masks_s(ctx, op, s_mask);
    }

    return finish_folding(ctx, op);
}
2756 
/*
 * Rewrite "sub r,0,x" as "neg r,x".  For vector types this is done
 * only when the backend reports that it can emit neg_vec.  Returns
 * false when the op is left unchanged.
 */
static bool fold_sub_to_neg(OptContext *ctx, TCGOp *op)
{
    TCGOpcode neg_op;
    bool have_neg;

    if (!arg_is_const_val(op->args[1], 0)) {
        return false;
    }

    switch (ctx->type) {
    case TCG_TYPE_V64:
    case TCG_TYPE_V128:
    case TCG_TYPE_V256:
        neg_op = INDEX_op_neg_vec;
        have_neg = (TCG_TARGET_HAS_neg_vec &&
                    tcg_can_emit_vec_op(neg_op, ctx->type, TCGOP_VECE(op)) > 0);
        break;
    case TCG_TYPE_I32:
    case TCG_TYPE_I64:
        neg_op = INDEX_op_neg;
        have_neg = true;
        break;
    default:
        g_assert_not_reached();
    }

    if (!have_neg) {
        return false;
    }

    op->opc = neg_op;
    op->args[1] = op->args[2];
    return fold_neg_no_const(ctx, op);
}
2789 
2790 /* We cannot as yet do_constant_folding with vectors. */
fold_sub_vec(OptContext * ctx,TCGOp * op)2791 static bool fold_sub_vec(OptContext *ctx, TCGOp *op)
2792 {
2793     if (fold_xx_to_i(ctx, op, 0) ||
2794         fold_xi_to_x(ctx, op, 0) ||
2795         fold_sub_to_neg(ctx, op)) {
2796         return true;
2797     }
2798     return finish_folding(ctx, op);
2799 }
2800 
/*
 * Fold an integer subtract: constant folding, the fold_xx_to_i(),
 * fold_xi_to_x() and sub-to-neg rules, and finally conversion of a
 * subtraction of a constant into an addition.
 */
static bool fold_sub(OptContext *ctx, TCGOp *op)
{
    TCGArg subtrahend;

    if (fold_const2(ctx, op)) {
        return true;
    }
    if (fold_xx_to_i(ctx, op, 0)) {
        return true;
    }
    if (fold_xi_to_x(ctx, op, 0)) {
        return true;
    }
    if (fold_sub_to_neg(ctx, op)) {
        return true;
    }

    /* Fold sub r,x,i to add r,x,-i */
    subtrahend = op->args[2];
    if (arg_is_const(subtrahend)) {
        uint64_t val = arg_const_val(subtrahend);

        op->opc = INDEX_op_add;
        op->args[2] = arg_new_constant(ctx, -val);
    }
    return finish_folding(ctx, op);
}
2819 
/*
 * Rewrite the op that immediately precedes OP so that it no longer
 * produces a borrow-out.  That op must be subbo, subbio or subb1o;
 * anything else trips the assertion.
 */
static void squash_prev_borrowout(OptContext *ctx, TCGOp *op)
{
    TCGOp *prev = QTAILQ_PREV(op, link);

    switch (prev->opc) {
    case INDEX_op_subbo:
        prev->opc = INDEX_op_sub;
        fold_sub(ctx, prev);
        break;

    case INDEX_op_subbio:
        prev->opc = INDEX_op_subbi;
        break;

    case INDEX_op_subb1o: {
        TempOptInfo *t2 = arg_info(prev->args[2]);

        if (ti_is_const(t2)) {
            /* x - c - 1 becomes x + -(c + 1). */
            prev->opc = INDEX_op_add;
            prev->args[2] = arg_new_constant(ctx, -(ti_const_val(t2) + 1));
            /* Perform other constant folding, if needed. */
            fold_add(ctx, prev);
        } else {
            /* x - y - 1 becomes a plain sub followed by an add of -1. */
            TCGArg ret = prev->args[0];
            TCGOp *dec;

            prev->opc = INDEX_op_sub;
            dec = opt_insert_after(ctx, prev, INDEX_op_add, 3);
            dec->args[0] = ret;
            dec->args[1] = ret;
            dec->args[2] = arg_new_constant(ctx, -1);
        }
        break;
    }

    default:
        g_assert_not_reached();
    }
}
2853 
/*
 * Fold subbi (subtract with borrow-in).  When ctx->carry_state holds
 * a non-negative borrow-in, the op is turned into a plain sub or add
 * and the previous op stops producing a borrow-out.  A negative
 * carry_state leaves the op as it is.
 */
static bool fold_subbi(OptContext *ctx, TCGOp *op)
{
    int borrow_in = ctx->carry_state;
    TempOptInfo *t2;

    if (borrow_in < 0) {
        return finish_folding(ctx, op);
    }
    ctx->carry_state = -1;

    squash_prev_borrowout(ctx, op);

    /* No borrow coming in: this is an ordinary subtraction. */
    if (borrow_in == 0) {
        op->opc = INDEX_op_sub;
        return fold_sub(ctx, op);
    }

    /*
     * Propagate the known carry-in into any constant, then negate to
     * transform from sub to add.  If there is no constant, emit a
     * separate add -1.
     */
    t2 = arg_info(op->args[2]);
    if (!ti_is_const(t2)) {
        TCGOp *sub = opt_insert_before(ctx, op, INDEX_op_sub, 3);

        sub->args[0] = op->args[0];
        sub->args[1] = op->args[1];
        sub->args[2] = op->args[2];
        fold_sub(ctx, sub);

        op->args[1] = op->args[0];
        op->args[2] = arg_new_constant(ctx, -1);
    } else {
        op->args[2] = arg_new_constant(ctx, -(ti_const_val(t2) + 1));
    }

    op->opc = INDEX_op_add;
    return fold_add(ctx, op);
}
2892 
/*
 * Fold a subbio op using what ctx->carry_state says about the incoming
 * borrow (negative: not known, 0: clear, otherwise: set).
 *
 *  - Borrow-in not known: nothing is rewritten, finish_folding() only.
 *  - Borrow-in known clear: the op becomes subbo.
 *  - Borrow-in known set: the borrow is absorbed into a constant
 *    operand when that cannot wrap and the op becomes subbo;
 *    otherwise the op becomes subb1o, which records the known
 *    borrow-in in the opcode itself.
 *
 * Returns the result of finish_folding() or fold_subbo().
 */
static bool fold_subbio(OptContext *ctx, TCGOp *op)
{
    TempOptInfo *t1, *t2;
    int borrow_out = -1;

    if (ctx->carry_state < 0) {
        return finish_folding(ctx, op);
    }

    squash_prev_borrowout(ctx, op);
    if (ctx->carry_state == 0) {
        goto do_subbo;
    }

    t1 = arg_info(op->args[1]);
    t2 = arg_info(op->args[2]);

    /* Propagate the known borrow-in into a constant, if possible. */
    if (ti_is_const(t2)) {
        uint64_t max = ctx->type == TCG_TYPE_I32 ? UINT32_MAX : UINT64_MAX;
        uint64_t v = ti_const_val(t2) & max;

        /* a1 - c - 1 == a1 - (c + 1), provided c + 1 does not wrap. */
        if (v < max) {
            op->args[2] = arg_new_constant(ctx, v + 1);
            goto do_subbo;
        }
        /* subtracting max + 1 produces known borrow out. */
        borrow_out = 1;
    }
    if (ti_is_const(t1)) {
        uint64_t v = ti_const_val(t1);

        /*
         * c - a2 - 1 == (c - 1) - a2, provided c - 1 does not wrap.
         * The decrement belongs on the minuend, args[1]; the
         * subtrahend in args[2] must be left untouched.
         */
        if (v != 0) {
            op->args[1] = arg_new_constant(ctx, v - 1);
            goto do_subbo;
        }
    }

    /* Adjust the opcode to remember the known carry-in. */
    op->opc = INDEX_op_subb1o;
    ctx->carry_state = borrow_out;
    return finish_folding(ctx, op);

 do_subbo:
    op->opc = INDEX_op_subbo;
    return fold_subbo(ctx, op);
}
2939 
/*
 * Work out whether the borrow-out of a subbo op is known and record
 * it in ctx->carry_state: 0 when the subtrahend is the constant 0,
 * the result of the unsigned comparison a1 < a2 when both inputs are
 * constant, and -1 (not known) otherwise.  The op itself is not
 * rewritten here.
 *
 * Returns the result of finish_folding().
 */
static bool fold_subbo(OptContext *ctx, TCGOp *op)
{
    TempOptInfo *minuend = arg_info(op->args[1]);
    TempOptInfo *subtrahend = arg_info(op->args[2]);
    int known = -1;

    if (ti_is_const(subtrahend)) {
        uint64_t rhs = ti_const_val(subtrahend);

        if (rhs != 0) {
            if (ti_is_const(minuend)) {
                uint64_t lhs = ti_const_val(minuend);

                known = lhs < rhs;
            }
        } else {
            /* Subtracting zero can never borrow. */
            known = 0;
        }
    }

    ctx->carry_state = known;
    return finish_folding(ctx, op);
}
2958 
/*
 * Record the bits known about the result of a fixed-width load.
 * Nothing about the load itself is folded: unsigned loads narrow
 * the mask of possibly-set bits to the loaded width, signed loads
 * record which high bits repeat the sign bit.
 *
 * Returns the result of fold_masks_zs().
 */
static bool fold_tcg_ld(OptContext *ctx, TCGOp *op)
{
    /* Defaults: every bit may be set, no sign repetition known. */
    uint64_t maybe_set = -1;
    uint64_t sign_rep = 0;

    switch (op->opc) {
    /* Zero-extending loads. */
    case INDEX_op_ld8u:
        maybe_set = MAKE_64BIT_MASK(0, 8);
        break;
    case INDEX_op_ld16u:
        maybe_set = MAKE_64BIT_MASK(0, 16);
        break;
    case INDEX_op_ld32u:
        maybe_set = MAKE_64BIT_MASK(0, 32);
        break;

    /* Sign-extending loads. */
    case INDEX_op_ld8s:
        sign_rep = INT8_MIN;
        break;
    case INDEX_op_ld16s:
        sign_rep = INT16_MIN;
        break;
    case INDEX_op_ld32s:
        sign_rep = INT32_MIN;
        break;

    default:
        g_assert_not_reached();
    }

    return fold_masks_zs(ctx, op, maybe_set, sign_rep);
}
2988 
/*
 * Fold a full-width load.  Loads whose base is not tcg_env only get
 * finish_folding().  For a load from tcg_env, if a temp of the same
 * type is already recorded as a copy of that memory, the load is
 * replaced by a mov from it; otherwise the destination is reset and
 * recorded as a copy of the bytes it was loaded from.
 */
static bool fold_tcg_ld_memcopy(OptContext *ctx, TCGOp *op)
{
    TCGTemp *out, *known;
    TCGType ty;
    intptr_t first, last;

    if (op->args[1] != tcgv_ptr_arg(tcg_env)) {
        return finish_folding(ctx, op);
    }

    ty = ctx->type;
    first = op->args[2];
    out = arg_temp(op->args[0]);

    known = find_mem_copy_for(ctx, ty, first);
    if (known != NULL && known->base_type == ty) {
        return tcg_opt_gen_mov(ctx, op, temp_arg(out), temp_arg(known));
    }

    /* The destination now mirrors [first, last] of env memory. */
    reset_ts(ctx, out);
    last = first + tcg_type_size(ty) - 1;
    record_mem_copy(ctx, ty, out, first, last);
    return true;
}
3011 
/*
 * Account for a store in the memory-copy tracking.  A store whose
 * base is not tcg_env drops every recorded copy; a store relative to
 * tcg_env drops only the copies overlapping the stored byte range.
 * The store itself is never changed.  Always returns true.
 */
static bool fold_tcg_st(OptContext *ctx, TCGOp *op)
{
    intptr_t start = op->args[2];
    intptr_t len_m1;

    if (op->args[1] != tcgv_ptr_arg(tcg_env)) {
        remove_mem_copy_all(ctx);
        return true;
    }

    /* Width of the store in bytes, minus one. */
    switch (op->opc) {
    case INDEX_op_st:
    case INDEX_op_st_vec:
        len_m1 = tcg_type_size(ctx->type) - 1;
        break;
    case INDEX_op_st32:
        len_m1 = 3;
        break;
    case INDEX_op_st16:
        len_m1 = 1;
        break;
    case INDEX_op_st8:
        len_m1 = 0;
        break;
    default:
        g_assert_not_reached();
    }

    remove_mem_copy_in(ctx, start, start + len_m1);
    return true;
}
3042 
/*
 * Fold a full-width store.  Stores whose base is not tcg_env are
 * handed to fold_tcg_st().  For a store to tcg_env, a constant that
 * is already recorded as the content of that location makes the
 * store redundant and it is removed; otherwise the overlapping
 * records are dropped and the source temp is recorded as a copy of
 * the stored bytes.  Returns true on the tcg_env path.
 */
static bool fold_tcg_st_memcopy(OptContext *ctx, TCGOp *op)
{
    TCGTemp *val;
    TCGType ty;
    intptr_t first, end;

    if (op->args[1] != tcgv_ptr_arg(tcg_env)) {
        return fold_tcg_st(ctx, op);
    }

    val = arg_temp(op->args[0]);
    first = op->args[2];
    ty = ctx->type;

    /*
     * Storing the same constant twice is a no-op.
     * This is common when the target ISA zero-extends.
     */
    if (ts_is_const(val) && find_mem_copy_for(ctx, ty, first) == val) {
        tcg_op_remove(ctx->tcg, op);
        return true;
    }

    end = first + tcg_type_size(ty) - 1;
    remove_mem_copy_in(ctx, first, end);
    record_mem_copy(ctx, ty, val, first, end);
    return true;
}
3074 
/*
 * Fold an xor.  The generic simplifications are tried first, in
 * order: constant folding with commutative operand handling,
 * x ^ x -> 0, x ^ 0 -> x, and x ^ -1 -> not x.  If none applies,
 * the known-bit masks of the result are derived from the inputs.
 */
static bool fold_xor(OptContext *ctx, TCGOp *op)
{
    TempOptInfo *a, *b;
    uint64_t either_maybe_one, both_known_one;
    uint64_t res_z, res_o, res_s;

    if (fold_const2_commutative(ctx, op)) {
        return true;
    }
    if (fold_xx_to_i(ctx, op, 0)) {
        return true;
    }
    if (fold_xi_to_x(ctx, op, 0)) {
        return true;
    }
    if (fold_xi_to_not(ctx, op, -1)) {
        return true;
    }

    a = arg_info(op->args[1]);
    b = arg_info(op->args[2]);

    /* A result bit can be 1 unless both inputs are known to be 1. */
    either_maybe_one = a->z_mask | b->z_mask;
    both_known_one = a->o_mask & b->o_mask;
    res_z = either_maybe_one & ~both_known_one;

    /* A result bit is known 1 when one input is known 1, the other 0. */
    res_o = (a->o_mask & ~b->z_mask) | (b->o_mask & ~a->z_mask);

    /* Sign repetitions common to both inputs survive the xor. */
    res_s = a->s_mask & b->s_mask;

    return fold_masks_zos(ctx, op, res_z, res_o, res_s);
}
3096 
/*
 * Propagate constants and copies, fold constant expressions.
 *
 * Walks every op of @s once, in list order, and hands each one to the
 * fold_* routine for its opcode.  Each routine must report that it
 * has finished processing the op by returning true; this is checked
 * with tcg_debug_assert().
 */
void tcg_optimize(TCGContext *s)
{
    int nb_temps, i;
    TCGOp *op, *op_next;
    OptContext ctx = { .tcg = s };

    QSIMPLEQ_INIT(&ctx.mem_free);

    /*
     * Start with no optimizer state attached to any temp.
     * NOTE(review): the per-temp TempOptInfo (known bits, and the
     * doubly linked circular list of copies) is presumably attached
     * on demand by init_arguments() -- confirm.
     */

    nb_temps = s->nb_temps;
    for (i = 0; i < nb_temps; ++i) {
        s->temps[i].state_ptr = NULL;
    }

    /*
     * The _SAFE iteration is needed because a fold routine may remove
     * the current op (e.g. fold_tcg_st_memcopy calls tcg_op_remove).
     */
    QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
        TCGOpcode opc = op->opc;
        const TCGOpDef *def;
        bool done = false;

        /* Calls are special. */
        /* They bypass argument init, copy propagation and the switch. */
        if (opc == INDEX_op_call) {
            fold_call(&ctx, op);
            continue;
        }

        def = &tcg_op_defs[opc];
        init_arguments(&ctx, op, def->nb_oargs + def->nb_iargs);
        copy_propagate(&ctx, op, def->nb_oargs, def->nb_iargs);

        /* Pre-compute the type of the operation. */
        ctx.type = TCGOP_TYPE(op);

        /*
         * Process each opcode.
         * Sorted alphabetically by opcode as much as possible.
         */
        switch (opc) {
        case INDEX_op_add:
            done = fold_add(&ctx, op);
            break;
        case INDEX_op_add_vec:
            done = fold_add_vec(&ctx, op);
            break;
        case INDEX_op_addci:
            done = fold_addci(&ctx, op);
            break;
        case INDEX_op_addcio:
            done = fold_addcio(&ctx, op);
            break;
        case INDEX_op_addco:
            done = fold_addco(&ctx, op);
            break;
        case INDEX_op_and:
        case INDEX_op_and_vec:
            done = fold_and(&ctx, op);
            break;
        case INDEX_op_andc:
        case INDEX_op_andc_vec:
            done = fold_andc(&ctx, op);
            break;
        case INDEX_op_brcond:
            done = fold_brcond(&ctx, op);
            break;
        case INDEX_op_brcond2_i32:
            done = fold_brcond2(&ctx, op);
            break;
        case INDEX_op_bswap16:
        case INDEX_op_bswap32:
        case INDEX_op_bswap64:
            done = fold_bswap(&ctx, op);
            break;
        case INDEX_op_clz:
        case INDEX_op_ctz:
            done = fold_count_zeros(&ctx, op);
            break;
        case INDEX_op_ctpop:
            done = fold_ctpop(&ctx, op);
            break;
        case INDEX_op_deposit:
            done = fold_deposit(&ctx, op);
            break;
        case INDEX_op_divs:
        case INDEX_op_divu:
            done = fold_divide(&ctx, op);
            break;
        case INDEX_op_dup_vec:
            done = fold_dup(&ctx, op);
            break;
        case INDEX_op_dup2_vec:
            done = fold_dup2(&ctx, op);
            break;
        case INDEX_op_eqv:
        case INDEX_op_eqv_vec:
            done = fold_eqv(&ctx, op);
            break;
        case INDEX_op_extract:
            done = fold_extract(&ctx, op);
            break;
        case INDEX_op_extract2:
            done = fold_extract2(&ctx, op);
            break;
        case INDEX_op_ext_i32_i64:
            done = fold_exts(&ctx, op);
            break;
        case INDEX_op_extu_i32_i64:
        case INDEX_op_extrl_i64_i32:
        case INDEX_op_extrh_i64_i32:
            done = fold_extu(&ctx, op);
            break;
        case INDEX_op_ld8s:
        case INDEX_op_ld8u:
        case INDEX_op_ld16s:
        case INDEX_op_ld16u:
        case INDEX_op_ld32s:
        case INDEX_op_ld32u:
            done = fold_tcg_ld(&ctx, op);
            break;
        case INDEX_op_ld:
        case INDEX_op_ld_vec:
            done = fold_tcg_ld_memcopy(&ctx, op);
            break;
        case INDEX_op_st8:
        case INDEX_op_st16:
        case INDEX_op_st32:
            done = fold_tcg_st(&ctx, op);
            break;
        case INDEX_op_st:
        case INDEX_op_st_vec:
            done = fold_tcg_st_memcopy(&ctx, op);
            break;
        case INDEX_op_mb:
            done = fold_mb(&ctx, op);
            break;
        case INDEX_op_mov:
        case INDEX_op_mov_vec:
            done = fold_mov(&ctx, op);
            break;
        case INDEX_op_movcond:
            done = fold_movcond(&ctx, op);
            break;
        case INDEX_op_mul:
            done = fold_mul(&ctx, op);
            break;
        case INDEX_op_mulsh:
        case INDEX_op_muluh:
            done = fold_mul_highpart(&ctx, op);
            break;
        case INDEX_op_muls2:
        case INDEX_op_mulu2:
            done = fold_multiply2(&ctx, op);
            break;
        case INDEX_op_nand:
        case INDEX_op_nand_vec:
            done = fold_nand(&ctx, op);
            break;
        case INDEX_op_neg:
            done = fold_neg(&ctx, op);
            break;
        case INDEX_op_nor:
        case INDEX_op_nor_vec:
            done = fold_nor(&ctx, op);
            break;
        case INDEX_op_not:
        case INDEX_op_not_vec:
            done = fold_not(&ctx, op);
            break;
        case INDEX_op_or:
        case INDEX_op_or_vec:
            done = fold_or(&ctx, op);
            break;
        case INDEX_op_orc:
        case INDEX_op_orc_vec:
            done = fold_orc(&ctx, op);
            break;
        case INDEX_op_qemu_ld:
            done = fold_qemu_ld_1reg(&ctx, op);
            break;
        case INDEX_op_qemu_ld2:
            done = fold_qemu_ld_2reg(&ctx, op);
            break;
        case INDEX_op_qemu_st:
        case INDEX_op_qemu_st2:
            done = fold_qemu_st(&ctx, op);
            break;
        case INDEX_op_rems:
        case INDEX_op_remu:
            done = fold_remainder(&ctx, op);
            break;
        case INDEX_op_rotl:
        case INDEX_op_rotr:
        case INDEX_op_sar:
        case INDEX_op_shl:
        case INDEX_op_shr:
            done = fold_shift(&ctx, op);
            break;
        case INDEX_op_setcond:
            done = fold_setcond(&ctx, op);
            break;
        case INDEX_op_negsetcond:
            done = fold_negsetcond(&ctx, op);
            break;
        case INDEX_op_setcond2_i32:
            done = fold_setcond2(&ctx, op);
            break;
        case INDEX_op_cmp_vec:
            done = fold_cmp_vec(&ctx, op);
            break;
        case INDEX_op_cmpsel_vec:
            done = fold_cmpsel_vec(&ctx, op);
            break;
        case INDEX_op_bitsel_vec:
            done = fold_bitsel_vec(&ctx, op);
            break;
        case INDEX_op_sextract:
            done = fold_sextract(&ctx, op);
            break;
        case INDEX_op_sub:
            done = fold_sub(&ctx, op);
            break;
        case INDEX_op_subbi:
            done = fold_subbi(&ctx, op);
            break;
        case INDEX_op_subbio:
            done = fold_subbio(&ctx, op);
            break;
        case INDEX_op_subbo:
            done = fold_subbo(&ctx, op);
            break;
        case INDEX_op_sub_vec:
            done = fold_sub_vec(&ctx, op);
            break;
        case INDEX_op_xor:
        case INDEX_op_xor_vec:
            done = fold_xor(&ctx, op);
            break;
        /* Labels, branches and TB exits go through finish_ebb(). */
        case INDEX_op_set_label:
        case INDEX_op_br:
        case INDEX_op_exit_tb:
        case INDEX_op_goto_tb:
        case INDEX_op_goto_ptr:
            finish_ebb(&ctx);
            done = true;
            break;
        /* Opcodes without a dedicated folder only get finish_folding(). */
        default:
            done = finish_folding(&ctx, op);
            break;
        }
        /* Every path above must have reported the op as handled. */
        tcg_debug_assert(done);
    }
}
3352