1 /*
2 * Optimizations for Tiny Code Generator for QEMU
3 *
4 * Copyright (c) 2010 Samsung Electronics.
5 * Contributed by Kirill Batuzov <batuzovk@ispras.ru>
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a copy
8 * of this software and associated documentation files (the "Software"), to deal
9 * in the Software without restriction, including without limitation the rights
10 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 * copies of the Software, and to permit persons to whom the Software is
12 * furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice shall be included in
15 * all copies or substantial portions of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23 * THE SOFTWARE.
24 */
25
26 #include "qemu/osdep.h"
27 #include "qemu/int128.h"
28 #include "qemu/interval-tree.h"
29 #include "tcg/tcg-op-common.h"
30 #include "tcg-internal.h"
31 #include "tcg-has.h"
32
33
34 typedef struct MemCopyInfo {
35 IntervalTreeNode itree;
36 QSIMPLEQ_ENTRY (MemCopyInfo) next;
37 TCGTemp *ts;
38 TCGType type;
39 } MemCopyInfo;
40
41 typedef struct TempOptInfo {
42 TCGTemp *prev_copy;
43 TCGTemp *next_copy;
44 QSIMPLEQ_HEAD(, MemCopyInfo) mem_copy;
45 uint64_t z_mask; /* mask bit is 0 if and only if value bit is 0 */
46 uint64_t o_mask; /* mask bit is 1 if and only if value bit is 1 */
47 uint64_t s_mask; /* mask bit is 1 if value bit matches msb */
48 } TempOptInfo;
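/*
 * Worked example of the three masks: for the constant 0xff,
 * z_mask == o_mask == 0xff (every bit is fully known), and
 * s_mask == 0xffffffffffffff00, since bits 63..8 are all copies of
 * the most significant bit (here 0).  For a completely unknown value
 * the masks are z_mask == -1, o_mask == 0, s_mask == 0.
 */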
49
50 typedef struct OptContext {
51 TCGContext *tcg;
52 TCGOp *prev_mb;
53 TCGTempSet temps_used;
54
55 IntervalTreeRoot mem_copy;
56 QSIMPLEQ_HEAD(, MemCopyInfo) mem_free;
57
58 /* In flight values from optimization. */
59 TCGType type;
60 int carry_state; /* -1 = non-constant, {0,1} = constant carry-in */
61 } OptContext;
62
static inline TempOptInfo *ts_info(TCGTemp *ts)
64 {
65 return ts->state_ptr;
66 }
67
static inline TempOptInfo *arg_info(TCGArg arg)
69 {
70 return ts_info(arg_temp(arg));
71 }
72
static inline bool ti_is_const(TempOptInfo *ti)
74 {
75 /* If all bits that are not known zeros are known ones, it's constant. */
76 return ti->z_mask == ti->o_mask;
77 }
78
static inline uint64_t ti_const_val(TempOptInfo *ti)
80 {
81 /* If constant, both z_mask and o_mask contain the value. */
82 return ti->z_mask;
83 }
84
static inline bool ti_is_const_val(TempOptInfo *ti, uint64_t val)
86 {
87 return ti_is_const(ti) && ti_const_val(ti) == val;
88 }
89
static inline bool ts_is_const(TCGTemp *ts)
91 {
92 return ti_is_const(ts_info(ts));
93 }
94
static inline bool ts_is_const_val(TCGTemp *ts, uint64_t val)
96 {
97 return ti_is_const_val(ts_info(ts), val);
98 }
99
static inline bool arg_is_const(TCGArg arg)
101 {
102 return ts_is_const(arg_temp(arg));
103 }
104
static inline uint64_t arg_const_val(TCGArg arg)
106 {
107 return ti_const_val(arg_info(arg));
108 }
109
static inline bool arg_is_const_val(TCGArg arg, uint64_t val)
111 {
112 return ts_is_const_val(arg_temp(arg), val);
113 }
114
static inline bool ts_is_copy(TCGTemp *ts)
116 {
117 return ts_info(ts)->next_copy != ts;
118 }
119
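/*
 * "Better" here is simply the copy with the numerically larger
 * TCGTempKind, which prefers longer-lived temps (globals, constants)
 * over transient EBB/TB temps as the canonical representative.
 */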
static TCGTemp *cmp_better_copy(TCGTemp *a, TCGTemp *b)
121 {
122 return a->kind < b->kind ? b : a;
123 }
124
125 /* Initialize and activate a temporary. */
static void init_ts_info(OptContext *ctx, TCGTemp *ts)
127 {
128 size_t idx = temp_idx(ts);
129 TempOptInfo *ti;
130
131 if (test_bit(idx, ctx->temps_used.l)) {
132 return;
133 }
134 set_bit(idx, ctx->temps_used.l);
135
136 ti = ts->state_ptr;
137 if (ti == NULL) {
138 ti = tcg_malloc(sizeof(TempOptInfo));
139 ts->state_ptr = ti;
140 }
141
142 ti->next_copy = ts;
143 ti->prev_copy = ts;
144 QSIMPLEQ_INIT(&ti->mem_copy);
145 if (ts->kind == TEMP_CONST) {
146 ti->z_mask = ts->val;
147 ti->o_mask = ts->val;
148 ti->s_mask = INT64_MIN >> clrsb64(ts->val);
149 } else {
150 ti->z_mask = -1;
151 ti->o_mask = 0;
152 ti->s_mask = 0;
153 }
154 }
155
static MemCopyInfo *mem_copy_first(OptContext *ctx, intptr_t s, intptr_t l)
157 {
158 IntervalTreeNode *r = interval_tree_iter_first(&ctx->mem_copy, s, l);
159 return r ? container_of(r, MemCopyInfo, itree) : NULL;
160 }
161
static MemCopyInfo *mem_copy_next(MemCopyInfo *mem, intptr_t s, intptr_t l)
163 {
164 IntervalTreeNode *r = interval_tree_iter_next(&mem->itree, s, l);
165 return r ? container_of(r, MemCopyInfo, itree) : NULL;
166 }
167
static void remove_mem_copy(OptContext *ctx, MemCopyInfo *mc)
169 {
170 TCGTemp *ts = mc->ts;
171 TempOptInfo *ti = ts_info(ts);
172
173 interval_tree_remove(&mc->itree, &ctx->mem_copy);
174 QSIMPLEQ_REMOVE(&ti->mem_copy, mc, MemCopyInfo, next);
175 QSIMPLEQ_INSERT_TAIL(&ctx->mem_free, mc, next);
176 }
177
static void remove_mem_copy_in(OptContext *ctx, intptr_t s, intptr_t l)
179 {
180 while (true) {
181 MemCopyInfo *mc = mem_copy_first(ctx, s, l);
182 if (!mc) {
183 break;
184 }
185 remove_mem_copy(ctx, mc);
186 }
187 }
188
static void remove_mem_copy_all(OptContext *ctx)
190 {
191 remove_mem_copy_in(ctx, 0, -1);
192 tcg_debug_assert(interval_tree_is_empty(&ctx->mem_copy));
193 }
194
static TCGTemp *find_better_copy(TCGTemp *ts)
196 {
197 TCGTemp *i, *ret;
198
199 /* If this is already readonly, we can't do better. */
200 if (temp_readonly(ts)) {
201 return ts;
202 }
203
204 ret = ts;
205 for (i = ts_info(ts)->next_copy; i != ts; i = ts_info(i)->next_copy) {
206 ret = cmp_better_copy(ret, i);
207 }
208 return ret;
209 }
210
static void move_mem_copies(TCGTemp *dst_ts, TCGTemp *src_ts)
212 {
213 TempOptInfo *si = ts_info(src_ts);
214 TempOptInfo *di = ts_info(dst_ts);
215 MemCopyInfo *mc;
216
217 QSIMPLEQ_FOREACH(mc, &si->mem_copy, next) {
218 tcg_debug_assert(mc->ts == src_ts);
219 mc->ts = dst_ts;
220 }
221 QSIMPLEQ_CONCAT(&di->mem_copy, &si->mem_copy);
222 }
223
/* Reset TEMP's state, possibly removing the temp from the list of copies. */
static void reset_ts(OptContext *ctx, TCGTemp *ts)
226 {
227 TempOptInfo *ti = ts_info(ts);
228 TCGTemp *pts = ti->prev_copy;
229 TCGTemp *nts = ti->next_copy;
230 TempOptInfo *pi = ts_info(pts);
231 TempOptInfo *ni = ts_info(nts);
232
233 ni->prev_copy = ti->prev_copy;
234 pi->next_copy = ti->next_copy;
235 ti->next_copy = ts;
236 ti->prev_copy = ts;
237 ti->z_mask = -1;
238 ti->o_mask = 0;
239 ti->s_mask = 0;
240
241 if (!QSIMPLEQ_EMPTY(&ti->mem_copy)) {
242 if (ts == nts) {
243 /* Last temp copy being removed, the mem copies die. */
244 MemCopyInfo *mc;
245 QSIMPLEQ_FOREACH(mc, &ti->mem_copy, next) {
246 interval_tree_remove(&mc->itree, &ctx->mem_copy);
247 }
248 QSIMPLEQ_CONCAT(&ctx->mem_free, &ti->mem_copy);
249 } else {
250 move_mem_copies(find_better_copy(nts), ts);
251 }
252 }
253 }
254
static void reset_temp(OptContext *ctx, TCGArg arg)
256 {
257 reset_ts(ctx, arg_temp(arg));
258 }
259
static void record_mem_copy(OptContext *ctx, TCGType type,
                            TCGTemp *ts, intptr_t start, intptr_t last)
262 {
263 MemCopyInfo *mc;
264 TempOptInfo *ti;
265
266 mc = QSIMPLEQ_FIRST(&ctx->mem_free);
267 if (mc) {
268 QSIMPLEQ_REMOVE_HEAD(&ctx->mem_free, next);
269 } else {
270 mc = tcg_malloc(sizeof(*mc));
271 }
272
273 memset(mc, 0, sizeof(*mc));
274 mc->itree.start = start;
275 mc->itree.last = last;
276 mc->type = type;
277 interval_tree_insert(&mc->itree, &ctx->mem_copy);
278
279 ts = find_better_copy(ts);
280 ti = ts_info(ts);
281 mc->ts = ts;
282 QSIMPLEQ_INSERT_TAIL(&ti->mem_copy, mc, next);
283 }
284
static bool ts_are_copies(TCGTemp *ts1, TCGTemp *ts2)
286 {
287 TCGTemp *i;
288
289 if (ts1 == ts2) {
290 return true;
291 }
292
293 if (!ts_is_copy(ts1) || !ts_is_copy(ts2)) {
294 return false;
295 }
296
297 for (i = ts_info(ts1)->next_copy; i != ts1; i = ts_info(i)->next_copy) {
298 if (i == ts2) {
299 return true;
300 }
301 }
302
303 return false;
304 }
305
static bool args_are_copies(TCGArg arg1, TCGArg arg2)
307 {
308 return ts_are_copies(arg_temp(arg1), arg_temp(arg2));
309 }
310
static TCGTemp *find_mem_copy_for(OptContext *ctx, TCGType type, intptr_t s)
312 {
313 MemCopyInfo *mc;
314
315 for (mc = mem_copy_first(ctx, s, s); mc; mc = mem_copy_next(mc, s, s)) {
316 if (mc->itree.start == s && mc->type == type) {
317 return find_better_copy(mc->ts);
318 }
319 }
320 return NULL;
321 }
322
static TCGArg arg_new_constant(OptContext *ctx, uint64_t val)
324 {
325 TCGType type = ctx->type;
326 TCGTemp *ts;
327
328 if (type == TCG_TYPE_I32) {
329 val = (int32_t)val;
330 }
331
332 ts = tcg_constant_internal(type, val);
333 init_ts_info(ctx, ts);
334
335 return temp_arg(ts);
336 }
337
static TCGArg arg_new_temp(OptContext *ctx)
339 {
340 TCGTemp *ts = tcg_temp_new_internal(ctx->type, TEMP_EBB);
341 init_ts_info(ctx, ts);
342 return temp_arg(ts);
343 }
344
static TCGOp *opt_insert_after(OptContext *ctx, TCGOp *op,
                               TCGOpcode opc, unsigned narg)
347 {
348 return tcg_op_insert_after(ctx->tcg, op, opc, ctx->type, narg);
349 }
350
static TCGOp *opt_insert_before(OptContext *ctx, TCGOp *op,
                                TCGOpcode opc, unsigned narg)
353 {
354 return tcg_op_insert_before(ctx->tcg, op, opc, ctx->type, narg);
355 }
356
static bool tcg_opt_gen_mov(OptContext *ctx, TCGOp *op, TCGArg dst, TCGArg src)
358 {
359 TCGTemp *dst_ts = arg_temp(dst);
360 TCGTemp *src_ts = arg_temp(src);
361 TempOptInfo *di;
362 TempOptInfo *si;
363 TCGOpcode new_op;
364
365 if (ts_are_copies(dst_ts, src_ts)) {
366 tcg_op_remove(ctx->tcg, op);
367 return true;
368 }
369
370 reset_ts(ctx, dst_ts);
371 di = ts_info(dst_ts);
372 si = ts_info(src_ts);
373
374 switch (ctx->type) {
375 case TCG_TYPE_I32:
376 case TCG_TYPE_I64:
377 new_op = INDEX_op_mov;
378 break;
379 case TCG_TYPE_V64:
380 case TCG_TYPE_V128:
381 case TCG_TYPE_V256:
382 /* TCGOP_TYPE and TCGOP_VECE remain unchanged. */
383 new_op = INDEX_op_mov_vec;
384 break;
385 default:
386 g_assert_not_reached();
387 }
388 op->opc = new_op;
389 op->args[0] = dst;
390 op->args[1] = src;
391
392 di->z_mask = si->z_mask;
393 di->o_mask = si->o_mask;
394 di->s_mask = si->s_mask;
395
396 if (src_ts->type == dst_ts->type) {
397 TempOptInfo *ni = ts_info(si->next_copy);
398
399 di->next_copy = si->next_copy;
400 di->prev_copy = src_ts;
401 ni->prev_copy = dst_ts;
402 si->next_copy = dst_ts;
403
404 if (!QSIMPLEQ_EMPTY(&si->mem_copy)
405 && cmp_better_copy(src_ts, dst_ts) == dst_ts) {
406 move_mem_copies(dst_ts, src_ts);
407 }
408 } else if (dst_ts->type == TCG_TYPE_I32) {
409 di->z_mask = (int32_t)di->z_mask;
410 di->o_mask = (int32_t)di->o_mask;
411 di->s_mask |= INT32_MIN;
412 } else {
413 di->z_mask |= MAKE_64BIT_MASK(32, 32);
414 di->o_mask = (uint32_t)di->o_mask;
415 di->s_mask = INT64_MIN;
416 }
417 return true;
418 }
419
static bool tcg_opt_gen_movi(OptContext *ctx, TCGOp *op,
                             TCGArg dst, uint64_t val)
422 {
423 /* Convert movi to mov with constant temp. */
424 return tcg_opt_gen_mov(ctx, op, dst, arg_new_constant(ctx, val));
425 }
426
static uint64_t do_constant_folding_2(TCGOpcode op, TCGType type,
                                      uint64_t x, uint64_t y)
429 {
430 uint64_t l64, h64;
431
432 switch (op) {
433 case INDEX_op_add:
434 return x + y;
435
436 case INDEX_op_sub:
437 return x - y;
438
439 case INDEX_op_mul:
440 return x * y;
441
442 case INDEX_op_and:
443 case INDEX_op_and_vec:
444 return x & y;
445
446 case INDEX_op_or:
447 case INDEX_op_or_vec:
448 return x | y;
449
450 case INDEX_op_xor:
451 case INDEX_op_xor_vec:
452 return x ^ y;
453
454 case INDEX_op_shl:
455 if (type == TCG_TYPE_I32) {
456 return (uint32_t)x << (y & 31);
457 }
458 return (uint64_t)x << (y & 63);
459
460 case INDEX_op_shr:
461 if (type == TCG_TYPE_I32) {
462 return (uint32_t)x >> (y & 31);
463 }
464 return (uint64_t)x >> (y & 63);
465
466 case INDEX_op_sar:
467 if (type == TCG_TYPE_I32) {
468 return (int32_t)x >> (y & 31);
469 }
470 return (int64_t)x >> (y & 63);
471
472 case INDEX_op_rotr:
473 if (type == TCG_TYPE_I32) {
474 return ror32(x, y & 31);
475 }
476 return ror64(x, y & 63);
477
478 case INDEX_op_rotl:
479 if (type == TCG_TYPE_I32) {
480 return rol32(x, y & 31);
481 }
482 return rol64(x, y & 63);
483
484 case INDEX_op_not:
485 case INDEX_op_not_vec:
486 return ~x;
487
488 case INDEX_op_neg:
489 return -x;
490
491 case INDEX_op_andc:
492 case INDEX_op_andc_vec:
493 return x & ~y;
494
495 case INDEX_op_orc:
496 case INDEX_op_orc_vec:
497 return x | ~y;
498
499 case INDEX_op_eqv:
500 case INDEX_op_eqv_vec:
501 return ~(x ^ y);
502
503 case INDEX_op_nand:
504 case INDEX_op_nand_vec:
505 return ~(x & y);
506
507 case INDEX_op_nor:
508 case INDEX_op_nor_vec:
509 return ~(x | y);
510
511 case INDEX_op_clz:
512 if (type == TCG_TYPE_I32) {
513 return (uint32_t)x ? clz32(x) : y;
514 }
515 return x ? clz64(x) : y;
516
517 case INDEX_op_ctz:
518 if (type == TCG_TYPE_I32) {
519 return (uint32_t)x ? ctz32(x) : y;
520 }
521 return x ? ctz64(x) : y;
522
523 case INDEX_op_ctpop:
524 return type == TCG_TYPE_I32 ? ctpop32(x) : ctpop64(x);
525
526 case INDEX_op_bswap16:
527 x = bswap16(x);
528 return y & TCG_BSWAP_OS ? (int16_t)x : x;
529
530 case INDEX_op_bswap32:
531 x = bswap32(x);
532 return y & TCG_BSWAP_OS ? (int32_t)x : x;
533
534 case INDEX_op_bswap64:
535 return bswap64(x);
536
537 case INDEX_op_ext_i32_i64:
538 return (int32_t)x;
539
540 case INDEX_op_extu_i32_i64:
541 case INDEX_op_extrl_i64_i32:
542 return (uint32_t)x;
543
544 case INDEX_op_extrh_i64_i32:
545 return (uint64_t)x >> 32;
546
547 case INDEX_op_muluh:
548 if (type == TCG_TYPE_I32) {
549 return ((uint64_t)(uint32_t)x * (uint32_t)y) >> 32;
550 }
551 mulu64(&l64, &h64, x, y);
552 return h64;
553
554 case INDEX_op_mulsh:
555 if (type == TCG_TYPE_I32) {
556 return ((int64_t)(int32_t)x * (int32_t)y) >> 32;
557 }
558 muls64(&l64, &h64, x, y);
559 return h64;
560
561 case INDEX_op_divs:
562 /* Avoid crashing on divide by zero, otherwise undefined. */
563 if (type == TCG_TYPE_I32) {
564 return (int32_t)x / ((int32_t)y ? : 1);
565 }
566 return (int64_t)x / ((int64_t)y ? : 1);
567
568 case INDEX_op_divu:
569 if (type == TCG_TYPE_I32) {
570 return (uint32_t)x / ((uint32_t)y ? : 1);
571 }
572 return (uint64_t)x / ((uint64_t)y ? : 1);
573
574 case INDEX_op_rems:
575 if (type == TCG_TYPE_I32) {
576 return (int32_t)x % ((int32_t)y ? : 1);
577 }
578 return (int64_t)x % ((int64_t)y ? : 1);
579
580 case INDEX_op_remu:
581 if (type == TCG_TYPE_I32) {
582 return (uint32_t)x % ((uint32_t)y ? : 1);
583 }
584 return (uint64_t)x % ((uint64_t)y ? : 1);
585
586 default:
587 g_assert_not_reached();
588 }
589 }
590
static uint64_t do_constant_folding(TCGOpcode op, TCGType type,
                                    uint64_t x, uint64_t y)
593 {
594 uint64_t res = do_constant_folding_2(op, type, x, y);
595 if (type == TCG_TYPE_I32) {
596 res = (int32_t)res;
597 }
598 return res;
599 }
600
static bool do_constant_folding_cond_32(uint32_t x, uint32_t y, TCGCond c)
602 {
603 switch (c) {
604 case TCG_COND_EQ:
605 return x == y;
606 case TCG_COND_NE:
607 return x != y;
608 case TCG_COND_LT:
609 return (int32_t)x < (int32_t)y;
610 case TCG_COND_GE:
611 return (int32_t)x >= (int32_t)y;
612 case TCG_COND_LE:
613 return (int32_t)x <= (int32_t)y;
614 case TCG_COND_GT:
615 return (int32_t)x > (int32_t)y;
616 case TCG_COND_LTU:
617 return x < y;
618 case TCG_COND_GEU:
619 return x >= y;
620 case TCG_COND_LEU:
621 return x <= y;
622 case TCG_COND_GTU:
623 return x > y;
624 case TCG_COND_TSTEQ:
625 return (x & y) == 0;
626 case TCG_COND_TSTNE:
627 return (x & y) != 0;
628 case TCG_COND_ALWAYS:
629 case TCG_COND_NEVER:
630 break;
631 }
632 g_assert_not_reached();
633 }
634
static bool do_constant_folding_cond_64(uint64_t x, uint64_t y, TCGCond c)
636 {
637 switch (c) {
638 case TCG_COND_EQ:
639 return x == y;
640 case TCG_COND_NE:
641 return x != y;
642 case TCG_COND_LT:
643 return (int64_t)x < (int64_t)y;
644 case TCG_COND_GE:
645 return (int64_t)x >= (int64_t)y;
646 case TCG_COND_LE:
647 return (int64_t)x <= (int64_t)y;
648 case TCG_COND_GT:
649 return (int64_t)x > (int64_t)y;
650 case TCG_COND_LTU:
651 return x < y;
652 case TCG_COND_GEU:
653 return x >= y;
654 case TCG_COND_LEU:
655 return x <= y;
656 case TCG_COND_GTU:
657 return x > y;
658 case TCG_COND_TSTEQ:
659 return (x & y) == 0;
660 case TCG_COND_TSTNE:
661 return (x & y) != 0;
662 case TCG_COND_ALWAYS:
663 case TCG_COND_NEVER:
664 break;
665 }
666 g_assert_not_reached();
667 }
668
static int do_constant_folding_cond_eq(TCGCond c)
670 {
671 switch (c) {
672 case TCG_COND_GT:
673 case TCG_COND_LTU:
674 case TCG_COND_LT:
675 case TCG_COND_GTU:
676 case TCG_COND_NE:
677 return 0;
678 case TCG_COND_GE:
679 case TCG_COND_GEU:
680 case TCG_COND_LE:
681 case TCG_COND_LEU:
682 case TCG_COND_EQ:
683 return 1;
684 case TCG_COND_TSTEQ:
685 case TCG_COND_TSTNE:
686 return -1;
687 case TCG_COND_ALWAYS:
688 case TCG_COND_NEVER:
689 break;
690 }
691 g_assert_not_reached();
692 }
693
694 /*
695 * Return -1 if the condition can't be simplified,
696 * and the result of the condition (0 or 1) if it can.
697 */
static int do_constant_folding_cond(TCGType type, TCGArg x,
                                    TCGArg y, TCGCond c)
700 {
701 if (arg_is_const(x) && arg_is_const(y)) {
702 uint64_t xv = arg_const_val(x);
703 uint64_t yv = arg_const_val(y);
704
705 switch (type) {
706 case TCG_TYPE_I32:
707 return do_constant_folding_cond_32(xv, yv, c);
708 case TCG_TYPE_I64:
709 return do_constant_folding_cond_64(xv, yv, c);
710 default:
711 /* Only scalar comparisons are optimizable */
712 return -1;
713 }
714 } else if (args_are_copies(x, y)) {
715 return do_constant_folding_cond_eq(c);
716 } else if (arg_is_const_val(y, 0)) {
717 switch (c) {
718 case TCG_COND_LTU:
719 case TCG_COND_TSTNE:
720 return 0;
721 case TCG_COND_GEU:
722 case TCG_COND_TSTEQ:
723 return 1;
724 default:
725 return -1;
726 }
727 }
728 return -1;
729 }
730
731 /**
732 * swap_commutative:
733 * @dest: TCGArg of the destination argument, or NO_DEST.
734 * @p1: first paired argument
735 * @p2: second paired argument
736 *
737 * If *@p1 is a constant and *@p2 is not, swap.
738 * If *@p2 matches @dest, swap.
739 * Return true if a swap was performed.
740 */
741
742 #define NO_DEST temp_arg(NULL)
743
static int pref_commutative(TempOptInfo *ti)
745 {
746 /* Slight preference for non-zero constants second. */
747 return !ti_is_const(ti) ? 0 : ti_const_val(ti) ? 3 : 2;
748 }
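/*
 * Net effect together with swap_commutative below: a constant always
 * sorts after a non-constant operand, and given two constants the
 * nonzero one is placed second (e.g. add t0, $5, t1 is rewritten as
 * add t0, t1, $5).
 */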
749
static bool swap_commutative(TCGArg dest, TCGArg *p1, TCGArg *p2)
751 {
752 TCGArg a1 = *p1, a2 = *p2;
753 int sum = 0;
754 sum += pref_commutative(arg_info(a1));
755 sum -= pref_commutative(arg_info(a2));
756
757 /* Prefer the constant in second argument, and then the form
758 op a, a, b, which is better handled on non-RISC hosts. */
759 if (sum > 0 || (sum == 0 && dest == a2)) {
760 *p1 = a2;
761 *p2 = a1;
762 return true;
763 }
764 return false;
765 }
766
static bool swap_commutative2(TCGArg *p1, TCGArg *p2)
768 {
769 int sum = 0;
770 sum += pref_commutative(arg_info(p1[0]));
771 sum += pref_commutative(arg_info(p1[1]));
772 sum -= pref_commutative(arg_info(p2[0]));
773 sum -= pref_commutative(arg_info(p2[1]));
774 if (sum > 0) {
775 TCGArg t;
776 t = p1[0], p1[0] = p2[0], p2[0] = t;
777 t = p1[1], p1[1] = p2[1], p2[1] = t;
778 return true;
779 }
780 return false;
781 }
782
783 /*
784 * Return -1 if the condition can't be simplified,
785 * and the result of the condition (0 or 1) if it can.
786 */
787 static bool fold_and(OptContext *ctx, TCGOp *op);
static int do_constant_folding_cond1(OptContext *ctx, TCGOp *op, TCGArg dest,
                                     TCGArg *p1, TCGArg *p2, TCGArg *pcond)
790 {
791 TCGCond cond;
792 TempOptInfo *i1;
793 bool swap;
794 int r;
795
796 swap = swap_commutative(dest, p1, p2);
797 cond = *pcond;
798 if (swap) {
799 *pcond = cond = tcg_swap_cond(cond);
800 }
801
802 r = do_constant_folding_cond(ctx->type, *p1, *p2, cond);
803 if (r >= 0) {
804 return r;
805 }
806 if (!is_tst_cond(cond)) {
807 return -1;
808 }
809
810 i1 = arg_info(*p1);
811
812 /*
813 * TSTNE x,x -> NE x,0
814 * TSTNE x,i -> NE x,0 if i includes all nonzero bits of x
815 */
816 if (args_are_copies(*p1, *p2) ||
817 (arg_is_const(*p2) && (i1->z_mask & ~arg_const_val(*p2)) == 0)) {
818 *p2 = arg_new_constant(ctx, 0);
819 *pcond = tcg_tst_eqne_cond(cond);
820 return -1;
821 }
822
823 /* TSTNE x,i -> LT x,0 if i only includes sign bit copies */
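    /*
     * For example, if x is known to be sign-extended from 32 bits
     * (s_mask covers bits 63..31), then i == 0x80000000 tests only
     * copies of the sign bit, and TSTNE x,i is equivalent to LT x,0.
     */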
824 if (arg_is_const(*p2) && (arg_const_val(*p2) & ~i1->s_mask) == 0) {
825 *p2 = arg_new_constant(ctx, 0);
826 *pcond = tcg_tst_ltge_cond(cond);
827 return -1;
828 }
829
830 /* Expand to AND with a temporary if no backend support. */
831 if (!TCG_TARGET_HAS_tst) {
832 TCGOp *op2 = opt_insert_before(ctx, op, INDEX_op_and, 3);
833 TCGArg tmp = arg_new_temp(ctx);
834
835 op2->args[0] = tmp;
836 op2->args[1] = *p1;
837 op2->args[2] = *p2;
838 fold_and(ctx, op2);
839
840 *p1 = tmp;
841 *p2 = arg_new_constant(ctx, 0);
842 *pcond = tcg_tst_eqne_cond(cond);
843 }
844 return -1;
845 }
846
static int do_constant_folding_cond2(OptContext *ctx, TCGOp *op, TCGArg *args)
848 {
849 TCGArg al, ah, bl, bh;
850 TCGCond c;
851 bool swap;
852 int r;
853
854 swap = swap_commutative2(args, args + 2);
855 c = args[4];
856 if (swap) {
857 args[4] = c = tcg_swap_cond(c);
858 }
859
860 al = args[0];
861 ah = args[1];
862 bl = args[2];
863 bh = args[3];
864
865 if (arg_is_const(bl) && arg_is_const(bh)) {
866 tcg_target_ulong blv = arg_const_val(bl);
867 tcg_target_ulong bhv = arg_const_val(bh);
868 uint64_t b = deposit64(blv, 32, 32, bhv);
869
870 if (arg_is_const(al) && arg_is_const(ah)) {
871 tcg_target_ulong alv = arg_const_val(al);
872 tcg_target_ulong ahv = arg_const_val(ah);
873 uint64_t a = deposit64(alv, 32, 32, ahv);
874
875 r = do_constant_folding_cond_64(a, b, c);
876 if (r >= 0) {
877 return r;
878 }
879 }
880
881 if (b == 0) {
882 switch (c) {
883 case TCG_COND_LTU:
884 case TCG_COND_TSTNE:
885 return 0;
886 case TCG_COND_GEU:
887 case TCG_COND_TSTEQ:
888 return 1;
889 default:
890 break;
891 }
892 }
893
894 /* TSTNE x,-1 -> NE x,0 */
895 if (b == -1 && is_tst_cond(c)) {
896 args[3] = args[2] = arg_new_constant(ctx, 0);
897 args[4] = tcg_tst_eqne_cond(c);
898 return -1;
899 }
900
901 /* TSTNE x,sign -> LT x,0 */
902 if (b == INT64_MIN && is_tst_cond(c)) {
903 /* bl must be 0, so copy that to bh */
904 args[3] = bl;
905 args[4] = tcg_tst_ltge_cond(c);
906 return -1;
907 }
908 }
909
910 if (args_are_copies(al, bl) && args_are_copies(ah, bh)) {
911 r = do_constant_folding_cond_eq(c);
912 if (r >= 0) {
913 return r;
914 }
915
916 /* TSTNE x,x -> NE x,0 */
917 if (is_tst_cond(c)) {
918 args[3] = args[2] = arg_new_constant(ctx, 0);
919 args[4] = tcg_tst_eqne_cond(c);
920 return -1;
921 }
922 }
923
924 /* Expand to AND with a temporary if no backend support. */
925 if (!TCG_TARGET_HAS_tst && is_tst_cond(c)) {
926 TCGOp *op1 = opt_insert_before(ctx, op, INDEX_op_and, 3);
927 TCGOp *op2 = opt_insert_before(ctx, op, INDEX_op_and, 3);
928 TCGArg t1 = arg_new_temp(ctx);
929 TCGArg t2 = arg_new_temp(ctx);
930
931 op1->args[0] = t1;
932 op1->args[1] = al;
933 op1->args[2] = bl;
934 fold_and(ctx, op1);
935
936 op2->args[0] = t2;
937 op2->args[1] = ah;
938 op2->args[2] = bh;
        fold_and(ctx, op2);
940
941 args[0] = t1;
942 args[1] = t2;
943 args[3] = args[2] = arg_new_constant(ctx, 0);
944 args[4] = tcg_tst_eqne_cond(c);
945 }
946 return -1;
947 }
948
static void init_arguments(OptContext *ctx, TCGOp *op, int nb_args)
950 {
951 for (int i = 0; i < nb_args; i++) {
952 TCGTemp *ts = arg_temp(op->args[i]);
953 init_ts_info(ctx, ts);
954 }
955 }
956
static void copy_propagate(OptContext *ctx, TCGOp *op,
                           int nb_oargs, int nb_iargs)
959 {
960 for (int i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
961 TCGTemp *ts = arg_temp(op->args[i]);
962 if (ts_is_copy(ts)) {
963 op->args[i] = temp_arg(find_better_copy(ts));
964 }
965 }
966 }
967
static void finish_bb(OptContext *ctx)
969 {
970 /* We only optimize memory barriers across basic blocks. */
971 ctx->prev_mb = NULL;
972 }
973
static void finish_ebb(OptContext *ctx)
975 {
976 finish_bb(ctx);
977 /* We only optimize across extended basic blocks. */
978 memset(&ctx->temps_used, 0, sizeof(ctx->temps_used));
979 remove_mem_copy_all(ctx);
980 }
981
static bool finish_folding(OptContext *ctx, TCGOp *op)
983 {
984 const TCGOpDef *def = &tcg_op_defs[op->opc];
985 int i, nb_oargs;
986
987 nb_oargs = def->nb_oargs;
988 for (i = 0; i < nb_oargs; i++) {
989 TCGTemp *ts = arg_temp(op->args[i]);
990 reset_ts(ctx, ts);
991 }
992 return true;
993 }
994
995 /*
996 * The fold_* functions return true when processing is complete,
997 * usually by folding the operation to a constant or to a copy,
998 * and calling tcg_opt_gen_{mov,movi}. They may do other things,
999 * like collect information about the value produced, for use in
1000 * optimizing a subsequent operation.
1001 *
1002 * These first fold_* functions are all helpers, used by other
1003 * folders for more specific operations.
1004 */
1005
static bool fold_const1(OptContext *ctx, TCGOp *op)
1007 {
1008 if (arg_is_const(op->args[1])) {
1009 uint64_t t = arg_const_val(op->args[1]);
1010
1011 t = do_constant_folding(op->opc, ctx->type, t, 0);
1012 return tcg_opt_gen_movi(ctx, op, op->args[0], t);
1013 }
1014 return false;
1015 }
1016
static bool fold_const2(OptContext *ctx, TCGOp *op)
1018 {
1019 if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
1020 uint64_t t1 = arg_const_val(op->args[1]);
1021 uint64_t t2 = arg_const_val(op->args[2]);
1022
1023 t1 = do_constant_folding(op->opc, ctx->type, t1, t2);
1024 return tcg_opt_gen_movi(ctx, op, op->args[0], t1);
1025 }
1026 return false;
1027 }
1028
static bool fold_commutative(OptContext *ctx, TCGOp *op)
1030 {
1031 swap_commutative(op->args[0], &op->args[1], &op->args[2]);
1032 return false;
1033 }
1034
static bool fold_const2_commutative(OptContext *ctx, TCGOp *op)
1036 {
1037 swap_commutative(op->args[0], &op->args[1], &op->args[2]);
1038 return fold_const2(ctx, op);
1039 }
1040
/*
 * Record "zero", "one" and "sign" masks for the single output of @op.
 * See TempOptInfo definition of z_mask, o_mask and s_mask.
 * If the combined masks describe a constant, fold the output to it.
 * If a_mask is zero, the operation reduces to a copy of its first input.
 * The passed s_mask may be augmented by z_mask.
 */
static bool fold_masks_zosa_int(OptContext *ctx, TCGOp *op,
                                uint64_t z_mask, uint64_t o_mask,
                                int64_t s_mask, uint64_t a_mask)
1050 {
1051 const TCGOpDef *def = &tcg_op_defs[op->opc];
1052 TCGTemp *ts;
1053 TempOptInfo *ti;
1054 int rep;
1055
1056 /* Only single-output opcodes are supported here. */
1057 tcg_debug_assert(def->nb_oargs == 1);
1058
1059 /*
1060 * 32-bit ops generate 32-bit results, which for the purpose of
1061 * simplifying tcg are sign-extended. Certainly that's how we
1062 * represent our constants elsewhere. Note that the bits will
1063 * be reset properly for a 64-bit value when encountering the
1064 * type changing opcodes.
1065 */
1066 if (ctx->type == TCG_TYPE_I32) {
1067 z_mask = (int32_t)z_mask;
1068 o_mask = (int32_t)o_mask;
1069 s_mask |= INT32_MIN;
1070 a_mask = (uint32_t)a_mask;
1071 }
1072
1073 /* Bits that are known 1 and bits that are known 0 must not overlap. */
1074 tcg_debug_assert((o_mask & ~z_mask) == 0);
1075
    /* If all bits that are not known zero are known one, it's a constant. */
1077 if (z_mask == o_mask) {
1078 return tcg_opt_gen_movi(ctx, op, op->args[0], o_mask);
1079 }
1080
1081 /* If no bits are affected, the operation devolves to a copy. */
1082 if (a_mask == 0) {
1083 return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
1084 }
1085
1086 ts = arg_temp(op->args[0]);
1087 reset_ts(ctx, ts);
1088
1089 ti = ts_info(ts);
1090 ti->z_mask = z_mask;
1091
1092 /* Canonicalize s_mask and incorporate data from z_mask. */
1093 rep = clz64(~s_mask);
1094 rep = MAX(rep, clz64(z_mask));
1095 rep = MAX(rep, clz64(~o_mask));
1096 rep = MAX(rep - 1, 0);
1097 ti->s_mask = INT64_MIN >> rep;
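    /*
     * For example, with z_mask == 0xff, o_mask == 0 and an incoming
     * s_mask of 0, clz64(z_mask) == 56 dominates, so rep becomes 55
     * and s_mask is canonicalized to 0xffffffffffffff00: bits 63..8
     * are all known to match the (zero) sign bit.
     */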
1098
1099 return false;
1100 }
1101
static bool fold_masks_zosa(OptContext *ctx, TCGOp *op, uint64_t z_mask,
                            uint64_t o_mask, int64_t s_mask, uint64_t a_mask)
1104 {
    fold_masks_zosa_int(ctx, op, z_mask, o_mask, s_mask, a_mask);
1106 return true;
1107 }
1108
static bool fold_masks_zos(OptContext *ctx, TCGOp *op,
                           uint64_t z_mask, uint64_t o_mask, uint64_t s_mask)
1111 {
1112 return fold_masks_zosa(ctx, op, z_mask, o_mask, s_mask, -1);
1113 }
1114
static bool fold_masks_zo(OptContext *ctx, TCGOp *op,
                          uint64_t z_mask, uint64_t o_mask)
1117 {
1118 return fold_masks_zosa(ctx, op, z_mask, o_mask, 0, -1);
1119 }
1120
static bool fold_masks_zs(OptContext *ctx, TCGOp *op,
                          uint64_t z_mask, uint64_t s_mask)
1123 {
1124 return fold_masks_zosa(ctx, op, z_mask, 0, s_mask, -1);
1125 }
1126
static bool fold_masks_z(OptContext *ctx, TCGOp *op, uint64_t z_mask)
1128 {
1129 return fold_masks_zosa(ctx, op, z_mask, 0, 0, -1);
1130 }
1131
static bool fold_masks_s(OptContext *ctx, TCGOp *op, uint64_t s_mask)
1133 {
1134 return fold_masks_zosa(ctx, op, -1, 0, s_mask, -1);
1135 }
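/*
 * The fold_masks_* wrappers above name the masks they supply:
 * z(ero), o(ne), s(ign) and a(ffected).  Omitted masks default to the
 * "nothing known" values: z_mask = -1, o_mask = 0, s_mask = 0 and
 * a_mask = -1 (all bits potentially affected).
 */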
1136
1137 /*
1138 * Convert @op to NOT, if NOT is supported by the host.
 * Return true if the conversion is successful, which will still
1140 * indicate that the processing is complete.
1141 */
1142 static bool fold_not(OptContext *ctx, TCGOp *op);
static bool fold_to_not(OptContext *ctx, TCGOp *op, int idx)
1144 {
1145 TCGOpcode not_op;
1146 bool have_not;
1147
1148 switch (ctx->type) {
1149 case TCG_TYPE_I32:
1150 case TCG_TYPE_I64:
1151 not_op = INDEX_op_not;
1152 have_not = tcg_op_supported(INDEX_op_not, ctx->type, 0);
1153 break;
1154 case TCG_TYPE_V64:
1155 case TCG_TYPE_V128:
1156 case TCG_TYPE_V256:
1157 not_op = INDEX_op_not_vec;
1158 have_not = TCG_TARGET_HAS_not_vec;
1159 break;
1160 default:
1161 g_assert_not_reached();
1162 }
1163 if (have_not) {
1164 op->opc = not_op;
1165 op->args[1] = op->args[idx];
1166 return fold_not(ctx, op);
1167 }
1168 return false;
1169 }
1170
1171 /* If the binary operation has first argument @i, fold to @i. */
static bool fold_ix_to_i(OptContext *ctx, TCGOp *op, uint64_t i)
1173 {
1174 if (arg_is_const_val(op->args[1], i)) {
1175 return tcg_opt_gen_movi(ctx, op, op->args[0], i);
1176 }
1177 return false;
1178 }
1179
1180 /* If the binary operation has first argument @i, fold to NOT. */
static bool fold_ix_to_not(OptContext *ctx, TCGOp *op, uint64_t i)
1182 {
1183 if (arg_is_const_val(op->args[1], i)) {
1184 return fold_to_not(ctx, op, 2);
1185 }
1186 return false;
1187 }
1188
1189 /* If the binary operation has second argument @i, fold to @i. */
static bool fold_xi_to_i(OptContext *ctx, TCGOp *op, uint64_t i)
1191 {
1192 if (arg_is_const_val(op->args[2], i)) {
1193 return tcg_opt_gen_movi(ctx, op, op->args[0], i);
1194 }
1195 return false;
1196 }
1197
1198 /* If the binary operation has second argument @i, fold to identity. */
static bool fold_xi_to_x(OptContext *ctx, TCGOp *op, uint64_t i)
1200 {
1201 if (arg_is_const_val(op->args[2], i)) {
1202 return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
1203 }
1204 return false;
1205 }
1206
1207 /* If the binary operation has second argument @i, fold to NOT. */
static bool fold_xi_to_not(OptContext *ctx, TCGOp *op, uint64_t i)
1209 {
1210 if (arg_is_const_val(op->args[2], i)) {
1211 return fold_to_not(ctx, op, 1);
1212 }
1213 return false;
1214 }
1215
1216 /* If the binary operation has both arguments equal, fold to @i. */
static bool fold_xx_to_i(OptContext *ctx, TCGOp *op, uint64_t i)
1218 {
1219 if (args_are_copies(op->args[1], op->args[2])) {
1220 return tcg_opt_gen_movi(ctx, op, op->args[0], i);
1221 }
1222 return false;
1223 }
1224
1225 /* If the binary operation has both arguments equal, fold to identity. */
static bool fold_xx_to_x(OptContext *ctx, TCGOp *op)
1227 {
1228 if (args_are_copies(op->args[1], op->args[2])) {
1229 return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
1230 }
1231 return false;
1232 }
1233
1234 /*
1235 * These outermost fold_<op> functions are sorted alphabetically.
1236 *
1237 * The ordering of the transformations should be:
1238 * 1) those that produce a constant
1239 * 2) those that produce a copy
1240 * 3) those that produce information about the result value.
1241 */
1242
1243 static bool fold_addco(OptContext *ctx, TCGOp *op);
1244 static bool fold_or(OptContext *ctx, TCGOp *op);
1245 static bool fold_orc(OptContext *ctx, TCGOp *op);
1246 static bool fold_subbo(OptContext *ctx, TCGOp *op);
1247 static bool fold_xor(OptContext *ctx, TCGOp *op);
1248
static bool fold_add(OptContext *ctx, TCGOp *op)
1250 {
1251 if (fold_const2_commutative(ctx, op) ||
1252 fold_xi_to_x(ctx, op, 0)) {
1253 return true;
1254 }
1255 return finish_folding(ctx, op);
1256 }
1257
1258 /* We cannot as yet do_constant_folding with vectors. */
static bool fold_add_vec(OptContext *ctx, TCGOp *op)
1260 {
1261 if (fold_commutative(ctx, op) ||
1262 fold_xi_to_x(ctx, op, 0)) {
1263 return true;
1264 }
1265 return finish_folding(ctx, op);
1266 }
1267
static void squash_prev_carryout(OptContext *ctx, TCGOp *op)
1269 {
1270 TempOptInfo *t2;
1271
1272 op = QTAILQ_PREV(op, link);
1273 switch (op->opc) {
1274 case INDEX_op_addco:
1275 op->opc = INDEX_op_add;
1276 fold_add(ctx, op);
1277 break;
1278 case INDEX_op_addcio:
1279 op->opc = INDEX_op_addci;
1280 break;
1281 case INDEX_op_addc1o:
1282 op->opc = INDEX_op_add;
1283 t2 = arg_info(op->args[2]);
1284 if (ti_is_const(t2)) {
1285 op->args[2] = arg_new_constant(ctx, ti_const_val(t2) + 1);
1286 /* Perform other constant folding, if needed. */
1287 fold_add(ctx, op);
1288 } else {
1289 TCGArg ret = op->args[0];
1290 op = opt_insert_after(ctx, op, INDEX_op_add, 3);
1291 op->args[0] = ret;
1292 op->args[1] = ret;
1293 op->args[2] = arg_new_constant(ctx, 1);
1294 }
1295 break;
1296 default:
1297 g_assert_not_reached();
1298 }
1299 }
1300
static bool fold_addci(OptContext *ctx, TCGOp *op)
1302 {
1303 fold_commutative(ctx, op);
1304
1305 if (ctx->carry_state < 0) {
1306 return finish_folding(ctx, op);
1307 }
1308
1309 squash_prev_carryout(ctx, op);
1310 op->opc = INDEX_op_add;
1311
1312 if (ctx->carry_state > 0) {
1313 TempOptInfo *t2 = arg_info(op->args[2]);
1314
1315 /*
1316 * Propagate the known carry-in into a constant, if possible.
1317 * Otherwise emit a second add +1.
1318 */
1319 if (ti_is_const(t2)) {
1320 op->args[2] = arg_new_constant(ctx, ti_const_val(t2) + 1);
1321 } else {
1322 TCGOp *op2 = opt_insert_before(ctx, op, INDEX_op_add, 3);
1323
1324 op2->args[0] = op->args[0];
1325 op2->args[1] = op->args[1];
1326 op2->args[2] = op->args[2];
1327 fold_add(ctx, op2);
1328
1329 op->args[1] = op->args[0];
1330 op->args[2] = arg_new_constant(ctx, 1);
1331 }
1332 }
1333
1334 ctx->carry_state = -1;
1335 return fold_add(ctx, op);
1336 }
1337
static bool fold_addcio(OptContext *ctx, TCGOp *op)
1339 {
1340 TempOptInfo *t1, *t2;
1341 int carry_out = -1;
1342 uint64_t sum, max;
1343
1344 fold_commutative(ctx, op);
1345 t1 = arg_info(op->args[1]);
1346 t2 = arg_info(op->args[2]);
1347
1348 /*
1349 * The z_mask value is >= the maximum value that can be represented
1350 * with the known zero bits. So adding the z_mask values will not
1351 * overflow if and only if the true values cannot overflow.
1352 */
1353 if (!uadd64_overflow(t1->z_mask, t2->z_mask, &sum) &&
1354 !uadd64_overflow(sum, ctx->carry_state != 0, &sum)) {
1355 carry_out = 0;
1356 }
1357
1358 if (ctx->carry_state < 0) {
1359 ctx->carry_state = carry_out;
1360 return finish_folding(ctx, op);
1361 }
1362
1363 squash_prev_carryout(ctx, op);
1364 if (ctx->carry_state == 0) {
1365 goto do_addco;
1366 }
1367
1368 /* Propagate the known carry-in into a constant, if possible. */
1369 max = ctx->type == TCG_TYPE_I32 ? UINT32_MAX : UINT64_MAX;
1370 if (ti_is_const(t2)) {
1371 uint64_t v = ti_const_val(t2) & max;
1372 if (v < max) {
1373 op->args[2] = arg_new_constant(ctx, v + 1);
1374 goto do_addco;
1375 }
1376 /* max + known carry in produces known carry out. */
1377 carry_out = 1;
1378 }
1379 if (ti_is_const(t1)) {
1380 uint64_t v = ti_const_val(t1) & max;
1381 if (v < max) {
1382 op->args[1] = arg_new_constant(ctx, v + 1);
1383 goto do_addco;
1384 }
1385 carry_out = 1;
1386 }
1387
1388 /* Adjust the opcode to remember the known carry-in. */
1389 op->opc = INDEX_op_addc1o;
1390 ctx->carry_state = carry_out;
1391 return finish_folding(ctx, op);
1392
1393 do_addco:
1394 op->opc = INDEX_op_addco;
1395 return fold_addco(ctx, op);
1396 }
1397
static bool fold_addco(OptContext *ctx, TCGOp *op)
1399 {
1400 TempOptInfo *t1, *t2;
1401 int carry_out = -1;
1402 uint64_t ign;
1403
1404 fold_commutative(ctx, op);
1405 t1 = arg_info(op->args[1]);
1406 t2 = arg_info(op->args[2]);
1407
1408 if (ti_is_const(t2)) {
1409 uint64_t v2 = ti_const_val(t2);
1410
1411 if (ti_is_const(t1)) {
1412 uint64_t v1 = ti_const_val(t1);
1413 /* Given sign-extension of z_mask for I32, we need not truncate. */
1414 carry_out = uadd64_overflow(v1, v2, &ign);
1415 } else if (v2 == 0) {
1416 carry_out = 0;
1417 }
1418 } else {
1419 /*
1420 * The z_mask value is >= the maximum value that can be represented
1421 * with the known zero bits. So adding the z_mask values will not
1422 * overflow if and only if the true values cannot overflow.
1423 */
1424 if (!uadd64_overflow(t1->z_mask, t2->z_mask, &ign)) {
1425 carry_out = 0;
1426 }
1427 }
1428 ctx->carry_state = carry_out;
1429 return finish_folding(ctx, op);
1430 }
1431
static bool fold_and(OptContext *ctx, TCGOp *op)
1433 {
1434 uint64_t z_mask, o_mask, s_mask, a_mask;
1435 TempOptInfo *t1, *t2;
1436
1437 if (fold_const2_commutative(ctx, op)) {
1438 return true;
1439 }
1440
1441 t1 = arg_info(op->args[1]);
1442 t2 = arg_info(op->args[2]);
1443
1444 z_mask = t1->z_mask & t2->z_mask;
1445 o_mask = t1->o_mask & t2->o_mask;
1446
1447 /*
1448 * Sign repetitions are perforce all identical, whether they are 1 or 0.
1449 * Bitwise operations preserve the relative quantity of the repetitions.
1450 */
1451 s_mask = t1->s_mask & t2->s_mask;
1452
1453 /* Affected bits are those not known zero, masked by those known one. */
1454 a_mask = t1->z_mask & ~t2->o_mask;
1455
1456 if (!fold_masks_zosa_int(ctx, op, z_mask, o_mask, s_mask, a_mask)) {
1457 if (op->opc == INDEX_op_and && ti_is_const(t2)) {
1458 /*
1459 * Canonicalize on extract, if valid. This aids x86 with its
1460 * 2 operand MOVZBL and 2 operand AND, selecting the TCGOpcode
1461 * which does not require matching operands. Other backends can
1462 * trivially expand the extract to AND during code generation.
1463 */
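            /*
             * A mask of the form (1 << len) - 1 satisfies
             * !(val & (val + 1)); e.g. val == 0xff yields len == 8
             * and the AND becomes extract(x, 0, 8).
             */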
1464 uint64_t val = ti_const_val(t2);
1465 if (!(val & (val + 1))) {
1466 unsigned len = ctz64(~val);
1467 if (TCG_TARGET_extract_valid(ctx->type, 0, len)) {
1468 op->opc = INDEX_op_extract;
1469 op->args[2] = 0;
1470 op->args[3] = len;
1471 }
1472 }
1473 } else {
1474 fold_xx_to_x(ctx, op);
1475 }
1476 }
1477 return true;
1478 }
1479
static bool fold_andc(OptContext *ctx, TCGOp *op)
1481 {
1482 uint64_t z_mask, o_mask, s_mask, a_mask;
1483 TempOptInfo *t1, *t2;
1484
1485 if (fold_const2(ctx, op)) {
1486 return true;
1487 }
1488
1489 t1 = arg_info(op->args[1]);
1490 t2 = arg_info(op->args[2]);
1491
1492 if (ti_is_const(t2)) {
1493 /* Fold andc r,x,i to and r,x,~i. */
1494 switch (ctx->type) {
1495 case TCG_TYPE_I32:
1496 case TCG_TYPE_I64:
1497 op->opc = INDEX_op_and;
1498 break;
1499 case TCG_TYPE_V64:
1500 case TCG_TYPE_V128:
1501 case TCG_TYPE_V256:
1502 op->opc = INDEX_op_and_vec;
1503 break;
1504 default:
1505 g_assert_not_reached();
1506 }
1507 op->args[2] = arg_new_constant(ctx, ~ti_const_val(t2));
1508 return fold_and(ctx, op);
1509 }
1510 if (fold_xx_to_i(ctx, op, 0) ||
1511 fold_ix_to_not(ctx, op, -1)) {
1512 return true;
1513 }
1514
1515 z_mask = t1->z_mask & ~t2->o_mask;
1516 o_mask = t1->o_mask & ~t2->z_mask;
1517 s_mask = t1->s_mask & t2->s_mask;
1518
1519 /* Affected bits are those not known zero, masked by those known zero. */
1520 a_mask = t1->z_mask & t2->z_mask;
1521
1522 return fold_masks_zosa(ctx, op, z_mask, o_mask, s_mask, a_mask);
1523 }
1524
static bool fold_bitsel_vec(OptContext *ctx, TCGOp *op)
1526 {
1527 /* If true and false values are the same, eliminate the cmp. */
1528 if (args_are_copies(op->args[2], op->args[3])) {
1529 return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[2]);
1530 }
1531
1532 if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])) {
1533 uint64_t tv = arg_const_val(op->args[2]);
1534 uint64_t fv = arg_const_val(op->args[3]);
1535
1536 if (tv == -1 && fv == 0) {
1537 return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
1538 }
1539 if (tv == 0 && fv == -1) {
1540 if (TCG_TARGET_HAS_not_vec) {
1541 op->opc = INDEX_op_not_vec;
1542 return fold_not(ctx, op);
1543 } else {
1544 op->opc = INDEX_op_xor_vec;
1545 op->args[2] = arg_new_constant(ctx, -1);
1546 return fold_xor(ctx, op);
1547 }
1548 }
1549 }
1550 if (arg_is_const(op->args[2])) {
1551 uint64_t tv = arg_const_val(op->args[2]);
1552 if (tv == -1) {
1553 op->opc = INDEX_op_or_vec;
1554 op->args[2] = op->args[3];
1555 return fold_or(ctx, op);
1556 }
1557 if (tv == 0 && TCG_TARGET_HAS_andc_vec) {
1558 op->opc = INDEX_op_andc_vec;
1559 op->args[2] = op->args[1];
1560 op->args[1] = op->args[3];
1561 return fold_andc(ctx, op);
1562 }
1563 }
1564 if (arg_is_const(op->args[3])) {
1565 uint64_t fv = arg_const_val(op->args[3]);
1566 if (fv == 0) {
1567 op->opc = INDEX_op_and_vec;
1568 return fold_and(ctx, op);
1569 }
1570 if (fv == -1 && TCG_TARGET_HAS_orc_vec) {
1571 TCGArg ta = op->args[2];
1572 op->opc = INDEX_op_orc_vec;
1573 op->args[2] = op->args[1];
1574 op->args[1] = ta;
1575 return fold_orc(ctx, op);
1576 }
1577 }
1578 return finish_folding(ctx, op);
1579 }
1580
static bool fold_brcond(OptContext *ctx, TCGOp *op)
1582 {
1583 int i = do_constant_folding_cond1(ctx, op, NO_DEST, &op->args[0],
1584 &op->args[1], &op->args[2]);
1585 if (i == 0) {
1586 tcg_op_remove(ctx->tcg, op);
1587 return true;
1588 }
1589 if (i > 0) {
1590 op->opc = INDEX_op_br;
1591 op->args[0] = op->args[3];
1592 finish_ebb(ctx);
1593 } else {
1594 finish_bb(ctx);
1595 }
1596 return true;
1597 }
1598
static bool fold_brcond2(OptContext *ctx, TCGOp *op)
1600 {
1601 TCGCond cond;
1602 TCGArg label;
1603 int i, inv = 0;
1604
1605 i = do_constant_folding_cond2(ctx, op, &op->args[0]);
1606 cond = op->args[4];
1607 label = op->args[5];
1608 if (i >= 0) {
1609 goto do_brcond_const;
1610 }
1611
1612 switch (cond) {
1613 case TCG_COND_LT:
1614 case TCG_COND_GE:
1615 /*
1616 * Simplify LT/GE comparisons vs zero to a single compare
1617 * vs the high word of the input.
1618 */
1619 if (arg_is_const_val(op->args[2], 0) &&
1620 arg_is_const_val(op->args[3], 0)) {
1621 goto do_brcond_high;
1622 }
1623 break;
1624
1625 case TCG_COND_NE:
1626 inv = 1;
1627 QEMU_FALLTHROUGH;
1628 case TCG_COND_EQ:
1629 /*
1630 * Simplify EQ/NE comparisons where one of the pairs
1631 * can be simplified.
1632 */
1633 i = do_constant_folding_cond(TCG_TYPE_I32, op->args[0],
1634 op->args[2], cond);
1635 switch (i ^ inv) {
1636 case 0:
1637 goto do_brcond_const;
1638 case 1:
1639 goto do_brcond_high;
1640 }
1641
1642 i = do_constant_folding_cond(TCG_TYPE_I32, op->args[1],
1643 op->args[3], cond);
1644 switch (i ^ inv) {
1645 case 0:
1646 goto do_brcond_const;
1647 case 1:
1648 goto do_brcond_low;
1649 }
1650 break;
1651
1652 case TCG_COND_TSTEQ:
1653 case TCG_COND_TSTNE:
1654 if (arg_is_const_val(op->args[2], 0)) {
1655 goto do_brcond_high;
1656 }
1657 if (arg_is_const_val(op->args[3], 0)) {
1658 goto do_brcond_low;
1659 }
1660 break;
1661
1662 default:
1663 break;
1664
1665 do_brcond_low:
1666 op->opc = INDEX_op_brcond;
1667 op->args[1] = op->args[2];
1668 op->args[2] = cond;
1669 op->args[3] = label;
1670 return fold_brcond(ctx, op);
1671
1672 do_brcond_high:
1673 op->opc = INDEX_op_brcond;
1674 op->args[0] = op->args[1];
1675 op->args[1] = op->args[3];
1676 op->args[2] = cond;
1677 op->args[3] = label;
1678 return fold_brcond(ctx, op);
1679
1680 do_brcond_const:
1681 if (i == 0) {
1682 tcg_op_remove(ctx->tcg, op);
1683 return true;
1684 }
1685 op->opc = INDEX_op_br;
1686 op->args[0] = label;
1687 finish_ebb(ctx);
1688 return true;
1689 }
1690
1691 finish_bb(ctx);
1692 return true;
1693 }
1694
static bool fold_bswap(OptContext *ctx, TCGOp *op)
1696 {
1697 uint64_t z_mask, o_mask, s_mask;
1698 TempOptInfo *t1 = arg_info(op->args[1]);
1699 int flags = op->args[2];
1700
1701 if (ti_is_const(t1)) {
1702 return tcg_opt_gen_movi(ctx, op, op->args[0],
1703 do_constant_folding(op->opc, ctx->type,
1704 ti_const_val(t1), flags));
1705 }
1706
1707 z_mask = t1->z_mask;
1708 o_mask = t1->o_mask;
1709 s_mask = 0;
1710
1711 switch (op->opc) {
1712 case INDEX_op_bswap16:
1713 z_mask = bswap16(z_mask);
1714 o_mask = bswap16(o_mask);
1715 if (flags & TCG_BSWAP_OS) {
1716 z_mask = (int16_t)z_mask;
1717 o_mask = (int16_t)o_mask;
1718 s_mask = INT16_MIN;
1719 } else if (!(flags & TCG_BSWAP_OZ)) {
1720 z_mask |= MAKE_64BIT_MASK(16, 48);
1721 }
1722 break;
1723 case INDEX_op_bswap32:
1724 z_mask = bswap32(z_mask);
1725 o_mask = bswap32(o_mask);
1726 if (flags & TCG_BSWAP_OS) {
1727 z_mask = (int32_t)z_mask;
1728 o_mask = (int32_t)o_mask;
1729 s_mask = INT32_MIN;
1730 } else if (!(flags & TCG_BSWAP_OZ)) {
1731 z_mask |= MAKE_64BIT_MASK(32, 32);
1732 }
1733 break;
1734 case INDEX_op_bswap64:
1735 z_mask = bswap64(z_mask);
1736 o_mask = bswap64(o_mask);
1737 break;
1738 default:
1739 g_assert_not_reached();
1740 }
1741
1742 return fold_masks_zos(ctx, op, z_mask, o_mask, s_mask);
1743 }
1744
static bool fold_call(OptContext *ctx, TCGOp *op)
1746 {
1747 TCGContext *s = ctx->tcg;
1748 int nb_oargs = TCGOP_CALLO(op);
1749 int nb_iargs = TCGOP_CALLI(op);
1750 int flags, i;
1751
1752 init_arguments(ctx, op, nb_oargs + nb_iargs);
1753 copy_propagate(ctx, op, nb_oargs, nb_iargs);
1754
1755 /* If the function reads or writes globals, reset temp data. */
1756 flags = tcg_call_flags(op);
1757 if (!(flags & (TCG_CALL_NO_READ_GLOBALS | TCG_CALL_NO_WRITE_GLOBALS))) {
1758 int nb_globals = s->nb_globals;
1759
1760 for (i = 0; i < nb_globals; i++) {
1761 if (test_bit(i, ctx->temps_used.l)) {
1762 reset_ts(ctx, &ctx->tcg->temps[i]);
1763 }
1764 }
1765 }
1766
1767 /* If the function has side effects, reset mem data. */
1768 if (!(flags & TCG_CALL_NO_SIDE_EFFECTS)) {
1769 remove_mem_copy_all(ctx);
1770 }
1771
1772 /* Reset temp data for outputs. */
1773 for (i = 0; i < nb_oargs; i++) {
1774 reset_temp(ctx, op->args[i]);
1775 }
1776
1777 /* Stop optimizing MB across calls. */
1778 ctx->prev_mb = NULL;
1779 return true;
1780 }
1781
static bool fold_cmp_vec(OptContext *ctx, TCGOp *op)
1783 {
1784 /* Canonicalize the comparison to put immediate second. */
1785 if (swap_commutative(NO_DEST, &op->args[1], &op->args[2])) {
1786 op->args[3] = tcg_swap_cond(op->args[3]);
1787 }
1788 return finish_folding(ctx, op);
1789 }
1790
static bool fold_cmpsel_vec(OptContext *ctx, TCGOp *op)
1792 {
1793 /* If true and false values are the same, eliminate the cmp. */
1794 if (args_are_copies(op->args[3], op->args[4])) {
1795 return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[3]);
1796 }
1797
1798 /* Canonicalize the comparison to put immediate second. */
1799 if (swap_commutative(NO_DEST, &op->args[1], &op->args[2])) {
1800 op->args[5] = tcg_swap_cond(op->args[5]);
1801 }
1802 /*
1803 * Canonicalize the "false" input reg to match the destination,
1804 * so that the tcg backend can implement "move if true".
1805 */
1806 if (swap_commutative(op->args[0], &op->args[4], &op->args[3])) {
1807 op->args[5] = tcg_invert_cond(op->args[5]);
1808 }
1809 return finish_folding(ctx, op);
1810 }
1811
static bool fold_count_zeros(OptContext *ctx, TCGOp *op)
1813 {
1814 uint64_t z_mask, s_mask;
1815 TempOptInfo *t1 = arg_info(op->args[1]);
1816 TempOptInfo *t2 = arg_info(op->args[2]);
1817
1818 if (ti_is_const(t1)) {
1819 uint64_t t = ti_const_val(t1);
1820
1821 if (t != 0) {
1822 t = do_constant_folding(op->opc, ctx->type, t, 0);
1823 return tcg_opt_gen_movi(ctx, op, op->args[0], t);
1824 }
1825 return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[2]);
1826 }
1827
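    /*
     * Otherwise the result is either a bit index in [0, 31] or [0, 63],
     * or the fallback value from args[2]; merge the masks of both
     * possibilities below.
     */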
1828 switch (ctx->type) {
1829 case TCG_TYPE_I32:
1830 z_mask = 31;
1831 break;
1832 case TCG_TYPE_I64:
1833 z_mask = 63;
1834 break;
1835 default:
1836 g_assert_not_reached();
1837 }
1838 s_mask = ~z_mask;
1839 z_mask |= t2->z_mask;
1840 s_mask &= t2->s_mask;
1841
1842 return fold_masks_zs(ctx, op, z_mask, s_mask);
1843 }
1844
static bool fold_ctpop(OptContext *ctx, TCGOp *op)
1846 {
1847 uint64_t z_mask;
1848
1849 if (fold_const1(ctx, op)) {
1850 return true;
1851 }
1852
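    /*
     * The population count is at most 32 (resp. 64), so 32 | 31
     * (resp. 64 | 63) is the tightest z_mask covering every possible
     * result.
     */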
1853 switch (ctx->type) {
1854 case TCG_TYPE_I32:
1855 z_mask = 32 | 31;
1856 break;
1857 case TCG_TYPE_I64:
1858 z_mask = 64 | 63;
1859 break;
1860 default:
1861 g_assert_not_reached();
1862 }
1863 return fold_masks_z(ctx, op, z_mask);
1864 }
1865
static bool fold_deposit(OptContext *ctx, TCGOp *op)
1867 {
1868 TempOptInfo *t1 = arg_info(op->args[1]);
1869 TempOptInfo *t2 = arg_info(op->args[2]);
1870 int ofs = op->args[3];
1871 int len = op->args[4];
1872 int width = 8 * tcg_type_size(ctx->type);
1873 uint64_t z_mask, o_mask, s_mask;
1874
1875 if (ti_is_const(t1) && ti_is_const(t2)) {
1876 return tcg_opt_gen_movi(ctx, op, op->args[0],
1877 deposit64(ti_const_val(t1), ofs, len,
1878 ti_const_val(t2)));
1879 }
1880
1881 /* Inserting a value into zero at offset 0. */
1882 if (ti_is_const_val(t1, 0) && ofs == 0) {
1883 uint64_t mask = MAKE_64BIT_MASK(0, len);
1884
1885 op->opc = INDEX_op_and;
1886 op->args[1] = op->args[2];
1887 op->args[2] = arg_new_constant(ctx, mask);
1888 return fold_and(ctx, op);
1889 }
1890
1891 /* Inserting zero into a value. */
1892 if (ti_is_const_val(t2, 0)) {
1893 uint64_t mask = deposit64(-1, ofs, len, 0);
1894
1895 op->opc = INDEX_op_and;
1896 op->args[2] = arg_new_constant(ctx, mask);
1897 return fold_and(ctx, op);
1898 }
1899
1900 /* The s_mask from the top portion of the deposit is still valid. */
1901 if (ofs + len == width) {
1902 s_mask = t2->s_mask << ofs;
1903 } else {
1904 s_mask = t1->s_mask & ~MAKE_64BIT_MASK(0, ofs + len);
1905 }
1906
1907 z_mask = deposit64(t1->z_mask, ofs, len, t2->z_mask);
1908 o_mask = deposit64(t1->o_mask, ofs, len, t2->o_mask);
1909
1910 return fold_masks_zos(ctx, op, z_mask, o_mask, s_mask);
1911 }
1912
static bool fold_divide(OptContext *ctx, TCGOp *op)
1914 {
1915 if (fold_const2(ctx, op) ||
1916 fold_xi_to_x(ctx, op, 1)) {
1917 return true;
1918 }
1919 return finish_folding(ctx, op);
1920 }
1921
static bool fold_dup(OptContext *ctx, TCGOp *op)
1923 {
1924 if (arg_is_const(op->args[1])) {
1925 uint64_t t = arg_const_val(op->args[1]);
1926 t = dup_const(TCGOP_VECE(op), t);
1927 return tcg_opt_gen_movi(ctx, op, op->args[0], t);
1928 }
1929 return finish_folding(ctx, op);
1930 }
1931
static bool fold_dup2(OptContext *ctx, TCGOp *op)
1933 {
1934 if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
1935 uint64_t t = deposit64(arg_const_val(op->args[1]), 32, 32,
1936 arg_const_val(op->args[2]));
1937 return tcg_opt_gen_movi(ctx, op, op->args[0], t);
1938 }
1939
1940 if (args_are_copies(op->args[1], op->args[2])) {
1941 op->opc = INDEX_op_dup_vec;
1942 TCGOP_VECE(op) = MO_32;
1943 }
1944 return finish_folding(ctx, op);
1945 }
1946
static bool fold_eqv(OptContext *ctx, TCGOp *op)
1948 {
1949 uint64_t z_mask, o_mask, s_mask;
1950 TempOptInfo *t1, *t2;
1951
1952 if (fold_const2_commutative(ctx, op)) {
1953 return true;
1954 }
1955
1956 t2 = arg_info(op->args[2]);
1957 if (ti_is_const(t2)) {
1958 /* Fold eqv r,x,i to xor r,x,~i. */
1959 switch (ctx->type) {
1960 case TCG_TYPE_I32:
1961 case TCG_TYPE_I64:
1962 op->opc = INDEX_op_xor;
1963 break;
1964 case TCG_TYPE_V64:
1965 case TCG_TYPE_V128:
1966 case TCG_TYPE_V256:
1967 op->opc = INDEX_op_xor_vec;
1968 break;
1969 default:
1970 g_assert_not_reached();
1971 }
1972 op->args[2] = arg_new_constant(ctx, ~ti_const_val(t2));
1973 return fold_xor(ctx, op);
1974 }
1975
1976 t1 = arg_info(op->args[1]);
1977
1978 z_mask = (t1->z_mask | ~t2->o_mask) & (t2->z_mask | ~t1->o_mask);
1979 o_mask = ~(t1->z_mask | t2->z_mask) | (t1->o_mask & t2->o_mask);
1980 s_mask = t1->s_mask & t2->s_mask;
1981
1982 return fold_masks_zos(ctx, op, z_mask, o_mask, s_mask);
1983 }
1984
static bool fold_extract(OptContext *ctx, TCGOp *op)
1986 {
1987 uint64_t z_mask, o_mask, a_mask;
1988 TempOptInfo *t1 = arg_info(op->args[1]);
1989 int pos = op->args[2];
1990 int len = op->args[3];
1991
1992 if (ti_is_const(t1)) {
1993 return tcg_opt_gen_movi(ctx, op, op->args[0],
1994 extract64(ti_const_val(t1), pos, len));
1995 }
1996
1997 z_mask = extract64(t1->z_mask, pos, len);
1998 o_mask = extract64(t1->o_mask, pos, len);
1999 a_mask = pos ? -1 : t1->z_mask ^ z_mask;
2000
2001 return fold_masks_zosa(ctx, op, z_mask, o_mask, 0, a_mask);
2002 }
2003
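/*
 * extract2 computes (arg1 >> shr) | (arg2 << (width - shr));
 * the known-bit masks of the two inputs combine the same way.
 */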
2004 static bool fold_extract2(OptContext *ctx, TCGOp *op)
2005 {
2006 TempOptInfo *t1 = arg_info(op->args[1]);
2007 TempOptInfo *t2 = arg_info(op->args[2]);
2008 uint64_t z1 = t1->z_mask;
2009 uint64_t z2 = t2->z_mask;
2010 uint64_t o1 = t1->o_mask;
2011 uint64_t o2 = t2->o_mask;
2012 int shr = op->args[3];
2013
2014 if (ctx->type == TCG_TYPE_I32) {
2015 z1 = (uint32_t)z1 >> shr;
2016 o1 = (uint32_t)o1 >> shr;
2017 z2 = (uint64_t)((int32_t)z2 << (32 - shr));
2018 o2 = (uint64_t)((int32_t)o2 << (32 - shr));
2019 } else {
2020 z1 >>= shr;
2021 o1 >>= shr;
2022 z2 <<= 64 - shr;
2023 o2 <<= 64 - shr;
2024 }
2025
2026 return fold_masks_zo(ctx, op, z1 | z2, o1 | o2);
2027 }
2028
2029 static bool fold_exts(OptContext *ctx, TCGOp *op)
2030 {
2031 uint64_t z_mask, o_mask, s_mask;
2032 TempOptInfo *t1;
2033
2034 if (fold_const1(ctx, op)) {
2035 return true;
2036 }
2037
2038 t1 = arg_info(op->args[1]);
2039 z_mask = t1->z_mask;
2040 o_mask = t1->o_mask;
2041 s_mask = t1->s_mask;
2042
2043 switch (op->opc) {
2044 case INDEX_op_ext_i32_i64:
2045 s_mask |= INT32_MIN;
2046 z_mask = (int32_t)z_mask;
2047 o_mask = (int32_t)o_mask;
2048 break;
2049 default:
2050 g_assert_not_reached();
2051 }
2052 return fold_masks_zos(ctx, op, z_mask, o_mask, s_mask);
2053 }
2054
2055 static bool fold_extu(OptContext *ctx, TCGOp *op)
2056 {
2057 uint64_t z_mask, o_mask;
2058 TempOptInfo *t1;
2059
2060 if (fold_const1(ctx, op)) {
2061 return true;
2062 }
2063
2064 t1 = arg_info(op->args[1]);
2065 z_mask = t1->z_mask;
2066 o_mask = t1->o_mask;
2067
2068 switch (op->opc) {
2069 case INDEX_op_extrl_i64_i32:
2070 case INDEX_op_extu_i32_i64:
2071 z_mask = (uint32_t)z_mask;
2072 o_mask = (uint32_t)o_mask;
2073 break;
2074 case INDEX_op_extrh_i64_i32:
2075 z_mask >>= 32;
2076 o_mask >>= 32;
2077 break;
2078 default:
2079 g_assert_not_reached();
2080 }
2081 return fold_masks_zo(ctx, op, z_mask, o_mask);
2082 }
2083
2084 static bool fold_mb(OptContext *ctx, TCGOp *op)
2085 {
2086 /* Eliminate duplicate and redundant fence instructions. */
2087 if (ctx->prev_mb) {
2088 /*
2089 * Merge two barriers of the same type into one,
2090 * or a weaker barrier into a stronger one,
2091 * or two weaker barriers into a stronger one.
2092 * mb X; mb Y => mb X|Y
2093 * mb; strl => mb; st
2094 * ldaq; mb => ld; mb
2095 * ldaq; strl => ld; mb; st
2096 * Other combinations are also merged into a strong
2097 * barrier. This is stricter than specified but for
2098 * the purposes of TCG is better than not optimizing.
2099 */
2100 ctx->prev_mb->args[0] |= op->args[0];
2101 tcg_op_remove(ctx->tcg, op);
2102 } else {
2103 ctx->prev_mb = op;
2104 }
2105 return true;
2106 }
2107
2108 static bool fold_mov(OptContext *ctx, TCGOp *op)
2109 {
2110 return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
2111 }
2112
2113 static bool fold_movcond(OptContext *ctx, TCGOp *op)
2114 {
2115 uint64_t z_mask, o_mask, s_mask;
2116 TempOptInfo *tt, *ft;
2117 int i;
2118
2119 /* If true and false values are the same, eliminate the cmp. */
2120 if (args_are_copies(op->args[3], op->args[4])) {
2121 return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[3]);
2122 }
2123
2124 /*
2125 * Canonicalize the "false" input reg to match the destination reg so
2126 * that the tcg backend can implement a "move if true" operation.
2127 */
2128 if (swap_commutative(op->args[0], &op->args[4], &op->args[3])) {
2129 op->args[5] = tcg_invert_cond(op->args[5]);
2130 }
2131
2132 i = do_constant_folding_cond1(ctx, op, NO_DEST, &op->args[1],
2133 &op->args[2], &op->args[5]);
2134 if (i >= 0) {
2135 return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[4 - i]);
2136 }
2137
2138 tt = arg_info(op->args[3]);
2139 ft = arg_info(op->args[4]);
2140 z_mask = tt->z_mask | ft->z_mask;
2141 o_mask = tt->o_mask & ft->o_mask;
2142 s_mask = tt->s_mask & ft->s_mask;
2143
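/* With both data inputs constant, prefer a setcond/negsetcond form. */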
2144 if (ti_is_const(tt) && ti_is_const(ft)) {
2145 uint64_t tv = ti_const_val(tt);
2146 uint64_t fv = ti_const_val(ft);
2147 TCGCond cond = op->args[5];
2148
2149 if (tv == 1 && fv == 0) {
2150 op->opc = INDEX_op_setcond;
2151 op->args[3] = cond;
2152 } else if (fv == 1 && tv == 0) {
2153 op->opc = INDEX_op_setcond;
2154 op->args[3] = tcg_invert_cond(cond);
2155 } else if (tv == -1 && fv == 0) {
2156 op->opc = INDEX_op_negsetcond;
2157 op->args[3] = cond;
2158 } else if (fv == -1 && tv == 0) {
2159 op->opc = INDEX_op_negsetcond;
2160 op->args[3] = tcg_invert_cond(cond);
2161 }
2162 }
2163
2164 return fold_masks_zos(ctx, op, z_mask, o_mask, s_mask);
2165 }
2166
2167 static bool fold_mul(OptContext *ctx, TCGOp *op)
2168 {
2169 if (fold_const2(ctx, op) ||
2170 fold_xi_to_i(ctx, op, 0) ||
2171 fold_xi_to_x(ctx, op, 1)) {
2172 return true;
2173 }
2174 return finish_folding(ctx, op);
2175 }
2176
2177 static bool fold_mul_highpart(OptContext *ctx, TCGOp *op)
2178 {
2179 if (fold_const2_commutative(ctx, op) ||
2180 fold_xi_to_i(ctx, op, 0)) {
2181 return true;
2182 }
2183 return finish_folding(ctx, op);
2184 }
2185
2186 static bool fold_multiply2(OptContext *ctx, TCGOp *op)
2187 {
2188 swap_commutative(op->args[0], &op->args[2], &op->args[3]);
2189
2190 if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])) {
2191 uint64_t a = arg_const_val(op->args[2]);
2192 uint64_t b = arg_const_val(op->args[3]);
2193 uint64_t h, l;
2194 TCGArg rl, rh;
2195 TCGOp *op2;
2196
2197 switch (op->opc) {
2198 case INDEX_op_mulu2:
2199 if (ctx->type == TCG_TYPE_I32) {
2200 l = (uint64_t)(uint32_t)a * (uint32_t)b;
2201 h = (int32_t)(l >> 32);
2202 l = (int32_t)l;
2203 } else {
2204 mulu64(&l, &h, a, b);
2205 }
2206 break;
2207 case INDEX_op_muls2:
2208 if (ctx->type == TCG_TYPE_I32) {
2209 l = (int64_t)(int32_t)a * (int32_t)b;
2210 h = l >> 32;
2211 l = (int32_t)l;
2212 } else {
2213 muls64(&l, &h, a, b);
2214 }
2215 break;
2216 default:
2217 g_assert_not_reached();
2218 }
2219
2220 rl = op->args[0];
2221 rh = op->args[1];
2222
2223 /* The proper opcode is supplied by tcg_opt_gen_mov. */
2224 op2 = opt_insert_before(ctx, op, 0, 2);
2225
2226 tcg_opt_gen_movi(ctx, op, rl, l);
2227 tcg_opt_gen_movi(ctx, op2, rh, h);
2228 return true;
2229 }
2230 return finish_folding(ctx, op);
2231 }
2232
2233 static bool fold_nand(OptContext *ctx, TCGOp *op)
2234 {
2235 uint64_t z_mask, o_mask, s_mask;
2236 TempOptInfo *t1, *t2;
2237
2238 if (fold_const2_commutative(ctx, op) ||
2239 fold_xi_to_not(ctx, op, -1)) {
2240 return true;
2241 }
2242
2243 t1 = arg_info(op->args[1]);
2244 t2 = arg_info(op->args[2]);
2245
2246 z_mask = ~(t1->o_mask & t2->o_mask);
2247 o_mask = ~(t1->z_mask & t2->z_mask);
2248 s_mask = t1->s_mask & t2->s_mask;
2249
2250 return fold_masks_zos(ctx, op, z_mask, o_mask, s_mask);
2251 }
2252
2253 static bool fold_neg_no_const(OptContext *ctx, TCGOp *op)
2254 {
2255 /* Set to 1 all bits at and above the rightmost bit that may be set: -x has no set bits below the lowest possible set bit of x. */
2256 uint64_t z_mask = arg_info(op->args[1])->z_mask;
2257 z_mask = -(z_mask & -z_mask);
2258
2259 return fold_masks_z(ctx, op, z_mask);
2260 }
2261
2262 static bool fold_neg(OptContext *ctx, TCGOp *op)
2263 {
2264 return fold_const1(ctx, op) || fold_neg_no_const(ctx, op);
2265 }
2266
2267 static bool fold_nor(OptContext *ctx, TCGOp *op)
2268 {
2269 uint64_t z_mask, o_mask, s_mask;
2270 TempOptInfo *t1, *t2;
2271
2272 if (fold_const2_commutative(ctx, op) ||
2273 fold_xi_to_not(ctx, op, 0)) {
2274 return true;
2275 }
2276
2277 t1 = arg_info(op->args[1]);
2278 t2 = arg_info(op->args[2]);
2279
2280 z_mask = ~(t1->o_mask | t2->o_mask);
2281 o_mask = ~(t1->z_mask | t2->z_mask);
2282 s_mask = t1->s_mask & t2->s_mask;
2283
2284 return fold_masks_zos(ctx, op, z_mask, o_mask, s_mask);
2285 }
2286
2287 static bool fold_not(OptContext *ctx, TCGOp *op)
2288 {
2289 TempOptInfo *t1;
2290
2291 if (fold_const1(ctx, op)) {
2292 return true;
2293 }
2294
2295 t1 = arg_info(op->args[1]);
2296 return fold_masks_zos(ctx, op, ~t1->o_mask, ~t1->z_mask, t1->s_mask);
2297 }
2298
2299 static bool fold_or(OptContext *ctx, TCGOp *op)
2300 {
2301 uint64_t z_mask, o_mask, s_mask, a_mask;
2302 TempOptInfo *t1, *t2;
2303
2304 if (fold_const2_commutative(ctx, op) ||
2305 fold_xi_to_x(ctx, op, 0) ||
2306 fold_xx_to_x(ctx, op)) {
2307 return true;
2308 }
2309
2310 t1 = arg_info(op->args[1]);
2311 t2 = arg_info(op->args[2]);
2312
2313 z_mask = t1->z_mask | t2->z_mask;
2314 o_mask = t1->o_mask | t2->o_mask;
2315 s_mask = t1->s_mask & t2->s_mask;
2316
2317 /* Affected bits are those not known one, masked by those known zero. */
2318 a_mask = ~t1->o_mask & t2->z_mask;
2319
2320 return fold_masks_zosa(ctx, op, z_mask, o_mask, s_mask, a_mask);
2321 }
2322
2323 static bool fold_orc(OptContext *ctx, TCGOp *op)
2324 {
2325 uint64_t z_mask, o_mask, s_mask, a_mask;
2326 TempOptInfo *t1, *t2;
2327
2328 if (fold_const2(ctx, op)) {
2329 return true;
2330 }
2331
2332 t2 = arg_info(op->args[2]);
2333 if (ti_is_const(t2)) {
2334 /* Fold orc r,x,i to or r,x,~i. */
2335 switch (ctx->type) {
2336 case TCG_TYPE_I32:
2337 case TCG_TYPE_I64:
2338 op->opc = INDEX_op_or;
2339 break;
2340 case TCG_TYPE_V64:
2341 case TCG_TYPE_V128:
2342 case TCG_TYPE_V256:
2343 op->opc = INDEX_op_or_vec;
2344 break;
2345 default:
2346 g_assert_not_reached();
2347 }
2348 op->args[2] = arg_new_constant(ctx, ~ti_const_val(t2));
2349 return fold_or(ctx, op);
2350 }
2351 if (fold_xx_to_i(ctx, op, -1) ||
2352 fold_ix_to_not(ctx, op, 0)) {
2353 return true;
2354 }
2355 t1 = arg_info(op->args[1]);
2356
2357 z_mask = t1->z_mask | ~t2->o_mask;
2358 o_mask = t1->o_mask | ~t2->z_mask;
2359 s_mask = t1->s_mask & t2->s_mask;
2360
2361 /* Affected bits are those not known one, masked by those known one. */
2362 a_mask = ~t1->o_mask & ~t2->o_mask;
2363
2364 return fold_masks_zosa(ctx, op, z_mask, o_mask, s_mask, a_mask);
2365 }
2366
2367 static bool fold_qemu_ld_1reg(OptContext *ctx, TCGOp *op)
2368 {
2369 const TCGOpDef *def = &tcg_op_defs[op->opc];
2370 MemOpIdx oi = op->args[def->nb_oargs + def->nb_iargs];
2371 MemOp mop = get_memop(oi);
2372 int width = 8 * memop_size(mop);
2373 uint64_t z_mask = -1, s_mask = 0;
2374
2375 if (width < 64) {
2376 if (mop & MO_SIGN) {
2377 s_mask = MAKE_64BIT_MASK(width - 1, 64 - (width - 1));
2378 } else {
2379 z_mask = MAKE_64BIT_MASK(0, width);
2380 }
2381 }
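/* E.g. MO_UB gives z_mask 0xff; MO_SB gives s_mask covering bits 7..63. */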
2382
2383 /* Opcodes that touch guest memory stop the mb optimization. */
2384 ctx->prev_mb = NULL;
2385
2386 return fold_masks_zs(ctx, op, z_mask, s_mask);
2387 }
2388
2389 static bool fold_qemu_ld_2reg(OptContext *ctx, TCGOp *op)
2390 {
2391 /* Opcodes that touch guest memory stop the mb optimization. */
2392 ctx->prev_mb = NULL;
2393 return finish_folding(ctx, op);
2394 }
2395
2396 static bool fold_qemu_st(OptContext *ctx, TCGOp *op)
2397 {
2398 /* Opcodes that touch guest memory stop the mb optimization. */
2399 ctx->prev_mb = NULL;
2400 return true;
2401 }
2402
2403 static bool fold_remainder(OptContext *ctx, TCGOp *op)
2404 {
2405 if (fold_const2(ctx, op) ||
2406 fold_xx_to_i(ctx, op, 0)) {
2407 return true;
2408 }
2409 return finish_folding(ctx, op);
2410 }
2411
2412 /* Return 1 if finished, -1 if simplified, 0 if unchanged. */
2413 static int fold_setcond_zmask(OptContext *ctx, TCGOp *op, bool neg)
2414 {
2415 uint64_t a_zmask, b_val;
2416 TCGCond cond;
2417
2418 if (!arg_is_const(op->args[2])) {
2419 return 0;
2420 }
2421
2422 a_zmask = arg_info(op->args[1])->z_mask;
2423 b_val = arg_const_val(op->args[2]);
2424 cond = op->args[3];
2425
2426 if (ctx->type == TCG_TYPE_I32) {
2427 a_zmask = (uint32_t)a_zmask;
2428 b_val = (uint32_t)b_val;
2429 }
2430
2431 /*
2432 * A with only low bits set vs B with high bits set means that A < B.
2433 */
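/*
 * E.g. with a_zmask == 0xff and b_val == 0x100, A is at most 0xff < B,
 * so EQ/GEU/GTU fold to 0 and NE/LEU/LTU fold to 1 (-1 for negsetcond).
 */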
2434 if (a_zmask < b_val) {
2435 bool inv = false;
2436
2437 switch (cond) {
2438 case TCG_COND_NE:
2439 case TCG_COND_LEU:
2440 case TCG_COND_LTU:
2441 inv = true;
2442 /* fall through */
2443 case TCG_COND_GTU:
2444 case TCG_COND_GEU:
2445 case TCG_COND_EQ:
2446 return tcg_opt_gen_movi(ctx, op, op->args[0], neg ? -inv : inv);
2447 default:
2448 break;
2449 }
2450 }
2451
2452 /*
2453 * A with only lsb set is already boolean.
2454 */
2455 if (a_zmask <= 1) {
2456 bool convert = false;
2457 bool inv = false;
2458
2459 switch (cond) {
2460 case TCG_COND_EQ:
2461 inv = true;
2462 /* fall through */
2463 case TCG_COND_NE:
2464 convert = (b_val == 0);
2465 break;
2466 case TCG_COND_LTU:
2467 case TCG_COND_TSTEQ:
2468 inv = true;
2469 /* fall through */
2470 case TCG_COND_GEU:
2471 case TCG_COND_TSTNE:
2472 convert = (b_val == 1);
2473 break;
2474 default:
2475 break;
2476 }
2477 if (convert) {
2478 if (!inv && !neg) {
2479 return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
2480 }
2481
2482 if (!inv) {
2483 op->opc = INDEX_op_neg;
2484 } else if (neg) {
2485 op->opc = INDEX_op_add;
2486 op->args[2] = arg_new_constant(ctx, -1);
2487 } else {
2488 op->opc = INDEX_op_xor;
2489 op->args[2] = arg_new_constant(ctx, 1);
2490 }
2491 return -1;
2492 }
2493 }
2494 return 0;
2495 }
2496
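/*
 * For TSTNE/TSTEQ against a power-of-2 constant, the result is just that
 * one bit of arg1: extract it (or shift and mask it), then invert and/or
 * negate as required by the condition and by setcond vs negsetcond.
 */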
2497 static void fold_setcond_tst_pow2(OptContext *ctx, TCGOp *op, bool neg)
2498 {
2499 TCGCond cond = op->args[3];
2500 TCGArg ret, src1, src2;
2501 TCGOp *op2;
2502 uint64_t val;
2503 int sh;
2504 bool inv;
2505
2506 if (!is_tst_cond(cond) || !arg_is_const(op->args[2])) {
2507 return;
2508 }
2509
2510 src2 = op->args[2];
2511 val = arg_const_val(src2);
2512 if (!is_power_of_2(val)) {
2513 return;
2514 }
2515 sh = ctz64(val);
2516
2517 ret = op->args[0];
2518 src1 = op->args[1];
2519 inv = cond == TCG_COND_TSTEQ;
2520
2521 if (sh && neg && !inv && TCG_TARGET_sextract_valid(ctx->type, sh, 1)) {
2522 op->opc = INDEX_op_sextract;
2523 op->args[1] = src1;
2524 op->args[2] = sh;
2525 op->args[3] = 1;
2526 return;
2527 } else if (sh && TCG_TARGET_extract_valid(ctx->type, sh, 1)) {
2528 op->opc = INDEX_op_extract;
2529 op->args[1] = src1;
2530 op->args[2] = sh;
2531 op->args[3] = 1;
2532 } else {
2533 if (sh) {
2534 op2 = opt_insert_before(ctx, op, INDEX_op_shr, 3);
2535 op2->args[0] = ret;
2536 op2->args[1] = src1;
2537 op2->args[2] = arg_new_constant(ctx, sh);
2538 src1 = ret;
2539 }
2540 op->opc = INDEX_op_and;
2541 op->args[1] = src1;
2542 op->args[2] = arg_new_constant(ctx, 1);
2543 }
2544
2545 if (neg && inv) {
2546 op2 = opt_insert_after(ctx, op, INDEX_op_add, 3);
2547 op2->args[0] = ret;
2548 op2->args[1] = ret;
2549 op2->args[2] = arg_new_constant(ctx, -1);
2550 } else if (inv) {
2551 op2 = opt_insert_after(ctx, op, INDEX_op_xor, 3);
2552 op2->args[0] = ret;
2553 op2->args[1] = ret;
2554 op2->args[2] = arg_new_constant(ctx, 1);
2555 } else if (neg) {
2556 op2 = opt_insert_after(ctx, op, INDEX_op_neg, 2);
2557 op2->args[0] = ret;
2558 op2->args[1] = ret;
2559 }
2560 }
2561
2562 static bool fold_setcond(OptContext *ctx, TCGOp *op)
2563 {
2564 int i = do_constant_folding_cond1(ctx, op, op->args[0], &op->args[1],
2565 &op->args[2], &op->args[3]);
2566 if (i >= 0) {
2567 return tcg_opt_gen_movi(ctx, op, op->args[0], i);
2568 }
2569
2570 i = fold_setcond_zmask(ctx, op, false);
2571 if (i > 0) {
2572 return true;
2573 }
2574 if (i == 0) {
2575 fold_setcond_tst_pow2(ctx, op, false);
2576 }
2577
2578 return fold_masks_z(ctx, op, 1);
2579 }
2580
2581 static bool fold_negsetcond(OptContext *ctx, TCGOp *op)
2582 {
2583 int i = do_constant_folding_cond1(ctx, op, op->args[0], &op->args[1],
2584 &op->args[2], &op->args[3]);
2585 if (i >= 0) {
2586 return tcg_opt_gen_movi(ctx, op, op->args[0], -i);
2587 }
2588
2589 i = fold_setcond_zmask(ctx, op, true);
2590 if (i > 0) {
2591 return true;
2592 }
2593 if (i == 0) {
2594 fold_setcond_tst_pow2(ctx, op, true);
2595 }
2596
2597 /* Value is {0,-1} so all bits are repetitions of the sign. */
2598 return fold_masks_s(ctx, op, -1);
2599 }
2600
2601 static bool fold_setcond2(OptContext *ctx, TCGOp *op)
2602 {
2603 TCGCond cond;
2604 int i, inv = 0;
2605
2606 i = do_constant_folding_cond2(ctx, op, &op->args[1]);
2607 cond = op->args[5];
2608 if (i >= 0) {
2609 goto do_setcond_const;
2610 }
2611
2612 switch (cond) {
2613 case TCG_COND_LT:
2614 case TCG_COND_GE:
2615 /*
2616 * Simplify LT/GE comparisons vs zero to a single compare
2617 * vs the high word of the input.
2618 */
2619 if (arg_is_const_val(op->args[3], 0) &&
2620 arg_is_const_val(op->args[4], 0)) {
2621 goto do_setcond_high;
2622 }
2623 break;
2624
2625 case TCG_COND_NE:
2626 inv = 1;
2627 QEMU_FALLTHROUGH;
2628 case TCG_COND_EQ:
2629 /*
2630 * Simplify EQ/NE comparisons where one of the pairs
2631 * can be simplified.
2632 */
2633 i = do_constant_folding_cond(TCG_TYPE_I32, op->args[1],
2634 op->args[3], cond);
2635 switch (i ^ inv) {
2636 case 0:
2637 goto do_setcond_const;
2638 case 1:
2639 goto do_setcond_high;
2640 }
2641
2642 i = do_constant_folding_cond(TCG_TYPE_I32, op->args[2],
2643 op->args[4], cond);
2644 switch (i ^ inv) {
2645 case 0:
2646 goto do_setcond_const;
2647 case 1:
2648 goto do_setcond_low;
2649 }
2650 break;
2651
2652 case TCG_COND_TSTEQ:
2653 case TCG_COND_TSTNE:
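/*
 * The test applies independently to the two halves; a constant-zero
 * half contributes nothing, so only the other half need be tested.
 */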
2654 if (arg_is_const_val(op->args[3], 0)) {
2655 goto do_setcond_high;
2656 }
2657 if (arg_is_const_val(op->args[4], 0)) {
2658 goto do_setcond_low;
2659 }
2660 break;
2661
2662 default:
2663 break;
2664
2665 do_setcond_low:
2666 op->args[2] = op->args[3];
2667 op->args[3] = cond;
2668 op->opc = INDEX_op_setcond;
2669 return fold_setcond(ctx, op);
2670
2671 do_setcond_high:
2672 op->args[1] = op->args[2];
2673 op->args[2] = op->args[4];
2674 op->args[3] = cond;
2675 op->opc = INDEX_op_setcond;
2676 return fold_setcond(ctx, op);
2677 }
2678
2679 return fold_masks_z(ctx, op, 1);
2680
2681 do_setcond_const:
2682 return tcg_opt_gen_movi(ctx, op, op->args[0], i);
2683 }
2684
2685 static bool fold_sextract(OptContext *ctx, TCGOp *op)
2686 {
2687 uint64_t z_mask, o_mask, s_mask, a_mask;
2688 TempOptInfo *t1 = arg_info(op->args[1]);
2689 int pos = op->args[2];
2690 int len = op->args[3];
2691
2692 if (ti_is_const(t1)) {
2693 return tcg_opt_gen_movi(ctx, op, op->args[0],
2694 sextract64(ti_const_val(t1), pos, len));
2695 }
2696
2697 s_mask = t1->s_mask >> pos;
2698 s_mask |= -1ull << (len - 1);
2699 a_mask = pos ? -1 : s_mask & ~t1->s_mask;
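/*
 * With pos == 0, a_mask above is zero exactly when the input is already
 * known to be sign-extended within len bits, so the extract can fold to
 * a simple copy.
 */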
2700
2701 z_mask = sextract64(t1->z_mask, pos, len);
2702 o_mask = sextract64(t1->o_mask, pos, len);
2703
2704 return fold_masks_zosa(ctx, op, z_mask, o_mask, s_mask, a_mask);
2705 }
2706
2707 static bool fold_shift(OptContext *ctx, TCGOp *op)
2708 {
2709 uint64_t s_mask, z_mask, o_mask;
2710 TempOptInfo *t1, *t2;
2711
2712 if (fold_const2(ctx, op) ||
2713 fold_ix_to_i(ctx, op, 0) ||
2714 fold_xi_to_x(ctx, op, 0)) {
2715 return true;
2716 }
2717
2718 t1 = arg_info(op->args[1]);
2719 t2 = arg_info(op->args[2]);
2720 s_mask = t1->s_mask;
2721 z_mask = t1->z_mask;
2722 o_mask = t1->o_mask;
2723
2724 if (ti_is_const(t2)) {
2725 int sh = ti_const_val(t2);
2726
2727 z_mask = do_constant_folding(op->opc, ctx->type, z_mask, sh);
2728 o_mask = do_constant_folding(op->opc, ctx->type, o_mask, sh);
2729 s_mask = do_constant_folding(op->opc, ctx->type, s_mask, sh);
2730
2731 return fold_masks_zos(ctx, op, z_mask, o_mask, s_mask);
2732 }
2733
2734 switch (op->opc) {
2735 case INDEX_op_sar:
2736 /*
2737 * Arithmetic right shift will not reduce the number of
2738 * input sign repetitions.
2739 */
2740 return fold_masks_s(ctx, op, s_mask);
2741 case INDEX_op_shr:
2742 /*
2743 * If the sign bit is known zero, then logical right shift
2744 * will not reduce the number of input sign repetitions.
2745 */
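/*
 * Editor's note: s_mask is in practice a run of high bits, so -s_mask
 * isolates its lowest bit; if that bit is known zero, the whole run is.
 */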
2746 if (~z_mask & -s_mask) {
2747 return fold_masks_s(ctx, op, s_mask);
2748 }
2749 break;
2750 default:
2751 break;
2752 }
2753
2754 return finish_folding(ctx, op);
2755 }
2756
2757 static bool fold_sub_to_neg(OptContext *ctx, TCGOp *op)
2758 {
2759 TCGOpcode neg_op;
2760 bool have_neg;
2761
2762 if (!arg_is_const_val(op->args[1], 0)) {
2763 return false;
2764 }
2765
2766 switch (ctx->type) {
2767 case TCG_TYPE_I32:
2768 case TCG_TYPE_I64:
2769 neg_op = INDEX_op_neg;
2770 have_neg = true;
2771 break;
2772 case TCG_TYPE_V64:
2773 case TCG_TYPE_V128:
2774 case TCG_TYPE_V256:
2775 neg_op = INDEX_op_neg_vec;
2776 have_neg = (TCG_TARGET_HAS_neg_vec &&
2777 tcg_can_emit_vec_op(neg_op, ctx->type, TCGOP_VECE(op)) > 0);
2778 break;
2779 default:
2780 g_assert_not_reached();
2781 }
2782 if (have_neg) {
2783 op->opc = neg_op;
2784 op->args[1] = op->args[2];
2785 return fold_neg_no_const(ctx, op);
2786 }
2787 return false;
2788 }
2789
2790 /* We cannot as yet do_constant_folding with vectors. */
2791 static bool fold_sub_vec(OptContext *ctx, TCGOp *op)
2792 {
2793 if (fold_xx_to_i(ctx, op, 0) ||
2794 fold_xi_to_x(ctx, op, 0) ||
2795 fold_sub_to_neg(ctx, op)) {
2796 return true;
2797 }
2798 return finish_folding(ctx, op);
2799 }
2800
2801 static bool fold_sub(OptContext *ctx, TCGOp *op)
2802 {
2803 if (fold_const2(ctx, op) ||
2804 fold_xx_to_i(ctx, op, 0) ||
2805 fold_xi_to_x(ctx, op, 0) ||
2806 fold_sub_to_neg(ctx, op)) {
2807 return true;
2808 }
2809
2810 /* Fold sub r,x,i to add r,x,-i */
2811 if (arg_is_const(op->args[2])) {
2812 uint64_t val = arg_const_val(op->args[2]);
2813
2814 op->opc = INDEX_op_add;
2815 op->args[2] = arg_new_constant(ctx, -val);
2816 }
2817 return finish_folding(ctx, op);
2818 }
2819
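/*
 * The borrow produced by the previous opcode is now known at compile time,
 * so that opcode no longer needs to produce it; rewrite it into the
 * equivalent form without a borrow output.
 */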
2820 static void squash_prev_borrowout(OptContext *ctx, TCGOp *op)
2821 {
2822 TempOptInfo *t2;
2823
2824 op = QTAILQ_PREV(op, link);
2825 switch (op->opc) {
2826 case INDEX_op_subbo:
2827 op->opc = INDEX_op_sub;
2828 fold_sub(ctx, op);
2829 break;
2830 case INDEX_op_subbio:
2831 op->opc = INDEX_op_subbi;
2832 break;
2833 case INDEX_op_subb1o:
2834 t2 = arg_info(op->args[2]);
2835 if (ti_is_const(t2)) {
2836 op->opc = INDEX_op_add;
2837 op->args[2] = arg_new_constant(ctx, -(ti_const_val(t2) + 1));
2838 /* Perform other constant folding, if needed. */
2839 fold_add(ctx, op);
2840 } else {
2841 TCGArg ret = op->args[0];
2842 op->opc = INDEX_op_sub;
2843 op = opt_insert_after(ctx, op, INDEX_op_add, 3);
2844 op->args[0] = ret;
2845 op->args[1] = ret;
2846 op->args[2] = arg_new_constant(ctx, -1);
2847 }
2848 break;
2849 default:
2850 g_assert_not_reached();
2851 }
2852 }
2853
2854 static bool fold_subbi(OptContext *ctx, TCGOp *op)
2855 {
2856 TempOptInfo *t2;
2857 int borrow_in = ctx->carry_state;
2858
2859 if (borrow_in < 0) {
2860 return finish_folding(ctx, op);
2861 }
2862 ctx->carry_state = -1;
2863
2864 squash_prev_borrowout(ctx, op);
2865 if (borrow_in == 0) {
2866 op->opc = INDEX_op_sub;
2867 return fold_sub(ctx, op);
2868 }
2869
2870 /*
2871 * Propagate the known borrow-in into any constant, then negate to
2872 * transform from sub to add. If there is no constant, emit a
2873 * separate add -1.
2874 */
2875 t2 = arg_info(op->args[2]);
2876 if (ti_is_const(t2)) {
2877 op->args[2] = arg_new_constant(ctx, -(ti_const_val(t2) + 1));
2878 } else {
2879 TCGOp *op2 = opt_insert_before(ctx, op, INDEX_op_sub, 3);
2880
2881 op2->args[0] = op->args[0];
2882 op2->args[1] = op->args[1];
2883 op2->args[2] = op->args[2];
2884 fold_sub(ctx, op2);
2885
2886 op->args[1] = op->args[0];
2887 op->args[2] = arg_new_constant(ctx, -1);
2888 }
2889 op->opc = INDEX_op_add;
2890 return fold_add(ctx, op);
2891 }
2892
2893 static bool fold_subbio(OptContext *ctx, TCGOp *op)
2894 {
2895 TempOptInfo *t1, *t2;
2896 int borrow_out = -1;
2897
2898 if (ctx->carry_state < 0) {
2899 return finish_folding(ctx, op);
2900 }
2901
2902 squash_prev_borrowout(ctx, op);
2903 if (ctx->carry_state == 0) {
2904 goto do_subbo;
2905 }
2906
2907 t1 = arg_info(op->args[1]);
2908 t2 = arg_info(op->args[2]);
2909
2910 /* Propagate the known borrow-in into a constant, if possible. */
2911 if (ti_is_const(t2)) {
2912 uint64_t max = ctx->type == TCG_TYPE_I32 ? UINT32_MAX : UINT64_MAX;
2913 uint64_t v = ti_const_val(t2) & max;
2914
2915 if (v < max) {
2916 op->args[2] = arg_new_constant(ctx, v + 1);
2917 goto do_subbo;
2918 }
2919 /* subtracting max + 1 produces known borrow out. */
2920 borrow_out = 1;
2921 }
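/*
 * If the minuend is a nonzero constant, fold the borrow into it instead:
 * a1 - a2 - 1 == (a1 - 1) - a2, with the same borrow-out.
 */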
2922 if (ti_is_const(t1)) {
2923 uint64_t v = ti_const_val(t1);
2924 if (v != 0) {
2925 op->args[1] = arg_new_constant(ctx, v - 1);
2926 goto do_subbo;
2927 }
2928 }
2929
2930 /* Adjust the opcode to remember the known borrow-in. */
2931 op->opc = INDEX_op_subb1o;
2932 ctx->carry_state = borrow_out;
2933 return finish_folding(ctx, op);
2934
2935 do_subbo:
2936 op->opc = INDEX_op_subbo;
2937 return fold_subbo(ctx, op);
2938 }
2939
2940 static bool fold_subbo(OptContext *ctx, TCGOp *op)
2941 {
2942 TempOptInfo *t1 = arg_info(op->args[1]);
2943 TempOptInfo *t2 = arg_info(op->args[2]);
2944 int borrow_out = -1;
2945
2946 if (ti_is_const(t2)) {
2947 uint64_t v2 = ti_const_val(t2);
2948 if (v2 == 0) {
2949 borrow_out = 0;
2950 } else if (ti_is_const(t1)) {
2951 uint64_t v1 = ti_const_val(t1);
2952 borrow_out = v1 < v2;
2953 }
2954 }
2955 ctx->carry_state = borrow_out;
2956 return finish_folding(ctx, op);
2957 }
2958
2959 static bool fold_tcg_ld(OptContext *ctx, TCGOp *op)
2960 {
2961 uint64_t z_mask = -1, s_mask = 0;
2962
2963 /* We can't do any folding with a load, but we can record bits. */
2964 switch (op->opc) {
2965 case INDEX_op_ld8s:
2966 s_mask = INT8_MIN;
2967 break;
2968 case INDEX_op_ld8u:
2969 z_mask = MAKE_64BIT_MASK(0, 8);
2970 break;
2971 case INDEX_op_ld16s:
2972 s_mask = INT16_MIN;
2973 break;
2974 case INDEX_op_ld16u:
2975 z_mask = MAKE_64BIT_MASK(0, 16);
2976 break;
2977 case INDEX_op_ld32s:
2978 s_mask = INT32_MIN;
2979 break;
2980 case INDEX_op_ld32u:
2981 z_mask = MAKE_64BIT_MASK(0, 32);
2982 break;
2983 default:
2984 g_assert_not_reached();
2985 }
2986 return fold_masks_zs(ctx, op, z_mask, s_mask);
2987 }
2988
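/*
 * A load from the CPU state (via tcg_env) can be replaced by a copy of a
 * temp already known to hold the value at that offset; otherwise record
 * the loaded temp as the current copy of that memory.
 */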
2989 static bool fold_tcg_ld_memcopy(OptContext *ctx, TCGOp *op)
2990 {
2991 TCGTemp *dst, *src;
2992 intptr_t ofs;
2993 TCGType type;
2994
2995 if (op->args[1] != tcgv_ptr_arg(tcg_env)) {
2996 return finish_folding(ctx, op);
2997 }
2998
2999 type = ctx->type;
3000 ofs = op->args[2];
3001 dst = arg_temp(op->args[0]);
3002 src = find_mem_copy_for(ctx, type, ofs);
3003 if (src && src->base_type == type) {
3004 return tcg_opt_gen_mov(ctx, op, temp_arg(dst), temp_arg(src));
3005 }
3006
3007 reset_ts(ctx, dst);
3008 record_mem_copy(ctx, type, dst, ofs, ofs + tcg_type_size(type) - 1);
3009 return true;
3010 }
3011
3012 static bool fold_tcg_st(OptContext *ctx, TCGOp *op)
3013 {
3014 intptr_t ofs = op->args[2];
3015 intptr_t lm1; /* store size in bytes, minus 1 */
3016
3017 if (op->args[1] != tcgv_ptr_arg(tcg_env)) {
3018 remove_mem_copy_all(ctx);
3019 return true;
3020 }
3021
3022 switch (op->opc) {
3023 case INDEX_op_st8:
3024 lm1 = 0;
3025 break;
3026 case INDEX_op_st16:
3027 lm1 = 1;
3028 break;
3029 case INDEX_op_st32:
3030 lm1 = 3;
3031 break;
3032 case INDEX_op_st:
3033 case INDEX_op_st_vec:
3034 lm1 = tcg_type_size(ctx->type) - 1;
3035 break;
3036 default:
3037 g_assert_not_reached();
3038 }
3039 remove_mem_copy_in(ctx, ofs, ofs + lm1);
3040 return true;
3041 }
3042
3043 static bool fold_tcg_st_memcopy(OptContext *ctx, TCGOp *op)
3044 {
3045 TCGTemp *src;
3046 intptr_t ofs, last;
3047 TCGType type;
3048
3049 if (op->args[1] != tcgv_ptr_arg(tcg_env)) {
3050 return fold_tcg_st(ctx, op);
3051 }
3052
3053 src = arg_temp(op->args[0]);
3054 ofs = op->args[2];
3055 type = ctx->type;
3056
3057 /*
3058 * Eliminate duplicate stores of a constant.
3059 * This happens frequently when the target ISA zero-extends.
3060 */
3061 if (ts_is_const(src)) {
3062 TCGTemp *prev = find_mem_copy_for(ctx, type, ofs);
3063 if (src == prev) {
3064 tcg_op_remove(ctx->tcg, op);
3065 return true;
3066 }
3067 }
3068
3069 last = ofs + tcg_type_size(type) - 1;
3070 remove_mem_copy_in(ctx, ofs, last);
3071 record_mem_copy(ctx, type, src, ofs, last);
3072 return true;
3073 }
3074
3075 static bool fold_xor(OptContext *ctx, TCGOp *op)
3076 {
3077 uint64_t z_mask, o_mask, s_mask;
3078 TempOptInfo *t1, *t2;
3079
3080 if (fold_const2_commutative(ctx, op) ||
3081 fold_xx_to_i(ctx, op, 0) ||
3082 fold_xi_to_x(ctx, op, 0) ||
3083 fold_xi_to_not(ctx, op, -1)) {
3084 return true;
3085 }
3086
3087 t1 = arg_info(op->args[1]);
3088 t2 = arg_info(op->args[2]);
3089
3090 z_mask = (t1->z_mask | t2->z_mask) & ~(t1->o_mask & t2->o_mask);
3091 o_mask = (t1->o_mask & ~t2->z_mask) | (t2->o_mask & ~t1->z_mask);
3092 s_mask = t1->s_mask & t2->s_mask;
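/*
 * A result bit is known only where both inputs are known: known-one where
 * they are known to differ, known-zero where they are known to agree.
 */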
3093
3094 return fold_masks_zos(ctx, op, z_mask, o_mask, s_mask);
3095 }
3096
3097 /* Propagate constants and copies, fold constant expressions. */
3098 void tcg_optimize(TCGContext *s)
3099 {
3100 int nb_temps, i;
3101 TCGOp *op, *op_next;
3102 OptContext ctx = { .tcg = s };
3103
3104 QSIMPLEQ_INIT(&ctx.mem_free);
3105
3106 /* Each temp has a TempOptInfo, reachable via its state_ptr.
3107 If the temp holds a constant then its value is kept there.
3108 If the temp is a copy of other ones then the other copies are
3109 available through a doubly linked circular list. */
3110
3111 nb_temps = s->nb_temps;
3112 for (i = 0; i < nb_temps; ++i) {
3113 s->temps[i].state_ptr = NULL;
3114 }
3115
3116 QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
3117 TCGOpcode opc = op->opc;
3118 const TCGOpDef *def;
3119 bool done = false;
3120
3121 /* Calls are special. */
3122 if (opc == INDEX_op_call) {
3123 fold_call(&ctx, op);
3124 continue;
3125 }
3126
3127 def = &tcg_op_defs[opc];
3128 init_arguments(&ctx, op, def->nb_oargs + def->nb_iargs);
3129 copy_propagate(&ctx, op, def->nb_oargs, def->nb_iargs);
3130
3131 /* Pre-compute the type of the operation. */
3132 ctx.type = TCGOP_TYPE(op);
3133
3134 /*
3135 * Process each opcode.
3136 * Sorted alphabetically by opcode as much as possible.
3137 */
3138 switch (opc) {
3139 case INDEX_op_add:
3140 done = fold_add(&ctx, op);
3141 break;
3142 case INDEX_op_add_vec:
3143 done = fold_add_vec(&ctx, op);
3144 break;
3145 case INDEX_op_addci:
3146 done = fold_addci(&ctx, op);
3147 break;
3148 case INDEX_op_addcio:
3149 done = fold_addcio(&ctx, op);
3150 break;
3151 case INDEX_op_addco:
3152 done = fold_addco(&ctx, op);
3153 break;
3154 case INDEX_op_and:
3155 case INDEX_op_and_vec:
3156 done = fold_and(&ctx, op);
3157 break;
3158 case INDEX_op_andc:
3159 case INDEX_op_andc_vec:
3160 done = fold_andc(&ctx, op);
3161 break;
3162 case INDEX_op_brcond:
3163 done = fold_brcond(&ctx, op);
3164 break;
3165 case INDEX_op_brcond2_i32:
3166 done = fold_brcond2(&ctx, op);
3167 break;
3168 case INDEX_op_bswap16:
3169 case INDEX_op_bswap32:
3170 case INDEX_op_bswap64:
3171 done = fold_bswap(&ctx, op);
3172 break;
3173 case INDEX_op_clz:
3174 case INDEX_op_ctz:
3175 done = fold_count_zeros(&ctx, op);
3176 break;
3177 case INDEX_op_ctpop:
3178 done = fold_ctpop(&ctx, op);
3179 break;
3180 case INDEX_op_deposit:
3181 done = fold_deposit(&ctx, op);
3182 break;
3183 case INDEX_op_divs:
3184 case INDEX_op_divu:
3185 done = fold_divide(&ctx, op);
3186 break;
3187 case INDEX_op_dup_vec:
3188 done = fold_dup(&ctx, op);
3189 break;
3190 case INDEX_op_dup2_vec:
3191 done = fold_dup2(&ctx, op);
3192 break;
3193 case INDEX_op_eqv:
3194 case INDEX_op_eqv_vec:
3195 done = fold_eqv(&ctx, op);
3196 break;
3197 case INDEX_op_extract:
3198 done = fold_extract(&ctx, op);
3199 break;
3200 case INDEX_op_extract2:
3201 done = fold_extract2(&ctx, op);
3202 break;
3203 case INDEX_op_ext_i32_i64:
3204 done = fold_exts(&ctx, op);
3205 break;
3206 case INDEX_op_extu_i32_i64:
3207 case INDEX_op_extrl_i64_i32:
3208 case INDEX_op_extrh_i64_i32:
3209 done = fold_extu(&ctx, op);
3210 break;
3211 case INDEX_op_ld8s:
3212 case INDEX_op_ld8u:
3213 case INDEX_op_ld16s:
3214 case INDEX_op_ld16u:
3215 case INDEX_op_ld32s:
3216 case INDEX_op_ld32u:
3217 done = fold_tcg_ld(&ctx, op);
3218 break;
3219 case INDEX_op_ld:
3220 case INDEX_op_ld_vec:
3221 done = fold_tcg_ld_memcopy(&ctx, op);
3222 break;
3223 case INDEX_op_st8:
3224 case INDEX_op_st16:
3225 case INDEX_op_st32:
3226 done = fold_tcg_st(&ctx, op);
3227 break;
3228 case INDEX_op_st:
3229 case INDEX_op_st_vec:
3230 done = fold_tcg_st_memcopy(&ctx, op);
3231 break;
3232 case INDEX_op_mb:
3233 done = fold_mb(&ctx, op);
3234 break;
3235 case INDEX_op_mov:
3236 case INDEX_op_mov_vec:
3237 done = fold_mov(&ctx, op);
3238 break;
3239 case INDEX_op_movcond:
3240 done = fold_movcond(&ctx, op);
3241 break;
3242 case INDEX_op_mul:
3243 done = fold_mul(&ctx, op);
3244 break;
3245 case INDEX_op_mulsh:
3246 case INDEX_op_muluh:
3247 done = fold_mul_highpart(&ctx, op);
3248 break;
3249 case INDEX_op_muls2:
3250 case INDEX_op_mulu2:
3251 done = fold_multiply2(&ctx, op);
3252 break;
3253 case INDEX_op_nand:
3254 case INDEX_op_nand_vec:
3255 done = fold_nand(&ctx, op);
3256 break;
3257 case INDEX_op_neg:
3258 done = fold_neg(&ctx, op);
3259 break;
3260 case INDEX_op_nor:
3261 case INDEX_op_nor_vec:
3262 done = fold_nor(&ctx, op);
3263 break;
3264 case INDEX_op_not:
3265 case INDEX_op_not_vec:
3266 done = fold_not(&ctx, op);
3267 break;
3268 case INDEX_op_or:
3269 case INDEX_op_or_vec:
3270 done = fold_or(&ctx, op);
3271 break;
3272 case INDEX_op_orc:
3273 case INDEX_op_orc_vec:
3274 done = fold_orc(&ctx, op);
3275 break;
3276 case INDEX_op_qemu_ld:
3277 done = fold_qemu_ld_1reg(&ctx, op);
3278 break;
3279 case INDEX_op_qemu_ld2:
3280 done = fold_qemu_ld_2reg(&ctx, op);
3281 break;
3282 case INDEX_op_qemu_st:
3283 case INDEX_op_qemu_st2:
3284 done = fold_qemu_st(&ctx, op);
3285 break;
3286 case INDEX_op_rems:
3287 case INDEX_op_remu:
3288 done = fold_remainder(&ctx, op);
3289 break;
3290 case INDEX_op_rotl:
3291 case INDEX_op_rotr:
3292 case INDEX_op_sar:
3293 case INDEX_op_shl:
3294 case INDEX_op_shr:
3295 done = fold_shift(&ctx, op);
3296 break;
3297 case INDEX_op_setcond:
3298 done = fold_setcond(&ctx, op);
3299 break;
3300 case INDEX_op_negsetcond:
3301 done = fold_negsetcond(&ctx, op);
3302 break;
3303 case INDEX_op_setcond2_i32:
3304 done = fold_setcond2(&ctx, op);
3305 break;
3306 case INDEX_op_cmp_vec:
3307 done = fold_cmp_vec(&ctx, op);
3308 break;
3309 case INDEX_op_cmpsel_vec:
3310 done = fold_cmpsel_vec(&ctx, op);
3311 break;
3312 case INDEX_op_bitsel_vec:
3313 done = fold_bitsel_vec(&ctx, op);
3314 break;
3315 case INDEX_op_sextract:
3316 done = fold_sextract(&ctx, op);
3317 break;
3318 case INDEX_op_sub:
3319 done = fold_sub(&ctx, op);
3320 break;
3321 case INDEX_op_subbi:
3322 done = fold_subbi(&ctx, op);
3323 break;
3324 case INDEX_op_subbio:
3325 done = fold_subbio(&ctx, op);
3326 break;
3327 case INDEX_op_subbo:
3328 done = fold_subbo(&ctx, op);
3329 break;
3330 case INDEX_op_sub_vec:
3331 done = fold_sub_vec(&ctx, op);
3332 break;
3333 case INDEX_op_xor:
3334 case INDEX_op_xor_vec:
3335 done = fold_xor(&ctx, op);
3336 break;
3337 case INDEX_op_set_label:
3338 case INDEX_op_br:
3339 case INDEX_op_exit_tb:
3340 case INDEX_op_goto_tb:
3341 case INDEX_op_goto_ptr:
3342 finish_ebb(&ctx);
3343 done = true;
3344 break;
3345 default:
3346 done = finish_folding(&ctx, op);
3347 break;
3348 }
3349 tcg_debug_assert(done);
3350 }
3351 }
3352