18f2e8c07SKirill Batuzov /* 28f2e8c07SKirill Batuzov * Optimizations for Tiny Code Generator for QEMU 38f2e8c07SKirill Batuzov * 48f2e8c07SKirill Batuzov * Copyright (c) 2010 Samsung Electronics. 58f2e8c07SKirill Batuzov * Contributed by Kirill Batuzov <batuzovk@ispras.ru> 68f2e8c07SKirill Batuzov * 78f2e8c07SKirill Batuzov * Permission is hereby granted, free of charge, to any person obtaining a copy 88f2e8c07SKirill Batuzov * of this software and associated documentation files (the "Software"), to deal 98f2e8c07SKirill Batuzov * in the Software without restriction, including without limitation the rights 108f2e8c07SKirill Batuzov * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 118f2e8c07SKirill Batuzov * copies of the Software, and to permit persons to whom the Software is 128f2e8c07SKirill Batuzov * furnished to do so, subject to the following conditions: 138f2e8c07SKirill Batuzov * 148f2e8c07SKirill Batuzov * The above copyright notice and this permission notice shall be included in 158f2e8c07SKirill Batuzov * all copies or substantial portions of the Software. 168f2e8c07SKirill Batuzov * 178f2e8c07SKirill Batuzov * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 188f2e8c07SKirill Batuzov * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 198f2e8c07SKirill Batuzov * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 208f2e8c07SKirill Batuzov * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 218f2e8c07SKirill Batuzov * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 228f2e8c07SKirill Batuzov * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 238f2e8c07SKirill Batuzov * THE SOFTWARE. 
248f2e8c07SKirill Batuzov */ 258f2e8c07SKirill Batuzov 26757e725bSPeter Maydell #include "qemu/osdep.h" 279531c078SRichard Henderson #include "qemu/int128.h" 28*ad3d0e4dSRichard Henderson #include "tcg/tcg-op-common.h" 2990163900SRichard Henderson #include "tcg-internal.h" 308f2e8c07SKirill Batuzov 318f2e8c07SKirill Batuzov #define CASE_OP_32_64(x) \ 328f2e8c07SKirill Batuzov glue(glue(case INDEX_op_, x), _i32): \ 338f2e8c07SKirill Batuzov glue(glue(case INDEX_op_, x), _i64) 348f2e8c07SKirill Batuzov 35170ba88fSRichard Henderson #define CASE_OP_32_64_VEC(x) \ 36170ba88fSRichard Henderson glue(glue(case INDEX_op_, x), _i32): \ 37170ba88fSRichard Henderson glue(glue(case INDEX_op_, x), _i64): \ 38170ba88fSRichard Henderson glue(glue(case INDEX_op_, x), _vec) 39170ba88fSRichard Henderson 406fcb98edSRichard Henderson typedef struct TempOptInfo { 41b41059ddSAurelien Jarno bool is_const; 426349039dSRichard Henderson TCGTemp *prev_copy; 436349039dSRichard Henderson TCGTemp *next_copy; 4454795544SRichard Henderson uint64_t val; 45b1fde411SRichard Henderson uint64_t z_mask; /* mask bit is 0 if and only if value bit is 0 */ 4657fe5c6dSRichard Henderson uint64_t s_mask; /* a left-aligned mask of clrsb(value) bits. */ 476fcb98edSRichard Henderson } TempOptInfo; 4822613af4SKirill Batuzov 493b3f847dSRichard Henderson typedef struct OptContext { 50dc84988aSRichard Henderson TCGContext *tcg; 51d0ed5151SRichard Henderson TCGOp *prev_mb; 523b3f847dSRichard Henderson TCGTempSet temps_used; 53137f1f44SRichard Henderson 54137f1f44SRichard Henderson /* In flight values from optimization. 
*/ 55fae450baSRichard Henderson uint64_t a_mask; /* mask bit is 0 iff value identical to first input */ 56fae450baSRichard Henderson uint64_t z_mask; /* mask bit is 0 iff value bit is 0 */ 5757fe5c6dSRichard Henderson uint64_t s_mask; /* mask of clrsb(value) bits */ 5867f84c96SRichard Henderson TCGType type; 593b3f847dSRichard Henderson } OptContext; 603b3f847dSRichard Henderson 6157fe5c6dSRichard Henderson /* Calculate the smask for a specific value. */ 6257fe5c6dSRichard Henderson static uint64_t smask_from_value(uint64_t value) 6357fe5c6dSRichard Henderson { 6457fe5c6dSRichard Henderson int rep = clrsb64(value); 6557fe5c6dSRichard Henderson return ~(~0ull >> rep); 6657fe5c6dSRichard Henderson } 6757fe5c6dSRichard Henderson 6857fe5c6dSRichard Henderson /* 6957fe5c6dSRichard Henderson * Calculate the smask for a given set of known-zeros. 7057fe5c6dSRichard Henderson * If there are lots of zeros on the left, we can consider the remainder 7157fe5c6dSRichard Henderson * an unsigned field, and thus the corresponding signed field is one bit 7257fe5c6dSRichard Henderson * larger. 7357fe5c6dSRichard Henderson */ 7457fe5c6dSRichard Henderson static uint64_t smask_from_zmask(uint64_t zmask) 7557fe5c6dSRichard Henderson { 7657fe5c6dSRichard Henderson /* 7757fe5c6dSRichard Henderson * Only the 0 bits are significant for zmask, thus the msb itself 7857fe5c6dSRichard Henderson * must be zero, else we have no sign information. 7957fe5c6dSRichard Henderson */ 8057fe5c6dSRichard Henderson int rep = clz64(zmask); 8157fe5c6dSRichard Henderson if (rep == 0) { 8257fe5c6dSRichard Henderson return 0; 8357fe5c6dSRichard Henderson } 8457fe5c6dSRichard Henderson rep -= 1; 8557fe5c6dSRichard Henderson return ~(~0ull >> rep); 8657fe5c6dSRichard Henderson } 8757fe5c6dSRichard Henderson 8893a967fbSRichard Henderson /* 8993a967fbSRichard Henderson * Recreate a properly left-aligned smask after manipulation. 
9093a967fbSRichard Henderson * Some bit-shuffling, particularly shifts and rotates, may 9193a967fbSRichard Henderson * retain sign bits on the left, but may scatter disconnected 9293a967fbSRichard Henderson * sign bits on the right. Retain only what remains to the left. 9393a967fbSRichard Henderson */ 9493a967fbSRichard Henderson static uint64_t smask_from_smask(int64_t smask) 9593a967fbSRichard Henderson { 9693a967fbSRichard Henderson /* Only the 1 bits are significant for smask */ 9793a967fbSRichard Henderson return smask_from_zmask(~smask); 9893a967fbSRichard Henderson } 9993a967fbSRichard Henderson 1006fcb98edSRichard Henderson static inline TempOptInfo *ts_info(TCGTemp *ts) 101d9c769c6SAurelien Jarno { 1026349039dSRichard Henderson return ts->state_ptr; 103d9c769c6SAurelien Jarno } 104d9c769c6SAurelien Jarno 1056fcb98edSRichard Henderson static inline TempOptInfo *arg_info(TCGArg arg) 106d9c769c6SAurelien Jarno { 1076349039dSRichard Henderson return ts_info(arg_temp(arg)); 1086349039dSRichard Henderson } 1096349039dSRichard Henderson 1106349039dSRichard Henderson static inline bool ts_is_const(TCGTemp *ts) 1116349039dSRichard Henderson { 1126349039dSRichard Henderson return ts_info(ts)->is_const; 1136349039dSRichard Henderson } 1146349039dSRichard Henderson 1156349039dSRichard Henderson static inline bool arg_is_const(TCGArg arg) 1166349039dSRichard Henderson { 1176349039dSRichard Henderson return ts_is_const(arg_temp(arg)); 1186349039dSRichard Henderson } 1196349039dSRichard Henderson 1206349039dSRichard Henderson static inline bool ts_is_copy(TCGTemp *ts) 1216349039dSRichard Henderson { 1226349039dSRichard Henderson return ts_info(ts)->next_copy != ts; 123d9c769c6SAurelien Jarno } 124d9c769c6SAurelien Jarno 125b41059ddSAurelien Jarno /* Reset TEMP's state, possibly removing the temp for the list of copies. 
*/ 1266349039dSRichard Henderson static void reset_ts(TCGTemp *ts) 12722613af4SKirill Batuzov { 1286fcb98edSRichard Henderson TempOptInfo *ti = ts_info(ts); 1296fcb98edSRichard Henderson TempOptInfo *pi = ts_info(ti->prev_copy); 1306fcb98edSRichard Henderson TempOptInfo *ni = ts_info(ti->next_copy); 1316349039dSRichard Henderson 1326349039dSRichard Henderson ni->prev_copy = ti->prev_copy; 1336349039dSRichard Henderson pi->next_copy = ti->next_copy; 1346349039dSRichard Henderson ti->next_copy = ts; 1356349039dSRichard Henderson ti->prev_copy = ts; 1366349039dSRichard Henderson ti->is_const = false; 137b1fde411SRichard Henderson ti->z_mask = -1; 13857fe5c6dSRichard Henderson ti->s_mask = 0; 1396349039dSRichard Henderson } 1406349039dSRichard Henderson 1416349039dSRichard Henderson static void reset_temp(TCGArg arg) 1426349039dSRichard Henderson { 1436349039dSRichard Henderson reset_ts(arg_temp(arg)); 14422613af4SKirill Batuzov } 14522613af4SKirill Batuzov 1461208d7ddSAurelien Jarno /* Initialize and activate a temporary. 
*/ 1473b3f847dSRichard Henderson static void init_ts_info(OptContext *ctx, TCGTemp *ts) 1481208d7ddSAurelien Jarno { 1496349039dSRichard Henderson size_t idx = temp_idx(ts); 1508f17a975SRichard Henderson TempOptInfo *ti; 1516349039dSRichard Henderson 1523b3f847dSRichard Henderson if (test_bit(idx, ctx->temps_used.l)) { 1538f17a975SRichard Henderson return; 1548f17a975SRichard Henderson } 1553b3f847dSRichard Henderson set_bit(idx, ctx->temps_used.l); 1568f17a975SRichard Henderson 1578f17a975SRichard Henderson ti = ts->state_ptr; 1588f17a975SRichard Henderson if (ti == NULL) { 1598f17a975SRichard Henderson ti = tcg_malloc(sizeof(TempOptInfo)); 1606349039dSRichard Henderson ts->state_ptr = ti; 1618f17a975SRichard Henderson } 1628f17a975SRichard Henderson 1636349039dSRichard Henderson ti->next_copy = ts; 1646349039dSRichard Henderson ti->prev_copy = ts; 165c0522136SRichard Henderson if (ts->kind == TEMP_CONST) { 166c0522136SRichard Henderson ti->is_const = true; 1678f17a975SRichard Henderson ti->val = ts->val; 168b1fde411SRichard Henderson ti->z_mask = ts->val; 16957fe5c6dSRichard Henderson ti->s_mask = smask_from_value(ts->val); 170c0522136SRichard Henderson } else { 1716349039dSRichard Henderson ti->is_const = false; 172b1fde411SRichard Henderson ti->z_mask = -1; 17357fe5c6dSRichard Henderson ti->s_mask = 0; 174c0522136SRichard Henderson } 1751208d7ddSAurelien Jarno } 1761208d7ddSAurelien Jarno 1776349039dSRichard Henderson static TCGTemp *find_better_copy(TCGContext *s, TCGTemp *ts) 178e590d4e6SAurelien Jarno { 1794c868ce6SRichard Henderson TCGTemp *i, *g, *l; 180e590d4e6SAurelien Jarno 1814c868ce6SRichard Henderson /* If this is already readonly, we can't do better. 
*/ 1824c868ce6SRichard Henderson if (temp_readonly(ts)) { 1836349039dSRichard Henderson return ts; 184e590d4e6SAurelien Jarno } 185e590d4e6SAurelien Jarno 1864c868ce6SRichard Henderson g = l = NULL; 1876349039dSRichard Henderson for (i = ts_info(ts)->next_copy; i != ts; i = ts_info(i)->next_copy) { 1884c868ce6SRichard Henderson if (temp_readonly(i)) { 189e590d4e6SAurelien Jarno return i; 1904c868ce6SRichard Henderson } else if (i->kind > ts->kind) { 1914c868ce6SRichard Henderson if (i->kind == TEMP_GLOBAL) { 1924c868ce6SRichard Henderson g = i; 193f57c6915SRichard Henderson } else if (i->kind == TEMP_TB) { 1944c868ce6SRichard Henderson l = i; 195e590d4e6SAurelien Jarno } 196e590d4e6SAurelien Jarno } 197e590d4e6SAurelien Jarno } 198e590d4e6SAurelien Jarno 1994c868ce6SRichard Henderson /* If we didn't find a better representation, return the same temp. */ 2004c868ce6SRichard Henderson return g ? g : l ? l : ts; 201e590d4e6SAurelien Jarno } 202e590d4e6SAurelien Jarno 2036349039dSRichard Henderson static bool ts_are_copies(TCGTemp *ts1, TCGTemp *ts2) 204e590d4e6SAurelien Jarno { 2056349039dSRichard Henderson TCGTemp *i; 206e590d4e6SAurelien Jarno 2076349039dSRichard Henderson if (ts1 == ts2) { 208e590d4e6SAurelien Jarno return true; 209e590d4e6SAurelien Jarno } 210e590d4e6SAurelien Jarno 2116349039dSRichard Henderson if (!ts_is_copy(ts1) || !ts_is_copy(ts2)) { 212e590d4e6SAurelien Jarno return false; 213e590d4e6SAurelien Jarno } 214e590d4e6SAurelien Jarno 2156349039dSRichard Henderson for (i = ts_info(ts1)->next_copy; i != ts1; i = ts_info(i)->next_copy) { 2166349039dSRichard Henderson if (i == ts2) { 217e590d4e6SAurelien Jarno return true; 218e590d4e6SAurelien Jarno } 219e590d4e6SAurelien Jarno } 220e590d4e6SAurelien Jarno 221e590d4e6SAurelien Jarno return false; 222e590d4e6SAurelien Jarno } 223e590d4e6SAurelien Jarno 2246349039dSRichard Henderson static bool args_are_copies(TCGArg arg1, TCGArg arg2) 2256349039dSRichard Henderson { 2266349039dSRichard Henderson return 
ts_are_copies(arg_temp(arg1), arg_temp(arg2)); 2276349039dSRichard Henderson } 2286349039dSRichard Henderson 2296b99d5bfSRichard Henderson static bool tcg_opt_gen_mov(OptContext *ctx, TCGOp *op, TCGArg dst, TCGArg src) 23022613af4SKirill Batuzov { 2316349039dSRichard Henderson TCGTemp *dst_ts = arg_temp(dst); 2326349039dSRichard Henderson TCGTemp *src_ts = arg_temp(src); 2336fcb98edSRichard Henderson TempOptInfo *di; 2346fcb98edSRichard Henderson TempOptInfo *si; 2356349039dSRichard Henderson TCGOpcode new_op; 2366349039dSRichard Henderson 2376349039dSRichard Henderson if (ts_are_copies(dst_ts, src_ts)) { 238dc84988aSRichard Henderson tcg_op_remove(ctx->tcg, op); 2396b99d5bfSRichard Henderson return true; 2405365718aSAurelien Jarno } 2415365718aSAurelien Jarno 2426349039dSRichard Henderson reset_ts(dst_ts); 2436349039dSRichard Henderson di = ts_info(dst_ts); 2446349039dSRichard Henderson si = ts_info(src_ts); 24567f84c96SRichard Henderson 24667f84c96SRichard Henderson switch (ctx->type) { 24767f84c96SRichard Henderson case TCG_TYPE_I32: 248170ba88fSRichard Henderson new_op = INDEX_op_mov_i32; 24967f84c96SRichard Henderson break; 25067f84c96SRichard Henderson case TCG_TYPE_I64: 25167f84c96SRichard Henderson new_op = INDEX_op_mov_i64; 25267f84c96SRichard Henderson break; 25367f84c96SRichard Henderson case TCG_TYPE_V64: 25467f84c96SRichard Henderson case TCG_TYPE_V128: 25567f84c96SRichard Henderson case TCG_TYPE_V256: 25667f84c96SRichard Henderson /* TCGOP_VECL and TCGOP_VECE remain unchanged. 
*/ 25767f84c96SRichard Henderson new_op = INDEX_op_mov_vec; 25867f84c96SRichard Henderson break; 25967f84c96SRichard Henderson default: 26067f84c96SRichard Henderson g_assert_not_reached(); 261170ba88fSRichard Henderson } 262c45cb8bbSRichard Henderson op->opc = new_op; 2636349039dSRichard Henderson op->args[0] = dst; 2646349039dSRichard Henderson op->args[1] = src; 265a62f6f56SRichard Henderson 266faa2e100SRichard Henderson di->z_mask = si->z_mask; 26757fe5c6dSRichard Henderson di->s_mask = si->s_mask; 26824666bafSRichard Henderson 2696349039dSRichard Henderson if (src_ts->type == dst_ts->type) { 2706fcb98edSRichard Henderson TempOptInfo *ni = ts_info(si->next_copy); 2716349039dSRichard Henderson 2726349039dSRichard Henderson di->next_copy = si->next_copy; 2736349039dSRichard Henderson di->prev_copy = src_ts; 2746349039dSRichard Henderson ni->prev_copy = dst_ts; 2756349039dSRichard Henderson si->next_copy = dst_ts; 2766349039dSRichard Henderson di->is_const = si->is_const; 2776349039dSRichard Henderson di->val = si->val; 27822613af4SKirill Batuzov } 2796b99d5bfSRichard Henderson return true; 28022613af4SKirill Batuzov } 28122613af4SKirill Batuzov 2826b99d5bfSRichard Henderson static bool tcg_opt_gen_movi(OptContext *ctx, TCGOp *op, 283dc84988aSRichard Henderson TCGArg dst, uint64_t val) 2848fe35e04SRichard Henderson { 285faa2e100SRichard Henderson TCGTemp *tv; 28667f84c96SRichard Henderson 287faa2e100SRichard Henderson if (ctx->type == TCG_TYPE_I32) { 288faa2e100SRichard Henderson val = (int32_t)val; 289faa2e100SRichard Henderson } 290faa2e100SRichard Henderson 291faa2e100SRichard Henderson /* Convert movi to mov with constant temp. 
*/ 292faa2e100SRichard Henderson tv = tcg_constant_internal(ctx->type, val); 2933b3f847dSRichard Henderson init_ts_info(ctx, tv); 2946b99d5bfSRichard Henderson return tcg_opt_gen_mov(ctx, op, dst, temp_arg(tv)); 2958fe35e04SRichard Henderson } 2968fe35e04SRichard Henderson 29754795544SRichard Henderson static uint64_t do_constant_folding_2(TCGOpcode op, uint64_t x, uint64_t y) 29853108fb5SKirill Batuzov { 29903271524SRichard Henderson uint64_t l64, h64; 30003271524SRichard Henderson 30153108fb5SKirill Batuzov switch (op) { 30253108fb5SKirill Batuzov CASE_OP_32_64(add): 30353108fb5SKirill Batuzov return x + y; 30453108fb5SKirill Batuzov 30553108fb5SKirill Batuzov CASE_OP_32_64(sub): 30653108fb5SKirill Batuzov return x - y; 30753108fb5SKirill Batuzov 30853108fb5SKirill Batuzov CASE_OP_32_64(mul): 30953108fb5SKirill Batuzov return x * y; 31053108fb5SKirill Batuzov 311c578ff18SRichard Henderson CASE_OP_32_64_VEC(and): 3129a81090bSKirill Batuzov return x & y; 3139a81090bSKirill Batuzov 314c578ff18SRichard Henderson CASE_OP_32_64_VEC(or): 3159a81090bSKirill Batuzov return x | y; 3169a81090bSKirill Batuzov 317c578ff18SRichard Henderson CASE_OP_32_64_VEC(xor): 3189a81090bSKirill Batuzov return x ^ y; 3199a81090bSKirill Batuzov 32055c0975cSKirill Batuzov case INDEX_op_shl_i32: 32150c5c4d1SRichard Henderson return (uint32_t)x << (y & 31); 32255c0975cSKirill Batuzov 32355c0975cSKirill Batuzov case INDEX_op_shl_i64: 32450c5c4d1SRichard Henderson return (uint64_t)x << (y & 63); 32555c0975cSKirill Batuzov 32655c0975cSKirill Batuzov case INDEX_op_shr_i32: 32750c5c4d1SRichard Henderson return (uint32_t)x >> (y & 31); 32855c0975cSKirill Batuzov 32955c0975cSKirill Batuzov case INDEX_op_shr_i64: 33050c5c4d1SRichard Henderson return (uint64_t)x >> (y & 63); 33155c0975cSKirill Batuzov 33255c0975cSKirill Batuzov case INDEX_op_sar_i32: 33350c5c4d1SRichard Henderson return (int32_t)x >> (y & 31); 33455c0975cSKirill Batuzov 33555c0975cSKirill Batuzov case INDEX_op_sar_i64: 
33650c5c4d1SRichard Henderson return (int64_t)x >> (y & 63); 33755c0975cSKirill Batuzov 33855c0975cSKirill Batuzov case INDEX_op_rotr_i32: 33950c5c4d1SRichard Henderson return ror32(x, y & 31); 34055c0975cSKirill Batuzov 34155c0975cSKirill Batuzov case INDEX_op_rotr_i64: 34250c5c4d1SRichard Henderson return ror64(x, y & 63); 34355c0975cSKirill Batuzov 34455c0975cSKirill Batuzov case INDEX_op_rotl_i32: 34550c5c4d1SRichard Henderson return rol32(x, y & 31); 34655c0975cSKirill Batuzov 34755c0975cSKirill Batuzov case INDEX_op_rotl_i64: 34850c5c4d1SRichard Henderson return rol64(x, y & 63); 34955c0975cSKirill Batuzov 350c578ff18SRichard Henderson CASE_OP_32_64_VEC(not): 351a640f031SKirill Batuzov return ~x; 352a640f031SKirill Batuzov 353cb25c80aSRichard Henderson CASE_OP_32_64(neg): 354cb25c80aSRichard Henderson return -x; 355cb25c80aSRichard Henderson 356c578ff18SRichard Henderson CASE_OP_32_64_VEC(andc): 357cb25c80aSRichard Henderson return x & ~y; 358cb25c80aSRichard Henderson 359c578ff18SRichard Henderson CASE_OP_32_64_VEC(orc): 360cb25c80aSRichard Henderson return x | ~y; 361cb25c80aSRichard Henderson 362ed523473SRichard Henderson CASE_OP_32_64_VEC(eqv): 363cb25c80aSRichard Henderson return ~(x ^ y); 364cb25c80aSRichard Henderson 365ed523473SRichard Henderson CASE_OP_32_64_VEC(nand): 366cb25c80aSRichard Henderson return ~(x & y); 367cb25c80aSRichard Henderson 368ed523473SRichard Henderson CASE_OP_32_64_VEC(nor): 369cb25c80aSRichard Henderson return ~(x | y); 370cb25c80aSRichard Henderson 3710e28d006SRichard Henderson case INDEX_op_clz_i32: 3720e28d006SRichard Henderson return (uint32_t)x ? clz32(x) : y; 3730e28d006SRichard Henderson 3740e28d006SRichard Henderson case INDEX_op_clz_i64: 3750e28d006SRichard Henderson return x ? clz64(x) : y; 3760e28d006SRichard Henderson 3770e28d006SRichard Henderson case INDEX_op_ctz_i32: 3780e28d006SRichard Henderson return (uint32_t)x ? 
ctz32(x) : y; 3790e28d006SRichard Henderson 3800e28d006SRichard Henderson case INDEX_op_ctz_i64: 3810e28d006SRichard Henderson return x ? ctz64(x) : y; 3820e28d006SRichard Henderson 383a768e4e9SRichard Henderson case INDEX_op_ctpop_i32: 384a768e4e9SRichard Henderson return ctpop32(x); 385a768e4e9SRichard Henderson 386a768e4e9SRichard Henderson case INDEX_op_ctpop_i64: 387a768e4e9SRichard Henderson return ctpop64(x); 388a768e4e9SRichard Henderson 38925c4d9ccSRichard Henderson CASE_OP_32_64(ext8s): 390a640f031SKirill Batuzov return (int8_t)x; 391a640f031SKirill Batuzov 39225c4d9ccSRichard Henderson CASE_OP_32_64(ext16s): 393a640f031SKirill Batuzov return (int16_t)x; 394a640f031SKirill Batuzov 39525c4d9ccSRichard Henderson CASE_OP_32_64(ext8u): 396a640f031SKirill Batuzov return (uint8_t)x; 397a640f031SKirill Batuzov 39825c4d9ccSRichard Henderson CASE_OP_32_64(ext16u): 399a640f031SKirill Batuzov return (uint16_t)x; 400a640f031SKirill Batuzov 4016498594cSRichard Henderson CASE_OP_32_64(bswap16): 4020b76ff8fSRichard Henderson x = bswap16(x); 4030b76ff8fSRichard Henderson return y & TCG_BSWAP_OS ? (int16_t)x : x; 4046498594cSRichard Henderson 4056498594cSRichard Henderson CASE_OP_32_64(bswap32): 4060b76ff8fSRichard Henderson x = bswap32(x); 4070b76ff8fSRichard Henderson return y & TCG_BSWAP_OS ? 
(int32_t)x : x; 4086498594cSRichard Henderson 4096498594cSRichard Henderson case INDEX_op_bswap64_i64: 4106498594cSRichard Henderson return bswap64(x); 4116498594cSRichard Henderson 4128bcb5c8fSAurelien Jarno case INDEX_op_ext_i32_i64: 413a640f031SKirill Batuzov case INDEX_op_ext32s_i64: 414a640f031SKirill Batuzov return (int32_t)x; 415a640f031SKirill Batuzov 4168bcb5c8fSAurelien Jarno case INDEX_op_extu_i32_i64: 417609ad705SRichard Henderson case INDEX_op_extrl_i64_i32: 418a640f031SKirill Batuzov case INDEX_op_ext32u_i64: 419a640f031SKirill Batuzov return (uint32_t)x; 420a640f031SKirill Batuzov 421609ad705SRichard Henderson case INDEX_op_extrh_i64_i32: 422609ad705SRichard Henderson return (uint64_t)x >> 32; 423609ad705SRichard Henderson 42403271524SRichard Henderson case INDEX_op_muluh_i32: 42503271524SRichard Henderson return ((uint64_t)(uint32_t)x * (uint32_t)y) >> 32; 42603271524SRichard Henderson case INDEX_op_mulsh_i32: 42703271524SRichard Henderson return ((int64_t)(int32_t)x * (int32_t)y) >> 32; 42803271524SRichard Henderson 42903271524SRichard Henderson case INDEX_op_muluh_i64: 43003271524SRichard Henderson mulu64(&l64, &h64, x, y); 43103271524SRichard Henderson return h64; 43203271524SRichard Henderson case INDEX_op_mulsh_i64: 43303271524SRichard Henderson muls64(&l64, &h64, x, y); 43403271524SRichard Henderson return h64; 43503271524SRichard Henderson 43601547f7fSRichard Henderson case INDEX_op_div_i32: 43701547f7fSRichard Henderson /* Avoid crashing on divide by zero, otherwise undefined. */ 43801547f7fSRichard Henderson return (int32_t)x / ((int32_t)y ? : 1); 43901547f7fSRichard Henderson case INDEX_op_divu_i32: 44001547f7fSRichard Henderson return (uint32_t)x / ((uint32_t)y ? : 1); 44101547f7fSRichard Henderson case INDEX_op_div_i64: 44201547f7fSRichard Henderson return (int64_t)x / ((int64_t)y ? : 1); 44301547f7fSRichard Henderson case INDEX_op_divu_i64: 44401547f7fSRichard Henderson return (uint64_t)x / ((uint64_t)y ? 
: 1); 44501547f7fSRichard Henderson 44601547f7fSRichard Henderson case INDEX_op_rem_i32: 44701547f7fSRichard Henderson return (int32_t)x % ((int32_t)y ? : 1); 44801547f7fSRichard Henderson case INDEX_op_remu_i32: 44901547f7fSRichard Henderson return (uint32_t)x % ((uint32_t)y ? : 1); 45001547f7fSRichard Henderson case INDEX_op_rem_i64: 45101547f7fSRichard Henderson return (int64_t)x % ((int64_t)y ? : 1); 45201547f7fSRichard Henderson case INDEX_op_remu_i64: 45301547f7fSRichard Henderson return (uint64_t)x % ((uint64_t)y ? : 1); 45401547f7fSRichard Henderson 45553108fb5SKirill Batuzov default: 456732e89f4SRichard Henderson g_assert_not_reached(); 45753108fb5SKirill Batuzov } 45853108fb5SKirill Batuzov } 45953108fb5SKirill Batuzov 46067f84c96SRichard Henderson static uint64_t do_constant_folding(TCGOpcode op, TCGType type, 46167f84c96SRichard Henderson uint64_t x, uint64_t y) 46253108fb5SKirill Batuzov { 46354795544SRichard Henderson uint64_t res = do_constant_folding_2(op, x, y); 46467f84c96SRichard Henderson if (type == TCG_TYPE_I32) { 46529f3ff8dSAurelien Jarno res = (int32_t)res; 46653108fb5SKirill Batuzov } 46753108fb5SKirill Batuzov return res; 46853108fb5SKirill Batuzov } 46953108fb5SKirill Batuzov 4709519da7eSRichard Henderson static bool do_constant_folding_cond_32(uint32_t x, uint32_t y, TCGCond c) 471f8dd19e5SAurelien Jarno { 472f8dd19e5SAurelien Jarno switch (c) { 473f8dd19e5SAurelien Jarno case TCG_COND_EQ: 4749519da7eSRichard Henderson return x == y; 475f8dd19e5SAurelien Jarno case TCG_COND_NE: 4769519da7eSRichard Henderson return x != y; 477f8dd19e5SAurelien Jarno case TCG_COND_LT: 4789519da7eSRichard Henderson return (int32_t)x < (int32_t)y; 479f8dd19e5SAurelien Jarno case TCG_COND_GE: 4809519da7eSRichard Henderson return (int32_t)x >= (int32_t)y; 481f8dd19e5SAurelien Jarno case TCG_COND_LE: 4829519da7eSRichard Henderson return (int32_t)x <= (int32_t)y; 483f8dd19e5SAurelien Jarno case TCG_COND_GT: 4849519da7eSRichard Henderson return (int32_t)x > 
(int32_t)y; 485f8dd19e5SAurelien Jarno case TCG_COND_LTU: 4869519da7eSRichard Henderson return x < y; 487f8dd19e5SAurelien Jarno case TCG_COND_GEU: 4889519da7eSRichard Henderson return x >= y; 489f8dd19e5SAurelien Jarno case TCG_COND_LEU: 4909519da7eSRichard Henderson return x <= y; 491f8dd19e5SAurelien Jarno case TCG_COND_GTU: 4929519da7eSRichard Henderson return x > y; 4930aed257fSRichard Henderson default: 494732e89f4SRichard Henderson g_assert_not_reached(); 495f8dd19e5SAurelien Jarno } 4969519da7eSRichard Henderson } 4979519da7eSRichard Henderson 4989519da7eSRichard Henderson static bool do_constant_folding_cond_64(uint64_t x, uint64_t y, TCGCond c) 4999519da7eSRichard Henderson { 500f8dd19e5SAurelien Jarno switch (c) { 501f8dd19e5SAurelien Jarno case TCG_COND_EQ: 5029519da7eSRichard Henderson return x == y; 503f8dd19e5SAurelien Jarno case TCG_COND_NE: 5049519da7eSRichard Henderson return x != y; 505f8dd19e5SAurelien Jarno case TCG_COND_LT: 5069519da7eSRichard Henderson return (int64_t)x < (int64_t)y; 507f8dd19e5SAurelien Jarno case TCG_COND_GE: 5089519da7eSRichard Henderson return (int64_t)x >= (int64_t)y; 509f8dd19e5SAurelien Jarno case TCG_COND_LE: 5109519da7eSRichard Henderson return (int64_t)x <= (int64_t)y; 511f8dd19e5SAurelien Jarno case TCG_COND_GT: 5129519da7eSRichard Henderson return (int64_t)x > (int64_t)y; 513f8dd19e5SAurelien Jarno case TCG_COND_LTU: 5149519da7eSRichard Henderson return x < y; 515f8dd19e5SAurelien Jarno case TCG_COND_GEU: 5169519da7eSRichard Henderson return x >= y; 517f8dd19e5SAurelien Jarno case TCG_COND_LEU: 5189519da7eSRichard Henderson return x <= y; 519f8dd19e5SAurelien Jarno case TCG_COND_GTU: 5209519da7eSRichard Henderson return x > y; 5210aed257fSRichard Henderson default: 522732e89f4SRichard Henderson g_assert_not_reached(); 523f8dd19e5SAurelien Jarno } 524f8dd19e5SAurelien Jarno } 5259519da7eSRichard Henderson 5269519da7eSRichard Henderson static bool do_constant_folding_cond_eq(TCGCond c) 5279519da7eSRichard Henderson 
{ 528b336ceb6SAurelien Jarno switch (c) { 529b336ceb6SAurelien Jarno case TCG_COND_GT: 530b336ceb6SAurelien Jarno case TCG_COND_LTU: 531b336ceb6SAurelien Jarno case TCG_COND_LT: 532b336ceb6SAurelien Jarno case TCG_COND_GTU: 533b336ceb6SAurelien Jarno case TCG_COND_NE: 534b336ceb6SAurelien Jarno return 0; 535b336ceb6SAurelien Jarno case TCG_COND_GE: 536b336ceb6SAurelien Jarno case TCG_COND_GEU: 537b336ceb6SAurelien Jarno case TCG_COND_LE: 538b336ceb6SAurelien Jarno case TCG_COND_LEU: 539b336ceb6SAurelien Jarno case TCG_COND_EQ: 540b336ceb6SAurelien Jarno return 1; 5410aed257fSRichard Henderson default: 542732e89f4SRichard Henderson g_assert_not_reached(); 543b336ceb6SAurelien Jarno } 5449519da7eSRichard Henderson } 5459519da7eSRichard Henderson 5468d57bf1eSRichard Henderson /* 5478d57bf1eSRichard Henderson * Return -1 if the condition can't be simplified, 5488d57bf1eSRichard Henderson * and the result of the condition (0 or 1) if it can. 5498d57bf1eSRichard Henderson */ 55067f84c96SRichard Henderson static int do_constant_folding_cond(TCGType type, TCGArg x, 5519519da7eSRichard Henderson TCGArg y, TCGCond c) 5529519da7eSRichard Henderson { 5539becc36fSAlex Bennée if (arg_is_const(x) && arg_is_const(y)) { 55454795544SRichard Henderson uint64_t xv = arg_info(x)->val; 55554795544SRichard Henderson uint64_t yv = arg_info(y)->val; 55654795544SRichard Henderson 55767f84c96SRichard Henderson switch (type) { 55867f84c96SRichard Henderson case TCG_TYPE_I32: 559170ba88fSRichard Henderson return do_constant_folding_cond_32(xv, yv, c); 56067f84c96SRichard Henderson case TCG_TYPE_I64: 56167f84c96SRichard Henderson return do_constant_folding_cond_64(xv, yv, c); 56267f84c96SRichard Henderson default: 56367f84c96SRichard Henderson /* Only scalar comparisons are optimizable */ 56467f84c96SRichard Henderson return -1; 5659519da7eSRichard Henderson } 5666349039dSRichard Henderson } else if (args_are_copies(x, y)) { 5679519da7eSRichard Henderson return do_constant_folding_cond_eq(c); 
5689becc36fSAlex Bennée } else if (arg_is_const(y) && arg_info(y)->val == 0) { 569b336ceb6SAurelien Jarno switch (c) { 570b336ceb6SAurelien Jarno case TCG_COND_LTU: 571b336ceb6SAurelien Jarno return 0; 572b336ceb6SAurelien Jarno case TCG_COND_GEU: 573b336ceb6SAurelien Jarno return 1; 574b336ceb6SAurelien Jarno default: 5758d57bf1eSRichard Henderson return -1; 576b336ceb6SAurelien Jarno } 577b336ceb6SAurelien Jarno } 5788d57bf1eSRichard Henderson return -1; 579f8dd19e5SAurelien Jarno } 580f8dd19e5SAurelien Jarno 5818d57bf1eSRichard Henderson /* 5828d57bf1eSRichard Henderson * Return -1 if the condition can't be simplified, 5838d57bf1eSRichard Henderson * and the result of the condition (0 or 1) if it can. 5848d57bf1eSRichard Henderson */ 5858d57bf1eSRichard Henderson static int do_constant_folding_cond2(TCGArg *p1, TCGArg *p2, TCGCond c) 5866c4382f8SRichard Henderson { 5876c4382f8SRichard Henderson TCGArg al = p1[0], ah = p1[1]; 5886c4382f8SRichard Henderson TCGArg bl = p2[0], bh = p2[1]; 5896c4382f8SRichard Henderson 5906349039dSRichard Henderson if (arg_is_const(bl) && arg_is_const(bh)) { 5916349039dSRichard Henderson tcg_target_ulong blv = arg_info(bl)->val; 5926349039dSRichard Henderson tcg_target_ulong bhv = arg_info(bh)->val; 5936349039dSRichard Henderson uint64_t b = deposit64(blv, 32, 32, bhv); 5946c4382f8SRichard Henderson 5956349039dSRichard Henderson if (arg_is_const(al) && arg_is_const(ah)) { 5966349039dSRichard Henderson tcg_target_ulong alv = arg_info(al)->val; 5976349039dSRichard Henderson tcg_target_ulong ahv = arg_info(ah)->val; 5986349039dSRichard Henderson uint64_t a = deposit64(alv, 32, 32, ahv); 5996c4382f8SRichard Henderson return do_constant_folding_cond_64(a, b, c); 6006c4382f8SRichard Henderson } 6016c4382f8SRichard Henderson if (b == 0) { 6026c4382f8SRichard Henderson switch (c) { 6036c4382f8SRichard Henderson case TCG_COND_LTU: 6046c4382f8SRichard Henderson return 0; 6056c4382f8SRichard Henderson case TCG_COND_GEU: 6066c4382f8SRichard 
return 1;
            default:
                break;
            }
        }
    }
    if (args_are_copies(al, bl) && args_are_copies(ah, bh)) {
        return do_constant_folding_cond_eq(c);
    }
    return -1;
}

/**
 * swap_commutative:
 * @dest: TCGArg of the destination argument, or NO_DEST.
 * @p1: first paired argument
 * @p2: second paired argument
 *
 * If *@p1 is a constant and *@p2 is not, swap.
 * Otherwise, if both or neither of them are constant and
 * *@p2 matches @dest, swap.
 * Return true if a swap was performed.
 */

/* Value passed as @dest when there is no destination to match. */
#define NO_DEST temp_arg(NULL)

static bool swap_commutative(TCGArg dest, TCGArg *p1, TCGArg *p2)
{
    TCGArg a1 = *p1, a2 = *p2;
    /* +1 if only a1 is constant, -1 if only a2 is, 0 if both or neither. */
    int sum = 0;
    sum += arg_is_const(a1);
    sum -= arg_is_const(a2);

    /* Prefer the constant in second argument, and then the form
       op a, a, b, which is better handled on non-RISC hosts. */
    if (sum > 0 || (sum == 0 && dest == a2)) {
        *p1 = a2;
        *p2 = a1;
        return true;
    }
    return false;
}

/**
 * swap_commutative2:
 * @p1: first operand pair, elements [0] and [1]
 * @p2: second operand pair, elements [0] and [1]
 *
 * Count the constant elements of each pair.  If @p1 holds more of
 * them than @p2, exchange the pairs element by element, so that
 * @p2 becomes the pair with more constants.
 * Return true if a swap was performed.
 */
static bool swap_commutative2(TCGArg *p1, TCGArg *p2)
{
    int sum = 0;
    sum += arg_is_const(p1[0]);
    sum += arg_is_const(p1[1]);
    sum -= arg_is_const(p2[0]);
    sum -= arg_is_const(p2[1]);
    if (sum > 0) {
        TCGArg t;
        t = p1[0], p1[0] = p2[0], p2[0] = t;
        t = p1[1], p1[1] = p2[1], p2[1] = t;
        return true;
    }
    return false;
}

/*
 * Call init_ts_info() on the temp behind each of the first
 * @nb_args arguments of @op.
 */
static void init_arguments(OptContext *ctx, TCGOp *op, int nb_args)
{
    for (int i = 0; i < nb_args; i++) {
        TCGTemp *ts = arg_temp(op->args[i]);
        init_ts_info(ctx, ts);
    }
}

/*
 * Walk the input arguments of @op, which occupy the indices
 * @nb_oargs .. @nb_oargs + @nb_iargs - 1.  Each input whose temp
 * is a copy (ts_is_copy) is replaced by the temp selected by
 * find_better_copy().  Output arguments are left untouched.
 */
static void copy_propagate(OptContext *ctx, TCGOp *op,
                           int nb_oargs, int nb_iargs)
{
    TCGContext *s = ctx->tcg;

    for (int i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
        TCGTemp *ts = arg_temp(op->args[i]);
        if (ts_is_copy(ts)) {
            op->args[i] = temp_arg(find_better_copy(s, ts));
        }
    }
}

/*
 * Bookkeeping for an @op that was not folded away.
 *
 * If the opcode is flagged TCG_OPF_BB_END, clear ctx->temps_used
 * and ctx->prev_mb and stop.  Otherwise call reset_ts() on every
 * output temp and store ctx->z_mask and ctx->s_mask into the info
 * of the first output.
 */
static void finish_folding(OptContext *ctx, TCGOp *op)
{
    const TCGOpDef *def = &tcg_op_defs[op->opc];
    int i, nb_oargs;

    /*
     * For an opcode that ends a BB, reset all temp data.
     * We do no cross-BB optimization.
     */
    if (def->flags & TCG_OPF_BB_END) {
        memset(&ctx->temps_used, 0, sizeof(ctx->temps_used));
        ctx->prev_mb = NULL;
        return;
    }

    nb_oargs = def->nb_oargs;
    for (i = 0; i < nb_oargs; i++) {
        TCGTemp *ts = arg_temp(op->args[i]);
        reset_ts(ts);
        /*
         * Save the corresponding known-zero/sign bits mask for the
         * first output argument (only one supported so far).
         */
        if (i == 0) {
            ts_info(ts)->z_mask = ctx->z_mask;
            ts_info(ts)->s_mask = ctx->s_mask;
        }
    }
}

/*
 * The fold_* functions return true when processing is complete,
 * usually by folding the operation to a constant or to a copy,
 * and calling tcg_opt_gen_{mov,movi}. They may do other things,
 * like collect information about the value produced, for use in
 * optimizing a subsequent operation.
 *
 * These first fold_* functions are all helpers, used by other
 * folders for more specific operations.
 */

/*
 * Unary constant folding: if the input args[1] is constant, evaluate
 * the opcode with do_constant_folding() (second operand passed as 0)
 * and replace @op with a movi of the result into args[0].
 */
static bool fold_const1(OptContext *ctx, TCGOp *op)
{
    if (arg_is_const(op->args[1])) {
        uint64_t t;

        t = arg_info(op->args[1])->val;
        t = do_constant_folding(op->opc, ctx->type, t, 0);
        return tcg_opt_gen_movi(ctx, op, op->args[0], t);
    }
    return false;
}

/*
 * Binary constant folding: if both inputs args[1] and args[2] are
 * constant, evaluate the opcode with do_constant_folding() and
 * replace @op with a movi of the result into args[0].
 */
static bool fold_const2(OptContext *ctx, TCGOp *op)
{
    if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
        uint64_t t1 = arg_info(op->args[1])->val;
        uint64_t t2 = arg_info(op->args[2])->val;

        t1 = do_constant_folding(op->opc, ctx->type, t1, t2);
        return tcg_opt_gen_movi(ctx, op, op->args[0], t1);
    }
    return false;
}

/*
 * Canonicalize the operand order of a commutative binary operation
 * with swap_commutative().  Nothing is folded, so this always
 * returns false and may head an "a || b" chain of folders.
 */
static bool fold_commutative(OptContext *ctx, TCGOp *op)
{
    swap_commutative(op->args[0], &op->args[1], &op->args[2]);
    return false;
}

/*
 * Canonicalize the operand order of a commutative binary operation,
 * then attempt constant folding with fold_const2().
 */
static bool fold_const2_commutative(OptContext *ctx, TCGOp *op)
{
    swap_commutative(op->args[0], &op->args[1], &op->args[2]);
    return fold_const2(ctx, op);
}
/*
 * Finish a fold using the masks the caller left in @ctx
 * (ctx->a_mask, ctx->z_mask, ctx->s_mask).
 *
 * If z_mask is zero, no bit of the result can be set: fold to the
 * constant 0.  If a_mask (the "affected bits" computed by the caller)
 * is zero, fold to a copy of args[1].  Otherwise return false.
 */
static bool fold_masks(OptContext *ctx, TCGOp *op)
{
    uint64_t a_mask = ctx->a_mask;
    uint64_t z_mask = ctx->z_mask;
    uint64_t s_mask = ctx->s_mask;

    /*
     * 32-bit ops generate 32-bit results, which for the purpose of
     * simplifying tcg are sign-extended. Certainly that's how we
     * represent our constants elsewhere. Note that the bits will
     * be reset properly for a 64-bit value when encountering the
     * type changing opcodes.
     */
    if (ctx->type == TCG_TYPE_I32) {
        a_mask = (int32_t)a_mask;
        z_mask = (int32_t)z_mask;
        s_mask |= MAKE_64BIT_MASK(32, 32);
        /* Only z_mask and s_mask are written back; a_mask stays local. */
        ctx->z_mask = z_mask;
        ctx->s_mask = s_mask;
    }

    if (z_mask == 0) {
        return tcg_opt_gen_movi(ctx, op, op->args[0], 0);
    }
    if (a_mask == 0) {
        return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
    }
    return false;
}

/*
 * Convert @op to NOT, if NOT is supported by the host.
 * Return true if the conversion is successful, which will still
 * indicate that the processing is complete.
 *
 * @idx is the index within op->args of the operand to invert; it is
 * moved into args[1] before the rewritten op is passed to fold_not(),
 * whose result is returned.  When the host lacks the NOT opcode for
 * ctx->type, @op is left unchanged and false is returned.
 */
static bool fold_not(OptContext *ctx, TCGOp *op);
static bool fold_to_not(OptContext *ctx, TCGOp *op, int idx)
{
    TCGOpcode not_op;
    bool have_not;

    switch (ctx->type) {
    case TCG_TYPE_I32:
        not_op = INDEX_op_not_i32;
        have_not = TCG_TARGET_HAS_not_i32;
        break;
    case TCG_TYPE_I64:
        not_op = INDEX_op_not_i64;
        have_not = TCG_TARGET_HAS_not_i64;
        break;
    case TCG_TYPE_V64:
    case TCG_TYPE_V128:
    case TCG_TYPE_V256:
        not_op = INDEX_op_not_vec;
        have_not = TCG_TARGET_HAS_not_vec;
        break;
    default:
        g_assert_not_reached();
    }
    if (have_not) {
        op->opc = not_op;
        op->args[1] = op->args[idx];
        return fold_not(ctx, op);
    }
    return false;
}

/* If the binary operation has first argument @i, fold to @i. */
static bool fold_ix_to_i(OptContext *ctx, TCGOp *op, uint64_t i)
{
    if (arg_is_const(op->args[1]) && arg_info(op->args[1])->val == i) {
        return tcg_opt_gen_movi(ctx, op, op->args[0], i);
    }
    return false;
}

/*
 * If the binary operation has first argument @i, fold to NOT
 * of the second argument.
 */
static bool fold_ix_to_not(OptContext *ctx, TCGOp *op, uint64_t i)
{
    if (arg_is_const(op->args[1]) && arg_info(op->args[1])->val == i) {
        return fold_to_not(ctx, op, 2);
    }
    return false;
}

/* If the binary operation has second argument @i, fold to @i. */
static bool fold_xi_to_i(OptContext *ctx, TCGOp *op, uint64_t i)
{
    if (arg_is_const(op->args[2]) && arg_info(op->args[2])->val == i) {
        return tcg_opt_gen_movi(ctx, op, op->args[0], i);
    }
    return false;
}

/*
 * If the binary operation has second argument @i, fold to identity,
 * i.e. to a copy of the first argument.
 */
static bool fold_xi_to_x(OptContext *ctx, TCGOp *op, uint64_t i)
{
    if (arg_is_const(op->args[2]) && arg_info(op->args[2])->val == i) {
        return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
    }
    return false;
}

/*
 * If the binary operation has second argument @i, fold to NOT
 * of the first argument.
 */
static bool fold_xi_to_not(OptContext *ctx, TCGOp *op, uint64_t i)
{
    if (arg_is_const(op->args[2]) && arg_info(op->args[2])->val == i) {
        return fold_to_not(ctx, op, 1);
    }
    return false;
}

/* If the binary operation has both arguments equal, fold to @i. */
static bool fold_xx_to_i(OptContext *ctx, TCGOp *op, uint64_t i)
{
    if (args_are_copies(op->args[1], op->args[2])) {
        return tcg_opt_gen_movi(ctx, op, op->args[0], i);
    }
    return false;
}

/* If the binary operation has both arguments equal, fold to identity. */
static bool fold_xx_to_x(OptContext *ctx, TCGOp *op)
{
    if (args_are_copies(op->args[1], op->args[2])) {
        return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
    }
    return false;
}

/*
 * These outermost fold_<op> functions are sorted alphabetically.
 *
 * The ordering of the transformations should be:
 * 1) those that produce a constant
 * 2) those that produce a copy
 * 3) those that produce information about the result value.
 */

/* Fold add: constant operands, then x + 0 => x. */
static bool fold_add(OptContext *ctx, TCGOp *op)
{
    if (fold_const2_commutative(ctx, op) ||
        fold_xi_to_x(ctx, op, 0)) {
        return true;
    }
    return false;
}

/* We cannot as yet do_constant_folding with vectors. */
static bool fold_add_vec(OptContext *ctx, TCGOp *op)
{
    if (fold_commutative(ctx, op) ||
        fold_xi_to_x(ctx, op, 0)) {
        return true;
    }
    return false;
}

/*
 * Constant-fold a double-word add (@add true) or subtract (@add false).
 *
 * args[0]/args[1] are the low/high outputs, args[2]/args[3] the
 * low/high words of the first operand and args[4]/args[5] those of
 * the second.  Only when all four inputs are constant is the result
 * computed and @op replaced by two movi operations; otherwise @op is
 * left untouched and false is returned.
 */
static bool fold_addsub2(OptContext *ctx, TCGOp *op, bool add)
{
    if (arg_is_const(op->args[2]) && arg_is_const(op->args[3]) &&
        arg_is_const(op->args[4]) && arg_is_const(op->args[5])) {
        uint64_t al = arg_info(op->args[2])->val;
        uint64_t ah = arg_info(op->args[3])->val;
        uint64_t bl = arg_info(op->args[4])->val;
        uint64_t bh = arg_info(op->args[5])->val;
        TCGArg rl, rh;
        TCGOp *op2;

        if (ctx->type == TCG_TYPE_I32) {
            /* Two 32-bit halves: do the arithmetic in one uint64_t. */
            uint64_t a = deposit64(al, 32, 32, ah);
            uint64_t b = deposit64(bl, 32, 32, bh);

            if (add) {
                a += b;
            } else {
                a -= b;
            }

            /* Split again, sign-extending each 32-bit half. */
            al = sextract64(a, 0, 32);
            ah = sextract64(a, 32, 32);
        } else {
            /* Two 64-bit halves: do the arithmetic as Int128. */
            Int128 a = int128_make128(al, ah);
            Int128 b = int128_make128(bl, bh);

            if (add) {
                a = int128_add(a, b);
            } else {
                a = int128_sub(a, b);
            }

            al = int128_getlo(a);
            ah = int128_gethi(a);
        }

        /* Save the outputs before @op is rewritten below. */
        rl = op->args[0];
        rh = op->args[1];

        /* The proper opcode is supplied by tcg_opt_gen_mov. */
        op2 = tcg_op_insert_before(ctx->tcg, op, 0, 2);

        /* @op becomes the low-word movi, the inserted op2 the high-word. */
        tcg_opt_gen_movi(ctx, op, rl, al);
        tcg_opt_gen_movi(ctx, op2, rh, ah);
        return true;
    }
    return false;
}

/* Fold a double-word add: canonicalize operand order, then fold_addsub2. */
static bool fold_add2(OptContext *ctx, TCGOp *op)
{
    /* Note that the high and low parts may be independently swapped. */
    swap_commutative(op->args[0], &op->args[2], &op->args[4]);
    swap_commutative(op->args[1], &op->args[3], &op->args[5]);

    return fold_addsub2(ctx, op, true);
}

/*
 * Fold and: constant operands, x & 0 => 0, x & -1 => x, x & x => x.
 * Otherwise compute the result masks and let fold_masks() decide.
 */
static bool fold_and(OptContext *ctx, TCGOp *op)
{
    uint64_t z1, z2;

    if (fold_const2_commutative(ctx, op) ||
        fold_xi_to_i(ctx, op, 0) ||
        fold_xi_to_x(ctx, op, -1) ||
        fold_xx_to_x(ctx, op)) {
        return true;
    }

    /* A result bit can be set only if it can be set in both inputs. */
    z1 = arg_info(op->args[1])->z_mask;
    z2 = arg_info(op->args[2])->z_mask;
    ctx->z_mask = z1 & z2;

    /*
     * Sign repetitions are perforce all identical, whether they are 1 or 0.
     * Bitwise operations preserve the relative quantity of the repetitions.
     */
    ctx->s_mask = arg_info(op->args[1])->s_mask
                & arg_info(op->args[2])->s_mask;

    /*
     * Known-zeros does not imply known-ones. Therefore unless
     * arg2 is constant, we can't infer affected bits from it.
     */
    if (arg_is_const(op->args[2])) {
        ctx->a_mask = z1 & ~z2;
    }

    return fold_masks(ctx, op);
}

/*
 * Fold andc: constant operands, x andc x => 0, x andc 0 => x,
 * -1 andc x => not x.  Otherwise compute the result masks and let
 * fold_masks() decide.
 */
static bool fold_andc(OptContext *ctx, TCGOp *op)
{
    uint64_t z1;

    if (fold_const2(ctx, op) ||
        fold_xx_to_i(ctx, op, 0) ||
        fold_xi_to_x(ctx, op, 0) ||
        fold_ix_to_not(ctx, op, -1)) {
        return true;
    }

    z1 = arg_info(op->args[1])->z_mask;

    /*
     * Known-zeros does not imply known-ones. Therefore unless
     * arg2 is constant, we can't infer anything from it.
     */
    if (arg_is_const(op->args[2])) {
        /* z2 is the complement of the constant's z_mask. */
        uint64_t z2 = ~arg_info(op->args[2])->z_mask;
        ctx->a_mask = z1 & ~z2;
        z1 &= z2;
    }
    ctx->z_mask = z1;

    ctx->s_mask = arg_info(op->args[1])->s_mask
                & arg_info(op->args[2])->s_mask;
    return fold_masks(ctx, op);
}

/*
 * Fold a conditional branch.  args[0] and args[1] are the compared
 * operands, args[2] the condition and args[3] the label.
 *
 * The operands are canonicalized first, swapping the condition to
 * match.  If the comparison folds to false the op is removed and
 * true is returned.  If it folds to true the op is rewritten into an
 * unconditional br to the label; false is returned in that case, as
 * it is when nothing can be folded.
 */
static bool fold_brcond(OptContext *ctx, TCGOp *op)
{
    TCGCond cond = op->args[2];
    int i;

    if (swap_commutative(NO_DEST, &op->args[0], &op->args[1])) {
        op->args[2] = cond = tcg_swap_cond(cond);
    }

    i = do_constant_folding_cond(ctx->type, op->args[0], op->args[1], cond);
    if (i == 0) {
        tcg_op_remove(ctx->tcg, op);
        return true;
    }
    if (i > 0) {
        op->opc = INDEX_op_br;
        op->args[0] = op->args[3];
    }
    return false;
}

/*
 * Fold a double-word conditional branch.  args[0]/args[1] are the
 * low/high words of the first operand, args[2]/args[3] those of the
 * second, args[4] the condition and args[5] the label.
 *
 * The op is removed (returning true) when the comparison is known
 * false, rewritten to br when it is known true, or narrowed to a
 * single brcond_i32 on one pair of words when the other pair is
 * decided.  All paths other than removal return false.
 *
 * The do_brcond_* labels sit inside the switch so that their "break"
 * leaves through the common "return false" below it.
 */
static bool fold_brcond2(OptContext *ctx, TCGOp *op)
{
    TCGCond cond = op->args[4];
    TCGArg label = op->args[5];
    int i, inv = 0;

    if (swap_commutative2(&op->args[0], &op->args[2])) {
        op->args[4] = cond = tcg_swap_cond(cond);
    }

    i = do_constant_folding_cond2(&op->args[0], &op->args[2], cond);
    if (i >= 0) {
        goto do_brcond_const;
    }

    switch (cond) {
    case TCG_COND_LT:
    case TCG_COND_GE:
        /*
         * Simplify LT/GE comparisons vs zero to a single compare
         * vs the high word of the input.
         */
        if (arg_is_const(op->args[2]) && arg_info(op->args[2])->val == 0 &&
            arg_is_const(op->args[3]) && arg_info(op->args[3])->val == 0) {
            goto do_brcond_high;
        }
        break;

    case TCG_COND_NE:
        inv = 1;
        QEMU_FALLTHROUGH;
    case TCG_COND_EQ:
        /*
         * Simplify EQ/NE comparisons where one of the pairs
         * can be simplified.
         */
        i = do_constant_folding_cond(TCG_TYPE_I32, op->args[0],
                                     op->args[2], cond);
        /*
         * (i ^ inv) == 0: the low words are known to differ, which
         * decides the whole comparison (EQ false, NE true).
         * (i ^ inv) == 1: the low words are known equal, so only the
         * high words matter.
         * Any other value matches neither case and falls through.
         */
        switch (i ^ inv) {
        case 0:
            goto do_brcond_const;
        case 1:
            goto do_brcond_high;
        }

        /* Same test on the high words. */
        i = do_constant_folding_cond(TCG_TYPE_I32, op->args[1],
                                     op->args[3], cond);
        switch (i ^ inv) {
        case 0:
            goto do_brcond_const;
        case 1:
            /* High words known equal: compare the low words only. */
            op->opc = INDEX_op_brcond_i32;
            op->args[1] = op->args[2];
            op->args[2] = cond;
            op->args[3] = label;
            break;
        }
        break;

    default:
        break;

    do_brcond_high:
        /* Compare the high words only. */
        op->opc = INDEX_op_brcond_i32;
        op->args[0] = op->args[1];
        op->args[1] = op->args[3];
        op->args[2] = cond;
        op->args[3] = label;
        break;

    do_brcond_const:
        /* Here i holds the known outcome: 0 never taken, else always. */
        if (i == 0) {
            tcg_op_remove(ctx->tcg, op);
            return true;
        }
        op->opc = INDEX_op_br;
        op->args[0] = label;
        break;
    }
    return false;
}

/*
 * Fold a byte swap.  args[1] is the input and args[2] holds the
 * TCG_BSWAP_* flags.  A constant input is folded to a movi;
 * otherwise the z_mask of the input is byte-swapped, the masks are
 * adjusted for the output flags, and fold_masks() decides.
 */
static bool fold_bswap(OptContext *ctx, TCGOp *op)
{
    uint64_t z_mask, s_mask, sign;

    if (arg_is_const(op->args[1])) {
        uint64_t t = arg_info(op->args[1])->val;

        t = do_constant_folding(op->opc, ctx->type, t, op->args[2]);
        return tcg_opt_gen_movi(ctx, op, op->args[0], t);
    }

    z_mask = arg_info(op->args[1])->z_mask;

    /*
     * Each INTn_MIN converts to a uint64_t with the sign bit of the
     * swapped field and every bit above it set.
     */
    switch (op->opc) {
    case INDEX_op_bswap16_i32:
    case INDEX_op_bswap16_i64:
        z_mask = bswap16(z_mask);
        sign = INT16_MIN;
        break;
    case INDEX_op_bswap32_i32:
    case INDEX_op_bswap32_i64:
        z_mask = bswap32(z_mask);
        sign = INT32_MIN;
        break;
    case INDEX_op_bswap64_i64:
        z_mask = bswap64(z_mask);
        sign = INT64_MIN;
        break;
    default:
        g_assert_not_reached();
    }
    s_mask = smask_from_zmask(z_mask);

    switch (op->args[2] & (TCG_BSWAP_OZ | TCG_BSWAP_OS)) {
    case TCG_BSWAP_OZ:
        break;
    case TCG_BSWAP_OS:
        /* If the sign bit may be 1, force all the bits above to 1. */
        if (z_mask & sign) {
            z_mask |= sign;
            s_mask = sign << 1;
        }
        break;
    default:
        /* The high bits are undefined: force all bits above the sign to 1. */
        z_mask |= sign << 1;
        s_mask = 0;
        break;
    }
    ctx->z_mask = z_mask;
    ctx->s_mask = s_mask;

    return fold_masks(ctx, op);
}

/*
 * Process a call op.  The argument temps are initialized and the
 * inputs copy-propagated; then temp data is reset for used globals
 * (depending on the call flags) and for all outputs.  Always
 * returns true.
 */
static bool fold_call(OptContext *ctx, TCGOp *op)
{
    TCGContext *s = ctx->tcg;
    int nb_oargs = TCGOP_CALLO(op);
    int nb_iargs = TCGOP_CALLI(op);
    int flags, i;

    init_arguments(ctx, op, nb_oargs + nb_iargs);
    copy_propagate(ctx, op, nb_oargs, nb_iargs);

    /* If the function reads or writes globals, reset temp data. */
    flags = tcg_call_flags(op);
    if (!(flags & (TCG_CALL_NO_READ_GLOBALS | TCG_CALL_NO_WRITE_GLOBALS))) {
        int nb_globals = s->nb_globals;

        /* Only globals recorded in temps_used are reset. */
        for (i = 0; i < nb_globals; i++) {
            if (test_bit(i, ctx->temps_used.l)) {
                reset_ts(&ctx->tcg->temps[i]);
            }
        }
    }

    /* Reset temp data for outputs. */
    for (i = 0; i < nb_oargs; i++) {
        reset_temp(op->args[i]);
    }

    /* Stop optimizing MB across calls. */
    ctx->prev_mb = NULL;
    return true;
}

/*
 * Fold a count-zeros operation.  args[1] is the value examined and
 * args[2] the operand that is copied to the output when a constant
 * args[1] is zero.  A non-zero constant args[1] is folded to a
 * movi.  Otherwise only the result masks are recorded.
 */
static bool fold_count_zeros(OptContext *ctx, TCGOp *op)
{
    uint64_t z_mask;

    if (arg_is_const(op->args[1])) {
        uint64_t t = arg_info(op->args[1])->val;

        if (t != 0) {
            t = do_constant_folding(op->opc, ctx->type, t, 0);
            return tcg_opt_gen_movi(ctx, op, op->args[0], t);
        }
        return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[2]);
    }

    switch (ctx->type) {
    case TCG_TYPE_I32:
        z_mask = 31;
        break;
    case TCG_TYPE_I64:
        z_mask = 63;
        break;
    default:
        g_assert_not_reached();
    }
    /* Bits of the type-dependent mask above, or bits of args[2]. */
    ctx->z_mask = arg_info(op->args[2])->z_mask | z_mask;
    ctx->s_mask = smask_from_zmask(ctx->z_mask);
    return false;
}

/*
 * Fold ctpop: a constant input is folded to a movi.  Otherwise
 * record that only the low bits selected by 32 | 31 (I32) or
 * 64 | 63 (I64) can be set in the result.
 */
static bool fold_ctpop(OptContext *ctx, TCGOp *op)
{
    if (fold_const1(ctx, op)) {
        return true;
    }

    switch (ctx->type) {
    case TCG_TYPE_I32:
        ctx->z_mask = 32 | 31;
        break;
    case TCG_TYPE_I64:
        ctx->z_mask = 64 | 63;
        break;
    default:
        g_assert_not_reached();
    }
    ctx->s_mask = smask_from_zmask(ctx->z_mask);
    return false;
}

/*
 * Fold deposit.  args[1] is the base value and args[2] the field
 * value; args[3] and args[4] are passed to deposit64() as the start
 * and length of the field.  Two constant inputs are folded to a movi;
 * otherwise the same deposit is applied to the inputs' z_masks.
 */
static bool fold_deposit(OptContext *ctx, TCGOp *op)
{
    if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
        uint64_t t1 = arg_info(op->args[1])->val;
        uint64_t t2 = arg_info(op->args[2])->val;

        t1 = deposit64(t1, op->args[3], op->args[4], t2);
        return tcg_opt_gen_movi(ctx, op, op->args[0], t1);
    }

    ctx->z_mask = deposit64(arg_info(op->args[1])->z_mask,
                            op->args[3], op->args[4],
                            arg_info(op->args[2])->z_mask);
    return false;
}

/* Fold division: constant operands, then x / 1 => x. */
static bool fold_divide(OptContext *ctx, TCGOp *op)
{
    if (fold_const2(ctx, op) ||
        fold_xi_to_x(ctx, op, 1)) {
        return true;
    }
    return false;
}

/*
 * Fold dup: a constant input is replicated with dup_const() for the
 * element size given by TCGOP_VECE(op) and emitted as a movi.
 */
static bool fold_dup(OptContext *ctx, TCGOp *op)
{
    if (arg_is_const(op->args[1])) {
        uint64_t t = arg_info(op->args[1])->val;
        t = dup_const(TCGOP_VECE(op), t);
        return tcg_opt_gen_movi(ctx, op, op->args[0], t);
    }
    return false;
}

/*
 * Fold dup2.  Two constant inputs are combined into one 64-bit
 * constant, args[1] in the low 32 bits and args[2] in the high 32
 * bits, and emitted as a movi.  If instead both inputs are copies of
 * each other, the op is rewritten in place to dup_vec with element
 * size MO_32; false is still returned in that case.
 */
static bool fold_dup2(OptContext *ctx, TCGOp *op)
{
    if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
        uint64_t t = deposit64(arg_info(op->args[1])->val, 32, 32,
                               arg_info(op->args[2])->val);
        return tcg_opt_gen_movi(ctx, op, op->args[0], t);
    }

    if (args_are_copies(op->args[1], op->args[2])) {
        op->opc = INDEX_op_dup_vec;
        TCGOP_VECE(op) = MO_32;
    }
    return false;
}

/*
 * Fold eqv: constant operands, x eqv -1 => x, x eqv 0 => not x.
 * Otherwise only the sign mask of the result is recorded.
 */
static bool fold_eqv(OptContext *ctx, TCGOp *op)
{
    if (fold_const2_commutative(ctx, op) ||
        fold_xi_to_x(ctx, op, -1) ||
        fold_xi_to_not(ctx, op, 0)) {
        return true;
    }

    ctx->s_mask = arg_info(op->args[1])->s_mask
                & arg_info(op->args[2])->s_mask;
    return false;
}

static bool fold_extract(OptContext *ctx, TCGOp *op)
{
    uint64_t z_mask_old, z_mask;
    int pos = op->args[2];
    int len = op->args[3];

    if (arg_is_const(op->args[1])) {
        uint64_t t;

        t = arg_info(op->args[1])->val;
        t = extract64(t, pos, len);
        return tcg_opt_gen_movi(ctx, op, op->args[0], t);
    }

    z_mask_old = arg_info(op->args[1])->z_mask;
    z_mask = extract64(z_mask_old, pos, len);
    if (pos == 0) {
        ctx->a_mask = z_mask_old ^ z_mask;
    }
    ctx->z_mask = z_mask;
    ctx->s_mask =
smask_from_zmask(z_mask); 1364fae450baSRichard Henderson 1365fae450baSRichard Henderson return fold_masks(ctx, op); 1366b6617c88SRichard Henderson } 1367b6617c88SRichard Henderson 1368dcd08996SRichard Henderson static bool fold_extract2(OptContext *ctx, TCGOp *op) 1369dcd08996SRichard Henderson { 1370dcd08996SRichard Henderson if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) { 1371dcd08996SRichard Henderson uint64_t v1 = arg_info(op->args[1])->val; 1372dcd08996SRichard Henderson uint64_t v2 = arg_info(op->args[2])->val; 1373dcd08996SRichard Henderson int shr = op->args[3]; 1374dcd08996SRichard Henderson 1375dcd08996SRichard Henderson if (op->opc == INDEX_op_extract2_i64) { 1376dcd08996SRichard Henderson v1 >>= shr; 1377dcd08996SRichard Henderson v2 <<= 64 - shr; 1378dcd08996SRichard Henderson } else { 1379dcd08996SRichard Henderson v1 = (uint32_t)v1 >> shr; 1380225bec0cSRichard Henderson v2 = (uint64_t)((int32_t)v2 << (32 - shr)); 1381dcd08996SRichard Henderson } 1382dcd08996SRichard Henderson return tcg_opt_gen_movi(ctx, op, op->args[0], v1 | v2); 1383dcd08996SRichard Henderson } 1384dcd08996SRichard Henderson return false; 1385dcd08996SRichard Henderson } 1386dcd08996SRichard Henderson 13872f9f08baSRichard Henderson static bool fold_exts(OptContext *ctx, TCGOp *op) 13882f9f08baSRichard Henderson { 138957fe5c6dSRichard Henderson uint64_t s_mask_old, s_mask, z_mask, sign; 1390fae450baSRichard Henderson bool type_change = false; 1391fae450baSRichard Henderson 1392fae450baSRichard Henderson if (fold_const1(ctx, op)) { 1393fae450baSRichard Henderson return true; 1394fae450baSRichard Henderson } 1395fae450baSRichard Henderson 139657fe5c6dSRichard Henderson z_mask = arg_info(op->args[1])->z_mask; 139757fe5c6dSRichard Henderson s_mask = arg_info(op->args[1])->s_mask; 139857fe5c6dSRichard Henderson s_mask_old = s_mask; 1399fae450baSRichard Henderson 1400fae450baSRichard Henderson switch (op->opc) { 1401fae450baSRichard Henderson CASE_OP_32_64(ext8s): 
1402fae450baSRichard Henderson sign = INT8_MIN; 1403fae450baSRichard Henderson z_mask = (uint8_t)z_mask; 1404fae450baSRichard Henderson break; 1405fae450baSRichard Henderson CASE_OP_32_64(ext16s): 1406fae450baSRichard Henderson sign = INT16_MIN; 1407fae450baSRichard Henderson z_mask = (uint16_t)z_mask; 1408fae450baSRichard Henderson break; 1409fae450baSRichard Henderson case INDEX_op_ext_i32_i64: 1410fae450baSRichard Henderson type_change = true; 1411fae450baSRichard Henderson QEMU_FALLTHROUGH; 1412fae450baSRichard Henderson case INDEX_op_ext32s_i64: 1413fae450baSRichard Henderson sign = INT32_MIN; 1414fae450baSRichard Henderson z_mask = (uint32_t)z_mask; 1415fae450baSRichard Henderson break; 1416fae450baSRichard Henderson default: 1417fae450baSRichard Henderson g_assert_not_reached(); 1418fae450baSRichard Henderson } 1419fae450baSRichard Henderson 1420fae450baSRichard Henderson if (z_mask & sign) { 1421fae450baSRichard Henderson z_mask |= sign; 1422fae450baSRichard Henderson } 142357fe5c6dSRichard Henderson s_mask |= sign << 1; 142457fe5c6dSRichard Henderson 1425fae450baSRichard Henderson ctx->z_mask = z_mask; 142657fe5c6dSRichard Henderson ctx->s_mask = s_mask; 142757fe5c6dSRichard Henderson if (!type_change) { 142857fe5c6dSRichard Henderson ctx->a_mask = s_mask & ~s_mask_old; 142957fe5c6dSRichard Henderson } 1430fae450baSRichard Henderson 1431fae450baSRichard Henderson return fold_masks(ctx, op); 14322f9f08baSRichard Henderson } 14332f9f08baSRichard Henderson 14342f9f08baSRichard Henderson static bool fold_extu(OptContext *ctx, TCGOp *op) 14352f9f08baSRichard Henderson { 1436fae450baSRichard Henderson uint64_t z_mask_old, z_mask; 1437fae450baSRichard Henderson bool type_change = false; 1438fae450baSRichard Henderson 1439fae450baSRichard Henderson if (fold_const1(ctx, op)) { 1440fae450baSRichard Henderson return true; 1441fae450baSRichard Henderson } 1442fae450baSRichard Henderson 1443fae450baSRichard Henderson z_mask_old = z_mask = arg_info(op->args[1])->z_mask; 
1444fae450baSRichard Henderson 1445fae450baSRichard Henderson switch (op->opc) { 1446fae450baSRichard Henderson CASE_OP_32_64(ext8u): 1447fae450baSRichard Henderson z_mask = (uint8_t)z_mask; 1448fae450baSRichard Henderson break; 1449fae450baSRichard Henderson CASE_OP_32_64(ext16u): 1450fae450baSRichard Henderson z_mask = (uint16_t)z_mask; 1451fae450baSRichard Henderson break; 1452fae450baSRichard Henderson case INDEX_op_extrl_i64_i32: 1453fae450baSRichard Henderson case INDEX_op_extu_i32_i64: 1454fae450baSRichard Henderson type_change = true; 1455fae450baSRichard Henderson QEMU_FALLTHROUGH; 1456fae450baSRichard Henderson case INDEX_op_ext32u_i64: 1457fae450baSRichard Henderson z_mask = (uint32_t)z_mask; 1458fae450baSRichard Henderson break; 1459fae450baSRichard Henderson case INDEX_op_extrh_i64_i32: 1460fae450baSRichard Henderson type_change = true; 1461fae450baSRichard Henderson z_mask >>= 32; 1462fae450baSRichard Henderson break; 1463fae450baSRichard Henderson default: 1464fae450baSRichard Henderson g_assert_not_reached(); 1465fae450baSRichard Henderson } 1466fae450baSRichard Henderson 1467fae450baSRichard Henderson ctx->z_mask = z_mask; 146857fe5c6dSRichard Henderson ctx->s_mask = smask_from_zmask(z_mask); 1469fae450baSRichard Henderson if (!type_change) { 1470fae450baSRichard Henderson ctx->a_mask = z_mask_old ^ z_mask; 1471fae450baSRichard Henderson } 1472fae450baSRichard Henderson return fold_masks(ctx, op); 14732f9f08baSRichard Henderson } 14742f9f08baSRichard Henderson 14753eefdf2bSRichard Henderson static bool fold_mb(OptContext *ctx, TCGOp *op) 14763eefdf2bSRichard Henderson { 14773eefdf2bSRichard Henderson /* Eliminate duplicate and redundant fence instructions. 
*/ 14783eefdf2bSRichard Henderson if (ctx->prev_mb) { 14793eefdf2bSRichard Henderson /* 14803eefdf2bSRichard Henderson * Merge two barriers of the same type into one, 14813eefdf2bSRichard Henderson * or a weaker barrier into a stronger one, 14823eefdf2bSRichard Henderson * or two weaker barriers into a stronger one. 14833eefdf2bSRichard Henderson * mb X; mb Y => mb X|Y 14843eefdf2bSRichard Henderson * mb; strl => mb; st 14853eefdf2bSRichard Henderson * ldaq; mb => ld; mb 14863eefdf2bSRichard Henderson * ldaq; strl => ld; mb; st 14873eefdf2bSRichard Henderson * Other combinations are also merged into a strong 14883eefdf2bSRichard Henderson * barrier. This is stricter than specified but for 14893eefdf2bSRichard Henderson * the purposes of TCG is better than not optimizing. 14903eefdf2bSRichard Henderson */ 14913eefdf2bSRichard Henderson ctx->prev_mb->args[0] |= op->args[0]; 14923eefdf2bSRichard Henderson tcg_op_remove(ctx->tcg, op); 14933eefdf2bSRichard Henderson } else { 14943eefdf2bSRichard Henderson ctx->prev_mb = op; 14953eefdf2bSRichard Henderson } 14963eefdf2bSRichard Henderson return true; 14973eefdf2bSRichard Henderson } 14983eefdf2bSRichard Henderson 14992cfac7faSRichard Henderson static bool fold_mov(OptContext *ctx, TCGOp *op) 15002cfac7faSRichard Henderson { 15012cfac7faSRichard Henderson return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]); 15022cfac7faSRichard Henderson } 15032cfac7faSRichard Henderson 15040c310a30SRichard Henderson static bool fold_movcond(OptContext *ctx, TCGOp *op) 15050c310a30SRichard Henderson { 15060c310a30SRichard Henderson TCGCond cond = op->args[5]; 15077a2f7084SRichard Henderson int i; 15080c310a30SRichard Henderson 15097a2f7084SRichard Henderson if (swap_commutative(NO_DEST, &op->args[1], &op->args[2])) { 15107a2f7084SRichard Henderson op->args[5] = cond = tcg_swap_cond(cond); 15117a2f7084SRichard Henderson } 15127a2f7084SRichard Henderson /* 15137a2f7084SRichard Henderson * Canonicalize the "false" input reg to match 
the destination reg so 15147a2f7084SRichard Henderson * that the tcg backend can implement a "move if true" operation. 15157a2f7084SRichard Henderson */ 15167a2f7084SRichard Henderson if (swap_commutative(op->args[0], &op->args[4], &op->args[3])) { 15177a2f7084SRichard Henderson op->args[5] = cond = tcg_invert_cond(cond); 15187a2f7084SRichard Henderson } 15197a2f7084SRichard Henderson 15207a2f7084SRichard Henderson i = do_constant_folding_cond(ctx->type, op->args[1], op->args[2], cond); 15210c310a30SRichard Henderson if (i >= 0) { 15220c310a30SRichard Henderson return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[4 - i]); 15230c310a30SRichard Henderson } 15240c310a30SRichard Henderson 1525fae450baSRichard Henderson ctx->z_mask = arg_info(op->args[3])->z_mask 1526fae450baSRichard Henderson | arg_info(op->args[4])->z_mask; 15273f2b1f83SRichard Henderson ctx->s_mask = arg_info(op->args[3])->s_mask 15283f2b1f83SRichard Henderson & arg_info(op->args[4])->s_mask; 1529fae450baSRichard Henderson 15300c310a30SRichard Henderson if (arg_is_const(op->args[3]) && arg_is_const(op->args[4])) { 15310c310a30SRichard Henderson uint64_t tv = arg_info(op->args[3])->val; 15320c310a30SRichard Henderson uint64_t fv = arg_info(op->args[4])->val; 153367f84c96SRichard Henderson TCGOpcode opc; 15340c310a30SRichard Henderson 153567f84c96SRichard Henderson switch (ctx->type) { 153667f84c96SRichard Henderson case TCG_TYPE_I32: 153767f84c96SRichard Henderson opc = INDEX_op_setcond_i32; 153867f84c96SRichard Henderson break; 153967f84c96SRichard Henderson case TCG_TYPE_I64: 154067f84c96SRichard Henderson opc = INDEX_op_setcond_i64; 154167f84c96SRichard Henderson break; 154267f84c96SRichard Henderson default: 154367f84c96SRichard Henderson g_assert_not_reached(); 154467f84c96SRichard Henderson } 15450c310a30SRichard Henderson 15460c310a30SRichard Henderson if (tv == 1 && fv == 0) { 15470c310a30SRichard Henderson op->opc = opc; 15480c310a30SRichard Henderson op->args[3] = cond; 15490c310a30SRichard 
Henderson } else if (fv == 1 && tv == 0) { 15500c310a30SRichard Henderson op->opc = opc; 15510c310a30SRichard Henderson op->args[3] = tcg_invert_cond(cond); 15520c310a30SRichard Henderson } 15530c310a30SRichard Henderson } 15540c310a30SRichard Henderson return false; 15550c310a30SRichard Henderson } 15560c310a30SRichard Henderson 15572f9f08baSRichard Henderson static bool fold_mul(OptContext *ctx, TCGOp *op) 15582f9f08baSRichard Henderson { 1559e8679955SRichard Henderson if (fold_const2(ctx, op) || 15605b5cf479SRichard Henderson fold_xi_to_i(ctx, op, 0) || 15615b5cf479SRichard Henderson fold_xi_to_x(ctx, op, 1)) { 1562e8679955SRichard Henderson return true; 1563e8679955SRichard Henderson } 1564e8679955SRichard Henderson return false; 15652f9f08baSRichard Henderson } 15662f9f08baSRichard Henderson 15672f9f08baSRichard Henderson static bool fold_mul_highpart(OptContext *ctx, TCGOp *op) 15682f9f08baSRichard Henderson { 15697a2f7084SRichard Henderson if (fold_const2_commutative(ctx, op) || 1570e8679955SRichard Henderson fold_xi_to_i(ctx, op, 0)) { 1571e8679955SRichard Henderson return true; 1572e8679955SRichard Henderson } 1573e8679955SRichard Henderson return false; 15742f9f08baSRichard Henderson } 15752f9f08baSRichard Henderson 1576407112b0SRichard Henderson static bool fold_multiply2(OptContext *ctx, TCGOp *op) 15776b8ac0d1SRichard Henderson { 15787a2f7084SRichard Henderson swap_commutative(op->args[0], &op->args[2], &op->args[3]); 15797a2f7084SRichard Henderson 15806b8ac0d1SRichard Henderson if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])) { 1581407112b0SRichard Henderson uint64_t a = arg_info(op->args[2])->val; 1582407112b0SRichard Henderson uint64_t b = arg_info(op->args[3])->val; 1583407112b0SRichard Henderson uint64_t h, l; 15846b8ac0d1SRichard Henderson TCGArg rl, rh; 1585407112b0SRichard Henderson TCGOp *op2; 1586407112b0SRichard Henderson 1587407112b0SRichard Henderson switch (op->opc) { 1588407112b0SRichard Henderson case INDEX_op_mulu2_i32: 
1589407112b0SRichard Henderson l = (uint64_t)(uint32_t)a * (uint32_t)b; 1590407112b0SRichard Henderson h = (int32_t)(l >> 32); 1591407112b0SRichard Henderson l = (int32_t)l; 1592407112b0SRichard Henderson break; 1593407112b0SRichard Henderson case INDEX_op_muls2_i32: 1594407112b0SRichard Henderson l = (int64_t)(int32_t)a * (int32_t)b; 1595407112b0SRichard Henderson h = l >> 32; 1596407112b0SRichard Henderson l = (int32_t)l; 1597407112b0SRichard Henderson break; 1598407112b0SRichard Henderson case INDEX_op_mulu2_i64: 1599407112b0SRichard Henderson mulu64(&l, &h, a, b); 1600407112b0SRichard Henderson break; 1601407112b0SRichard Henderson case INDEX_op_muls2_i64: 1602407112b0SRichard Henderson muls64(&l, &h, a, b); 1603407112b0SRichard Henderson break; 1604407112b0SRichard Henderson default: 1605407112b0SRichard Henderson g_assert_not_reached(); 1606407112b0SRichard Henderson } 16076b8ac0d1SRichard Henderson 16086b8ac0d1SRichard Henderson rl = op->args[0]; 16096b8ac0d1SRichard Henderson rh = op->args[1]; 1610407112b0SRichard Henderson 1611407112b0SRichard Henderson /* The proper opcode is supplied by tcg_opt_gen_mov. 
*/ 1612d4478943SPhilippe Mathieu-Daudé op2 = tcg_op_insert_before(ctx->tcg, op, 0, 2); 1613407112b0SRichard Henderson 1614407112b0SRichard Henderson tcg_opt_gen_movi(ctx, op, rl, l); 1615407112b0SRichard Henderson tcg_opt_gen_movi(ctx, op2, rh, h); 16166b8ac0d1SRichard Henderson return true; 16176b8ac0d1SRichard Henderson } 16186b8ac0d1SRichard Henderson return false; 16196b8ac0d1SRichard Henderson } 16206b8ac0d1SRichard Henderson /* Fold nand: returns true if fold_const2_commutative() or fold_xi_to_not(-1) handled the op; otherwise records the AND of both inputs' s_mask as the result's s_mask and returns false. */ 16212f9f08baSRichard Henderson static bool fold_nand(OptContext *ctx, TCGOp *op) 16222f9f08baSRichard Henderson { 16237a2f7084SRichard Henderson if (fold_const2_commutative(ctx, op) || 16240e0a32baSRichard Henderson fold_xi_to_not(ctx, op, -1)) { 16250e0a32baSRichard Henderson return true; 16260e0a32baSRichard Henderson } 16273f2b1f83SRichard Henderson 16283f2b1f83SRichard Henderson ctx->s_mask = arg_info(op->args[1])->s_mask 16293f2b1f83SRichard Henderson & arg_info(op->args[2])->s_mask; 16300e0a32baSRichard Henderson return false; 16312f9f08baSRichard Henderson } 16322f9f08baSRichard Henderson /* Fold neg: returns true if fold_const1() handled the op; otherwise computes the result's z_mask from the input's z_mask and always returns true after finish_folding(). */ 16332f9f08baSRichard Henderson static bool fold_neg(OptContext *ctx, TCGOp *op) 16342f9f08baSRichard Henderson { 1635fae450baSRichard Henderson uint64_t z_mask; 1636fae450baSRichard Henderson 16379caca88aSRichard Henderson if (fold_const1(ctx, op)) { 16389caca88aSRichard Henderson return true; 16399caca88aSRichard Henderson } 1640fae450baSRichard Henderson 1641fae450baSRichard Henderson /* (z_mask & -z_mask) isolates the rightmost possibly-nonzero bit of the input; negating it sets that bit and every bit to its left, leaving only the bits below it known zero. */ 1642fae450baSRichard Henderson z_mask = arg_info(op->args[1])->z_mask; 1643fae450baSRichard Henderson ctx->z_mask = -(z_mask & -z_mask); 1644fae450baSRichard Henderson 16459caca88aSRichard Henderson /* 16469caca88aSRichard Henderson * Because of fold_sub_to_neg, we want to always return true, 16479caca88aSRichard Henderson * via finish_folding. 
16489caca88aSRichard Henderson */ 16499caca88aSRichard Henderson finish_folding(ctx, op); 16509caca88aSRichard Henderson return true; 16512f9f08baSRichard Henderson } 16522f9f08baSRichard Henderson 16532f9f08baSRichard Henderson static bool fold_nor(OptContext *ctx, TCGOp *op) 16542f9f08baSRichard Henderson { 16557a2f7084SRichard Henderson if (fold_const2_commutative(ctx, op) || 16560e0a32baSRichard Henderson fold_xi_to_not(ctx, op, 0)) { 16570e0a32baSRichard Henderson return true; 16580e0a32baSRichard Henderson } 16593f2b1f83SRichard Henderson 16603f2b1f83SRichard Henderson ctx->s_mask = arg_info(op->args[1])->s_mask 16613f2b1f83SRichard Henderson & arg_info(op->args[2])->s_mask; 16620e0a32baSRichard Henderson return false; 16632f9f08baSRichard Henderson } 16642f9f08baSRichard Henderson 16652f9f08baSRichard Henderson static bool fold_not(OptContext *ctx, TCGOp *op) 16662f9f08baSRichard Henderson { 16670e0a32baSRichard Henderson if (fold_const1(ctx, op)) { 16680e0a32baSRichard Henderson return true; 16690e0a32baSRichard Henderson } 16700e0a32baSRichard Henderson 16713f2b1f83SRichard Henderson ctx->s_mask = arg_info(op->args[1])->s_mask; 16723f2b1f83SRichard Henderson 16730e0a32baSRichard Henderson /* Because of fold_to_not, we want to always return true, via finish. 
*/ 16740e0a32baSRichard Henderson finish_folding(ctx, op); 16750e0a32baSRichard Henderson return true; 16762f9f08baSRichard Henderson } 16772f9f08baSRichard Henderson 16782f9f08baSRichard Henderson static bool fold_or(OptContext *ctx, TCGOp *op) 16792f9f08baSRichard Henderson { 16807a2f7084SRichard Henderson if (fold_const2_commutative(ctx, op) || 1681a63ce0e9SRichard Henderson fold_xi_to_x(ctx, op, 0) || 1682ca7bb049SRichard Henderson fold_xx_to_x(ctx, op)) { 1683ca7bb049SRichard Henderson return true; 1684ca7bb049SRichard Henderson } 1685fae450baSRichard Henderson 1686fae450baSRichard Henderson ctx->z_mask = arg_info(op->args[1])->z_mask 1687fae450baSRichard Henderson | arg_info(op->args[2])->z_mask; 16883f2b1f83SRichard Henderson ctx->s_mask = arg_info(op->args[1])->s_mask 16893f2b1f83SRichard Henderson & arg_info(op->args[2])->s_mask; 1690fae450baSRichard Henderson return fold_masks(ctx, op); 16912f9f08baSRichard Henderson } 16922f9f08baSRichard Henderson 16932f9f08baSRichard Henderson static bool fold_orc(OptContext *ctx, TCGOp *op) 16942f9f08baSRichard Henderson { 16950e0a32baSRichard Henderson if (fold_const2(ctx, op) || 16964e858d96SRichard Henderson fold_xx_to_i(ctx, op, -1) || 1697a63ce0e9SRichard Henderson fold_xi_to_x(ctx, op, -1) || 16980e0a32baSRichard Henderson fold_ix_to_not(ctx, op, 0)) { 16990e0a32baSRichard Henderson return true; 17000e0a32baSRichard Henderson } 17013f2b1f83SRichard Henderson 17023f2b1f83SRichard Henderson ctx->s_mask = arg_info(op->args[1])->s_mask 17033f2b1f83SRichard Henderson & arg_info(op->args[2])->s_mask; 17040e0a32baSRichard Henderson return false; 17052f9f08baSRichard Henderson } 17062f9f08baSRichard Henderson 17073eefdf2bSRichard Henderson static bool fold_qemu_ld(OptContext *ctx, TCGOp *op) 17083eefdf2bSRichard Henderson { 1709fae450baSRichard Henderson const TCGOpDef *def = &tcg_op_defs[op->opc]; 1710fae450baSRichard Henderson MemOpIdx oi = op->args[def->nb_oargs + def->nb_iargs]; 1711fae450baSRichard Henderson MemOp 
mop = get_memop(oi); 1712fae450baSRichard Henderson int width = 8 * memop_size(mop); 1713fae450baSRichard Henderson 171457fe5c6dSRichard Henderson if (width < 64) { 171557fe5c6dSRichard Henderson ctx->s_mask = MAKE_64BIT_MASK(width, 64 - width); 171657fe5c6dSRichard Henderson if (!(mop & MO_SIGN)) { 1717fae450baSRichard Henderson ctx->z_mask = MAKE_64BIT_MASK(0, width); 171857fe5c6dSRichard Henderson ctx->s_mask <<= 1; 171957fe5c6dSRichard Henderson } 1720fae450baSRichard Henderson } 1721fae450baSRichard Henderson 17223eefdf2bSRichard Henderson /* Opcodes that touch guest memory stop the mb optimization. */ 17233eefdf2bSRichard Henderson ctx->prev_mb = NULL; 17243eefdf2bSRichard Henderson return false; 17253eefdf2bSRichard Henderson } 17263eefdf2bSRichard Henderson 17273eefdf2bSRichard Henderson static bool fold_qemu_st(OptContext *ctx, TCGOp *op) 17283eefdf2bSRichard Henderson { 17293eefdf2bSRichard Henderson /* Opcodes that touch guest memory stop the mb optimization. */ 17303eefdf2bSRichard Henderson ctx->prev_mb = NULL; 17313eefdf2bSRichard Henderson return false; 17323eefdf2bSRichard Henderson } 17333eefdf2bSRichard Henderson 17342f9f08baSRichard Henderson static bool fold_remainder(OptContext *ctx, TCGOp *op) 17352f9f08baSRichard Henderson { 1736267c17e8SRichard Henderson if (fold_const2(ctx, op) || 1737267c17e8SRichard Henderson fold_xx_to_i(ctx, op, 0)) { 1738267c17e8SRichard Henderson return true; 1739267c17e8SRichard Henderson } 1740267c17e8SRichard Henderson return false; 17412f9f08baSRichard Henderson } 17422f9f08baSRichard Henderson 1743c63ff55cSRichard Henderson static bool fold_setcond(OptContext *ctx, TCGOp *op) 1744c63ff55cSRichard Henderson { 1745c63ff55cSRichard Henderson TCGCond cond = op->args[3]; 17467a2f7084SRichard Henderson int i; 1747c63ff55cSRichard Henderson 17487a2f7084SRichard Henderson if (swap_commutative(op->args[0], &op->args[1], &op->args[2])) { 17497a2f7084SRichard Henderson op->args[3] = cond = tcg_swap_cond(cond); 
17507a2f7084SRichard Henderson } 17517a2f7084SRichard Henderson 17527a2f7084SRichard Henderson i = do_constant_folding_cond(ctx->type, op->args[1], op->args[2], cond); 1753c63ff55cSRichard Henderson if (i >= 0) { 1754c63ff55cSRichard Henderson return tcg_opt_gen_movi(ctx, op, op->args[0], i); 1755c63ff55cSRichard Henderson } 1756fae450baSRichard Henderson 1757fae450baSRichard Henderson ctx->z_mask = 1; 1758275d7d8eSRichard Henderson ctx->s_mask = smask_from_zmask(1); 1759c63ff55cSRichard Henderson return false; 1760c63ff55cSRichard Henderson } 1761c63ff55cSRichard Henderson 1762bc47b1aaSRichard Henderson static bool fold_setcond2(OptContext *ctx, TCGOp *op) 1763bc47b1aaSRichard Henderson { 1764bc47b1aaSRichard Henderson TCGCond cond = op->args[5]; 17657a2f7084SRichard Henderson int i, inv = 0; 1766bc47b1aaSRichard Henderson 17677a2f7084SRichard Henderson if (swap_commutative2(&op->args[1], &op->args[3])) { 17687a2f7084SRichard Henderson op->args[5] = cond = tcg_swap_cond(cond); 17697a2f7084SRichard Henderson } 17707a2f7084SRichard Henderson 17717a2f7084SRichard Henderson i = do_constant_folding_cond2(&op->args[1], &op->args[3], cond); 1772bc47b1aaSRichard Henderson if (i >= 0) { 1773bc47b1aaSRichard Henderson goto do_setcond_const; 1774bc47b1aaSRichard Henderson } 1775bc47b1aaSRichard Henderson 1776bc47b1aaSRichard Henderson switch (cond) { 1777bc47b1aaSRichard Henderson case TCG_COND_LT: 1778bc47b1aaSRichard Henderson case TCG_COND_GE: 1779bc47b1aaSRichard Henderson /* 1780bc47b1aaSRichard Henderson * Simplify LT/GE comparisons vs zero to a single compare 1781bc47b1aaSRichard Henderson * vs the high word of the input. 
1782bc47b1aaSRichard Henderson */ 1783bc47b1aaSRichard Henderson if (arg_is_const(op->args[3]) && arg_info(op->args[3])->val == 0 && 1784bc47b1aaSRichard Henderson arg_is_const(op->args[4]) && arg_info(op->args[4])->val == 0) { 1785bc47b1aaSRichard Henderson goto do_setcond_high; 1786bc47b1aaSRichard Henderson } 1787bc47b1aaSRichard Henderson break; 1788bc47b1aaSRichard Henderson 1789bc47b1aaSRichard Henderson case TCG_COND_NE: 1790bc47b1aaSRichard Henderson inv = 1; 1791bc47b1aaSRichard Henderson QEMU_FALLTHROUGH; 1792bc47b1aaSRichard Henderson case TCG_COND_EQ: 1793bc47b1aaSRichard Henderson /* 1794bc47b1aaSRichard Henderson * Simplify EQ/NE comparisons where one of the pairs 1795bc47b1aaSRichard Henderson * can be simplified. 1796bc47b1aaSRichard Henderson */ 179767f84c96SRichard Henderson i = do_constant_folding_cond(TCG_TYPE_I32, op->args[1], 1798bc47b1aaSRichard Henderson op->args[3], cond); 1799bc47b1aaSRichard Henderson switch (i ^ inv) { 1800bc47b1aaSRichard Henderson case 0: 1801bc47b1aaSRichard Henderson goto do_setcond_const; 1802bc47b1aaSRichard Henderson case 1: 1803bc47b1aaSRichard Henderson goto do_setcond_high; 1804bc47b1aaSRichard Henderson } 1805bc47b1aaSRichard Henderson 180667f84c96SRichard Henderson i = do_constant_folding_cond(TCG_TYPE_I32, op->args[2], 1807bc47b1aaSRichard Henderson op->args[4], cond); 1808bc47b1aaSRichard Henderson switch (i ^ inv) { 1809bc47b1aaSRichard Henderson case 0: 1810bc47b1aaSRichard Henderson goto do_setcond_const; 1811bc47b1aaSRichard Henderson case 1: 1812bc47b1aaSRichard Henderson op->args[2] = op->args[3]; 1813bc47b1aaSRichard Henderson op->args[3] = cond; 1814bc47b1aaSRichard Henderson op->opc = INDEX_op_setcond_i32; 1815bc47b1aaSRichard Henderson break; 1816bc47b1aaSRichard Henderson } 1817bc47b1aaSRichard Henderson break; 1818bc47b1aaSRichard Henderson 1819bc47b1aaSRichard Henderson default: 1820bc47b1aaSRichard Henderson break; 1821bc47b1aaSRichard Henderson 1822bc47b1aaSRichard Henderson do_setcond_high: 
1823bc47b1aaSRichard Henderson op->args[1] = op->args[2]; 1824bc47b1aaSRichard Henderson op->args[2] = op->args[4]; 1825bc47b1aaSRichard Henderson op->args[3] = cond; 1826bc47b1aaSRichard Henderson op->opc = INDEX_op_setcond_i32; 1827bc47b1aaSRichard Henderson break; 1828bc47b1aaSRichard Henderson } 1829fae450baSRichard Henderson 1830fae450baSRichard Henderson ctx->z_mask = 1; 1831275d7d8eSRichard Henderson ctx->s_mask = smask_from_zmask(1); 1832bc47b1aaSRichard Henderson return false; 1833bc47b1aaSRichard Henderson 1834bc47b1aaSRichard Henderson do_setcond_const: 1835bc47b1aaSRichard Henderson return tcg_opt_gen_movi(ctx, op, op->args[0], i); 1836bc47b1aaSRichard Henderson } 1837bc47b1aaSRichard Henderson 1838b6617c88SRichard Henderson static bool fold_sextract(OptContext *ctx, TCGOp *op) 1839b6617c88SRichard Henderson { 184057fe5c6dSRichard Henderson uint64_t z_mask, s_mask, s_mask_old; 184157fe5c6dSRichard Henderson int pos = op->args[2]; 184257fe5c6dSRichard Henderson int len = op->args[3]; 1843fae450baSRichard Henderson 1844b6617c88SRichard Henderson if (arg_is_const(op->args[1])) { 1845b6617c88SRichard Henderson uint64_t t; 1846b6617c88SRichard Henderson 1847b6617c88SRichard Henderson t = arg_info(op->args[1])->val; 184857fe5c6dSRichard Henderson t = sextract64(t, pos, len); 1849b6617c88SRichard Henderson return tcg_opt_gen_movi(ctx, op, op->args[0], t); 1850b6617c88SRichard Henderson } 1851fae450baSRichard Henderson 185257fe5c6dSRichard Henderson z_mask = arg_info(op->args[1])->z_mask; 185357fe5c6dSRichard Henderson z_mask = sextract64(z_mask, pos, len); 1854fae450baSRichard Henderson ctx->z_mask = z_mask; 1855fae450baSRichard Henderson 185657fe5c6dSRichard Henderson s_mask_old = arg_info(op->args[1])->s_mask; 185757fe5c6dSRichard Henderson s_mask = sextract64(s_mask_old, pos, len); 185857fe5c6dSRichard Henderson s_mask |= MAKE_64BIT_MASK(len, 64 - len); 185957fe5c6dSRichard Henderson ctx->s_mask = s_mask; 186057fe5c6dSRichard Henderson 186157fe5c6dSRichard 
Henderson if (pos == 0) { 186257fe5c6dSRichard Henderson ctx->a_mask = s_mask & ~s_mask_old; 186357fe5c6dSRichard Henderson } 186457fe5c6dSRichard Henderson 1865fae450baSRichard Henderson return fold_masks(ctx, op); 1866b6617c88SRichard Henderson } 1867b6617c88SRichard Henderson 18682f9f08baSRichard Henderson static bool fold_shift(OptContext *ctx, TCGOp *op) 18692f9f08baSRichard Henderson { 187093a967fbSRichard Henderson uint64_t s_mask, z_mask, sign; 187193a967fbSRichard Henderson 1872a63ce0e9SRichard Henderson if (fold_const2(ctx, op) || 1873da48e272SRichard Henderson fold_ix_to_i(ctx, op, 0) || 1874a63ce0e9SRichard Henderson fold_xi_to_x(ctx, op, 0)) { 1875a63ce0e9SRichard Henderson return true; 1876a63ce0e9SRichard Henderson } 1877fae450baSRichard Henderson 187893a967fbSRichard Henderson s_mask = arg_info(op->args[1])->s_mask; 187993a967fbSRichard Henderson z_mask = arg_info(op->args[1])->z_mask; 188093a967fbSRichard Henderson 1881fae450baSRichard Henderson if (arg_is_const(op->args[2])) { 188293a967fbSRichard Henderson int sh = arg_info(op->args[2])->val; 188393a967fbSRichard Henderson 188493a967fbSRichard Henderson ctx->z_mask = do_constant_folding(op->opc, ctx->type, z_mask, sh); 188593a967fbSRichard Henderson 188693a967fbSRichard Henderson s_mask = do_constant_folding(op->opc, ctx->type, s_mask, sh); 188793a967fbSRichard Henderson ctx->s_mask = smask_from_smask(s_mask); 188893a967fbSRichard Henderson 1889fae450baSRichard Henderson return fold_masks(ctx, op); 1890fae450baSRichard Henderson } 189193a967fbSRichard Henderson 189293a967fbSRichard Henderson switch (op->opc) { 189393a967fbSRichard Henderson CASE_OP_32_64(sar): 189493a967fbSRichard Henderson /* 189593a967fbSRichard Henderson * Arithmetic right shift will not reduce the number of 189693a967fbSRichard Henderson * input sign repetitions. 
189793a967fbSRichard Henderson */ 189893a967fbSRichard Henderson ctx->s_mask = s_mask; 189993a967fbSRichard Henderson break; 190093a967fbSRichard Henderson CASE_OP_32_64(shr): 190193a967fbSRichard Henderson /* 190293a967fbSRichard Henderson * If the sign bit is known zero, then logical right shift 190393a967fbSRichard Henderson * will not reduce the number of input sign repetitions. 190493a967fbSRichard Henderson */ /* sign is the bit immediately below the lowest set bit of s_mask; the input's s_mask is kept only when z_mask marks that bit as known zero. */ 190593a967fbSRichard Henderson sign = (s_mask & -s_mask) >> 1; 190693a967fbSRichard Henderson if (!(z_mask & sign)) { 190793a967fbSRichard Henderson ctx->s_mask = s_mask; 190893a967fbSRichard Henderson } 190993a967fbSRichard Henderson break; 191093a967fbSRichard Henderson default: 191193a967fbSRichard Henderson break; 191293a967fbSRichard Henderson } 191393a967fbSRichard Henderson 1914a63ce0e9SRichard Henderson return false; 19152f9f08baSRichard Henderson } 19162f9f08baSRichard Henderson /* Rewrite a subtract whose first input is the constant 0 into the type's neg opcode when the host reports support for it, then hand the op to fold_neg(); returns false when the first input is not constant 0 or neg is unavailable. */ 19179caca88aSRichard Henderson static bool fold_sub_to_neg(OptContext *ctx, TCGOp *op) 19189caca88aSRichard Henderson { 19199caca88aSRichard Henderson TCGOpcode neg_op; 19209caca88aSRichard Henderson bool have_neg; 19219caca88aSRichard Henderson 19229caca88aSRichard Henderson if (!arg_is_const(op->args[1]) || arg_info(op->args[1])->val != 0) { 19239caca88aSRichard Henderson return false; 19249caca88aSRichard Henderson } 19259caca88aSRichard Henderson 19269caca88aSRichard Henderson switch (ctx->type) { 19279caca88aSRichard Henderson case TCG_TYPE_I32: 19289caca88aSRichard Henderson neg_op = INDEX_op_neg_i32; 19299caca88aSRichard Henderson have_neg = TCG_TARGET_HAS_neg_i32; 19309caca88aSRichard Henderson break; 19319caca88aSRichard Henderson case TCG_TYPE_I64: 19329caca88aSRichard Henderson neg_op = INDEX_op_neg_i64; 19339caca88aSRichard Henderson have_neg = TCG_TARGET_HAS_neg_i64; 19349caca88aSRichard Henderson break; 19359caca88aSRichard Henderson case TCG_TYPE_V64: 19369caca88aSRichard Henderson case TCG_TYPE_V128: 19379caca88aSRichard Henderson case TCG_TYPE_V256: 
19389caca88aSRichard Henderson neg_op = INDEX_op_neg_vec; 19399caca88aSRichard Henderson have_neg = (TCG_TARGET_HAS_neg_vec && 19409caca88aSRichard Henderson tcg_can_emit_vec_op(neg_op, ctx->type, TCGOP_VECE(op)) > 0); 19419caca88aSRichard Henderson break; 19429caca88aSRichard Henderson default: 19439caca88aSRichard Henderson g_assert_not_reached(); 19449caca88aSRichard Henderson } 19459caca88aSRichard Henderson if (have_neg) { 19469caca88aSRichard Henderson op->opc = neg_op; 19479caca88aSRichard Henderson op->args[1] = op->args[2]; 19489caca88aSRichard Henderson return fold_neg(ctx, op); 19499caca88aSRichard Henderson } 19509caca88aSRichard Henderson return false; 19519caca88aSRichard Henderson } 19529caca88aSRichard Henderson 1953c578ff18SRichard Henderson /* We cannot as yet do_constant_folding with vectors. */ 1954c578ff18SRichard Henderson static bool fold_sub_vec(OptContext *ctx, TCGOp *op) 19552f9f08baSRichard Henderson { 1956c578ff18SRichard Henderson if (fold_xx_to_i(ctx, op, 0) || 1957a63ce0e9SRichard Henderson fold_xi_to_x(ctx, op, 0) || 19589caca88aSRichard Henderson fold_sub_to_neg(ctx, op)) { 1959cbe42fb2SRichard Henderson return true; 1960cbe42fb2SRichard Henderson } 1961cbe42fb2SRichard Henderson return false; 19622f9f08baSRichard Henderson } 19632f9f08baSRichard Henderson 1964c578ff18SRichard Henderson static bool fold_sub(OptContext *ctx, TCGOp *op) 1965c578ff18SRichard Henderson { 1966c578ff18SRichard Henderson return fold_const2(ctx, op) || fold_sub_vec(ctx, op); 1967c578ff18SRichard Henderson } 1968c578ff18SRichard Henderson 19699531c078SRichard Henderson static bool fold_sub2(OptContext *ctx, TCGOp *op) 1970e3f7dc21SRichard Henderson { 19719531c078SRichard Henderson return fold_addsub2(ctx, op, false); 1972e3f7dc21SRichard Henderson } 1973e3f7dc21SRichard Henderson 1974fae450baSRichard Henderson static bool fold_tcg_ld(OptContext *ctx, TCGOp *op) 1975fae450baSRichard Henderson { 1976fae450baSRichard Henderson /* We can't do any folding with a 
load, but we can record bits. */ 1977fae450baSRichard Henderson switch (op->opc) { 197857fe5c6dSRichard Henderson CASE_OP_32_64(ld8s): 197957fe5c6dSRichard Henderson ctx->s_mask = MAKE_64BIT_MASK(8, 56); 198057fe5c6dSRichard Henderson break; 1981fae450baSRichard Henderson CASE_OP_32_64(ld8u): 1982fae450baSRichard Henderson ctx->z_mask = MAKE_64BIT_MASK(0, 8); 198357fe5c6dSRichard Henderson ctx->s_mask = MAKE_64BIT_MASK(9, 55); 198457fe5c6dSRichard Henderson break; 198557fe5c6dSRichard Henderson CASE_OP_32_64(ld16s): 198657fe5c6dSRichard Henderson ctx->s_mask = MAKE_64BIT_MASK(16, 48); 1987fae450baSRichard Henderson break; 1988fae450baSRichard Henderson CASE_OP_32_64(ld16u): 1989fae450baSRichard Henderson ctx->z_mask = MAKE_64BIT_MASK(0, 16); 199057fe5c6dSRichard Henderson ctx->s_mask = MAKE_64BIT_MASK(17, 47); 199157fe5c6dSRichard Henderson break; 199257fe5c6dSRichard Henderson case INDEX_op_ld32s_i64: 199357fe5c6dSRichard Henderson ctx->s_mask = MAKE_64BIT_MASK(32, 32); 1994fae450baSRichard Henderson break; 1995fae450baSRichard Henderson case INDEX_op_ld32u_i64: 1996fae450baSRichard Henderson ctx->z_mask = MAKE_64BIT_MASK(0, 32); 199757fe5c6dSRichard Henderson ctx->s_mask = MAKE_64BIT_MASK(33, 31); 1998fae450baSRichard Henderson break; 1999fae450baSRichard Henderson default: 2000fae450baSRichard Henderson g_assert_not_reached(); 2001fae450baSRichard Henderson } 2002fae450baSRichard Henderson return false; 2003fae450baSRichard Henderson } 2004fae450baSRichard Henderson 20052f9f08baSRichard Henderson static bool fold_xor(OptContext *ctx, TCGOp *op) 20062f9f08baSRichard Henderson { 20077a2f7084SRichard Henderson if (fold_const2_commutative(ctx, op) || 20080e0a32baSRichard Henderson fold_xx_to_i(ctx, op, 0) || 2009a63ce0e9SRichard Henderson fold_xi_to_x(ctx, op, 0) || 20100e0a32baSRichard Henderson fold_xi_to_not(ctx, op, -1)) { 2011cbe42fb2SRichard Henderson return true; 2012cbe42fb2SRichard Henderson } 2013fae450baSRichard Henderson 2014fae450baSRichard Henderson 
ctx->z_mask = arg_info(op->args[1])->z_mask 2015fae450baSRichard Henderson | arg_info(op->args[2])->z_mask; 20163f2b1f83SRichard Henderson ctx->s_mask = arg_info(op->args[1])->s_mask 20173f2b1f83SRichard Henderson & arg_info(op->args[2])->s_mask; 2018fae450baSRichard Henderson return fold_masks(ctx, op); 20192f9f08baSRichard Henderson } 20202f9f08baSRichard Henderson 202122613af4SKirill Batuzov /* Propagate constants and copies, fold constant expressions. */ 202236e60ef6SAurelien Jarno void tcg_optimize(TCGContext *s) 20238f2e8c07SKirill Batuzov { 20245cf32be7SRichard Henderson int nb_temps, i; 2025d0ed5151SRichard Henderson TCGOp *op, *op_next; 2026dc84988aSRichard Henderson OptContext ctx = { .tcg = s }; 20275d8f5363SRichard Henderson 202822613af4SKirill Batuzov /* Array VALS has an element for each temp. 202922613af4SKirill Batuzov If this temp holds a constant then its value is kept in VALS' element. 2030e590d4e6SAurelien Jarno If this temp is a copy of other ones then the other copies are 2031e590d4e6SAurelien Jarno available through the doubly linked circular list. */ 20328f2e8c07SKirill Batuzov 20338f2e8c07SKirill Batuzov nb_temps = s->nb_temps; 20348f17a975SRichard Henderson for (i = 0; i < nb_temps; ++i) { 20358f17a975SRichard Henderson s->temps[i].state_ptr = NULL; 20368f17a975SRichard Henderson } 20378f2e8c07SKirill Batuzov 203815fa08f8SRichard Henderson QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) { 2039c45cb8bbSRichard Henderson TCGOpcode opc = op->opc; 20405cf32be7SRichard Henderson const TCGOpDef *def; 2041404a148dSRichard Henderson bool done = false; 2042c45cb8bbSRichard Henderson 20435cf32be7SRichard Henderson /* Calls are special. 
*/ 2044c45cb8bbSRichard Henderson if (opc == INDEX_op_call) { 20455cf32be7SRichard Henderson fold_call(&ctx, op); 20465cf32be7SRichard Henderson continue; 20475cf32be7SRichard Henderson } 20485cf32be7SRichard Henderson 20495cf32be7SRichard Henderson def = &tcg_op_defs[opc]; 2050ec5d4cbeSRichard Henderson init_arguments(&ctx, op, def->nb_oargs + def->nb_iargs); 2051ec5d4cbeSRichard Henderson copy_propagate(&ctx, op, def->nb_oargs, def->nb_iargs); 205222613af4SKirill Batuzov 205367f84c96SRichard Henderson /* Pre-compute the type of the operation. */ 205467f84c96SRichard Henderson if (def->flags & TCG_OPF_VECTOR) { 205567f84c96SRichard Henderson ctx.type = TCG_TYPE_V64 + TCGOP_VECL(op); 205667f84c96SRichard Henderson } else if (def->flags & TCG_OPF_64BIT) { 205767f84c96SRichard Henderson ctx.type = TCG_TYPE_I64; 205867f84c96SRichard Henderson } else { 205967f84c96SRichard Henderson ctx.type = TCG_TYPE_I32; 206067f84c96SRichard Henderson } 206167f84c96SRichard Henderson 206257fe5c6dSRichard Henderson /* Assume all bits affected, no bits known zero, no sign reps. */ 2063fae450baSRichard Henderson ctx.a_mask = -1; 2064fae450baSRichard Henderson ctx.z_mask = -1; 206557fe5c6dSRichard Henderson ctx.s_mask = 0; 2066633f6502SPaolo Bonzini 20672cfac7faSRichard Henderson /* 20682cfac7faSRichard Henderson * Process each opcode. 20692cfac7faSRichard Henderson * Sorted alphabetically by opcode as much as possible. 
20702cfac7faSRichard Henderson */ 2071c45cb8bbSRichard Henderson switch (opc) { 2072c578ff18SRichard Henderson CASE_OP_32_64(add): 20732f9f08baSRichard Henderson done = fold_add(&ctx, op); 20742f9f08baSRichard Henderson break; 2075c578ff18SRichard Henderson case INDEX_op_add_vec: 2076c578ff18SRichard Henderson done = fold_add_vec(&ctx, op); 2077c578ff18SRichard Henderson break; 20789531c078SRichard Henderson CASE_OP_32_64(add2): 20799531c078SRichard Henderson done = fold_add2(&ctx, op); 2080e3f7dc21SRichard Henderson break; 20812f9f08baSRichard Henderson CASE_OP_32_64_VEC(and): 20822f9f08baSRichard Henderson done = fold_and(&ctx, op); 20832f9f08baSRichard Henderson break; 20842f9f08baSRichard Henderson CASE_OP_32_64_VEC(andc): 20852f9f08baSRichard Henderson done = fold_andc(&ctx, op); 20862f9f08baSRichard Henderson break; 2087079b0804SRichard Henderson CASE_OP_32_64(brcond): 2088079b0804SRichard Henderson done = fold_brcond(&ctx, op); 2089079b0804SRichard Henderson break; 2090764d2abaSRichard Henderson case INDEX_op_brcond2_i32: 2091764d2abaSRichard Henderson done = fold_brcond2(&ctx, op); 2092764d2abaSRichard Henderson break; 209309bacdc2SRichard Henderson CASE_OP_32_64(bswap16): 209409bacdc2SRichard Henderson CASE_OP_32_64(bswap32): 209509bacdc2SRichard Henderson case INDEX_op_bswap64_i64: 209609bacdc2SRichard Henderson done = fold_bswap(&ctx, op); 209709bacdc2SRichard Henderson break; 209830dd0bfeSRichard Henderson CASE_OP_32_64(clz): 209930dd0bfeSRichard Henderson CASE_OP_32_64(ctz): 210030dd0bfeSRichard Henderson done = fold_count_zeros(&ctx, op); 210130dd0bfeSRichard Henderson break; 21022f9f08baSRichard Henderson CASE_OP_32_64(ctpop): 21032f9f08baSRichard Henderson done = fold_ctpop(&ctx, op); 21042f9f08baSRichard Henderson break; 21051b1907b8SRichard Henderson CASE_OP_32_64(deposit): 21061b1907b8SRichard Henderson done = fold_deposit(&ctx, op); 21071b1907b8SRichard Henderson break; 21082f9f08baSRichard Henderson CASE_OP_32_64(div): 21092f9f08baSRichard 
Henderson CASE_OP_32_64(divu): 21102f9f08baSRichard Henderson done = fold_divide(&ctx, op); 21112f9f08baSRichard Henderson break; 21128cdb3fcbSRichard Henderson case INDEX_op_dup_vec: 21138cdb3fcbSRichard Henderson done = fold_dup(&ctx, op); 21148cdb3fcbSRichard Henderson break; 21158cdb3fcbSRichard Henderson case INDEX_op_dup2_vec: 21168cdb3fcbSRichard Henderson done = fold_dup2(&ctx, op); 21178cdb3fcbSRichard Henderson break; 2118ed523473SRichard Henderson CASE_OP_32_64_VEC(eqv): 21192f9f08baSRichard Henderson done = fold_eqv(&ctx, op); 21202f9f08baSRichard Henderson break; 2121b6617c88SRichard Henderson CASE_OP_32_64(extract): 2122b6617c88SRichard Henderson done = fold_extract(&ctx, op); 2123b6617c88SRichard Henderson break; 2124dcd08996SRichard Henderson CASE_OP_32_64(extract2): 2125dcd08996SRichard Henderson done = fold_extract2(&ctx, op); 2126dcd08996SRichard Henderson break; 21272f9f08baSRichard Henderson CASE_OP_32_64(ext8s): 21282f9f08baSRichard Henderson CASE_OP_32_64(ext16s): 21292f9f08baSRichard Henderson case INDEX_op_ext32s_i64: 21302f9f08baSRichard Henderson case INDEX_op_ext_i32_i64: 21312f9f08baSRichard Henderson done = fold_exts(&ctx, op); 21322f9f08baSRichard Henderson break; 21332f9f08baSRichard Henderson CASE_OP_32_64(ext8u): 21342f9f08baSRichard Henderson CASE_OP_32_64(ext16u): 21352f9f08baSRichard Henderson case INDEX_op_ext32u_i64: 21362f9f08baSRichard Henderson case INDEX_op_extu_i32_i64: 21372f9f08baSRichard Henderson case INDEX_op_extrl_i64_i32: 21382f9f08baSRichard Henderson case INDEX_op_extrh_i64_i32: 21392f9f08baSRichard Henderson done = fold_extu(&ctx, op); 21402f9f08baSRichard Henderson break; 214157fe5c6dSRichard Henderson CASE_OP_32_64(ld8s): 2142fae450baSRichard Henderson CASE_OP_32_64(ld8u): 214357fe5c6dSRichard Henderson CASE_OP_32_64(ld16s): 2144fae450baSRichard Henderson CASE_OP_32_64(ld16u): 214557fe5c6dSRichard Henderson case INDEX_op_ld32s_i64: 2146fae450baSRichard Henderson case INDEX_op_ld32u_i64: 2147fae450baSRichard 
Henderson done = fold_tcg_ld(&ctx, op); 2148fae450baSRichard Henderson break; 21493eefdf2bSRichard Henderson case INDEX_op_mb: 21503eefdf2bSRichard Henderson done = fold_mb(&ctx, op); 21513eefdf2bSRichard Henderson break; 21522cfac7faSRichard Henderson CASE_OP_32_64_VEC(mov): 21532cfac7faSRichard Henderson done = fold_mov(&ctx, op); 21542cfac7faSRichard Henderson break; 21550c310a30SRichard Henderson CASE_OP_32_64(movcond): 21560c310a30SRichard Henderson done = fold_movcond(&ctx, op); 21570c310a30SRichard Henderson break; 21582f9f08baSRichard Henderson CASE_OP_32_64(mul): 21592f9f08baSRichard Henderson done = fold_mul(&ctx, op); 21602f9f08baSRichard Henderson break; 21612f9f08baSRichard Henderson CASE_OP_32_64(mulsh): 21622f9f08baSRichard Henderson CASE_OP_32_64(muluh): 21632f9f08baSRichard Henderson done = fold_mul_highpart(&ctx, op); 21642f9f08baSRichard Henderson break; 2165407112b0SRichard Henderson CASE_OP_32_64(muls2): 2166407112b0SRichard Henderson CASE_OP_32_64(mulu2): 2167407112b0SRichard Henderson done = fold_multiply2(&ctx, op); 21686b8ac0d1SRichard Henderson break; 2169ed523473SRichard Henderson CASE_OP_32_64_VEC(nand): 21702f9f08baSRichard Henderson done = fold_nand(&ctx, op); 21712f9f08baSRichard Henderson break; 21722f9f08baSRichard Henderson CASE_OP_32_64(neg): 21732f9f08baSRichard Henderson done = fold_neg(&ctx, op); 21742f9f08baSRichard Henderson break; 2175ed523473SRichard Henderson CASE_OP_32_64_VEC(nor): 21762f9f08baSRichard Henderson done = fold_nor(&ctx, op); 21772f9f08baSRichard Henderson break; 21782f9f08baSRichard Henderson CASE_OP_32_64_VEC(not): 21792f9f08baSRichard Henderson done = fold_not(&ctx, op); 21802f9f08baSRichard Henderson break; 21812f9f08baSRichard Henderson CASE_OP_32_64_VEC(or): 21822f9f08baSRichard Henderson done = fold_or(&ctx, op); 21832f9f08baSRichard Henderson break; 21842f9f08baSRichard Henderson CASE_OP_32_64_VEC(orc): 21852f9f08baSRichard Henderson done = fold_orc(&ctx, op); 21862f9f08baSRichard Henderson break; 
2187fecccfccSRichard Henderson case INDEX_op_qemu_ld_a32_i32: 2188fecccfccSRichard Henderson case INDEX_op_qemu_ld_a64_i32: 2189fecccfccSRichard Henderson case INDEX_op_qemu_ld_a32_i64: 2190fecccfccSRichard Henderson case INDEX_op_qemu_ld_a64_i64: 2191fecccfccSRichard Henderson case INDEX_op_qemu_ld_a32_i128: 2192fecccfccSRichard Henderson case INDEX_op_qemu_ld_a64_i128: 21933eefdf2bSRichard Henderson done = fold_qemu_ld(&ctx, op); 21943eefdf2bSRichard Henderson break; 2195fecccfccSRichard Henderson case INDEX_op_qemu_st8_a32_i32: 2196fecccfccSRichard Henderson case INDEX_op_qemu_st8_a64_i32: 2197fecccfccSRichard Henderson case INDEX_op_qemu_st_a32_i32: 2198fecccfccSRichard Henderson case INDEX_op_qemu_st_a64_i32: 2199fecccfccSRichard Henderson case INDEX_op_qemu_st_a32_i64: 2200fecccfccSRichard Henderson case INDEX_op_qemu_st_a64_i64: 2201fecccfccSRichard Henderson case INDEX_op_qemu_st_a32_i128: 2202fecccfccSRichard Henderson case INDEX_op_qemu_st_a64_i128: 22033eefdf2bSRichard Henderson done = fold_qemu_st(&ctx, op); 22043eefdf2bSRichard Henderson break; 22052f9f08baSRichard Henderson CASE_OP_32_64(rem): 22062f9f08baSRichard Henderson CASE_OP_32_64(remu): 22072f9f08baSRichard Henderson done = fold_remainder(&ctx, op); 22082f9f08baSRichard Henderson break; 22092f9f08baSRichard Henderson CASE_OP_32_64(rotl): 22102f9f08baSRichard Henderson CASE_OP_32_64(rotr): 22112f9f08baSRichard Henderson CASE_OP_32_64(sar): 22122f9f08baSRichard Henderson CASE_OP_32_64(shl): 22132f9f08baSRichard Henderson CASE_OP_32_64(shr): 22142f9f08baSRichard Henderson done = fold_shift(&ctx, op); 22152f9f08baSRichard Henderson break; 2216c63ff55cSRichard Henderson CASE_OP_32_64(setcond): 2217c63ff55cSRichard Henderson done = fold_setcond(&ctx, op); 2218c63ff55cSRichard Henderson break; 2219bc47b1aaSRichard Henderson case INDEX_op_setcond2_i32: 2220bc47b1aaSRichard Henderson done = fold_setcond2(&ctx, op); 2221bc47b1aaSRichard Henderson break; 2222b6617c88SRichard Henderson 
CASE_OP_32_64(sextract): 2223b6617c88SRichard Henderson done = fold_sextract(&ctx, op); 2224b6617c88SRichard Henderson break; 2225c578ff18SRichard Henderson CASE_OP_32_64(sub): 22262f9f08baSRichard Henderson done = fold_sub(&ctx, op); 22272f9f08baSRichard Henderson break; 2228c578ff18SRichard Henderson case INDEX_op_sub_vec: 2229c578ff18SRichard Henderson done = fold_sub_vec(&ctx, op); 2230c578ff18SRichard Henderson break; 22319531c078SRichard Henderson CASE_OP_32_64(sub2): 22329531c078SRichard Henderson done = fold_sub2(&ctx, op); 2233e3f7dc21SRichard Henderson break; 22342f9f08baSRichard Henderson CASE_OP_32_64_VEC(xor): 22352f9f08baSRichard Henderson done = fold_xor(&ctx, op); 2236b10f3833SRichard Henderson break; 22372cfac7faSRichard Henderson default: 22382cfac7faSRichard Henderson break; 2239b10f3833SRichard Henderson } 2240b10f3833SRichard Henderson 2241404a148dSRichard Henderson if (!done) { 2242137f1f44SRichard Henderson finish_folding(&ctx, op); 2243404a148dSRichard Henderson } 22448f2e8c07SKirill Batuzov } 22458f2e8c07SKirill Batuzov } 2246