tcg/optimize: Optimize setcond with zmask

If we can show that high bits of an input are zero,
then we may optimize away some comparisons.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
This commit is contained in:
Richard Henderson 2024-03-26 16:00:40 -10:00
parent 19517b8397
commit 8d65cda728

View File

@ -2099,6 +2099,108 @@ static bool fold_remainder(OptContext *ctx, TCGOp *op)
return false;
}
/*
 * Simplify a setcond-style op using the known-zero mask of its first
 * comparison input, when the second comparison input is a constant.
 *
 * @ctx: optimizer context; ctx->type selects 32-bit or 64-bit handling.
 * @op:  the op being folded; args[0] is the output, args[1] and args[2]
 *       the compared inputs, args[3] the condition.
 * @neg: false when the true result is 1, true when it is -1
 *       (the visible callers pass false for setcond, true for negsetcond).
 *
 * Two situations are recognized:
 *  1. Every value the first input can take is below the constant, which
 *     settles the unsigned and equality comparisons outright; the op
 *     becomes a load of the constant result.
 *  2. The first input can only be 0 or 1, so comparing it against 0 or 1
 *     yields either the input itself or its logical inverse; the op
 *     becomes a mov, neg, xor-with-1 or add-of-minus-1.
 *
 * Returns whatever tcg_opt_gen_movi()/tcg_opt_gen_mov() return when the op
 * is replaced through those helpers, and false in every other case --
 * including when the opcode has been rewritten in place.
 *
 * NOTE(review): because the in-place rewrite reports false, callers continue
 * with their remaining folding steps on the rewritten op -- confirm that
 * this is intended.
 */
static bool fold_setcond_zmask(OptContext *ctx, TCGOp *op, bool neg)
{
    uint64_t lhs_mask, rhs;
    TCGCond cond;
    TCGOpcode new_opc;
    bool invert;

    /* Nothing can be deduced unless the right-hand side is a constant. */
    if (!arg_is_const(op->args[2])) {
        return false;
    }

    lhs_mask = arg_info(op->args[1])->z_mask;
    rhs = arg_info(op->args[2])->val;
    cond = op->args[3];

    /* Only the low 32 bits take part in a 32-bit comparison. */
    if (ctx->type == TCG_TYPE_I32) {
        lhs_mask = (uint32_t)lhs_mask;
        rhs = (uint32_t)rhs;
    }

    /*
     * The mask bounds the left-hand side from above, so a constant
     * strictly larger than the mask is larger than any possible value:
     * A < B is known to hold.
     */
    if (lhs_mask < rhs) {
        switch (cond) {
        case TCG_COND_LTU:
        case TCG_COND_LEU:
        case TCG_COND_NE:
            /* Always true. */
            return tcg_opt_gen_movi(ctx, op, op->args[0], neg ? -1 : 1);
        case TCG_COND_EQ:
        case TCG_COND_GEU:
        case TCG_COND_GTU:
            /* Always false. */
            return tcg_opt_gen_movi(ctx, op, op->args[0], 0);
        default:
            break;
        }
    }

    /* From here on the left-hand side must already be a boolean (0 or 1). */
    if (lhs_mask > 1) {
        return false;
    }

    /*
     * Decide whether the comparison reproduces the input (invert == false)
     * or its logical negation (invert == true).
     */
    switch (cond) {
    case TCG_COND_NE:
    case TCG_COND_EQ:
        if (rhs != 0) {
            return false;
        }
        invert = (cond == TCG_COND_EQ);
        break;
    case TCG_COND_GEU:
    case TCG_COND_TSTNE:
    case TCG_COND_LTU:
    case TCG_COND_TSTEQ:
        if (rhs != 1) {
            return false;
        }
        invert = (cond == TCG_COND_LTU || cond == TCG_COND_TSTEQ);
        break;
    default:
        return false;
    }

    /* Plain setcond that reproduces its input: a simple copy suffices. */
    if (!invert && !neg) {
        return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
    }

    /*
     * Remaining rewrites, with A in {0,1}:
     *   !invert,  neg:  -A      -> neg
     *    invert,  neg:  -(!A)   -> A + -1
     *    invert, !neg:  !A      -> A ^ 1
     */
    switch (ctx->type) {
    case TCG_TYPE_I32:
        new_opc = !invert ? INDEX_op_neg_i32
                : neg     ? INDEX_op_add_i32
                          : INDEX_op_xor_i32;
        break;
    case TCG_TYPE_I64:
        new_opc = !invert ? INDEX_op_neg_i64
                : neg     ? INDEX_op_add_i64
                          : INDEX_op_xor_i64;
        break;
    default:
        g_assert_not_reached();
    }

    op->opc = new_opc;
    if (invert) {
        /* Both add and xor take their constant as the second input. */
        op->args[2] = arg_new_constant(ctx, neg ? -1 : 1);
    }
    return false;
}
static void fold_setcond_tst_pow2(OptContext *ctx, TCGOp *op, bool neg)
{
TCGOpcode and_opc, sub_opc, xor_opc, neg_opc, shr_opc;
@ -2200,6 +2302,10 @@ static bool fold_setcond(OptContext *ctx, TCGOp *op)
if (i >= 0) {
return tcg_opt_gen_movi(ctx, op, op->args[0], i);
}
if (fold_setcond_zmask(ctx, op, false)) {
return true;
}
fold_setcond_tst_pow2(ctx, op, false);
ctx->z_mask = 1;
@ -2214,6 +2320,10 @@ static bool fold_negsetcond(OptContext *ctx, TCGOp *op)
if (i >= 0) {
return tcg_opt_gen_movi(ctx, op, op->args[0], -i);
}
if (fold_setcond_zmask(ctx, op, true)) {
return true;
}
fold_setcond_tst_pow2(ctx, op, true);
/* Value is {0,-1} so all bits are repetitions of the sign. */