nir/lower_int64: Optionally lower ufind_msb using uadd_sat

v2: Fix inverted condition for applying the optimization. Noticed by
Ken.

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/19042>
This commit is contained in:
Ian Romanick 2022-10-12 14:21:02 -07:00 committed by Marge Bot
parent 831f9d3f61
commit 0cadc3830f

View File

@ -680,9 +680,25 @@ lower_ufind_msb64(nir_builder *b, nir_ssa_def *x)
nir_ssa_def *x_hi = nir_unpack_64_2x32_split_y(b, x);
nir_ssa_def *lo_count = nir_ufind_msb(b, x_lo);
nir_ssa_def *hi_count = nir_ufind_msb(b, x_hi);
nir_ssa_def *valid_hi_bits = nir_ine(b, x_hi, nir_imm_int(b, 0));
nir_ssa_def *hi_res = nir_iadd(b, nir_imm_intN_t(b, 32, 32), hi_count);
return nir_bcsel(b, valid_hi_bits, hi_res, lo_count);
if (b->shader->options->lower_uadd_sat) {
nir_ssa_def *valid_hi_bits = nir_ine(b, x_hi, nir_imm_int(b, 0));
nir_ssa_def *hi_res = nir_iadd(b, nir_imm_intN_t(b, 32, 32), hi_count);
return nir_bcsel(b, valid_hi_bits, hi_res, lo_count);
} else {
/* If hi_count was -1, it will still be -1 after this uadd_sat. As a
* result, hi_count is either -1 or the correct return value for 64-bit
* ufind_msb.
*/
nir_ssa_def *hi_res = nir_uadd_sat(b, nir_imm_intN_t(b, 32, 32), hi_count);
/* hi_res is either -1 or a value in the range [63, 32]. lo_count is
* either -1 or a value in the range [31, 0]. The imax will pick
* lo_count only when hi_res is -1. In those cases, lo_count is
* guaranteed to be the correct answer.
*/
return nir_imax(b, hi_res, lo_count);
}
}
static nir_ssa_def *