From 0cadc3830fa617750d96c133391cc82ff26d531a Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Wed, 12 Oct 2022 14:21:02 -0700 Subject: [PATCH] nir/lower_int64: Optionally lower ufind_msb using uadd_sat v2: Fix inverted condition for applying the optimization. Noticed by Ken. Reviewed-by: Kenneth Graunke Part-of: --- src/compiler/nir/nir_lower_int64.c | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/src/compiler/nir/nir_lower_int64.c b/src/compiler/nir/nir_lower_int64.c index 422bd40e566..eb1dc1e3409 100644 --- a/src/compiler/nir/nir_lower_int64.c +++ b/src/compiler/nir/nir_lower_int64.c @@ -680,9 +680,25 @@ lower_ufind_msb64(nir_builder *b, nir_ssa_def *x) nir_ssa_def *x_hi = nir_unpack_64_2x32_split_y(b, x); nir_ssa_def *lo_count = nir_ufind_msb(b, x_lo); nir_ssa_def *hi_count = nir_ufind_msb(b, x_hi); - nir_ssa_def *valid_hi_bits = nir_ine(b, x_hi, nir_imm_int(b, 0)); - nir_ssa_def *hi_res = nir_iadd(b, nir_imm_intN_t(b, 32, 32), hi_count); - return nir_bcsel(b, valid_hi_bits, hi_res, lo_count); + + if (b->shader->options->lower_uadd_sat) { + nir_ssa_def *valid_hi_bits = nir_ine(b, x_hi, nir_imm_int(b, 0)); + nir_ssa_def *hi_res = nir_iadd(b, nir_imm_intN_t(b, 32, 32), hi_count); + return nir_bcsel(b, valid_hi_bits, hi_res, lo_count); + } else { + /* If hi_count was -1, it will still be -1 after this uadd_sat. As a + * result, hi_count is either -1 or the correct return value for 64-bit + * ufind_msb. + */ + nir_ssa_def *hi_res = nir_uadd_sat(b, nir_imm_intN_t(b, 32, 32), hi_count); + + /* hi_res is either -1 or a value in the range [63, 32]. lo_count is + * either -1 or a value in the range [31, 0]. The imax will pick + * lo_count only when hi_res is -1. In those cases, lo_count is + * guaranteed to be the correct answer. + */ + return nir_imax(b, hi_res, lo_count); + } } static nir_ssa_def *