summaryrefslogtreecommitdiff
path: root/src/compiler/nir
diff options
context:
space:
mode:
author: Ian Romanick <ian.d.romanick@intel.com> 2022-10-12 14:21:02 -0700
committer: Marge Bot <emma+marge@anholt.net> 2023-03-10 15:27:17 +0000
commit: 0cadc3830fa617750d96c133391cc82ff26d531a (patch)
tree: 1ae24d03b36a8d576ef4164382680c15b530d634 /src/compiler/nir
parent: 831f9d3f615ca5228e9614191d76f95735bfc2cc (diff)
nir/lower_int64: Optionally lower ufind_msb using uadd_sat
v2: Fix inverted condition for applying the optimization. Noticed by Ken. Reviewed-by: Kenneth Graunke <kenneth@whitecape.org> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/19042>
Diffstat (limited to 'src/compiler/nir')
-rw-r--r-- src/compiler/nir/nir_lower_int64.c 22
1 files changed, 19 insertions, 3 deletions
diff --git a/src/compiler/nir/nir_lower_int64.c b/src/compiler/nir/nir_lower_int64.c
index 422bd40e566..eb1dc1e3409 100644
--- a/src/compiler/nir/nir_lower_int64.c
+++ b/src/compiler/nir/nir_lower_int64.c
@@ -680,9 +680,25 @@ lower_ufind_msb64(nir_builder *b, nir_ssa_def *x)
nir_ssa_def *x_hi = nir_unpack_64_2x32_split_y(b, x);
nir_ssa_def *lo_count = nir_ufind_msb(b, x_lo);
nir_ssa_def *hi_count = nir_ufind_msb(b, x_hi);
- nir_ssa_def *valid_hi_bits = nir_ine(b, x_hi, nir_imm_int(b, 0));
- nir_ssa_def *hi_res = nir_iadd(b, nir_imm_intN_t(b, 32, 32), hi_count);
- return nir_bcsel(b, valid_hi_bits, hi_res, lo_count);
+
+ if (b->shader->options->lower_uadd_sat) {
+ nir_ssa_def *valid_hi_bits = nir_ine(b, x_hi, nir_imm_int(b, 0));
+ nir_ssa_def *hi_res = nir_iadd(b, nir_imm_intN_t(b, 32, 32), hi_count);
+ return nir_bcsel(b, valid_hi_bits, hi_res, lo_count);
+ } else {
+ /* If hi_count was -1, it will still be -1 after this uadd_sat. As a
+ * result, hi_res is either -1 or the correct return value for 64-bit
+ * ufind_msb.
+ */
+ nir_ssa_def *hi_res = nir_uadd_sat(b, nir_imm_intN_t(b, 32, 32), hi_count);
+
+ /* hi_res is either -1 or a value in the range [63, 32]. lo_count is
+ * either -1 or a value in the range [31, 0]. The imax will pick
+ * lo_count only when hi_res is -1. In those cases, lo_count is
+ * guaranteed to be the correct answer.
+ */
+ return nir_imax(b, hi_res, lo_count);
+ }
}
static nir_ssa_def *