summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Timur Kristóf <timur.kristof@gmail.com> 2020-03-11 15:01:56 +0100
committer: Marge Bot <eric+marge@anholt.net> 2020-03-12 13:16:07 +0000
commit: ec16535b493b54c8c039576c0303f324242ae3fb (patch)
tree: d3a91a09f63db60b96a1d4ee5b446d9b15060b42
parent: 3aa83d809f6dd61e8052d39e5b3cf048c6fb8223 (diff)
nir: Add ability to lower non-const quad broadcasts to const ones.
Some hardware doesn't support subgroup shuffle, and on such hardware it makes no sense to lower quad broadcasts to shuffle. Instead, let's lower them to four const quad broadcasts, paired with bcsel instructions.

Cc: mesa-stable@lists.freedesktop.org
Signed-off-by: Timur Kristóf <timur.kristof@gmail.com>
Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4147>
-rw-r--r-- src/compiler/nir/nir.h | 1
-rw-r--r-- src/compiler/nir/nir_lower_subgroups.c | 42
2 files changed, 42 insertions, 1 deletions
diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index a151f7b8e74..c41e78538f0 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -3896,6 +3896,7 @@ typedef struct nir_lower_subgroups_options {
bool lower_shuffle_to_32bit:1;
bool lower_quad:1;
bool lower_quad_broadcast_dynamic:1;
+ bool lower_quad_broadcast_dynamic_to_const:1;
} nir_lower_subgroups_options;
bool nir_lower_subgroups(nir_shader *shader,
diff --git a/src/compiler/nir/nir_lower_subgroups.c b/src/compiler/nir/nir_lower_subgroups.c
index 4462c708ec8..f5eebb85144 100644
--- a/src/compiler/nir/nir_lower_subgroups.c
+++ b/src/compiler/nir/nir_lower_subgroups.c
@@ -302,6 +302,46 @@ build_subgroup_mask(nir_builder *b, unsigned bit_size,
}
+/**
+ * Lowers a quad_broadcast whose lane index (intrin->src[1]) is not a
+ * compile-time constant.
+ *
+ * When options->lower_quad_broadcast_dynamic_to_const is not set, this simply
+ * defers to lower_shuffle().  Otherwise it emits four quad_broadcast
+ * intrinsics with the constant indices 0..3 and chains bcsel instructions
+ * that select the one whose constant index equals the dynamic index.  The
+ * broadcast from index 0 is the innermost "else" value, so it is also the
+ * result when the dynamic index matches none of 1..3.
+ *
+ * \param b        builder used to emit the replacement instructions
+ * \param intrin   the quad_broadcast intrinsic being lowered
+ * \param options  subgroup lowering options
+ * \return the SSA value holding the lowered result
+ */
static nir_ssa_def *
+lower_dynamic_quad_broadcast(nir_builder *b, nir_intrinsic_instr *intrin,
+                             const nir_lower_subgroups_options *options)
+{
+   if (!options->lower_quad_broadcast_dynamic_to_const)
+      return lower_shuffle(b, intrin, options->lower_to_scalar, false);
+
+   nir_ssa_def *index = intrin->src[1].ssa;
+   nir_ssa_def *dst = NULL;
+
+   for (unsigned i = 0; i < 4; ++i) {
+      nir_intrinsic_instr *qbcst =
+         nir_intrinsic_instr_create(b->shader, nir_intrinsic_quad_broadcast);
+
+      qbcst->num_components = intrin->num_components;
+      qbcst->src[1] = nir_src_for_ssa(nir_imm_int(b, i));
+      nir_src_copy(&qbcst->src[0], &intrin->src[0], qbcst);
+      nir_ssa_dest_init(&qbcst->instr, &qbcst->dest,
+                        intrin->dest.ssa.num_components,
+                        intrin->dest.ssa.bit_size, NULL);
+
+      nir_ssa_def *qbcst_dst = NULL;
+
+      if (options->lower_to_scalar && qbcst->num_components > 1) {
+         /* The vector broadcast is only a template here: the scalarizing
+          * helper's result is used and qbcst itself is not inserted.
+          */
+         qbcst_dst = lower_subgroup_op_to_scalar(b, qbcst, false);
+      } else {
+         nir_builder_instr_insert(b, &qbcst->instr);
+         qbcst_dst = &qbcst->dest.ssa;
+      }
+
+      if (i == 0) {
+         dst = qbcst_dst;
+      } else {
+         /* Build the comparison constant at the bit size of the dynamic
+          * index so both nir_ieq operands agree even when the index is not
+          * 32 bits wide.
+          */
+         nir_ssa_def *lane = nir_imm_intN_t(b, i, index->bit_size);
+         dst = nir_bcsel(b, nir_ieq(b, index, lane), qbcst_dst, dst);
+      }
+   }
+
+   return dst;
+}
+
+static nir_ssa_def *
lower_subgroups_instr(nir_builder *b, nir_instr *instr, void *_options)
{
const nir_lower_subgroups_options *options = _options;
@@ -477,7 +517,7 @@ lower_subgroups_instr(nir_builder *b, nir_instr *instr, void *_options)
(options->lower_quad_broadcast_dynamic &&
intrin->intrinsic == nir_intrinsic_quad_broadcast &&
!nir_src_is_const(intrin->src[1])))
- return lower_shuffle(b, intrin, options->lower_to_scalar, false);
+ return lower_dynamic_quad_broadcast(b, intrin, options);
else if (options->lower_to_scalar && intrin->num_components > 1)
return lower_subgroup_op_to_scalar(b, intrin, false);
break;