summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author Alyssa Rosenzweig <alyssa@rosenzweig.io> 2023-05-23 13:55:43 -0400
committer Marge Bot <emma+marge@anholt.net> 2023-06-13 16:36:10 +0000
commit 749b4817ad24e2b6e170eb942ffbd0398dcea467 (patch)
tree a00d612a2ab28b7e2295ff2c936a34356eef3c3a /src
parent d3aca1a75868cb3c184509bcdffddc2b9310a6ce (diff)
ntt: Use scoped barriers
In addition to bringing us one backend closer to the scoped-only future, this
improves the generated code in cases like:

   memoryBarrierBuffer();
   memoryBarrierShared();
   controlBarrier();

With scoped_barriers + nir_opt_combine_barriers, we now emit only one MEMBAR
instruction (and a BARRIER) rather than two MEMBARs.

Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Reviewed-by: Jesse Natalie <jenatali@microsoft.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/23191>
Diffstat (limited to 'src')
-rw-r--r-- src/gallium/auxiliary/nir/nir_to_tgsi.c 85
-rw-r--r-- src/gallium/drivers/i915/i915_screen.c 1
-rw-r--r-- src/gallium/drivers/nouveau/nv30/nv30_screen.c 1
-rw-r--r-- src/gallium/drivers/r300/r300_screen.c 3
-rw-r--r-- src/gallium/drivers/softpipe/sp_screen.c 1
-rw-r--r-- src/gallium/drivers/svga/svga_screen.c 3
6 files changed, 56 insertions, 38 deletions
diff --git a/src/gallium/auxiliary/nir/nir_to_tgsi.c b/src/gallium/auxiliary/nir/nir_to_tgsi.c
index b25eb3b484c..35465d2dfad 100644
--- a/src/gallium/auxiliary/nir/nir_to_tgsi.c
+++ b/src/gallium/auxiliary/nir/nir_to_tgsi.c
@@ -2341,6 +2341,50 @@ ntt_emit_load_sysval(struct ntt_compile *c, nir_intrinsic_instr *instr)
}
static void
+ntt_emit_barrier(struct ntt_compile *c, nir_intrinsic_instr *intr)
+{ /* Lower one NIR scoped barrier to a TGSI MEMBAR and/or a TGSI BARRIER. */
+ bool compute = gl_shader_stage_is_compute(c->s->info.stage); /* tested by both halves below */
+
+ if (nir_intrinsic_memory_scope(intr) != NIR_SCOPE_NONE) { /* memory half */
+ nir_variable_mode modes = nir_intrinsic_memory_modes(intr);
+ unsigned membar = 0; /* accumulated TGSI_MEMBAR_* bits; 0 means nothing to emit */
+
+ if (modes & nir_var_image)
+ membar |= TGSI_MEMBAR_SHADER_IMAGE;
+
+ if (modes & nir_var_mem_shared)
+ membar |= TGSI_MEMBAR_SHARED;
+
+ /* Atomic counters are lowered to SSBOs, and there's no NIR mode corresponding
+ * exactly to atomics. Take the closest match: SSBO implies both buffer bits.
+ */
+ if (modes & nir_var_mem_ssbo)
+ membar |= TGSI_MEMBAR_SHADER_BUFFER | TGSI_MEMBAR_ATOMIC_BUFFER;
+
+ if (modes & nir_var_mem_global)
+ membar |= TGSI_MEMBAR_SHADER_BUFFER; /* global memory is folded into the shader-buffer bit */
+
+ /* If we only need workgroup scope (not device-scope), we might be able to
+ * optimize a bit. Only done for compute stages with at least one bit set.
+ */
+ if (membar && compute &&
+ nir_intrinsic_memory_scope(intr) == NIR_SCOPE_WORKGROUP) {
+
+ membar |= TGSI_MEMBAR_THREAD_GROUP;
+ }
+
+ /* Only emit a memory barrier if there are any relevant modes */
+ if (membar)
+ ntt_MEMBAR(c, ureg_imm1u(c->ureg, membar));
+ }
+
+ if (nir_intrinsic_execution_scope(intr) != NIR_SCOPE_NONE) { /* execution half */
+ assert(compute || c->s->info.stage == MESA_SHADER_TESS_CTRL); /* only these stages are expected here */
+ ntt_BARRIER(c); /* emitted after any MEMBAR above */
+ }
+}
+
+static void
ntt_emit_intrinsic(struct ntt_compile *c, nir_intrinsic_instr *instr)
{
switch (instr->intrinsic) {
@@ -2491,42 +2535,8 @@ ntt_emit_intrinsic(struct ntt_compile *c, nir_intrinsic_instr *instr)
ntt_emit_image_load_store(c, instr);
break;
- case nir_intrinsic_control_barrier:
- case nir_intrinsic_memory_barrier_tcs_patch:
- ntt_BARRIER(c);
- break;
-
- case nir_intrinsic_memory_barrier:
- ntt_MEMBAR(c, ureg_imm1u(c->ureg,
- TGSI_MEMBAR_SHADER_BUFFER |
- TGSI_MEMBAR_ATOMIC_BUFFER |
- TGSI_MEMBAR_SHADER_IMAGE |
- TGSI_MEMBAR_SHARED));
- break;
-
- case nir_intrinsic_memory_barrier_atomic_counter:
- ntt_MEMBAR(c, ureg_imm1u(c->ureg, TGSI_MEMBAR_ATOMIC_BUFFER));
- break;
-
- case nir_intrinsic_memory_barrier_buffer:
- ntt_MEMBAR(c, ureg_imm1u(c->ureg, TGSI_MEMBAR_SHADER_BUFFER));
- break;
-
- case nir_intrinsic_memory_barrier_image:
- ntt_MEMBAR(c, ureg_imm1u(c->ureg, TGSI_MEMBAR_SHADER_IMAGE));
- break;
-
- case nir_intrinsic_memory_barrier_shared:
- ntt_MEMBAR(c, ureg_imm1u(c->ureg, TGSI_MEMBAR_SHARED));
- break;
-
- case nir_intrinsic_group_memory_barrier:
- ntt_MEMBAR(c, ureg_imm1u(c->ureg,
- TGSI_MEMBAR_SHADER_BUFFER |
- TGSI_MEMBAR_ATOMIC_BUFFER |
- TGSI_MEMBAR_SHADER_IMAGE |
- TGSI_MEMBAR_SHARED |
- TGSI_MEMBAR_THREAD_GROUP));
+ case nir_intrinsic_scoped_barrier:
+ ntt_emit_barrier(c, instr);
break;
case nir_intrinsic_end_primitive:
@@ -3824,6 +3834,8 @@ const void *nir_to_tgsi_options(struct nir_shader *s,
}
} while (progress);
+ NIR_PASS_V(s, nir_opt_combine_barriers, NULL, NULL);
+
if (screen->get_shader_param(screen,
pipe_shader_type_from_mesa(s->info.stage),
PIPE_SHADER_CAP_INTEGERS)) {
@@ -3942,6 +3954,7 @@ static const nir_shader_compiler_options nir_to_tgsi_compiler_options = {
.lower_vector_cmp = true,
.lower_int64_options = nir_lower_imul_2x32_64,
.use_interpolated_input_intrinsics = true,
+ .use_scoped_barrier = true,
/* TGSI doesn't have a semantic for local or global index, just local and
* workgroup id.
diff --git a/src/gallium/drivers/i915/i915_screen.c b/src/gallium/drivers/i915/i915_screen.c
index 01f2cb8f909..fd9bece25dd 100644
--- a/src/gallium/drivers/i915/i915_screen.c
+++ b/src/gallium/drivers/i915/i915_screen.c
@@ -120,6 +120,7 @@ static const nir_shader_compiler_options i915_compiler_options = {
.lower_uniforms_to_ubo = true,
.lower_vector_cmp = true,
.use_interpolated_input_intrinsics = true,
+ .use_scoped_barrier = true,
.force_indirect_unrolling = nir_var_all,
.force_indirect_unrolling_sampler = true,
.max_unroll_iterations = 32,
diff --git a/src/gallium/drivers/nouveau/nv30/nv30_screen.c b/src/gallium/drivers/nouveau/nv30/nv30_screen.c
index 78597f7936e..f1872ca09b9 100644
--- a/src/gallium/drivers/nouveau/nv30/nv30_screen.c
+++ b/src/gallium/drivers/nouveau/nv30/nv30_screen.c
@@ -490,6 +490,7 @@ static const nir_shader_compiler_options nv30_base_compiler_options = {
.no_integers = true,
.use_interpolated_input_intrinsics = true,
+ .use_scoped_barrier = true,
};
static const void *
diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c
index c096dbc0b2c..2eeb61d3172 100644
--- a/src/gallium/drivers/r300/r300_screen.c
+++ b/src/gallium/drivers/r300/r300_screen.c
@@ -507,7 +507,8 @@ static int r300_get_video_param(struct pipe_screen *screen,
.lower_uniforms_to_ubo = true, \
.lower_vector_cmp = true, \
.no_integers = true, \
- .use_interpolated_input_intrinsics = true
+ .use_interpolated_input_intrinsics = true, \
+ .use_scoped_barrier = true
static const nir_shader_compiler_options r500_vs_compiler_options = {
COMMON_NIR_OPTIONS,
diff --git a/src/gallium/drivers/softpipe/sp_screen.c b/src/gallium/drivers/softpipe/sp_screen.c
index 7e096a02162..10468333ff0 100644
--- a/src/gallium/drivers/softpipe/sp_screen.c
+++ b/src/gallium/drivers/softpipe/sp_screen.c
@@ -91,6 +91,7 @@ static const nir_shader_compiler_options sp_compiler_options = {
.lower_int64_options = nir_lower_imul_2x32_64,
.max_unroll_iterations = 32,
.use_interpolated_input_intrinsics = true,
+ .use_scoped_barrier = true,
/* TGSI doesn't have a semantic for local or global index, just local and
* workgroup id.
diff --git a/src/gallium/drivers/svga/svga_screen.c b/src/gallium/drivers/svga/svga_screen.c
index e23eb07e7c8..5f6eb52c729 100644
--- a/src/gallium/drivers/svga/svga_screen.c
+++ b/src/gallium/drivers/svga/svga_screen.c
@@ -752,7 +752,8 @@ vgpu10_get_shader_param(struct pipe_screen *screen,
.lower_vector_cmp = true, \
.lower_cs_local_index_to_id = true, \
.max_unroll_iterations = 32, \
- .use_interpolated_input_intrinsics = true
+ .use_interpolated_input_intrinsics = true, \
+ .use_scoped_barrier = true
#define VGPU10_OPTIONS \
.lower_doubles_options = nir_lower_dfloor, \