diff options
author | Alyssa Rosenzweig <alyssa@rosenzweig.io> | 2023-05-23 13:55:43 -0400 |
---|---|---|
committer | Marge Bot <emma+marge@anholt.net> | 2023-06-13 16:36:10 +0000 |
commit | 749b4817ad24e2b6e170eb942ffbd0398dcea467 (patch) | |
tree | a00d612a2ab28b7e2295ff2c936a34356eef3c3a /src | |
parent | d3aca1a75868cb3c184509bcdffddc2b9310a6ce (diff) |
ntt: Use scoped barriers
In addition to bringing us one backend closer to the scoped-only future, this
improves the generated code in cases like:

    memoryBarrierBuffer();
    memoryBarrierShared();
    controlBarrier();

With scoped_barriers + nir_opt_combine_barriers, we now emit only one MEMBAR
instruction (and a BARRIER) rather than two MEMBARs.
Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Reviewed-by: Jesse Natalie <jenatali@microsoft.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/23191>
Diffstat (limited to 'src')
-rw-r--r-- | src/gallium/auxiliary/nir/nir_to_tgsi.c | 85 | ||||
-rw-r--r-- | src/gallium/drivers/i915/i915_screen.c | 1 | ||||
-rw-r--r-- | src/gallium/drivers/nouveau/nv30/nv30_screen.c | 1 | ||||
-rw-r--r-- | src/gallium/drivers/r300/r300_screen.c | 3 | ||||
-rw-r--r-- | src/gallium/drivers/softpipe/sp_screen.c | 1 | ||||
-rw-r--r-- | src/gallium/drivers/svga/svga_screen.c | 3 |
6 files changed, 56 insertions, 38 deletions
```diff
diff --git a/src/gallium/auxiliary/nir/nir_to_tgsi.c b/src/gallium/auxiliary/nir/nir_to_tgsi.c
index b25eb3b484c..35465d2dfad 100644
--- a/src/gallium/auxiliary/nir/nir_to_tgsi.c
+++ b/src/gallium/auxiliary/nir/nir_to_tgsi.c
@@ -2341,6 +2341,50 @@ ntt_emit_load_sysval(struct ntt_compile *c, nir_intrinsic_instr *instr)
 }
 
 static void
+ntt_emit_barrier(struct ntt_compile *c, nir_intrinsic_instr *intr)
+{
+   bool compute = gl_shader_stage_is_compute(c->s->info.stage);
+
+   if (nir_intrinsic_memory_scope(intr) != NIR_SCOPE_NONE) {
+      nir_variable_mode modes = nir_intrinsic_memory_modes(intr);
+      unsigned membar = 0;
+
+      if (modes & nir_var_image)
+         membar |= TGSI_MEMBAR_SHADER_IMAGE;
+
+      if (modes & nir_var_mem_shared)
+         membar |= TGSI_MEMBAR_SHARED;
+
+      /* Atomic counters are lowered to SSBOs, there's no NIR mode corresponding
+       * exactly to atomics. Take the closest match.
+       */
+      if (modes & nir_var_mem_ssbo)
+         membar |= TGSI_MEMBAR_SHADER_BUFFER | TGSI_MEMBAR_ATOMIC_BUFFER;
+
+      if (modes & nir_var_mem_global)
+         membar |= TGSI_MEMBAR_SHADER_BUFFER;
+
+      /* If we only need workgroup scope (not device-scope), we might be able to
+       * optimize a bit.
+       */
+      if (membar && compute &&
+          nir_intrinsic_memory_scope(intr) == NIR_SCOPE_WORKGROUP) {
+
+         membar |= TGSI_MEMBAR_THREAD_GROUP;
+      }
+
+      /* Only emit a memory barrier if there are any relevant modes */
+      if (membar)
+         ntt_MEMBAR(c, ureg_imm1u(c->ureg, membar));
+   }
+
+   if (nir_intrinsic_execution_scope(intr) != NIR_SCOPE_NONE) {
+      assert(compute || c->s->info.stage == MESA_SHADER_TESS_CTRL);
+      ntt_BARRIER(c);
+   }
+}
+
+static void
 ntt_emit_intrinsic(struct ntt_compile *c, nir_intrinsic_instr *instr)
 {
    switch (instr->intrinsic) {
@@ -2491,42 +2535,8 @@ ntt_emit_intrinsic(struct ntt_compile *c, nir_intrinsic_instr *instr)
       ntt_emit_image_load_store(c, instr);
       break;
 
-   case nir_intrinsic_control_barrier:
-   case nir_intrinsic_memory_barrier_tcs_patch:
-      ntt_BARRIER(c);
-      break;
-
-   case nir_intrinsic_memory_barrier:
-      ntt_MEMBAR(c, ureg_imm1u(c->ureg,
-                               TGSI_MEMBAR_SHADER_BUFFER |
-                               TGSI_MEMBAR_ATOMIC_BUFFER |
-                               TGSI_MEMBAR_SHADER_IMAGE |
-                               TGSI_MEMBAR_SHARED));
-      break;
-
-   case nir_intrinsic_memory_barrier_atomic_counter:
-      ntt_MEMBAR(c, ureg_imm1u(c->ureg, TGSI_MEMBAR_ATOMIC_BUFFER));
-      break;
-
-   case nir_intrinsic_memory_barrier_buffer:
-      ntt_MEMBAR(c, ureg_imm1u(c->ureg, TGSI_MEMBAR_SHADER_BUFFER));
-      break;
-
-   case nir_intrinsic_memory_barrier_image:
-      ntt_MEMBAR(c, ureg_imm1u(c->ureg, TGSI_MEMBAR_SHADER_IMAGE));
-      break;
-
-   case nir_intrinsic_memory_barrier_shared:
-      ntt_MEMBAR(c, ureg_imm1u(c->ureg, TGSI_MEMBAR_SHARED));
-      break;
-
-   case nir_intrinsic_group_memory_barrier:
-      ntt_MEMBAR(c, ureg_imm1u(c->ureg,
-                               TGSI_MEMBAR_SHADER_BUFFER |
-                               TGSI_MEMBAR_ATOMIC_BUFFER |
-                               TGSI_MEMBAR_SHADER_IMAGE |
-                               TGSI_MEMBAR_SHARED |
-                               TGSI_MEMBAR_THREAD_GROUP));
+   case nir_intrinsic_scoped_barrier:
+      ntt_emit_barrier(c, instr);
       break;
 
    case nir_intrinsic_end_primitive:
@@ -3824,6 +3834,8 @@ const void *nir_to_tgsi_options(struct nir_shader *s,
       }
    } while (progress);
 
+   NIR_PASS_V(s, nir_opt_combine_barriers, NULL, NULL);
+
    if (screen->get_shader_param(screen,
                                 pipe_shader_type_from_mesa(s->info.stage),
                                 PIPE_SHADER_CAP_INTEGERS)) {
@@ -3942,6 +3954,7 @@ static const nir_shader_compiler_options nir_to_tgsi_compiler_options = {
    .lower_vector_cmp = true,
    .lower_int64_options = nir_lower_imul_2x32_64,
    .use_interpolated_input_intrinsics = true,
+   .use_scoped_barrier = true,
 
    /* TGSI doesn't have a semantic for local or global index, just local and
     * workgroup id.
diff --git a/src/gallium/drivers/i915/i915_screen.c b/src/gallium/drivers/i915/i915_screen.c
index 01f2cb8f909..fd9bece25dd 100644
--- a/src/gallium/drivers/i915/i915_screen.c
+++ b/src/gallium/drivers/i915/i915_screen.c
@@ -120,6 +120,7 @@ static const nir_shader_compiler_options i915_compiler_options = {
    .lower_uniforms_to_ubo = true,
    .lower_vector_cmp = true,
    .use_interpolated_input_intrinsics = true,
+   .use_scoped_barrier = true,
    .force_indirect_unrolling = nir_var_all,
    .force_indirect_unrolling_sampler = true,
    .max_unroll_iterations = 32,
diff --git a/src/gallium/drivers/nouveau/nv30/nv30_screen.c b/src/gallium/drivers/nouveau/nv30/nv30_screen.c
index 78597f7936e..f1872ca09b9 100644
--- a/src/gallium/drivers/nouveau/nv30/nv30_screen.c
+++ b/src/gallium/drivers/nouveau/nv30/nv30_screen.c
@@ -490,6 +490,7 @@ static const nir_shader_compiler_options nv30_base_compiler_options = {
 
    .no_integers = true,
    .use_interpolated_input_intrinsics = true,
+   .use_scoped_barrier = true,
 };
 
 static const void *
diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c
index c096dbc0b2c..2eeb61d3172 100644
--- a/src/gallium/drivers/r300/r300_screen.c
+++ b/src/gallium/drivers/r300/r300_screen.c
@@ -507,7 +507,8 @@ static int r300_get_video_param(struct pipe_screen *screen,
    .lower_uniforms_to_ubo = true, \
    .lower_vector_cmp = true, \
    .no_integers = true, \
-   .use_interpolated_input_intrinsics = true
+   .use_interpolated_input_intrinsics = true, \
+   .use_scoped_barrier = true
 
 static const nir_shader_compiler_options r500_vs_compiler_options = {
    COMMON_NIR_OPTIONS,
diff --git a/src/gallium/drivers/softpipe/sp_screen.c b/src/gallium/drivers/softpipe/sp_screen.c
index 7e096a02162..10468333ff0 100644
--- a/src/gallium/drivers/softpipe/sp_screen.c
+++ b/src/gallium/drivers/softpipe/sp_screen.c
@@ -91,6 +91,7 @@ static const nir_shader_compiler_options sp_compiler_options = {
    .lower_int64_options = nir_lower_imul_2x32_64,
    .max_unroll_iterations = 32,
    .use_interpolated_input_intrinsics = true,
+   .use_scoped_barrier = true,
 
    /* TGSI doesn't have a semantic for local or global index, just local and
     * workgroup id.
diff --git a/src/gallium/drivers/svga/svga_screen.c b/src/gallium/drivers/svga/svga_screen.c
index e23eb07e7c8..5f6eb52c729 100644
--- a/src/gallium/drivers/svga/svga_screen.c
+++ b/src/gallium/drivers/svga/svga_screen.c
@@ -752,7 +752,8 @@ vgpu10_get_shader_param(struct pipe_screen *screen,
    .lower_vector_cmp = true, \
    .lower_cs_local_index_to_id = true, \
    .max_unroll_iterations = 32, \
-   .use_interpolated_input_intrinsics = true
+   .use_interpolated_input_intrinsics = true, \
+   .use_scoped_barrier = true
 
 #define VGPU10_OPTIONS \
    .lower_doubles_options = nir_lower_dfloor, \
```