-rw-r--r--  docs/gallium/screen.rst                               |   3
-rw-r--r--  src/compiler/nir/nir.h                                |   4
-rw-r--r--  src/compiler/nir/nir_lower_uniforms_to_ubo.c          | 103
-rw-r--r--  src/gallium/auxiliary/draw/draw_vs_llvm.c             |   2
-rw-r--r--  src/gallium/auxiliary/nir/nir_to_tgsi.c               |   4
-rw-r--r--  src/gallium/auxiliary/nir/tgsi_to_nir.c               |   2
-rw-r--r--  src/gallium/drivers/d3d12/d3d12_compiler.cpp          |   2
-rw-r--r--  src/gallium/drivers/etnaviv/tests/lower_ubo_tests.cpp |   2
-rw-r--r--  src/gallium/drivers/zink/zink_compiler.c              |   2
-rw-r--r--  src/mesa/state_tracker/st_glsl_to_nir.cpp             |   6
-rw-r--r--  src/panfrost/midgard/midgard_compile.c                |   2
11 files changed, 68 insertions(+), 64 deletions(-)
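At a glance, the change below swaps the pass's unit multiplier for two semantic flags. A summary sketch drawn from the nir.h hunk in this diff (the old multiplier of 4 vs. 16 encoded the same dword-vs-vec4 choice that dword_packed now names directly):

/* Before: base/offset units given as a byte multiplier (4 = dwords, 16 = vec4s). */
bool nir_lower_uniforms_to_ubo(nir_shader *shader, int multiplier);

/* After: dword_packed selects dword units (PIPE_CAP_PACKED_UNIFORMS);
 * load_vec4 emits nir_intrinsic_load_ubo_vec4 instead of nir_intrinsic_load_ubo.
 */
bool nir_lower_uniforms_to_ubo(nir_shader *shader, bool dword_packed, bool load_vec4);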
diff --git a/docs/gallium/screen.rst b/docs/gallium/screen.rst
index fc57e4a0e9e..328b3e3e2f2 100644
--- a/docs/gallium/screen.rst
+++ b/docs/gallium/screen.rst
@@ -481,7 +481,8 @@ The integer capabilities:
   those bits set, pipe_context::set_constant_buffer(.., 0, ..) is ignored
   by the driver, and the driver can throw assertion failures.
 * ``PIPE_CAP_PACKED_UNIFORMS``: True if the driver supports packed uniforms
-  as opposed to padding to vec4s.
+  as opposed to padding to vec4s. Requires ``PIPE_SHADER_CAP_INTEGERS`` if
+  ``lower_uniforms_to_ubo`` is set.
 * ``PIPE_CAP_CONSERVATIVE_RASTER_POST_SNAP_TRIANGLES``: Whether the
   ``PIPE_CONSERVATIVE_RASTER_POST_SNAP`` mode is supported for triangles.
   The post-snap mode means the conservative rasterization occurs after
diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index 28931886ede..a0a646610db 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -3435,8 +3435,6 @@ typedef struct nir_shader_compiler_options {
    unsigned max_unroll_iterations;
    unsigned max_unroll_iterations_aggressive;
 
-   /* For the non-zero value of the enum corresponds multiplier when
-    * calling lower_uniforms_to_ubo */
    bool lower_uniforms_to_ubo;
 
    nir_lower_int64_options lower_int64_options;
@@ -4633,7 +4631,7 @@ bool nir_vectorize_tess_levels(nir_shader *shader);
 bool nir_lower_fragcolor(nir_shader *shader);
 bool nir_lower_fragcoord_wtrans(nir_shader *shader);
 void nir_lower_viewport_transform(nir_shader *shader);
-bool nir_lower_uniforms_to_ubo(nir_shader *shader, int multiplier);
+bool nir_lower_uniforms_to_ubo(nir_shader *shader, bool dword_packed, bool load_vec4);
 
 typedef struct nir_lower_subgroups_options {
    uint8_t subgroup_size;
diff --git a/src/compiler/nir/nir_lower_uniforms_to_ubo.c b/src/compiler/nir/nir_lower_uniforms_to_ubo.c
index 65107c01046..0b37b705ef8 100644
--- a/src/compiler/nir/nir_lower_uniforms_to_ubo.c
+++ b/src/compiler/nir/nir_lower_uniforms_to_ubo.c
@@ -22,27 +22,24 @@
  */
 
 /*
- * Remap load_uniform intrinsics to UBO accesses of UBO binding point 0.
- * Simultaneously, remap existing UBO accesses by increasing their binding
- * point by 1.
+ * Remap load_uniform intrinsics to nir_load_ubo or nir_load_ubo_vec4 accesses
+ * of UBO binding point 0. Simultaneously, remap existing UBO accesses by
+ * increasing their binding point by 1.
  *
- * Note that nir_intrinsic_load_uniform base/ranges can be set in different
- * units, and the multiplier argument caters to supporting these different
- * units.
+ * For PIPE_CAP_PACKED_UNIFORMS, dword_packed should be set to indicate that
+ * nir_intrinsic_load_uniform is in increments of dwords instead of vec4s.
  *
- * For example:
- * - st_glsl_to_nir for PIPE_CAP_PACKED_UNIFORMS uses dwords (4 bytes) so the
- *   multiplier should be 4
- * - st_glsl_to_nir for !PIPE_CAP_PACKED_UNIFORMS uses vec4s so the
- *   multiplier should be 16
- * - tgsi_to_nir uses vec4s, so the multiplier should be 16
+ * If load_vec4 is set, then nir_intrinsic_load_ubo_vec4 will be generated
+ * instead of nir_intrinsic_load_ubo, saving addressing math for hardware
+ * needing aligned vec4 loads in increments of vec4s (such as TGSI CONST file
+ * loads).
  */
 
 #include "nir.h"
 #include "nir_builder.h"
 
 static bool
-lower_instr(nir_intrinsic_instr *instr, nir_builder *b, int multiplier)
+lower_instr(nir_intrinsic_instr *instr, nir_builder *b, bool dword_packed, bool load_vec4)
 {
    b->cursor = nir_before_instr(&instr->instr);
 
@@ -58,43 +55,51 @@
 
    if (instr->intrinsic == nir_intrinsic_load_uniform) {
       nir_ssa_def *ubo_idx = nir_imm_int(b, 0);
-      nir_ssa_def *ubo_offset =
-         nir_iadd(b, nir_imm_int(b, multiplier * nir_intrinsic_base(instr)),
-                  nir_imul(b, nir_imm_int(b, multiplier),
-                           nir_ssa_for_src(b, instr->src[0], 1)));
-
-      nir_intrinsic_instr *load =
-         nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_ubo);
-      load->num_components = instr->num_components;
-      load->src[0] = nir_src_for_ssa(ubo_idx);
-      load->src[1] = nir_src_for_ssa(ubo_offset);
-      assert(instr->dest.ssa.bit_size >= 8);
+      nir_ssa_def *uniform_offset = nir_ssa_for_src(b, instr->src[0], 1);
 
-      /* If it's const, set the alignment to our known constant offset. If
-       * not, set it to a pessimistic value based on the multiplier (or the
-       * scalar size, for qword loads).
-       *
-       * We could potentially set up stricter alignments for indirects by
-       * knowing what features are enabled in the APIs (see comment in
-       * nir_lower_ubo_vec4.c)
-       */
-      if (nir_src_is_const(instr->src[0])) {
-         nir_intrinsic_set_align(load, NIR_ALIGN_MUL_MAX,
-                                 (nir_src_as_uint(instr->src[0]) +
-                                  nir_intrinsic_base(instr) * multiplier) %
-                                 NIR_ALIGN_MUL_MAX);
+      assert(instr->dest.ssa.bit_size >= 8);
+      nir_ssa_def *load_result;
+      if (load_vec4) {
+         /* No asking us to generate load_vec4 when you've packed your uniforms
+          * as dwords instead of vec4s.
+          */
+         assert(!dword_packed);
+         load_result = nir_load_ubo_vec4(b, instr->num_components, instr->dest.ssa.bit_size,
+                                         ubo_idx,
+                                         nir_iadd_imm(b, uniform_offset, nir_intrinsic_base(instr)));
       } else {
-         nir_intrinsic_set_align(load, MAX2(multiplier,
-                                            instr->dest.ssa.bit_size / 8), 0);
-      }
-      nir_ssa_dest_init(&load->instr, &load->dest,
-                        load->num_components, instr->dest.ssa.bit_size,
-                        instr->dest.ssa.name);
-      nir_builder_instr_insert(b, &load->instr);
-      nir_ssa_def_rewrite_uses(&instr->dest.ssa, &load->dest.ssa);
+         /* For PIPE_CAP_PACKED_UNIFORMS, the uniforms are packed with the
+          * base/offset in dword units instead of vec4 units.
+          */
+         int multiplier = dword_packed ? 4 : 16;
+         load_result = nir_load_ubo(b, instr->num_components, instr->dest.ssa.bit_size,
+                                    ubo_idx,
+                                    nir_iadd_imm(b, nir_imul_imm(b, uniform_offset, multiplier),
+                                                 nir_intrinsic_base(instr) * multiplier));
+         nir_intrinsic_instr *load = nir_instr_as_intrinsic(load_result->parent_instr);
+
+         /* If it's const, set the alignment to our known constant offset. If
+          * not, set it to a pessimistic value based on the multiplier (or the
+          * scalar size, for qword loads).
+          *
+          * We could potentially set up stricter alignments for indirects by
+          * knowing what features are enabled in the APIs (see comment in
+          * nir_lower_ubo_vec4.c)
+          */
+         if (nir_src_is_const(instr->src[0])) {
+            nir_intrinsic_set_align(load, NIR_ALIGN_MUL_MAX,
+                                    (nir_src_as_uint(instr->src[0]) +
+                                     nir_intrinsic_base(instr) * multiplier) %
+                                    NIR_ALIGN_MUL_MAX);
+         } else {
+            nir_intrinsic_set_align(load, MAX2(multiplier,
+                                               instr->dest.ssa.bit_size / 8), 0);
+         }
 
-      nir_intrinsic_set_range_base(load, nir_intrinsic_base(instr) * multiplier);
-      nir_intrinsic_set_range(load, nir_intrinsic_range(instr) * multiplier);
+         nir_intrinsic_set_range_base(load, nir_intrinsic_base(instr) * multiplier);
+         nir_intrinsic_set_range(load, nir_intrinsic_range(instr) * multiplier);
+      }
+      nir_ssa_def_rewrite_uses(&instr->dest.ssa, load_result);
 
       nir_instr_remove(&instr->instr);
       return true;
@@ -104,7 +109,7 @@
 }
 
 bool
-nir_lower_uniforms_to_ubo(nir_shader *shader, int multiplier)
+nir_lower_uniforms_to_ubo(nir_shader *shader, bool dword_packed, bool load_vec4)
 {
    bool progress = false;
 
@@ -117,7 +122,7 @@
             if (instr->type == nir_instr_type_intrinsic)
                progress |= lower_instr(nir_instr_as_intrinsic(instr),
                                        &builder,
-                                       multiplier);
+                                       dword_packed, load_vec4);
          }
       }
 
diff --git a/src/gallium/auxiliary/draw/draw_vs_llvm.c b/src/gallium/auxiliary/draw/draw_vs_llvm.c
index 67ae6a16356..00c63ed5f45 100644
--- a/src/gallium/auxiliary/draw/draw_vs_llvm.c
+++ b/src/gallium/auxiliary/draw/draw_vs_llvm.c
@@ -100,7 +100,7 @@ draw_create_vs_llvm(struct draw_context *draw,
       vs->base.state.ir.nir = state->ir.nir;
       nir_shader *nir = (nir_shader *)state->ir.nir;
       if (!nir->options->lower_uniforms_to_ubo)
-         NIR_PASS_V(state->ir.nir, nir_lower_uniforms_to_ubo, 16);
+         NIR_PASS_V(state->ir.nir, nir_lower_uniforms_to_ubo, false, false);
       nir_tgsi_scan_shader(state->ir.nir, &vs->base.info, true);
    } else {
       /* we make a private copy of the tokens */
diff --git a/src/gallium/auxiliary/nir/nir_to_tgsi.c b/src/gallium/auxiliary/nir/nir_to_tgsi.c
index d0307a690c0..1f6270233bf 100644
--- a/src/gallium/auxiliary/nir/nir_to_tgsi.c
+++ b/src/gallium/auxiliary/nir/nir_to_tgsi.c
@@ -2718,8 +2718,8 @@ nir_to_tgsi(struct nir_shader *s,
 
    if (!original_options->lower_uniforms_to_ubo) {
       NIR_PASS_V(s, nir_lower_uniforms_to_ubo,
-                 screen->get_param(screen, PIPE_CAP_PACKED_UNIFORMS) ?
- 4 : 16); + screen->get_param(screen, PIPE_CAP_PACKED_UNIFORMS), + !native_integers); } /* Do lowering so we can directly translate f64/i64 NIR ALU ops to TGSI -- diff --git a/src/gallium/auxiliary/nir/tgsi_to_nir.c b/src/gallium/auxiliary/nir/tgsi_to_nir.c index a39309a6ac7..242a0e40fa6 100644 --- a/src/gallium/auxiliary/nir/tgsi_to_nir.c +++ b/src/gallium/auxiliary/nir/tgsi_to_nir.c @@ -2496,7 +2496,7 @@ ttn_finalize_nir(struct ttn_compile *c, struct pipe_screen *screen) } if (nir->options->lower_uniforms_to_ubo) - NIR_PASS_V(nir, nir_lower_uniforms_to_ubo, 16); + NIR_PASS_V(nir, nir_lower_uniforms_to_ubo, false, false); if (!c->cap_samplers_as_deref) NIR_PASS_V(nir, nir_lower_samplers); diff --git a/src/gallium/drivers/d3d12/d3d12_compiler.cpp b/src/gallium/drivers/d3d12/d3d12_compiler.cpp index d127ae36df5..cbaab4f79f0 100644 --- a/src/gallium/drivers/d3d12/d3d12_compiler.cpp +++ b/src/gallium/drivers/d3d12/d3d12_compiler.cpp @@ -147,7 +147,7 @@ compile_nir(struct d3d12_context *ctx, struct d3d12_shader_selector *sel, uint32_t num_ubos_before_lower_to_ubo = nir->info.num_ubos; uint32_t num_uniforms_before_lower_to_ubo = nir->num_uniforms; - NIR_PASS_V(nir, nir_lower_uniforms_to_ubo, 16); + NIR_PASS_V(nir, nir_lower_uniforms_to_ubo, false, false); shader->has_default_ubo0 = num_uniforms_before_lower_to_ubo > 0 && nir->info.num_ubos > num_ubos_before_lower_to_ubo; diff --git a/src/gallium/drivers/etnaviv/tests/lower_ubo_tests.cpp b/src/gallium/drivers/etnaviv/tests/lower_ubo_tests.cpp index 2b9fa41062e..24342b288a2 100644 --- a/src/gallium/drivers/etnaviv/tests/lower_ubo_tests.cpp +++ b/src/gallium/drivers/etnaviv/tests/lower_ubo_tests.cpp @@ -123,7 +123,7 @@ TEST_F(nir_lower_ubo_test, basic) nir_ssa_def *offset = nir_imm_int(&b, 4); nir_load_uniform(&b, 1, 32, offset); - nir_lower_uniforms_to_ubo(b.shader, 16); + nir_lower_uniforms_to_ubo(b.shader, false, false); nir_opt_constant_folding(b.shader); ASSERT_TRUE(etna_nir_lower_ubo_to_uniform(b.shader)); diff --git a/src/gallium/drivers/zink/zink_compiler.c b/src/gallium/drivers/zink/zink_compiler.c index f030483817d..3cfb6b4dc32 100644 --- a/src/gallium/drivers/zink/zink_compiler.c +++ b/src/gallium/drivers/zink/zink_compiler.c @@ -914,7 +914,7 @@ zink_shader_finalize(struct pipe_screen *pscreen, void *nirptr, bool optimize) tex_opts.lower_tg4_offsets = true; NIR_PASS_V(nir, nir_lower_tex, &tex_opts); } - NIR_PASS_V(nir, nir_lower_uniforms_to_ubo, 16); + NIR_PASS_V(nir, nir_lower_uniforms_to_ubo, false, false); if (nir->info.stage == MESA_SHADER_GEOMETRY) NIR_PASS_V(nir, nir_lower_gs_intrinsics, nir_lower_gs_intrinsics_per_stream); optimize_nir(nir); diff --git a/src/mesa/state_tracker/st_glsl_to_nir.cpp b/src/mesa/state_tracker/st_glsl_to_nir.cpp index 92d021e52fc..0a19949ad82 100644 --- a/src/mesa/state_tracker/st_glsl_to_nir.cpp +++ b/src/mesa/state_tracker/st_glsl_to_nir.cpp @@ -1005,12 +1005,10 @@ st_unpacked_uniforms_type_size(const struct glsl_type *type, bool bindless) void st_nir_lower_uniforms(struct st_context *st, nir_shader *nir) { - unsigned multiplier = 16; if (st->ctx->Const.PackedDriverUniformStorage) { NIR_PASS_V(nir, nir_lower_io, nir_var_uniform, st_packed_uniforms_type_size, (nir_lower_io_options)0); - multiplier = 4; } else { NIR_PASS_V(nir, nir_lower_io, nir_var_uniform, st_unpacked_uniforms_type_size, @@ -1018,7 +1016,9 @@ st_nir_lower_uniforms(struct st_context *st, nir_shader *nir) } if (nir->options->lower_uniforms_to_ubo) - NIR_PASS_V(nir, nir_lower_uniforms_to_ubo, multiplier); + NIR_PASS_V(nir, 
nir_lower_uniforms_to_ubo, + st->ctx->Const.PackedDriverUniformStorage, + !st->ctx->Const.NativeIntegers); } /* Last third of preparing nir from glsl, which happens after shader diff --git a/src/panfrost/midgard/midgard_compile.c b/src/panfrost/midgard/midgard_compile.c index 638432d39cf..e00967d503e 100644 --- a/src/panfrost/midgard/midgard_compile.c +++ b/src/panfrost/midgard/midgard_compile.c @@ -338,7 +338,7 @@ optimise_nir(nir_shader *nir, unsigned quirks, bool is_blend) * to UBO ordinarily, but it isn't as aggressive as we need. */ NIR_PASS(progress, nir, nir_opt_peephole_select, 64, false, true); - NIR_PASS_V(nir, nir_lower_uniforms_to_ubo, 16); + NIR_PASS_V(nir, nir_lower_uniforms_to_ubo, false, false); do { progress = false; |
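For out-of-tree call sites migrating to the new signature, a minimal sketch of the flag selection follows. The helper name and the fragment-stage cap query are illustrative assumptions, not part of this patch; in-tree callers pass values they already track (PackedDriverUniformStorage in st_glsl_to_nir.cpp, native_integers in nir_to_tgsi.c), and callers whose uniforms are already in vec4 units with native integer support simply pass (false, false).

#include "nir.h"
#include "pipe/p_defines.h"
#include "pipe/p_screen.h"

/* Hypothetical helper, for illustration only: chooses the new flag pair the
 * same way the call sites in this patch do.
 */
static void
example_lower_uniforms(struct pipe_screen *screen, nir_shader *nir)
{
   /* dword_packed: load_uniform base/offset are in dword (4-byte) units when
    * uniforms are packed (PIPE_CAP_PACKED_UNIFORMS); otherwise they are in
    * vec4 (16-byte) units. This replaces the old multiplier of 4 vs. 16.
    */
   bool dword_packed = screen->get_param(screen, PIPE_CAP_PACKED_UNIFORMS);

   /* load_vec4: emit nir_intrinsic_load_ubo_vec4 instead of
    * nir_intrinsic_load_ubo for backends wanting aligned vec4 loads in vec4
    * units, such as TGSI's CONST file. The pass asserts that this is not
    * combined with dword_packed. Querying the fragment stage here is an
    * assumption; mirror however your driver tracks native integer support.
    */
   bool load_vec4 = !screen->get_shader_param(screen, PIPE_SHADER_FRAGMENT,
                                              PIPE_SHADER_CAP_INTEGERS);

   NIR_PASS_V(nir, nir_lower_uniforms_to_ubo, dword_packed, load_vec4);
}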