author     Eric Anholt <eric@anholt.net>        2021-04-09 16:10:30 -0700
committer  Eric Engestrom <eric@engestrom.ch>   2021-04-18 22:13:27 +0200
commit     8636a1220c22e19978ee26b47fb20dc1346dd128
tree       a3c88478bb278948680637b362aa35602a90c087
parent     743b02f9394869da4f1f9ce2f43f190e3846044b
nir: Generate load_ubo_vec4 directly for !PIPE_CAP_NATIVE_INTEGERS
The prog_to_nir -> NIR-to-TGSI change ended up causing regressions on r300,
and svga against r300-class hardware, because nir_lower_uniforms_to_ubo()
introduced shifts that nir_lower_ubo_vec4() tried to reverse, but that NIR
couldn't prove are no-ops (since shifting up and back down may drop bits),
and the hardware can't do the integer ops.

Instead, make it so that nir_lower_uniforms_to_ubo can generate
nir_intrinsic_load_ubo_vec4 directly for !INTEGER hardware.

Fixes: cf3fc79cd0ab ("st/mesa: Replace mesa_to_tgsi() with prog_to_nir() and nir_to_tgsi().")
Closes: #4602
Reviewed-By: Mike Blumenkrantz <michael.blumenkrantz@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/10194>
(cherry picked from commit 5de3cbbb2e66fcdb7d6219b78b89ad932ecadbd5)
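For context, a minimal sketch of the two lowering paths the message refers to,
written with the NIR builder helpers used in the patch below; the wrapper
function and its parameters are illustrative and not part of this commit:

#include "nir.h"
#include "nir_builder.h"

/* Illustrative only: lower one load_uniform to a UBO-0 load, contrasting the
 * new vec4-unit path with the old multiplier-16 byte-offset path.
 * "uniform_offset" is the intrinsic's indirect offset source and "base" its
 * nir_intrinsic_base(). */
static nir_ssa_def *
emit_lowered_uniform_load(nir_builder *b, nir_ssa_def *uniform_offset,
                          unsigned base, unsigned num_components,
                          unsigned bit_size, bool load_vec4)
{
   nir_ssa_def *ubo_idx = nir_imm_int(b, 0); /* default uniforms live in UBO 0 */

   if (load_vec4) {
      /* New !NATIVE_INTEGERS path: the offset stays in vec4 units, so
       * nir_lower_ubo_vec4 has no shift chain to prove away. */
      return nir_load_ubo_vec4(b, num_components, bit_size, ubo_idx,
                               nir_iadd_imm(b, uniform_offset, base));
   }

   /* Old-style path for unpacked uniforms: scale everything to bytes, which a
    * later nir_lower_ubo_vec4 must shift back down, a round trip NIR could
    * not prove lossless on r300-class hardware. */
   return nir_load_ubo(b, num_components, bit_size, ubo_idx,
                       nir_iadd_imm(b, nir_imul_imm(b, uniform_offset, 16),
                                    base * 16));
}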
-rw-r--r--  .pick_status.json                                       2
-rw-r--r--  docs/gallium/screen.rst                                 3
-rw-r--r--  src/compiler/nir/nir.h                                  4
-rw-r--r--  src/compiler/nir/nir_lower_uniforms_to_ubo.c          103
-rw-r--r--  src/gallium/auxiliary/draw/draw_vs_llvm.c               2
-rw-r--r--  src/gallium/auxiliary/nir/nir_to_tgsi.c                 4
-rw-r--r--  src/gallium/auxiliary/nir/tgsi_to_nir.c                 2
-rw-r--r--  src/gallium/drivers/d3d12/d3d12_compiler.cpp            2
-rw-r--r--  src/gallium/drivers/etnaviv/tests/lower_ubo_tests.cpp   2
-rw-r--r--  src/gallium/drivers/zink/zink_compiler.c                2
-rw-r--r--  src/mesa/state_tracker/st_glsl_to_nir.cpp               6
-rw-r--r--  src/panfrost/midgard/midgard_compile.c                  2
12 files changed, 69 insertions, 65 deletions
diff --git a/.pick_status.json b/.pick_status.json
index 885d41112b7..8791d9dd64b 100644
--- a/.pick_status.json
+++ b/.pick_status.json
@@ -175,7 +175,7 @@
"description": "nir: Generate load_ubo_vec4 directly for !PIPE_CAP_NATIVE_INTEGERS",
"nominated": true,
"nomination_type": 1,
- "resolution": 0,
+ "resolution": 1,
"master_sha": null,
"because_sha": "cf3fc79cd0ab55776bbbabe76237c272ab07133e"
},
diff --git a/docs/gallium/screen.rst b/docs/gallium/screen.rst
index cdc1467c78a..b1b0dc5559d 100644
--- a/docs/gallium/screen.rst
+++ b/docs/gallium/screen.rst
@@ -481,7 +481,8 @@ The integer capabilities:
those bits set, pipe_context::set_constant_buffer(.., 0, ..) is ignored
by the driver, and the driver can throw assertion failures.
* ``PIPE_CAP_PACKED_UNIFORMS``: True if the driver supports packed uniforms
- as opposed to padding to vec4s.
+ as opposed to padding to vec4s. Requires ``PIPE_SHADER_CAP_INTEGERS`` if
+ ``lower_uniforms_to_ubo`` is set.
* ``PIPE_CAP_CONSERVATIVE_RASTER_POST_SNAP_TRIANGLES``: Whether the
``PIPE_CONSERVATIVE_RASTER_POST_SNAP`` mode is supported for triangles.
The post-snap mode means the conservative rasterization occurs after
diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index 28931886ede..a0a646610db 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -3435,8 +3435,6 @@ typedef struct nir_shader_compiler_options {
unsigned max_unroll_iterations;
unsigned max_unroll_iterations_aggressive;
- /* For the non-zero value of the enum corresponds multiplier when
- * calling lower_uniforms_to_ubo */
bool lower_uniforms_to_ubo;
nir_lower_int64_options lower_int64_options;
@@ -4633,7 +4631,7 @@ bool nir_vectorize_tess_levels(nir_shader *shader);
bool nir_lower_fragcolor(nir_shader *shader);
bool nir_lower_fragcoord_wtrans(nir_shader *shader);
void nir_lower_viewport_transform(nir_shader *shader);
-bool nir_lower_uniforms_to_ubo(nir_shader *shader, int multiplier);
+bool nir_lower_uniforms_to_ubo(nir_shader *shader, bool dword_packed, bool load_vec4);
typedef struct nir_lower_subgroups_options {
uint8_t subgroup_size;
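A usage sketch of the updated prototype follows; the screen and shader
variables and the cap queries shown are assumptions for illustration, and the
real call-site updates appear in the hunks below, each deriving the two flags
from its own context:

/* "screen" is the driver's pipe_screen and "nir" the shader being compiled.
 * dword_packed follows PIPE_CAP_PACKED_UNIFORMS; load_vec4 is the new escape
 * hatch for hardware that lacks native integer ops. */
bool dword_packed = screen->get_param(screen, PIPE_CAP_PACKED_UNIFORMS);
bool native_integers = screen->get_shader_param(screen, PIPE_SHADER_FRAGMENT,
                                                PIPE_SHADER_CAP_INTEGERS);
NIR_PASS_V(nir, nir_lower_uniforms_to_ubo, dword_packed, !native_integers);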
diff --git a/src/compiler/nir/nir_lower_uniforms_to_ubo.c b/src/compiler/nir/nir_lower_uniforms_to_ubo.c
index 65107c01046..0b37b705ef8 100644
--- a/src/compiler/nir/nir_lower_uniforms_to_ubo.c
+++ b/src/compiler/nir/nir_lower_uniforms_to_ubo.c
@@ -22,27 +22,24 @@
*/
/*
- * Remap load_uniform intrinsics to UBO accesses of UBO binding point 0.
- * Simultaneously, remap existing UBO accesses by increasing their binding
- * point by 1.
+ * Remap load_uniform intrinsics to nir_load_ubo or nir_load_ubo_vec4 accesses
+ * of UBO binding point 0. Simultaneously, remap existing UBO accesses by
+ * increasing their binding point by 1.
*
- * Note that nir_intrinsic_load_uniform base/ranges can be set in different
- * units, and the multiplier argument caters to supporting these different
- * units.
+ * For PIPE_CAP_PACKED_UNIFORMS, dword_packed should be set to indicate that
+ * nir_intrinsic_load_uniform is in increments of dwords instead of vec4s.
*
- * For example:
- * - st_glsl_to_nir for PIPE_CAP_PACKED_UNIFORMS uses dwords (4 bytes) so the
- * multiplier should be 4
- * - st_glsl_to_nir for !PIPE_CAP_PACKED_UNIFORMS uses vec4s so the
- * multiplier should be 16
- * - tgsi_to_nir uses vec4s, so the multiplier should be 16
+ * If load_vec4 is set, then nir_intrinsic_load_ubo_vec4 will be generated
+ * instead of nir_intrinsic_load_ubo, saving addressing math for hardware
+ * needing aligned vec4 loads in increments of vec4s (such as TGSI CONST file
+ * loads).
*/
#include "nir.h"
#include "nir_builder.h"
static bool
-lower_instr(nir_intrinsic_instr *instr, nir_builder *b, int multiplier)
+lower_instr(nir_intrinsic_instr *instr, nir_builder *b, bool dword_packed, bool load_vec4)
{
b->cursor = nir_before_instr(&instr->instr);
@@ -58,43 +55,51 @@ lower_instr(nir_intrinsic_instr *instr, nir_builder *b, int multiplier)
if (instr->intrinsic == nir_intrinsic_load_uniform) {
nir_ssa_def *ubo_idx = nir_imm_int(b, 0);
- nir_ssa_def *ubo_offset =
- nir_iadd(b, nir_imm_int(b, multiplier * nir_intrinsic_base(instr)),
- nir_imul(b, nir_imm_int(b, multiplier),
- nir_ssa_for_src(b, instr->src[0], 1)));
-
- nir_intrinsic_instr *load =
- nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_ubo);
- load->num_components = instr->num_components;
- load->src[0] = nir_src_for_ssa(ubo_idx);
- load->src[1] = nir_src_for_ssa(ubo_offset);
- assert(instr->dest.ssa.bit_size >= 8);
+ nir_ssa_def *uniform_offset = nir_ssa_for_src(b, instr->src[0], 1);
- /* If it's const, set the alignment to our known constant offset. If
- * not, set it to a pessimistic value based on the multiplier (or the
- * scalar size, for qword loads).
- *
- * We could potentially set up stricter alignments for indirects by
- * knowing what features are enabled in the APIs (see comment in
- * nir_lower_ubo_vec4.c)
- */
- if (nir_src_is_const(instr->src[0])) {
- nir_intrinsic_set_align(load, NIR_ALIGN_MUL_MAX,
- (nir_src_as_uint(instr->src[0]) +
- nir_intrinsic_base(instr) * multiplier) %
- NIR_ALIGN_MUL_MAX);
+ assert(instr->dest.ssa.bit_size >= 8);
+ nir_ssa_def *load_result;
+ if (load_vec4) {
+ /* Don't ask us to generate load_vec4 when you've packed your uniforms
+ * as dwords instead of vec4s.
+ */
+ assert(!dword_packed);
+ load_result = nir_load_ubo_vec4(b, instr->num_components, instr->dest.ssa.bit_size,
+ ubo_idx,
+ nir_iadd_imm(b, uniform_offset, nir_intrinsic_base(instr)));
} else {
- nir_intrinsic_set_align(load, MAX2(multiplier,
- instr->dest.ssa.bit_size / 8), 0);
- }
- nir_ssa_dest_init(&load->instr, &load->dest,
- load->num_components, instr->dest.ssa.bit_size,
- instr->dest.ssa.name);
- nir_builder_instr_insert(b, &load->instr);
- nir_ssa_def_rewrite_uses(&instr->dest.ssa, &load->dest.ssa);
+ /* For PIPE_CAP_PACKED_UNIFORMS, the uniforms are packed with the
+ * base/offset in dword units instead of vec4 units.
+ */
+ int multiplier = dword_packed ? 4 : 16;
+ load_result = nir_load_ubo(b, instr->num_components, instr->dest.ssa.bit_size,
+ ubo_idx,
+ nir_iadd_imm(b, nir_imul_imm(b, uniform_offset, multiplier),
+ nir_intrinsic_base(instr) * multiplier));
+ nir_intrinsic_instr *load = nir_instr_as_intrinsic(load_result->parent_instr);
+
+ /* If it's const, set the alignment to our known constant offset. If
+ * not, set it to a pessimistic value based on the multiplier (or the
+ * scalar size, for qword loads).
+ *
+ * We could potentially set up stricter alignments for indirects by
+ * knowing what features are enabled in the APIs (see comment in
+ * nir_lower_ubo_vec4.c)
+ */
+ if (nir_src_is_const(instr->src[0])) {
+ nir_intrinsic_set_align(load, NIR_ALIGN_MUL_MAX,
+ (nir_src_as_uint(instr->src[0]) +
+ nir_intrinsic_base(instr) * multiplier) %
+ NIR_ALIGN_MUL_MAX);
+ } else {
+ nir_intrinsic_set_align(load, MAX2(multiplier,
+ instr->dest.ssa.bit_size / 8), 0);
+ }
- nir_intrinsic_set_range_base(load, nir_intrinsic_base(instr) * multiplier);
- nir_intrinsic_set_range(load, nir_intrinsic_range(instr) * multiplier);
+ nir_intrinsic_set_range_base(load, nir_intrinsic_base(instr) * multiplier);
+ nir_intrinsic_set_range(load, nir_intrinsic_range(instr) * multiplier);
+ }
+ nir_ssa_def_rewrite_uses(&instr->dest.ssa, load_result);
nir_instr_remove(&instr->instr);
return true;
@@ -104,7 +109,7 @@ lower_instr(nir_intrinsic_instr *instr, nir_builder *b, int multiplier)
}
bool
-nir_lower_uniforms_to_ubo(nir_shader *shader, int multiplier)
+nir_lower_uniforms_to_ubo(nir_shader *shader, bool dword_packed, bool load_vec4)
{
bool progress = false;
@@ -117,7 +122,7 @@ nir_lower_uniforms_to_ubo(nir_shader *shader, int multiplier)
if (instr->type == nir_instr_type_intrinsic)
progress |= lower_instr(nir_instr_as_intrinsic(instr),
&builder,
- multiplier);
+ dword_packed, load_vec4);
}
}
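The rewritten pass supports three addressing modes; the sketch below, modeled
on the etnaviv unit test further down, shows the intended result of each flag
combination (builder setup is elided, and the commented intrinsic shapes are
expectations derived from the code above, not captured output):

nir_ssa_def *off = nir_imm_int(&b, 4);  /* constant offset; base defaults to 0 */
nir_load_uniform(&b, 1, 32, off);

/* dword_packed=false, load_vec4=true  -> load_ubo_vec4(0, off)   in vec4 units */
/* dword_packed=false, load_vec4=false -> load_ubo(0, off * 16)   in bytes      */
/* dword_packed=true,  load_vec4=false -> load_ubo(0, off * 4)    in bytes      */
nir_lower_uniforms_to_ubo(b.shader, false /* dword_packed */, true /* load_vec4 */);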
diff --git a/src/gallium/auxiliary/draw/draw_vs_llvm.c b/src/gallium/auxiliary/draw/draw_vs_llvm.c
index 67ae6a16356..00c63ed5f45 100644
--- a/src/gallium/auxiliary/draw/draw_vs_llvm.c
+++ b/src/gallium/auxiliary/draw/draw_vs_llvm.c
@@ -100,7 +100,7 @@ draw_create_vs_llvm(struct draw_context *draw,
vs->base.state.ir.nir = state->ir.nir;
nir_shader *nir = (nir_shader *)state->ir.nir;
if (!nir->options->lower_uniforms_to_ubo)
- NIR_PASS_V(state->ir.nir, nir_lower_uniforms_to_ubo, 16);
+ NIR_PASS_V(state->ir.nir, nir_lower_uniforms_to_ubo, false, false);
nir_tgsi_scan_shader(state->ir.nir, &vs->base.info, true);
} else {
/* we make a private copy of the tokens */
diff --git a/src/gallium/auxiliary/nir/nir_to_tgsi.c b/src/gallium/auxiliary/nir/nir_to_tgsi.c
index d0307a690c0..1f6270233bf 100644
--- a/src/gallium/auxiliary/nir/nir_to_tgsi.c
+++ b/src/gallium/auxiliary/nir/nir_to_tgsi.c
@@ -2718,8 +2718,8 @@ nir_to_tgsi(struct nir_shader *s,
if (!original_options->lower_uniforms_to_ubo) {
NIR_PASS_V(s, nir_lower_uniforms_to_ubo,
- screen->get_param(screen, PIPE_CAP_PACKED_UNIFORMS) ?
- 4 : 16);
+ screen->get_param(screen, PIPE_CAP_PACKED_UNIFORMS),
+ !native_integers);
}
/* Do lowering so we can directly translate f64/i64 NIR ALU ops to TGSI --
diff --git a/src/gallium/auxiliary/nir/tgsi_to_nir.c b/src/gallium/auxiliary/nir/tgsi_to_nir.c
index a39309a6ac7..242a0e40fa6 100644
--- a/src/gallium/auxiliary/nir/tgsi_to_nir.c
+++ b/src/gallium/auxiliary/nir/tgsi_to_nir.c
@@ -2496,7 +2496,7 @@ ttn_finalize_nir(struct ttn_compile *c, struct pipe_screen *screen)
}
if (nir->options->lower_uniforms_to_ubo)
- NIR_PASS_V(nir, nir_lower_uniforms_to_ubo, 16);
+ NIR_PASS_V(nir, nir_lower_uniforms_to_ubo, false, false);
if (!c->cap_samplers_as_deref)
NIR_PASS_V(nir, nir_lower_samplers);
diff --git a/src/gallium/drivers/d3d12/d3d12_compiler.cpp b/src/gallium/drivers/d3d12/d3d12_compiler.cpp
index d127ae36df5..cbaab4f79f0 100644
--- a/src/gallium/drivers/d3d12/d3d12_compiler.cpp
+++ b/src/gallium/drivers/d3d12/d3d12_compiler.cpp
@@ -147,7 +147,7 @@ compile_nir(struct d3d12_context *ctx, struct d3d12_shader_selector *sel,
uint32_t num_ubos_before_lower_to_ubo = nir->info.num_ubos;
uint32_t num_uniforms_before_lower_to_ubo = nir->num_uniforms;
- NIR_PASS_V(nir, nir_lower_uniforms_to_ubo, 16);
+ NIR_PASS_V(nir, nir_lower_uniforms_to_ubo, false, false);
shader->has_default_ubo0 = num_uniforms_before_lower_to_ubo > 0 &&
nir->info.num_ubos > num_ubos_before_lower_to_ubo;
diff --git a/src/gallium/drivers/etnaviv/tests/lower_ubo_tests.cpp b/src/gallium/drivers/etnaviv/tests/lower_ubo_tests.cpp
index 2b9fa41062e..24342b288a2 100644
--- a/src/gallium/drivers/etnaviv/tests/lower_ubo_tests.cpp
+++ b/src/gallium/drivers/etnaviv/tests/lower_ubo_tests.cpp
@@ -123,7 +123,7 @@ TEST_F(nir_lower_ubo_test, basic)
nir_ssa_def *offset = nir_imm_int(&b, 4);
nir_load_uniform(&b, 1, 32, offset);
- nir_lower_uniforms_to_ubo(b.shader, 16);
+ nir_lower_uniforms_to_ubo(b.shader, false, false);
nir_opt_constant_folding(b.shader);
ASSERT_TRUE(etna_nir_lower_ubo_to_uniform(b.shader));
diff --git a/src/gallium/drivers/zink/zink_compiler.c b/src/gallium/drivers/zink/zink_compiler.c
index f030483817d..3cfb6b4dc32 100644
--- a/src/gallium/drivers/zink/zink_compiler.c
+++ b/src/gallium/drivers/zink/zink_compiler.c
@@ -914,7 +914,7 @@ zink_shader_finalize(struct pipe_screen *pscreen, void *nirptr, bool optimize)
tex_opts.lower_tg4_offsets = true;
NIR_PASS_V(nir, nir_lower_tex, &tex_opts);
}
- NIR_PASS_V(nir, nir_lower_uniforms_to_ubo, 16);
+ NIR_PASS_V(nir, nir_lower_uniforms_to_ubo, false, false);
if (nir->info.stage == MESA_SHADER_GEOMETRY)
NIR_PASS_V(nir, nir_lower_gs_intrinsics, nir_lower_gs_intrinsics_per_stream);
optimize_nir(nir);
diff --git a/src/mesa/state_tracker/st_glsl_to_nir.cpp b/src/mesa/state_tracker/st_glsl_to_nir.cpp
index 92d021e52fc..0a19949ad82 100644
--- a/src/mesa/state_tracker/st_glsl_to_nir.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_nir.cpp
@@ -1005,12 +1005,10 @@ st_unpacked_uniforms_type_size(const struct glsl_type *type, bool bindless)
void
st_nir_lower_uniforms(struct st_context *st, nir_shader *nir)
{
- unsigned multiplier = 16;
if (st->ctx->Const.PackedDriverUniformStorage) {
NIR_PASS_V(nir, nir_lower_io, nir_var_uniform,
st_packed_uniforms_type_size,
(nir_lower_io_options)0);
- multiplier = 4;
} else {
NIR_PASS_V(nir, nir_lower_io, nir_var_uniform,
st_unpacked_uniforms_type_size,
@@ -1018,7 +1016,9 @@ st_nir_lower_uniforms(struct st_context *st, nir_shader *nir)
}
if (nir->options->lower_uniforms_to_ubo)
- NIR_PASS_V(nir, nir_lower_uniforms_to_ubo, multiplier);
+ NIR_PASS_V(nir, nir_lower_uniforms_to_ubo,
+ st->ctx->Const.PackedDriverUniformStorage,
+ !st->ctx->Const.NativeIntegers);
}
/* Last third of preparing nir from glsl, which happens after shader
diff --git a/src/panfrost/midgard/midgard_compile.c b/src/panfrost/midgard/midgard_compile.c
index 638432d39cf..e00967d503e 100644
--- a/src/panfrost/midgard/midgard_compile.c
+++ b/src/panfrost/midgard/midgard_compile.c
@@ -338,7 +338,7 @@ optimise_nir(nir_shader *nir, unsigned quirks, bool is_blend)
* to UBO ordinarily, but it isn't as aggressive as we need. */
NIR_PASS(progress, nir, nir_opt_peephole_select, 64, false, true);
- NIR_PASS_V(nir, nir_lower_uniforms_to_ubo, 16);
+ NIR_PASS_V(nir, nir_lower_uniforms_to_ubo, false, false);
do {
progress = false;