summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorEric Anholt <eric@anholt.net>2020-08-14 13:10:02 -0700
committerMarge Bot <eric+marge@anholt.net>2020-09-08 18:20:51 +0000
commitf3b33a5a35e605101d45213bddf52f2f800a52bb (patch)
treef3d31817e38e31de8ffbb7101abd2bda7e5df448
parent3a9356831a962997e37589c7a04e12aaa85a99e3 (diff)
nir: Add a range_base+range to nir_intrinsic_load_ubo().
For UBO accesses to be the same performance as classic GL default uniform block uniforms, we need to be able to push them through the same path. On freedreno, we haven't been uploading UBOs as push constants when they're used for indirect array access, because we don't know what range of the UBO is needed for an access. I believe we won't be able to calculate the range in general in spirv given casts that can happen, so we define a [0, ~0] range to be "We don't know anything". We use that at the moment for all UBO loads except for nir_lower_uniforms_to_ubo, where we now avoid losing the range information that default uniform block loads come with. In a departure from other NIR intrinsics with a "base", I didn't make the base be something you have to add to the src[1] offset. This keeps us from needing to modify all drivers (particularly since the base+offset thing can mean needing to do addition in the backend), makes backend tracking of ranges easy, and makes the range calculations in load_store_vectorizer reasonable. However, this could definitely cause some confusion for people used to the normal NIR base. Reviewed-by: Kristian H. Kristensen <hoegsberg@google.com> Reviewed-by: Rob Clark <robdclark@chromium.org> Reviewed-by: Jason Ekstrand <jason@jlekstrand.net> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6359>
-rw-r--r--src/compiler/nir/nir.h15
-rw-r--r--src/compiler/nir/nir_intrinsics.py10
-rw-r--r--src/compiler/nir/nir_lower_io.c5
-rw-r--r--src/compiler/nir/nir_lower_uniforms_to_ubo.c3
-rw-r--r--src/compiler/nir/nir_opt_load_store_vectorize.c10
-rw-r--r--src/compiler/nir/nir_print.c1
-rw-r--r--src/compiler/nir/nir_validate.c3
-rw-r--r--src/compiler/nir/tests/load_store_vectorizer_tests.cpp53
-rw-r--r--src/compiler/spirv/vtn_variables.c3
-rw-r--r--src/gallium/auxiliary/nir/tgsi_to_nir.c18
-rw-r--r--src/intel/vulkan/anv_nir_apply_pipeline_layout.c6
11 files changed, 123 insertions, 4 deletions
diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index bf054e82802..bb3507ff862 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -1642,8 +1642,18 @@ typedef enum {
NIR_INTRINSIC_UCP_ID,
/**
- * The amount of data, starting from BASE, that this instruction may
- * access. This is used to provide bounds if the offset is not constant.
+ * The start of NIR_INTRINSIC_RANGE. Only present on instructions that
+ * don't have NIR_INTRINSIC_BASE.
+ *
+ * If the [range_base, range] is [0, ~0], then we don't know the possible
+ * range of the access.
+ */
+ NIR_INTRINSIC_RANGE_BASE,
+
+ /**
+ * The amount of data, starting from BASE or RANGE_BASE, that this
+ * instruction may access. This is used to provide bounds if the offset is
+ * not constant.
*/
NIR_INTRINSIC_RANGE,
@@ -1896,6 +1906,7 @@ INTRINSIC_IDX_ACCESSORS(base, BASE, int)
INTRINSIC_IDX_ACCESSORS(stream_id, STREAM_ID, unsigned)
INTRINSIC_IDX_ACCESSORS(ucp_id, UCP_ID, unsigned)
INTRINSIC_IDX_ACCESSORS(range, RANGE, unsigned)
+INTRINSIC_IDX_ACCESSORS(range_base, RANGE_BASE, unsigned)
INTRINSIC_IDX_ACCESSORS(desc_set, DESC_SET, unsigned)
INTRINSIC_IDX_ACCESSORS(binding, BINDING, unsigned)
INTRINSIC_IDX_ACCESSORS(component, COMPONENT, unsigned)
diff --git a/src/compiler/nir/nir_intrinsics.py b/src/compiler/nir/nir_intrinsics.py
index 0de66f4c2fb..e63900082ab 100644
--- a/src/compiler/nir/nir_intrinsics.py
+++ b/src/compiler/nir/nir_intrinsics.py
@@ -89,6 +89,10 @@ UCP_ID = "NIR_INTRINSIC_UCP_ID"
# may access. This is used to provide bounds if the offset is
# not constant.
RANGE = "NIR_INTRINSIC_RANGE"
+# The offset to the start of the NIR_INTRINSIC_RANGE. This is an alternative
+# to NIR_INTRINSIC_BASE for describing the valid range in intrinsics that don't
+# have the implicit addition of a base to the offset.
+RANGE_BASE = "NIR_INTRINSIC_RANGE_BASE"
# The vulkan descriptor set binding for vulkan_resource_index
# intrinsic
DESC_SET = "NIR_INTRINSIC_DESC_SET"
@@ -724,6 +728,10 @@ intrinsic("load_fs_input_interp_deltas", src_comp=[1], dest_comp=3,
# range (starting at base) of the data from which we are loading. If
# range == 0, then the range is unknown.
#
+# UBO load operations have a nir_intrinsic_range_base() and
+# nir_intrinsic_range() that specify the byte range [range_base,
+# range_base+range] of the UBO that the src offset access must lie within.
+#
# Some load operations such as UBO/SSBO load and per_vertex loads take an
# additional source to specify which UBO/SSBO/vertex to load from.
#
@@ -739,7 +747,7 @@ def load(name, src_comp, indices=[], flags=[]):
# src[] = { offset }.
load("uniform", [1], [BASE, RANGE, TYPE], [CAN_ELIMINATE, CAN_REORDER])
# src[] = { buffer_index, offset }.
-load("ubo", [-1, 1], [ACCESS, ALIGN_MUL, ALIGN_OFFSET], flags=[CAN_ELIMINATE, CAN_REORDER])
+load("ubo", [-1, 1], [ACCESS, ALIGN_MUL, ALIGN_OFFSET, RANGE_BASE, RANGE], flags=[CAN_ELIMINATE, CAN_REORDER])
# src[] = { buffer_index, offset in vec4 units }
load("ubo_vec4", [-1, 1], [ACCESS, COMPONENT], flags=[CAN_ELIMINATE, CAN_REORDER])
# src[] = { offset }.
diff --git a/src/compiler/nir/nir_lower_io.c b/src/compiler/nir/nir_lower_io.c
index d7a095cb9bc..c795d50d9f6 100644
--- a/src/compiler/nir/nir_lower_io.c
+++ b/src/compiler/nir/nir_lower_io.c
@@ -1030,6 +1030,11 @@ build_explicit_io_load(nir_builder *b, nir_intrinsic_instr *intrin,
nir_intrinsic_set_align(load, align_mul, align_offset);
+ if (op == nir_intrinsic_load_ubo) {
+ nir_intrinsic_set_range_base(load, 0);
+ nir_intrinsic_set_range(load, ~0);
+ }
+
assert(intrin->dest.is_ssa);
load->num_components = num_components;
nir_ssa_dest_init(&load->instr, &load->dest, num_components,
diff --git a/src/compiler/nir/nir_lower_uniforms_to_ubo.c b/src/compiler/nir/nir_lower_uniforms_to_ubo.c
index 78afc7f6232..4d1a5a7fbb5 100644
--- a/src/compiler/nir/nir_lower_uniforms_to_ubo.c
+++ b/src/compiler/nir/nir_lower_uniforms_to_ubo.c
@@ -76,6 +76,9 @@ lower_instr(nir_intrinsic_instr *instr, nir_builder *b, int multiplier)
nir_builder_instr_insert(b, &load->instr);
nir_ssa_def_rewrite_uses(&instr->dest.ssa, nir_src_for_ssa(&load->dest.ssa));
+ nir_intrinsic_set_range_base(load, nir_intrinsic_base(instr) * multiplier);
+ nir_intrinsic_set_range(load, nir_intrinsic_range(instr) * multiplier);
+
nir_instr_remove(&instr->instr);
return true;
}
diff --git a/src/compiler/nir/nir_opt_load_store_vectorize.c b/src/compiler/nir/nir_opt_load_store_vectorize.c
index 147b88c3594..370366a25df 100644
--- a/src/compiler/nir/nir_opt_load_store_vectorize.c
+++ b/src/compiler/nir/nir_opt_load_store_vectorize.c
@@ -812,6 +812,16 @@ vectorize_loads(nir_builder *b, struct vectorize_ctx *ctx,
if (first != low && nir_intrinsic_has_base(first->intrin))
nir_intrinsic_set_base(first->intrin, nir_intrinsic_base(low->intrin));
+ if (nir_intrinsic_has_range_base(first->intrin)) {
+ uint32_t low_base = nir_intrinsic_range_base(low->intrin);
+ uint32_t high_base = nir_intrinsic_range_base(high->intrin);
+ uint32_t low_end = low_base + nir_intrinsic_range(low->intrin);
+ uint32_t high_end = high_base + nir_intrinsic_range(high->intrin);
+
+ nir_intrinsic_set_range_base(first->intrin, low_base);
+ nir_intrinsic_set_range(first->intrin, MAX2(low_end, high_end) - low_base);
+ }
+
first->key = low->key;
first->offset = low->offset;
diff --git a/src/compiler/nir/nir_print.c b/src/compiler/nir/nir_print.c
index e31de358172..810d3ce55cf 100644
--- a/src/compiler/nir/nir_print.c
+++ b/src/compiler/nir/nir_print.c
@@ -816,6 +816,7 @@ print_intrinsic_instr(nir_intrinsic_instr *instr, print_state *state)
[NIR_INTRINSIC_STREAM_ID] = "stream-id",
[NIR_INTRINSIC_UCP_ID] = "ucp-id",
[NIR_INTRINSIC_RANGE] = "range",
+ [NIR_INTRINSIC_RANGE_BASE] = "range_base",
[NIR_INTRINSIC_DESC_SET] = "desc-set",
[NIR_INTRINSIC_BINDING] = "binding",
[NIR_INTRINSIC_COMPONENT] = "component",
diff --git a/src/compiler/nir/nir_validate.c b/src/compiler/nir/nir_validate.c
index b7622cd06a9..1a9732365d6 100644
--- a/src/compiler/nir/nir_validate.c
+++ b/src/compiler/nir/nir_validate.c
@@ -601,6 +601,9 @@ validate_intrinsic_instr(nir_intrinsic_instr *instr, validate_state *state)
}
case nir_intrinsic_load_ubo:
+ /* Make sure that the creator didn't forget to set the range_base+range. */
+ validate_assert(state, nir_intrinsic_range(instr) != 0);
+ /* Fall through */
case nir_intrinsic_load_ssbo:
case nir_intrinsic_load_shared:
case nir_intrinsic_load_global:
diff --git a/src/compiler/nir/tests/load_store_vectorizer_tests.cpp b/src/compiler/nir/tests/load_store_vectorizer_tests.cpp
index d1a494d61e0..75880c3805f 100644
--- a/src/compiler/nir/tests/load_store_vectorizer_tests.cpp
+++ b/src/compiler/nir/tests/load_store_vectorizer_tests.cpp
@@ -223,10 +223,27 @@ nir_load_store_vectorize_test::create_indirect_load(
} else {
load->src[0] = nir_src_for_ssa(offset);
}
+ int byte_size = (bit_size == 1 ? 32 : bit_size) / 8;
+
if (mode != nir_var_mem_push_const) {
- nir_intrinsic_set_align(load, (bit_size == 1 ? 32 : bit_size) / 8, 0);
+ nir_intrinsic_set_align(load, byte_size, 0);
nir_intrinsic_set_access(load, (gl_access_qualifier)access);
}
+
+ if (nir_intrinsic_has_range_base(load)) {
+ uint32_t range = byte_size * components;
+ int offset_src = res ? 1 : 0;
+
+ if (nir_src_is_const(load->src[offset_src])) {
+ nir_intrinsic_set_range_base(load, nir_src_as_uint(load->src[offset_src]));
+ nir_intrinsic_set_range(load, range);
+ } else {
+ /* Unknown range */
+ nir_intrinsic_set_range_base(load, 0);
+ nir_intrinsic_set_range(load, ~0);
+ }
+ }
+
nir_builder_instr_insert(b, &load->instr);
nir_alu_instr *mov = nir_instr_as_alu(nir_mov(b, &load->dest.ssa)->parent_instr);
movs[id] = mov;
@@ -380,6 +397,8 @@ TEST_F(nir_load_store_vectorize_test, ubo_load_adjacent)
nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ubo, 0);
ASSERT_EQ(load->dest.ssa.bit_size, 32);
ASSERT_EQ(load->dest.ssa.num_components, 2);
+ ASSERT_EQ(nir_intrinsic_range_base(load), 0);
+ ASSERT_EQ(nir_intrinsic_range(load), 8);
ASSERT_EQ(nir_src_as_uint(load->src[1]), 0);
EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
EXPECT_INSTR_SWIZZLES(movs[0x2], load, "y");
@@ -400,11 +419,41 @@ TEST_F(nir_load_store_vectorize_test, ubo_load_intersecting)
nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ubo, 0);
ASSERT_EQ(load->dest.ssa.bit_size, 32);
ASSERT_EQ(load->dest.ssa.num_components, 3);
+ ASSERT_EQ(nir_intrinsic_range_base(load), 0);
+ ASSERT_EQ(nir_intrinsic_range(load), 12);
ASSERT_EQ(nir_src_as_uint(load->src[1]), 0);
EXPECT_INSTR_SWIZZLES(movs[0x1], load, "xy");
EXPECT_INSTR_SWIZZLES(movs[0x2], load, "yz");
}
+/* Test for a bug in range handling */
+TEST_F(nir_load_store_vectorize_test, ubo_load_intersecting_range)
+{
+ create_load(nir_var_mem_ubo, 0, 0, 0x1, 32, 4);
+ create_load(nir_var_mem_ubo, 0, 4, 0x2, 32, 1);
+
+ nir_validate_shader(b->shader, NULL);
+ ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 2);
+
+ EXPECT_TRUE(run_vectorizer(nir_var_mem_ubo));
+
+ ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 1);
+
+ nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ubo, 0);
+ ASSERT_EQ(load->dest.ssa.bit_size, 32);
+ ASSERT_EQ(load->dest.ssa.num_components, 4);
+ ASSERT_EQ(nir_intrinsic_range_base(load), 0);
+ ASSERT_EQ(nir_intrinsic_range(load), 16);
+ ASSERT_EQ(nir_src_as_uint(load->src[1]), 0);
+ ASSERT_EQ(loads[0x1]->src.ssa, &load->dest.ssa);
+ ASSERT_EQ(loads[0x2]->src.ssa, &load->dest.ssa);
+ ASSERT_EQ(loads[0x1]->swizzle[0], 0);
+ ASSERT_EQ(loads[0x1]->swizzle[1], 1);
+ ASSERT_EQ(loads[0x1]->swizzle[2], 2);
+ ASSERT_EQ(loads[0x1]->swizzle[3], 3);
+ ASSERT_EQ(loads[0x2]->swizzle[0], 1);
+}
+
TEST_F(nir_load_store_vectorize_test, ubo_load_identical)
{
create_load(nir_var_mem_ubo, 0, 0, 0x1);
@@ -420,6 +469,8 @@ TEST_F(nir_load_store_vectorize_test, ubo_load_identical)
nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ubo, 0);
ASSERT_EQ(load->dest.ssa.bit_size, 32);
ASSERT_EQ(load->dest.ssa.num_components, 1);
+ ASSERT_EQ(nir_intrinsic_range_base(load), 0);
+ ASSERT_EQ(nir_intrinsic_range(load), 4);
ASSERT_EQ(nir_src_as_uint(load->src[1]), 0);
ASSERT_EQ(loads[0x1]->src.ssa, &load->dest.ssa);
ASSERT_EQ(loads[0x2]->src.ssa, &load->dest.ssa);
diff --git a/src/compiler/spirv/vtn_variables.c b/src/compiler/spirv/vtn_variables.c
index 820a4c8c97d..f97887254c3 100644
--- a/src/compiler/spirv/vtn_variables.c
+++ b/src/compiler/spirv/vtn_variables.c
@@ -853,6 +853,9 @@ _vtn_load_store_tail(struct vtn_builder *b, nir_intrinsic_op op, bool load,
if (op == nir_intrinsic_load_push_constant) {
nir_intrinsic_set_base(instr, access_offset);
nir_intrinsic_set_range(instr, access_size);
+ } else if (op == nir_intrinsic_load_ubo) {
+ nir_intrinsic_set_range_base(instr, 0);
+ nir_intrinsic_set_range(instr, ~0);
}
if (op == nir_intrinsic_load_ubo ||
diff --git a/src/gallium/auxiliary/nir/tgsi_to_nir.c b/src/gallium/auxiliary/nir/tgsi_to_nir.c
index 1070f22537a..fed27c02567 100644
--- a/src/gallium/auxiliary/nir/tgsi_to_nir.c
+++ b/src/gallium/auxiliary/nir/tgsi_to_nir.c
@@ -75,6 +75,7 @@ struct ttn_compile {
nir_variable *samplers[PIPE_MAX_SAMPLERS];
nir_variable *images[PIPE_MAX_SHADER_IMAGES];
nir_variable *ssbo[PIPE_MAX_SHADER_BUFFERS];
+ uint32_t ubo_sizes[PIPE_MAX_CONSTANT_BUFFERS];
unsigned num_samplers;
unsigned num_images;
@@ -315,6 +316,8 @@ ttn_emit_declaration(struct ttn_compile *c)
decl->Dim.Index2D != 0) {
b->shader->info.num_ubos =
MAX2(b->shader->info.num_ubos, decl->Dim.Index2D);
+ c->ubo_sizes[decl->Dim.Index2D] =
+ MAX2(c->ubo_sizes[decl->Dim.Index2D], decl->Range.Last * 16);
return;
}
@@ -741,12 +744,27 @@ ttn_src_for_file_and_index(struct ttn_compile *c, unsigned file, unsigned index,
/* UBO offsets are in bytes, but TGSI gives them to us in vec4's */
offset = nir_ishl(b, offset, nir_imm_int(b, 4));
nir_intrinsic_set_align(load, 16, 0);
+
+ /* Set a very conservative base/range of the access: 16 bytes if not
+ * indirect at all, offset to the end of the UBO if the offset is
+ * indirect, and totally unknown if the block number is indirect.
+ */
+ uint32_t base = index * 16;
+ nir_intrinsic_set_range_base(load, base);
+ if (dimind)
+ nir_intrinsic_set_range(load, ~0);
+ else if (indirect)
+ nir_intrinsic_set_range(load, c->ubo_sizes[dim->Index] - base);
+ else
+ nir_intrinsic_set_range(load, base + 16);
} else {
nir_intrinsic_set_base(load, index);
if (indirect) {
offset = ttn_src_for_indirect(c, indirect);
+ nir_intrinsic_set_range(load, c->ubo_sizes[0] - index);
} else {
offset = nir_imm_int(b, 0);
+ nir_intrinsic_set_range(load, 1);
}
}
load->src[srcn++] = nir_src_for_ssa(offset);
diff --git a/src/intel/vulkan/anv_nir_apply_pipeline_layout.c b/src/intel/vulkan/anv_nir_apply_pipeline_layout.c
index 4884ba325cd..4787bf44aba 100644
--- a/src/intel/vulkan/anv_nir_apply_pipeline_layout.c
+++ b/src/intel/vulkan/anv_nir_apply_pipeline_layout.c
@@ -538,6 +538,8 @@ build_ssbo_descriptor_load(const VkDescriptorType desc_type,
desc_load->num_components = 4;
nir_ssa_dest_init(&desc_load->instr, &desc_load->dest, 4, 32, NULL);
nir_builder_instr_insert(b, &desc_load->instr);
+ nir_intrinsic_set_range_base(desc_load, 0);
+ nir_intrinsic_set_range(desc_load, ~0);
return &desc_load->dest.ssa;
}
@@ -745,6 +747,8 @@ build_descriptor_load(nir_deref_instr *deref, unsigned offset,
nir_ssa_dest_init(&desc_load->instr, &desc_load->dest,
num_components, bit_size, NULL);
nir_builder_instr_insert(b, &desc_load->instr);
+ nir_intrinsic_set_range_base(desc_load, 0);
+ nir_intrinsic_set_range(desc_load, ~0);
return &desc_load->dest.ssa;
}
@@ -846,6 +850,8 @@ lower_load_constant(nir_intrinsic_instr *intrin,
load_ubo->src[0] = nir_src_for_ssa(index);
load_ubo->src[1] = nir_src_for_ssa(offset);
nir_intrinsic_set_align(load_ubo, intrin->dest.ssa.bit_size / 8, 0);
+ nir_intrinsic_set_range_base(load_ubo, nir_intrinsic_base(intrin));
+ nir_intrinsic_set_range(load_ubo, nir_intrinsic_range(intrin));
nir_ssa_dest_init(&load_ubo->instr, &load_ubo->dest,
intrin->dest.ssa.num_components,
intrin->dest.ssa.bit_size, NULL);