summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/compiler/nir/nir.h15
-rw-r--r--src/compiler/nir/nir_intrinsics.py10
-rw-r--r--src/compiler/nir/nir_lower_io.c5
-rw-r--r--src/compiler/nir/nir_lower_uniforms_to_ubo.c3
-rw-r--r--src/compiler/nir/nir_opt_load_store_vectorize.c10
-rw-r--r--src/compiler/nir/nir_print.c1
-rw-r--r--src/compiler/nir/nir_validate.c3
-rw-r--r--src/compiler/nir/tests/load_store_vectorizer_tests.cpp53
-rw-r--r--src/compiler/spirv/vtn_variables.c3
-rw-r--r--src/gallium/auxiliary/nir/tgsi_to_nir.c18
-rw-r--r--src/intel/vulkan/anv_nir_apply_pipeline_layout.c6
11 files changed, 123 insertions, 4 deletions
diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index bf054e82802..bb3507ff862 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -1642,8 +1642,18 @@ typedef enum {
NIR_INTRINSIC_UCP_ID,
/**
- * The amount of data, starting from BASE, that this instruction may
- * access. This is used to provide bounds if the offset is not constant.
+ * The start of NIR_INTRINSIC_RANGE. Only present on instructions that
+ * don't have NIR_INTRINSIC_BASE.
+ *
+ * If the [range_base, range] is [0, ~0], then we don't know the possible
+ * range of the access.
+ */
+ NIR_INTRINSIC_RANGE_BASE,
+
+ /**
+ * The amount of data, starting from BASE or RANGE_BASE, that this
+ * instruction may access. This is used to provide bounds if the offset is
+ * not constant.
*/
NIR_INTRINSIC_RANGE,
@@ -1896,6 +1906,7 @@ INTRINSIC_IDX_ACCESSORS(base, BASE, int)
INTRINSIC_IDX_ACCESSORS(stream_id, STREAM_ID, unsigned)
INTRINSIC_IDX_ACCESSORS(ucp_id, UCP_ID, unsigned)
INTRINSIC_IDX_ACCESSORS(range, RANGE, unsigned)
+INTRINSIC_IDX_ACCESSORS(range_base, RANGE_BASE, unsigned)
INTRINSIC_IDX_ACCESSORS(desc_set, DESC_SET, unsigned)
INTRINSIC_IDX_ACCESSORS(binding, BINDING, unsigned)
INTRINSIC_IDX_ACCESSORS(component, COMPONENT, unsigned)
diff --git a/src/compiler/nir/nir_intrinsics.py b/src/compiler/nir/nir_intrinsics.py
index 0de66f4c2fb..e63900082ab 100644
--- a/src/compiler/nir/nir_intrinsics.py
+++ b/src/compiler/nir/nir_intrinsics.py
@@ -89,6 +89,10 @@ UCP_ID = "NIR_INTRINSIC_UCP_ID"
# may access. This is used to provide bounds if the offset is
# not constant.
RANGE = "NIR_INTRINSIC_RANGE"
+# The offset to the start of the NIR_INTRINSIC_RANGE. This is an alternative
+# to NIR_INTRINSIC_BASE for describing the valid range in intrinsics that don't
+# have the implicit addition of a base to the offset.
+RANGE_BASE = "NIR_INTRINSIC_RANGE_BASE"
# The vulkan descriptor set binding for vulkan_resource_index
# intrinsic
DESC_SET = "NIR_INTRINSIC_DESC_SET"
@@ -724,6 +728,10 @@ intrinsic("load_fs_input_interp_deltas", src_comp=[1], dest_comp=3,
# range (starting at base) of the data from which we are loading. If
# range == 0, then the range is unknown.
#
+# UBO load operations have a nir_intrinsic_range_base() and
+# nir_intrinsic_range() that specify the byte range [range_base,
+# range_base+range] of the UBO that the src offset access must lie within.
+#
# Some load operations such as UBO/SSBO load and per_vertex loads take an
# additional source to specify which UBO/SSBO/vertex to load from.
#
@@ -739,7 +747,7 @@ def load(name, src_comp, indices=[], flags=[]):
# src[] = { offset }.
load("uniform", [1], [BASE, RANGE, TYPE], [CAN_ELIMINATE, CAN_REORDER])
# src[] = { buffer_index, offset }.
-load("ubo", [-1, 1], [ACCESS, ALIGN_MUL, ALIGN_OFFSET], flags=[CAN_ELIMINATE, CAN_REORDER])
+load("ubo", [-1, 1], [ACCESS, ALIGN_MUL, ALIGN_OFFSET, RANGE_BASE, RANGE], flags=[CAN_ELIMINATE, CAN_REORDER])
# src[] = { buffer_index, offset in vec4 units }
load("ubo_vec4", [-1, 1], [ACCESS, COMPONENT], flags=[CAN_ELIMINATE, CAN_REORDER])
# src[] = { offset }.
diff --git a/src/compiler/nir/nir_lower_io.c b/src/compiler/nir/nir_lower_io.c
index d7a095cb9bc..c795d50d9f6 100644
--- a/src/compiler/nir/nir_lower_io.c
+++ b/src/compiler/nir/nir_lower_io.c
@@ -1030,6 +1030,11 @@ build_explicit_io_load(nir_builder *b, nir_intrinsic_instr *intrin,
nir_intrinsic_set_align(load, align_mul, align_offset);
+ if (op == nir_intrinsic_load_ubo) {
+ nir_intrinsic_set_range_base(load, 0);
+ nir_intrinsic_set_range(load, ~0);
+ }
+
assert(intrin->dest.is_ssa);
load->num_components = num_components;
nir_ssa_dest_init(&load->instr, &load->dest, num_components,
diff --git a/src/compiler/nir/nir_lower_uniforms_to_ubo.c b/src/compiler/nir/nir_lower_uniforms_to_ubo.c
index 78afc7f6232..4d1a5a7fbb5 100644
--- a/src/compiler/nir/nir_lower_uniforms_to_ubo.c
+++ b/src/compiler/nir/nir_lower_uniforms_to_ubo.c
@@ -76,6 +76,9 @@ lower_instr(nir_intrinsic_instr *instr, nir_builder *b, int multiplier)
nir_builder_instr_insert(b, &load->instr);
nir_ssa_def_rewrite_uses(&instr->dest.ssa, nir_src_for_ssa(&load->dest.ssa));
+ nir_intrinsic_set_range_base(load, nir_intrinsic_base(instr) * multiplier);
+ nir_intrinsic_set_range(load, nir_intrinsic_range(instr) * multiplier);
+
nir_instr_remove(&instr->instr);
return true;
}
diff --git a/src/compiler/nir/nir_opt_load_store_vectorize.c b/src/compiler/nir/nir_opt_load_store_vectorize.c
index 147b88c3594..370366a25df 100644
--- a/src/compiler/nir/nir_opt_load_store_vectorize.c
+++ b/src/compiler/nir/nir_opt_load_store_vectorize.c
@@ -812,6 +812,16 @@ vectorize_loads(nir_builder *b, struct vectorize_ctx *ctx,
if (first != low && nir_intrinsic_has_base(first->intrin))
nir_intrinsic_set_base(first->intrin, nir_intrinsic_base(low->intrin));
+ if (nir_intrinsic_has_range_base(first->intrin)) {
+ uint32_t low_base = nir_intrinsic_range_base(low->intrin);
+ uint32_t high_base = nir_intrinsic_range_base(high->intrin);
+ uint32_t low_end = low_base + nir_intrinsic_range(low->intrin);
+ uint32_t high_end = high_base + nir_intrinsic_range(high->intrin);
+
+ nir_intrinsic_set_range_base(first->intrin, low_base);
+ nir_intrinsic_set_range(first->intrin, MAX2(low_end, high_end) - low_base);
+ }
+
first->key = low->key;
first->offset = low->offset;
diff --git a/src/compiler/nir/nir_print.c b/src/compiler/nir/nir_print.c
index e31de358172..810d3ce55cf 100644
--- a/src/compiler/nir/nir_print.c
+++ b/src/compiler/nir/nir_print.c
@@ -816,6 +816,7 @@ print_intrinsic_instr(nir_intrinsic_instr *instr, print_state *state)
[NIR_INTRINSIC_STREAM_ID] = "stream-id",
[NIR_INTRINSIC_UCP_ID] = "ucp-id",
[NIR_INTRINSIC_RANGE] = "range",
+ [NIR_INTRINSIC_RANGE_BASE] = "range_base",
[NIR_INTRINSIC_DESC_SET] = "desc-set",
[NIR_INTRINSIC_BINDING] = "binding",
[NIR_INTRINSIC_COMPONENT] = "component",
diff --git a/src/compiler/nir/nir_validate.c b/src/compiler/nir/nir_validate.c
index b7622cd06a9..1a9732365d6 100644
--- a/src/compiler/nir/nir_validate.c
+++ b/src/compiler/nir/nir_validate.c
@@ -601,6 +601,9 @@ validate_intrinsic_instr(nir_intrinsic_instr *instr, validate_state *state)
}
case nir_intrinsic_load_ubo:
+ /* Make sure that the creator didn't forget to set the range_base+range. */
+ validate_assert(state, nir_intrinsic_range(instr) != 0);
+ /* Fall through */
case nir_intrinsic_load_ssbo:
case nir_intrinsic_load_shared:
case nir_intrinsic_load_global:
diff --git a/src/compiler/nir/tests/load_store_vectorizer_tests.cpp b/src/compiler/nir/tests/load_store_vectorizer_tests.cpp
index d1a494d61e0..75880c3805f 100644
--- a/src/compiler/nir/tests/load_store_vectorizer_tests.cpp
+++ b/src/compiler/nir/tests/load_store_vectorizer_tests.cpp
@@ -223,10 +223,27 @@ nir_load_store_vectorize_test::create_indirect_load(
} else {
load->src[0] = nir_src_for_ssa(offset);
}
+ int byte_size = (bit_size == 1 ? 32 : bit_size) / 8;
+
if (mode != nir_var_mem_push_const) {
- nir_intrinsic_set_align(load, (bit_size == 1 ? 32 : bit_size) / 8, 0);
+ nir_intrinsic_set_align(load, byte_size, 0);
nir_intrinsic_set_access(load, (gl_access_qualifier)access);
}
+
+ if (nir_intrinsic_has_range_base(load)) {
+ uint32_t range = byte_size * components;
+ int offset_src = res ? 1 : 0;
+
+ if (nir_src_is_const(load->src[offset_src])) {
+ nir_intrinsic_set_range_base(load, nir_src_as_uint(load->src[offset_src]));
+ nir_intrinsic_set_range(load, range);
+ } else {
+ /* Unknown range */
+ nir_intrinsic_set_range_base(load, 0);
+ nir_intrinsic_set_range(load, ~0);
+ }
+ }
+
nir_builder_instr_insert(b, &load->instr);
nir_alu_instr *mov = nir_instr_as_alu(nir_mov(b, &load->dest.ssa)->parent_instr);
movs[id] = mov;
@@ -380,6 +397,8 @@ TEST_F(nir_load_store_vectorize_test, ubo_load_adjacent)
nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ubo, 0);
ASSERT_EQ(load->dest.ssa.bit_size, 32);
ASSERT_EQ(load->dest.ssa.num_components, 2);
+ ASSERT_EQ(nir_intrinsic_range_base(load), 0);
+ ASSERT_EQ(nir_intrinsic_range(load), 8);
ASSERT_EQ(nir_src_as_uint(load->src[1]), 0);
EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
EXPECT_INSTR_SWIZZLES(movs[0x2], load, "y");
@@ -400,11 +419,41 @@ TEST_F(nir_load_store_vectorize_test, ubo_load_intersecting)
nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ubo, 0);
ASSERT_EQ(load->dest.ssa.bit_size, 32);
ASSERT_EQ(load->dest.ssa.num_components, 3);
+ ASSERT_EQ(nir_intrinsic_range_base(load), 0);
+ ASSERT_EQ(nir_intrinsic_range(load), 12);
ASSERT_EQ(nir_src_as_uint(load->src[1]), 0);
EXPECT_INSTR_SWIZZLES(movs[0x1], load, "xy");
EXPECT_INSTR_SWIZZLES(movs[0x2], load, "yz");
}
+/* Test for a bug in range handling */
+TEST_F(nir_load_store_vectorize_test, ubo_load_intersecting_range)
+{
+ create_load(nir_var_mem_ubo, 0, 0, 0x1, 32, 4);
+ create_load(nir_var_mem_ubo, 0, 4, 0x2, 32, 1);
+
+ nir_validate_shader(b->shader, NULL);
+ ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 2);
+
+ EXPECT_TRUE(run_vectorizer(nir_var_mem_ubo));
+
+ ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 1);
+
+ nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ubo, 0);
+ ASSERT_EQ(load->dest.ssa.bit_size, 32);
+ ASSERT_EQ(load->dest.ssa.num_components, 4);
+ ASSERT_EQ(nir_intrinsic_range_base(load), 0);
+ ASSERT_EQ(nir_intrinsic_range(load), 16);
+ ASSERT_EQ(nir_src_as_uint(load->src[1]), 0);
+ ASSERT_EQ(loads[0x1]->src.ssa, &load->dest.ssa);
+ ASSERT_EQ(loads[0x2]->src.ssa, &load->dest.ssa);
+ ASSERT_EQ(loads[0x1]->swizzle[0], 0);
+ ASSERT_EQ(loads[0x1]->swizzle[1], 1);
+ ASSERT_EQ(loads[0x1]->swizzle[2], 2);
+ ASSERT_EQ(loads[0x1]->swizzle[3], 3);
+ ASSERT_EQ(loads[0x2]->swizzle[0], 1);
+}
+
TEST_F(nir_load_store_vectorize_test, ubo_load_identical)
{
create_load(nir_var_mem_ubo, 0, 0, 0x1);
@@ -420,6 +469,8 @@ TEST_F(nir_load_store_vectorize_test, ubo_load_identical)
nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ubo, 0);
ASSERT_EQ(load->dest.ssa.bit_size, 32);
ASSERT_EQ(load->dest.ssa.num_components, 1);
+ ASSERT_EQ(nir_intrinsic_range_base(load), 0);
+ ASSERT_EQ(nir_intrinsic_range(load), 4);
ASSERT_EQ(nir_src_as_uint(load->src[1]), 0);
ASSERT_EQ(loads[0x1]->src.ssa, &load->dest.ssa);
ASSERT_EQ(loads[0x2]->src.ssa, &load->dest.ssa);
diff --git a/src/compiler/spirv/vtn_variables.c b/src/compiler/spirv/vtn_variables.c
index 820a4c8c97d..f97887254c3 100644
--- a/src/compiler/spirv/vtn_variables.c
+++ b/src/compiler/spirv/vtn_variables.c
@@ -853,6 +853,9 @@ _vtn_load_store_tail(struct vtn_builder *b, nir_intrinsic_op op, bool load,
if (op == nir_intrinsic_load_push_constant) {
nir_intrinsic_set_base(instr, access_offset);
nir_intrinsic_set_range(instr, access_size);
+ } else if (op == nir_intrinsic_load_ubo) {
+ nir_intrinsic_set_range_base(instr, 0);
+ nir_intrinsic_set_range(instr, ~0);
}
if (op == nir_intrinsic_load_ubo ||
diff --git a/src/gallium/auxiliary/nir/tgsi_to_nir.c b/src/gallium/auxiliary/nir/tgsi_to_nir.c
index 1070f22537a..fed27c02567 100644
--- a/src/gallium/auxiliary/nir/tgsi_to_nir.c
+++ b/src/gallium/auxiliary/nir/tgsi_to_nir.c
@@ -75,6 +75,7 @@ struct ttn_compile {
nir_variable *samplers[PIPE_MAX_SAMPLERS];
nir_variable *images[PIPE_MAX_SHADER_IMAGES];
nir_variable *ssbo[PIPE_MAX_SHADER_BUFFERS];
+ uint32_t ubo_sizes[PIPE_MAX_CONSTANT_BUFFERS];
unsigned num_samplers;
unsigned num_images;
@@ -315,6 +316,8 @@ ttn_emit_declaration(struct ttn_compile *c)
decl->Dim.Index2D != 0) {
b->shader->info.num_ubos =
MAX2(b->shader->info.num_ubos, decl->Dim.Index2D);
+ c->ubo_sizes[decl->Dim.Index2D] =
+ MAX2(c->ubo_sizes[decl->Dim.Index2D], decl->Range.Last * 16);
return;
}
@@ -741,12 +744,27 @@ ttn_src_for_file_and_index(struct ttn_compile *c, unsigned file, unsigned index,
/* UBO offsets are in bytes, but TGSI gives them to us in vec4's */
offset = nir_ishl(b, offset, nir_imm_int(b, 4));
nir_intrinsic_set_align(load, 16, 0);
+
+ /* Set a very conservative base/range of the access: 16 bytes if not
+ * indirect at all, offset to the end of the UBO if the offset is
+ * indirect, and totally unknown if the block number is indirect.
+ */
+ uint32_t base = index * 16;
+ nir_intrinsic_set_range_base(load, base);
+ if (dimind)
+ nir_intrinsic_set_range(load, ~0);
+ else if (indirect)
+ nir_intrinsic_set_range(load, c->ubo_sizes[dim->Index] - base);
+ else
+ nir_intrinsic_set_range(load, base + 16);
} else {
nir_intrinsic_set_base(load, index);
if (indirect) {
offset = ttn_src_for_indirect(c, indirect);
+ nir_intrinsic_set_range(load, c->ubo_sizes[0] - index);
} else {
offset = nir_imm_int(b, 0);
+ nir_intrinsic_set_range(load, 1);
}
}
load->src[srcn++] = nir_src_for_ssa(offset);
diff --git a/src/intel/vulkan/anv_nir_apply_pipeline_layout.c b/src/intel/vulkan/anv_nir_apply_pipeline_layout.c
index 4884ba325cd..4787bf44aba 100644
--- a/src/intel/vulkan/anv_nir_apply_pipeline_layout.c
+++ b/src/intel/vulkan/anv_nir_apply_pipeline_layout.c
@@ -538,6 +538,8 @@ build_ssbo_descriptor_load(const VkDescriptorType desc_type,
desc_load->num_components = 4;
nir_ssa_dest_init(&desc_load->instr, &desc_load->dest, 4, 32, NULL);
nir_builder_instr_insert(b, &desc_load->instr);
+ nir_intrinsic_set_range_base(desc_load, 0);
+ nir_intrinsic_set_range(desc_load, ~0);
return &desc_load->dest.ssa;
}
@@ -745,6 +747,8 @@ build_descriptor_load(nir_deref_instr *deref, unsigned offset,
nir_ssa_dest_init(&desc_load->instr, &desc_load->dest,
num_components, bit_size, NULL);
nir_builder_instr_insert(b, &desc_load->instr);
+ nir_intrinsic_set_range_base(desc_load, 0);
+ nir_intrinsic_set_range(desc_load, ~0);
return &desc_load->dest.ssa;
}
@@ -846,6 +850,8 @@ lower_load_constant(nir_intrinsic_instr *intrin,
load_ubo->src[0] = nir_src_for_ssa(index);
load_ubo->src[1] = nir_src_for_ssa(offset);
nir_intrinsic_set_align(load_ubo, intrin->dest.ssa.bit_size / 8, 0);
+ nir_intrinsic_set_range_base(load_ubo, nir_intrinsic_base(intrin));
+ nir_intrinsic_set_range(load_ubo, nir_intrinsic_range(intrin));
nir_ssa_dest_init(&load_ubo->instr, &load_ubo->dest,
intrin->dest.ssa.num_components,
intrin->dest.ssa.bit_size, NULL);