diff options
-rw-r--r-- | src/compiler/nir/nir.h | 15 | ||||
-rw-r--r-- | src/compiler/nir/nir_intrinsics.py | 10 | ||||
-rw-r--r-- | src/compiler/nir/nir_lower_io.c | 5 | ||||
-rw-r--r-- | src/compiler/nir/nir_lower_uniforms_to_ubo.c | 3 | ||||
-rw-r--r-- | src/compiler/nir/nir_opt_load_store_vectorize.c | 10 | ||||
-rw-r--r-- | src/compiler/nir/nir_print.c | 1 | ||||
-rw-r--r-- | src/compiler/nir/nir_validate.c | 3 | ||||
-rw-r--r-- | src/compiler/nir/tests/load_store_vectorizer_tests.cpp | 53 | ||||
-rw-r--r-- | src/compiler/spirv/vtn_variables.c | 3 | ||||
-rw-r--r-- | src/gallium/auxiliary/nir/tgsi_to_nir.c | 18 | ||||
-rw-r--r-- | src/intel/vulkan/anv_nir_apply_pipeline_layout.c | 6 |
11 files changed, 123 insertions, 4 deletions
diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h index bf054e82802..bb3507ff862 100644 --- a/src/compiler/nir/nir.h +++ b/src/compiler/nir/nir.h @@ -1642,8 +1642,18 @@ typedef enum { NIR_INTRINSIC_UCP_ID, /** - * The amount of data, starting from BASE, that this instruction may - * access. This is used to provide bounds if the offset is not constant. + * The start of NIR_INTRINSIC_RANGE. Only present on instructions that + * don't have NIR_INTRINSIC_BASE. + * + * If the [range_base, range] is [0, ~0], then we don't know the possible + * range of the access. + */ + NIR_INTRINSIC_RANGE_BASE, + + /** + * The amount of data, starting from BASE or RANGE_BASE, that this + * instruction may access. This is used to provide bounds if the offset is + * not constant. */ NIR_INTRINSIC_RANGE, @@ -1896,6 +1906,7 @@ INTRINSIC_IDX_ACCESSORS(base, BASE, int) INTRINSIC_IDX_ACCESSORS(stream_id, STREAM_ID, unsigned) INTRINSIC_IDX_ACCESSORS(ucp_id, UCP_ID, unsigned) INTRINSIC_IDX_ACCESSORS(range, RANGE, unsigned) +INTRINSIC_IDX_ACCESSORS(range_base, RANGE_BASE, unsigned) INTRINSIC_IDX_ACCESSORS(desc_set, DESC_SET, unsigned) INTRINSIC_IDX_ACCESSORS(binding, BINDING, unsigned) INTRINSIC_IDX_ACCESSORS(component, COMPONENT, unsigned) diff --git a/src/compiler/nir/nir_intrinsics.py b/src/compiler/nir/nir_intrinsics.py index 0de66f4c2fb..e63900082ab 100644 --- a/src/compiler/nir/nir_intrinsics.py +++ b/src/compiler/nir/nir_intrinsics.py @@ -89,6 +89,10 @@ UCP_ID = "NIR_INTRINSIC_UCP_ID" # may access. This is used to provide bounds if the offset is # not constant. RANGE = "NIR_INTRINSIC_RANGE" +# The offset to the start of the NIR_INTRINSIC_RANGE. This is an alternative +# to NIR_INTRINSIC_BASE for describing the valid range in intrinsics that don't +# have the implicit addition of a base to the offset. +RANGE_BASE = "NIR_INTRINSIC_RANGE_BASE" # The vulkan descriptor set binding for vulkan_resource_index # intrinsic DESC_SET = "NIR_INTRINSIC_DESC_SET" @@ -724,6 +728,10 @@ intrinsic("load_fs_input_interp_deltas", src_comp=[1], dest_comp=3, # range (starting at base) of the data from which we are loading. If # range == 0, then the range is unknown. # +# UBO load operations have a nir_intrinsic_range_base() and +# nir_intrinsic_range() that specify the byte range [range_base, +# range_base+range] of the UBO that the src offset access must lie within. +# # Some load operations such as UBO/SSBO load and per_vertex loads take an # additional source to specify which UBO/SSBO/vertex to load from. # @@ -739,7 +747,7 @@ def load(name, src_comp, indices=[], flags=[]): # src[] = { offset }. load("uniform", [1], [BASE, RANGE, TYPE], [CAN_ELIMINATE, CAN_REORDER]) # src[] = { buffer_index, offset }. -load("ubo", [-1, 1], [ACCESS, ALIGN_MUL, ALIGN_OFFSET], flags=[CAN_ELIMINATE, CAN_REORDER]) +load("ubo", [-1, 1], [ACCESS, ALIGN_MUL, ALIGN_OFFSET, RANGE_BASE, RANGE], flags=[CAN_ELIMINATE, CAN_REORDER]) # src[] = { buffer_index, offset in vec4 units } load("ubo_vec4", [-1, 1], [ACCESS, COMPONENT], flags=[CAN_ELIMINATE, CAN_REORDER]) # src[] = { offset }. diff --git a/src/compiler/nir/nir_lower_io.c b/src/compiler/nir/nir_lower_io.c index d7a095cb9bc..c795d50d9f6 100644 --- a/src/compiler/nir/nir_lower_io.c +++ b/src/compiler/nir/nir_lower_io.c @@ -1030,6 +1030,11 @@ build_explicit_io_load(nir_builder *b, nir_intrinsic_instr *intrin, nir_intrinsic_set_align(load, align_mul, align_offset); + if (op == nir_intrinsic_load_ubo) { + nir_intrinsic_set_range_base(load, 0); + nir_intrinsic_set_range(load, ~0); + } + assert(intrin->dest.is_ssa); load->num_components = num_components; nir_ssa_dest_init(&load->instr, &load->dest, num_components, diff --git a/src/compiler/nir/nir_lower_uniforms_to_ubo.c b/src/compiler/nir/nir_lower_uniforms_to_ubo.c index 78afc7f6232..4d1a5a7fbb5 100644 --- a/src/compiler/nir/nir_lower_uniforms_to_ubo.c +++ b/src/compiler/nir/nir_lower_uniforms_to_ubo.c @@ -76,6 +76,9 @@ lower_instr(nir_intrinsic_instr *instr, nir_builder *b, int multiplier) nir_builder_instr_insert(b, &load->instr); nir_ssa_def_rewrite_uses(&instr->dest.ssa, nir_src_for_ssa(&load->dest.ssa)); + nir_intrinsic_set_range_base(load, nir_intrinsic_base(instr) * multiplier); + nir_intrinsic_set_range(load, nir_intrinsic_range(instr) * multiplier); + nir_instr_remove(&instr->instr); return true; } diff --git a/src/compiler/nir/nir_opt_load_store_vectorize.c b/src/compiler/nir/nir_opt_load_store_vectorize.c index 147b88c3594..370366a25df 100644 --- a/src/compiler/nir/nir_opt_load_store_vectorize.c +++ b/src/compiler/nir/nir_opt_load_store_vectorize.c @@ -812,6 +812,16 @@ vectorize_loads(nir_builder *b, struct vectorize_ctx *ctx, if (first != low && nir_intrinsic_has_base(first->intrin)) nir_intrinsic_set_base(first->intrin, nir_intrinsic_base(low->intrin)); + if (nir_intrinsic_has_range_base(first->intrin)) { + uint32_t low_base = nir_intrinsic_range_base(low->intrin); + uint32_t high_base = nir_intrinsic_range_base(high->intrin); + uint32_t low_end = low_base + nir_intrinsic_range(low->intrin); + uint32_t high_end = high_base + nir_intrinsic_range(high->intrin); + + nir_intrinsic_set_range_base(first->intrin, low_base); + nir_intrinsic_set_range(first->intrin, MAX2(low_end, high_end) - low_base); + } + first->key = low->key; first->offset = low->offset; diff --git a/src/compiler/nir/nir_print.c b/src/compiler/nir/nir_print.c index e31de358172..810d3ce55cf 100644 --- a/src/compiler/nir/nir_print.c +++ b/src/compiler/nir/nir_print.c @@ -816,6 +816,7 @@ print_intrinsic_instr(nir_intrinsic_instr *instr, print_state *state) [NIR_INTRINSIC_STREAM_ID] = "stream-id", [NIR_INTRINSIC_UCP_ID] = "ucp-id", [NIR_INTRINSIC_RANGE] = "range", + [NIR_INTRINSIC_RANGE_BASE] = "range_base", [NIR_INTRINSIC_DESC_SET] = "desc-set", [NIR_INTRINSIC_BINDING] = "binding", [NIR_INTRINSIC_COMPONENT] = "component", diff --git a/src/compiler/nir/nir_validate.c b/src/compiler/nir/nir_validate.c index b7622cd06a9..1a9732365d6 100644 --- a/src/compiler/nir/nir_validate.c +++ b/src/compiler/nir/nir_validate.c @@ -601,6 +601,9 @@ validate_intrinsic_instr(nir_intrinsic_instr *instr, validate_state *state) } case nir_intrinsic_load_ubo: + /* Make sure that the creator didn't forget to set the range_base+range. */ + validate_assert(state, nir_intrinsic_range(instr) != 0); + /* Fall through */ case nir_intrinsic_load_ssbo: case nir_intrinsic_load_shared: case nir_intrinsic_load_global: diff --git a/src/compiler/nir/tests/load_store_vectorizer_tests.cpp b/src/compiler/nir/tests/load_store_vectorizer_tests.cpp index d1a494d61e0..75880c3805f 100644 --- a/src/compiler/nir/tests/load_store_vectorizer_tests.cpp +++ b/src/compiler/nir/tests/load_store_vectorizer_tests.cpp @@ -223,10 +223,27 @@ nir_load_store_vectorize_test::create_indirect_load( } else { load->src[0] = nir_src_for_ssa(offset); } + int byte_size = (bit_size == 1 ? 32 : bit_size) / 8; + if (mode != nir_var_mem_push_const) { - nir_intrinsic_set_align(load, (bit_size == 1 ? 32 : bit_size) / 8, 0); + nir_intrinsic_set_align(load, byte_size, 0); nir_intrinsic_set_access(load, (gl_access_qualifier)access); } + + if (nir_intrinsic_has_range_base(load)) { + uint32_t range = byte_size * components; + int offset_src = res ? 1 : 0; + + if (nir_src_is_const(load->src[offset_src])) { + nir_intrinsic_set_range_base(load, nir_src_as_uint(load->src[offset_src])); + nir_intrinsic_set_range(load, range); + } else { + /* Unknown range */ + nir_intrinsic_set_range_base(load, 0); + nir_intrinsic_set_range(load, ~0); + } + } + nir_builder_instr_insert(b, &load->instr); nir_alu_instr *mov = nir_instr_as_alu(nir_mov(b, &load->dest.ssa)->parent_instr); movs[id] = mov; @@ -380,6 +397,8 @@ TEST_F(nir_load_store_vectorize_test, ubo_load_adjacent) nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ubo, 0); ASSERT_EQ(load->dest.ssa.bit_size, 32); ASSERT_EQ(load->dest.ssa.num_components, 2); + ASSERT_EQ(nir_intrinsic_range_base(load), 0); + ASSERT_EQ(nir_intrinsic_range(load), 8); ASSERT_EQ(nir_src_as_uint(load->src[1]), 0); EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x"); EXPECT_INSTR_SWIZZLES(movs[0x2], load, "y"); @@ -400,11 +419,41 @@ TEST_F(nir_load_store_vectorize_test, ubo_load_intersecting) nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ubo, 0); ASSERT_EQ(load->dest.ssa.bit_size, 32); ASSERT_EQ(load->dest.ssa.num_components, 3); + ASSERT_EQ(nir_intrinsic_range_base(load), 0); + ASSERT_EQ(nir_intrinsic_range(load), 12); ASSERT_EQ(nir_src_as_uint(load->src[1]), 0); EXPECT_INSTR_SWIZZLES(movs[0x1], load, "xy"); EXPECT_INSTR_SWIZZLES(movs[0x2], load, "yz"); } +/* Test for a bug in range handling */ +TEST_F(nir_load_store_vectorize_test, ubo_load_intersecting_range) +{ + create_load(nir_var_mem_ubo, 0, 0, 0x1, 32, 4); + create_load(nir_var_mem_ubo, 0, 4, 0x2, 32, 1); + + nir_validate_shader(b->shader, NULL); + ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 2); + + EXPECT_TRUE(run_vectorizer(nir_var_mem_ubo)); + + ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ubo), 1); + + nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ubo, 0); + ASSERT_EQ(load->dest.ssa.bit_size, 32); + ASSERT_EQ(load->dest.ssa.num_components, 4); + ASSERT_EQ(nir_intrinsic_range_base(load), 0); + ASSERT_EQ(nir_intrinsic_range(load), 16); + ASSERT_EQ(nir_src_as_uint(load->src[1]), 0); + ASSERT_EQ(loads[0x1]->src.ssa, &load->dest.ssa); + ASSERT_EQ(loads[0x2]->src.ssa, &load->dest.ssa); + ASSERT_EQ(loads[0x1]->swizzle[0], 0); + ASSERT_EQ(loads[0x1]->swizzle[1], 1); + ASSERT_EQ(loads[0x1]->swizzle[2], 2); + ASSERT_EQ(loads[0x1]->swizzle[3], 3); + ASSERT_EQ(loads[0x2]->swizzle[0], 1); +} + TEST_F(nir_load_store_vectorize_test, ubo_load_identical) { create_load(nir_var_mem_ubo, 0, 0, 0x1); @@ -420,6 +469,8 @@ TEST_F(nir_load_store_vectorize_test, ubo_load_identical) nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ubo, 0); ASSERT_EQ(load->dest.ssa.bit_size, 32); ASSERT_EQ(load->dest.ssa.num_components, 1); + ASSERT_EQ(nir_intrinsic_range_base(load), 0); + ASSERT_EQ(nir_intrinsic_range(load), 4); ASSERT_EQ(nir_src_as_uint(load->src[1]), 0); ASSERT_EQ(loads[0x1]->src.ssa, &load->dest.ssa); ASSERT_EQ(loads[0x2]->src.ssa, &load->dest.ssa); diff --git a/src/compiler/spirv/vtn_variables.c b/src/compiler/spirv/vtn_variables.c index 820a4c8c97d..f97887254c3 100644 --- a/src/compiler/spirv/vtn_variables.c +++ b/src/compiler/spirv/vtn_variables.c @@ -853,6 +853,9 @@ _vtn_load_store_tail(struct vtn_builder *b, nir_intrinsic_op op, bool load, if (op == nir_intrinsic_load_push_constant) { nir_intrinsic_set_base(instr, access_offset); nir_intrinsic_set_range(instr, access_size); + } else if (op == nir_intrinsic_load_ubo) { + nir_intrinsic_set_range_base(instr, 0); + nir_intrinsic_set_range(instr, ~0); } if (op == nir_intrinsic_load_ubo || diff --git a/src/gallium/auxiliary/nir/tgsi_to_nir.c b/src/gallium/auxiliary/nir/tgsi_to_nir.c index 1070f22537a..fed27c02567 100644 --- a/src/gallium/auxiliary/nir/tgsi_to_nir.c +++ b/src/gallium/auxiliary/nir/tgsi_to_nir.c @@ -75,6 +75,7 @@ struct ttn_compile { nir_variable *samplers[PIPE_MAX_SAMPLERS]; nir_variable *images[PIPE_MAX_SHADER_IMAGES]; nir_variable *ssbo[PIPE_MAX_SHADER_BUFFERS]; + uint32_t ubo_sizes[PIPE_MAX_CONSTANT_BUFFERS]; unsigned num_samplers; unsigned num_images; @@ -315,6 +316,8 @@ ttn_emit_declaration(struct ttn_compile *c) decl->Dim.Index2D != 0) { b->shader->info.num_ubos = MAX2(b->shader->info.num_ubos, decl->Dim.Index2D); + c->ubo_sizes[decl->Dim.Index2D] = + MAX2(c->ubo_sizes[decl->Dim.Index2D], decl->Range.Last * 16); return; } @@ -741,12 +744,27 @@ ttn_src_for_file_and_index(struct ttn_compile *c, unsigned file, unsigned index, /* UBO offsets are in bytes, but TGSI gives them to us in vec4's */ offset = nir_ishl(b, offset, nir_imm_int(b, 4)); nir_intrinsic_set_align(load, 16, 0); + + /* Set a very conservative base/range of the access: 16 bytes if not + * indirect at all, offset to the end of the UBO if the offset is + * indirect, and totally unknown if the block number is indirect. + */ + uint32_t base = index * 16; + nir_intrinsic_set_range_base(load, base); + if (dimind) + nir_intrinsic_set_range(load, ~0); + else if (indirect) + nir_intrinsic_set_range(load, c->ubo_sizes[dim->Index] - base); + else + nir_intrinsic_set_range(load, base + 16); } else { nir_intrinsic_set_base(load, index); if (indirect) { offset = ttn_src_for_indirect(c, indirect); + nir_intrinsic_set_range(load, c->ubo_sizes[0] - index); } else { offset = nir_imm_int(b, 0); + nir_intrinsic_set_range(load, 1); } } load->src[srcn++] = nir_src_for_ssa(offset); diff --git a/src/intel/vulkan/anv_nir_apply_pipeline_layout.c b/src/intel/vulkan/anv_nir_apply_pipeline_layout.c index 4884ba325cd..4787bf44aba 100644 --- a/src/intel/vulkan/anv_nir_apply_pipeline_layout.c +++ b/src/intel/vulkan/anv_nir_apply_pipeline_layout.c @@ -538,6 +538,8 @@ build_ssbo_descriptor_load(const VkDescriptorType desc_type, desc_load->num_components = 4; nir_ssa_dest_init(&desc_load->instr, &desc_load->dest, 4, 32, NULL); nir_builder_instr_insert(b, &desc_load->instr); + nir_intrinsic_set_range_base(desc_load, 0); + nir_intrinsic_set_range(desc_load, ~0); return &desc_load->dest.ssa; } @@ -745,6 +747,8 @@ build_descriptor_load(nir_deref_instr *deref, unsigned offset, nir_ssa_dest_init(&desc_load->instr, &desc_load->dest, num_components, bit_size, NULL); nir_builder_instr_insert(b, &desc_load->instr); + nir_intrinsic_set_range_base(desc_load, 0); + nir_intrinsic_set_range(desc_load, ~0); return &desc_load->dest.ssa; } @@ -846,6 +850,8 @@ lower_load_constant(nir_intrinsic_instr *intrin, load_ubo->src[0] = nir_src_for_ssa(index); load_ubo->src[1] = nir_src_for_ssa(offset); nir_intrinsic_set_align(load_ubo, intrin->dest.ssa.bit_size / 8, 0); + nir_intrinsic_set_range_base(load_ubo, nir_intrinsic_base(intrin)); + nir_intrinsic_set_range(load_ubo, nir_intrinsic_range(intrin)); nir_ssa_dest_init(&load_ubo->instr, &load_ubo->dest, intrin->dest.ssa.num_components, intrin->dest.ssa.bit_size, NULL); |