summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Jason Ekstrand <jason@jlekstrand.net> 2020-08-06 22:17:17 -0500
committer: Marge Bot <eric+marge@anholt.net> 2020-11-25 05:37:10 +0000
commit: 9fa1cdfe7ffd9e7ebd83055e2008f3e4b8ada549 (patch)
tree: fe795e28df4bede11bd4aea240afaf0ab302155d
parent: f7e24e559fb632eae54e444b022db9da35922258 (diff)
intel/rt: Implement push constants as global memory reads
They're not really "push" anymore but that's because there is no such thing as push constants in bindless shaders on Intel. They should be fast enough, though.

There is some room for debate here as to whether we want to do the pull in NIR or push it into the back-end. The advantage of doing it in the back-end is that it'd be easier to use MOV_INDIRECT for indirect push constant access rather than falling back to a dataport message.

Reviewed-by: Caio Marcelo de Oliveira Filho <caio.oliveira@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7356>
-rw-r--r-- src/compiler/nir/nir_builder.h 15
-rw-r--r-- src/intel/compiler/brw_nir_lower_rt_intrinsics.c 48
-rw-r--r-- src/intel/compiler/brw_rt.h 3
3 files changed, 66 insertions, 0 deletions
diff --git a/src/compiler/nir/nir_builder.h b/src/compiler/nir/nir_builder.h
index 2bdebe94028..45f09a43817 100644
--- a/src/compiler/nir/nir_builder.h
+++ b/src/compiler/nir/nir_builder.h
@@ -1496,6 +1496,21 @@ nir_store_global(nir_builder *build, nir_ssa_def *addr, unsigned align,
}
/**
 * Emit a nir_intrinsic_load_global_constant instruction through the builder
 * and return its SSA result.
 *
 * \param build           builder whose shader the instruction is created in
 *                        and through which it is inserted
 * \param addr            SSA value installed as the load's only source, src[0]
 * \param align           alignment value recorded on the intrinsic
 * \param num_components  component count of the instruction and its result
 * \param bit_size        bit size passed when initializing the destination
 * \return pointer to the SSA def of the new instruction's destination
 */
static inline nir_ssa_def *
+nir_load_global_constant(nir_builder *build, nir_ssa_def *addr, unsigned align,
+ unsigned num_components, unsigned bit_size)
+{
+ nir_intrinsic_instr *load =
+ nir_intrinsic_instr_create(build->shader, nir_intrinsic_load_global_constant);
+ load->num_components = num_components;
+ load->src[0] = nir_src_for_ssa(addr);
+ nir_intrinsic_set_align(load, align, 0); /* NOTE(review): the trailing 0 is presumably the alignment offset - confirm */
+ nir_ssa_dest_init(&load->instr, &load->dest,
+ num_components, bit_size, NULL); /* NOTE(review): NULL presumably means "no name" for the def - confirm */
+ nir_builder_instr_insert(build, &load->instr); /* instruction is inserted before its def is returned */
+ return &load->dest.ssa;
+}
+
+static inline nir_ssa_def *
nir_load_param(nir_builder *build, uint32_t param_idx)
{
assert(param_idx < build->impl->function->num_params);
diff --git a/src/intel/compiler/brw_nir_lower_rt_intrinsics.c b/src/intel/compiler/brw_nir_lower_rt_intrinsics.c
index 11fa8e49c07..4198aab78f5 100644
--- a/src/intel/compiler/brw_nir_lower_rt_intrinsics.c
+++ b/src/intel/compiler/brw_nir_lower_rt_intrinsics.c
@@ -135,6 +135,54 @@ lower_rt_intrinsics_impl(nir_function_impl *impl,
nir_instr_remove(instr);
break;
+ case nir_intrinsic_load_uniform: {
+ /* We don't want to lower this in the launch trampoline. */
+ if (stage == MESA_SHADER_COMPUTE)
+ break;
+
+ assert(intrin->dest.is_ssa);
+ assert(intrin->src[0].is_ssa);
+
+ unsigned bit_size = intrin->dest.ssa.bit_size;
+ assert(bit_size >= 8 && bit_size % 8 == 0);
+ unsigned byte_size = bit_size / 8;
+
+ if (nir_src_is_const(intrin->src[0])) {
+ uint64_t offset = BRW_RT_PUSH_CONST_OFFSET +
+ nir_intrinsic_base(intrin) +
+ nir_src_as_uint(intrin->src[0]);
+
+ /* Things should be component-aligned. */
+ assert(offset % byte_size == 0);
+
+ unsigned suboffset = offset % 64;
+ uint64_t aligned_offset = offset - suboffset;
+
+ /* Load two just in case we go over a 64B boundary */
+ nir_ssa_def *data[2];
+ for (unsigned i = 0; i < 2; i++) {
+ nir_ssa_def *addr =
+ nir_iadd_imm(b, nir_load_btd_global_arg_addr_intel(b),
+ aligned_offset + i * 64);
+ data[i] = nir_load_global_const_block_intel(b, addr, 16);
+ }
+
+ sysval = nir_extract_bits(b, data, 2, suboffset * 8,
+ intrin->num_components, bit_size);
+ } else {
+ nir_ssa_def *offset32 =
+ nir_iadd_imm(b, intrin->src[0].ssa,
+ BRW_RT_PUSH_CONST_OFFSET +
+ nir_intrinsic_base(intrin));
+ nir_ssa_def *addr =
+ nir_iadd(b, nir_load_btd_global_arg_addr_intel(b),
+ nir_u2u64(b, offset32));
+ sysval = nir_load_global_constant(b, addr, byte_size,
+ intrin->num_components, bit_size);
+ }
+ break;
+ }
+
case nir_intrinsic_load_ray_launch_id:
sysval = nir_channels(b, hotzone, 0xe);
break;
diff --git a/src/intel/compiler/brw_rt.h b/src/intel/compiler/brw_rt.h
index eebb29b1f1b..f153257b6a4 100644
--- a/src/intel/compiler/brw_rt.h
+++ b/src/intel/compiler/brw_rt.h
@@ -31,6 +31,9 @@ extern "C" {
/** Vulkan defines shaderGroupHandleSize = 32 */
#define BRW_RT_SBT_HANDLE_SIZE 32
+/** Offset after the RT dispatch globals at which "push" constants live */
+#define BRW_RT_PUSH_CONST_OFFSET 128
+
/** Stride of the resume SBT */
#define BRW_BTD_RESUME_SBT_STRIDE 8