summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Eric Anholt <eric@anholt.net> 2020-08-18 11:38:41 -0700
committer Eric Anholt <eric@anholt.net> 2020-08-24 09:53:35 -0700
commit 73616598bd25764789f858fdadf7c6cd9dccb49c (patch)
tree c3e77636c008b705e4b23dffed07f13521abce16
parent d84a16e4817c33eb8cf7d47cc15e6a3ee6385cd1 (diff)
nir: Add a lowering pass for backends wanting load_ubo with vec4 offsets.
This is very common for backends -- r600, freedreno, and nir_to_tgsi all needed versions of it. Make a common intrinsic to use for it with a shared, slightly-tuned-from-ir3 lowering pass. Reviewed-by: Gert Wollny <gert.wollny@collabora.com> Reviewed-by: Kristian H. Kristensen <hoegsberg@google.com> Reviewed-by: Connor Abbott <cwabbott0@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6378>
-rw-r--r-- src/compiler/Makefile.sources 1
-rw-r--r-- src/compiler/nir/meson.build 1
-rw-r--r-- src/compiler/nir/nir.h 2
-rw-r--r-- src/compiler/nir/nir_intrinsics.py 2
-rw-r--r-- src/compiler/nir/nir_lower_ubo_vec4.c 168
-rw-r--r-- src/compiler/nir/nir_validate.c 8
6 files changed, 182 insertions, 0 deletions
diff --git a/src/compiler/Makefile.sources b/src/compiler/Makefile.sources
index 17a907da54d..d2a2a161eb5 100644
--- a/src/compiler/Makefile.sources
+++ b/src/compiler/Makefile.sources
@@ -292,6 +292,7 @@ NIR_FILES = \
nir/nir_lower_tex.c \
nir/nir_lower_to_source_mods.c \
nir/nir_lower_two_sided_color.c \
+ nir/nir_lower_ubo_vec4.c \
nir/nir_lower_uniforms_to_ubo.c \
nir/nir_lower_vars_to_ssa.c \
nir/nir_lower_var_copies.c \
diff --git a/src/compiler/nir/meson.build b/src/compiler/nir/meson.build
index b56c73324d4..71fed704af6 100644
--- a/src/compiler/nir/meson.build
+++ b/src/compiler/nir/meson.build
@@ -180,6 +180,7 @@ files_libnir = files(
'nir_lower_wpos_ytransform.c',
'nir_lower_wrmasks.c',
'nir_lower_bit_size.c',
+ 'nir_lower_ubo_vec4.c',
'nir_lower_uniforms_to_ubo.c',
'nir_metadata.c',
'nir_move_vec_src_uses_to_dest.c',
diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index aa7ff44985e..49d61484d73 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -4051,6 +4051,8 @@ bool nir_link_opt_varyings(nir_shader *producer, nir_shader *consumer);
bool nir_lower_amul(nir_shader *shader,
int (*type_size)(const struct glsl_type *, bool));
+bool nir_lower_ubo_vec4(nir_shader *shader);
+
void nir_assign_io_var_locations(nir_shader *shader,
nir_variable_mode mode,
unsigned *size,
diff --git a/src/compiler/nir/nir_intrinsics.py b/src/compiler/nir/nir_intrinsics.py
index 80c61650f78..ee02b86e502 100644
--- a/src/compiler/nir/nir_intrinsics.py
+++ b/src/compiler/nir/nir_intrinsics.py
@@ -735,6 +735,8 @@ def load(name, src_comp, indices=[], flags=[]):
load("uniform", [1], [BASE, RANGE, TYPE], [CAN_ELIMINATE, CAN_REORDER])
# src[] = { buffer_index, offset }.
load("ubo", [-1, 1], [ACCESS, ALIGN_MUL, ALIGN_OFFSET], flags=[CAN_ELIMINATE, CAN_REORDER])
+# src[] = { buffer_index, offset in vec4 units }
+load("ubo_vec4", [-1, 1], [ACCESS, COMPONENT], flags=[CAN_ELIMINATE, CAN_REORDER])
# src[] = { offset }.
load("input", [1], [BASE, COMPONENT, TYPE], [CAN_ELIMINATE, CAN_REORDER])
# src[] = { vertex_id, offset }.
diff --git a/src/compiler/nir/nir_lower_ubo_vec4.c b/src/compiler/nir/nir_lower_ubo_vec4.c
new file mode 100644
index 00000000000..2f739ffd63a
--- /dev/null
+++ b/src/compiler/nir/nir_lower_ubo_vec4.c
@@ -0,0 +1,168 @@
+/*
+ * Copyright © 2020 Google LLC
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+/* Lowers nir_intrinsic_load_ubo() to nir_intrinsic_load_ubo_vec4() taking an
+ * offset in vec4 units. This is a fairly common mode of UBO addressing for
+ * hardware to have, and it gives NIR a chance to optimize the addressing math
+ * and CSE the loads.
+ *
+ * We assume that the UBO loads do not cross a vec4 boundary. This is true
+ * for:
+ * - std140 (GLSL 1.40, GLSL ES)
+ * - Vulkan "Extended Layout" (the baseline for UBOs)
+ *
+ * but not:
+ *
+ * - GLSL 4.30's new packed mode (enabled by PIPE_CAP_LOAD_CONSTBUF) where
+ * vec3 arrays are packed tightly.
+ *
+ * - Vulkan's scalarBlockLayout optional feature:
+ *
+ * "A member is defined to improperly straddle if either of the following are
+ * true:
+ *
+ * • It is a vector with total size less than or equal to 16 bytes, and has
+ * Offset decorations placing its first byte at F and its last byte at L
+ * where floor(F / 16) != floor(L / 16).
+ * • It is a vector with total size greater than 16 bytes and has its Offset
+ * decorations placing its first byte at a non-integer multiple of 16.
+ *
+ * [...]
+ *
+ * Unless the scalarBlockLayout feature is enabled on the device:
+ *
+ * • Vectors must not improperly straddle, as defined above."
+ */
+
+#include "nir.h"
+#include "nir_builder.h"
+
+/* Instruction filter for the pass: selects exactly the load_ubo intrinsics.
+ * Any non-intrinsic instruction, and any other intrinsic, is rejected.  The
+ * data argument is not used.
+ */
+static bool
+nir_lower_ubo_vec4_filter(const nir_instr *instr, const void *data)
+{
+   if (instr->type == nir_instr_type_intrinsic) {
+      const nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
+
+      return intr->intrinsic == nir_intrinsic_load_ubo;
+   }
+
+   return false;
+}
+
+/* Lowering callback: builds a load_ubo_vec4 that replaces one load_ubo.
+ *
+ * The byte offset in src[1] is shifted right by 4 to obtain the vec4 index.
+ * Depending on what the load's align_mul/align_offset say about the position
+ * inside that vec4, one of three strategies is used:
+ *
+ *  - align_mul is a multiple of 16: the position within the vec4 is known at
+ *    compile time, so it is stored in the new intrinsic's COMPONENT index and
+ *    only the requested components are loaded.
+ *  - align_mul is 8 and the data fits in one 8-byte half: the whole vec4 is
+ *    loaded and a single bcsel on bit 3 of the byte offset picks the half.
+ *  - otherwise: the whole vec4 is loaded and every result channel is
+ *    extracted with an index computed from the byte offset.
+ *
+ * As the comment at the top of this file states, loads are assumed not to
+ * cross a vec4 boundary.
+ *
+ * b      builder; its cursor is moved to just before instr.
+ * instr  the load_ubo intrinsic accepted by the filter.
+ * data   unused.
+ *
+ * Returns the SSA value holding the components the original load produced.
+ */
+static nir_ssa_def *
+nir_lower_ubo_vec4_lower(nir_builder *b, nir_instr *instr, void *data)
+{
+   b->cursor = nir_before_instr(instr);
+
+   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
+
+   nir_ssa_def *byte_offset = nir_ssa_for_src(b, intr->src[1], 1);
+
+   nir_intrinsic_instr *load =
+      nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_ubo_vec4);
+   nir_src_copy(&load->src[0], &intr->src[0], &load->instr);
+   /* 16 bytes per vec4, so the vec4 index is byte_offset >> 4. */
+   load->src[1] = nir_src_for_ssa(nir_ushr_imm(b, byte_offset, 4));
+
+   unsigned align_mul = nir_intrinsic_align_mul(intr);
+   unsigned align_offset = nir_intrinsic_align_offset(intr);
+
+   int chan_size_bytes = intr->dest.ssa.bit_size / 8;
+   int chans_per_vec4 = 16 / chan_size_bytes;
+
+   /* We don't care if someone figured out that things are aligned beyond
+    * vec4.
+    */
+   align_mul = MIN2(align_mul, 16);
+   align_offset &= 15;
+   assert(align_offset % chan_size_bytes == 0);
+
+   /* We assume that loads don't cross vec4 boundaries, just that we need
+    * to extract from within the vec4 when we don't have a good alignment.
+    * A load that fills a whole vec4 can therefore only start at its base.
+    */
+   if (intr->num_components == chans_per_vec4) {
+      align_mul = 16;
+      align_offset = 0;
+   }
+
+   /* Without a compile-time position inside the vec4 we have to fetch all of
+    * it and pick the channels afterwards.
+    */
+   unsigned num_components = intr->num_components;
+   bool aligned_mul = align_mul % 16 == 0;
+   if (!aligned_mul)
+      num_components = chans_per_vec4;
+
+   nir_ssa_dest_init(&load->instr, &load->dest,
+                     num_components, intr->dest.ssa.bit_size,
+                     intr->dest.ssa.name);
+   load->num_components = num_components;
+   nir_builder_instr_insert(b, &load->instr);
+
+   nir_ssa_def *result = &load->dest.ssa;
+
+   int align_chan_offset = align_offset / chan_size_bytes;
+   unsigned load_size_bytes = chan_size_bytes * intr->num_components;
+
+   if (aligned_mul) {
+      /* For an aligned load, just ask the backend to load from the known
+       * offset's component.
+       */
+      nir_intrinsic_set_component(load, align_chan_offset);
+   } else if (align_mul == 8 && align_offset + load_size_bytes <= 8) {
+      /* Special case: Loading small vectors from offset % 8 == 0 can be
+       * done with just one bcsel.
+       *
+       * This is only valid when the requested data fits inside a single
+       * 8-byte half.  Otherwise high_channels would name channels past the
+       * end of the loaded vec4, so such loads take the general path below.
+       */
+      nir_component_mask_t low_channels =
+         BITSET_MASK(intr->num_components) << (align_chan_offset);
+      nir_component_mask_t high_channels =
+         low_channels << (8 / chan_size_bytes);
+      result = nir_bcsel(b,
+                         nir_i2b(b, nir_iand_imm(b, byte_offset, 8)),
+                         nir_channels(b, result, high_channels),
+                         nir_channels(b, result, low_channels));
+   } else {
+      /* General fallback case: Per-result-channel bcsel-based extraction
+       * from the load.
+       *
+       * The starting channel is derived from the run-time byte offset, so
+       * this path makes no use of align_mul or align_offset and serves any
+       * channel size as well as the align_mul == 8 loads rejected above.
+       */
+      nir_ssa_def *component =
+         nir_iand_imm(b,
+                      nir_udiv_imm(b, byte_offset, chan_size_bytes),
+                      chans_per_vec4 - 1);
+
+      nir_ssa_def *channels[NIR_MAX_VEC_COMPONENTS];
+      for (unsigned i = 0; i < intr->num_components; i++) {
+         channels[i] = nir_vector_extract(b, result,
+                                          nir_iadd_imm(b, component, i));
+      }
+
+      result = nir_vec(b, channels, intr->num_components);
+   }
+
+   return result;
+}
+
+/* Pass entry point: runs the load_ubo -> load_ubo_vec4 lowering over the
+ * shader via nir_shader_lower_instructions() and hands back that helper's
+ * boolean result.  No callback data is needed, so NULL is passed.
+ */
+bool
+nir_lower_ubo_vec4(nir_shader *shader)
+{
+   bool progress =
+      nir_shader_lower_instructions(shader,
+                                    nir_lower_ubo_vec4_filter,
+                                    nir_lower_ubo_vec4_lower,
+                                    NULL);
+
+   return progress;
+}
diff --git a/src/compiler/nir/nir_validate.c b/src/compiler/nir/nir_validate.c
index c7f2097c912..32c13eb2d5e 100644
--- a/src/compiler/nir/nir_validate.c
+++ b/src/compiler/nir/nir_validate.c
@@ -585,6 +585,14 @@ validate_intrinsic_instr(nir_intrinsic_instr *instr, validate_state *state)
break;
}
+ case nir_intrinsic_load_ubo_vec4: {
+ int bit_size = nir_dest_bit_size(instr->dest);
+ validate_assert(state, bit_size >= 8);
+ validate_assert(state, (nir_intrinsic_component(instr) +
+ instr->num_components) * (bit_size / 8) <= 16);
+ break;
+ }
+
case nir_intrinsic_load_ubo:
case nir_intrinsic_load_ssbo:
case nir_intrinsic_load_shared: