summaryrefslogtreecommitdiff
path: root/src/glsl/lower_const_arrays_to_uniforms.cpp
diff options
context:
space:
mode:
authorKenneth Graunke <kenneth@whitecape.org>2014-04-26 00:18:54 -0700
committerKenneth Graunke <kenneth@whitecape.org>2014-11-06 16:20:01 -0800
commit4f22db5fbbe59eacb762aa410f18c3078e85c2b7 (patch)
tree454c8021c061bfd49e246e1ee256e529e830cd2c /src/glsl/lower_const_arrays_to_uniforms.cpp
parent0c0bfb2ead03789164cee364fbf405994d876ca3 (diff)
glsl: Lower constant arrays to uniform arrays.
Consider GLSL code such as:

    const ivec2 offsets[] =
       ivec2[](ivec2(-1, -1), ivec2(-1, 0), ivec2(-1, 1),
               ivec2(0, -1), ivec2(0, 0), ivec2(0, 1),
               ivec2(1, -1), ivec2(1, 0), ivec2(1, 1));

    ivec2 offset = offsets[<non-constant expression>];

Both i965 and nv50 currently handle this very poorly. On i965, this becomes a pile of MOVs to load the immediate constants into registers, a pile of scratch writes to move the whole array to memory, and one scratch read to actually access the value - effectively the same as if it were a non-constant array.

We'd much rather upload large blocks of constant data as uniform data, so drivers can simply upload the data via constbufs, and not have to populate it via shader instructions.

This is currently non-optional because both i965 and nouveau benefit from it, and according to Marek radeonsi would benefit today as well. (According to Tom, radeonsi may want to handle this itself in the long term, but we can always add a flag when it becomes useful.)

Improves performance in a terrain rendering microbenchmark by about 2x, and cuts the number of instructions in about half. Helps a lot of "Natural Selection 2" shaders, as well as one "HOARD" shader.

total instructions in shared programs: 5473459 -> 5471765 (-0.03%)
instructions in affected programs: 5880 -> 4186 (-28.81%)

v2: Use ir_var_hidden to avoid exposing the new uniform via the GL uniform introspection API.
v3: Alphabetize Makefile.sources properly.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=77957
Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Diffstat (limited to 'src/glsl/lower_const_arrays_to_uniforms.cpp')
-rw-r--r--src/glsl/lower_const_arrays_to_uniforms.cpp102
1 files changed, 102 insertions, 0 deletions
diff --git a/src/glsl/lower_const_arrays_to_uniforms.cpp b/src/glsl/lower_const_arrays_to_uniforms.cpp
new file mode 100644
index 00000000000..b3c0ee29fba
--- /dev/null
+++ b/src/glsl/lower_const_arrays_to_uniforms.cpp
@@ -0,0 +1,102 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * \file lower_const_arrays_to_uniforms.cpp
+ *
+ * Lower constant arrays to uniform arrays.
+ *
+ * Some driver backends (such as i965 and nouveau) don't handle constant arrays
+ * gracefully, instead treating them as ordinary writable temporary arrays.
+ * Since arrays can be large, this often means spilling them to scratch memory,
+ * which usually involves a large number of instructions.
+ *
+ * This must be called prior to link_set_uniform_initializers(); we need the
+ * linker to process our new uniform's constant initializer.
+ *
+ * This should be called after optimizations, since those can result in
+ * splitting and removing arrays that are indexed by constant expressions.
+ */
+#include "ir.h"
+#include "ir_visitor.h"
+#include "ir_rvalue_visitor.h"
+#include "glsl_types.h"
+
+namespace {
+/**
+ * Rvalue visitor that hands every rvalue in an instruction list to
+ * handle_rvalue() and remembers whether any of them was rewritten.
+ */
+class lower_const_array_visitor : public ir_rvalue_visitor {
+public:
+   lower_const_array_visitor(exec_list *insts)
+      : instructions(insts), progress(false)
+   {
+   }
+
+   /**
+    * Visit every element of the instruction list.
+    *
+    * \return the progress flag, which handle_rvalue() sets when it replaces
+    *         an rvalue.
+    */
+   bool run()
+   {
+      visit_list_elements(this, instructions);
+      return progress;
+   }
+
+   void handle_rvalue(ir_rvalue **rvalue);
+
+private:
+   /** List being visited; new uniform declarations are pushed at its head. */
+   exec_list *instructions;
+
+   /** Set once at least one rvalue has been replaced. */
+   bool progress;
+};
+
+/**
+ * Replace a constant array rvalue with a dereference of a newly created
+ * hidden, read-only uniform whose constant initializer is that array.
+ *
+ * Rvalues that are NULL, that are not constants, or whose type is not an
+ * array are left untouched.  On a successful replacement the uniform's
+ * declaration is pushed at the head of the instruction list and the progress
+ * flag is set.
+ *
+ * \param rvalue  location of the rvalue to inspect; overwritten in place when
+ *                the rvalue is lowered.
+ */
+void
+lower_const_array_visitor::handle_rvalue(ir_rvalue **rvalue)
+{
+   if (!*rvalue)
+      return;
+
+   ir_constant *con = (*rvalue)->as_constant();
+   if (!con || !con->type->is_array())
+      return;
+
+   /* Allocate everything new out of the context that owns the constant. */
+   void *mem_ctx = ralloc_parent(con);
+
+   /* Give each lowered array its own name.  Using one fixed name for every
+    * array would leave a shader containing several constant arrays with
+    * several distinct uniforms that all share that name.
+    * NOTE(review): the linker is expected to identify uniforms by name, so
+    * duplicates would be conflated there - confirm against link_uniforms.
+    * The constant's address is unique among the live constants, so it serves
+    * as the distinguishing suffix.
+    */
+   char *uniform_name =
+      ralloc_asprintf(mem_ctx, "constarray__%p", static_cast<void *>(con));
+
+   ir_variable *uni =
+      new(mem_ctx) ir_variable(con->type, uniform_name, ir_var_uniform);
+   uni->constant_initializer = con;
+   uni->constant_value = con;
+   uni->data.has_initializer = true;
+   /* Hidden, so the uniform is not exposed through GL uniform introspection. */
+   uni->data.how_declared = ir_var_hidden;
+   uni->data.read_only = true;
+   /* Assume the whole thing is accessed. */
+   uni->data.max_array_access = uni->type->length - 1;
+   instructions->push_head(uni);
+
+   *rvalue = new(mem_ctx) ir_dereference_variable(uni);
+
+   progress = true;
+}
+
+} /* anonymous namespace */
+
+/**
+ * Entry point of the pass: run the constant-array lowering visitor over the
+ * given instruction list.
+ *
+ * \param instructions  IR list to process; new uniform declarations are
+ *                      pushed at its head.
+ * \return true if at least one constant array was replaced.
+ */
+bool
+lower_const_arrays_to_uniforms(exec_list *instructions)
+{
+   return lower_const_array_visitor(instructions).run();
+}