summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Eric Anholt <eric@anholt.net> 2011-07-25 18:13:04 -0700
committer Eric Anholt <eric@anholt.net> 2011-08-05 10:08:31 -0700
commit ee0373b833155804bb8846c6f05f897b9ee5afa6 (patch)
tree 9550aa61942a3c78d5b2fe177a9c830ffe572d4f
parent 0722edc59cd526437c2d4bad474b934dad84d789 (diff)
i965/fs: Don't upload unused uniform components.
This saves both register space and upload bandwidth for unused values. Note that previously we were relying on the visitor not initially generating references to different sets of uniforms between the 8-wide and 16-wide code generation; now we are also relying on both code generation passes dead-code eliminating the same uniform references.
-rw-r--r-- src/mesa/drivers/dri/i965/brw_fs.cpp 89
-rw-r--r-- src/mesa/drivers/dri/i965/brw_fs.h 10
2 files changed, 95 insertions, 4 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 02041b3bc03..f55be022f72 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -242,11 +242,12 @@ import_uniforms_callback(const void *key,
* This brings in those uniform definitions
*/
void
-fs_visitor::import_uniforms(struct hash_table *src_variable_ht)
+fs_visitor::import_uniforms(fs_visitor *v)
{
- hash_table_call_foreach(src_variable_ht,
+ hash_table_call_foreach(v->variable_ht,
import_uniforms_callback,
variable_ht);
+ this->params_remap = v->params_remap;
}
/* Our support for uniforms is piggy-backed on the struct
@@ -798,6 +799,86 @@ fs_visitor::split_virtual_grfs()
this->live_intervals_valid = false;
}
+bool
+fs_visitor::remove_dead_constants()
+{
+ if (c->dispatch_width == 8) {
+ this->params_remap = ralloc_array(mem_ctx, int, c->prog_data.nr_params);
+
+ for (unsigned int i = 0; i < c->prog_data.nr_params; i++)
+ this->params_remap[i] = -1;
+
+ /* Find which params are still in use. */
+ foreach_list(node, &this->instructions) {
+ fs_inst *inst = (fs_inst *)node;
+
+ for (int i = 0; i < 3; i++) {
+ int constant_nr = inst->src[i].hw_reg + inst->src[i].reg_offset;
+
+ if (inst->src[i].file != UNIFORM)
+ continue;
+
+ assert(constant_nr < (int)c->prog_data.nr_params);
+
+ /* For now, set this to non-negative. We'll give it the
+ * actual new number in a moment, in order to keep the
+ * register numbers nicely ordered.
+ */
+ this->params_remap[constant_nr] = 0;
+ }
+ }
+
+ /* Figure out what the new numbers for the params will be. At some
+ * point when we're doing uniform array access, we're going to want
+ * to keep the distinction between .reg and .reg_offset, but for
+ * now we don't care.
+ */
+ unsigned int new_nr_params = 0;
+ for (unsigned int i = 0; i < c->prog_data.nr_params; i++) {
+ if (this->params_remap[i] != -1) {
+ this->params_remap[i] = new_nr_params++;
+ }
+ }
+
+ /* Update the list of params to be uploaded to match our new numbering. */
+ for (unsigned int i = 0; i < c->prog_data.nr_params; i++) {
+ int remapped = this->params_remap[i];
+
+ if (remapped == -1)
+ continue;
+
+ /* We've already done setup_paramvalues_refs() so no need to worry
+ * about param_index and param_offset.
+ */
+ c->prog_data.param[remapped] = c->prog_data.param[i];
+ c->prog_data.param_convert[remapped] = c->prog_data.param_convert[i];
+ }
+
+ c->prog_data.nr_params = new_nr_params;
+ } else {
+ /* This should have been generated in the 8-wide pass already. */
+ assert(this->params_remap);
+ }
+
+ /* Now do the renumbering of the shader to remove unused params. */
+ foreach_list(node, &this->instructions) {
+ fs_inst *inst = (fs_inst *)node;
+
+ for (int i = 0; i < 3; i++) {
+ int constant_nr = inst->src[i].hw_reg + inst->src[i].reg_offset;
+
+ if (inst->src[i].file != UNIFORM)
+ continue;
+
+ assert(this->params_remap[constant_nr] != -1);
+ inst->src[i].hw_reg = this->params_remap[constant_nr];
+ inst->src[i].reg_offset = 0;
+ }
+ }
+
+ return true;
+}
+
/**
* Choose accesses from the UNIFORM file to demote to using the pull
* constant buffer.
@@ -1624,6 +1705,8 @@ fs_visitor::run()
progress = dead_code_eliminate() || progress;
} while (progress);
+ remove_dead_constants();
+
schedule_instructions();
assign_curb_setup();
@@ -1702,7 +1785,7 @@ brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_compile *c,
if (intel->gen >= 5 && c->prog_data.nr_pull_params == 0) {
c->dispatch_width = 16;
fs_visitor v2(c, prog, shader);
- v2.import_uniforms(v.variable_ht);
+ v2.import_uniforms(&v);
v2.run();
}
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index 89d6cda7e4f..96e1420038f 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -421,7 +421,7 @@ public:
fs_reg *variable_storage(ir_variable *var);
int virtual_grf_alloc(int size);
- void import_uniforms(struct hash_table *src_variable_ht);
+ void import_uniforms(fs_visitor *v);
void visit(ir_variable *ir);
void visit(ir_assignment *ir);
@@ -489,6 +489,7 @@ public:
bool register_coalesce();
bool compute_to_mrf();
bool dead_code_eliminate();
+ bool remove_dead_constants();
bool remove_duplicate_mrf_writes();
bool virtual_grf_interferes(int a, int b);
void schedule_instructions();
@@ -566,6 +567,13 @@ public:
int *virtual_grf_use;
bool live_intervals_valid;
+ /* This is the map from UNIFORM hw_reg + reg_offset as generated by
+ * the visitor to the packed uniform number after
+ * remove_dead_constants() that represents the actual uploaded
+ * uniform index.
+ */
+ int *params_remap;
+
struct hash_table *variable_ht;
ir_variable *frag_color, *frag_data, *frag_depth;
int first_non_payload_grf;