summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorEric Anholt <eric@anholt.net>2015-01-10 15:01:48 +1300
committerEric Anholt <eric@anholt.net>2015-01-10 20:55:37 +1300
commita8e14c293b19a2d298f91f283d6b6839f36fb518 (patch)
treed4e63712e0de0b37b8aeb80f144681a720b81651
parentb920ecf793bd419558a240014624add08774765d (diff)
vc4: Allow dead code elimination of VPM reads.
This gets a bunch of dead reads out of the CSes, which don't read most attributes generally.

total instructions in shared programs: 39753 -> 39487 (-0.67%)
instructions in affected programs: 4721 -> 4455 (-5.63%)
-rw-r--r--src/gallium/drivers/vc4/vc4_opt_dead_code.c44
-rw-r--r--src/gallium/drivers/vc4/vc4_qir.h1
2 files changed, 44 insertions, 1 deletions
diff --git a/src/gallium/drivers/vc4/vc4_opt_dead_code.c b/src/gallium/drivers/vc4/vc4_opt_dead_code.c
index f555fcb600..94ab382500 100644
--- a/src/gallium/drivers/vc4/vc4_opt_dead_code.c
+++ b/src/gallium/drivers/vc4/vc4_opt_dead_code.c
@@ -46,6 +46,36 @@ dce(struct vc4_compile *c, struct qinst *inst)
qir_remove_instruction(inst);
}
+/**
+ * Reports whether any source operand of inst is a read that dead code
+ * elimination has to leave in place.
+ *
+ * A source blocks removal when it is:
+ *  - a QFILE_VARY read whose input_semantics entry has semantic 0xff
+ *    (NOTE(review): the meaning of 0xff is not visible here -- confirm),
+ *  - a QFILE_VPM read that is not at the current end of its attribute as
+ *    recorded in c->vattr_sizes, or
+ *  - a QFILE_VPM read when the vattr_sizes entries sum to exactly 4.
+ *
+ * Returns true if such a source exists, false otherwise.
+ */
+static bool
+has_nonremovable_reads(struct vc4_compile *c, struct qinst *inst)
+{
+        for (int s = 0; s < qir_get_op_nsrc(inst->op); s++) {
+                if (inst->src[s].file == QFILE_VARY) {
+                        if (c->input_semantics[inst->src[s].index].semantic == 0xff)
+                                return true;
+                        continue;
+                }
+
+                if (inst->src[s].file != QFILE_VPM)
+                        continue;
+
+                /* The VPM index is split into an attribute number and a
+                 * slot within it; presumably 4 slots of 4 bytes each.
+                 */
+                uint32_t attr_index = inst->src[s].index / 4;
+                uint32_t byte_offset = (inst->src[s].index % 4) * 4;
+
+                /* Only a read sitting at the very end of the attribute's
+                 * recorded size is a candidate for removal.
+                 */
+                if (c->vattr_sizes[attr_index] != byte_offset + 4)
+                        return true;
+
+                /* The final remaining VPM read has to stay: dropping it
+                 * makes the simulator (at least) raise an error.
+                 */
+                uint32_t vpm_bytes = 0;
+                for (uint32_t a = 0; a < ARRAY_SIZE(c->vattr_sizes); a++)
+                        vpm_bytes += c->vattr_sizes[a];
+                if (vpm_bytes == 4)
+                        return true;
+        }
+
+        return false;
+}
+
bool
qir_opt_dead_code(struct vc4_compile *c)
{
@@ -65,12 +95,24 @@ qir_opt_dead_code(struct vc4_compile *c)
!used[inst->dst.index] &&
(!qir_has_side_effects(c, inst) ||
inst->op == QOP_TEX_RESULT) &&
- !(qir_has_side_effect_reads(c, inst))) {
+ !has_nonremovable_reads(c, inst)) {
if (inst->op == QOP_TEX_RESULT) {
dce_tex = true;
c->num_texture_samples--;
}
+ for (int i = 0; i < qir_get_op_nsrc(inst->op); i++) {
+ if (inst->src[i].file != QFILE_VPM)
+ continue;
+ uint32_t attr = inst->src[i].index / 4;
+ uint32_t offset = (inst->src[i].index % 4) * 4;
+
+ if (c->vattr_sizes[attr] == offset + 4) {
+ c->num_inputs--;
+ c->vattr_sizes[attr] -= 4;
+ }
+ }
+
dce(c, inst);
progress = true;
continue;
diff --git a/src/gallium/drivers/vc4/vc4_qir.h b/src/gallium/drivers/vc4/vc4_qir.h
index d2f89ae9e6..307a79f77c 100644
--- a/src/gallium/drivers/vc4/vc4_qir.h
+++ b/src/gallium/drivers/vc4/vc4_qir.h
@@ -30,6 +30,7 @@
#include <stdint.h>
#include <string.h>
+#include "util/macros.h"
#include "util/u_simple_list.h"
#include "tgsi/tgsi_parse.h"