summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Eric Anholt <eric@anholt.net> 2012-01-06 10:20:09 -0800
committer Eric Anholt <eric@anholt.net> 2012-01-11 12:06:34 -0800
commit 2c623c2be529edc974ca293f3c71929c639ec0c9 (patch)
tree 4e3050156de1b4910c441cbb2d1775cbbc31c215
parent f1b33c74dc11b97a86a7f0e9cbe4cb168b2b9540 (diff)
i965/vs: Try to emit more components of constant vectors at once.
We were naively emitting one component at a time, even if we were emitting the same value to multiple channels. Improves on a codegen regression from the old VS to the new VS on some unigine shaders (because we emit constant vecs/matrices as immediates instead of loading them as push constants, so we had over 4x the instructions for using them).

shader-db results:
Total instructions: 58594 -> 58540
11/870 programs affected (1.3%)
765 -> 711 instructions in affected programs (7.1% reduction)
-rw-r--r-- src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp 29
1 files changed, 27 insertions, 2 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index 20da487399f..5df2470dc1b 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -1701,22 +1701,45 @@ vec4_visitor::emit_constant_values(dst_reg *dst, ir_constant *ir)
if (ir->type->is_matrix()) {
for (int i = 0; i < ir->type->matrix_columns; i++) {
+ float *vec = &ir->value.f[i * ir->type->vector_elements];
+
for (int j = 0; j < ir->type->vector_elements; j++) {
dst->writemask = 1 << j;
dst->type = BRW_REGISTER_TYPE_F;
- emit(MOV(*dst,
- src_reg(ir->value.f[i * ir->type->vector_elements + j])));
+ emit(MOV(*dst, src_reg(vec[j])));
}
dst->reg_offset++;
}
return;
}
+ int remaining_writemask = (1 << ir->type->vector_elements) - 1;
+
for (int i = 0; i < ir->type->vector_elements; i++) {
+ if (!(remaining_writemask & (1 << i)))
+ continue;
+
dst->writemask = 1 << i;
dst->type = brw_type_for_base_type(ir->type);
+ /* Find other components that match the one we're about to
+ * write. Emits fewer instructions for things like vec4(0.5,
+ * 1.5, 1.5, 1.5).
+ */
+ for (int j = i + 1; j < ir->type->vector_elements; j++) {
+ if (ir->type->base_type == GLSL_TYPE_BOOL) {
+ if (ir->value.b[i] == ir->value.b[j])
+ dst->writemask |= (1 << j);
+ } else {
+ /* u, i, and f storage all line up, so no need for a
+ * switch case for comparing each type.
+ */
+ if (ir->value.u[i] == ir->value.u[j])
+ dst->writemask |= (1 << j);
+ }
+ }
+
switch (ir->type->base_type) {
case GLSL_TYPE_FLOAT:
emit(MOV(*dst, src_reg(ir->value.f[i])));
@@ -1734,6 +1757,8 @@ vec4_visitor::emit_constant_values(dst_reg *dst, ir_constant *ir)
assert(!"Non-float/uint/int/bool constant");
break;
}
+
+ remaining_writemask &= ~dst->writemask;
}
dst->reg_offset++;
}