summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Eric Anholt <eric@anholt.net> 2011-03-24 09:08:58 -0700
committer Eric Anholt <eric@anholt.net> 2011-04-26 12:19:33 -0700
commit 54990673a65b72fd222aeafc19f3a384ce597146 (patch)
tree 334da79da77b84e6ddd9840f96d25b72bf053071
parent af20328271425c217630b5114ee172bd8387a91a (diff)
i965/fs: Fix interference calculation of pixel_[xy] in 16-wide.
Fixes glsl-fs-ceil in that mode, which produced the code in the comment.

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
-rw-r--r-- src/mesa/drivers/dri/i965/brw_fs.cpp | 23
1 files changed, 23 insertions, 0 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 17f4435a1ac..aa51d04df33 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -3393,6 +3393,29 @@ fs_visitor::virtual_grf_interferes(int a, int b)
(this->virtual_grf_use[b] != -1 ||
this->virtual_grf_def[b] == MAX_INSTRUCTION));
+ /* If the register is used to store 16 values of less than float
+ * size (only the case for pixel_[xy]), then we can't allocate
+ * another dword-sized thing to that register that would be used in
+ * the same instruction. This is because when the GPU decodes (for
+ * example):
+ *
+ * (declare (in ) vec4 gl_FragCoord@0x97766a0)
+ * add(16) g6<1>F g6<8,8,1>UW 0.5F { align1 compr };
+ *
+ * it's actually processed as:
+ * add(8) g6<1>F g6<8,8,1>UW 0.5F { align1 };
+ * add(8) g7<1>F g6.8<8,8,1>UW 0.5F { align1 sechalf };
+ *
+ * so our second half values in g6 got overwritten in the first
+ * half.
+ */
+ if (c->dispatch_width == 16 && (this->pixel_x.reg == a ||
+ this->pixel_x.reg == b ||
+ this->pixel_y.reg == a ||
+ this->pixel_y.reg == b)) {
+ return start <= end;
+ }
+
return start < end;
}