summaryrefslogtreecommitdiff
path: root/src/mesa
diff options
context:
space:
mode:
author Paul Berry <stereotype441@gmail.com> 2013-11-04 20:06:48 -0800
committer Ian Romanick <ian.d.romanick@intel.com> 2013-11-23 12:33:17 -0800
commit 5af1fb532451f41d7cd920497d468582711bac00 (patch)
tree d5e602fddd6f934dcdaed36a54aea79aa73a17a3 /src/mesa
parent 0040edcf9dd401b1b49d487c0ab15ad6aae8d4df (diff)
i965/gen7: Emit workaround flush when changing GS enable state.
v2: Don't go to extra work to avoid extraneous flushes. (Previous
experiments in the kernel have suggested that flushing the pipeline when
it is already empty is extremely cheap).

Cc: "10.0" <mesa-stable@lists.freedesktop.org>
Reviewed-by: Eric Anholt <eric@anholt.net>
(cherry picked from commit 7dfb4b2d00ddb8e5ee24d4c58eb9415dc4ccc21c)
Diffstat (limited to 'src/mesa')
-rw-r--r-- src/mesa/drivers/dri/i965/brw_context.c | 1
-rw-r--r-- src/mesa/drivers/dri/i965/brw_context.h | 6
-rw-r--r-- src/mesa/drivers/dri/i965/gen7_blorp.cpp | 16
-rw-r--r-- src/mesa/drivers/dri/i965/gen7_gs_state.c | 16
-rw-r--r-- src/mesa/drivers/dri/i965/gen7_urb.c | 24
-rw-r--r-- src/mesa/drivers/dri/i965/intel_batchbuffer.c | 30
-rw-r--r-- src/mesa/drivers/dri/i965/intel_batchbuffer.h | 1
7 files changed, 72 insertions, 22 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c
index a33e993f281..0b22992b80a 100644
--- a/src/mesa/drivers/dri/i965/brw_context.c
+++ b/src/mesa/drivers/dri/i965/brw_context.c
@@ -766,6 +766,7 @@ brwCreateContext(gl_api api,
brw->prim_restart.in_progress = false;
brw->prim_restart.enable_cut_index = false;
+ brw->gs.enabled = false;
if (brw->gen < 6) {
brw->curbe.last_buf = calloc(1, 4096);
diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index 8b1cbb34d7b..4a089868339 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -1300,6 +1300,12 @@ struct brw_context
struct {
struct brw_stage_state base;
struct brw_gs_prog_data *prog_data;
+
+ /**
+ * True if the 3DSTATE_GS command most recently emitted to the 3D
+ * pipeline enabled the GS; false otherwise.
+ */
+ bool enabled;
} gs;
struct {
diff --git a/src/mesa/drivers/dri/i965/gen7_blorp.cpp b/src/mesa/drivers/dri/i965/gen7_blorp.cpp
index 540c46dcb40..d48153824de 100644
--- a/src/mesa/drivers/dri/i965/gen7_blorp.cpp
+++ b/src/mesa/drivers/dri/i965/gen7_blorp.cpp
@@ -402,6 +402,21 @@ gen7_blorp_emit_gs_disable(struct brw_context *brw,
OUT_BATCH(0);
ADVANCE_BATCH();
+ /**
+ * From Graphics BSpec: 3D-Media-GPGPU Engine > 3D Pipeline Stages >
+ * Geometry > Geometry Shader > State:
+ *
+ * "Note: Because of corruption in IVB:GT2, software needs to flush the
+ * whole fixed function pipeline when the GS enable changes value in
+ * the 3DSTATE_GS."
+ *
+ * The hardware architects have clarified that in this context "flush the
+ * whole fixed function pipeline" means to emit a PIPE_CONTROL with the "CS
+ * Stall" bit set.
+ */
+ if (!brw->is_haswell && brw->gt == 2 && brw->gs.enabled)
+ gen7_emit_cs_stall_flush(brw);
+
BEGIN_BATCH(7);
OUT_BATCH(_3DSTATE_GS << 16 | (7 - 2));
OUT_BATCH(0);
@@ -411,6 +426,7 @@ gen7_blorp_emit_gs_disable(struct brw_context *brw,
OUT_BATCH(0);
OUT_BATCH(0);
ADVANCE_BATCH();
+ brw->gs.enabled = false;
}
/* 3DSTATE_STREAMOUT
diff --git a/src/mesa/drivers/dri/i965/gen7_gs_state.c b/src/mesa/drivers/dri/i965/gen7_gs_state.c
index 584f2db8f8e..d2ba354e2c4 100644
--- a/src/mesa/drivers/dri/i965/gen7_gs_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_gs_state.c
@@ -80,6 +80,21 @@ upload_gs_state(struct brw_context *brw)
gen7_upload_constant_state(brw, stage_state, active, _3DSTATE_CONSTANT_GS);
+ /**
+ * From Graphics BSpec: 3D-Media-GPGPU Engine > 3D Pipeline Stages >
+ * Geometry > Geometry Shader > State:
+ *
+ * "Note: Because of corruption in IVB:GT2, software needs to flush the
+ * whole fixed function pipeline when the GS enable changes value in
+ * the 3DSTATE_GS."
+ *
+ * The hardware architects have clarified that in this context "flush the
+ * whole fixed function pipeline" means to emit a PIPE_CONTROL with the "CS
+ * Stall" bit set.
+ */
+ if (!brw->is_haswell && brw->gt == 2 && brw->gs.enabled != active)
+ gen7_emit_cs_stall_flush(brw);
+
if (active) {
BEGIN_BATCH(7);
OUT_BATCH(_3DSTATE_GS << 16 | (7 - 2));
@@ -176,6 +191,7 @@ upload_gs_state(struct brw_context *brw)
OUT_BATCH(0);
ADVANCE_BATCH();
}
+ brw->gs.enabled = active;
}
const struct brw_tracked_state gen7_gs_state = {
diff --git a/src/mesa/drivers/dri/i965/gen7_urb.c b/src/mesa/drivers/dri/i965/gen7_urb.c
index 6dcdfe4fa44..c6385862b92 100644
--- a/src/mesa/drivers/dri/i965/gen7_urb.c
+++ b/src/mesa/drivers/dri/i965/gen7_urb.c
@@ -122,28 +122,8 @@ gen7_emit_push_constant_state(struct brw_context *brw, unsigned vs_size,
*
* No such restriction exists for Haswell.
*/
- if (!brw->is_haswell) {
- BEGIN_BATCH(4);
- OUT_BATCH(_3DSTATE_PIPE_CONTROL | (4 - 2));
- /* From p61 of the Ivy Bridge PRM (1.10.4 PIPE_CONTROL Command: DW1[20]
- * CS Stall):
- *
- * One of the following must also be set:
- * - Render Target Cache Flush Enable ([12] of DW1)
- * - Depth Cache Flush Enable ([0] of DW1)
- * - Stall at Pixel Scoreboard ([1] of DW1)
- * - Depth Stall ([13] of DW1)
- * - Post-Sync Operation ([13] of DW1)
- *
- * We choose to do a Post-Sync Operation (Write Immediate Data), since
- * it seems like it will incur the least additional performance penalty.
- */
- OUT_BATCH(PIPE_CONTROL_CS_STALL | PIPE_CONTROL_WRITE_IMMEDIATE);
- OUT_RELOC(brw->batch.workaround_bo,
- I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, 0);
- OUT_BATCH(0);
- ADVANCE_BATCH();
- }
+ if (!brw->is_haswell)
+ gen7_emit_cs_stall_flush(brw);
}
const struct brw_tracked_state gen7_push_constant_space = {
diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.c b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
index 6d1ae797426..a2c5b5b8409 100644
--- a/src/mesa/drivers/dri/i965/intel_batchbuffer.c
+++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
@@ -510,6 +510,36 @@ gen7_emit_vs_workaround_flush(struct brw_context *brw)
ADVANCE_BATCH();
}
+
+/**
+ * Emit a PIPE_CONTROL command for gen7 with the CS Stall bit set.
+ */
+void
+gen7_emit_cs_stall_flush(struct brw_context *brw)
+{
+ BEGIN_BATCH(4);
+ OUT_BATCH(_3DSTATE_PIPE_CONTROL | (4 - 2));
+ /* From p61 of the Ivy Bridge PRM (1.10.4 PIPE_CONTROL Command: DW1[20]
+ * CS Stall):
+ *
+ * One of the following must also be set:
+ * - Render Target Cache Flush Enable ([12] of DW1)
+ * - Depth Cache Flush Enable ([0] of DW1)
+ * - Stall at Pixel Scoreboard ([1] of DW1)
+ * - Depth Stall ([13] of DW1)
+ * - Post-Sync Operation ([13] of DW1)
+ *
+ * We choose to do a Post-Sync Operation (Write Immediate Data), since
+ * it seems like it will incur the least additional performance penalty.
+ */
+ OUT_BATCH(PIPE_CONTROL_CS_STALL | PIPE_CONTROL_WRITE_IMMEDIATE);
+ OUT_RELOC(brw->batch.workaround_bo,
+ I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, 0);
+ OUT_BATCH(0);
+ ADVANCE_BATCH();
+}
+
+
/**
* Emits a PIPE_CONTROL with a non-zero post-sync operation, for
* implementing two workarounds on gen6. From section 1.4.7.1
diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.h b/src/mesa/drivers/dri/i965/intel_batchbuffer.h
index d46f48e20ef..cabbb69a63c 100644
--- a/src/mesa/drivers/dri/i965/intel_batchbuffer.h
+++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.h
@@ -59,6 +59,7 @@ void intel_batchbuffer_emit_mi_flush(struct brw_context *brw);
void intel_emit_post_sync_nonzero_flush(struct brw_context *brw);
void intel_emit_depth_stall_flushes(struct brw_context *brw);
void gen7_emit_vs_workaround_flush(struct brw_context *brw);
+void gen7_emit_cs_stall_flush(struct brw_context *brw);
static INLINE uint32_t float_as_int(float f)
{