summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Eric Anholt <eric@anholt.net> 2014-08-20 14:51:08 -0700
committer: Eric Anholt <eric@anholt.net> 2014-08-22 10:16:58 -0700
commit: ae83955b1da238ccf180cba568f4269f01bb21fa (patch)
tree: 5a63d710929c31375c3a543c54a1c38452e50afb
parent: c3c922289b2fb080ec184d9bd7e71a8870ced18d (diff)
vc4: Emit the scoreboard wait just when it's needed.
This should improve performance on real hardware by allowing more shader instances to run in parallel. It also fixes assertion failures in tests that don't emit a fragment color, since in those shaders we otherwise didn't have enough instructions to fit our signals into.
-rw-r--r-- src/gallium/drivers/vc4/vc4_qpu_emit.c 27
1 file changed, 25 insertions, 2 deletions
diff --git a/src/gallium/drivers/vc4/vc4_qpu_emit.c b/src/gallium/drivers/vc4/vc4_qpu_emit.c
index 43491019855..477929cc199 100644
--- a/src/gallium/drivers/vc4/vc4_qpu_emit.c
+++ b/src/gallium/drivers/vc4/vc4_qpu_emit.c
@@ -97,6 +97,7 @@ static void
serialize_insts(struct qcompile *c)
{
int last_sfu_write = -10;
+ bool scoreboard_wait_emitted = false;
while (!is_empty_list(&c->qpu_inst_list)) {
struct queued_qpu_inst *q =
@@ -173,6 +174,30 @@ serialize_insts(struct qcompile *c)
last_sfu_write = c->qpu_inst_count;
}
+ /* "A scoreboard wait must not occur in the first two
+ * instructions of a fragment shader. This is either the
+ * explicit Wait for Scoreboard signal or an implicit wait
+ * with the first tile-buffer read or write instruction."
+ */
+ if (!scoreboard_wait_emitted &&
+ (waddr_a == QPU_W_TLB_Z || waddr_m == QPU_W_TLB_Z ||
+ waddr_a == QPU_W_TLB_COLOR_MS ||
+ waddr_m == QPU_W_TLB_COLOR_MS ||
+ waddr_a == QPU_W_TLB_COLOR_ALL ||
+ waddr_m == QPU_W_TLB_COLOR_ALL ||
+ QPU_GET_FIELD(q->inst, QPU_SIG) == QPU_SIG_COLOR_LOAD)) {
+ while (c->qpu_inst_count < 3 ||
+ QPU_GET_FIELD(c->qpu_insts[c->qpu_inst_count - 1],
+ QPU_SIG) != QPU_SIG_NONE) {
+ serialize_one_inst(c, qpu_inst(qpu_a_NOP(),
+ qpu_m_NOP()));
+ }
+ c->qpu_insts[c->qpu_inst_count - 1] =
+ qpu_set_sig(c->qpu_insts[c->qpu_inst_count - 1],
+ QPU_SIG_WAIT_FOR_SCOREBOARD);
+ scoreboard_wait_emitted = true;
+ }
+
serialize_one_inst(c, q->inst);
remove_from_list(&q->link);
@@ -613,8 +638,6 @@ vc4_generate_code(struct qcompile *c)
case QSTAGE_COORD:
break;
case QSTAGE_FRAG:
- c->qpu_insts[2] = qpu_set_sig(c->qpu_insts[2],
- QPU_SIG_WAIT_FOR_SCOREBOARD);
c->qpu_insts[c->qpu_inst_count - 1] =
qpu_set_sig(c->qpu_insts[c->qpu_inst_count - 1],
QPU_SIG_SCOREBOARD_UNLOCK);