summary | refs | log | tree | commit | diff
path: root/src/gallium/drivers/vc4/vc4_qpu_schedule.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium/drivers/vc4/vc4_qpu_schedule.c')
-rw-r--r-- src/gallium/drivers/vc4/vc4_qpu_schedule.c | 29
1 file changed, 27 insertions, 2 deletions
diff --git a/src/gallium/drivers/vc4/vc4_qpu_schedule.c b/src/gallium/drivers/vc4/vc4_qpu_schedule.c
index 680191542b8..45360f73410 100644
--- a/src/gallium/drivers/vc4/vc4_qpu_schedule.c
+++ b/src/gallium/drivers/vc4/vc4_qpu_schedule.c
@@ -385,12 +385,27 @@ calculate_deps(struct schedule_state *state, struct schedule_node *n)
switch (sig) {
case QPU_SIG_SW_BREAKPOINT:
case QPU_SIG_NONE:
- case QPU_SIG_THREAD_SWITCH:
- case QPU_SIG_LAST_THREAD_SWITCH:
case QPU_SIG_SMALL_IMM:
case QPU_SIG_LOAD_IMM:
break;
+ case QPU_SIG_THREAD_SWITCH:
+ case QPU_SIG_LAST_THREAD_SWITCH:
+ /* All accumulator contents and flags are undefined after the
+ * switch.
+ */
+ for (int i = 0; i < ARRAY_SIZE(state->last_r); i++)
+ add_write_dep(state, &state->last_r[i], n);
+ add_write_dep(state, &state->last_sf, n);
+
+ /* Scoreboard-locking operations have to stay after the last
+ * thread switch.
+ */
+ add_write_dep(state, &state->last_tlb, n);
+
+ add_write_dep(state, &state->last_tmu_write, n);
+ break;
+
case QPU_SIG_LOAD_TMU0:
case QPU_SIG_LOAD_TMU1:
/* TMU loads are coming from a FIFO, so ordering is important.
@@ -902,6 +917,16 @@ schedule_instructions(struct vc4_compile *c,
qpu_serialize_one_inst(c, inst);
qpu_serialize_one_inst(c, inst);
qpu_serialize_one_inst(c, inst);
+ } else if (QPU_GET_FIELD(inst, QPU_SIG) == QPU_SIG_THREAD_SWITCH ||
+ QPU_GET_FIELD(inst, QPU_SIG) == QPU_SIG_LAST_THREAD_SWITCH) {
+ /* The thread switch occurs after two delay slots. We
+ * should fit things in these slots, but we don't
+ * currently.
+ */
+ inst = qpu_NOP();
+ update_scoreboard_for_chosen(scoreboard, inst);
+ qpu_serialize_one_inst(c, inst);
+ qpu_serialize_one_inst(c, inst);
}
}