summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Rob Clark <robdclark@chromium.org> 2019-11-08 12:55:27 -0800
committer: Rob Clark <robdclark@chromium.org> 2019-11-12 13:57:52 -0800
commit: b22617fb57be54a859a8d62a5e545afcb38266e9 (patch)
tree: 65f1307aa332c1730c7fc05a264280ee23408e46
parent: 4bb697d938d17bbdd5124db0102d97fb9ead2229 (diff)
freedreno/ir3: fix gpu hang with pre-fs-tex-fetch
For pre-fs-dispatch texture fetch, we need to assign bary_ij to r0.x, even if it is not used in the shader (i.e. the only varying use is for tex coords). But if, for example, gl_FragCoord is used, it could get assigned on top of bary_ij, resulting in a GPU hang.

The solution to this is two-fold: (1) the inputs/outputs rework has the benefit of making RA realize bary_ij is a vec2, even if there are no split/collect instructions (due to no varying fetches in the shader itself). And (2) extend the live ranges of meta:input instructions to the first non-input instruction, to prevent RA from assigning the same register to multiple inputs.

Backport note: because of (1) above, a better solution for 19.3 would be to revert f30c256ec05.

Fixes: f30c256ec05 ("freedreno/ir3: enable pre-fs texture fetch for a6xx")
Signed-off-by: Rob Clark <robdclark@chromium.org>
Reviewed-by: Kristian H. Kristensen <hoegsberg@google.com>
Reviewed-by: Eric Anholt <eric@anholt.net>
-rw-r--r-- src/freedreno/ir3/ir3_ra.c 12
-rw-r--r-- src/freedreno/ir3/ir3_sched.c 30
2 files changed, 32 insertions, 10 deletions
diff --git a/src/freedreno/ir3/ir3_ra.c b/src/freedreno/ir3/ir3_ra.c
index 1f24b5eadac..67d8a93884e 100644
--- a/src/freedreno/ir3/ir3_ra.c
+++ b/src/freedreno/ir3/ir3_ra.c
@@ -701,6 +701,15 @@ ra_block_compute_live_ranges(struct ir3_ra_ctx *ctx, struct ir3_block *block)
block->data = bd;
+ struct ir3_instruction *first_non_input = NULL;
+ list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) {
+ if (instr->opc != OPC_META_INPUT) {
+ first_non_input = instr;
+ break;
+ }
+ }
+
+
list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) {
struct ir3_instruction *src;
struct ir3_register *reg;
@@ -767,6 +776,9 @@ ra_block_compute_live_ranges(struct ir3_ra_ctx *ctx, struct ir3_block *block)
def(name, id->defn);
+ if (instr->opc == OPC_META_INPUT)
+ use(name, first_non_input);
+
if (is_high(id->defn)) {
ra_set_node_class(ctx->g, name,
ctx->set->high_classes[id->cls - HIGH_OFFSET]);
diff --git a/src/freedreno/ir3/ir3_sched.c b/src/freedreno/ir3/ir3_sched.c
index a70251374ee..247221d3a03 100644
--- a/src/freedreno/ir3/ir3_sched.c
+++ b/src/freedreno/ir3/ir3_sched.c
@@ -778,18 +778,28 @@ sched_block(struct ir3_sched_ctx *ctx, struct ir3_block *block)
list_inithead(&block->instr_list);
list_inithead(&ctx->depth_list);
- /* first a pre-pass to schedule all meta:input instructions
- * (which need to appear first so that RA knows the register is
- * occupied), and move remaining to depth sorted list:
+ /* First schedule all meta:input instructions, followed by
+ * tex-prefetch. We want all of the instructions that load
+ * values into registers before the shader starts to go
+ * before any other instructions. But in particular we
+ * want inputs to come before prefetches. This is because
+ * a FS's bary_ij input may not actually be live in the
+ * shader, but it should not be scheduled on top of any
+ * other input (but can be overwritten by a tex prefetch)
+ *
+ * Finally, move all the remaining instructions to the depth-
+ * list
*/
- list_for_each_entry_safe (struct ir3_instruction, instr, &unscheduled_list, node) {
- if ((instr->opc == OPC_META_INPUT) ||
- (instr->opc == OPC_META_TEX_PREFETCH)) {
+ list_for_each_entry_safe (struct ir3_instruction, instr, &unscheduled_list, node)
+ if (instr->opc == OPC_META_INPUT)
schedule(ctx, instr);
- } else {
- ir3_insert_by_depth(instr, &ctx->depth_list);
- }
- }
+
+ list_for_each_entry_safe (struct ir3_instruction, instr, &unscheduled_list, node)
+ if (instr->opc == OPC_META_TEX_PREFETCH)
+ schedule(ctx, instr);
+
+ list_for_each_entry_safe (struct ir3_instruction, instr, &unscheduled_list, node)
+ ir3_insert_by_depth(instr, &ctx->depth_list);
while (!list_is_empty(&ctx->depth_list)) {
struct ir3_sched_notes notes = {0};