summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorConnor Abbott <cwabbott0@gmail.com>2021-05-27 16:54:20 +0200
committerMarge Bot <eric+marge@anholt.net>2021-07-08 16:02:41 +0000
commit22ae91b28405b121cbb94badcb9381db94358a0e (patch)
tree76faa465773f77656a50d90359c85dfe5667ef53
parent8176657ead5a05022d8f93a6bcd31b22e5b40504 (diff)
ir3: Handle shared register liveness correctly
As explained in the comments added, we need to add extra edges to the CFG which are ignored except for shared registers. This plumbs through support for this. Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6752>
-rw-r--r--src/freedreno/ir3/ir3.c5
-rw-r--r--src/freedreno/ir3/ir3.h19
-rw-r--r--src/freedreno/ir3/ir3_compiler_nir.c13
-rw-r--r--src/freedreno/ir3/ir3_liveness.c14
-rw-r--r--src/freedreno/ir3/ir3_ra.c38
-rw-r--r--src/freedreno/ir3/ir3_ra_validate.c21
6 files changed, 97 insertions, 13 deletions
diff --git a/src/freedreno/ir3/ir3.c b/src/freedreno/ir3/ir3.c
index 977b87ba834..ad7d13103e5 100644
--- a/src/freedreno/ir3/ir3.c
+++ b/src/freedreno/ir3/ir3.c
@@ -363,6 +363,11 @@ void ir3_block_add_predecessor(struct ir3_block *block, struct ir3_block *pred)
array_insert(block, block->predecessors, pred);
}
+void ir3_block_add_physical_predecessor(struct ir3_block *block, struct ir3_block *pred)
+{
+ array_insert(block, block->physical_predecessors, pred);
+}
+
void ir3_block_remove_predecessor(struct ir3_block *block, struct ir3_block *pred)
{
for (unsigned i = 0; i < block->predecessors_count; i++) {
diff --git a/src/freedreno/ir3/ir3.h b/src/freedreno/ir3/ir3.h
index 5bbdc4cc748..eab23608ad5 100644
--- a/src/freedreno/ir3/ir3.h
+++ b/src/freedreno/ir3/ir3.h
@@ -522,14 +522,26 @@ struct ir3_block {
struct list_head instr_list; /* list of ir3_instruction */
- /* each block has either one or two successors.. in case of
- * two successors, 'condition' decides which one to follow.
- * A block preceding an if/else has two successors.
+ /* each block has either one or two successors.. in case of two
+ * successors, 'condition' decides which one to follow. A block preceding
+ * an if/else has two successors.
+ *
+ * In some cases the path that the machine actually takes through the
+ * program may not match the per-thread view of the CFG. In particular
+ * this is the case for if/else, where the machine jumps from the end of
+ * the if to the beginning of the else and switches active lanes. While
+ * most things only care about the per-thread view, we need to use the
+ * "physical" view when allocating shared registers. "successors" contains
+ * the per-thread successors, and "physical_successors" contains the
+ * physical successors which includes the fallthrough edge from the if to
+ * the else.
*/
struct ir3_instruction *condition;
struct ir3_block *successors[2];
+ struct ir3_block *physical_successors[2];
DECLARE_ARRAY(struct ir3_block *, predecessors);
+ DECLARE_ARRAY(struct ir3_block *, physical_predecessors);
uint16_t start_ip, end_ip;
@@ -573,6 +585,7 @@ ir3_start_block(struct ir3 *ir)
}
void ir3_block_add_predecessor(struct ir3_block *block, struct ir3_block *pred);
+void ir3_block_add_physical_predecessor(struct ir3_block *block, struct ir3_block *pred);
void ir3_block_remove_predecessor(struct ir3_block *block, struct ir3_block *pred);
unsigned ir3_block_get_pred_index(struct ir3_block *block, struct ir3_block *pred);
diff --git a/src/freedreno/ir3/ir3_compiler_nir.c b/src/freedreno/ir3/ir3_compiler_nir.c
index b43c521eda1..ff52d1c2e51 100644
--- a/src/freedreno/ir3/ir3_compiler_nir.c
+++ b/src/freedreno/ir3/ir3_compiler_nir.c
@@ -2882,6 +2882,7 @@ emit_block(struct ir3_context *ctx, nir_block *nblock)
if (nblock->successors[i]) {
ctx->block->successors[i] =
get_block(ctx, nblock->successors[i]);
+ ctx->block->physical_successors[i] = ctx->block->successors[i];
}
}
@@ -2899,6 +2900,16 @@ emit_if(struct ir3_context *ctx, nir_if *nif)
emit_cf_list(ctx, &nif->then_list);
emit_cf_list(ctx, &nif->else_list);
+
+ struct ir3_block *last_then = get_block(ctx, nir_if_last_then_block(nif));
+ struct ir3_block *first_else = get_block(ctx, nir_if_first_else_block(nif));
+ assert(last_then->physical_successors[0] && !last_then->physical_successors[1]);
+ last_then->physical_successors[1] = first_else;
+
+ struct ir3_block *last_else = get_block(ctx, nir_if_last_else_block(nif));
+ struct ir3_block *after_if =
+ get_block(ctx, nir_cf_node_as_block(nir_cf_node_next(&nif->cf_node)));
+ last_else->physical_successors[0] = after_if;
}
static void
@@ -3071,6 +3082,8 @@ setup_predecessors(struct ir3 *ir)
for (int i = 0; i < ARRAY_SIZE(block->successors); i++) {
if (block->successors[i])
ir3_block_add_predecessor(block->successors[i], block);
+ if (block->physical_successors[i])
+ ir3_block_add_physical_predecessor(block->physical_successors[i], block);
}
}
}
diff --git a/src/freedreno/ir3/ir3_liveness.c b/src/freedreno/ir3/ir3_liveness.c
index 4285c7dc7cf..86e4a0fa1e7 100644
--- a/src/freedreno/ir3/ir3_liveness.c
+++ b/src/freedreno/ir3/ir3_liveness.c
@@ -96,6 +96,20 @@ compute_block_liveness(struct ir3_liveness *live, struct ir3_block *block,
}
}
}
+
+ for (unsigned i = 0; i < block->physical_predecessors_count; i++) {
+ const struct ir3_block *pred = block->physical_predecessors[i];
+ unsigned name;
+ BITSET_FOREACH_SET(name, tmp_live, live->definitions_count) {
+ struct ir3_register *reg = live->definitions[name];
+ if (!(reg->flags & IR3_REG_SHARED))
+ continue;
+ if (!BITSET_TEST(live->live_out[pred->index], name)) {
+ progress = true;
+ BITSET_SET(live->live_out[pred->index], name);
+ }
+ }
+ }
return progress;
}
diff --git a/src/freedreno/ir3/ir3_ra.c b/src/freedreno/ir3/ir3_ra.c
index 8cea35df9d1..f65b7688104 100644
--- a/src/freedreno/ir3/ir3_ra.c
+++ b/src/freedreno/ir3/ir3_ra.c
@@ -1646,8 +1646,14 @@ insert_live_in_move(struct ra_ctx *ctx, struct ra_interval *interval)
{
physreg_t physreg = ra_interval_get_physreg(interval);
- for (unsigned i = 0; i < ctx->block->predecessors_count; i++) {
- struct ir3_block *pred = ctx->block->predecessors[i];
+ bool shared = interval->interval.reg->flags & IR3_REG_SHARED;
+ struct ir3_block **predecessors =
+ shared ? ctx->block->physical_predecessors : ctx->block->predecessors;
+ unsigned predecessors_count =
+ shared ? ctx->block->physical_predecessors_count : ctx->block->predecessors_count;
+
+ for (unsigned i = 0; i < predecessors_count; i++) {
+ struct ir3_block *pred = predecessors[i];
struct ra_block_state *pred_state = &ctx->blocks[pred->index];
if (!pred_state->visited)
@@ -1656,6 +1662,27 @@ insert_live_in_move(struct ra_ctx *ctx, struct ra_interval *interval)
physreg_t pred_reg = read_register(ctx, pred, interval->interval.reg);
if (pred_reg != physreg) {
insert_liveout_copy(pred, physreg, pred_reg, interval->interval.reg);
+
+ /* This is a bit tricky, but when visiting the destination of a
+ * physical-only edge, we have two predecessors (the if and the
+ * header block) and both have multiple successors. We pick the
+ * register for all live-ins from the normal edge, which should
+ * guarantee that there's no need for shuffling things around in
+ * the normal predecessor as long as there are no phi nodes, but
+ * we still may need to insert fixup code in the physical
+ * predecessor (i.e. the last block of the if) and that has
+ * another successor (the block after the if) so we need to update
+ * the renames state for when we process the other successor. This
+ * crucially depends on the other successor getting processed
+ * after this.
+ *
+ * For normal (non-physical) edges we disallow critical edges so
+ * that hacks like this aren't necessary.
+ */
+ if (!pred_state->renames)
+ pred_state->renames = _mesa_pointer_hash_table_create(ctx);
+ _mesa_hash_table_insert(pred_state->renames, interval->interval.reg,
+ (void *)(uintptr_t)physreg);
}
}
}
@@ -1850,10 +1877,6 @@ handle_block(struct ra_ctx *ctx, struct ir3_block *block)
}
ctx->blocks[block->index].visited = true;
-
- for (unsigned i = 0; i < block->dom_children_count; i++) {
- handle_block(ctx, block->dom_children[i]);
- }
}
static unsigned
@@ -1933,7 +1956,8 @@ ir3_ra(struct ir3_shader_variant *v)
ctx->shared.size = RA_SHARED_SIZE;
- handle_block(ctx, ir3_start_block(v->ir));
+ foreach_block (block, &v->ir->block_list)
+ handle_block(ctx, block);
ir3_ra_validate(v, ctx->full.size, ctx->half.size, live->block_count);
diff --git a/src/freedreno/ir3/ir3_ra_validate.c b/src/freedreno/ir3/ir3_ra_validate.c
index 046c49723e8..070ddc195b6 100644
--- a/src/freedreno/ir3/ir3_ra_validate.c
+++ b/src/freedreno/ir3/ir3_ra_validate.c
@@ -192,10 +192,16 @@ merge_state(struct ra_val_ctx *ctx, struct reaching_state *dst,
bool progress = false;
progress |= merge_file(&dst->full, &src->full, ctx->full_size);
progress |= merge_file(&dst->half, &src->half, ctx->half_size);
- progress |= merge_file(&dst->shared, &src->shared, RA_SHARED_SIZE);
return progress;
}
+static bool
+merge_state_physical(struct ra_val_ctx *ctx, struct reaching_state *dst,
+ const struct reaching_state *src)
+{
+ return merge_file(&dst->shared, &src->shared, RA_SHARED_SIZE);
+}
+
static struct file_state *
ra_val_get_file(struct ra_val_ctx *ctx, struct ir3_register *reg)
{
@@ -337,12 +343,21 @@ propagate_block(struct ra_val_ctx *ctx, struct ir3_block *block)
bool progress = false;
for (unsigned i = 0; i < 2; i++) {
- if (!block->successors[i])
+ struct ir3_block *succ = block->successors[i];
+ if (!succ)
continue;
progress |= merge_state(ctx,
- &ctx->block_reaching[block->successors[i]->index],
+ &ctx->block_reaching[succ->index],
&ctx->reaching);
}
+ for (unsigned i = 0; i < 2; i++) {
+ struct ir3_block *succ = block->physical_successors[i];
+ if (!succ)
+ continue;
+ progress |= merge_state_physical(ctx,
+ &ctx->block_reaching[succ->index],
+ &ctx->reaching);
+ }
return progress;
}