summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r-- src/freedreno/ir3/ir3.c 5
-rw-r--r-- src/freedreno/ir3/ir3.h 19
-rw-r--r-- src/freedreno/ir3/ir3_compiler_nir.c 13
-rw-r--r-- src/freedreno/ir3/ir3_liveness.c 14
-rw-r--r-- src/freedreno/ir3/ir3_ra.c 38
-rw-r--r-- src/freedreno/ir3/ir3_ra_validate.c 21
6 files changed, 97 insertions, 13 deletions
diff --git a/src/freedreno/ir3/ir3.c b/src/freedreno/ir3/ir3.c
index 977b87ba834..ad7d13103e5 100644
--- a/src/freedreno/ir3/ir3.c
+++ b/src/freedreno/ir3/ir3.c
@@ -363,6 +363,11 @@ void ir3_block_add_predecessor(struct ir3_block *block, struct ir3_block *pred)
array_insert(block, block->predecessors, pred);
}
+void ir3_block_add_physical_predecessor(struct ir3_block *block, struct ir3_block *pred)
+{
+ array_insert(block, block->physical_predecessors, pred); /* record pred in block's physical_predecessors array; physical counterpart of ir3_block_add_predecessor() */
+}
+
void ir3_block_remove_predecessor(struct ir3_block *block, struct ir3_block *pred)
{
for (unsigned i = 0; i < block->predecessors_count; i++) {
diff --git a/src/freedreno/ir3/ir3.h b/src/freedreno/ir3/ir3.h
index 5bbdc4cc748..eab23608ad5 100644
--- a/src/freedreno/ir3/ir3.h
+++ b/src/freedreno/ir3/ir3.h
@@ -522,14 +522,26 @@ struct ir3_block {
struct list_head instr_list; /* list of ir3_instruction */
- /* each block has either one or two successors.. in case of
- * two successors, 'condition' decides which one to follow.
- * A block preceding an if/else has two successors.
+ /* each block has either one or two successors.. in case of two
+ * successors, 'condition' decides which one to follow. A block preceding
+ * an if/else has two successors.
+ *
+ * In some cases the path that the machine actually takes through the
+ * program may not match the per-thread view of the CFG. In particular
+ * this is the case for if/else, where the machine jumps from the end of
+ * the if to the beginning of the else and switches active lanes. While
+ * most things only care about the per-thread view, we need to use the
+ * "physical" view when allocating shared registers. "successors" contains
+ * the per-thread successors, and "physical_successors" contains the
+ * physical successors, which include the fallthrough edge from the if to
+ * the else.
*/
struct ir3_instruction *condition;
struct ir3_block *successors[2];
+ struct ir3_block *physical_successors[2];
DECLARE_ARRAY(struct ir3_block *, predecessors);
+ DECLARE_ARRAY(struct ir3_block *, physical_predecessors);
uint16_t start_ip, end_ip;
@@ -573,6 +585,7 @@ ir3_start_block(struct ir3 *ir)
}
void ir3_block_add_predecessor(struct ir3_block *block, struct ir3_block *pred);
+void ir3_block_add_physical_predecessor(struct ir3_block *block, struct ir3_block *pred);
void ir3_block_remove_predecessor(struct ir3_block *block, struct ir3_block *pred);
unsigned ir3_block_get_pred_index(struct ir3_block *block, struct ir3_block *pred);
diff --git a/src/freedreno/ir3/ir3_compiler_nir.c b/src/freedreno/ir3/ir3_compiler_nir.c
index b43c521eda1..ff52d1c2e51 100644
--- a/src/freedreno/ir3/ir3_compiler_nir.c
+++ b/src/freedreno/ir3/ir3_compiler_nir.c
@@ -2882,6 +2882,7 @@ emit_block(struct ir3_context *ctx, nir_block *nblock)
if (nblock->successors[i]) {
ctx->block->successors[i] =
get_block(ctx, nblock->successors[i]);
+ ctx->block->physical_successors[i] = ctx->block->successors[i];
}
}
@@ -2899,6 +2900,16 @@ emit_if(struct ir3_context *ctx, nir_if *nif)
emit_cf_list(ctx, &nif->then_list);
emit_cf_list(ctx, &nif->else_list);
+
+ struct ir3_block *last_then = get_block(ctx, nir_if_last_then_block(nif));
+ struct ir3_block *first_else = get_block(ctx, nir_if_first_else_block(nif));
+ assert(last_then->physical_successors[0] && !last_then->physical_successors[1]);
+ last_then->physical_successors[1] = first_else;
+
+ struct ir3_block *last_else = get_block(ctx, nir_if_last_else_block(nif));
+ struct ir3_block *after_if =
+ get_block(ctx, nir_cf_node_as_block(nir_cf_node_next(&nif->cf_node)));
+ last_else->physical_successors[0] = after_if;
}
static void
@@ -3071,6 +3082,8 @@ setup_predecessors(struct ir3 *ir)
for (int i = 0; i < ARRAY_SIZE(block->successors); i++) {
if (block->successors[i])
ir3_block_add_predecessor(block->successors[i], block);
+ if (block->physical_successors[i])
+ ir3_block_add_physical_predecessor(block->physical_successors[i], block);
}
}
}
diff --git a/src/freedreno/ir3/ir3_liveness.c b/src/freedreno/ir3/ir3_liveness.c
index 4285c7dc7cf..86e4a0fa1e7 100644
--- a/src/freedreno/ir3/ir3_liveness.c
+++ b/src/freedreno/ir3/ir3_liveness.c
@@ -96,6 +96,20 @@ compute_block_liveness(struct ir3_liveness *live, struct ir3_block *block,
}
}
}
+
+ for (unsigned i = 0; i < block->physical_predecessors_count; i++) {
+ const struct ir3_block *pred = block->physical_predecessors[i];
+ unsigned name;
+ BITSET_FOREACH_SET(name, tmp_live, live->definitions_count) {
+ struct ir3_register *reg = live->definitions[name];
+ if (!(reg->flags & IR3_REG_SHARED))
+ continue;
+ if (!BITSET_TEST(live->live_out[pred->index], name)) {
+ progress = true;
+ BITSET_SET(live->live_out[pred->index], name);
+ }
+ }
+ }
return progress;
}
diff --git a/src/freedreno/ir3/ir3_ra.c b/src/freedreno/ir3/ir3_ra.c
index 8cea35df9d1..f65b7688104 100644
--- a/src/freedreno/ir3/ir3_ra.c
+++ b/src/freedreno/ir3/ir3_ra.c
@@ -1646,8 +1646,14 @@ insert_live_in_move(struct ra_ctx *ctx, struct ra_interval *interval)
{
physreg_t physreg = ra_interval_get_physreg(interval);
- for (unsigned i = 0; i < ctx->block->predecessors_count; i++) {
- struct ir3_block *pred = ctx->block->predecessors[i];
+ bool shared = interval->interval.reg->flags & IR3_REG_SHARED;
+ struct ir3_block **predecessors =
+ shared ? ctx->block->physical_predecessors : ctx->block->predecessors;
+ unsigned predecessors_count =
+ shared ? ctx->block->physical_predecessors_count : ctx->block->predecessors_count;
+
+ for (unsigned i = 0; i < predecessors_count; i++) {
+ struct ir3_block *pred = predecessors[i];
struct ra_block_state *pred_state = &ctx->blocks[pred->index];
if (!pred_state->visited)
@@ -1656,6 +1662,27 @@ insert_live_in_move(struct ra_ctx *ctx, struct ra_interval *interval)
physreg_t pred_reg = read_register(ctx, pred, interval->interval.reg);
if (pred_reg != physreg) {
insert_liveout_copy(pred, physreg, pred_reg, interval->interval.reg);
+
+ /* This is a bit tricky, but when visiting the destination of a
+ * physical-only edge, we have two predecessors (the if and the
+ * header block) and both have multiple successors. We pick the
+ * register for all live-ins from the normal edge, which should
+ * guarantee that there's no need for shuffling things around in
+ * the normal predecessor as long as there are no phi nodes, but
+ * we still may need to insert fixup code in the physical
+ * predecessor (i.e. the last block of the if) and that has
+ * another successor (the block after the if) so we need to update
+ * the renames state for when we process the other successor. This
+ * crucially depends on the other successor getting processed
+ * after this.
+ *
+ * For normal (non-physical) edges we disallow critical edges so
+ * that hacks like this aren't necessary.
+ */
+ if (!pred_state->renames)
+ pred_state->renames = _mesa_pointer_hash_table_create(ctx);
+ _mesa_hash_table_insert(pred_state->renames, interval->interval.reg,
+ (void *)(uintptr_t)physreg);
}
}
}
@@ -1850,10 +1877,6 @@ handle_block(struct ra_ctx *ctx, struct ir3_block *block)
}
ctx->blocks[block->index].visited = true;
-
- for (unsigned i = 0; i < block->dom_children_count; i++) {
- handle_block(ctx, block->dom_children[i]);
- }
}
static unsigned
@@ -1933,7 +1956,8 @@ ir3_ra(struct ir3_shader_variant *v)
ctx->shared.size = RA_SHARED_SIZE;
- handle_block(ctx, ir3_start_block(v->ir));
+ foreach_block (block, &v->ir->block_list)
+ handle_block(ctx, block);
ir3_ra_validate(v, ctx->full.size, ctx->half.size, live->block_count);
diff --git a/src/freedreno/ir3/ir3_ra_validate.c b/src/freedreno/ir3/ir3_ra_validate.c
index 046c49723e8..070ddc195b6 100644
--- a/src/freedreno/ir3/ir3_ra_validate.c
+++ b/src/freedreno/ir3/ir3_ra_validate.c
@@ -192,10 +192,16 @@ merge_state(struct ra_val_ctx *ctx, struct reaching_state *dst,
bool progress = false;
progress |= merge_file(&dst->full, &src->full, ctx->full_size);
progress |= merge_file(&dst->half, &src->half, ctx->half_size);
- progress |= merge_file(&dst->shared, &src->shared, RA_SHARED_SIZE);
return progress;
}
+static bool
+merge_state_physical(struct ra_val_ctx *ctx, struct reaching_state *dst,
+ const struct reaching_state *src)
+{
+ return merge_file(&dst->shared, &src->shared, RA_SHARED_SIZE); /* merges only the shared file of src into dst (ctx is unused); result is merge_file()'s return, which callers OR into "progress" */
+}
+
static struct file_state *
ra_val_get_file(struct ra_val_ctx *ctx, struct ir3_register *reg)
{
@@ -337,12 +343,21 @@ propagate_block(struct ra_val_ctx *ctx, struct ir3_block *block)
bool progress = false;
for (unsigned i = 0; i < 2; i++) {
- if (!block->successors[i])
+ struct ir3_block *succ = block->successors[i];
+ if (!succ)
continue;
progress |= merge_state(ctx,
- &ctx->block_reaching[block->successors[i]->index],
+ &ctx->block_reaching[succ->index],
&ctx->reaching);
}
+ for (unsigned i = 0; i < 2; i++) {
+ struct ir3_block *succ = block->physical_successors[i];
+ if (!succ)
+ continue;
+ progress |= merge_state_physical(ctx,
+ &ctx->block_reaching[succ->index],
+ &ctx->reaching);
+ }
return progress;
}