diff options
-rw-r--r-- | src/freedreno/ir3/ir3.c | 5 | ||||
-rw-r--r-- | src/freedreno/ir3/ir3.h | 19 | ||||
-rw-r--r-- | src/freedreno/ir3/ir3_compiler_nir.c | 13 | ||||
-rw-r--r-- | src/freedreno/ir3/ir3_liveness.c | 14 | ||||
-rw-r--r-- | src/freedreno/ir3/ir3_ra.c | 38 | ||||
-rw-r--r-- | src/freedreno/ir3/ir3_ra_validate.c | 21 |
6 files changed, 97 insertions, 13 deletions
diff --git a/src/freedreno/ir3/ir3.c b/src/freedreno/ir3/ir3.c index 977b87ba834..ad7d13103e5 100644 --- a/src/freedreno/ir3/ir3.c +++ b/src/freedreno/ir3/ir3.c @@ -363,6 +363,11 @@ void ir3_block_add_predecessor(struct ir3_block *block, struct ir3_block *pred) array_insert(block, block->predecessors, pred); } +void ir3_block_add_physical_predecessor(struct ir3_block *block, struct ir3_block *pred) +{ + array_insert(block, block->physical_predecessors, pred); +} + void ir3_block_remove_predecessor(struct ir3_block *block, struct ir3_block *pred) { for (unsigned i = 0; i < block->predecessors_count; i++) { diff --git a/src/freedreno/ir3/ir3.h b/src/freedreno/ir3/ir3.h index 5bbdc4cc748..eab23608ad5 100644 --- a/src/freedreno/ir3/ir3.h +++ b/src/freedreno/ir3/ir3.h @@ -522,14 +522,26 @@ struct ir3_block { struct list_head instr_list; /* list of ir3_instruction */ - /* each block has either one or two successors.. in case of - * two successors, 'condition' decides which one to follow. - * A block preceding an if/else has two successors. + /* each block has either one or two successors.. in case of two + * successors, 'condition' decides which one to follow. A block preceding + * an if/else has two successors. + * + * In some cases the path that the machine actually takes through the + * program may not match the per-thread view of the CFG. In particular + * this is the case for if/else, where the machine jumps from the end of + * the if to the beginning of the else and switches active lanes. While + * most things only care about the per-thread view, we need to use the + * "physical" view when allocating shared registers. "successors" contains + * the per-thread successors, and "physical_successors" contains the + * physical successors which includes the fallthrough edge from the if to + * the else. */ struct ir3_instruction *condition; struct ir3_block *successors[2]; + struct ir3_block *physical_successors[2]; DECLARE_ARRAY(struct ir3_block *, predecessors); + DECLARE_ARRAY(struct ir3_block *, physical_predecessors); uint16_t start_ip, end_ip; @@ -573,6 +585,7 @@ ir3_start_block(struct ir3 *ir) } void ir3_block_add_predecessor(struct ir3_block *block, struct ir3_block *pred); +void ir3_block_add_physical_predecessor(struct ir3_block *block, struct ir3_block *pred); void ir3_block_remove_predecessor(struct ir3_block *block, struct ir3_block *pred); unsigned ir3_block_get_pred_index(struct ir3_block *block, struct ir3_block *pred); diff --git a/src/freedreno/ir3/ir3_compiler_nir.c b/src/freedreno/ir3/ir3_compiler_nir.c index b43c521eda1..ff52d1c2e51 100644 --- a/src/freedreno/ir3/ir3_compiler_nir.c +++ b/src/freedreno/ir3/ir3_compiler_nir.c @@ -2882,6 +2882,7 @@ emit_block(struct ir3_context *ctx, nir_block *nblock) if (nblock->successors[i]) { ctx->block->successors[i] = get_block(ctx, nblock->successors[i]); + ctx->block->physical_successors[i] = ctx->block->successors[i]; } } @@ -2899,6 +2900,16 @@ emit_if(struct ir3_context *ctx, nir_if *nif) emit_cf_list(ctx, &nif->then_list); emit_cf_list(ctx, &nif->else_list); + + struct ir3_block *last_then = get_block(ctx, nir_if_last_then_block(nif)); + struct ir3_block *first_else = get_block(ctx, nir_if_first_else_block(nif)); + assert(last_then->physical_successors[0] && !last_then->physical_successors[1]); + last_then->physical_successors[1] = first_else; + + struct ir3_block *last_else = get_block(ctx, nir_if_last_else_block(nif)); + struct ir3_block *after_if = + get_block(ctx, nir_cf_node_as_block(nir_cf_node_next(&nif->cf_node))); + last_else->physical_successors[0] = after_if; } static void @@ -3071,6 +3082,8 @@ setup_predecessors(struct ir3 *ir) for (int i = 0; i < ARRAY_SIZE(block->successors); i++) { if (block->successors[i]) ir3_block_add_predecessor(block->successors[i], block); + if (block->physical_successors[i]) + ir3_block_add_physical_predecessor(block->physical_successors[i], block); } } } diff --git a/src/freedreno/ir3/ir3_liveness.c b/src/freedreno/ir3/ir3_liveness.c index 4285c7dc7cf..86e4a0fa1e7 100644 --- a/src/freedreno/ir3/ir3_liveness.c +++ b/src/freedreno/ir3/ir3_liveness.c @@ -96,6 +96,20 @@ compute_block_liveness(struct ir3_liveness *live, struct ir3_block *block, } } } + + for (unsigned i = 0; i < block->physical_predecessors_count; i++) { + const struct ir3_block *pred = block->physical_predecessors[i]; + unsigned name; + BITSET_FOREACH_SET(name, tmp_live, live->definitions_count) { + struct ir3_register *reg = live->definitions[name]; + if (!(reg->flags & IR3_REG_SHARED)) + continue; + if (!BITSET_TEST(live->live_out[pred->index], name)) { + progress = true; + BITSET_SET(live->live_out[pred->index], name); + } + } + } return progress; } diff --git a/src/freedreno/ir3/ir3_ra.c b/src/freedreno/ir3/ir3_ra.c index 8cea35df9d1..f65b7688104 100644 --- a/src/freedreno/ir3/ir3_ra.c +++ b/src/freedreno/ir3/ir3_ra.c @@ -1646,8 +1646,14 @@ insert_live_in_move(struct ra_ctx *ctx, struct ra_interval *interval) { physreg_t physreg = ra_interval_get_physreg(interval); - for (unsigned i = 0; i < ctx->block->predecessors_count; i++) { - struct ir3_block *pred = ctx->block->predecessors[i]; + bool shared = interval->interval.reg->flags & IR3_REG_SHARED; + struct ir3_block **predecessors = + shared ? ctx->block->physical_predecessors : ctx->block->predecessors; + unsigned predecessors_count = + shared ? ctx->block->physical_predecessors_count : ctx->block->predecessors_count; + + for (unsigned i = 0; i < predecessors_count; i++) { + struct ir3_block *pred = predecessors[i]; struct ra_block_state *pred_state = &ctx->blocks[pred->index]; if (!pred_state->visited) @@ -1656,6 +1662,27 @@ insert_live_in_move(struct ra_ctx *ctx, struct ra_interval *interval) physreg_t pred_reg = read_register(ctx, pred, interval->interval.reg); if (pred_reg != physreg) { insert_liveout_copy(pred, physreg, pred_reg, interval->interval.reg); + + /* This is a bit tricky, but when visiting the destination of a + * physical-only edge, we have two predecessors (the if and the + * header block) and both have multiple successors. We pick the + * register for all live-ins from the normal edge, which should + * guarantee that there's no need for shuffling things around in + * the normal predecessor as long as there are no phi nodes, but + * we still may need to insert fixup code in the physical + * predecessor (i.e. the last block of the if) and that has + * another successor (the block after the if) so we need to update + * the renames state for when we process the other successor. This + * crucially depends on the other successor getting processed + * after this. + * + * For normal (non-physical) edges we disallow critical edges so + * that hacks like this aren't necessary. + */ + if (!pred_state->renames) + pred_state->renames = _mesa_pointer_hash_table_create(ctx); + _mesa_hash_table_insert(pred_state->renames, interval->interval.reg, + (void *)(uintptr_t)physreg); } } } @@ -1850,10 +1877,6 @@ handle_block(struct ra_ctx *ctx, struct ir3_block *block) } ctx->blocks[block->index].visited = true; - - for (unsigned i = 0; i < block->dom_children_count; i++) { - handle_block(ctx, block->dom_children[i]); - } } static unsigned @@ -1933,7 +1956,8 @@ ir3_ra(struct ir3_shader_variant *v) ctx->shared.size = RA_SHARED_SIZE; - handle_block(ctx, ir3_start_block(v->ir)); + foreach_block (block, &v->ir->block_list) + handle_block(ctx, block); ir3_ra_validate(v, ctx->full.size, ctx->half.size, live->block_count); diff --git a/src/freedreno/ir3/ir3_ra_validate.c b/src/freedreno/ir3/ir3_ra_validate.c index 046c49723e8..070ddc195b6 100644 --- a/src/freedreno/ir3/ir3_ra_validate.c +++ b/src/freedreno/ir3/ir3_ra_validate.c @@ -192,10 +192,16 @@ merge_state(struct ra_val_ctx *ctx, struct reaching_state *dst, bool progress = false; progress |= merge_file(&dst->full, &src->full, ctx->full_size); progress |= merge_file(&dst->half, &src->half, ctx->half_size); - progress |= merge_file(&dst->shared, &src->shared, RA_SHARED_SIZE); return progress; } +static bool +merge_state_physical(struct ra_val_ctx *ctx, struct reaching_state *dst, + const struct reaching_state *src) +{ + return merge_file(&dst->shared, &src->shared, RA_SHARED_SIZE); +} + static struct file_state * ra_val_get_file(struct ra_val_ctx *ctx, struct ir3_register *reg) { @@ -337,12 +343,21 @@ propagate_block(struct ra_val_ctx *ctx, struct ir3_block *block) bool progress = false; for (unsigned i = 0; i < 2; i++) { - if (!block->successors[i]) + struct ir3_block *succ = block->successors[i]; + if (!succ) continue; progress |= merge_state(ctx, - &ctx->block_reaching[block->successors[i]->index], + &ctx->block_reaching[succ->index], &ctx->reaching); } + for (unsigned i = 0; i < 2; i++) { + struct ir3_block *succ = block->physical_successors[i]; + if (!succ) + continue; + progress |= merge_state_physical(ctx, + &ctx->block_reaching[succ->index], + &ctx->reaching); + } return progress; } |