-rw-r--r--  src/gallium/drivers/lima/ir/gp/scheduler.c | 462
1 file changed, 316 insertions(+), 146 deletions(-)
diff --git a/src/gallium/drivers/lima/ir/gp/scheduler.c b/src/gallium/drivers/lima/ir/gp/scheduler.c
index 2a11bdfd1c..75adf9ca44 100644
--- a/src/gallium/drivers/lima/ir/gp/scheduler.c
+++ b/src/gallium/drivers/lima/ir/gp/scheduler.c
@@ -79,6 +79,13 @@
* scheduled total, below the limit. So the algorithm will always succeed.
*/
+typedef struct {
+ struct list_head ready_list;
+ int ready_list_slots;
+ gpir_instr *instr;
+ gpir_block *block;
+} sched_ctx;
+
static int gpir_min_dist_alu(gpir_dep *dep)
{
switch (dep->pred->op) {
@@ -230,7 +237,7 @@ static void schedule_update_distance(gpir_node *node)
if (pred->sched.dist < 0)
schedule_update_distance(pred);
- int dist = pred->sched.dist + 1;
+ int dist = pred->sched.dist + gpir_min_dist_alu(dep);
if (node->sched.dist < dist)
node->sched.dist = dist;
}
@@ -378,162 +385,357 @@ static gpir_node *schedule_create_move_node(gpir_node *node)
return &move->node;
}
-static gpir_node *gpir_sched_node(gpir_instr *instr, gpir_node *node)
+static bool gpir_is_input_node(gpir_node *node)
{
- if (node->op == gpir_op_mov) {
- gpir_node *child = gpir_node_to_alu(node)->children[0];
- gpir_node_foreach_succ_safe(node, dep) {
- gpir_node *succ = dep->succ;
- if (succ->sched.instr < 0 ||
- instr->index < succ->sched.instr + gpir_get_min_dist(dep)) {
- gpir_node_replace_pred(dep, child);
- if (dep->type == GPIR_DEP_INPUT)
- gpir_node_replace_child(succ, node, child);
- }
+ gpir_node_foreach_succ(node, dep) {
+ if (dep->type == GPIR_DEP_INPUT)
+ return true;
+ }
+ return false;
+}
+
+/* Get the number of slots required for a node on the ready list.
+ */
+static int gpir_get_slots_required(gpir_node *node)
+{
+ if (!gpir_is_input_node(node))
+ return 0;
+
+ if (gpir_op_infos[node->op].may_consume_two_slots && node->sched.ready) {
+ /* If the node is fully ready, we have to assume that it may get
+ * scheduled at any time, at which point it takes up two slots.
+ */
+ return 2;
+ }
+
+ return 1;
+}
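
/* A self-contained toy model of the rule above (invented names, not the
 * driver's types): a node whose value is consumed reserves one slot, but a
 * fully ready two-slot opcode such as complex1 must reserve both slots,
 * since it may get emitted at any moment. Compiles and runs standalone.
 */
#include <stdbool.h>
#include <stdio.h>

struct toy_node {
   bool has_input_succ; /* some successor consumes this node's value */
   bool two_slot;       /* opcode may consume two slots (e.g. complex1) */
   bool fully_ready;    /* every successor is already scheduled */
};

static int toy_slots_required(const struct toy_node *n)
{
   if (!n->has_input_succ)
      return 0; /* nothing reads the value, so nothing is reserved */
   if (n->two_slot && n->fully_ready)
      return 2; /* could be scheduled at any time: assume the worst */
   return 1;
}

int main(void)
{
   struct toy_node complex1 = { true, true, true };
   struct toy_node add = { true, false, true };
   printf("complex1 reserves %d slots, add reserves %d\n",
          toy_slots_required(&complex1), toy_slots_required(&add));
   return 0;
}
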
+
+/* Once we schedule the successor, would the predecessor be fully ready? */
+static bool pred_almost_ready(gpir_dep *dep)
+{
+ bool fully_ready = true;
+ gpir_node_foreach_succ(dep->pred, other_dep) {
+ gpir_node *succ = other_dep->succ;
+ if (succ->sched.instr < 0 && dep->succ != other_dep->succ) {
+ fully_ready = false;
+ break;
}
- MAYBE_UNUSED bool result = schedule_try_place_node(instr, node);
- assert(result);
- return node;
}
- else {
- gpir_node *move = schedule_create_move_node(node);
- list_del(&node->list);
- node->sched.ready = false;
- node->sched.inserted = false;
- gpir_node_replace_succ(move, node);
- gpir_node_add_dep(move, node, GPIR_DEP_INPUT);
- return move;
+
+ return fully_ready;
+}
+
+/* Speculatively schedule a dep, and count how many slots it consumes.
+ */
+static int gpir_get_dep_slots(sched_ctx *ctx, gpir_dep *dep)
+{
+ gpir_node *pred = dep->pred;
+ if (!gpir_is_input_node(pred))
+ return 0;
+
+ /* Try and speculatively schedule any loads. */
+ if (pred->type == gpir_node_type_load && pred_almost_ready(dep) &&
+ schedule_try_place_node(ctx->instr, pred)) {
+ return 0;
+ }
+
+ int total = 0;
+ if (!pred->sched.inserted)
+ total++;
+
+ if (gpir_op_infos[pred->op].may_consume_two_slots) {
+ /* If pred goes from partially ready or not ready to fully ready, then
+ * it takes up two slots as per gpir_get_slots_required(), so we need to
+ * add an extra slot.
+ */
+ if (pred_almost_ready(dep))
+ total++;
}
+
+ return total;
}
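
/* The delta accounting above, in isolation (hypothetical helper, not part
 * of the patch): a pred not yet on the ready list costs one new slot, and
 * a two-slot pred that this schedule would make fully ready costs one
 * more, matching the 1 -> 2 jump in gpir_get_slots_required().
 */
#include <assert.h>
#include <stdbool.h>

static int toy_dep_slots(bool on_list, bool two_slot, bool becomes_ready)
{
   int total = 0;
   if (!on_list)
      total++; /* a new entry appears on the ready list */
   if (two_slot && becomes_ready)
      total++; /* an existing entry grows from one slot to two */
   return total;
}

int main(void)
{
   assert(toy_dep_slots(true, true, true) == 1);    /* complex1 made ready */
   assert(toy_dep_slots(false, false, false) == 1); /* ordinary new pred */
   return 0;
}
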
-static bool gpir_is_input_node(gpir_node *node)
+static void cleanup_speculative_loads(sched_ctx *ctx, gpir_node *node)
{
- gpir_node_foreach_succ(node, dep) {
- if (dep->type == GPIR_DEP_INPUT)
- return true;
+ gpir_node_foreach_pred(node, dep) {
+ gpir_node *pred = dep->pred;
+ if (pred->sched.instr >= 0) {
+ pred->sched.instr = -1;
+ gpir_instr_remove_node(ctx->instr, pred);
+ }
}
- return false;
}
-static int gpir_get_min_scheduled_succ(gpir_node *node)
+/* Get the total number of slots on the ready list if this node were to be
+ * scheduled.
+ */
+static int gpir_get_ready_list_slots(sched_ctx *ctx, gpir_node *node)
+{
+ assert(ctx->ready_list_slots <= GPIR_VALUE_REG_NUM);
+ int total = ctx->ready_list_slots - gpir_get_slots_required(node);
+
+ if (!schedule_try_place_node(ctx->instr, node))
+ return INT_MAX;
+
+ gpir_node_foreach_pred(node, dep) {
+ int slots = gpir_get_dep_slots(ctx, dep);
+ if (node->op == gpir_op_mov && slots != 0) {
+ /* At this stage, we only insert moves for loads that we couldn't
+ * schedule immediately. If we couldn't schedule the load, there's
+ * no point scheduling the move.
+ */
+ cleanup_speculative_loads(ctx, node);
+ gpir_instr_remove_node(ctx->instr, node);
+ return INT_MAX;
+ }
+ total += slots;
+ }
+
+ /* Cleanup any speculatively scheduled loads. */
+ cleanup_speculative_loads(ctx, node);
+ gpir_instr_remove_node(ctx->instr, node);
+
+ return total;
+}
+
+static bool try_place_node(sched_ctx *ctx, gpir_node *node)
+{
+ if (!schedule_try_place_node(ctx->instr, node)) {
+ gpir_debug("failed to place %d\n", node->index);
+ return false;
+ }
+
+ gpir_debug("placed node %d\n", node->index);
+
+ list_del(&node->list);
+ list_add(&node->list, &ctx->block->node_list);
+ gpir_node_foreach_pred(node, dep) {
+ gpir_node *pred = dep->pred;
+ schedule_insert_ready_list(&ctx->ready_list, pred);
+ }
+
+ return true;
+}
+
+static gpir_node *create_move(sched_ctx *ctx, gpir_node *node)
+{
+ gpir_node *move = schedule_create_move_node(node);
+ list_del(&node->list);
+ node->sched.ready = false;
+ node->sched.inserted = false;
+ gpir_node_replace_succ(move, node);
+ gpir_node_add_dep(move, node, GPIR_DEP_INPUT);
+ schedule_insert_ready_list(&ctx->ready_list, move);
+ return move;
+}
+
+static bool try_schedule_node(sched_ctx *ctx, gpir_node *node)
+{
+ if (!try_place_node(ctx, node))
+ return false;
+
+ gpir_node_foreach_pred(node, dep) {
+ gpir_node *pred = dep->pred;
+ if (dep->type == GPIR_DEP_INPUT && pred->type == gpir_node_type_load) {
+ if (!try_place_node(ctx, pred)) {
+ create_move(ctx, pred);
+ }
+ }
+ }
+
+ return true;
+}
+
+static int gpir_get_curr_ready_list_slots(sched_ctx *ctx)
+{
+ int total = 0;
+ list_for_each_entry(gpir_node, node, &ctx->ready_list, list) {
+ total += gpir_get_slots_required(node);
+ }
+
+ return total;
+}
+
+/* What gpir_get_min_end() would return if node were replaced with a move
+ * instruction not in the complex slot. Normally this is 2 + min_end, except
+ * for some store instructions which must have the move node in the same
+ * instruction.
+ */
+static int gpir_get_min_end_move(gpir_node *node)
{
int min = INT_MAX;
gpir_node_foreach_succ(node, dep) {
gpir_node *succ = dep->succ;
if (succ->sched.instr >= 0 && dep->type == GPIR_DEP_INPUT) {
- if (min > succ->sched.instr)
- min = succ->sched.instr;
+ int dist = 2;
+ switch (succ->op) {
+ case gpir_op_store_temp:
+ case gpir_op_store_reg:
+ case gpir_op_store_varying:
+ dist = 0;
+ break;
+ default:
+ break;
+ }
+ if (min > succ->sched.instr + dist)
+ min = succ->sched.instr + dist;
}
}
return min;
}
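
/* A standalone sketch of the deadline computed above (toy types, invented
 * numbers): store successors force a distance of 0, so a move feeding them
 * must land in the same instruction, while other consumers leave room for
 * a move two instructions away.
 */
#include <limits.h>
#include <stdio.h>

struct toy_succ {
   int instr;    /* instruction the consumer was scheduled in */
   int is_store; /* stores need the move in the same instruction */
};

static int toy_min_end_move(const struct toy_succ *succ, int n)
{
   int min = INT_MAX;
   for (int i = 0; i < n; i++) {
      int dist = succ[i].is_store ? 0 : 2;
      if (succ[i].instr + dist < min)
         min = succ[i].instr + dist;
   }
   return min;
}

int main(void)
{
   /* one store in instr 1, one ALU use in instr 1: the store wins */
   struct toy_succ succs[] = { { 1, 1 }, { 1, 0 } };
   printf("move deadline = %d\n", toy_min_end_move(succs, 2)); /* 1 */
   return 0;
}
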
-static gpir_node *gpir_sched_instr_pass(gpir_instr *instr,
- struct list_head *ready_list)
+static bool try_node(sched_ctx *ctx, bool max_only)
{
- /* fully ready node reach its max dist with any of its successor */
- list_for_each_entry_safe(gpir_node, node, ready_list, list) {
+ gpir_node *min_node = NULL;
+ int min_slots = INT_MAX;
+ list_for_each_entry(gpir_node, node, &ctx->ready_list, list) {
if (node->sched.ready) {
- int end = gpir_get_min_end(node);
- assert(end >= instr->index);
- if (instr->index < end)
- continue;
-
- gpir_debug("fully ready max node %d\n", node->index);
+ /* First, schedule nodes that have reached their deadline */
+ if (max_only) {
+ int end = gpir_get_min_end(node);
+ if (end != ctx->instr->index)
+ continue;
+ }
- if (schedule_try_place_node(instr, node))
- return node;
+ int slots = gpir_get_ready_list_slots(ctx, node);
+ if (slots == INT_MAX)
+ continue;
- return gpir_sched_node(instr, node);
+ if (min_node == NULL) {
+ min_slots = slots;
+ min_node = node;
+ continue;
+ }
+ if (min_slots <= GPIR_VALUE_REG_NUM) {
+ if (node->sched.dist <= min_node->sched.dist)
+ break;
+ }
+ if (slots < min_slots) {
+ min_slots = slots;
+ min_node = node;
+ }
}
}
- /* partially ready node reach its max dist with any of its successor */
- list_for_each_entry_safe(gpir_node, node, ready_list, list) {
- if (!node->sched.ready) {
- int end = gpir_get_min_end(node);
- assert(end >= instr->index);
- if (instr->index < end)
- continue;
+ if (min_node && min_slots <= GPIR_VALUE_REG_NUM) {
+ gpir_debug("trying to schedule %d (slots = %d)%s\n", min_node->index,
+ min_slots, max_only ? " (max)" : "");
+ if (try_schedule_node(ctx, min_node)) {
+ ctx->ready_list_slots = min_slots;
+ return true;
+ }
+ }
- gpir_debug("partially ready max node %d\n", node->index);
+ return false;
+}
+
+static void place_move(sched_ctx *ctx, gpir_node *node)
+{
+ gpir_node *move = create_move(ctx, node);
+ gpir_node_foreach_succ_safe(move, dep) {
+ gpir_node *succ = dep->succ;
+ if (succ->sched.instr < 0 ||
+ ctx->instr->index < succ->sched.instr + gpir_get_min_dist(dep)) {
+ gpir_node_replace_pred(dep, node);
+ if (dep->type == GPIR_DEP_INPUT)
+ gpir_node_replace_child(succ, move, node);
+ }
+ }
+ MAYBE_UNUSED bool result = try_place_node(ctx, move);
+ assert(result);
+}
- return gpir_sched_node(instr, node);
+static bool sched_move(sched_ctx *ctx)
+{
+ list_for_each_entry(gpir_node, node, &ctx->ready_list, list) {
+ if (gpir_is_input_node(node) &&
+ gpir_get_min_end_move(node) == ctx->instr->index) {
+ place_move(ctx, node);
+ return true;
}
}
- /* schedule node used by previous instr when count > 5 */
int count = 0;
- list_for_each_entry(gpir_node, node, ready_list, list) {
- if (gpir_is_input_node(node)) {
- int min = gpir_get_min_scheduled_succ(node);
- assert(min >= instr->index - 1);
- if (min == instr->index - 1)
- count += gpir_op_infos[node->op].may_consume_two_slots ? 2 : 1;
+ list_for_each_entry(gpir_node, node, &ctx->ready_list, list) {
+ if (gpir_is_input_node(node) &&
+ gpir_get_min_end_move(node) == ctx->instr->index + 1) {
+ count += gpir_get_slots_required(node);
}
}
if (count > 5) {
- /* schedule fully ready node first */
- list_for_each_entry(gpir_node, node, ready_list, list) {
- if (gpir_is_input_node(node)) {
- int min = gpir_get_min_scheduled_succ(node);
- if (min == instr->index - 1 && node->sched.ready) {
- gpir_debug(">5 ready node %d\n", node->index);
-
- if (schedule_try_place_node(instr, node))
- return node;
+ /* This is a bit tricky... if a two-slot instruction becomes ready, then
+ * it could go from counting one slot to counting two. If there was
+ * another use one instruction ago, then it would add to "count",
+ * possibly making it large enough that we can't get it below 5 without
+ * running out of slots unless we evict the two-slot instruction itself
+ * (that decreases count by 2 while only taking up one slot).
+ */
+ if (count - 5 > ctx->instr->alu_num_slot_free) {
+ list_for_each_entry(gpir_node, node, &ctx->ready_list, list) {
+ if (gpir_get_min_end_move(node) == ctx->instr->index + 1 &&
+ node->op == gpir_op_complex1 && node->sched.ready) {
+ gpir_debug("count > 5\n");
+ place_move(ctx, node);
+ return true;
}
}
}
- /* no fully ready node be scheduled, schedule partially ready node */
- list_for_each_entry_safe(gpir_node, node, ready_list, list) {
- if (gpir_is_input_node(node)) {
- int min = gpir_get_min_scheduled_succ(node);
- if (min == instr->index - 1 && !node->sched.ready) {
- gpir_debug(">5 partially ready node %d\n", node->index);
-
- return gpir_sched_node(instr, node);
- }
+ list_for_each_entry(gpir_node, node, &ctx->ready_list, list) {
+ /* complex1 has a latency of two cycles, so if it is ready, we want
+ * to try not to insert a mov during the cycle after it becomes
+ * ready, since this delays when it becomes available by an extra
+ * cycle. Other opcodes don't have this problem, i.e. inserting a
+ * move won't hurt anything.
+ */
+ if (gpir_get_min_end_move(node) == ctx->instr->index + 1 &&
+ !(node->op == gpir_op_complex1 && node->sched.ready)) {
+ gpir_debug("count > 5\n");
+ place_move(ctx, node);
+ return true;
}
}
- /* finally schedule move for fully ready node */
- list_for_each_entry_safe(gpir_node, node, ready_list, list) {
- if (gpir_is_input_node(node)) {
- int min = gpir_get_min_scheduled_succ(node);
- if (min == instr->index - 1 && node->sched.ready) {
- gpir_debug(">5 fully ready move node %d\n", node->index);
-
- return gpir_sched_node(instr, node);
- }
+ /* In the case where everything was complex1, we need to try again. */
+ list_for_each_entry(gpir_node, node, &ctx->ready_list, list) {
+ if (gpir_get_min_end_move(node) == ctx->instr->index + 1) {
+ gpir_debug("count > 5\n");
+ place_move(ctx, node);
+ return true;
}
}
}
- /* schedule remain fully ready nodes */
- list_for_each_entry(gpir_node, node, ready_list, list) {
- if (node->sched.ready) {
- gpir_debug("remain fully ready node %d\n", node->index);
+ return false;
+}
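
/* Worked numbers for the count > 5 logic above (hypothetical values):
 * with 7 slots' worth of next-instruction uses and only 1 free ALU slot,
 * a plain move retires one slot of count per ALU slot it occupies, so one
 * free slot can only get count down to 6. Moving a fully ready complex1
 * retires two slots of count in a single ALU slot, which is why it gets
 * evicted first.
 */
#include <stdio.h>

int main(void)
{
   int count = 7, free_slots = 1;
   if (count - 5 > free_slots)
      printf("plain moves cannot reach 5: evict a two-slot node\n");
   count -= 2;      /* move the complex1: count 7 -> 5 */
   free_slots -= 1; /* the move itself consumed the free slot */
   printf("count = %d, free ALU slots = %d\n", count, free_slots);
   return 0;
}
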
- if (schedule_try_place_node(instr, node))
- return node;
- }
- }
+static bool gpir_sched_instr_pass(sched_ctx *ctx)
+{
+ /* First, schedule max nodes */
+ if (try_node(ctx, true))
+ return true;
- return NULL;
+ /* TODO: try to spill max nodes */
+
+ /* Schedule moves for max nodes if we couldn't schedule them. */
+ if (sched_move(ctx))
+ return true;
+
+ /* Try and schedule the rest of the nodes now. */
+ return try_node(ctx, false);
}
-static void schedule_print_pre_one_instr(gpir_instr *instr,
- struct list_head *ready_list)
+static void schedule_print_pre_one_instr(sched_ctx *ctx)
{
if (!lima_shader_debug_gp)
return;
- printf("instr %d for ready list:", instr->index);
- list_for_each_entry(gpir_node, node, ready_list, list) {
- printf(" %d/%c", node->index, node->sched.ready ? 'r' : 'p');
+ printf("instr %d for ready list:", ctx->instr->index);
+ list_for_each_entry(gpir_node, node, &ctx->ready_list, list) {
+ printf(" %d/%c (%d, %d, %d)", node->index, node->sched.ready ? 'r' : 'p',
+ node->sched.dist, gpir_get_slots_required(node),
+ gpir_get_min_end_move(node));
}
printf("\n");
}
@@ -552,31 +754,17 @@ static void schedule_print_post_one_instr(gpir_instr *instr)
}
-static bool schedule_one_instr(gpir_block *block, struct list_head *ready_list)
+static bool schedule_one_instr(sched_ctx *ctx)
{
- gpir_instr *instr = gpir_instr_create(block);
+ gpir_instr *instr = gpir_instr_create(ctx->block);
if (unlikely(!instr))
return false;
+ ctx->instr = instr;
- schedule_print_pre_one_instr(instr, ready_list);
+ schedule_print_pre_one_instr(ctx);
- while (true) {
- gpir_node *node = gpir_sched_instr_pass(instr, ready_list);
- if (!node)
- break;
-
- if (node->sched.instr < 0)
- schedule_insert_ready_list(ready_list, node);
- else {
- list_del(&node->list);
- list_add(&node->list, &block->node_list);
-
- gpir_node_foreach_pred(node, dep) {
- gpir_node *pred = dep->pred;
- schedule_insert_ready_list(ready_list, pred);
- }
- }
- }
+ while (gpir_sched_instr_pass(ctx))
+ assert(ctx->ready_list_slots == gpir_get_curr_ready_list_slots(ctx));
schedule_print_post_one_instr(instr);
return true;
@@ -590,18 +778,20 @@ static bool schedule_block(gpir_block *block)
schedule_update_distance(node);
}
- struct list_head ready_list;
- list_inithead(&ready_list);
+ sched_ctx ctx;
+ list_inithead(&ctx.ready_list);
+ ctx.block = block;
+ ctx.ready_list_slots = 0;
/* construct the ready list from root nodes */
list_for_each_entry_safe(gpir_node, node, &block->node_list, list) {
if (gpir_node_is_root(node))
- schedule_insert_ready_list(&ready_list, node);
+ schedule_insert_ready_list(&ctx.ready_list, node);
}
list_inithead(&block->node_list);
- while (!list_empty(&ready_list)) {
- if (!schedule_one_instr(block, &ready_list))
+ while (!list_empty(&ctx.ready_list)) {
+ if (!schedule_one_instr(&ctx))
return false;
}
@@ -610,26 +800,6 @@ static bool schedule_block(gpir_block *block)
static void schedule_build_vreg_dependency(gpir_block *block)
{
- gpir_node *regs[GPIR_VALUE_REG_NUM] = {0};
- list_for_each_entry(gpir_node, node, &block->node_list, list) {
- /* store node has no value reg assigned */
- if (node->value_reg < 0)
- continue;
-
- gpir_node *reg = regs[node->value_reg];
- if (reg) {
- gpir_node_foreach_succ(reg, dep) {
- /* write after read dep should only apply to real 'read' */
- if (dep->type != GPIR_DEP_INPUT)
- continue;
-
- gpir_node *succ = dep->succ;
- gpir_node_add_dep(node, succ, GPIR_DEP_VREG_WRITE_AFTER_READ);
- }
- }
- regs[node->value_reg] = node;
- }
-
/* merge dummy_f/m to the node created from */
list_for_each_entry_safe(gpir_node, node, &block->node_list, list) {
if (node->op == gpir_op_dummy_m) {