-rw-r--r--   src/gallium/drivers/lima/ir/gp/scheduler.c | 462
1 file changed, 316 insertions(+), 146 deletions(-)
diff --git a/src/gallium/drivers/lima/ir/gp/scheduler.c b/src/gallium/drivers/lima/ir/gp/scheduler.c
index 2a11bdfd1c..75adf9ca44 100644
--- a/src/gallium/drivers/lima/ir/gp/scheduler.c
+++ b/src/gallium/drivers/lima/ir/gp/scheduler.c
@@ -79,6 +79,13 @@
  * scheduled total, below the limit. So the algorithm will always succeed.
  */
 
+typedef struct {
+   struct list_head ready_list;
+   int ready_list_slots;
+   gpir_instr *instr;
+   gpir_block *block;
+} sched_ctx;
+
 static int gpir_min_dist_alu(gpir_dep *dep)
 {
    switch (dep->pred->op) {
@@ -230,7 +237,7 @@ static void schedule_update_distance(gpir_node *node)
       if (pred->sched.dist < 0)
          schedule_update_distance(pred);
 
-      int dist = pred->sched.dist + 1;
+      int dist = pred->sched.dist + gpir_min_dist_alu(dep);
       if (node->sched.dist < dist)
          node->sched.dist = dist;
    }
@@ -378,162 +385,357 @@ static gpir_node *schedule_create_move_node(gpir_node *node)
    return &move->node;
 }
 
-static gpir_node *gpir_sched_node(gpir_instr *instr, gpir_node *node)
+static bool gpir_is_input_node(gpir_node *node)
 {
-   if (node->op == gpir_op_mov) {
-      gpir_node *child = gpir_node_to_alu(node)->children[0];
-      gpir_node_foreach_succ_safe(node, dep) {
-         gpir_node *succ = dep->succ;
-         if (succ->sched.instr < 0 ||
-             instr->index < succ->sched.instr + gpir_get_min_dist(dep)) {
-            gpir_node_replace_pred(dep, child);
-            if (dep->type == GPIR_DEP_INPUT)
-               gpir_node_replace_child(succ, node, child);
-         }
+   gpir_node_foreach_succ(node, dep) {
+      if (dep->type == GPIR_DEP_INPUT)
+         return true;
+   }
+   return false;
+}
+
+/* Get the number of slots required for a node on the ready list.
+ */
+static int gpir_get_slots_required(gpir_node *node)
+{
+   if (!gpir_is_input_node(node))
+      return 0;
+
+   if (gpir_op_infos[node->op].may_consume_two_slots && node->sched.ready) {
+      /* If the node is fully ready, we have to assume that it may get
+       * scheduled at any time, at which point it takes up two slots.
+       */
+      return 2;
+   }
+
+   return 1;
+}
+
+/* Once we schedule the successor, would the predecessor be fully ready? */
+static bool pred_almost_ready(gpir_dep *dep)
+{
+   bool fully_ready = true;
+   gpir_node_foreach_succ(dep->pred, other_dep) {
+      gpir_node *succ = other_dep->succ;
+      if (succ->sched.instr < 0 && dep->succ != other_dep->succ) {
+         fully_ready = false;
+         break;
       }
-      MAYBE_UNUSED bool result = schedule_try_place_node(instr, node);
-      assert(result);
-      return node;
    }
-   else {
-      gpir_node *move = schedule_create_move_node(node);
-      list_del(&node->list);
-      node->sched.ready = false;
-      node->sched.inserted = false;
-      gpir_node_replace_succ(move, node);
-      gpir_node_add_dep(move, node, GPIR_DEP_INPUT);
-      return move;
+
+   return fully_ready;
+}
+
+/* Speculatively schedule a dep, and count how many slots it consumes.
+ */
+static int gpir_get_dep_slots(sched_ctx *ctx, gpir_dep *dep)
+{
+   gpir_node *pred = dep->pred;
+   if (!gpir_is_input_node(pred))
+      return 0;
+
+   /* Try to speculatively schedule any loads. */
+   if (pred->type == gpir_node_type_load && pred_almost_ready(dep) &&
+       schedule_try_place_node(ctx->instr, pred)) {
+      return 0;
+   }
+
+   int total = 0;
+   if (!pred->sched.inserted)
+      total++;
+
+   if (gpir_op_infos[pred->op].may_consume_two_slots) {
+      /* If pred goes from partially ready or not ready to fully ready, then
+       * it takes up two slots as per gpir_get_slots_required(), so we need to
+       * add an extra slot.
+       */
+      if (pred_almost_ready(dep))
+         total++;
    }
+
+   return total;
 }
 
-static bool gpir_is_input_node(gpir_node *node)
+static void cleanup_speculative_loads(sched_ctx *ctx, gpir_node *node)
 {
-   gpir_node_foreach_succ(node, dep) {
-      if (dep->type == GPIR_DEP_INPUT)
-         return true;
+   gpir_node_foreach_pred(node, dep) {
+      gpir_node *pred = dep->pred;
+      if (pred->sched.instr >= 0) {
+         pred->sched.instr = -1;
+         gpir_instr_remove_node(ctx->instr, pred);
+      }
    }
-   return false;
 }
 
-static int gpir_get_min_scheduled_succ(gpir_node *node)
+/* Get the total number of slots on the ready list if this node were to be
+ * scheduled.
+ */
+static int gpir_get_ready_list_slots(sched_ctx *ctx, gpir_node *node)
+{
+   assert(ctx->ready_list_slots <= GPIR_VALUE_REG_NUM);
+   int total = ctx->ready_list_slots - gpir_get_slots_required(node);
+
+   if (!schedule_try_place_node(ctx->instr, node))
+      return INT_MAX;
+
+   gpir_node_foreach_pred(node, dep) {
+      int slots = gpir_get_dep_slots(ctx, dep);
+      if (node->op == gpir_op_mov && slots != 0) {
+         /* At this stage, we only insert moves for loads that we couldn't
+          * schedule immediately. If we couldn't schedule the load, there's
+          * no point scheduling the move.
+          */
+         cleanup_speculative_loads(ctx, node);
+         gpir_instr_remove_node(ctx->instr, node);
+         return INT_MAX;
+      }
+      total += slots;
+   }
+
+   /* Clean up any speculatively scheduled loads. */
+   cleanup_speculative_loads(ctx, node);
+   gpir_instr_remove_node(ctx->instr, node);
+
+   return total;
+}
+
+static bool try_place_node(sched_ctx *ctx, gpir_node *node)
+{
+   if (!schedule_try_place_node(ctx->instr, node)) {
+      gpir_debug("failed to place %d\n", node->index);
+      return false;
+   }
+
+   gpir_debug("placed node %d\n", node->index);
+
+   list_del(&node->list);
+   list_add(&node->list, &ctx->block->node_list);
+   gpir_node_foreach_pred(node, dep) {
+      gpir_node *pred = dep->pred;
+      schedule_insert_ready_list(&ctx->ready_list, pred);
+   }
+
+   return true;
+}
+
+static gpir_node *create_move(sched_ctx *ctx, gpir_node *node)
+{
+   gpir_node *move = schedule_create_move_node(node);
+   list_del(&node->list);
+   node->sched.ready = false;
+   node->sched.inserted = false;
+   gpir_node_replace_succ(move, node);
+   gpir_node_add_dep(move, node, GPIR_DEP_INPUT);
+   schedule_insert_ready_list(&ctx->ready_list, move);
+   return move;
+}
+
+static bool try_schedule_node(sched_ctx *ctx, gpir_node *node)
+{
+   if (!try_place_node(ctx, node))
+      return false;
+
+   gpir_node_foreach_pred(node, dep) {
+      gpir_node *pred = dep->pred;
+      if (dep->type == GPIR_DEP_INPUT && pred->type == gpir_node_type_load) {
+         if (!try_place_node(ctx, pred)) {
+            create_move(ctx, pred);
+         }
+      }
+   }
+
+   return true;
+}
+
+static int gpir_get_curr_ready_list_slots(sched_ctx *ctx)
+{
+   int total = 0;
+   list_for_each_entry(gpir_node, node, &ctx->ready_list, list) {
+      total += gpir_get_slots_required(node);
+   }
+
+   return total;
+}
+
+/* What gpir_get_min_end() would return if node were replaced with a move
+ * instruction not in the complex slot. Normally this is 2 + min_end, except
+ * for some store instructions which must have the move node in the same
+ * instruction.
+ */
+static int gpir_get_min_end_move(gpir_node *node)
 {
    int min = INT_MAX;
    gpir_node_foreach_succ(node, dep) {
       gpir_node *succ = dep->succ;
       if (succ->sched.instr >= 0 && dep->type == GPIR_DEP_INPUT) {
-         if (min > succ->sched.instr)
-            min = succ->sched.instr;
+         int dist = 2;
+         switch (succ->op) {
+         case gpir_op_store_temp:
+         case gpir_op_store_reg:
+         case gpir_op_store_varying:
+            dist = 0;
+            break;
+         default:
+            break;
+         }
+         if (min > succ->sched.instr + dist)
+            min = succ->sched.instr + dist;
       }
    }
 
    return min;
 }
 
-static gpir_node *gpir_sched_instr_pass(gpir_instr *instr,
-                                        struct list_head *ready_list)
+static bool try_node(sched_ctx *ctx, bool max_only)
 {
-   /* fully ready node reach its max dist with any of its successor */
-   list_for_each_entry_safe(gpir_node, node, ready_list, list) {
+   gpir_node *min_node = NULL;
+   int min_slots = INT_MAX;
+   list_for_each_entry(gpir_node, node, &ctx->ready_list, list) {
       if (node->sched.ready) {
-         int end = gpir_get_min_end(node);
-         assert(end >= instr->index);
-         if (instr->index < end)
-            continue;
-
-         gpir_debug("fully ready max node %d\n", node->index);
+         /* First, schedule required stuff */
+         if (max_only) {
+            int end = gpir_get_min_end(node);
+            if (end != ctx->instr->index)
+               continue;
+         }
 
-         if (schedule_try_place_node(instr, node))
-            return node;
+         int slots = gpir_get_ready_list_slots(ctx, node);
+         if (slots == INT_MAX)
+            continue;
 
-         return gpir_sched_node(instr, node);
+         if (min_node == NULL) {
+            min_slots = slots;
+            min_node = node;
+            continue;
+         }
+         if (min_slots <= GPIR_VALUE_REG_NUM) {
+            if (node->sched.dist <= min_node->sched.dist)
+               break;
+         }
+         if (slots < min_slots) {
+            min_slots = slots;
+            min_node = node;
+         }
       }
    }
 
-   /* partially ready node reach its max dist with any of its successor */
-   list_for_each_entry_safe(gpir_node, node, ready_list, list) {
-      if (!node->sched.ready) {
-         int end = gpir_get_min_end(node);
-         assert(end >= instr->index);
-         if (instr->index < end)
-            continue;
+   if (min_node && min_slots <= GPIR_VALUE_REG_NUM) {
+      gpir_debug("trying to schedule %d (slots = %d)%s\n", min_node->index,
+                 min_slots, max_only ? " (max)" : "");
+      if (try_schedule_node(ctx, min_node))
+         ctx->ready_list_slots = min_slots;
+      return true;
+   }
 
-         gpir_debug("partially ready max node %d\n", node->index);
+   return false;
+}
+
+static void place_move(sched_ctx *ctx, gpir_node *node)
+{
+   gpir_node *move = create_move(ctx, node);
+   gpir_node_foreach_succ_safe(move, dep) {
+      gpir_node *succ = dep->succ;
+      if (succ->sched.instr < 0 ||
+          ctx->instr->index < succ->sched.instr + gpir_get_min_dist(dep)) {
+         gpir_node_replace_pred(dep, node);
+         if (dep->type == GPIR_DEP_INPUT)
+            gpir_node_replace_child(succ, move, node);
+      }
+   }
+   MAYBE_UNUSED bool result = try_place_node(ctx, move);
+   assert(result);
+}
 
-         return gpir_sched_node(instr, node);
+static bool sched_move(sched_ctx *ctx)
+{
+   list_for_each_entry(gpir_node, node, &ctx->ready_list, list) {
+      if (gpir_is_input_node(node) &&
+          gpir_get_min_end_move(node) == ctx->instr->index) {
+         place_move(ctx, node);
+         return true;
       }
    }
 
-   /* schedule node used by previous instr when count > 5 */
    int count = 0;
-   list_for_each_entry(gpir_node, node, ready_list, list) {
-      if (gpir_is_input_node(node)) {
-         int min = gpir_get_min_scheduled_succ(node);
-         assert(min >= instr->index - 1);
-         if (min == instr->index - 1)
-            count += gpir_op_infos[node->op].may_consume_two_slots ?
-               2 : 1;
+   list_for_each_entry(gpir_node, node, &ctx->ready_list, list) {
+      if (gpir_is_input_node(node) &&
+          gpir_get_min_end_move(node) == ctx->instr->index + 1) {
+         count += gpir_get_slots_required(node);
       }
    }
 
    if (count > 5) {
-      /* schedule fully ready node first */
-      list_for_each_entry(gpir_node, node, ready_list, list) {
-         if (gpir_is_input_node(node)) {
-            int min = gpir_get_min_scheduled_succ(node);
-            if (min == instr->index - 1 && node->sched.ready) {
-               gpir_debug(">5 ready node %d\n", node->index);
-
-               if (schedule_try_place_node(instr, node))
-                  return node;
+      /* This is a bit tricky... if a two-slot instruction becomes ready, then
+       * it could go from counting one slot to counting two. If there was
+       * another use one instruction ago, then it would add to "count",
+       * possibly making it large enough that we can't get it below 5 without
+       * running out of slots, unless we evict the two-slot instruction itself
+       * (that decreases count by 2 while only taking up one slot).
+       */
+      if (count - 5 > ctx->instr->alu_num_slot_free) {
+         list_for_each_entry(gpir_node, node, &ctx->ready_list, list) {
+            if (gpir_get_min_end_move(node) == ctx->instr->index + 1 &&
+                node->op == gpir_op_complex1 && node->sched.ready) {
+               gpir_debug("count > 5\n");
+               place_move(ctx, node);
+               return true;
             }
          }
       }
 
-      /* no fully ready node be scheduled, schedule partially ready node */
-      list_for_each_entry_safe(gpir_node, node, ready_list, list) {
-         if (gpir_is_input_node(node)) {
-            int min = gpir_get_min_scheduled_succ(node);
-            if (min == instr->index - 1 && !node->sched.ready) {
-               gpir_debug(">5 partially ready node %d\n", node->index);
-
-               return gpir_sched_node(instr, node);
-            }
+      list_for_each_entry(gpir_node, node, &ctx->ready_list, list) {
+         /* complex1 has a latency of two cycles, so if it is ready, we want
+          * to try not to insert a mov during the cycle after it becomes
+          * ready, since this delays when it becomes available by an extra
+          * cycle. Other opcodes don't have this problem, i.e. inserting a
+          * move won't hurt anything.
+          */
+         if (gpir_get_min_end_move(node) == ctx->instr->index + 1 &&
+             !(node->op == gpir_op_complex1 && node->sched.ready)) {
+            gpir_debug("count > 5\n");
+            place_move(ctx, node);
+            return true;
          }
       }
 
-      /* finally schedule move for fully ready node */
-      list_for_each_entry_safe(gpir_node, node, ready_list, list) {
-         if (gpir_is_input_node(node)) {
-            int min = gpir_get_min_scheduled_succ(node);
-            if (min == instr->index - 1 && node->sched.ready) {
-               gpir_debug(">5 fully ready move node %d\n", node->index);
-
-               return gpir_sched_node(instr, node);
-            }
+      /* In the case where everything was complex1, we need to try again. */
+      list_for_each_entry(gpir_node, node, &ctx->ready_list, list) {
+         if (gpir_get_min_end_move(node) == ctx->instr->index + 1) {
+            gpir_debug("count > 5\n");
+            place_move(ctx, node);
+            return true;
          }
       }
    }
 
-   /* schedule remain fully ready nodes */
-   list_for_each_entry(gpir_node, node, ready_list, list) {
-      if (node->sched.ready) {
-         gpir_debug("remain fully ready node %d\n", node->index);
+   return false;
+}
 
-         if (schedule_try_place_node(instr, node))
-            return node;
-      }
-   }
+static bool gpir_sched_instr_pass(sched_ctx *ctx)
+{
+   /* First, schedule max nodes */
+   if (try_node(ctx, true))
+      return true;
 
-   return NULL;
+   /* TODO: try to spill max nodes */
+
+   /* Schedule moves for max nodes if we couldn't schedule them. */
+   if (sched_move(ctx))
+      return true;
+
+   /* Try to schedule the rest of the nodes now. */
+   return try_node(ctx, false);
 }
 
-static void schedule_print_pre_one_instr(gpir_instr *instr,
-                                         struct list_head *ready_list)
+static void schedule_print_pre_one_instr(sched_ctx *ctx)
 {
    if (!lima_shader_debug_gp)
       return;
 
-   printf("instr %d for ready list:", instr->index);
-   list_for_each_entry(gpir_node, node, ready_list, list) {
-      printf(" %d/%c", node->index, node->sched.ready ? 'r' : 'p');
+   printf("instr %d for ready list:", ctx->instr->index);
+   list_for_each_entry(gpir_node, node, &ctx->ready_list, list) {
+      printf(" %d/%c (%d, %d, %d)", node->index, node->sched.ready ? 'r' : 'p',
+             node->sched.dist, gpir_get_slots_required(node),
+             gpir_get_min_end_move(node));
    }
    printf("\n");
 }
@@ -552,31 +754,17 @@ static void schedule_print_post_one_instr(gpir_instr *instr)
 }
 
-static bool schedule_one_instr(gpir_block *block, struct list_head *ready_list)
+static bool schedule_one_instr(sched_ctx *ctx)
 {
-   gpir_instr *instr = gpir_instr_create(block);
+   gpir_instr *instr = gpir_instr_create(ctx->block);
    if (unlikely(!instr))
       return false;
+   ctx->instr = instr;
 
-   schedule_print_pre_one_instr(instr, ready_list);
+   schedule_print_pre_one_instr(ctx);
 
-   while (true) {
-      gpir_node *node = gpir_sched_instr_pass(instr, ready_list);
-      if (!node)
-         break;
-
-      if (node->sched.instr < 0)
-         schedule_insert_ready_list(ready_list, node);
-      else {
-         list_del(&node->list);
-         list_add(&node->list, &block->node_list);
-
-         gpir_node_foreach_pred(node, dep) {
-            gpir_node *pred = dep->pred;
-            schedule_insert_ready_list(ready_list, pred);
-         }
-      }
-   }
+   while (gpir_sched_instr_pass(ctx))
+      assert(ctx->ready_list_slots == gpir_get_curr_ready_list_slots(ctx));
 
    schedule_print_post_one_instr(instr);
    return true;
@@ -590,18 +778,20 @@ static bool schedule_block(gpir_block *block)
       schedule_update_distance(node);
    }
 
-   struct list_head ready_list;
-   list_inithead(&ready_list);
+   sched_ctx ctx;
+   list_inithead(&ctx.ready_list);
+   ctx.block = block;
+   ctx.ready_list_slots = 0;
 
    /* construct the ready list from root nodes */
    list_for_each_entry_safe(gpir_node, node, &block->node_list, list) {
      if (gpir_node_is_root(node))
-         schedule_insert_ready_list(&ready_list, node);
+         schedule_insert_ready_list(&ctx.ready_list, node);
   }
 
    list_inithead(&block->node_list);
-   while (!list_empty(&ready_list)) {
-      if (!schedule_one_instr(block, &ready_list))
+   while (!list_empty(&ctx.ready_list)) {
+      if (!schedule_one_instr(&ctx))
         return false;
   }
 
@@ -610,26 +800,6 @@
 static void schedule_build_vreg_dependency(gpir_block *block)
 {
-   gpir_node *regs[GPIR_VALUE_REG_NUM] = {0};
-   list_for_each_entry(gpir_node, node, &block->node_list, list) {
-      /* store node has no value reg assigned */
-      if (node->value_reg < 0)
-         continue;
-
-      gpir_node *reg = regs[node->value_reg];
-      if (reg) {
-         gpir_node_foreach_succ(reg, dep) {
-            /* write after read dep should only apply to real 'read' */
-            if (dep->type != GPIR_DEP_INPUT)
-               continue;
-
-            gpir_node *succ = dep->succ;
-            gpir_node_add_dep(node, succ, GPIR_DEP_VREG_WRITE_AFTER_READ);
-         }
-      }
-      regs[node->value_reg] = node;
-   }
-
    /* merge dummy_f/m to the node created from */
    list_for_each_entry_safe(gpir_node, node, &block->node_list, list) {
      if (node->op == gpir_op_dummy_m) {
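
The heart of this change is the ready-list slot accounting: every node that still feeds a successor through a value register occupies one slot, and a fully ready two-slot op (such as complex1) must be counted at two, since it may be scheduled at any moment. Below is a minimal, self-contained sketch of that counting rule as implemented by gpir_get_slots_required() and gpir_get_curr_ready_list_slots() above; fake_node, MAX_VALUE_REGS, slots_required(), and ready_list_slots() are hypothetical stand-ins for illustration, not the driver's types or functions.

/* Standalone model of the scheduler's slot-accounting rule.
 * All names here are made up; only the counting rule mirrors the diff.
 */
#include <assert.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

#define MAX_VALUE_REGS 11   /* stands in for GPIR_VALUE_REG_NUM */

typedef struct {
   bool has_value_successor;  /* feeds a successor via GPIR_DEP_INPUT */
   bool two_slot_op;          /* may_consume_two_slots in the driver */
   bool fully_ready;          /* all successors already scheduled */
} fake_node;

/* Mirror of gpir_get_slots_required(): 0, 1, or 2 slots per node. */
static int slots_required(const fake_node *n)
{
   if (!n->has_value_successor)
      return 0;                /* e.g. a store: no value reg needed */

   /* A fully ready two-slot op may be scheduled at any time, so it
    * must be counted at its worst case of two slots. */
   if (n->two_slot_op && n->fully_ready)
      return 2;

   return 1;
}

/* Mirror of gpir_get_curr_ready_list_slots(): total slots the ready
 * list occupies.  The scheduler keeps this at or below the value
 * register budget before committing a node. */
static int ready_list_slots(const fake_node *list, size_t n)
{
   int total = 0;
   for (size_t i = 0; i < n; i++)
      total += slots_required(&list[i]);
   return total;
}

int main(void)
{
   fake_node ready[] = {
      { .has_value_successor = true },                     /* 1 slot  */
      { .has_value_successor = true, .two_slot_op = true,
        .fully_ready = true },                             /* 2 slots */
      { .has_value_successor = false },                    /* 0 slots */
   };
   int total = ready_list_slots(ready, sizeof(ready) / sizeof(ready[0]));
   printf("ready list occupies %d of %d slots\n", total, MAX_VALUE_REGS);
   assert(total <= MAX_VALUE_REGS);
   return 0;
}

gpir_get_ready_list_slots() applies the same arithmetic speculatively: it tentatively places the candidate (and any loads it can place alongside) with schedule_try_place_node(), recomputes the total, then backs everything out, returning INT_MAX when the node cannot be placed. try_node() therefore only commits a candidate whose resulting ready list still fits within the value register budget, which is what the assert in schedule_one_instr() checks after every pass.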