diff options
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp | 166 |
1 file changed, 78 insertions, 88 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp index 90419a3735e..1e92f680b24 100644 --- a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp +++ b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp @@ -407,7 +407,7 @@ public: this->mode = mode; this->time = 0; if (!post_reg_alloc) { - this->reg_pressure_in = rzalloc_array(mem_ctx, int, block_count); + this->reg_pressure_out = rzalloc_array(mem_ctx, int, block_count); this->livein = ralloc_array(mem_ctx, BITSET_WORD *, block_count); for (int i = 0; i < block_count; i++) @@ -424,21 +424,21 @@ public: this->hw_liveout[i] = rzalloc_array(mem_ctx, BITSET_WORD, BITSET_WORDS(hw_reg_count)); - this->written = rzalloc_array(mem_ctx, bool, grf_count); + this->read = rzalloc_array(mem_ctx, bool, grf_count); - this->reads_remaining = rzalloc_array(mem_ctx, int, grf_count); + this->writes_remaining = rzalloc_array(mem_ctx, int, grf_count); - this->hw_reads_remaining = rzalloc_array(mem_ctx, int, hw_reg_count); + this->hw_read = rzalloc_array(mem_ctx, bool, hw_reg_count); this->max_reg_pressure = 0; } else { - this->reg_pressure_in = NULL; + this->reg_pressure_out = NULL; this->livein = NULL; this->liveout = NULL; this->hw_liveout = NULL; - this->written = NULL; - this->reads_remaining = NULL; - this->hw_reads_remaining = NULL; + this->read = NULL; + this->writes_remaining = NULL; + this->hw_read = NULL; } } @@ -465,7 +465,7 @@ public: */ virtual int issue_time(backend_instruction *inst) = 0; - virtual void count_reads_remaining(backend_instruction *inst) = 0; + virtual void count_writes_remaining(backend_instruction *inst) = 0; virtual void setup_liveness(cfg_t *cfg) = 0; virtual void update_register_pressure(backend_instruction *inst) = 0; virtual int get_register_pressure_benefit(backend_instruction *inst) = 0; @@ -492,10 +492,10 @@ public: int max_reg_pressure; /* - * The register pressure at the beginning of each basic block. 
+ * The register pressure at the end of each basic block. */ - int *reg_pressure_in; + int *reg_pressure_out; /* * The virtual GRF's whose range overlaps the beginning of each basic block. @@ -516,22 +516,22 @@ public: BITSET_WORD **hw_liveout; /* - * Whether we've scheduled a write for this virtual GRF yet. + * Whether we've scheduled a read for this virtual GRF yet. */ - bool *written; + bool *read; /* - * How many reads we haven't scheduled for this virtual GRF yet. + * How many writes we haven't scheduled for this virtual GRF yet. */ - int *reads_remaining; + int *writes_remaining; /* - * How many reads we haven't scheduled for this hardware GRF yet. + * Whether we've scheduled a read for this hardware GRF yet. */ - int *hw_reads_remaining; + bool *hw_read; }; class fs_instruction_scheduler : public instruction_scheduler @@ -546,7 +546,7 @@ public: int issue_time(backend_instruction *inst); fs_visitor *v; - void count_reads_remaining(backend_instruction *inst); + void count_writes_remaining(backend_instruction *inst); void setup_liveness(cfg_t *cfg); void update_register_pressure(backend_instruction *inst); int get_register_pressure_benefit(backend_instruction *inst); @@ -574,27 +574,15 @@ is_src_duplicate(fs_inst *inst, int src) } void -fs_instruction_scheduler::count_reads_remaining(backend_instruction *be) +fs_instruction_scheduler::count_writes_remaining(backend_instruction *be) { fs_inst *inst = (fs_inst *)be; - if (!reads_remaining) + if (!writes_remaining) return; - for (int i = 0; i < inst->sources; i++) { - if (is_src_duplicate(inst, i)) - continue; - - if (inst->src[i].file == VGRF) { - reads_remaining[inst->src[i].nr]++; - } else if (inst->src[i].file == FIXED_GRF) { - if (inst->src[i].nr >= hw_reg_count) - continue; - - for (int j = 0; j < inst->regs_read(i); j++) - hw_reads_remaining[inst->src[i].nr + j]++; - } - } + if (inst->dst.file == VGRF) + writes_remaining[inst->dst.nr]++; } void @@ -607,14 +595,16 @@ 
fs_instruction_scheduler::setup_liveness(cfg_t *cfg) for (int i = 0; i < v->live_intervals->num_vars; i++) { if (BITSET_TEST(v->live_intervals->block_data[block].livein, i)) { int vgrf = v->live_intervals->vgrf_from_var[i]; - if (!BITSET_TEST(livein[block], vgrf)) { - reg_pressure_in[block] += v->alloc.sizes[vgrf]; - BITSET_SET(livein[block], vgrf); - } + BITSET_SET(livein[block], vgrf); } - if (BITSET_TEST(v->live_intervals->block_data[block].liveout, i)) - BITSET_SET(liveout[block], v->live_intervals->vgrf_from_var[i]); + if (BITSET_TEST(v->live_intervals->block_data[block].liveout, i)) { + int vgrf = v->live_intervals->vgrf_from_var[i]; + if (!BITSET_TEST(liveout[block], vgrf)) { + reg_pressure_out[block] += v->alloc.sizes[vgrf]; + BITSET_SET(liveout[block], vgrf); + } + } } } @@ -626,12 +616,12 @@ fs_instruction_scheduler::setup_liveness(cfg_t *cfg) for (int i = 0; i < grf_count; i++) { if (v->virtual_grf_start[i] <= cfg->blocks[block]->end_ip && v->virtual_grf_end[i] >= cfg->blocks[block + 1]->start_ip) { - if (!BITSET_TEST(livein[block + 1], i)) { - reg_pressure_in[block + 1] += v->alloc.sizes[i]; - BITSET_SET(livein[block + 1], i); + if (!BITSET_TEST(liveout[block], i)) { + reg_pressure_out[block] += v->alloc.sizes[i]; + BITSET_SET(liveout[block], i); } - BITSET_SET(liveout[block], i); + BITSET_SET(livein[block + 1], i); } } } @@ -645,10 +635,11 @@ fs_instruction_scheduler::setup_liveness(cfg_t *cfg) for (int block = 0; block < cfg->num_blocks; block++) { if (cfg->blocks[block]->start_ip <= payload_last_use_ip[i]) - reg_pressure_in[block]++; - if (cfg->blocks[block]->end_ip <= payload_last_use_ip[i]) + if (cfg->blocks[block]->end_ip <= payload_last_use_ip[i]) { + reg_pressure_out[block]++; BITSET_SET(hw_liveout[block], i); + } } } } @@ -658,11 +649,11 @@ fs_instruction_scheduler::update_register_pressure(backend_instruction *be) { fs_inst *inst = (fs_inst *)be; - if (!reads_remaining) + if (!writes_remaining) return; if (inst->dst.file == VGRF) { - 
written[inst->dst.nr] = true; + writes_remaining[inst->dst.nr]--; } for (int i = 0; i < inst->sources; i++) { @@ -670,11 +661,11 @@ fs_instruction_scheduler::update_register_pressure(backend_instruction *be) continue; if (inst->src[i].file == VGRF) { - reads_remaining[inst->src[i].nr]--; + read[inst->src[i].nr] = true; } else if (inst->src[i].file == FIXED_GRF && inst->src[i].nr < hw_reg_count) { for (int off = 0; off < inst->regs_read(i); off++) - hw_reads_remaining[inst->src[i].nr + off]--; + hw_read[inst->src[i].nr + off] = true; } } } @@ -687,8 +678,8 @@ fs_instruction_scheduler::get_register_pressure_benefit(backend_instruction *be) if (inst->dst.file == VGRF) { if (!BITSET_TEST(livein[block_idx], inst->dst.nr) && - !written[inst->dst.nr]) - benefit -= v->alloc.sizes[inst->dst.nr]; + writes_remaining[inst->dst.nr] == 1) + benefit += v->alloc.sizes[inst->dst.nr]; } for (int i = 0; i < inst->sources; i++) { @@ -697,16 +688,16 @@ fs_instruction_scheduler::get_register_pressure_benefit(backend_instruction *be) if (inst->src[i].file == VGRF && !BITSET_TEST(liveout[block_idx], inst->src[i].nr) && - reads_remaining[inst->src[i].nr] == 1) - benefit += v->alloc.sizes[inst->src[i].nr]; + !read[inst->src[i].nr]) + benefit -= v->alloc.sizes[inst->src[i].nr]; if (inst->src[i].file == FIXED_GRF && inst->src[i].nr < hw_reg_count) { for (int off = 0; off < inst->regs_read(i); off++) { int reg = inst->src[i].nr + off; if (!BITSET_TEST(hw_liveout[block_idx], reg) && - hw_reads_remaining[reg] == 1) { - benefit++; + !hw_read[reg]) { + benefit--; } } } @@ -724,7 +715,7 @@ public: int issue_time(backend_instruction *inst); vec4_visitor *v; - void count_reads_remaining(backend_instruction *inst); + void count_writes_remaining(backend_instruction *inst); void setup_liveness(cfg_t *cfg); void update_register_pressure(backend_instruction *inst); int get_register_pressure_benefit(backend_instruction *inst); @@ -738,7 +729,7 @@ 
vec4_instruction_scheduler::vec4_instruction_scheduler(vec4_visitor *v, } void -vec4_instruction_scheduler::count_reads_remaining(backend_instruction *be) +vec4_instruction_scheduler::count_writes_remaining(backend_instruction *be) { } @@ -821,30 +812,30 @@ instruction_scheduler::add_dep(schedule_node *before, schedule_node *after, assert(before != after); - for (int i = 0; i < before->child_count; i++) { - if (before->children[i] == after) { - before->child_latency[i] = MAX2(before->child_latency[i], latency); + for (int i = 0; i < after->child_count; i++) { + if (after->children[i] == before) { + after->child_latency[i] = MAX2(after->child_latency[i], latency); return; } } - if (before->child_array_size <= before->child_count) { - if (before->child_array_size < 16) - before->child_array_size = 16; + if (after->child_array_size <= after->child_count) { + if (after->child_array_size < 16) + after->child_array_size = 16; else - before->child_array_size *= 2; + after->child_array_size *= 2; - before->children = reralloc(mem_ctx, before->children, - schedule_node *, - before->child_array_size); - before->child_latency = reralloc(mem_ctx, before->child_latency, - int, before->child_array_size); + after->children = reralloc(mem_ctx, after->children, + schedule_node *, + after->child_array_size); + after->child_latency = reralloc(mem_ctx, after->child_latency, + int, after->child_array_size); } - before->children[before->child_count] = after; - before->child_latency[before->child_count] = latency; - before->child_count++; - after->parent_count++; + after->children[after->child_count] = before; + after->child_latency[after->child_count] = latency; + after->child_count++; + before->parent_count++; } void @@ -1379,10 +1370,10 @@ fs_instruction_scheduler::choose_instruction_to_schedule() * tree of lowered ubo loads, which appear reversed in the * instruction stream with respect to when they can be consumed). 
*/ - if (n->delay > chosen->delay) { + if (n->delay < chosen->delay) { chosen = n; continue; - } else if (n->delay < chosen->delay) { + } else if (n->delay > chosen->delay) { continue; } @@ -1437,7 +1428,7 @@ instruction_scheduler::schedule_instructions(bblock_t *block) int instructions_to_schedule = block->end_ip - block->start_ip + 1; time = 0; if (!post_reg_alloc) { - reg_pressure = reg_pressure_in[block->num]; + reg_pressure = reg_pressure_out[block->num]; max_reg_pressure = MAX2(max_reg_pressure, reg_pressure); } block_idx = block->num; @@ -1455,7 +1446,7 @@ instruction_scheduler::schedule_instructions(bblock_t *block) assert(chosen); chosen->remove(); chosen->inst->exec_node::remove(); - block->instructions.push_tail(chosen->inst); + block->instructions.push_head(chosen->inst); instructions_to_schedule--; if (!post_reg_alloc) { @@ -1467,13 +1458,12 @@ instruction_scheduler::schedule_instructions(bblock_t *block) /* If we expected a delay for scheduling, then bump the clock to reflect * that. In reality, the hardware will switch to another hyperthread * and may not return to dispatching our thread for a while even after - * we're unblocked. After this, we have the time when the chosen + * we're unblocked. Before this, we have the time when the chosen * instruction will start executing. */ time = MAX2(time, chosen->unblocked_time); - /* Update the clock for how soon an instruction could start after the - * chosen one. + /* Update the clock for the actual execution of the instruction. 
*/ time += issue_time(chosen->inst); @@ -1560,15 +1550,15 @@ instruction_scheduler::run(cfg_t *cfg) if (block->end_ip - block->start_ip <= 1) continue; - if (reads_remaining) { - memset(reads_remaining, 0, - grf_count * sizeof(*reads_remaining)); - memset(hw_reads_remaining, 0, - hw_reg_count * sizeof(*hw_reads_remaining)); - memset(written, 0, grf_count * sizeof(*written)); + if (writes_remaining) { + memset(writes_remaining, 0, + grf_count * sizeof(*writes_remaining)); + memset(hw_read, 0, + hw_reg_count * sizeof(*hw_read)); + memset(read, 0, grf_count * sizeof(*read)); foreach_inst_in_block(fs_inst, inst, block) - count_reads_remaining(inst); + count_writes_remaining(inst); } add_insts_from_block(block); |