summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp 166
1 files changed, 78 insertions, 88 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
index 90419a3735e..1e92f680b24 100644
--- a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
+++ b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
@@ -407,7 +407,7 @@ public:
this->mode = mode;
this->time = 0;
if (!post_reg_alloc) {
- this->reg_pressure_in = rzalloc_array(mem_ctx, int, block_count);
+ this->reg_pressure_out = rzalloc_array(mem_ctx, int, block_count);
this->livein = ralloc_array(mem_ctx, BITSET_WORD *, block_count);
for (int i = 0; i < block_count; i++)
@@ -424,21 +424,21 @@ public:
this->hw_liveout[i] = rzalloc_array(mem_ctx, BITSET_WORD,
BITSET_WORDS(hw_reg_count));
- this->written = rzalloc_array(mem_ctx, bool, grf_count);
+ this->read = rzalloc_array(mem_ctx, bool, grf_count);
- this->reads_remaining = rzalloc_array(mem_ctx, int, grf_count);
+ this->writes_remaining = rzalloc_array(mem_ctx, int, grf_count);
- this->hw_reads_remaining = rzalloc_array(mem_ctx, int, hw_reg_count);
+ this->hw_read = rzalloc_array(mem_ctx, bool, hw_reg_count);
this->max_reg_pressure = 0;
} else {
- this->reg_pressure_in = NULL;
+ this->reg_pressure_out = NULL;
this->livein = NULL;
this->liveout = NULL;
this->hw_liveout = NULL;
- this->written = NULL;
- this->reads_remaining = NULL;
- this->hw_reads_remaining = NULL;
+ this->read = NULL;
+ this->writes_remaining = NULL;
+ this->hw_read = NULL;
}
}
@@ -465,7 +465,7 @@ public:
*/
virtual int issue_time(backend_instruction *inst) = 0;
- virtual void count_reads_remaining(backend_instruction *inst) = 0;
+ virtual void count_writes_remaining(backend_instruction *inst) = 0;
virtual void setup_liveness(cfg_t *cfg) = 0;
virtual void update_register_pressure(backend_instruction *inst) = 0;
virtual int get_register_pressure_benefit(backend_instruction *inst) = 0;
@@ -492,10 +492,10 @@ public:
int max_reg_pressure;
/*
- * The register pressure at the beginning of each basic block.
+ * The register pressure at the end of each basic block.
*/
- int *reg_pressure_in;
+ int *reg_pressure_out;
/*
* The virtual GRF's whose range overlaps the beginning of each basic block.
@@ -516,22 +516,22 @@ public:
BITSET_WORD **hw_liveout;
/*
- * Whether we've scheduled a write for this virtual GRF yet.
+ * Whether we've scheduled a read for this virtual GRF yet.
*/
- bool *written;
+ bool *read;
/*
- * How many reads we haven't scheduled for this virtual GRF yet.
+ * How many writes we haven't scheduled for this virtual GRF yet.
*/
- int *reads_remaining;
+ int *writes_remaining;
/*
- * How many reads we haven't scheduled for this hardware GRF yet.
+ * Whether we've scheduled a read for this hardware GRF yet.
*/
- int *hw_reads_remaining;
+ bool *hw_read;
};
class fs_instruction_scheduler : public instruction_scheduler
@@ -546,7 +546,7 @@ public:
int issue_time(backend_instruction *inst);
fs_visitor *v;
- void count_reads_remaining(backend_instruction *inst);
+ void count_writes_remaining(backend_instruction *inst);
void setup_liveness(cfg_t *cfg);
void update_register_pressure(backend_instruction *inst);
int get_register_pressure_benefit(backend_instruction *inst);
@@ -574,27 +574,15 @@ is_src_duplicate(fs_inst *inst, int src)
}
void
-fs_instruction_scheduler::count_reads_remaining(backend_instruction *be)
+fs_instruction_scheduler::count_writes_remaining(backend_instruction *be)
{
fs_inst *inst = (fs_inst *)be;
- if (!reads_remaining)
+ if (!writes_remaining)
return;
- for (int i = 0; i < inst->sources; i++) {
- if (is_src_duplicate(inst, i))
- continue;
-
- if (inst->src[i].file == VGRF) {
- reads_remaining[inst->src[i].nr]++;
- } else if (inst->src[i].file == FIXED_GRF) {
- if (inst->src[i].nr >= hw_reg_count)
- continue;
-
- for (int j = 0; j < inst->regs_read(i); j++)
- hw_reads_remaining[inst->src[i].nr + j]++;
- }
- }
+ if (inst->dst.file == VGRF)
+ writes_remaining[inst->dst.nr]++;
}
void
@@ -607,14 +595,16 @@ fs_instruction_scheduler::setup_liveness(cfg_t *cfg)
for (int i = 0; i < v->live_intervals->num_vars; i++) {
if (BITSET_TEST(v->live_intervals->block_data[block].livein, i)) {
int vgrf = v->live_intervals->vgrf_from_var[i];
- if (!BITSET_TEST(livein[block], vgrf)) {
- reg_pressure_in[block] += v->alloc.sizes[vgrf];
- BITSET_SET(livein[block], vgrf);
- }
+ BITSET_SET(livein[block], vgrf);
}
- if (BITSET_TEST(v->live_intervals->block_data[block].liveout, i))
- BITSET_SET(liveout[block], v->live_intervals->vgrf_from_var[i]);
+ if (BITSET_TEST(v->live_intervals->block_data[block].liveout, i)) {
+ int vgrf = v->live_intervals->vgrf_from_var[i];
+ if (!BITSET_TEST(liveout[block], vgrf)) {
+ reg_pressure_out[block] += v->alloc.sizes[vgrf];
+ BITSET_SET(liveout[block], vgrf);
+ }
+ }
}
}
@@ -626,12 +616,12 @@ fs_instruction_scheduler::setup_liveness(cfg_t *cfg)
for (int i = 0; i < grf_count; i++) {
if (v->virtual_grf_start[i] <= cfg->blocks[block]->end_ip &&
v->virtual_grf_end[i] >= cfg->blocks[block + 1]->start_ip) {
- if (!BITSET_TEST(livein[block + 1], i)) {
- reg_pressure_in[block + 1] += v->alloc.sizes[i];
- BITSET_SET(livein[block + 1], i);
+ if (!BITSET_TEST(liveout[block], i)) {
+ reg_pressure_out[block] += v->alloc.sizes[i];
+ BITSET_SET(liveout[block], i);
}
- BITSET_SET(liveout[block], i);
+ BITSET_SET(livein[block + 1], i);
}
}
}
@@ -645,10 +635,11 @@ fs_instruction_scheduler::setup_liveness(cfg_t *cfg)
for (int block = 0; block < cfg->num_blocks; block++) {
if (cfg->blocks[block]->start_ip <= payload_last_use_ip[i])
- reg_pressure_in[block]++;
- if (cfg->blocks[block]->end_ip <= payload_last_use_ip[i])
+ if (cfg->blocks[block]->end_ip <= payload_last_use_ip[i]) {
+ reg_pressure_out[block]++;
BITSET_SET(hw_liveout[block], i);
+ }
}
}
}
@@ -658,11 +649,11 @@ fs_instruction_scheduler::update_register_pressure(backend_instruction *be)
{
fs_inst *inst = (fs_inst *)be;
- if (!reads_remaining)
+ if (!writes_remaining)
return;
if (inst->dst.file == VGRF) {
- written[inst->dst.nr] = true;
+ writes_remaining[inst->dst.nr]--;
}
for (int i = 0; i < inst->sources; i++) {
@@ -670,11 +661,11 @@ fs_instruction_scheduler::update_register_pressure(backend_instruction *be)
continue;
if (inst->src[i].file == VGRF) {
- reads_remaining[inst->src[i].nr]--;
+ read[inst->src[i].nr] = true;
} else if (inst->src[i].file == FIXED_GRF &&
inst->src[i].nr < hw_reg_count) {
for (int off = 0; off < inst->regs_read(i); off++)
- hw_reads_remaining[inst->src[i].nr + off]--;
+ hw_read[inst->src[i].nr + off] = true;
}
}
}
@@ -687,8 +678,8 @@ fs_instruction_scheduler::get_register_pressure_benefit(backend_instruction *be)
if (inst->dst.file == VGRF) {
if (!BITSET_TEST(livein[block_idx], inst->dst.nr) &&
- !written[inst->dst.nr])
- benefit -= v->alloc.sizes[inst->dst.nr];
+ writes_remaining[inst->dst.nr] == 1)
+ benefit += v->alloc.sizes[inst->dst.nr];
}
for (int i = 0; i < inst->sources; i++) {
@@ -697,16 +688,16 @@ fs_instruction_scheduler::get_register_pressure_benefit(backend_instruction *be)
if (inst->src[i].file == VGRF &&
!BITSET_TEST(liveout[block_idx], inst->src[i].nr) &&
- reads_remaining[inst->src[i].nr] == 1)
- benefit += v->alloc.sizes[inst->src[i].nr];
+ !read[inst->src[i].nr])
+ benefit -= v->alloc.sizes[inst->src[i].nr];
if (inst->src[i].file == FIXED_GRF &&
inst->src[i].nr < hw_reg_count) {
for (int off = 0; off < inst->regs_read(i); off++) {
int reg = inst->src[i].nr + off;
if (!BITSET_TEST(hw_liveout[block_idx], reg) &&
- hw_reads_remaining[reg] == 1) {
- benefit++;
+ !hw_read[reg]) {
+ benefit--;
}
}
}
@@ -724,7 +715,7 @@ public:
int issue_time(backend_instruction *inst);
vec4_visitor *v;
- void count_reads_remaining(backend_instruction *inst);
+ void count_writes_remaining(backend_instruction *inst);
void setup_liveness(cfg_t *cfg);
void update_register_pressure(backend_instruction *inst);
int get_register_pressure_benefit(backend_instruction *inst);
@@ -738,7 +729,7 @@ vec4_instruction_scheduler::vec4_instruction_scheduler(vec4_visitor *v,
}
void
-vec4_instruction_scheduler::count_reads_remaining(backend_instruction *be)
+vec4_instruction_scheduler::count_writes_remaining(backend_instruction *be)
{
}
@@ -821,30 +812,30 @@ instruction_scheduler::add_dep(schedule_node *before, schedule_node *after,
assert(before != after);
- for (int i = 0; i < before->child_count; i++) {
- if (before->children[i] == after) {
- before->child_latency[i] = MAX2(before->child_latency[i], latency);
+ for (int i = 0; i < after->child_count; i++) {
+ if (after->children[i] == before) {
+ after->child_latency[i] = MAX2(after->child_latency[i], latency);
return;
}
}
- if (before->child_array_size <= before->child_count) {
- if (before->child_array_size < 16)
- before->child_array_size = 16;
+ if (after->child_array_size <= after->child_count) {
+ if (after->child_array_size < 16)
+ after->child_array_size = 16;
else
- before->child_array_size *= 2;
+ after->child_array_size *= 2;
- before->children = reralloc(mem_ctx, before->children,
- schedule_node *,
- before->child_array_size);
- before->child_latency = reralloc(mem_ctx, before->child_latency,
- int, before->child_array_size);
+ after->children = reralloc(mem_ctx, after->children,
+ schedule_node *,
+ after->child_array_size);
+ after->child_latency = reralloc(mem_ctx, after->child_latency,
+ int, after->child_array_size);
}
- before->children[before->child_count] = after;
- before->child_latency[before->child_count] = latency;
- before->child_count++;
- after->parent_count++;
+ after->children[after->child_count] = before;
+ after->child_latency[after->child_count] = latency;
+ after->child_count++;
+ before->parent_count++;
}
void
@@ -1379,10 +1370,10 @@ fs_instruction_scheduler::choose_instruction_to_schedule()
* tree of lowered ubo loads, which appear reversed in the
* instruction stream with respect to when they can be consumed).
*/
- if (n->delay > chosen->delay) {
+ if (n->delay < chosen->delay) {
chosen = n;
continue;
- } else if (n->delay < chosen->delay) {
+ } else if (n->delay > chosen->delay) {
continue;
}
@@ -1437,7 +1428,7 @@ instruction_scheduler::schedule_instructions(bblock_t *block)
int instructions_to_schedule = block->end_ip - block->start_ip + 1;
time = 0;
if (!post_reg_alloc) {
- reg_pressure = reg_pressure_in[block->num];
+ reg_pressure = reg_pressure_out[block->num];
max_reg_pressure = MAX2(max_reg_pressure, reg_pressure);
}
block_idx = block->num;
@@ -1455,7 +1446,7 @@ instruction_scheduler::schedule_instructions(bblock_t *block)
assert(chosen);
chosen->remove();
chosen->inst->exec_node::remove();
- block->instructions.push_tail(chosen->inst);
+ block->instructions.push_head(chosen->inst);
instructions_to_schedule--;
if (!post_reg_alloc) {
@@ -1467,13 +1458,12 @@ instruction_scheduler::schedule_instructions(bblock_t *block)
/* If we expected a delay for scheduling, then bump the clock to reflect
* that. In reality, the hardware will switch to another hyperthread
* and may not return to dispatching our thread for a while even after
- * we're unblocked. After this, we have the time when the chosen
+ * we're unblocked. Before this, we have the time when the chosen
* instruction will start executing.
*/
time = MAX2(time, chosen->unblocked_time);
- /* Update the clock for how soon an instruction could start after the
- * chosen one.
+ /* Update the clock for the actual execution of the instruction.
*/
time += issue_time(chosen->inst);
@@ -1560,15 +1550,15 @@ instruction_scheduler::run(cfg_t *cfg)
if (block->end_ip - block->start_ip <= 1)
continue;
- if (reads_remaining) {
- memset(reads_remaining, 0,
- grf_count * sizeof(*reads_remaining));
- memset(hw_reads_remaining, 0,
- hw_reg_count * sizeof(*hw_reads_remaining));
- memset(written, 0, grf_count * sizeof(*written));
+ if (writes_remaining) {
+ memset(writes_remaining, 0,
+ grf_count * sizeof(*writes_remaining));
+ memset(hw_read, 0,
+ hw_reg_count * sizeof(*hw_read));
+ memset(read, 0, grf_count * sizeof(*read));
foreach_inst_in_block(fs_inst, inst, block)
- count_reads_remaining(inst);
+ count_writes_remaining(inst);
}
add_insts_from_block(block);