1 files changed, 78 insertions, 88 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
index 90419a3735e..1e92f680b24 100644
--- a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
+++ b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
@@ -407,7 +407,7 @@ public:
       this->mode = mode;
       this->time = 0;
       if (!post_reg_alloc) {
-         this->reg_pressure_in = rzalloc_array(mem_ctx, int, block_count);
+         this->reg_pressure_out = rzalloc_array(mem_ctx, int, block_count);
 
          this->livein = ralloc_array(mem_ctx, BITSET_WORD *, block_count);
          for (int i = 0; i < block_count; i++)
@@ -424,21 +424,21 @@ public:
             this->hw_liveout[i] = rzalloc_array(mem_ctx, BITSET_WORD,
                                                 BITSET_WORDS(hw_reg_count));
 
-         this->written = rzalloc_array(mem_ctx, bool, grf_count);
+         this->read = rzalloc_array(mem_ctx, bool, grf_count);
 
-         this->reads_remaining = rzalloc_array(mem_ctx, int, grf_count);
+         this->writes_remaining = rzalloc_array(mem_ctx, int, grf_count);
 
-         this->hw_reads_remaining = rzalloc_array(mem_ctx, int, hw_reg_count);
+         this->hw_read = rzalloc_array(mem_ctx, bool, hw_reg_count);
 
          this->max_reg_pressure = 0;
       } else {
-         this->reg_pressure_in = NULL;
+         this->reg_pressure_out = NULL;
          this->livein = NULL;
          this->liveout = NULL;
          this->hw_liveout = NULL;
-         this->written = NULL;
-         this->reads_remaining = NULL;
-         this->hw_reads_remaining = NULL;
+         this->read = NULL;
+         this->writes_remaining = NULL;
+         this->hw_read = NULL;
       }
    }
 
@@ -465,7 +465,7 @@ public:
     */
    virtual int issue_time(backend_instruction *inst) = 0;
 
-   virtual void count_reads_remaining(backend_instruction *inst) = 0;
+   virtual void count_writes_remaining(backend_instruction *inst) = 0;
    virtual void setup_liveness(cfg_t *cfg) = 0;
    virtual void update_register_pressure(backend_instruction *inst) = 0;
    virtual int get_register_pressure_benefit(backend_instruction *inst) = 0;
@@ -492,10 +492,10 @@ public:
    int max_reg_pressure;
 
    /*
-    * The register pressure at the beginning of each basic block.
+    * The register pressure at the end of each basic block.
     */
 
-   int *reg_pressure_in;
+   int *reg_pressure_out;
 
    /*
     * The virtual GRF's whose range overlaps the beginning of each basic block.
@@ -516,22 +516,22 @@ public:
    BITSET_WORD **hw_liveout;
 
    /*
-    * Whether we've scheduled a write for this virtual GRF yet.
+    * Whether we've scheduled a read for this virtual GRF yet.
     */
 
-   bool *written;
+   bool *read;
 
    /*
-    * How many reads we haven't scheduled for this virtual GRF yet.
+    * How many writes we haven't scheduled for this virtual GRF yet.
     */
 
-   int *reads_remaining;
+   int *writes_remaining;
 
    /*
-    * How many reads we haven't scheduled for this hardware GRF yet.
+    * Whether we've scheduled a read for this hardware GRF yet.
     */
 
-   int *hw_reads_remaining;
+   bool *hw_read;
 };
 
 class fs_instruction_scheduler : public instruction_scheduler
@@ -546,7 +546,7 @@ public:
    int issue_time(backend_instruction *inst);
    fs_visitor *v;
 
-   void count_reads_remaining(backend_instruction *inst);
+   void count_writes_remaining(backend_instruction *inst);
    void setup_liveness(cfg_t *cfg);
    void update_register_pressure(backend_instruction *inst);
    int get_register_pressure_benefit(backend_instruction *inst);
@@ -574,27 +574,15 @@ is_src_duplicate(fs_inst *inst, int src)
 }
 
 void
-fs_instruction_scheduler::count_reads_remaining(backend_instruction *be)
+fs_instruction_scheduler::count_writes_remaining(backend_instruction *be)
 {
    fs_inst *inst = (fs_inst *)be;
 
-   if (!reads_remaining)
+   if (!writes_remaining) /* NULL when post_reg_alloc; nothing to count */
       return;
 
-   for (int i = 0; i < inst->sources; i++) {
-      if (is_src_duplicate(inst, i))
-         continue;
-
-      if (inst->src[i].file == VGRF) {
-         reads_remaining[inst->src[i].nr]++;
-      } else if (inst->src[i].file == FIXED_GRF) {
-         if (inst->src[i].nr >= hw_reg_count)
-            continue;
-
-         for (int j = 0; j < inst->regs_read(i); j++)
-            hw_reads_remaining[inst->src[i].nr + j]++;
-      }
-   }
+   if (inst->dst.file == VGRF) /* only VGRF destinations are tracked */
+      writes_remaining[inst->dst.nr]++; /* one more write left to schedule */
 }
 
 void
@@ -607,14 +595,16 @@ fs_instruction_scheduler::setup_liveness(cfg_t *cfg)
       for (int i = 0; i < v->live_intervals->num_vars; i++) {
          if (BITSET_TEST(v->live_intervals->block_data[block].livein, i)) {
             int vgrf = v->live_intervals->vgrf_from_var[i];
-            if (!BITSET_TEST(livein[block], vgrf)) {
-               reg_pressure_in[block] += v->alloc.sizes[vgrf];
-               BITSET_SET(livein[block], vgrf);
-            }
+            BITSET_SET(livein[block], vgrf);
          }
 
-         if (BITSET_TEST(v->live_intervals->block_data[block].liveout, i))
-            BITSET_SET(liveout[block], v->live_intervals->vgrf_from_var[i]);
+         if (BITSET_TEST(v->live_intervals->block_data[block].liveout, i)) {
+            int vgrf = v->live_intervals->vgrf_from_var[i];
+            if (!BITSET_TEST(liveout[block], vgrf)) {
+               reg_pressure_out[block] += v->alloc.sizes[vgrf];
+               BITSET_SET(liveout[block], vgrf);
+            }
+         }
       }
    }
 
@@ -626,12 +616,12 @@ fs_instruction_scheduler::setup_liveness(cfg_t *cfg)
       for (int i = 0; i < grf_count; i++) {
          if (v->virtual_grf_start[i] <= cfg->blocks[block]->end_ip &&
              v->virtual_grf_end[i] >= cfg->blocks[block + 1]->start_ip) {
-            if (!BITSET_TEST(livein[block + 1], i)) {
-                reg_pressure_in[block + 1] += v->alloc.sizes[i];
-                BITSET_SET(livein[block + 1], i);
+            if (!BITSET_TEST(liveout[block], i)) {
+                reg_pressure_out[block] += v->alloc.sizes[i];
+                BITSET_SET(liveout[block], i);
             }
 
-            BITSET_SET(liveout[block], i);
+            BITSET_SET(livein[block + 1], i);
          }
       }
    }
@@ -645,10 +635,10 @@ fs_instruction_scheduler::setup_liveness(cfg_t *cfg)
 
       for (int block = 0; block < cfg->num_blocks; block++) {
-         if (cfg->blocks[block]->start_ip <= payload_last_use_ip[i])
-            reg_pressure_in[block]++;
-
-         if (cfg->blocks[block]->end_ip <= payload_last_use_ip[i])
+         /* Last payload use is at or past the block end: live out. */
+         if (cfg->blocks[block]->end_ip <= payload_last_use_ip[i]) {
+            reg_pressure_out[block]++;
             BITSET_SET(hw_liveout[block], i);
+         }
       }
    }
 }
@@ -658,11 +649,11 @@ fs_instruction_scheduler::update_register_pressure(backend_instruction *be)
 {
    fs_inst *inst = (fs_inst *)be;
 
-   if (!reads_remaining)
+   if (!writes_remaining)
       return;
 
    if (inst->dst.file == VGRF) {
-      written[inst->dst.nr] = true;
+      writes_remaining[inst->dst.nr]--;
    }
 
    for (int i = 0; i < inst->sources; i++) {
@@ -670,11 +661,11 @@ fs_instruction_scheduler::update_register_pressure(backend_instruction *be)
           continue;
 
       if (inst->src[i].file == VGRF) {
-         reads_remaining[inst->src[i].nr]--;
+         read[inst->src[i].nr] = true;
       } else if (inst->src[i].file == FIXED_GRF &&
                  inst->src[i].nr < hw_reg_count) {
          for (int off = 0; off < inst->regs_read(i); off++)
-            hw_reads_remaining[inst->src[i].nr + off]--;
+            hw_read[inst->src[i].nr + off] = true;
       }
    }
 }
@@ -687,8 +678,8 @@ fs_instruction_scheduler::get_register_pressure_benefit(backend_instruction *be)
 
    if (inst->dst.file == VGRF) {
       if (!BITSET_TEST(livein[block_idx], inst->dst.nr) &&
-          !written[inst->dst.nr])
-         benefit -= v->alloc.sizes[inst->dst.nr];
+          writes_remaining[inst->dst.nr] == 1)
+         benefit += v->alloc.sizes[inst->dst.nr];
    }
 
    for (int i = 0; i < inst->sources; i++) {
@@ -697,16 +688,16 @@ fs_instruction_scheduler::get_register_pressure_benefit(backend_instruction *be)
 
       if (inst->src[i].file == VGRF &&
           !BITSET_TEST(liveout[block_idx], inst->src[i].nr) &&
-          reads_remaining[inst->src[i].nr] == 1)
-         benefit += v->alloc.sizes[inst->src[i].nr];
+          !read[inst->src[i].nr])
+         benefit -= v->alloc.sizes[inst->src[i].nr];
 
       if (inst->src[i].file == FIXED_GRF &&
           inst->src[i].nr < hw_reg_count) {
          for (int off = 0; off < inst->regs_read(i); off++) {
             int reg = inst->src[i].nr + off;
             if (!BITSET_TEST(hw_liveout[block_idx], reg) &&
-                hw_reads_remaining[reg] == 1) {
-               benefit++;
+                !hw_read[reg]) {
+               benefit--;
             }
          }
       }
@@ -724,7 +715,7 @@ public:
    int issue_time(backend_instruction *inst);
    vec4_visitor *v;
 
-   void count_reads_remaining(backend_instruction *inst);
+   void count_writes_remaining(backend_instruction *inst);
    void setup_liveness(cfg_t *cfg);
    void update_register_pressure(backend_instruction *inst);
    int get_register_pressure_benefit(backend_instruction *inst);
@@ -738,7 +729,7 @@ vec4_instruction_scheduler::vec4_instruction_scheduler(vec4_visitor *v,
 }
 
 void
-vec4_instruction_scheduler::count_reads_remaining(backend_instruction *be)
+vec4_instruction_scheduler::count_writes_remaining(backend_instruction *be)
 {
 }
 
@@ -821,30 +812,30 @@ instruction_scheduler::add_dep(schedule_node *before, schedule_node *after,
 
    assert(before != after);
 
-   for (int i = 0; i < before->child_count; i++) {
-      if (before->children[i] == after) {
-         before->child_latency[i] = MAX2(before->child_latency[i], latency);
+   for (int i = 0; i < after->child_count; i++) {
+      if (after->children[i] == before) {
+         after->child_latency[i] = MAX2(after->child_latency[i], latency);
          return;
       }
    }
 
-   if (before->child_array_size <= before->child_count) {
-      if (before->child_array_size < 16)
-         before->child_array_size = 16;
+   if (after->child_array_size <= after->child_count) {
+      if (after->child_array_size < 16)
+         after->child_array_size = 16;
       else
-         before->child_array_size *= 2;
+         after->child_array_size *= 2;
 
-      before->children = reralloc(mem_ctx, before->children,
-                                  schedule_node *,
-                                  before->child_array_size);
-      before->child_latency = reralloc(mem_ctx, before->child_latency,
-                                       int, before->child_array_size);
+      after->children = reralloc(mem_ctx, after->children,
+                                 schedule_node *,
+                                 after->child_array_size);
+      after->child_latency = reralloc(mem_ctx, after->child_latency,
+                                      int, after->child_array_size);
    }
 
-   before->children[before->child_count] = after;
-   before->child_latency[before->child_count] = latency;
-   before->child_count++;
-   after->parent_count++;
+   after->children[after->child_count] = before;
+   after->child_latency[after->child_count] = latency;
+   after->child_count++;
+   before->parent_count++;
 }
 
 void
@@ -1379,10 +1370,10 @@ fs_instruction_scheduler::choose_instruction_to_schedule()
           * tree of lowered ubo loads, which appear reversed in the
           * instruction stream with respect to when they can be consumed).
           */
-         if (n->delay > chosen->delay) {
+         if (n->delay < chosen->delay) {
             chosen = n;
             continue;
-         } else if (n->delay < chosen->delay) {
+         } else if (n->delay > chosen->delay) {
             continue;
          }
 
@@ -1437,7 +1428,7 @@ instruction_scheduler::schedule_instructions(bblock_t *block)
    int instructions_to_schedule = block->end_ip - block->start_ip + 1;
    time = 0;
    if (!post_reg_alloc) {
-      reg_pressure = reg_pressure_in[block->num];
+      reg_pressure = reg_pressure_out[block->num];
       max_reg_pressure = MAX2(max_reg_pressure, reg_pressure);
    }
    block_idx = block->num;
@@ -1455,7 +1446,7 @@ instruction_scheduler::schedule_instructions(bblock_t *block)
       assert(chosen);
       chosen->remove();
       chosen->inst->exec_node::remove();
-      block->instructions.push_tail(chosen->inst);
+      block->instructions.push_head(chosen->inst);
       instructions_to_schedule--;
 
       if (!post_reg_alloc) {
@@ -1467,13 +1458,12 @@ instruction_scheduler::schedule_instructions(bblock_t *block)
       /* If we expected a delay for scheduling, then bump the clock to reflect
        * that.  In reality, the hardware will switch to another hyperthread
        * and may not return to dispatching our thread for a while even after
-       * we're unblocked.  After this, we have the time when the chosen
+       * we're unblocked.  Before this, we have the time when the chosen
        * instruction will start executing.
        */
       time = MAX2(time, chosen->unblocked_time);
 
-      /* Update the clock for how soon an instruction could start after the
-       * chosen one.
+      /* Update the clock for the actual execution of the instruction.
        */
       time += issue_time(chosen->inst);
 
@@ -1560,15 +1550,15 @@ instruction_scheduler::run(cfg_t *cfg)
       if (block->end_ip - block->start_ip <= 1)
          continue;
 
-      if (reads_remaining) {
-         memset(reads_remaining, 0,
-                grf_count * sizeof(*reads_remaining));
-         memset(hw_reads_remaining, 0,
-                hw_reg_count * sizeof(*hw_reads_remaining));
-         memset(written, 0, grf_count * sizeof(*written));
+      if (writes_remaining) {
+         memset(writes_remaining, 0,
+                grf_count * sizeof(*writes_remaining));
+         memset(hw_read, 0,
+                hw_reg_count * sizeof(*hw_read));
+         memset(read, 0, grf_count * sizeof(*read));
 
          foreach_inst_in_block(fs_inst, inst, block)
-            count_reads_remaining(inst);
+            count_writes_remaining(inst);
       }
 
       add_insts_from_block(block);