summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Vadim Girlin <vadimgirlin@gmail.com> 2013-11-15 18:24:53 +0100
committer: Vadim Girlin <vadimgirlin@gmail.com> 2013-11-17 01:36:28 +0400
commit: 4cb04aa0dfaf8c23e312fa49778e637e59410727 (patch)
tree: 792ceced81b094ef9a02fd98e0575c761c310abd
parent: 04856ceb5c3cb01bd96ca45e7bb5f2d1eacc46f9 (diff)
r600g/sb: work around hw issues with stack on eg/cm
v2: make it actually work, improve condition

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=68503
Cc: "10.0" <mesa-stable@lists.freedesktop.org>
Signed-off-by: Vadim Girlin <vadimgirlin@gmail.com>
-rw-r--r-- src/gallium/drivers/r600/sb/sb_bc.h 21
-rw-r--r-- src/gallium/drivers/r600/sb/sb_bc_finalize.cpp 129
-rw-r--r-- src/gallium/drivers/r600/sb/sb_context.cpp 9
-rw-r--r-- src/gallium/drivers/r600/sb/sb_ir.h 5
-rw-r--r-- src/gallium/drivers/r600/sb/sb_pass.h 3
5 files changed, 123 insertions, 44 deletions
diff --git a/src/gallium/drivers/r600/sb/sb_bc.h b/src/gallium/drivers/r600/sb/sb_bc.h
index ad1b862fd57..73b8b08ba39 100644
--- a/src/gallium/drivers/r600/sb/sb_bc.h
+++ b/src/gallium/drivers/r600/sb/sb_bc.h
@@ -614,6 +614,10 @@ public:
unsigned num_slots;
bool uses_mova_gpr;
+ bool stack_workaround_8xx;
+ bool stack_workaround_9xx;
+
+ unsigned wavefront_size;
unsigned stack_entry_size;
static unsigned dump_pass;
@@ -638,6 +642,23 @@ public:
bool is_cayman() {return hw_class == HW_CLASS_CAYMAN;}
bool is_egcm() {return hw_class >= HW_CLASS_EVERGREEN;}
+ bool needs_8xx_stack_workaround() {
+ if (!is_evergreen())
+ return false;
+
+ switch (hw_chip) {
+ case HW_CHIP_CYPRESS:
+ case HW_CHIP_JUNIPER:
+ return false;
+ default:
+ return true;
+ }
+ }
+
+ bool needs_9xx_stack_workaround() {
+ return is_cayman();
+ }
+
sb_hw_class_bits hw_class_bit() {
switch (hw_class) {
case HW_CLASS_R600:return HB_R6;
diff --git a/src/gallium/drivers/r600/sb/sb_bc_finalize.cpp b/src/gallium/drivers/r600/sb/sb_bc_finalize.cpp
index c56c866bafc..bc71cf873da 100644
--- a/src/gallium/drivers/r600/sb/sb_bc_finalize.cpp
+++ b/src/gallium/drivers/r600/sb/sb_bc_finalize.cpp
@@ -40,8 +40,9 @@ namespace r600_sb {
int bc_finalizer::run() {
- regions_vec &rv = sh.get_regions();
+ run_on(sh.root);
+ regions_vec &rv = sh.get_regions();
for (regions_vec::reverse_iterator I = rv.rbegin(), E = rv.rend(); I != E;
++I) {
region_node *r = *I;
@@ -58,8 +59,6 @@ int bc_finalizer::run() {
r->expand();
}
- run_on(sh.root);
-
cf_peephole();
// workaround for some problems on r6xx/7xx
@@ -213,18 +212,36 @@ void bc_finalizer::run_on(container_node* c) {
if (n->is_alu_group()) {
finalize_alu_group(static_cast<alu_group_node*>(n));
} else {
- if (n->is_fetch_inst()) {
+ if (n->is_alu_clause()) {
+ cf_node *c = static_cast<cf_node*>(n);
+
+ if (c->bc.op == CF_OP_ALU_PUSH_BEFORE && ctx.is_egcm()) {
+ if (ctx.stack_workaround_8xx) {
+ region_node *r = c->get_parent_region();
+ if (r) {
+ unsigned ifs, loops;
+ unsigned elems = get_stack_depth(r, loops, ifs);
+ unsigned dmod1 = elems % ctx.stack_entry_size;
+ unsigned dmod2 = (elems + 1) % ctx.stack_entry_size;
+
+ if (elems && (!dmod1 || !dmod2))
+ c->flags |= NF_ALU_STACK_WORKAROUND;
+ }
+ } else if (ctx.stack_workaround_9xx) {
+ region_node *r = c->get_parent_region();
+ if (r) {
+ unsigned ifs, loops;
+ get_stack_depth(r, loops, ifs);
+ if (loops >= 2)
+ c->flags |= NF_ALU_STACK_WORKAROUND;
+ }
+ }
+ }
+ } else if (n->is_fetch_inst()) {
finalize_fetch(static_cast<fetch_node*>(n));
} else if (n->is_cf_inst()) {
finalize_cf(static_cast<cf_node*>(n));
- } else if (n->is_alu_clause()) {
-
- } else if (n->is_fetch_clause()) {
-
- } else {
- assert(!"unexpected node");
}
-
if (n->is_container())
run_on(static_cast<container_node*>(n));
}
@@ -578,10 +595,6 @@ void bc_finalizer::finalize_cf(cf_node* c) {
unsigned flags = c->bc.op_ptr->flags;
- if (flags & CF_CALL) {
- update_nstack(c->get_parent_region(), ctx.is_cayman() ? 1 : 2);
- }
-
c->bc.end_of_program = 0;
last_cf = c;
@@ -715,17 +728,8 @@ void bc_finalizer::finalize_cf(cf_node* c) {
c->bc.index_gpr = reg >= 0 ? reg : 0;
}
-
-
-
- } else {
-
-#if 0
- if ((flags & (CF_BRANCH | CF_LOOP)) && !sh.uses_gradients) {
- c->bc.valid_pixel_mode = 1;
- }
-#endif
-
+ } else if (flags & CF_CALL) {
+ update_nstack(c->get_parent_region(), ctx.wavefront_size == 16 ? 2 : 1);
}
}
@@ -763,37 +767,78 @@ void bc_finalizer::update_ngpr(unsigned gpr) {
ngpr = gpr + 1;
}
-void bc_finalizer::update_nstack(region_node* r, unsigned add) {
- unsigned loops = 0;
- unsigned ifs = 0;
+unsigned bc_finalizer::get_stack_depth(node *n, unsigned &loops,
+ unsigned &ifs, unsigned add) {
+ unsigned stack_elements = add;
+ bool has_non_wqm_push_with_loops_on_stack = false;
+ bool has_non_wqm_push = (add != 0);
+ region_node *r = n->is_region() ?
+ static_cast<region_node*>(n) : n->get_parent_region();
+
+ loops = 0;
+ ifs = 0;
while (r) {
- if (r->is_loop())
+ if (r->is_loop()) {
++loops;
- else
+ if (has_non_wqm_push)
+ has_non_wqm_push_with_loops_on_stack = true;
+ } else {
++ifs;
-
+ has_non_wqm_push = true;
+ }
r = r->get_parent_region();
}
-
- unsigned stack_elements = (loops * ctx.stack_entry_size) + ifs + add;
-
- // FIXME calculate more precisely
- if (ctx.is_evergreen()) {
- ++stack_elements;
- } else {
- stack_elements += 2;
- if (ctx.is_cayman())
+ stack_elements += (loops * ctx.stack_entry_size) + ifs;
+
+ // reserve additional elements in some cases
+ switch (ctx.hw_class) {
+ case HW_CLASS_R600:
+ case HW_CLASS_R700:
+ if (has_non_wqm_push)
+ stack_elements += 2;
+ break;
+ case HW_CLASS_CAYMAN:
+ if (stack_elements)
+ stack_elements += 2;
+ break;
+ case HW_CLASS_EVERGREEN:
+ if (has_non_wqm_push_with_loops_on_stack)
++stack_elements;
+ break;
}
+ return stack_elements;
+}
- unsigned stack_entries = (stack_elements + 3) >> 2;
+void bc_finalizer::update_nstack(region_node* r, unsigned add) {
+ unsigned loops = 0;
+ unsigned ifs = 0;
+ unsigned elems = r ? get_stack_depth(r, loops, ifs, add) : add;
+
+ // XXX all chips expect this value to be computed using 4 as entry size,
+ // not the real entry size
+ unsigned stack_entries = (elems + 3) >> 2;
if (nstack < stack_entries)
nstack = stack_entries;
}
void bc_finalizer::cf_peephole() {
+ if (ctx.stack_workaround_8xx || ctx.stack_workaround_9xx) {
+ for (node_iterator N, I = sh.root->begin(), E = sh.root->end(); I != E;
+ I = N) {
+ N = I; ++N;
+ cf_node *c = static_cast<cf_node*>(*I);
+
+ if (c->bc.op == CF_OP_ALU_PUSH_BEFORE &&
+ (c->flags & NF_ALU_STACK_WORKAROUND)) {
+ cf_node *push = sh.create_cf(CF_OP_PUSH);
+ c->insert_before(push);
+ push->jump(c);
+ c->bc.set_op(CF_OP_ALU);
+ }
+ }
+ }
for (node_iterator N, I = sh.root->begin(), E = sh.root->end(); I != E;
I = N) {
diff --git a/src/gallium/drivers/r600/sb/sb_context.cpp b/src/gallium/drivers/r600/sb/sb_context.cpp
index 9474f74e89f..8e1142873ac 100644
--- a/src/gallium/drivers/r600/sb/sb_context.cpp
+++ b/src/gallium/drivers/r600/sb/sb_context.cpp
@@ -66,20 +66,27 @@ int sb_context::init(r600_isa *isa, sb_hw_chip chip, sb_hw_class cclass) {
case HW_CHIP_RS780:
case HW_CHIP_RV620:
case HW_CHIP_RS880:
-
+ wavefront_size = 16;
+ stack_entry_size = 8;
+ break;
case HW_CHIP_RV630:
case HW_CHIP_RV635:
case HW_CHIP_RV730:
case HW_CHIP_RV710:
case HW_CHIP_PALM:
case HW_CHIP_CEDAR:
+ wavefront_size = 32;
stack_entry_size = 8;
break;
default:
+ wavefront_size = 64;
stack_entry_size = 4;
break;
}
+ stack_workaround_8xx = needs_8xx_stack_workaround();
+ stack_workaround_9xx = needs_9xx_stack_workaround();
+
return 0;
}
diff --git a/src/gallium/drivers/r600/sb/sb_ir.h b/src/gallium/drivers/r600/sb/sb_ir.h
index a74d6cb5aa2..85c3d06ea7f 100644
--- a/src/gallium/drivers/r600/sb/sb_ir.h
+++ b/src/gallium/drivers/r600/sb/sb_ir.h
@@ -700,7 +700,10 @@ enum node_flags {
NF_DONT_MOVE = (1 << 8),
// for KILLxx - we want to schedule them as early as possible
- NF_SCHEDULE_EARLY = (1 << 9)
+ NF_SCHEDULE_EARLY = (1 << 9),
+
+ // for ALU_PUSH_BEFORE - when set, replace with PUSH + ALU
+ NF_ALU_STACK_WORKAROUND = (1 << 10)
};
inline node_flags operator |(node_flags l, node_flags r) {
diff --git a/src/gallium/drivers/r600/sb/sb_pass.h b/src/gallium/drivers/r600/sb/sb_pass.h
index a3f8515acde..c955656449f 100644
--- a/src/gallium/drivers/r600/sb/sb_pass.h
+++ b/src/gallium/drivers/r600/sb/sb_pass.h
@@ -708,6 +708,9 @@ public:
void update_ngpr(unsigned gpr);
void update_nstack(region_node *r, unsigned add = 0);
+ unsigned get_stack_depth(node *n, unsigned &loops, unsigned &ifs,
+ unsigned add = 0);
+
void cf_peephole();
};