summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRob Clark <robdclark@chromium.org>2020-02-25 10:44:26 -0800
committerMarge Bot <eric+marge@anholt.net>2020-02-28 16:53:41 +0000
commit56565b7bba54b8298d2c14c66bb87c59930b09ee (patch)
treea90cd5469a0cd18242a4289bbf0019c97d3c1c05
parent2cf4b5f29edbd7a01590fdf244fead5551db8d3f (diff)
freedreno/ir3: update SFU delay
1) empirically, 10 seems like a more accurate # than 4; 2) push "soft" delay handling into ir3_delayslots(), as we should also be using it to calculate the costs that the schedulers use. Signed-off-by: Rob Clark <robdclark@chromium.org> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/3989>
-rw-r--r--src/freedreno/ir3/ir3.h2
-rw-r--r--src/freedreno/ir3/ir3_delay.c26
-rw-r--r--src/freedreno/ir3/ir3_depth.c2
-rw-r--r--src/freedreno/ir3/ir3_postsched.c2
4 files changed, 19 insertions, 13 deletions
diff --git a/src/freedreno/ir3/ir3.h b/src/freedreno/ir3/ir3.h
index 21fd8c602b9..b66d8e2d6fd 100644
--- a/src/freedreno/ir3/ir3.h
+++ b/src/freedreno/ir3/ir3.h
@@ -1157,7 +1157,7 @@ void ir3_print_instr(struct ir3_instruction *instr);
/* delay calculation: */
int ir3_delayslots(struct ir3_instruction *assigner,
- struct ir3_instruction *consumer, unsigned n);
+ struct ir3_instruction *consumer, unsigned n, bool soft);
unsigned ir3_delay_calc(struct ir3_block *block, struct ir3_instruction *instr,
bool soft, bool pred);
void ir3_remove_nops(struct ir3 *ir);
diff --git a/src/freedreno/ir3/ir3_delay.c b/src/freedreno/ir3/ir3_delay.c
index 0b796a4183a..5839128a4c6 100644
--- a/src/freedreno/ir3/ir3_delay.c
+++ b/src/freedreno/ir3/ir3_delay.c
@@ -69,7 +69,7 @@ ignore_dep(struct ir3_instruction *assigner,
*/
int
ir3_delayslots(struct ir3_instruction *assigner,
- struct ir3_instruction *consumer, unsigned n)
+ struct ir3_instruction *consumer, unsigned n, bool soft)
{
if (ignore_dep(assigner, consumer, n))
return 0;
@@ -85,6 +85,20 @@ ir3_delayslots(struct ir3_instruction *assigner,
if (writes_addr(assigner))
return 6;
+ /* On a6xx, the number of delay slots it takes to get an SFU result
+ * back (ie. using nop's instead of (ss)) is:
+ *
+ * 8 - single warp
+ * 9 - two warps
+ * 10 - four warps
+ *
+ * and so on. Not quite sure where it tapers out (ie. how many
+ * warps share an SFU unit). But 10 seems like a reasonable #
+ * to choose:
+ */
+ if (soft && is_sfu(assigner))
+ return 10;
+
/* handled via sync flags: */
if (is_sfu(assigner) || is_tex(assigner) || is_mem(assigner))
return 0;
@@ -195,15 +209,7 @@ delay_calc_srcn(struct ir3_block *block,
delay = MAX2(delay, d);
}
} else {
- if (soft) {
- if (is_sfu(assigner)) {
- delay = 4;
- } else {
- delay = ir3_delayslots(assigner, consumer, srcn);
- }
- } else {
- delay = ir3_delayslots(assigner, consumer, srcn);
- }
+ delay = ir3_delayslots(assigner, consumer, srcn, soft);
delay -= distance(block, assigner, delay, pred);
}
diff --git a/src/freedreno/ir3/ir3_depth.c b/src/freedreno/ir3/ir3_depth.c
index 135d4365d2e..6bb946871e5 100644
--- a/src/freedreno/ir3/ir3_depth.c
+++ b/src/freedreno/ir3/ir3_depth.c
@@ -89,7 +89,7 @@ ir3_instr_depth(struct ir3_instruction *instr, unsigned boost, bool falsedep)
if (i == 0)
continue;
- sd = ir3_delayslots(src, instr, i) + src->depth;
+ sd = ir3_delayslots(src, instr, i, true) + src->depth;
sd += boost;
instr->depth = MAX2(instr->depth, sd);
diff --git a/src/freedreno/ir3/ir3_postsched.c b/src/freedreno/ir3/ir3_postsched.c
index 4290e882249..47a8e52fdeb 100644
--- a/src/freedreno/ir3/ir3_postsched.c
+++ b/src/freedreno/ir3/ir3_postsched.c
@@ -380,7 +380,7 @@ calculate_deps(struct ir3_postsched_deps_state *state,
struct ir3_postsched_node *dep = dep_reg(state, reg->num + b);
if (dep && (state->direction == F)) {
- unsigned d = ir3_delayslots(dep->instr, node->instr, i);
+ unsigned d = ir3_delayslots(dep->instr, node->instr, i, true);
node->delay = MAX2(node->delay, d);
}
}