summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author Francisco Jerez <currojerez@riseup.net> 2021-05-25 14:41:26 -0700
committer Marge Bot <eric+marge@anholt.net> 2021-06-23 07:34:22 +0000
commit 385da1fe36ca231fc0ad3634b1aa3be4ef679959 (patch)
tree fa07a71102c5f4f4afc62e10d9b266527e24c35e /src
parent 231337a13af03f5579f5401cff16aa96ec796746 (diff)
intel/fs: Track single accumulator in scoreboard lowering pass.
This change reduces the precision of the scoreboard data structure for accumulator registers, because the rules determining the aliasing of accumulator registers are non-trivial and poorly documented (e.g. acc0 overlaps the storage of acc1 when the former is accessed with an integer type). We could implement those rules but it wouldn't have any practical benefit since we currently only use acc0-1, and for the most part we can rely on the hardware's accumulator dependency tracking. Instead make our lives easier by representing it as a single register.

Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11433>
Diffstat (limited to 'src')
-rw-r--r-- src/intel/compiler/brw_fs_scoreboard.cpp 25
1 file changed, 9 insertions, 16 deletions
diff --git a/src/intel/compiler/brw_fs_scoreboard.cpp b/src/intel/compiler/brw_fs_scoreboard.cpp
index 56027077895..5bfec9126e4 100644
--- a/src/intel/compiler/brw_fs_scoreboard.cpp
+++ b/src/intel/compiler/brw_fs_scoreboard.cpp
@@ -43,6 +43,8 @@
* - ip instruction pointer
* - tm0 timestamp register
* - dbg0 debug register
+ * - acc2-9 special accumulator registers on TGL
+ * - mme0-7 math macro extended accumulator registers
*
* The following ARF registers don't need to be tracked here because data
* coherency is still provided transparently by the hardware:
@@ -615,9 +617,7 @@ namespace {
sb.grf_deps[i] = merge(eq, sb0.grf_deps[i], sb1.grf_deps[i]);
sb.addr_dep = merge(eq, sb0.addr_dep, sb1.addr_dep);
-
- for (unsigned i = 0; i < ARRAY_SIZE(sb.accum_deps); i++)
- sb.accum_deps[i] = merge(eq, sb0.accum_deps[i], sb1.accum_deps[i]);
+ sb.accum_dep = merge(eq, sb0.accum_dep, sb1.accum_dep);
return sb;
}
@@ -635,9 +635,7 @@ namespace {
sb.grf_deps[i] = shadow(sb0.grf_deps[i], sb1.grf_deps[i]);
sb.addr_dep = shadow(sb0.addr_dep, sb1.addr_dep);
-
- for (unsigned i = 0; i < ARRAY_SIZE(sb.accum_deps); i++)
- sb.accum_deps[i] = shadow(sb0.accum_deps[i], sb1.accum_deps[i]);
+ sb.accum_dep = shadow(sb0.accum_dep, sb1.accum_dep);
return sb;
}
@@ -655,9 +653,7 @@ namespace {
sb.grf_deps[i] = transport(sb0.grf_deps[i], delta);
sb.addr_dep = transport(sb0.addr_dep, delta);
-
- for (unsigned i = 0; i < ARRAY_SIZE(sb.accum_deps); i++)
- sb.accum_deps[i] = transport(sb0.accum_deps[i], delta);
+ sb.accum_dep = transport(sb0.accum_dep, delta);
return sb;
}
@@ -673,10 +669,8 @@ namespace {
if (sb0.addr_dep != sb1.addr_dep)
return false;
- for (unsigned i = 0; i < ARRAY_SIZE(sb0.accum_deps); i++) {
- if (sb0.accum_deps[i] != sb1.accum_deps[i])
- return false;
- }
+ if (sb0.accum_dep != sb1.accum_dep)
+ return false;
return true;
}
@@ -690,7 +684,7 @@ namespace {
private:
dependency grf_deps[BRW_MAX_GRF];
dependency addr_dep;
- dependency accum_deps[10];
+ dependency accum_dep;
dependency *
dep(const fs_reg &r)
@@ -703,8 +697,7 @@ namespace {
r.file == ARF && reg >= BRW_ARF_ADDRESS &&
reg < BRW_ARF_ACCUMULATOR ? &addr_dep :
r.file == ARF && reg >= BRW_ARF_ACCUMULATOR &&
- reg < BRW_ARF_FLAG ? &accum_deps[
- reg - BRW_ARF_ACCUMULATOR] :
+ reg < BRW_ARF_FLAG ? &accum_dep :
NULL);
}
};