diff options
author | Francisco Jerez <currojerez@riseup.net> | 2021-05-25 14:41:26 -0700 |
---|---|---|
committer | Marge Bot <eric+marge@anholt.net> | 2021-06-23 07:34:22 +0000 |
commit | 385da1fe36ca231fc0ad3634b1aa3be4ef679959 (patch) | |
tree | fa07a71102c5f4f4afc62e10d9b266527e24c35e /src/intel/compiler/brw_fs_scoreboard.cpp | |
parent | 231337a13af03f5579f5401cff16aa96ec796746 (diff) |
intel/fs: Track single accumulator in scoreboard lowering pass.
This change reduces the precision of the scoreboard data structure for
accumulator registers, because the rules determining the aliasing of
accumulator registers are non-trivial and poorly documented (e.g. acc0
overlaps the storage of acc1 when the former is accessed with an
integer type). We could implement those rules but it wouldn't have
any practical benefit since we currently only use acc0-1, and for the
most part we can rely on the hardware's accumulator dependency
tracking. Instead, make our lives easier by representing all
accumulator registers as a single register.
Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11433>
Diffstat (limited to 'src/intel/compiler/brw_fs_scoreboard.cpp')
-rw-r--r-- | src/intel/compiler/brw_fs_scoreboard.cpp | 25 |
1 file changed, 9 insertions, 16 deletions
diff --git a/src/intel/compiler/brw_fs_scoreboard.cpp b/src/intel/compiler/brw_fs_scoreboard.cpp index 56027077895..5bfec9126e4 100644 --- a/src/intel/compiler/brw_fs_scoreboard.cpp +++ b/src/intel/compiler/brw_fs_scoreboard.cpp @@ -43,6 +43,8 @@ * - ip instruction pointer * - tm0 timestamp register * - dbg0 debug register + * - acc2-9 special accumulator registers on TGL + * - mme0-7 math macro extended accumulator registers * * The following ARF registers don't need to be tracked here because data * coherency is still provided transparently by the hardware: @@ -615,9 +617,7 @@ namespace { sb.grf_deps[i] = merge(eq, sb0.grf_deps[i], sb1.grf_deps[i]); sb.addr_dep = merge(eq, sb0.addr_dep, sb1.addr_dep); - - for (unsigned i = 0; i < ARRAY_SIZE(sb.accum_deps); i++) - sb.accum_deps[i] = merge(eq, sb0.accum_deps[i], sb1.accum_deps[i]); + sb.accum_dep = merge(eq, sb0.accum_dep, sb1.accum_dep); return sb; } @@ -635,9 +635,7 @@ namespace { sb.grf_deps[i] = shadow(sb0.grf_deps[i], sb1.grf_deps[i]); sb.addr_dep = shadow(sb0.addr_dep, sb1.addr_dep); - - for (unsigned i = 0; i < ARRAY_SIZE(sb.accum_deps); i++) - sb.accum_deps[i] = shadow(sb0.accum_deps[i], sb1.accum_deps[i]); + sb.accum_dep = shadow(sb0.accum_dep, sb1.accum_dep); return sb; } @@ -655,9 +653,7 @@ namespace { sb.grf_deps[i] = transport(sb0.grf_deps[i], delta); sb.addr_dep = transport(sb0.addr_dep, delta); - - for (unsigned i = 0; i < ARRAY_SIZE(sb.accum_deps); i++) - sb.accum_deps[i] = transport(sb0.accum_deps[i], delta); + sb.accum_dep = transport(sb0.accum_dep, delta); return sb; } @@ -673,10 +669,8 @@ namespace { if (sb0.addr_dep != sb1.addr_dep) return false; - for (unsigned i = 0; i < ARRAY_SIZE(sb0.accum_deps); i++) { - if (sb0.accum_deps[i] != sb1.accum_deps[i]) - return false; - } + if (sb0.accum_dep != sb1.accum_dep) + return false; return true; } @@ -690,7 +684,7 @@ namespace { private: dependency grf_deps[BRW_MAX_GRF]; dependency addr_dep; - dependency accum_deps[10]; + dependency accum_dep; 
dependency * dep(const fs_reg &r) @@ -703,8 +697,7 @@ namespace { r.file == ARF && reg >= BRW_ARF_ADDRESS && reg < BRW_ARF_ACCUMULATOR ? &addr_dep : r.file == ARF && reg >= BRW_ARF_ACCUMULATOR && - reg < BRW_ARF_FLAG ? &accum_deps[ - reg - BRW_ARF_ACCUMULATOR] : + reg < BRW_ARF_FLAG ? &accum_dep : NULL); } }; |