summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author: Francisco Jerez <currojerez@riseup.net> 2021-05-26 16:50:40 -0700
committer: Marge Bot <eric+marge@anholt.net> 2021-06-23 07:34:22 +0000
commit: 5e7f443de05f7865654b280ffab298172b33b863 (patch)
tree: 7a6b07494a06c17d8a17d1c8da86be1c64dcae4a /src
parent: d46bb14d140079b78d9d1478d13ac5e0864fe403 (diff)
intel/fs: Add SWSB dependency annotations for cross-pipeline WaR data hazards on XeHP+.
In cases where an in-order instruction is overwriting a register previously read by another in-order instruction, drop the dependency iff the previous read is guaranteed to have occurred from the same in-order pipeline. This should only have an effect on XeHP+ since previous Xe platforms only had one in-order FPU pipeline. The previous workaround we were using for this treated all ordered read dependencies as write dependencies to avoid noise from our simulation environment.

Relative to our previous workaround this improves performance of GFXBench5 gl_tess by ~7% on a DG2 system, among other single-digit percentage FPS improvements.

Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11433>
Diffstat (limited to 'src')
-rw-r--r-- src/intel/compiler/brw_fs_scoreboard.cpp 13
1 files changed, 9 insertions, 4 deletions
diff --git a/src/intel/compiler/brw_fs_scoreboard.cpp b/src/intel/compiler/brw_fs_scoreboard.cpp
index 8702e12e516..4176f54c908 100644
--- a/src/intel/compiler/brw_fs_scoreboard.cpp
+++ b/src/intel/compiler/brw_fs_scoreboard.cpp
@@ -579,11 +579,16 @@ namespace {
/**
* Return simplified dependency removing any synchronization modes not
* applicable to an instruction \p inst writing the same register location.
+ *
+ * This clears any WaR dependency for writes performed from the same
+ * pipeline as the read, since there is no possibility for a data hazard.
*/
dependency
- dependency_for_write(const fs_inst *inst, dependency dep)
+ dependency_for_write(const struct intel_device_info *devinfo,
+ const fs_inst *inst, dependency dep)
{
- if (!is_unordered(inst))
+ if (!is_unordered(inst) &&
+ is_single_pipe(dep.jp, inferred_exec_pipe(devinfo, inst)))
dep.ordered &= TGL_REGDIST_DST;
return dep;
}
@@ -1124,7 +1129,7 @@ namespace {
if (inst->dst.file != BAD_FILE && !inst->dst.is_null() &&
!inst->dst.is_accumulator()) {
for (unsigned j = 0; j < regs_written(inst); j++) {
- add_dependency(ids, deps[ip], dependency_for_write(inst,
+ add_dependency(ids, deps[ip], dependency_for_write(devinfo, inst,
sb.get(byte_offset(inst->dst, REG_SIZE * j))));
}
}
@@ -1144,7 +1149,7 @@ namespace {
if (is_send(inst) && inst->base_mrf != -1) {
for (unsigned j = 0; j < inst->implied_mrf_writes(); j++)
- add_dependency(ids, deps[ip], dependency_for_write(inst,
+ add_dependency(ids, deps[ip], dependency_for_write(devinfo, inst,
sb.get(brw_uvec_mrf(8, inst->base_mrf + j, 0))));
}
}