summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorFrancisco Jerez <currojerez@riseup.net>2021-05-25 15:02:53 -0700
committerMarge Bot <eric+marge@anholt.net>2021-06-23 07:34:22 +0000
commitd46bb14d140079b78d9d1478d13ac5e0864fe403 (patch)
tree687c8a62590e15a50e328a2962f581daf817dbf1
parent385da1fe36ca231fc0ad3634b1aa3be4ef679959 (diff)
intel/fs: Implement Wa_22012725308 for cross-pipe accumulator data hazard.
The hardware fails to provide the expected data coherency guarantees for accumulator registers when accessed from multiple FPU pipelines. Fix this by tracking implicit accumulator accesses just like we do for regular GRF registers, but instead of adding synchronization annotations for any dependency we only do it for dependencies with a pipeline mismatch, since the hardware should be able to guarantee proper synchronization for matching pipelines. Note that this workaround handles RaW and WaW dependencies in addition to the WaR dependencies described in the hardware bug report even though cross-pipeline RaW accumulator dependencies should be extremely rare, since chances are the hardware will also hang if we ever hit such a condition. This only affects XeHP+, since all FPU instructions are executed as a single in-order pipeline on earlier Xe platforms. Reviewed-by: Jason Ekstrand <jason@jlekstrand.net> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11433>
-rw-r--r--src/intel/compiler/brw_fs_scoreboard.cpp51
1 files changed, 50 insertions, 1 deletions
diff --git a/src/intel/compiler/brw_fs_scoreboard.cpp b/src/intel/compiler/brw_fs_scoreboard.cpp
index 5bfec9126e4..8702e12e516 100644
--- a/src/intel/compiler/brw_fs_scoreboard.cpp
+++ b/src/intel/compiler/brw_fs_scoreboard.cpp
@@ -210,6 +210,21 @@ namespace {
};
/**
+ * Return true if the specified ordered address is trivially satisfied for
+ * all pipelines except potentially for the specified pipeline \p p.
+ */
+ bool
+ is_single_pipe(const ordered_address &jp, tgl_pipe p)
+ {
+ for (unsigned q = 0; q < IDX(TGL_PIPE_ALL); q++) {
+ if ((p == TGL_PIPE_NONE || IDX(p) != q) && jp.jp[q] > INT_MIN)
+ return false;
+ }
+
+ return true;
+ }
+
+ /**
* Return the number of instructions in the program.
*/
unsigned
@@ -962,6 +977,9 @@ namespace {
sb.set(byte_offset(inst->src[i], REG_SIZE * j), rd_dep);
}
+ if (inst->reads_accumulator_implicitly())
+ sb.set(brw_acc_reg(8), dependency(TGL_REGDIST_SRC, jp, exec_all));
+
if (is_send(inst) && inst->base_mrf != -1) {
const dependency rd_dep = dependency(TGL_SBID_SRC, ip, exec_all);
@@ -976,6 +994,9 @@ namespace {
dependency(TGL_REGDIST_DST, jp, exec_all) :
dependency();
+ if (inst->writes_accumulator_implicitly(devinfo))
+ sb.set(brw_acc_reg(8), wr_dep);
+
if (is_valid(wr_dep) && inst->dst.file != BAD_FILE &&
!inst->dst.is_null()) {
for (unsigned j = 0; j < regs_written(inst); j++)
@@ -1059,6 +1080,7 @@ namespace {
gather_inst_dependencies(const fs_visitor *shader,
const ordered_address *jps)
{
+ const struct intel_device_info *devinfo = shader->devinfo;
equivalence_relation eq(num_instructions(shader));
scoreboard *sbs = propagate_block_scoreboards(shader, jps, eq);
const unsigned *ids = eq.flatten();
@@ -1067,6 +1089,7 @@ namespace {
foreach_block_and_inst(block, fs_inst, inst, shader->cfg) {
const bool exec_all = inst->force_writemask_all;
+ const tgl_pipe p = inferred_exec_pipe(devinfo, inst);
scoreboard &sb = sbs[block->num];
for (unsigned i = 0; i < inst->sources; i++) {
@@ -1075,6 +1098,18 @@ namespace {
sb.get(byte_offset(inst->src[i], REG_SIZE * j))));
}
+ if (inst->reads_accumulator_implicitly()) {
+ /* Wa_22012725308:
+ *
+ * "When the accumulator registers are used as source and/or
+ * destination, hardware does not ensure prevention of write
+ * after read hazard across execution pipes."
+ */
+ const dependency dep = sb.get(brw_acc_reg(8));
+ if (dep.ordered && !is_single_pipe(dep.jp, p))
+ add_dependency(ids, deps[ip], dep);
+ }
+
if (is_send(inst) && inst->base_mrf != -1) {
for (unsigned j = 0; j < inst->mlen; j++)
add_dependency(ids, deps[ip], dependency_for_read(
@@ -1086,13 +1121,27 @@ namespace {
dependency(TGL_SBID_SET, ip, exec_all));
if (!inst->no_dd_check) {
- if (inst->dst.file != BAD_FILE && !inst->dst.is_null()) {
+ if (inst->dst.file != BAD_FILE && !inst->dst.is_null() &&
+ !inst->dst.is_accumulator()) {
for (unsigned j = 0; j < regs_written(inst); j++) {
add_dependency(ids, deps[ip], dependency_for_write(inst,
sb.get(byte_offset(inst->dst, REG_SIZE * j))));
}
}
+ if (inst->writes_accumulator_implicitly(devinfo) ||
+ inst->dst.is_accumulator()) {
+ /* Wa_22012725308:
+ *
+ * "When the accumulator registers are used as source and/or
+ * destination, hardware does not ensure prevention of write
+ * after read hazard across execution pipes."
+ */
+ const dependency dep = sb.get(brw_acc_reg(8));
+ if (dep.ordered && !is_single_pipe(dep.jp, p))
+ add_dependency(ids, deps[ip], dep);
+ }
+
if (is_send(inst) && inst->base_mrf != -1) {
for (unsigned j = 0; j < inst->implied_mrf_writes(); j++)
add_dependency(ids, deps[ip], dependency_for_write(inst,