summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMatt Turner <mattst88@gmail.com>2016-05-02 23:32:13 -0700
committerMatt Turner <mattst88@gmail.com>2016-05-05 10:18:28 -0700
commitf01d92f4734a7ca62926dceda1d004c0cb10548c (patch)
treef5c3ac8ddaccf61de72f36a227ad60a22bb77c8c
parent9d86a5eea79ac30bb90af363c66a5ba8529b37d8 (diff)
i965/fs: Don't follow pow with an instruction with two dest regs.
Beginning with commit 7b208a73, Unigine Valley began hanging the GPU on Gen >= 8 platforms. Evidently that commit allowed the scheduler to make different choices that somehow finally ran afoul of a hardware bug in which POW and FDIV instructions may not be followed by an instruction with two destination registers (including compressed instructions). I presume the conditions are more complex than that, but the internal hardware bug report (BDWGFX bug_de 1696294) does not contain much more information. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=94924 Reviewed-by: Topi Pohjolainen <topi.pohjolainen@intel.com> [v1] Tested-by: Mark Janes <mark.a.janes@intel.com> [v1] Reviewed-by: Francisco Jerez <currojerez@riseup.net>
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs_generator.cpp18
1 files changed, 18 insertions, 0 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
index 8654ca4d0af..ff8d37e2c0c 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
@@ -1726,6 +1726,24 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width)
unsigned int last_insn_offset = p->next_insn_offset;
bool multiple_instructions_emitted = false;
+ /* From the Broadwell PRM, Volume 7, "3D-Media-GPGPU", in the
+ * "Register Region Restrictions" section: for BDW, SKL:
+ *
+ * "A POW/FDIV operation must not be followed by an instruction
+ * that requires two destination registers."
+ *
+ * The documentation is often lacking annotations for Atom parts,
+ * and empirically this affects CHV as well.
+ */
+ if (devinfo->gen >= 8 &&
+ p->nr_insn > 1 &&
+ brw_inst_opcode(devinfo, brw_last_inst) == BRW_OPCODE_MATH &&
+ brw_inst_math_function(devinfo, brw_last_inst) == BRW_MATH_FUNCTION_POW &&
+ inst->dst.component_size(inst->exec_size) > REG_SIZE) {
+ brw_NOP(p);
+ last_insn_offset = p->next_insn_offset;
+ }
+
if (unlikely(debug_flag))
annotate(p->devinfo, &annotation, cfg, inst, p->next_insn_offset);