summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Francisco Jerez <currojerez@riseup.net> 2019-01-10 20:23:53 -0800
committer: Marge Bot <eric+marge@anholt.net> 2021-04-16 08:27:35 +0000
commit: a0e0dfe1743c703e718e509e7c2096d1b6e3dc95 (patch)
tree: 36f6eda6f3609e2387391aa3bb74a577f46969ef
parent: 635ed58e527f1a1c0b11eca0552e892f56f8ccf6 (diff)
intel/fs: Introduce lowering pass to implement derivatives in terms of quad swizzles.
Unfortunately the funky Align1 regions used by the code generator in order to implement derivatives efficiently aren't available to the floating-point pipeline on XeHP. We need to lower them into a number of pipelined integer shuffle instructions followed by the floating-point difference computation.

Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/10000>
-rw-r--r--src/intel/compiler/brw_fs.cpp64
-rw-r--r--src/intel/compiler/brw_fs.h1
2 files changed, 64 insertions, 1 deletions
diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp
index 00499e92759..9100b8d0a5e 100644
--- a/src/intel/compiler/brw_fs.cpp
+++ b/src/intel/compiler/brw_fs.cpp
@@ -7406,6 +7406,65 @@ fs_visitor::lower_barycentrics()
return progress;
}
+/**
+ * Lower a derivative instruction in place as the floating-point difference
+ * of two quad swizzles of its source: swizzle \p swz1 minus swizzle \p swz0.
+ */
+static bool
+lower_derivative(fs_visitor *v, bblock_t *block, fs_inst *inst,
+ unsigned swz0, unsigned swz1)
+{
+ const fs_builder ibld(v, block, inst); /* NOTE(review): presumably inserts before inst -- confirm */
+ const fs_reg tmp0 = ibld.vgrf(inst->src[0].type); /* temporaries share the source's type */
+ const fs_reg tmp1 = ibld.vgrf(inst->src[0].type);
+
+ ibld.emit(SHADER_OPCODE_QUAD_SWIZZLE, tmp0, inst->src[0], brw_imm_ud(swz0)); /* tmp0 = src0 swizzled by swz0 */
+ ibld.emit(SHADER_OPCODE_QUAD_SWIZZLE, tmp1, inst->src[0], brw_imm_ud(swz1)); /* tmp1 = src0 swizzled by swz1 */
+
+ inst->resize_sources(2); /* the rewritten instruction takes two sources */
+ inst->src[0] = negate(tmp0); /* -tmp0 + tmp1 == tmp1 - tmp0 */
+ inst->src[1] = tmp1;
+ inst->opcode = BRW_OPCODE_ADD; /* inst itself becomes the final difference */
+
+ return true; /* unconditionally reports progress */
+}
+
+/**
+ * Lower DDX/DDY (coarse and fine) instructions on platforms where codegen
+ * cannot implement them efficiently (verx10 >= 125, i.e. XeHP).
+ */
+bool
+fs_visitor::lower_derivatives()
+{
+ bool progress = false;
+
+ if (devinfo->verx10 < 125) /* earlier platforms are left untouched */
+ return false;
+
+ foreach_block_and_inst(block, fs_inst, inst, cfg) { /* NOTE(review): comments below assume X,Y,Z,W select quad channels 0..3 -- confirm */
+ if (inst->opcode == FS_OPCODE_DDX_COARSE)
+ progress |= lower_derivative(this, block, inst,
+ BRW_SWIZZLE_XXXX, BRW_SWIZZLE_YYYY); /* Y - X in every channel */
+
+ else if (inst->opcode == FS_OPCODE_DDX_FINE)
+ progress |= lower_derivative(this, block, inst,
+ BRW_SWIZZLE_XXZZ, BRW_SWIZZLE_YYWW); /* Y - X in the first two channels, W - Z in the last two */
+
+ else if (inst->opcode == FS_OPCODE_DDY_COARSE)
+ progress |= lower_derivative(this, block, inst,
+ BRW_SWIZZLE_XXXX, BRW_SWIZZLE_ZZZZ); /* Z - X in every channel */
+
+ else if (inst->opcode == FS_OPCODE_DDY_FINE)
+ progress |= lower_derivative(this, block, inst,
+ BRW_SWIZZLE_XYXY, BRW_SWIZZLE_ZWZW); /* Z - X in even channels, W - Y in odd channels */
+ }
+
+ if (progress) /* lowering added instructions and temporaries, so flag both dependency classes */
+ invalidate_analysis(DEPENDENCY_INSTRUCTIONS | DEPENDENCY_VARIABLES);
+
+ return progress; /* true iff at least one derivative was lowered */
+}
+
void
fs_visitor::dump_instructions() const
{
@@ -7978,7 +8037,10 @@ fs_visitor::optimize()
OPT(dead_code_eliminate);
}
- if (OPT(lower_regioning)) {
+ progress = false;
+ OPT(lower_derivatives);
+ OPT(lower_regioning);
+ if (progress) {
OPT(opt_copy_propagation);
OPT(dead_code_eliminate);
OPT(lower_simd_width);
diff --git a/src/intel/compiler/brw_fs.h b/src/intel/compiler/brw_fs.h
index 1f286a0e593..413b225cfce 100644
--- a/src/intel/compiler/brw_fs.h
+++ b/src/intel/compiler/brw_fs.h
@@ -196,6 +196,7 @@ public:
bool lower_minmax();
bool lower_simd_width();
bool lower_barycentrics();
+ bool lower_derivatives();
bool lower_scoreboard();
bool lower_sub_sat();
bool opt_combine_constants();