summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorIan Romanick <ian.d.romanick@intel.com>2023-06-22 19:03:25 -0700
committerMarge Bot <emma+marge@anholt.net>2023-09-25 21:13:53 +0000
commit3fd835f6fd72b2388893d562558eff92c335dbbd (patch)
tree3d1bccb600db31f16bf9cdaf98cedd6ec74edeea
parent41fe60cf3e4b09c80a43e6cf2ae5ff7430fb94f3 (diff)
intel/fs: Constant fold OR and AND
The path taken in fs_visitor::swizzle_nir_scratch_addr for DG2 generates some AND and OR instructions before the SHL. This commit folds those so the whole calculation becomes a constant (like on older platforms). v2: Fix return type of src_as_uint. Noticed by Marcin. shader-db results: DG2 total instructions in shared programs: 23190475 -> 23179540 (-0.05%) instructions in affected programs: 36026 -> 25091 (-30.35%) helped: 7 / HURT: 0 total cycles in shared programs: 841196807 -> 841142563 (<.01%) cycles in affected programs: 1660670 -> 1606426 (-3.27%) helped: 7 / HURT: 0 No shader-db changes on any older Intel platforms. fossil-db results: DG2 Totals: Instrs: 197780372 -> 197773966 (-0.00%) Cycles: 14066410782 -> 14066399378 (-0.00%); split: -0.00%, +0.00% Subgroup size: 8438104 -> 8438112 (+0.00%) Send messages: 8049445 -> 8049446 (+0.00%) Scratch Memory Size: 14263296 -> 14264320 (+0.01%) Totals from 9 (0.00% of 668055) affected shaders: Instrs: 24547 -> 18141 (-26.10%) Cycles: 1984791 -> 1973387 (-0.57%); split: -0.98%, +0.40% Subgroup size: 88 -> 96 (+9.09%) Send messages: 867 -> 868 (+0.12%) Scratch Memory Size: 69632 -> 70656 (+1.47%) No fossil-db changes on any older Intel platforms. Reviewed-by: Kenneth Graunke <kenneth@whitecape.org> (cherry picked from commit cb0de0a1d3420ddf9da56b24b5dd09205b8574ea) Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/25377>
-rw-r--r--src/intel/compiler/brw_fs.cpp84
-rw-r--r--src/intel/compiler/brw_fs_copy_propagation.cpp15
2 files changed, 97 insertions, 2 deletions
diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp
index 346482d49bc..591cc6d77ba 100644
--- a/src/intel/compiler/brw_fs.cpp
+++ b/src/intel/compiler/brw_fs.cpp
@@ -2602,6 +2602,62 @@ fs_visitor::lower_constant_loads()
invalidate_analysis(DEPENDENCY_INSTRUCTIONS);
}
+/**
+ * Read an integer immediate source as a 64-bit unsigned value.
+ *
+ * W immediates are taken from the low 16 bits of the payload and converted
+ * through int16_t (so they are sign-extended); UW immediates are taken from
+ * the low 16 bits and converted through uint16_t (zero-extended).  For the
+ * remaining types the matching field of \p src (d, ud, d64 or u64) is
+ * converted to uint64_t.
+ *
+ * \p src must be an IMM whose type is W, UW, D, UD, Q or UQ; any other type
+ * hits unreachable().
+ */
+static uint64_t
+src_as_uint(const fs_reg &src)
+{
+   assert(src.file == IMM);
+
+   switch (src.type) {
+   case BRW_REGISTER_TYPE_W: {
+      /* Narrow to a signed word first so the widening sign-extends. */
+      const int16_t low_word = (int16_t)(src.ud & 0xffff);
+      return (uint64_t)low_word;
+   }
+
+   case BRW_REGISTER_TYPE_UW: {
+      const uint16_t low_word = (uint16_t)(src.ud & 0xffff);
+      return (uint64_t)low_word;
+   }
+
+   case BRW_REGISTER_TYPE_D:
+      return (uint64_t)src.d;
+
+   case BRW_REGISTER_TYPE_UD:
+      return (uint64_t)src.ud;
+
+   case BRW_REGISTER_TYPE_Q:
+      return src.d64;
+
+   case BRW_REGISTER_TYPE_UQ:
+      return src.u64;
+
+   default:
+      unreachable("Invalid integer type.");
+   }
+}
+
+/**
+ * Build an immediate register of the requested integer type from a 64-bit
+ * value.
+ *
+ * \param value  Bits to encode.  The value is implicitly converted to the
+ *               parameter type of the brw_imm_* helper selected for \p type.
+ * \param type   One of W, UW, D, UD, Q or UQ.  Any other type hits
+ *               unreachable().
+ *
+ * \return The immediate produced by the brw_imm_* helper matching \p type.
+ */
+static fs_reg
+brw_imm_for_type(uint64_t value, enum brw_reg_type type)
+{
+   switch (type) {
+   case BRW_REGISTER_TYPE_W:
+      return brw_imm_w(value);
+
+   case BRW_REGISTER_TYPE_UW:
+      return brw_imm_uw(value);
+
+   case BRW_REGISTER_TYPE_D:
+      return brw_imm_d(value);
+
+   case BRW_REGISTER_TYPE_UD:
+      return brw_imm_ud(value);
+
+   case BRW_REGISTER_TYPE_Q:
+      /* Use the Q helper here, not brw_imm_d: the D helper is the one the
+       * D case uses, so it would not yield the requested Q immediate and
+       * (presumably) would drop the upper 32 bits of the folded constant.
+       */
+      return brw_imm_q(value);
+
+   case BRW_REGISTER_TYPE_UQ:
+      return brw_imm_uq(value);
+
+   default:
+      unreachable("Invalid integer type.");
+   }
+}
+
bool
fs_visitor::opt_algebraic()
{
@@ -2735,7 +2791,35 @@ fs_visitor::opt_algebraic()
break;
}
break;
+
+ case BRW_OPCODE_AND:
+ if (inst->src[0].file == IMM && inst->src[1].file == IMM) {
+ const uint64_t src0 = src_as_uint(inst->src[0]);
+ const uint64_t src1 = src_as_uint(inst->src[1]);
+
+ inst->opcode = BRW_OPCODE_MOV;
+ inst->sources = 1;
+ inst->src[0] = brw_imm_for_type(src0 & src1, inst->dst.type);
+ inst->src[1] = reg_undef;
+ progress = true;
+ break;
+ }
+
+ break;
+
case BRW_OPCODE_OR:
+ if (inst->src[0].file == IMM && inst->src[1].file == IMM) {
+ const uint64_t src0 = src_as_uint(inst->src[0]);
+ const uint64_t src1 = src_as_uint(inst->src[1]);
+
+ inst->opcode = BRW_OPCODE_MOV;
+ inst->sources = 1;
+ inst->src[0] = brw_imm_for_type(src0 | src1, inst->dst.type);
+ inst->src[1] = reg_undef;
+ progress = true;
+ break;
+ }
+
if (inst->src[0].equals(inst->src[1]) ||
inst->src[1].is_zero()) {
/* On Gfx8+, the OR instruction can have a source modifier that
diff --git a/src/intel/compiler/brw_fs_copy_propagation.cpp b/src/intel/compiler/brw_fs_copy_propagation.cpp
index d16077c3c13..ba0ebc58909 100644
--- a/src/intel/compiler/brw_fs_copy_propagation.cpp
+++ b/src/intel/compiler/brw_fs_copy_propagation.cpp
@@ -935,8 +935,6 @@ fs_visitor::try_constant_propagate(fs_inst *inst, acp_entry *entry)
case BRW_OPCODE_MUL:
case SHADER_OPCODE_MULH:
case BRW_OPCODE_ADD:
- case BRW_OPCODE_OR:
- case BRW_OPCODE_AND:
case BRW_OPCODE_XOR:
case BRW_OPCODE_ADDC:
if (i == 1) {
@@ -1072,6 +1070,8 @@ fs_visitor::try_constant_propagate(fs_inst *inst, acp_entry *entry)
}
break;
+ case BRW_OPCODE_AND:
+ case BRW_OPCODE_OR:
case SHADER_OPCODE_TEX_LOGICAL:
case SHADER_OPCODE_TXD_LOGICAL:
case SHADER_OPCODE_TXF_LOGICAL:
@@ -1120,6 +1120,17 @@ fs_visitor::try_constant_propagate(fs_inst *inst, acp_entry *entry)
}
}
+ /* If only one of the sources of a 2-source, commutative instruction (e.g.,
+ * AND) is immediate, it must be src1. If both are immediate, opt_algebraic
+ * should fold it away.
+ */
+ if (progress && inst->sources == 2 && inst->is_commutative() &&
+ inst->src[0].file == IMM && inst->src[1].file != IMM) {
+ const auto src1 = inst->src[1];
+ inst->src[1] = inst->src[0];
+ inst->src[0] = src1;
+ }
+
return progress;
}