summaryrefslogtreecommitdiff
path: root/src/gallium/drivers/i915
diff options
context:
space:
mode:
author: Ian Romanick <ian.d.romanick@intel.com> 2022-03-01 16:30:31 -0800
committer: Marge Bot <emma+marge@anholt.net> 2022-03-03 00:07:58 +0000
commit: 0a6c6dcb00847a0f413ab4fae0d83900deb27167 (patch)
tree: 0ee984f78c7a91b0210b002d88111afa6ff3a7b6 /src/gallium/drivers/i915
parent: 374da6fc41e955c261b7888a2809b7025fdf0f97 (diff)
i915g: Emit better code for SEQ(x, 0) and SNE(x, 0)
total instructions in shared programs: 789000 -> 788481 (-0.07%)
instructions in affected programs: 16179 -> 15660 (-3.21%)
helped: 157
HURT: 0
helped stats (abs) min: 3 max: 12 x̄: 3.31 x̃: 3
helped stats (rel) min: 1.56% max: 14.29% x̄: 4.24% x̃: 2.56%
95% mean confidence interval for instructions value: -3.51 -3.10
95% mean confidence interval for instructions %-change: -4.70% -3.78%
Instructions are helped.

LOST: 0
GAINED: 3

v2: Drop setting src1 to zero. Suggested by Emma.

Reviewed-by: Emma Anholt <emma@anholt.net>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/15210>
Diffstat (limited to 'src/gallium/drivers/i915')
-rw-r--r-- src/gallium/drivers/i915/i915_fpc_translate.c | 72
1 files changed, 54 insertions, 18 deletions
diff --git a/src/gallium/drivers/i915/i915_fpc_translate.c b/src/gallium/drivers/i915/i915_fpc_translate.c
index a17d73d8310..0acea54efe8 100644
--- a/src/gallium/drivers/i915/i915_fpc_translate.c
+++ b/src/gallium/drivers/i915/i915_fpc_translate.c
@@ -706,22 +706,40 @@ i915_translate_instruction(struct i915_fp_compile *p,
0);
break;
- case TGSI_OPCODE_SEQ:
+ case TGSI_OPCODE_SEQ: {
+ const uint32_t zero = swizzle(UREG(REG_TYPE_R, 0),
+ SRC_ZERO, SRC_ZERO, SRC_ZERO, SRC_ZERO);
+
/* if we're both >= and <= then we're == */
src0 = src_vector(p, &inst->Src[0], fs);
src1 = src_vector(p, &inst->Src[1], fs);
tmp = i915_get_utemp(p);
- i915_emit_arith(p, A0_SGE, tmp, A0_DEST_CHANNEL_ALL, 0, src0, src1, 0);
-
- i915_emit_arith(p, A0_SGE, get_result_vector(p, &inst->Dst[0]),
- get_result_flags(inst), 0, src1, src0, 0);
-
- i915_emit_arith(p, A0_MUL, get_result_vector(p, &inst->Dst[0]),
- get_result_flags(inst), 0,
- get_result_vector(p, &inst->Dst[0]), tmp, 0);
+ if (src0 == zero || src1 == zero) {
+ if (src0 == zero)
+ src0 = src1;
+
+ /* x == 0 is equivalent to -abs(x) >= 0, but the latter requires only
+ * two instructions instead of three.
+ */
+ i915_emit_arith(p, A0_MAX, tmp, A0_DEST_CHANNEL_ALL, 0, src0,
+ negate(src0, 1, 1, 1, 1), 0);
+ i915_emit_arith(p, A0_SGE, get_result_vector(p, &inst->Dst[0]),
+ get_result_flags(inst), 0,
+ negate(tmp, 1, 1, 1, 1), zero, 0);
+ } else {
+ i915_emit_arith(p, A0_SGE, tmp, A0_DEST_CHANNEL_ALL, 0, src0, src1, 0);
+
+ i915_emit_arith(p, A0_SGE, get_result_vector(p, &inst->Dst[0]),
+ get_result_flags(inst), 0, src1, src0, 0);
+
+ i915_emit_arith(p, A0_MUL, get_result_vector(p, &inst->Dst[0]),
+ get_result_flags(inst), 0,
+ get_result_vector(p, &inst->Dst[0]), tmp, 0);
+ }
break;
+ }
case TGSI_OPCODE_SGE:
emit_simple_arith(p, inst, A0_SGE, 2, fs);
@@ -741,21 +759,39 @@ i915_translate_instruction(struct i915_fp_compile *p,
emit_simple_arith_swap2(p, inst, A0_SLT, 2, fs);
break;
- case TGSI_OPCODE_SNE:
+ case TGSI_OPCODE_SNE: {
+ const uint32_t zero = swizzle(UREG(REG_TYPE_R, 0),
+ SRC_ZERO, SRC_ZERO, SRC_ZERO, SRC_ZERO);
+
/* if we're < or > then we're != */
src0 = src_vector(p, &inst->Src[0], fs);
src1 = src_vector(p, &inst->Src[1], fs);
tmp = i915_get_utemp(p);
- i915_emit_arith(p, A0_SLT, tmp, A0_DEST_CHANNEL_ALL, 0, src0, src1, 0);
-
- i915_emit_arith(p, A0_SLT, get_result_vector(p, &inst->Dst[0]),
- get_result_flags(inst), 0, src1, src0, 0);
-
- i915_emit_arith(p, A0_ADD, get_result_vector(p, &inst->Dst[0]),
- get_result_flags(inst), 0,
- get_result_vector(p, &inst->Dst[0]), tmp, 0);
+ if (src0 == zero || src1 == zero) {
+ if (src0 == zero)
+ src0 = src1;
+
+ /* x != 0 is equivalent to -abs(x) < 0, but the latter requires only
+ * two instructions instead of three.
+ */
+ i915_emit_arith(p, A0_MAX, tmp, A0_DEST_CHANNEL_ALL, 0, src0,
+ negate(src0, 1, 1, 1, 1), 0);
+ i915_emit_arith(p, A0_SLT, get_result_vector(p, &inst->Dst[0]),
+ get_result_flags(inst), 0,
+ negate(tmp, 1, 1, 1, 1), zero, 0);
+ } else {
+ i915_emit_arith(p, A0_SLT, tmp, A0_DEST_CHANNEL_ALL, 0, src0, src1, 0);
+
+ i915_emit_arith(p, A0_SLT, get_result_vector(p, &inst->Dst[0]),
+ get_result_flags(inst), 0, src1, src0, 0);
+
+ i915_emit_arith(p, A0_ADD, get_result_vector(p, &inst->Dst[0]),
+ get_result_flags(inst), 0,
+ get_result_vector(p, &inst->Dst[0]), tmp, 0);
+ }
break;
+ }
case TGSI_OPCODE_SSG:
/* compute (src>0) - (src<0) */