diff options
author | Ian Romanick <ian.d.romanick@intel.com> | 2022-03-01 16:30:31 -0800 |
---|---|---|
committer | Marge Bot <emma+marge@anholt.net> | 2022-03-03 00:07:58 +0000 |
commit | 0a6c6dcb00847a0f413ab4fae0d83900deb27167 (patch) | |
tree | 0ee984f78c7a91b0210b002d88111afa6ff3a7b6 /src/gallium/drivers/i915 | |
parent | 374da6fc41e955c261b7888a2809b7025fdf0f97 (diff) |
i915g: Emit better code for SEQ(x, 0) and SNE(x, 0)
total instructions in shared programs: 789000 -> 788481 (-0.07%)
instructions in affected programs: 16179 -> 15660 (-3.21%)
helped: 157
HURT: 0
helped stats (abs) min: 3 max: 12 x̄: 3.31 x̃: 3
helped stats (rel) min: 1.56% max: 14.29% x̄: 4.24% x̃: 2.56%
95% mean confidence interval for instructions value: -3.51 -3.10
95% mean confidence interval for instructions %-change: -4.70% -3.78%
Instructions are helped.
LOST: 0
GAINED: 3
v2: Drop setting src1 to zero. Suggested by Emma.
Reviewed-by: Emma Anholt <emma@anholt.net>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/15210>
Diffstat (limited to 'src/gallium/drivers/i915')
-rw-r--r-- | src/gallium/drivers/i915/i915_fpc_translate.c | 72 |
1 file changed, 54 insertions, 18 deletions
diff --git a/src/gallium/drivers/i915/i915_fpc_translate.c b/src/gallium/drivers/i915/i915_fpc_translate.c index a17d73d8310..0acea54efe8 100644 --- a/src/gallium/drivers/i915/i915_fpc_translate.c +++ b/src/gallium/drivers/i915/i915_fpc_translate.c @@ -706,22 +706,40 @@ i915_translate_instruction(struct i915_fp_compile *p, 0); break; - case TGSI_OPCODE_SEQ: + case TGSI_OPCODE_SEQ: { + const uint32_t zero = swizzle(UREG(REG_TYPE_R, 0), + SRC_ZERO, SRC_ZERO, SRC_ZERO, SRC_ZERO); + /* if we're both >= and <= then we're == */ src0 = src_vector(p, &inst->Src[0], fs); src1 = src_vector(p, &inst->Src[1], fs); tmp = i915_get_utemp(p); - i915_emit_arith(p, A0_SGE, tmp, A0_DEST_CHANNEL_ALL, 0, src0, src1, 0); - - i915_emit_arith(p, A0_SGE, get_result_vector(p, &inst->Dst[0]), - get_result_flags(inst), 0, src1, src0, 0); - - i915_emit_arith(p, A0_MUL, get_result_vector(p, &inst->Dst[0]), - get_result_flags(inst), 0, - get_result_vector(p, &inst->Dst[0]), tmp, 0); + if (src0 == zero || src1 == zero) { + if (src0 == zero) + src0 = src1; + + /* x == 0 is equivalent to -abs(x) >= 0, but the latter requires only + * two instructions instead of three. 
+ */ + i915_emit_arith(p, A0_MAX, tmp, A0_DEST_CHANNEL_ALL, 0, src0, + negate(src0, 1, 1, 1, 1), 0); + i915_emit_arith(p, A0_SGE, get_result_vector(p, &inst->Dst[0]), + get_result_flags(inst), 0, + negate(tmp, 1, 1, 1, 1), zero, 0); + } else { + i915_emit_arith(p, A0_SGE, tmp, A0_DEST_CHANNEL_ALL, 0, src0, src1, 0); + + i915_emit_arith(p, A0_SGE, get_result_vector(p, &inst->Dst[0]), + get_result_flags(inst), 0, src1, src0, 0); + + i915_emit_arith(p, A0_MUL, get_result_vector(p, &inst->Dst[0]), + get_result_flags(inst), 0, + get_result_vector(p, &inst->Dst[0]), tmp, 0); + } break; + } case TGSI_OPCODE_SGE: emit_simple_arith(p, inst, A0_SGE, 2, fs); @@ -741,21 +759,39 @@ i915_translate_instruction(struct i915_fp_compile *p, emit_simple_arith_swap2(p, inst, A0_SLT, 2, fs); break; - case TGSI_OPCODE_SNE: + case TGSI_OPCODE_SNE: { + const uint32_t zero = swizzle(UREG(REG_TYPE_R, 0), + SRC_ZERO, SRC_ZERO, SRC_ZERO, SRC_ZERO); + /* if we're < or > then we're != */ src0 = src_vector(p, &inst->Src[0], fs); src1 = src_vector(p, &inst->Src[1], fs); tmp = i915_get_utemp(p); - i915_emit_arith(p, A0_SLT, tmp, A0_DEST_CHANNEL_ALL, 0, src0, src1, 0); - - i915_emit_arith(p, A0_SLT, get_result_vector(p, &inst->Dst[0]), - get_result_flags(inst), 0, src1, src0, 0); - - i915_emit_arith(p, A0_ADD, get_result_vector(p, &inst->Dst[0]), - get_result_flags(inst), 0, - get_result_vector(p, &inst->Dst[0]), tmp, 0); + if (src0 == zero || src1 == zero) { + if (src0 == zero) + src0 = src1; + + /* x != 0 is equivalent to -abs(x) < 0, but the latter requires only + * two instructions instead of three. 
+ */ + i915_emit_arith(p, A0_MAX, tmp, A0_DEST_CHANNEL_ALL, 0, src0, + negate(src0, 1, 1, 1, 1), 0); + i915_emit_arith(p, A0_SLT, get_result_vector(p, &inst->Dst[0]), + get_result_flags(inst), 0, + negate(tmp, 1, 1, 1, 1), zero, 0); + } else { + i915_emit_arith(p, A0_SLT, tmp, A0_DEST_CHANNEL_ALL, 0, src0, src1, 0); + + i915_emit_arith(p, A0_SLT, get_result_vector(p, &inst->Dst[0]), + get_result_flags(inst), 0, src1, src0, 0); + + i915_emit_arith(p, A0_ADD, get_result_vector(p, &inst->Dst[0]), + get_result_flags(inst), 0, + get_result_vector(p, &inst->Dst[0]), tmp, 0); + } break; + } case TGSI_OPCODE_SSG: /* compute (src>0) - (src<0) */ |