diff options
author | Samuel Pitoiset <samuel.pitoiset@gmail.com> | 2020-03-26 11:40:35 +0100 |
---|---|---|
committer | Eric Engestrom <eric@engestrom.ch> | 2020-03-30 12:26:45 +0200 |
commit | dfc0a5cc14d33ea94555d4f4b316485f23cb1bd1 (patch) | |
tree | 80c3da43a9f8f645e22023022c27a22d528f0489 | |
parent | 6e4db7e726c4eb8ba026666af1069ecbe1a07ceb (diff) |
ac/nir: use llvm.amdgcn.rcp in ac_build_fdiv()
This avoids emitting 1.0 / x, which includes a slow division that
LLVM doesn't always optimize even if the metadata is correctly set.
No pipeline-db changes with VEGA10/LLVM 9.
pipeline-db (VEGA10/LLVM 10):
Totals from affected shaders:
SGPRS: 6672 -> 6672 (0.00 %)
VGPRS: 6652 -> 6652 (0.00 %)
Spilled SGPRs: 0 -> 0 (0.00 %)
Spilled VGPRs: 0 -> 0 (0.00 %)
Code Size: 561780 -> 561692 (-0.02 %) bytes
Max Waves: 1043 -> 1043 (0.00 %)
pipeline-db (VEGA10/LLVM 11 - 92744f62478):
Totals from affected shaders:
SGPRS: 84608 -> 83768 (-0.99 %)
VGPRS: 106768 -> 106636 (-0.12 %)
Spilled SGPRs: 1625 -> 1713 (5.42 %)
Spilled VGPRs: 0 -> 0 (0.00 %)
Code Size: 10850936 -> 10726712 (-1.14 %) bytes
Max Waves: 3152 -> 3180 (0.89 %)
LLVM 11 (master) is more affected than previous versions, but
based on the small impact with LLVM 9/10, I decided to emit it
unconditionally.
Cc: 20.0 <mesa-stable@lists.freedesktop.org>
Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Tested-by: Marge Bot <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4326>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4326>
(cherry picked from commit ba2ec1f369d2c97fc7c54ecd52b0addcfd349a31)
-rw-r--r-- | .pick_status.json | 2 | ||||
-rw-r--r-- | src/amd/llvm/ac_llvm_build.c | 30 | ||||
-rw-r--r-- | src/amd/llvm/ac_llvm_build.h | 2 |
3 files changed, 13 insertions, 21 deletions
diff --git a/.pick_status.json b/.pick_status.json
index 3186759be9f..3016971a806 100644
--- a/.pick_status.json
+++ b/.pick_status.json
@@ -607,7 +607,7 @@
         "description": "ac/nir: use llvm.amdgcn.rcp in ac_build_fdiv()",
         "nominated": true,
         "nomination_type": 0,
-        "resolution": 0,
+        "resolution": 1,
         "master_sha": null,
         "because_sha": null
     },
diff --git a/src/amd/llvm/ac_llvm_build.c b/src/amd/llvm/ac_llvm_build.c
index 6532c57a234..4b2331a524a 100644
--- a/src/amd/llvm/ac_llvm_build.c
+++ b/src/amd/llvm/ac_llvm_build.c
@@ -65,8 +65,6 @@ ac_llvm_context_init(struct ac_llvm_context *ctx,
 		     enum ac_float_mode float_mode, unsigned wave_size,
 		     unsigned ballot_mask_bits)
 {
-	LLVMValueRef args[1];
-
 	ctx->context = LLVMContextCreate();
 
 	ctx->chip_class = chip_class;
@@ -127,11 +125,6 @@ ac_llvm_context_init(struct ac_llvm_context *ctx,
 	ctx->invariant_load_md_kind = LLVMGetMDKindIDInContext(ctx->context,
 							       "invariant.load", 14);
 
-	ctx->fpmath_md_kind = LLVMGetMDKindIDInContext(ctx->context, "fpmath", 6);
-
-	args[0] = LLVMConstReal(ctx->f32, 2.5);
-	ctx->fpmath_md_2p5_ulp = LLVMMDNodeInContext(ctx->context, args, 1);
-
 	ctx->uniform_md_kind = LLVMGetMDKindIDInContext(ctx->context,
 							"amdgpu.uniform", 14);
 
@@ -707,17 +700,18 @@ ac_build_fdiv(struct ac_llvm_context *ctx,
 	      LLVMValueRef num,
 	      LLVMValueRef den)
 {
-	/* If we do (num / den), LLVM >= 7.0 does:
-	 *    return num * v_rcp_f32(den * (fabs(den) > 0x1.0p+96f ? 0x1.0p-32f : 1.0f));
-	 *
-	 * If we do (num * (1 / den)), LLVM does:
-	 *    return num * v_rcp_f32(den);
-	 */
-	LLVMValueRef one = LLVMConstReal(LLVMTypeOf(num), 1.0);
-	LLVMValueRef rcp = LLVMBuildFDiv(ctx->builder, one, den, "");
-	/* Use v_rcp_f32 instead of precise division. */
-	if (!LLVMIsConstant(rcp))
-		LLVMSetMetadata(rcp, ctx->fpmath_md_kind, ctx->fpmath_md_2p5_ulp);
+	unsigned type_size = ac_get_type_size(LLVMTypeOf(den));
+	const char *name;
+
+	if (type_size == 2)
+		name = "llvm.amdgcn.rcp.f16";
+	else if (type_size == 4)
+		name = "llvm.amdgcn.rcp.f32";
+	else
+		name = "llvm.amdgcn.rcp.f64";
+
+	LLVMValueRef rcp = ac_build_intrinsic(ctx, name, LLVMTypeOf(den),
+					      &den, 1, AC_FUNC_ATTR_READNONE);
 	return LLVMBuildFMul(ctx->builder, num, rcp, "");
 }
 
diff --git a/src/amd/llvm/ac_llvm_build.h b/src/amd/llvm/ac_llvm_build.h
index 772054efecd..e08ab656f9c 100644
--- a/src/amd/llvm/ac_llvm_build.h
+++ b/src/amd/llvm/ac_llvm_build.h
@@ -117,8 +117,6 @@ struct ac_llvm_context {
 	unsigned range_md_kind;
 	unsigned invariant_load_md_kind;
 	unsigned uniform_md_kind;
-	unsigned fpmath_md_kind;
-	LLVMValueRef fpmath_md_2p5_ulp;
 	LLVMValueRef empty_md;
 
 	enum chip_class chip_class;