diff options
author | Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com> | 2020-08-11 18:52:24 +0200 |
---|---|---|
committer | Eric Engestrom <eric@engestrom.ch> | 2020-09-02 21:50:49 +0200 |
commit | 0c96f0c60044634a6b4990860f0d39716492f9f5 (patch) | |
tree | 428c0fa6fe563801ad8c773923457c64b286d6b4 | |
parent | 7f57ac625e6271118987bd8492834ac2e2e1aea2 (diff) |
ac/llvm: add option to clamp division by zero
Replace div(x) by min(div(x), FLT_MAX)) to avoid getting a NaN result
when x is 0.
A cheaper alternative would be to use legacy mult instructions but they're
not exposed by LLVM.
Cc: mesa-stable
Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6259>
(cherry picked from commit 32f46a55c8229b2a8d67d895be18651a81f8e6ff)
-rw-r--r-- | .pick_status.json | 2 | ||||
-rw-r--r-- | src/amd/llvm/ac_nir_to_llvm.c | 8 | ||||
-rw-r--r-- | src/amd/llvm/ac_shader_abi.h | 3 |
3 files changed, 11 insertions, 2 deletions
diff --git a/.pick_status.json b/.pick_status.json index 1660b1ab836..b0791baf0c1 100644 --- a/.pick_status.json +++ b/.pick_status.json @@ -31,7 +31,7 @@ "description": "ac/llvm: add option to clamp division by zero", "nominated": true, "nomination_type": 0, - "resolution": 0, + "resolution": 1, "master_sha": null, "because_sha": null }, diff --git a/src/amd/llvm/ac_nir_to_llvm.c b/src/amd/llvm/ac_nir_to_llvm.c index 0fcd5934af0..c6f612f58df 100644 --- a/src/amd/llvm/ac_nir_to_llvm.c +++ b/src/amd/llvm/ac_nir_to_llvm.c @@ -701,6 +701,9 @@ static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr) result = emit_intrin_1f_param(&ctx->ac, "llvm.amdgcn.rcp", ac_to_float_type(&ctx->ac, def_type), src[0]); } + if (ctx->abi->clamp_div_by_zero) + result = ac_build_fmin(&ctx->ac, result, + LLVMConstReal(ac_to_float_type(&ctx->ac, def_type), FLT_MAX)); break; case nir_op_iand: result = LLVMBuildAnd(ctx->ac.builder, src[0], src[1], ""); @@ -847,6 +850,9 @@ static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr) case nir_op_frsq: result = emit_intrin_1f_param(&ctx->ac, "llvm.amdgcn.rsq", ac_to_float_type(&ctx->ac, def_type), src[0]); + if (ctx->abi->clamp_div_by_zero) + result = ac_build_fmin(&ctx->ac, result, + LLVMConstReal(ac_to_float_type(&ctx->ac, def_type), FLT_MAX)); break; case nir_op_frexp_exp: src[0] = ac_to_float(&ctx->ac, src[0]); @@ -888,7 +894,7 @@ static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr) case nir_op_ffma: /* FMA is better on GFX10, because it has FMA units instead of MUL-ADD units. */ result = emit_intrin_3f_param(&ctx->ac, ctx->ac.chip_class >= GFX10 ? "llvm.fma" : "llvm.fmuladd", - ac_to_float_type(&ctx->ac, def_type), src[0], src[1], src[2]); + ac_to_float_type(&ctx->ac, def_type), src[0], src[1], src[2]); break; case nir_op_ldexp: src[0] = ac_to_float(&ctx->ac, src[0]); diff --git a/src/amd/llvm/ac_shader_abi.h b/src/amd/llvm/ac_shader_abi.h index 18f85a7911c..337414eff38 100644 --- a/src/amd/llvm/ac_shader_abi.h +++ b/src/amd/llvm/ac_shader_abi.h @@ -186,6 +186,9 @@ struct ac_shader_abi { /* Whether bounds checks are required */ bool robust_buffer_access; + + /* Clamp div by 0 (so it won't produce NaN) */ + bool clamp_div_by_zero; }; #endif /* AC_SHADER_ABI_H */ |