summaryrefslogtreecommitdiff
path: root/src/gallium/auxiliary/gallivm
diff options
context:
space:
mode:
authorZack Rusin <zackr@vmware.com>2013-12-06 01:28:25 -0500
committerZack Rusin <zackr@vmware.com>2013-12-10 16:39:48 -0500
commit155139059ba588da1161eaa692515cacdead9f4e (patch)
treef64daec33014ca5ea6ccf8d93a58ae064dda28eb /src/gallium/auxiliary/gallivm
parent1e71493afa263791b2ff10afd2fbc36a7effa73f (diff)
llvmpipe: fix blending with half-float formats
The fact that we flush denorms to zero breaks our half-float conversion and blending. This patch enables denorms for blending. It's a little tricky due to the llvm bug that makes it incorrectly reorder the mxcsr intrinsics: http://llvm.org/bugs/show_bug.cgi?id=6393 Signed-off-by: Zack Rusin <zackr@vmware.com> Reviewed-by: José Fonseca <jfonseca@vmware.com> Reviewed-by: Roland Scheidegger <sroland@vmware.com> Signed-off-by: Zack Rusin <zackr@vmware.com>
Diffstat (limited to 'src/gallium/auxiliary/gallivm')
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_arit.c71
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_arit.h11
2 files changed, 82 insertions, 0 deletions
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
index 70929e752b0..440dd0b6ac2 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
@@ -64,6 +64,17 @@
#include "lp_bld_arit.h"
#include "lp_bld_flow.h"
+#if defined(PIPE_ARCH_SSE)
+#include <xmmintrin.h>
+#endif
+
+#ifndef _MM_DENORMALS_ZERO_MASK /* fallback when no included header provides it */
+#define _MM_DENORMALS_ZERO_MASK 0x0040 /* DAZ bit (64), OR-ed into the mask only when util_cpu_caps.has_daz */
+#endif
+
+#ifndef _MM_FLUSH_ZERO_MASK /* fallback when no included header provides it */
+#define _MM_FLUSH_ZERO_MASK 0x8000 /* FTZ bit (32768), base of the mask in lp_build_fpstate_set_denorms_zero */
+#endif
#define EXP_POLY_DEGREE 5
@@ -3489,3 +3500,63 @@ lp_build_is_inf_or_nan(struct gallivm_state *gallivm,
return ret;
}
+
+LLVMValueRef
+lp_build_fpstate_get(struct gallivm_state *gallivm)
+{ /*
+ * Emit IR that captures the current MXCSR register in memory.
+ *
+ * A 32-bit integer slot named "mxcsr_ptr" is obtained from
+ * lp_build_alloca() and passed as the only argument to the
+ * llvm.x86.sse.stmxcsr intrinsic, which writes MXCSR into it.
+ *
+ * Returns the pointer to that slot, or 0 when util_cpu_caps.has_sse is
+ * not set (in which case no IR is emitted).
+ */
+ if (util_cpu_caps.has_sse) {
+ LLVMBuilderRef builder = gallivm->builder;
+ LLVMValueRef mxcsr_ptr = lp_build_alloca(
+ gallivm,
+ LLVMInt32TypeInContext(gallivm->context),
+ "mxcsr_ptr");
+ lp_build_intrinsic(builder,
+ "llvm.x86.sse.stmxcsr",
+ LLVMVoidTypeInContext(gallivm->context),
+ &mxcsr_ptr, 1); /* one argument: the address MXCSR is stored to */
+ return mxcsr_ptr;
+ }
+ return 0; /* no SSE: nothing was emitted, so there is no slot to return */
+}
+
+/**
+ * Emit IR that sets or clears the denormal-flushing bits of MXCSR.
+ *
+ * The bits affected are flush-to-zero (FTZ) and, only when
+ * util_cpu_caps.has_daz is set, denormals-are-zero (DAZ).  The current
+ * MXCSR is read with lp_build_fpstate_get(), modified, written back to
+ * the same slot and reloaded with lp_build_fpstate_set().
+ *
+ * @param gallivm  state whose builder receives the generated IR
+ * @param zero     non-zero to set the bits, zero to clear them
+ *
+ * Nothing is emitted when util_cpu_caps.has_sse is not set.
+ */
+void
+lp_build_fpstate_set_denorms_zero(struct gallivm_state *gallivm,
+                                  boolean zero)
+{
+   if (util_cpu_caps.has_sse) {
+      /* FTZ (32768) is always part of the mask; DAZ (64) is added below
+       * only if the CPU supports it.  The mask is unsigned so that its
+       * complement stays a 32-bit value instead of a negative int that
+       * would be sign-extended when passed to LLVMConstInt().
+       */
+      unsigned daz_ftz = _MM_FLUSH_ZERO_MASK;
+
+      LLVMBuilderRef builder = gallivm->builder;
+      LLVMValueRef mxcsr_ptr = lp_build_fpstate_get(gallivm);
+      LLVMValueRef mxcsr =
+         LLVMBuildLoad(builder, mxcsr_ptr, "mxcsr");
+
+      if (util_cpu_caps.has_daz) {
+         /* Include the denormals-are-zero bit in the mask */
+         daz_ftz |= _MM_DENORMALS_ZERO_MASK;
+      }
+      if (zero) {
+         /* Set the bits */
+         mxcsr = LLVMBuildOr(builder, mxcsr,
+                             LLVMConstInt(LLVMTypeOf(mxcsr), daz_ftz, 0), "");
+      } else {
+         /* Clear the bits, leaving every other MXCSR bit untouched */
+         mxcsr = LLVMBuildAnd(builder, mxcsr,
+                              LLVMConstInt(LLVMTypeOf(mxcsr), ~daz_ftz, 0), "");
+      }
+
+      /* Write the new value to the slot, then load MXCSR from it */
+      LLVMBuildStore(builder, mxcsr, mxcsr_ptr);
+      lp_build_fpstate_set(gallivm, mxcsr_ptr);
+   }
+}
+
+void
+lp_build_fpstate_set(struct gallivm_state *gallivm,
+ LLVMValueRef mxcsr_ptr)
+{ /*
+ * Emit IR that loads MXCSR from memory.
+ *
+ * mxcsr_ptr is passed as the only argument to the llvm.x86.sse.ldmxcsr
+ * intrinsic.  lp_build_fpstate_set_denorms_zero() calls this with the
+ * slot pointer it got from lp_build_fpstate_get(); presumably other
+ * callers are expected to do the same -- confirm against call sites.
+ *
+ * Nothing is emitted when util_cpu_caps.has_sse is not set.
+ */
+ if (util_cpu_caps.has_sse) {
+ lp_build_intrinsic(gallivm->builder,
+ "llvm.x86.sse.ldmxcsr",
+ LLVMVoidTypeInContext(gallivm->context),
+ &mxcsr_ptr, 1); /* one argument: the address MXCSR is loaded from */
+ }
+}
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.h b/src/gallium/auxiliary/gallivm/lp_bld_arit.h
index 75bf89e951e..9d2909340b2 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_arit.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.h
@@ -358,4 +358,15 @@ lp_build_is_inf_or_nan(struct gallivm_state *gallivm,
const struct lp_type type,
LLVMValueRef x);
+
+/**
+ * Emit IR that stores the current MXCSR register into a newly allocated
+ * 32-bit slot.
+ *
+ * @return pointer to the slot, or 0 when SSE is not available
+ */
+LLVMValueRef
+lp_build_fpstate_get(struct gallivm_state *gallivm);
+
+/**
+ * Emit IR that sets (zero != 0) or clears (zero == 0) the flush-to-zero
+ * bit of MXCSR, plus the denormals-are-zero bit when the CPU has it.
+ * Emits nothing when SSE is not available.
+ */
+void
+lp_build_fpstate_set_denorms_zero(struct gallivm_state *gallivm,
+                                  boolean zero);
+
+/**
+ * Emit IR that loads MXCSR from the 32-bit slot that mxcsr_ptr points to.
+ * Emits nothing when SSE is not available.
+ *
+ * @param mxcsr_ptr  pointer to the slot holding the value to load (a
+ *                   pointer, not the MXCSR value itself)
+ */
+void
+lp_build_fpstate_set(struct gallivm_state *gallivm,
+                     LLVMValueRef mxcsr_ptr);
+
#endif /* !LP_BLD_ARIT_H */