summary | refs | log | tree | commit | diff
path: root/sc
diff options
context:
space:
mode:
author: Luboš Luňák <l.lunak@collabora.com> 2021-10-26 23:40:47 +0200
committer: Luboš Luňák <l.lunak@collabora.com> 2021-10-27 15:02:11 +0200
commit: ef42ce579f0e4e4c436f70615f3adeb9f0f68217 (patch)
tree: b688259454d3b571831bc3dd706ec3c974dd1f45 /sc
parent: 56b0d05991391d7a885e6928138d5512cbbdfb47 (diff)
fix AVX512 detection
The value wasn't in config_host.mk.in, so it's never been used.
And also fix Calc Kahan CPU-specific code yet again :(

Change-Id: Iacfd500e5a662b2b4b96a009d129a012d278a3ad
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/124248
Tested-by: Jenkins
Reviewed-by: Luboš Luňák <l.lunak@collabora.com>
Diffstat (limited to 'sc')
-rw-r--r-- sc/inc/arraysumfunctor.hxx 5
-rw-r--r-- sc/inc/arraysumfunctorinternal.hxx 6
-rw-r--r-- sc/source/core/tool/arraysumAVX.cxx 21
-rw-r--r-- sc/source/core/tool/arraysumAVX512.cxx 41
-rw-r--r-- sc/source/core/tool/arraysumSSE2.cxx 21
5 files changed, 37 insertions, 57 deletions
diff --git a/sc/inc/arraysumfunctor.hxx b/sc/inc/arraysumfunctor.hxx
index d251b4a6f9fb..b727f5893a8c 100644
--- a/sc/inc/arraysumfunctor.hxx
+++ b/sc/inc/arraysumfunctor.hxx
@@ -19,8 +19,9 @@
namespace sc::op
{
/* Checkout available optimization options */
-const bool hasAVX = cpuid::hasAVX();
-const bool hasSSE2 = cpuid::hasSSE2();
+const bool hasAVX512F = hasAVX512FCode() && cpuid::hasAVX512F();
+const bool hasAVX = hasAVXCode() && cpuid::hasAVX();
+const bool hasSSE2 = hasSSE2Code() && cpuid::hasSSE2();
/**
* If no boosts available, Unrolled KahanSum.
diff --git a/sc/inc/arraysumfunctorinternal.hxx b/sc/inc/arraysumfunctorinternal.hxx
index a06e3fc17439..e939dbd3037d 100644
--- a/sc/inc/arraysumfunctorinternal.hxx
+++ b/sc/inc/arraysumfunctorinternal.hxx
@@ -13,8 +13,6 @@
namespace sc::op
{
-SC_DLLPUBLIC extern const bool hasAVX512F;
-
// Plain old data structure, to be used by code compiled with CPU intrinsics without generating any
// code for it (so that code requiring intrinsics doesn't get accidentally selected as the one copy
// when merging duplicates).
@@ -29,6 +27,10 @@ SC_DLLPUBLIC KahanSumSimple executeAVX512F(size_t& i, size_t nSize, const double
SC_DLLPUBLIC KahanSumSimple executeAVX(size_t& i, size_t nSize, const double* pCurrent);
SC_DLLPUBLIC KahanSumSimple executeSSE2(size_t& i, size_t nSize, const double* pCurrent);
+SC_DLLPUBLIC bool hasAVX512FCode();
+SC_DLLPUBLIC bool hasAVXCode();
+SC_DLLPUBLIC bool hasSSE2Code();
+
} // namespace
/* vim:set shiftwidth=4 softtabstop=4 expandtab cinoptions=b1,g0,N-s cinkeys+=0=break: */
diff --git a/sc/source/core/tool/arraysumAVX.cxx b/sc/source/core/tool/arraysumAVX.cxx
index c55d71f22983..e256248047d0 100644
--- a/sc/source/core/tool/arraysumAVX.cxx
+++ b/sc/source/core/tool/arraysumAVX.cxx
@@ -20,7 +20,9 @@
namespace sc::op
{
-#ifdef LO_AVX_AVAILABLE // Old processors
+#ifdef LO_AVX_AVAILABLE
+
+bool hasAVXCode() { return true; }
using namespace AVX;
@@ -48,13 +50,10 @@ static inline void sumAVX(__m256d& sum, __m256d& err, const __m256d& value)
sum = t;
}
-#endif
-
/** Execute Kahan sum with AVX.
*/
KahanSumSimple executeAVX(size_t& i, size_t nSize, const double* pCurrent)
{
-#ifdef LO_AVX_AVAILABLE
// Make sure we don't fall out of bounds.
// This works by sums of 8 terms.
// So the 8'th term is i+7
@@ -107,14 +106,16 @@ KahanSumSimple executeAVX(size_t& i, size_t nSize, const double* pCurrent)
return { sums[0], errs[0] };
}
return { 0.0, 0.0 };
-#else
- (void)i;
- (void)nSize;
- (void)pCurrent;
- abort();
-#endif
}
+#else // LO_AVX_AVAILABLE
+
+bool hasAVXCode() { return false; }
+
+KahanSumSimple executeAVX(size_t&, size_t, const double*) { abort(); }
+
+#endif
+
} // end namespace sc::op
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/sc/source/core/tool/arraysumAVX512.cxx b/sc/source/core/tool/arraysumAVX512.cxx
index 987e5a3e6ff6..6a3235a58e2e 100644
--- a/sc/source/core/tool/arraysumAVX512.cxx
+++ b/sc/source/core/tool/arraysumAVX512.cxx
@@ -18,25 +18,11 @@
#include <stdlib.h>
-/* TODO Remove this once GCC updated and AVX512 can work. */
-#ifdef __GNUC__
-#if __GNUC__ < 9
-#ifdef LO_AVX512F_AVAILABLE
-#define HAS_LO_AVX512F_AVAILABLE
-#undef LO_AVX512F_AVAILABLE
-#endif
-#endif
-#endif
-
namespace sc::op
{
#ifdef LO_AVX512F_AVAILABLE
-const bool hasAVX512F = cpuid::hasAVX512F();
-#else
-const bool hasAVX512F = false;
-#endif
-#ifdef LO_AVX512F_AVAILABLE // New processors
+bool hasAVX512FCode() { return true; }
using namespace AVX512;
@@ -62,13 +48,10 @@ static inline void sumAVX512(__m512d& sum, __m512d& err, const __m512d& value)
sum = t;
}
-#endif
-
/** Execute Kahan sum with AVX512.
*/
KahanSumSimple executeAVX512F(size_t& i, size_t nSize, const double* pCurrent)
{
-#ifdef LO_AVX512F_AVAILABLE // New processors
// Make sure we don't fall out of bounds.
// This works by sums of 8 terms.
// So the 8'th term is i+7
@@ -122,24 +105,16 @@ KahanSumSimple executeAVX512F(size_t& i, size_t nSize, const double* pCurrent)
return { sums[0], errs[0] };
}
return { 0.0, 0.0 };
-#else
- (void)i;
- (void)nSize;
- (void)pCurrent;
- abort();
-#endif
}
-} // end namespace sc::op
+#else // LO_AVX512F_AVAILABLE
+
+bool hasAVX512FCode() { return false; }
+
+KahanSumSimple executeAVX512F(size_t&, size_t, const double*) { abort(); }
-/* TODO Remove this once GCC updated and AVX512 can work. */
-#ifdef __GNUC__
-#if __GNUC__ < 9
-#ifdef HAS_LO_AVX512F_AVAILABLE
-#define LO_AVX512F_AVAILABLE
-#undef HAS_LO_AVX512F_AVAILABLE
-#endif
-#endif
#endif
+} // end namespace sc::op
+
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/sc/source/core/tool/arraysumSSE2.cxx b/sc/source/core/tool/arraysumSSE2.cxx
index b4edb98286f9..1a5cc2f00dfe 100644
--- a/sc/source/core/tool/arraysumSSE2.cxx
+++ b/sc/source/core/tool/arraysumSSE2.cxx
@@ -20,7 +20,9 @@
namespace sc::op
{
-#ifdef LO_SSE2_AVAILABLE // Old processors
+#ifdef LO_SSE2_AVAILABLE
+
+bool hasSSE2Code() { return true; }
using namespace SSE2;
@@ -47,13 +49,10 @@ static inline void sumSSE2(__m128d& sum, __m128d& err, const __m128d& value)
sum = t;
}
-#endif
-
/** Execute Kahan sum with SSE2.
*/
KahanSumSimple executeSSE2(size_t& i, size_t nSize, const double* pCurrent)
{
-#ifdef LO_SSE2_AVAILABLE
// Make sure we don't fall out of bounds.
// This works by sums of 8 terms.
// So the 8'th term is i+7
@@ -120,13 +119,15 @@ KahanSumSimple executeSSE2(size_t& i, size_t nSize, const double* pCurrent)
return { sums[0], errs[0] };
}
return { 0.0, 0.0 };
-#else
- (void)i;
- (void)nSize;
- (void)pCurrent;
- abort();
-#endif
}
+
+#else // LO_SSE2_AVAILABLE
+
+bool hasSSE2Code() { return false; }
+
+KahanSumSimple executeSSE2(size_t&, size_t, const double*) { abort(); }
+
+#endif
}
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */