summaryrefslogtreecommitdiff
path: root/tools
diff options
context:
space:
mode:
authorTomaž Vajngerl <tomaz.vajngerl@collabora.co.uk>2017-05-08 10:47:04 +0200
committerTomaž Vajngerl <quikee@gmail.com>2019-07-09 02:46:43 +0200
commitf43f9b99603736a4d54f550052509eb5f4d04b45 (patch)
tree96f8e17bc271471d54571d6ffce7146b6bcdb626 /tools
parentf65905dd0ff464774f338db44d69925f98e1766c (diff)
CPU intrinsics detection (SSE, AVX)
Adds CPU intrinsics detection: a check in the configure pass for compile-time detection, and "cpuid"-based runtime detection of which CPU instruction sets are available on the user's device.
Change-Id: I0ee4d0b22a7c51f72796d43e7383a31d03b437ad
Reviewed-on: https://gerrit.libreoffice.org/75175
Tested-by: Jenkins
Reviewed-by: Tomaž Vajngerl <quikee@gmail.com>
Diffstat (limited to 'tools')
-rw-r--r--tools/CppunitTest_tools_test.mk4
-rw-r--r--tools/qa/cppunit/test_cpuid.cxx74
-rw-r--r--tools/source/misc/cpuid.cxx119
3 files changed, 171 insertions, 26 deletions
diff --git a/tools/CppunitTest_tools_test.mk b/tools/CppunitTest_tools_test.mk
index ad56d893ae80..a4cdf8626f3a 100644
--- a/tools/CppunitTest_tools_test.mk
+++ b/tools/CppunitTest_tools_test.mk
@@ -31,6 +31,10 @@ $(eval $(call gb_CppunitTest_add_exception_objects,tools_test, \
tools/qa/cppunit/test_xmlwalker \
))
+# test_cpuid is registered separately from the add_exception_objects list above so
+# that it can be compiled with $(INTRINSICS_CXXFLAGS) in addition to the exception flags.
+$(eval $(call gb_CppunitTest_add_cxxobjects,tools_test,\
+ tools/qa/cppunit/test_cpuid, $(gb_LinkTarget_EXCEPTIONFLAGS) $(INTRINSICS_CXXFLAGS) \
+))
+
$(eval $(call gb_CppunitTest_use_sdk_api,tools_test))
$(eval $(call gb_CppunitTest_use_libraries,tools_test, \
diff --git a/tools/qa/cppunit/test_cpuid.cxx b/tools/qa/cppunit/test_cpuid.cxx
new file mode 100644
index 000000000000..fdb19d0ec133
--- /dev/null
+++ b/tools/qa/cppunit/test_cpuid.cxx
@@ -0,0 +1,74 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#include <cppunit/TestAssert.h>
+#include <cppunit/TestFixture.h>
+#include <cppunit/extensions/HelperMacros.h>
+#include <cppunit/plugin/TestPlugIn.h>
+#include <tools/cpuid.hxx>
+#include <tools/simd.hxx>
+#include <rtl/ustring.hxx>
+
+namespace
+{
+/// Consistency test for the runtime CPU feature detection in cpuid:
+/// the flag query, the human-readable string and the hasXXX() helpers
+/// must all agree with each other.
+class CpuInstructionSetSupport : public CppUnit::TestFixture
+{
+public:
+    void testCpuInstructionSetSupport();
+
+    CPPUNIT_TEST_SUITE(CpuInstructionSetSupport);
+    CPPUNIT_TEST(testCpuInstructionSetSupport);
+    CPPUNIT_TEST_SUITE_END();
+};
+
+void CpuInstructionSetSupport::testCpuInstructionSetSupport()
+{
+    OUString aString = cpuid::instructionSetSupportedString();
+
+    // Every instruction set reported as supported must also be named in the
+    // string. A match at index 0 is a valid hit, so all checks use ">= 0".
+    if (cpuid::isCpuInstructionSetSupported(cpuid::InstructionSetFlags::SSE2))
+    {
+        CPPUNIT_ASSERT(aString.indexOf("SSE2") >= 0);
+    }
+
+    if (cpuid::isCpuInstructionSetSupported(cpuid::InstructionSetFlags::SSSE3))
+    {
+        CPPUNIT_ASSERT(aString.indexOf("SSSE3") >= 0);
+    }
+
+    if (cpuid::isCpuInstructionSetSupported(cpuid::InstructionSetFlags::AVX))
+    {
+        CPPUNIT_ASSERT(aString.indexOf("AVX") >= 0);
+    }
+
+    if (cpuid::isCpuInstructionSetSupported(cpuid::InstructionSetFlags::AVX2))
+    {
+        CPPUNIT_ASSERT(aString.indexOf("AVX2") >= 0);
+    }
+
+    // Where the build enables an instruction set, the matching hasXXX() helper
+    // must report the same answer as the flag query for that same set.
+#ifdef LO_SSE2_AVAILABLE
+    CPPUNIT_ASSERT_EQUAL(cpuid::hasSSE2(),
+                         cpuid::isCpuInstructionSetSupported(cpuid::InstructionSetFlags::SSE2));
+#endif
+
+#ifdef LO_SSSE3_AVAILABLE
+    // Compare against SSSE3 (not SSE2): a CPU may have SSE2 without SSSE3.
+    CPPUNIT_ASSERT_EQUAL(cpuid::hasSSSE3(),
+                         cpuid::isCpuInstructionSetSupported(cpuid::InstructionSetFlags::SSSE3));
+#endif
+
+#ifdef LO_AVX2_AVAILABLE
+    CPPUNIT_ASSERT_EQUAL(cpuid::hasAVX2(),
+                         cpuid::isCpuInstructionSetSupported(cpuid::InstructionSetFlags::AVX2));
+#endif
+}
+
+CPPUNIT_TEST_SUITE_REGISTRATION(CpuInstructionSetSupport);
+
+} // end anonymous namespace
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/tools/source/misc/cpuid.cxx b/tools/source/misc/cpuid.cxx
index ee5093ce1892..e8699cbdf51c 100644
--- a/tools/source/misc/cpuid.cxx
+++ b/tools/source/misc/cpuid.cxx
@@ -11,25 +11,21 @@
#include <tools/cpuid.hxx>
#include <cstdint>
-namespace tools
-{
-namespace cpuid
-{
+namespace cpuid {
+
+namespace {
-namespace
-{
#if defined(_MSC_VER)
#include <intrin.h>
-void getCpuId(uint32_t array[4])
+// MSVC variant: runs cpuid for leaf nInfoType through the __cpuid intrinsic and
+// stores the four result registers in array[0..3] (eax, ebx, ecx, edx order).
+// The cast is needed because __cpuid takes an int array.
+void getCpuId(uint32_t array[4], uint32_t nInfoType)
{
- __cpuid(reinterpret_cast<int*>(array), 1);
+ __cpuid(reinterpret_cast<int*>(array), nInfoType);
}
-#else
-#if (defined(__i386__) || defined(__x86_64__))
+#elif (defined(__i386__) || defined(__x86_64__))
#include <cpuid.h>
-void getCpuId(uint32_t array[4])
+// GCC/Clang x86 variant: runs cpuid for leaf nInfoType with sub-leaf 0 and stores
+// eax, ebx, ecx, edx in array[0..3]. The raw __cpuid_count macro is used here, so
+// callers check the highest supported leaf themselves (see nLevel in
+// getCpuInstructionSetFlags()).
+void getCpuId(uint32_t array[4], uint32_t nInfoType)
{
- __get_cpuid(1, array + 0, array + 1, array + 2, array + 3);
+ __cpuid_count(nInfoType, 0, *(array + 0), *(array + 1), *(array + 2), *(array + 3));
}
#else
-void getCpuId(uint32_t array[4])
+// Fallback for targets that are neither MSVC nor x86: there is no cpuid to query,
+// so all four result slots are zeroed and no instruction set gets detected.
+// It takes the same (array, nInfoType) arguments as the other variants because
+// getCpuInstructionSetFlags() calls getCpuId(info, 0); the leaf is ignored here.
+void getCpuId(uint32_t array[4], uint32_t /*nInfoType*/)
@@ -37,33 +33,104 @@ void getCpuId(uint32_t array[4])
array[0] = array[1] = array[2] = array[3] = 0;
}
#endif
+
+// For AVX we need to check if OS has support for ymm registers
+// Returns true only when bits 1 and 2 (mask 6) of XCR0 are both set.
+// On targets that match neither #if branch xcr0 stays 0, so the result is false.
+// The only caller in this file invokes this after testing XSAVE_bit from cpuid leaf 1.
+bool checkAVXSupportInOS()
+{
+ uint32_t xcr0 = 0;
+#if defined(_MSC_VER)
+ xcr0 = uint32_t(_xgetbv(0));
+#elif (defined(__i386__) || defined(__x86_64__))
+ // ecx = 0 selects XCR0; only the low half (eax) is kept, edx is declared clobbered.
+ __asm__("xgetbv" : "=a" (xcr0) : "c" (0) : "%edx");
#endif
+ return ((xcr0 & 6) == 6); /* checking if xmm and ymm state are enabled in XCR0 */
}
-#if defined(LO_SSE2_AVAILABLE)
+} // end anonymous namespace
-bool hasSSE2()
+// Feature-bit masks tested by getCpuInstructionSetFlags().
+// Leaf 1, result slot [3] (edx):
+#define HYPER_bit (1 << 28)
+#define SSE2_bit (1 << 26)
+// Leaf 1, result slot [2] (ecx):
+#define SSSE3_bit (1 << 9)
+#define SSE41_bit (1 << 19)
+#define SSE42_bit (1 << 20)
+// NOTE(review): Intel documents leaf-1 ecx bit 27 as OSXSAVE (bit 26 is XSAVE);
+// the value looks right for guarding xgetbv, the name may be misleading - confirm.
+#define XSAVE_bit (1 << 27)
+#define AVX_bit (1 << 28)
+// Leaf 7, result slot [1] (ebx):
+#define AVX2_bit (1 << 5)
+
+// Queries cpuid and returns the set of InstructionSetFlags detected on this CPU.
+// Starts from NONE and ORs in one flag per feature bit found; when leaf 0 reports
+// a maximum level below 1 (e.g. the zero-filling getCpuId fallback) NONE is returned.
+InstructionSetFlags getCpuInstructionSetFlags()
{
- uint32_t cpuInfoArray[] = {0, 0, 0, 0};
- getCpuId(cpuInfoArray);
- return (cpuInfoArray[3] & (1 << 26)) != 0;
-}
+ InstructionSetFlags eInstructions = InstructionSetFlags::NONE;
-#else
+ // Leaf 0: slot [0] is used as the highest leaf that may be queried.
+ uint32_t info[] = {0, 0, 0, 0};
+ getCpuId(info, 0);
+ int nLevel = info[0];
-bool hasSSE2() { return false; }
+ if (nLevel >= 1)
+ {
+ // Leaf 1: feature bits are read from slots [3] and [2].
+ uint32_t aCpuInfoArray[] = {0, 0, 0, 0};
+ getCpuId(aCpuInfoArray, 1);
-#endif
+ if ((aCpuInfoArray[3] & HYPER_bit) != 0)
+ eInstructions |= InstructionSetFlags::HYPER;
-bool hasHyperThreading()
-{
- uint32_t cpuInfoArray[] = {0, 0, 0, 0};
- getCpuId(cpuInfoArray);
- return (cpuInfoArray[3] & (1 << 28)) != 0;
+ if ((aCpuInfoArray[3] & SSE2_bit) != 0)
+ eInstructions |= InstructionSetFlags::SSE2;
+
+ if ((aCpuInfoArray[2] & SSSE3_bit) != 0)
+ eInstructions |= InstructionSetFlags::SSSE3;
+
+ if ((aCpuInfoArray[2] & SSE41_bit ) != 0)
+ eInstructions |= InstructionSetFlags::SSE41;
+
+ if ((aCpuInfoArray[2] & SSE42_bit) != 0)
+ eInstructions |= InstructionSetFlags::SSE42;
+
+ // AVX is only reported when the CPU bit, the XSAVE_bit guard and the
+ // OS-level XCR0 check all pass; xgetbv is not executed unless the guard is set.
+ if (((aCpuInfoArray[2] & AVX_bit) != 0) &&
+ ((aCpuInfoArray[2] & XSAVE_bit) != 0))
+ {
+ if (checkAVXSupportInOS())
+ {
+ eInstructions |= InstructionSetFlags::AVX;
+
+ // AVX2 is only looked up once AVX is usable and leaf 7 exists.
+ if (nLevel >= 7)
+ {
+ uint32_t aExtendedInfo[] = {0, 0, 0, 0};
+ getCpuId(aExtendedInfo, 7);
+
+ if ((aExtendedInfo[1] & AVX2_bit) != 0)
+ eInstructions |= InstructionSetFlags::AVX2;
+ }
+ }
+ }
+ }
+
+ return eInstructions;
}
+// Returns true when every flag set in eInstructions is also set in the detected
+// CPU flags. Detection runs once: the result of getCpuInstructionSetFlags() is kept
+// in a function-local static and reused by later calls.
+bool isCpuInstructionSetSupported(InstructionSetFlags eInstructions)
+{
+ static InstructionSetFlags eCPUFlags = getCpuInstructionSetFlags();
+ return (eCPUFlags & eInstructions) == eInstructions;
}
+
+// Builds a human-readable list of the detected instruction sets, in the fixed
+// order SSE2, SSSE3, SSE4.1, SSE4.2, AVX, AVX2. Each name is followed by a single
+// space (so a non-empty result ends with a space); the result is empty when none
+// of these sets is detected. The HYPER flag is not included in the string.
+OUString instructionSetSupportedString()
+{
+ OUString aString;
+ if (isCpuInstructionSetSupported(InstructionSetFlags::SSE2))
+ aString += "SSE2 ";
+ if (isCpuInstructionSetSupported(InstructionSetFlags::SSSE3))
+ aString += "SSSE3 ";
+ if (isCpuInstructionSetSupported(InstructionSetFlags::SSE41))
+ aString += "SSE4.1 ";
+ if (isCpuInstructionSetSupported(InstructionSetFlags::SSE42))
+ aString += "SSE4.2 ";
+ if (isCpuInstructionSetSupported(InstructionSetFlags::AVX))
+ aString += "AVX ";
+ if (isCpuInstructionSetSupported(InstructionSetFlags::AVX2))
+ aString += "AVX2 ";
+ return aString;
}
+} // end cpuid
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */