Need to try to avoid TDR also with NVIDIA cards on Windows 7 or earlier

(TDR is Timeout Detection and Recovery, which was introduced in Windows Vista.)

Change-Id: Ic49629adf7630e61cebcdfcc431ca96ba3fbaf7e
Reviewed-on: https://gerrit.libreoffice.org/27518
Tested-by: Jenkins <ci@libreoffice.org>
Reviewed-by: Tor Lillqvist <tml@collabora.com>
author: Laszlo Nemeth <nemeth@numbertext.org> 2016-07-25 15:37:18 +0300
committer: Tor Lillqvist <tml@collabora.com> 2016-07-26 07:41:38 +0000
commit: 00a58dfead6e05724b252f74e24f9f6c89a68d76 (patch)
tree: efce2894ec854c343e7bcf5d57a3ceacba4a76a2
parent: eee5d67d06c8e0c81ab0874171af0d5c4de18860 (diff)
3 files changed, 41 insertions, 7 deletions
diff --git a/include/opencl/openclwrapper.hxx b/include/opencl/openclwrapper.hxx
index afd34c680fbc..dae119292c25 100644
--- a/include/opencl/openclwrapper.hxx
+++ b/include/opencl/openclwrapper.hxx
@@ -47,7 +47,7 @@ struct OPENCL_DLLPUBLIC GPUEnv
     int mnCmdQueuePos;
     bool mnKhrFp64Flag;
     bool mnAmdFp64Flag;
-    cl_uint mnPreferredVectorWidthFloat;
+    bool mbNeedsTDRAvoidance;
 
     static bool isOpenCLEnabled();
 };
diff --git a/opencl/source/openclwrapper.cxx b/opencl/source/openclwrapper.cxx
index d763dcc22848..0dd76e74ba05 100644
--- a/opencl/source/openclwrapper.cxx
+++ b/opencl/source/openclwrapper.cxx
@@ -42,6 +42,10 @@
 #define OPENCL_DLL_NAME "libOpenCL.so.1"
 #endif
 
+#ifdef _WIN32_WINNT_WINBLUE
+#include <VersionHelpers.h>
+#endif
+
 #define DEVICE_NAME_LENGTH 1024
 #define DRIVER_VERSION_LENGTH 1024
 #define PLATFORM_VERSION_LENGTH 1024
@@ -455,6 +459,8 @@ void checkDeviceForDoubleSupport(cl_device_id deviceId, bool& bKhrFp64, bool& bA
 bool initOpenCLRunEnv( GPUEnv *gpuInfo )
 {
     OpenCLZone zone;
+    cl_uint nPreferredVectorWidthFloat;
+    char pName[64];
 
     bool bKhrFp64 = false;
     bool bAmdFp64 = false;
@@ -464,11 +470,40 @@ bool initOpenCLRunEnv( GPUEnv *gpuInfo )
     gpuInfo->mnKhrFp64Flag = bKhrFp64;
     gpuInfo->mnAmdFp64Flag = bAmdFp64;
 
-    gpuInfo->mnPreferredVectorWidthFloat = 0;
+    gpuInfo->mbNeedsTDRAvoidance = false;
 
     clGetDeviceInfo(gpuInfo->mpDevID, CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT, sizeof(cl_uint),
-                    &gpuInfo->mnPreferredVectorWidthFloat, nullptr);
-    SAL_INFO("opencl", "CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT=" << gpuInfo->mnPreferredVectorWidthFloat);
+                    &nPreferredVectorWidthFloat, nullptr);
+    SAL_INFO("opencl", "CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT=" << nPreferredVectorWidthFloat);
+
+    clGetPlatformInfo(gpuInfo->mpPlatformID, CL_PLATFORM_NAME, 64,
+             pName, nullptr);
+
+    bool bIsNotWinOrIsWin8OrGreater = true;
+
+// the Win32 SDK 8.1 deprecates GetVersionEx()
+#ifdef _WIN32_WINNT_WINBLUE
+    bIsNotWinOrIsWin8OrGreater = IsWindows8OrGreater();
+#elif defined (_WIN32)
+    OSVERSIONINFO aVersionInfo;
+    memset( &aVersionInfo, 0, sizeof(aVersionInfo) );
+    aVersionInfo.dwOSVersionInfoSize = sizeof( aVersionInfo );
+    if (GetVersionEx( &aVersionInfo ))
+    {
+        // Windows 7 or lower?
+        if (aVersionInfo.dwMajorVersion < 6 ||
+           (aVersionInfo.dwMajorVersion == 6 && aVersionInfo.dwMinorVersion < 2))
+            bIsNotWinOrIsWin8OrGreater = false;
+    }
+#endif
+
+    // Heuristic: Certain old low-end OpenCL implementations don't
+    // work for us with too large group lengths. Looking at the preferred
+    // float vector width seems to be a way to detect these devices, except
+    // the non-working NVIDIA cards on Windows older than version 8.
+    gpuInfo->mbNeedsTDRAvoidance = ( nPreferredVectorWidthFloat == 4 ) ||
+        ( !bIsNotWinOrIsWin8OrGreater &&
+          OUString::createFromAscii(pName).indexOf("NVIDIA") > -1 );
 
     size_t nMaxParameterSize;
     clGetDeviceInfo(gpuInfo->mpDevID, CL_DEVICE_MAX_PARAMETER_SIZE, sizeof(size_t),
diff --git a/sc/source/core/data/formulacell.cxx b/sc/source/core/data/formulacell.cxx
index f1b239d917a2..b32d4ed45e36 100644
--- a/sc/source/core/data/formulacell.cxx
+++ b/sc/source/core/data/formulacell.cxx
@@ -4076,9 +4076,8 @@ bool ScFormulaCell::InterpretFormulaGroup()
 #ifdef _WIN32
     // Heuristic: Certain old low-end OpenCL implementations don't
     // work for us with too large group lengths. 1000 was determined
-    // empirically to be a good compromise. Looking at the preferred
-    // float vector width seems to be a way to detect these devices.
-    if (opencl::gpuEnv.mnPreferredVectorWidthFloat == 4)
+    // empirically to be a good compromise.
+    if (opencl::gpuEnv.mbNeedsTDRAvoidance)
         nMaxGroupLength = 1000;
 #endif
author	Laszlo Nemeth <nemeth@numbertext.org>	2016-07-25 15:37:18 +0300
committer	Tor Lillqvist <tml@collabora.com>	2016-07-26 07:41:38 +0000
commit	00a58dfead6e05724b252f74e24f9f6c89a68d76 (patch)
tree	efce2894ec854c343e7bcf5d57a3ceacba4a76a2
parent	eee5d67d06c8e0c81ab0874171af0d5c4de18860 (diff)