summaryrefslogtreecommitdiff
path: root/opencl
diff options
context:
space:
mode:
authorTor Lillqvist <tml@collabora.com>2014-11-27 15:13:12 +0200
committerTor Lillqvist <tml@collabora.com>2014-11-27 15:32:58 +0200
commita70b717ef872c0ac652883ecd2a82c4cc29763e2 (patch)
tree0975349b1b8798eb9d444d7c91d97d52a2f6fa4e /opencl
parentd83b031346799bff0a3298387f76b16baad2e5cf (diff)
Move more Calc-independent OpenCL stuff from the sc to the opencl module
No cleanups yet. Just removed the "sc" namespace parts now when this stuff is no longer Calc-specific. There is still horribly confusing use of the same OpenCLDevice name for both a class and as a namespace, for instance. And the OpenCLDevice class has only public static members even, so effectively it acts as just a namespace anyway... Etc. Change-Id: Idc5f30a721df0101426c676f04a85e02c5dc8443
Diffstat (limited to 'opencl')
-rw-r--r--opencl/Library_opencl.mk7
-rw-r--r--opencl/inc/opencl_device.hxx25
-rw-r--r--opencl/inc/opencl_device_selection.h641
-rw-r--r--opencl/source/opencl_device.cxx598
-rw-r--r--opencl/source/openclwrapper.cxx804
5 files changed, 2075 insertions, 0 deletions
diff --git a/opencl/Library_opencl.mk b/opencl/Library_opencl.mk
index 51ca62c71302..ad1af3761b08 100644
--- a/opencl/Library_opencl.mk
+++ b/opencl/Library_opencl.mk
@@ -9,6 +9,11 @@
$(eval $(call gb_Library_Library,opencl))
+$(eval $(call gb_Library_set_include,opencl,\
+ -I$(SRCDIR)/opencl/inc \
+ $$(INCLUDE) \
+))
+
$(eval $(call gb_Library_add_defs,opencl,\
-DOPENCL_DLLIMPLEMENTATION \
))
@@ -36,6 +41,8 @@ $(eval $(call gb_Library_use_libraries,opencl,\
$(eval $(call gb_Library_add_exception_objects,opencl,\
opencl/source/openclconfig \
+ opencl/source/openclwrapper \
+ opencl/source/opencl_device \
opencl/source/platforminfo \
))
diff --git a/opencl/inc/opencl_device.hxx b/opencl/inc/opencl_device.hxx
new file mode 100644
index 000000000000..7435879ecc83
--- /dev/null
+++ b/opencl/inc/opencl_device.hxx
@@ -0,0 +1,25 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#ifndef INCLUDED_OPENCL_INC_OPENCL_DEVICE_HXX
+#define INCLUDED_OPENCL_INC_OPENCL_DEVICE_HXX
+
+#include "opencl_device_selection.h"
+
+namespace OpenCLDevice {
+
+ds_device getDeviceSelection(const char* pFileName, bool bForceSelection = false);
+bool selectedDeviceIsOpenCL(ds_device device);
+bool selectedDeviceIsNativeCPU(ds_device device);
+
+}
+
+#endif
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/opencl/inc/opencl_device_selection.h b/opencl/inc/opencl_device_selection.h
new file mode 100644
index 000000000000..03373f4a3d0f
--- /dev/null
+++ b/opencl/inc/opencl_device_selection.h
@@ -0,0 +1,641 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#ifndef INCLUDED_OPENCL_INC_OPENCL_DEVICE_SELECTION_H
+#define INCLUDED_OPENCL_INC_OPENCL_DEVICE_SELECTION_H
+
+#ifdef _MSC_VER
+#define _CRT_SECURE_NO_WARNINGS
+#endif
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <clew.h>
+
+#define DS_DEVICE_NAME_LENGTH 256
+
+enum ds_status
+{
+ DS_SUCCESS = 0
+ ,DS_INVALID_PROFILE = 1000
+ ,DS_MEMORY_ERROR
+ , DS_INVALID_PERF_EVALUATOR_TYPE
+ , DS_INVALID_PERF_EVALUATOR
+ , DS_PERF_EVALUATOR_ERROR
+ , DS_FILE_ERROR
+ , DS_UNKNOWN_DEVICE_TYPE
+ , DS_PROFILE_FILE_ERROR
+ , DS_SCORE_SERIALIZER_ERROR
+ , DS_SCORE_DESERIALIZER_ERROR
+};
+
+// device type
+enum ds_device_type
+{
+ DS_DEVICE_NATIVE_CPU = 0
+ ,DS_DEVICE_OPENCL_DEVICE
+};
+
+
+struct ds_device
+{
+ ds_device_type type;
+ cl_device_id oclDeviceID;
+ char* oclPlatformVendor;
+ char* oclDeviceName;
+ char* oclDriverVersion;
+ void* score; // a pointer to the score data, the content/format is application defined
+};
+
+struct ds_profile
+{
+ unsigned int numDevices;
+ ds_device* devices;
+ const char* version;
+};
+
+// deallocate memory used by score
+typedef ds_status(* ds_score_release)(void* score);
+inline ds_status releaseDSProfile(ds_profile* profile, ds_score_release sr)
+{
+ ds_status status = DS_SUCCESS;
+ if (profile != NULL)
+ {
+ if (profile->devices != NULL && sr != NULL)
+ {
+ unsigned int i;
+ for (i = 0; i < profile->numDevices; i++)
+ {
+ free(profile->devices[i].oclPlatformVendor);
+ free(profile->devices[i].oclDeviceName);
+ free(profile->devices[i].oclDriverVersion);
+ status = sr(profile->devices[i].score);
+ if (status != DS_SUCCESS) break;
+ }
+ free(profile->devices);
+ }
+ free(profile);
+ }
+ return status;
+}
+
+
+inline ds_status initDSProfile(ds_profile** p, const char* version)
+{
+ int numDevices;
+ cl_uint numPlatforms;
+ cl_platform_id* platforms = NULL;
+ cl_device_id* devices = NULL;
+ ds_status status = DS_SUCCESS;
+ ds_profile* profile = NULL;
+ unsigned int next;
+ unsigned int i;
+
+ if (p == NULL) return DS_INVALID_PROFILE;
+
+ profile = (ds_profile*)malloc(sizeof(ds_profile));
+ if (profile == NULL) return DS_MEMORY_ERROR;
+
+ memset(profile, 0, sizeof(ds_profile));
+
+ clGetPlatformIDs(0, NULL, &numPlatforms);
+ if (numPlatforms != 0)
+ {
+ platforms = (cl_platform_id*)malloc(numPlatforms * sizeof(cl_platform_id));
+ if (platforms == NULL)
+ {
+ status = DS_MEMORY_ERROR;
+ goto cleanup;
+ }
+ clGetPlatformIDs(numPlatforms, platforms, NULL);
+ }
+
+ numDevices = 0;
+ for (i = 0; i < (unsigned int)numPlatforms; i++)
+ {
+ cl_uint num;
+ clGetDeviceIDs(platforms[i], CL_DEVICE_TYPE_ALL, 0, NULL, &num);
+ numDevices += num;
+ }
+ if (numDevices != 0)
+ {
+ devices = (cl_device_id*)malloc(numDevices * sizeof(cl_device_id));
+ if (devices == NULL)
+ {
+ status = DS_MEMORY_ERROR;
+ goto cleanup;
+ }
+ }
+
+ profile->numDevices = numDevices + 1; // +1 to numDevices to include the native CPU
+ profile->devices = (ds_device*)malloc(profile->numDevices * sizeof(ds_device));
+ if (profile->devices == NULL)
+ {
+ profile->numDevices = 0;
+ status = DS_MEMORY_ERROR;
+ goto cleanup;
+ }
+ memset(profile->devices, 0, profile->numDevices * sizeof(ds_device));
+
+ next = 0;
+ for (i = 0; i < (unsigned int)numPlatforms; i++)
+ {
+ cl_uint num;
+ unsigned j;
+ char vendor[256];
+ if (clGetPlatformInfo(platforms[i], CL_PLATFORM_VENDOR, sizeof(vendor), vendor, NULL) != CL_SUCCESS)
+ vendor[0] = '\0';
+ clGetDeviceIDs(platforms[i], CL_DEVICE_TYPE_ALL, numDevices, devices, &num);
+ for (j = 0; j < num; j++, next++)
+ {
+ char buffer[DS_DEVICE_NAME_LENGTH];
+ size_t length;
+
+ profile->devices[next].type = DS_DEVICE_OPENCL_DEVICE;
+ profile->devices[next].oclDeviceID = devices[j];
+
+ profile->devices[next].oclPlatformVendor = strdup(vendor);
+
+ clGetDeviceInfo(profile->devices[next].oclDeviceID, CL_DEVICE_NAME
+ , DS_DEVICE_NAME_LENGTH, &buffer, NULL);
+ length = strlen(buffer);
+ profile->devices[next].oclDeviceName = (char*)malloc(length + 1);
+ memcpy(profile->devices[next].oclDeviceName, buffer, length + 1);
+
+ clGetDeviceInfo(profile->devices[next].oclDeviceID, CL_DRIVER_VERSION
+ , DS_DEVICE_NAME_LENGTH, &buffer, NULL);
+ length = strlen(buffer);
+ profile->devices[next].oclDriverVersion = (char*)malloc(length + 1);
+ memcpy(profile->devices[next].oclDriverVersion, buffer, length + 1);
+ }
+ }
+ profile->devices[next].type = DS_DEVICE_NATIVE_CPU;
+ profile->version = version;
+
+cleanup:
+ if (platforms) free(platforms);
+ if (devices) free(devices);
+ if (status == DS_SUCCESS)
+ {
+ *p = profile;
+ }
+ else
+ {
+ if (profile)
+ {
+ if (profile->devices) free(profile->devices);
+ free(profile);
+ }
+ }
+ return status;
+}
+
+// Pointer to a function that calculates the score of a device (ex: device->score)
+// update the data size of score. The encoding and the format of the score data
+// is implementation defined. The function should return DS_SUCCESS if there's no error to be reported.
+typedef ds_status(* ds_perf_evaluator)(ds_device* device, void* data);
+
+typedef enum {
+ DS_EVALUATE_ALL
+ , DS_EVALUATE_NEW_ONLY
+} ds_evaluation_type;
+
+inline ds_status profileDevices(ds_profile* profile, const ds_evaluation_type type,
+ ds_perf_evaluator evaluator, void* evaluatorData, unsigned int* numUpdates)
+{
+ ds_status status = DS_SUCCESS;
+ unsigned int i;
+ unsigned int updates = 0;
+
+ if (profile == NULL)
+ {
+ return DS_INVALID_PROFILE;
+ }
+ if (evaluator == NULL)
+ {
+ return DS_INVALID_PERF_EVALUATOR;
+ }
+
+ for (i = 0; i < profile->numDevices; i++)
+ {
+ ds_status evaluatorStatus;
+
+ switch (type)
+ {
+ case DS_EVALUATE_NEW_ONLY:
+ if (profile->devices[i].score != NULL) break;
+ // else fall through
+ case DS_EVALUATE_ALL:
+ evaluatorStatus = evaluator(profile->devices + i, evaluatorData);
+ if (evaluatorStatus != DS_SUCCESS)
+ {
+ status = evaluatorStatus;
+ return status;
+ }
+ updates++;
+ break;
+ default:
+ return DS_INVALID_PERF_EVALUATOR_TYPE;
+ break;
+ };
+ }
+ if (numUpdates) *numUpdates = updates;
+ return status;
+}
+
+
+#define DS_TAG_VERSION "<version>"
+#define DS_TAG_VERSION_END "</version>"
+#define DS_TAG_DEVICE "<device>"
+#define DS_TAG_DEVICE_END "</device>"
+#define DS_TAG_SCORE "<score>"
+#define DS_TAG_SCORE_END "</score>"
+#define DS_TAG_DEVICE_TYPE "<type>"
+#define DS_TAG_DEVICE_TYPE_END "</type>"
+#define DS_TAG_DEVICE_NAME "<name>"
+#define DS_TAG_DEVICE_NAME_END "</name>"
+#define DS_TAG_DEVICE_DRIVER_VERSION "<driver>"
+#define DS_TAG_DEVICE_DRIVER_VERSION_END "</driver>"
+
+#define DS_DEVICE_NATIVE_CPU_STRING "native_cpu"
+
+typedef ds_status(* ds_score_serializer)(ds_device* device, void** serializedScore, unsigned int* serializedScoreSize);
+inline ds_status writeProfileToFile(ds_profile* profile, ds_score_serializer serializer, const char* file)
+{
+ ds_status status = DS_SUCCESS;
+ FILE* profileFile = NULL;
+
+
+ if (profile == NULL) return DS_INVALID_PROFILE;
+
+ profileFile = fopen(file, "wb");
+ if (profileFile == NULL)
+ {
+ status = DS_FILE_ERROR;
+ }
+ else
+ {
+ unsigned int i;
+
+ // write version string
+ fwrite(DS_TAG_VERSION, sizeof(char), strlen(DS_TAG_VERSION), profileFile);
+ fwrite(profile->version, sizeof(char), strlen(profile->version), profileFile);
+ fwrite(DS_TAG_VERSION_END, sizeof(char), strlen(DS_TAG_VERSION_END), profileFile);
+ fwrite("\n", sizeof(char), 1, profileFile);
+
+ for (i = 0; i < profile->numDevices && status == DS_SUCCESS; i++)
+ {
+ void* serializedScore;
+ unsigned int serializedScoreSize;
+
+ fwrite(DS_TAG_DEVICE, sizeof(char), strlen(DS_TAG_DEVICE), profileFile);
+
+ fwrite(DS_TAG_DEVICE_TYPE, sizeof(char), strlen(DS_TAG_DEVICE_TYPE), profileFile);
+ fwrite(&profile->devices[i].type, sizeof(ds_device_type), 1, profileFile);
+ fwrite(DS_TAG_DEVICE_TYPE_END, sizeof(char), strlen(DS_TAG_DEVICE_TYPE_END), profileFile);
+
+ switch (profile->devices[i].type)
+ {
+ case DS_DEVICE_NATIVE_CPU:
+ {
+ // There's no need to emit a device name for the native CPU device.
+ /*
+ fwrite(DS_TAG_DEVICE_NAME, sizeof(char), strlen(DS_TAG_DEVICE_NAME), profileFile);
+ fwrite(DS_DEVICE_NATIVE_CPU_STRING,sizeof(char),strlen(DS_DEVICE_NATIVE_CPU_STRING), profileFile);
+ fwrite(DS_TAG_DEVICE_NAME_END, sizeof(char), strlen(DS_TAG_DEVICE_NAME_END), profileFile);
+ */
+ }
+ break;
+ case DS_DEVICE_OPENCL_DEVICE:
+ {
+ fwrite(DS_TAG_DEVICE_NAME, sizeof(char), strlen(DS_TAG_DEVICE_NAME), profileFile);
+ fwrite(profile->devices[i].oclDeviceName, sizeof(char), strlen(profile->devices[i].oclDeviceName), profileFile);
+ fwrite(DS_TAG_DEVICE_NAME_END, sizeof(char), strlen(DS_TAG_DEVICE_NAME_END), profileFile);
+
+ fwrite(DS_TAG_DEVICE_DRIVER_VERSION, sizeof(char), strlen(DS_TAG_DEVICE_DRIVER_VERSION), profileFile);
+ fwrite(profile->devices[i].oclDriverVersion, sizeof(char), strlen(profile->devices[i].oclDriverVersion), profileFile);
+ fwrite(DS_TAG_DEVICE_DRIVER_VERSION_END, sizeof(char), strlen(DS_TAG_DEVICE_DRIVER_VERSION_END), profileFile);
+ }
+ break;
+ default:
+ break;
+ };
+
+ fwrite(DS_TAG_SCORE, sizeof(char), strlen(DS_TAG_SCORE), profileFile);
+ status = serializer(profile->devices + i, &serializedScore, &serializedScoreSize);
+ if (status == DS_SUCCESS && serializedScore != NULL && serializedScoreSize > 0)
+ {
+ fwrite(serializedScore, sizeof(char), serializedScoreSize, profileFile);
+ free(serializedScore);
+ }
+ fwrite(DS_TAG_SCORE_END, sizeof(char), strlen(DS_TAG_SCORE_END), profileFile);
+ fwrite(DS_TAG_DEVICE_END, sizeof(char), strlen(DS_TAG_DEVICE_END), profileFile);
+ fwrite("\n", sizeof(char), 1, profileFile);
+ }
+ fclose(profileFile);
+ }
+ return status;
+}
+
+
+inline ds_status readProFile(const char* fileName, char** content, size_t* contentSize)
+{
+ FILE* input = NULL;
+ size_t size = 0;
+ char* binary = NULL;
+ long pos = -1;
+
+ *contentSize = 0;
+ *content = NULL;
+
+ input = fopen(fileName, "rb");
+ if (input == NULL)
+ {
+ return DS_FILE_ERROR;
+ }
+
+ fseek(input, 0L, SEEK_END);
+ pos = ftell(input);
+ if (pos < 0)
+ {
+ fclose(input);
+ return DS_FILE_ERROR;
+ }
+
+ size = pos;
+ rewind(input);
+ binary = (char*)malloc(size);
+ if (binary == NULL)
+ {
+ fclose(input);
+ return DS_FILE_ERROR;
+ }
+ size_t bytesRead = fread(binary, sizeof(char), size, input);
+ (void) bytesRead; // avoid warning
+ fclose(input);
+
+ *contentSize = size;
+ *content = binary;
+ return DS_SUCCESS;
+}
+
+
+inline const char* findString(const char* contentStart, const char* contentEnd, const char* string)
+{
+ size_t stringLength;
+ const char* currentPosition;
+ const char* found;
+ found = NULL;
+ stringLength = strlen(string);
+ currentPosition = contentStart;
+ for (currentPosition = contentStart; currentPosition < contentEnd; currentPosition++)
+ {
+ if (*currentPosition == string[0])
+ {
+ if (currentPosition + stringLength < contentEnd)
+ {
+ if (strncmp(currentPosition, string, stringLength) == 0)
+ {
+ found = currentPosition;
+ break;
+ }
+ }
+ }
+ }
+ return found;
+}
+
+
+typedef ds_status(* ds_score_deserializer)(ds_device* device, const unsigned char* serializedScore, unsigned int serializedScoreSize);
+inline ds_status readProfileFromFile(ds_profile* profile, ds_score_deserializer deserializer, const char* file)
+{
+
+ ds_status status = DS_SUCCESS;
+ char* contentStart = NULL;
+ const char* contentEnd = NULL;
+ size_t contentSize;
+
+ if (profile == NULL) return DS_INVALID_PROFILE;
+
+ status = readProFile(file, &contentStart, &contentSize);
+ if (status == DS_SUCCESS)
+ {
+ const char* currentPosition;
+ const char* dataStart;
+ const char* dataEnd;
+ size_t versionStringLength;
+
+ contentEnd = contentStart + contentSize;
+ currentPosition = contentStart;
+
+
+ // parse the version string
+ dataStart = findString(currentPosition, contentEnd, DS_TAG_VERSION);
+ if (dataStart == NULL)
+ {
+ status = DS_PROFILE_FILE_ERROR;
+ goto cleanup;
+ }
+ dataStart += strlen(DS_TAG_VERSION);
+
+ dataEnd = findString(dataStart, contentEnd, DS_TAG_VERSION_END);
+ if (dataEnd == NULL)
+ {
+ status = DS_PROFILE_FILE_ERROR;
+ goto cleanup;
+ }
+
+ versionStringLength = strlen(profile->version);
+ if (versionStringLength != static_cast<size_t>(dataEnd - dataStart)
+ || strncmp(profile->version, dataStart, versionStringLength) != 0)
+ {
+ // version mismatch
+ status = DS_PROFILE_FILE_ERROR;
+ goto cleanup;
+ }
+ currentPosition = dataEnd + strlen(DS_TAG_VERSION_END);
+
+ // parse the device information
+ while (true)
+ {
+ unsigned int i;
+
+ const char* deviceTypeStart;
+ const char* deviceTypeEnd;
+ ds_device_type deviceType;
+
+ const char* deviceNameStart;
+ const char* deviceNameEnd;
+
+ const char* deviceScoreStart;
+ const char* deviceScoreEnd;
+
+ const char* deviceDriverStart;
+ const char* deviceDriverEnd;
+
+ dataStart = findString(currentPosition, contentEnd, DS_TAG_DEVICE);
+ if (dataStart == NULL)
+ {
+ // nothing useful remain, quit...
+ break;
+ }
+ dataStart += strlen(DS_TAG_DEVICE);
+ dataEnd = findString(dataStart, contentEnd, DS_TAG_DEVICE_END);
+ if (dataEnd == NULL)
+ {
+ status = DS_PROFILE_FILE_ERROR;
+ goto cleanup;
+ }
+
+ // parse the device type
+ deviceTypeStart = findString(dataStart, contentEnd, DS_TAG_DEVICE_TYPE);
+ if (deviceTypeStart == NULL)
+ {
+ status = DS_PROFILE_FILE_ERROR;
+ goto cleanup;
+ }
+ deviceTypeStart += strlen(DS_TAG_DEVICE_TYPE);
+ deviceTypeEnd = findString(deviceTypeStart, contentEnd, DS_TAG_DEVICE_TYPE_END);
+ if (deviceTypeEnd == NULL)
+ {
+ status = DS_PROFILE_FILE_ERROR;
+ goto cleanup;
+ }
+ memcpy(&deviceType, deviceTypeStart, sizeof(ds_device_type));
+
+
+ // parse the device name
+ if (deviceType == DS_DEVICE_OPENCL_DEVICE)
+ {
+
+ deviceNameStart = findString(dataStart, contentEnd, DS_TAG_DEVICE_NAME);
+ if (deviceNameStart == NULL)
+ {
+ status = DS_PROFILE_FILE_ERROR;
+ goto cleanup;
+ }
+ deviceNameStart += strlen(DS_TAG_DEVICE_NAME);
+ deviceNameEnd = findString(deviceNameStart, contentEnd, DS_TAG_DEVICE_NAME_END);
+ if (deviceNameEnd == NULL)
+ {
+ status = DS_PROFILE_FILE_ERROR;
+ goto cleanup;
+ }
+
+
+ deviceDriverStart = findString(dataStart, contentEnd, DS_TAG_DEVICE_DRIVER_VERSION);
+ if (deviceDriverStart == NULL)
+ {
+ status = DS_PROFILE_FILE_ERROR;
+ goto cleanup;
+ }
+ deviceDriverStart += strlen(DS_TAG_DEVICE_DRIVER_VERSION);
+ deviceDriverEnd = findString(deviceDriverStart, contentEnd, DS_TAG_DEVICE_DRIVER_VERSION_END);
+ if (deviceDriverEnd == NULL)
+ {
+ status = DS_PROFILE_FILE_ERROR;
+ goto cleanup;
+ }
+
+
+ // check if this device is on the system
+ for (i = 0; i < profile->numDevices; i++)
+ {
+ if (profile->devices[i].type == DS_DEVICE_OPENCL_DEVICE)
+ {
+ size_t actualDeviceNameLength;
+ size_t driverVersionLength;
+
+ actualDeviceNameLength = strlen(profile->devices[i].oclDeviceName);
+ driverVersionLength = strlen(profile->devices[i].oclDriverVersion);
+ if (actualDeviceNameLength == static_cast<size_t>(deviceNameEnd - deviceNameStart)
+ && driverVersionLength == static_cast<size_t>(deviceDriverEnd - deviceDriverStart)
+ && strncmp(profile->devices[i].oclDeviceName, deviceNameStart, actualDeviceNameLength) == 0
+ && strncmp(profile->devices[i].oclDriverVersion, deviceDriverStart, driverVersionLength) == 0)
+ {
+
+ deviceScoreStart = findString(dataStart, contentEnd, DS_TAG_SCORE);
+ if (deviceScoreStart == NULL)
+ {
+ status = DS_PROFILE_FILE_ERROR;
+ goto cleanup;
+ }
+ deviceScoreStart += strlen(DS_TAG_SCORE);
+ deviceScoreEnd = findString(deviceScoreStart, contentEnd, DS_TAG_SCORE_END);
+ status = deserializer(profile->devices + i, (const unsigned char*)deviceScoreStart, deviceScoreEnd - deviceScoreStart);
+ if (status != DS_SUCCESS)
+ {
+ goto cleanup;
+ }
+ }
+ }
+ }
+
+ }
+ else if (deviceType == DS_DEVICE_NATIVE_CPU)
+ {
+ for (i = 0; i < profile->numDevices; i++)
+ {
+ if (profile->devices[i].type == DS_DEVICE_NATIVE_CPU)
+ {
+ deviceScoreStart = findString(dataStart, contentEnd, DS_TAG_SCORE);
+ if (deviceScoreStart == NULL)
+ {
+ status = DS_PROFILE_FILE_ERROR;
+ goto cleanup;
+ }
+ deviceScoreStart += strlen(DS_TAG_SCORE);
+ deviceScoreEnd = findString(deviceScoreStart, contentEnd, DS_TAG_SCORE_END);
+ status = deserializer(profile->devices + i, (const unsigned char*)deviceScoreStart, deviceScoreEnd - deviceScoreStart);
+ if (status != DS_SUCCESS)
+ {
+ goto cleanup;
+ }
+ }
+ }
+ }
+
+ // skip over the current one to find the next device
+ currentPosition = dataEnd + strlen(DS_TAG_DEVICE_END);
+ }
+ }
+cleanup:
+ if (contentStart != NULL) free(contentStart);
+ if (status != DS_SUCCESS)
+ return status;
+
+ // Check that all the devices present had valid cached scores. If
+ // not, return DS_INVALID_PROFILE and let the caller re-evaluate
+ // scores for present devices, and write a new profile file.
+ for (unsigned int i = 0; i < profile->numDevices; i++)
+ if (profile->devices[i].score == NULL)
+ return DS_INVALID_PROFILE;
+
+ return DS_SUCCESS;
+}
+
+inline ds_status getNumDeviceWithEmptyScore(ds_profile* profile, unsigned int* num)
+{
+ unsigned int i;
+ if (profile == NULL || num == NULL) return DS_MEMORY_ERROR;
+ *num = 0;
+ for (i = 0; i < profile->numDevices; i++)
+ {
+ if (profile->devices[i].score == NULL)
+ {
+ (*num)++;
+ }
+ }
+ return DS_SUCCESS;
+}
+
+#endif
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/opencl/source/opencl_device.cxx b/opencl/source/opencl_device.cxx
new file mode 100644
index 000000000000..204ab345b5e0
--- /dev/null
+++ b/opencl/source/opencl_device.cxx
@@ -0,0 +1,598 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#ifdef _WIN32
+#include <prewin.h>
+#include <postwin.h>
+#elif defined __MACH__
+#include <mach/mach_time.h>
+#else
+#include <sys/time.h>
+#endif
+
+#include <time.h>
+#include <math.h>
+#include <float.h>
+#include <iostream>
+#include <sstream>
+#include <vector>
+
+#include <boost/scoped_ptr.hpp>
+
+#include <comphelper/random.hxx>
+#include <opencl/openclconfig.hxx>
+#include <opencl/openclwrapper.hxx>
+#include <opencl/platforminfo.hxx>
+#include <sal/log.hxx>
+
+#include "opencl_device.hxx"
+
+#define INPUTSIZE 15360
+#define OUTPUTSIZE 15360
+
+#define STRINGIFY(...) #__VA_ARGS__"\n"
+
+#define DS_CHECK_STATUS(status, name) \
+ if (CL_SUCCESS != status) \
+ { \
+ SAL_INFO("opencl.device", "Error code is " << status << " at " name); \
+ }
+
+namespace OpenCLDevice {
+
+bool bIsInited = false;
+bool bIsDeviceSelected = false;
+ds_device selectedDevice;
+
+struct LibreOfficeDeviceScore
+{
+ double fTime; // small time means faster device
+ bool bNoCLErrors; // were there any opencl errors
+};
+
+struct LibreOfficeDeviceEvaluationIO
+{
+ std::vector<double> input0;
+ std::vector<double> input1;
+ std::vector<double> input2;
+ std::vector<double> input3;
+ std::vector<double> output;
+ unsigned long inputSize;
+ unsigned long outputSize;
+};
+
+struct timer
+{
+#ifdef _WIN32
+ LARGE_INTEGER start;
+#else
+ long long start;
+#endif
+};
+
+const char* source = STRINGIFY(
+\n#if defined(KHR_DP_EXTENSION)
+\n#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+\n#elif defined(AMD_DP_EXTENSION)
+\n#pragma OPENCL EXTENSION cl_amd_fp64 : enable
+\n#endif
+ \n
+ int isNan(fp_t a) { return a != a; }
+ fp_t fsum(fp_t a, fp_t b) { return a + b; }
+
+ fp_t fAverage(__global fp_t* input)
+{
+ fp_t sum = 0;
+ int count = 0;
+ for (int i = 0; i < INPUTSIZE; i++)
+ {
+ if (!isNan(input[i]))
+ {
+ sum = fsum(input[i], sum);
+ count += 1;
+ }
+ }
+ return sum / (fp_t)count;
+}
+ fp_t fMin(__global fp_t* input)
+{
+ fp_t min = MAXFLOAT;
+ for (int i = 0; i < INPUTSIZE; i++)
+ {
+ if (!isNan(input[i]))
+ {
+ min = fmin(input[i], min);
+ }
+ }
+ return min;
+}
+ fp_t fSoP(__global fp_t* input0, __global fp_t* input1)
+{
+ fp_t sop = 0.0;
+ for (int i = 0; i < INPUTSIZE; i++)
+ {
+ sop += (isNan(input0[i]) ? 0 : input0[i]) * (isNan(input1[i]) ? 0 : input1[i]);
+ }
+ return sop;
+}
+ __kernel void DynamicKernel(
+ __global fp_t* result, __global fp_t* input0, __global fp_t* input1, __global fp_t* input2, __global fp_t* input3)
+{
+ int gid0 = get_global_id(0);
+ fp_t tmp0 = fAverage(input0);
+ fp_t tmp1 = fMin(input1) * fSoP(input2, input3);
+ result[gid0] = fsum(tmp0, tmp1);
+}
+ );
+
+size_t sourceSize[] = { strlen(source) };
+
+/*************************************************************************/
+/* INTERNAL FUNCTIONS */
+/*************************************************************************/
+/* Timer functions - start timer */
+void timerStart(timer* mytimer)
+{
+#ifdef _WIN32
+ QueryPerformanceCounter(&mytimer->start);
+#elif defined __MACH__
+ mytimer->start = mach_absolute_time();
+#else
+ struct timespec s;
+ clock_gettime(CLOCK_MONOTONIC, &s);
+ mytimer->start = (long long)s.tv_sec * (long long)1.0E6 + (long long)s.tv_nsec / (long long)1.0E3;
+#endif
+}
+
+/* Timer functions - get current value */
+double timerCurrent(timer* mytimer)
+{
+#ifdef _WIN32
+ LARGE_INTEGER stop, frequency;
+ QueryPerformanceCounter(&stop);
+ QueryPerformanceFrequency(&frequency);
+ double time = ((double)(stop.QuadPart - mytimer->start.QuadPart) / frequency.QuadPart);
+#elif defined __MACH__
+ static mach_timebase_info_data_t info = { 0, 0 };
+ if (info.numer == 0)
+ mach_timebase_info(&info);
+ long long stop = mach_absolute_time();
+ double time = ((stop - mytimer->start) * (double) info.numer / info.denom) / 1.0E9;
+#else
+ struct timespec s;
+ long long stop;
+ clock_gettime(CLOCK_MONOTONIC, &s);
+ stop = (long long)s.tv_sec * (long long)1.0E6 + (long long)s.tv_nsec / (long long)1.0E3;
+ double time = ((double)(stop - mytimer->start) / 1.0E6);
+#endif
+ return time;
+}
+
+/* Random number generator */
+double random(double min, double max)
+{
+ if (min == max)
+ return min;
+ return comphelper::rng::uniform_real_distribution(min, max);
+}
+
+/* Populate input */
+void populateInput(LibreOfficeDeviceEvaluationIO* testData)
+{
+ double* input0 = &testData->input0[0];
+ double* input1 = &testData->input1[0];
+ double* input2 = &testData->input2[0];
+ double* input3 = &testData->input3[0];
+ for (unsigned long i = 0; i < testData->inputSize; i++)
+ {
+ input0[i] = random(0, i);
+ input1[i] = random(0, i);
+ input2[i] = random(0, i);
+ input3[i] = random(0, i);
+ }
+}
+/* Encode score object as byte string */
+ds_status serializeScore(ds_device* device, void** serializedScore, unsigned int* serializedScoreSize)
+{
+ *serializedScoreSize = sizeof(LibreOfficeDeviceScore);
+ *serializedScore = (void*)new unsigned char[*serializedScoreSize];
+ memcpy(*serializedScore, device->score, *serializedScoreSize);
+ return DS_SUCCESS;
+}
+
+/* Parses byte string and stores in score object */
+ds_status deserializeScore(ds_device* device, const unsigned char* serializedScore, unsigned int serializedScoreSize)
+{
+ // check that serializedScoreSize == sizeof(LibreOfficeDeviceScore);
+ device->score = new LibreOfficeDeviceScore;
+ memcpy(device->score, serializedScore, serializedScoreSize);
+ return DS_SUCCESS;
+}
+
+/* Releases memory held by score */
+ds_status releaseScore(void* score)
+{
+ if (NULL != score)
+ {
+ delete (LibreOfficeDeviceScore*)score;
+ }
+ return DS_SUCCESS;
+}
+
+/* Evaluate devices */
+ds_status evaluateScoreForDevice(ds_device* device, void* evalData)
+{
+ if (DS_DEVICE_OPENCL_DEVICE == device->type)
+ {
+ /* Evaluating an OpenCL device */
+ SAL_INFO("opencl.device", "Device: \"" << device->oclDeviceName << "\" (OpenCL) evaluation...");
+ cl_int clStatus;
+ /* Check for 64-bit float extensions */
+ size_t aDevExtInfoSize = 0;
+ clStatus = clGetDeviceInfo(device->oclDeviceID, CL_DEVICE_EXTENSIONS, 0, NULL, &aDevExtInfoSize);
+ DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clGetDeviceInfo");
+
+ char* aExtInfo = new char[aDevExtInfoSize];
+ clStatus = clGetDeviceInfo(device->oclDeviceID, CL_DEVICE_EXTENSIONS, sizeof(char) * aDevExtInfoSize, aExtInfo, NULL);
+ DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clGetDeviceInfo");
+ bool bKhrFp64Flag = false;
+ bool bAmdFp64Flag = false;
+ const char* buildOption = NULL;
+ std::string tmpStr("-Dfp_t=double -Dfp_t4=double4 -Dfp_t16=double16 -DINPUTSIZE=");
+ std::ostringstream tmpOStrStr;
+ tmpOStrStr << std::dec << INPUTSIZE;
+ tmpStr.append(tmpOStrStr.str());
+
+ if ((std::string(aExtInfo)).find("cl_khr_fp64") != std::string::npos)
+ {
+ bKhrFp64Flag = true;
+ //buildOption = "-D KHR_DP_EXTENSION -Dfp_t=double -Dfp_t4=double4 -Dfp_t16=double16";
+ tmpStr.append(" -DKHR_DP_EXTENSION");
+ buildOption = tmpStr.c_str();
+ SAL_INFO("opencl.device", "... has cl_khr_fp64");
+ }
+ else if ((std::string(aExtInfo)).find("cl_amd_fp64") != std::string::npos)
+ {
+ bAmdFp64Flag = true;
+ //buildOption = "-D AMD_DP_EXTENSION -Dfp_t=double -Dfp_t4=double4 -Dfp_t16=double16";
+ tmpStr.append(" -DAMD_DP_EXTENSION");
+ buildOption = tmpStr.c_str();
+ SAL_INFO("opencl.device", "... has cl_amd_fp64");
+ }
+ delete[] aExtInfo;
+
+ if (!bKhrFp64Flag && !bAmdFp64Flag)
+ {
+ /* No 64-bit float support */
+ device->score = (void*)new LibreOfficeDeviceScore;
+ ((LibreOfficeDeviceScore*)device->score)->fTime = DBL_MAX;
+ ((LibreOfficeDeviceScore*)device->score)->bNoCLErrors = true;
+ SAL_INFO("opencl.device", "... no fp64 support");
+ }
+ else
+ {
+ /* 64-bit float support present */
+
+ /* Create context and command queue */
+ cl_context clContext = clCreateContext(NULL, 1, &device->oclDeviceID, NULL, NULL, &clStatus);
+ DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clCreateContext");
+ cl_command_queue clQueue = clCreateCommandQueue(clContext, device->oclDeviceID, 0, &clStatus);
+ DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clCreateCommandQueue");
+
+ /* Build program */
+ cl_program clProgram = clCreateProgramWithSource(clContext, 1, &source, sourceSize, &clStatus);
+ DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clCreateProgramWithSource");
+ clStatus = clBuildProgram(clProgram, 1, &device->oclDeviceID, buildOption, NULL, NULL);
+ DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clBuildProgram");
+ if (CL_SUCCESS != clStatus)
+ {
+ /* Build program failed */
+ size_t length;
+ char* buildLog;
+ clStatus = clGetProgramBuildInfo(clProgram, device->oclDeviceID, CL_PROGRAM_BUILD_LOG, 0, NULL, &length);
+ buildLog = (char*)malloc(length);
+ clGetProgramBuildInfo(clProgram, device->oclDeviceID, CL_PROGRAM_BUILD_LOG, length, buildLog, &length);
+ SAL_INFO("opencl.device", "Build Errors:\n" << buildLog);
+ free(buildLog);
+
+ device->score = (void*)new LibreOfficeDeviceScore;
+ ((LibreOfficeDeviceScore*)device->score)->fTime = DBL_MAX;
+ ((LibreOfficeDeviceScore*)device->score)->bNoCLErrors = false;
+ }
+ else
+ {
+ /* Build program succeeded */
+ timer kernelTime;
+ timerStart(&kernelTime);
+
+ /* Run kernel */
+ LibreOfficeDeviceEvaluationIO* testData = (LibreOfficeDeviceEvaluationIO*)evalData;
+ cl_kernel clKernel = clCreateKernel(clProgram, "DynamicKernel", &clStatus);
+ DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clCreateKernel");
+ cl_mem clResult = clCreateBuffer(clContext, CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR, sizeof(cl_double) * testData->outputSize, &testData->output[0], &clStatus);
+ DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clCreateBuffer::clResult");
+ cl_mem clInput0 = clCreateBuffer(clContext, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, sizeof(cl_double) * testData->inputSize, &testData->input0[0], &clStatus);
+ DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clCreateBuffer::clInput0");
+ cl_mem clInput1 = clCreateBuffer(clContext, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, sizeof(cl_double) * testData->inputSize, &testData->input1[0], &clStatus);
+ DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clCreateBuffer::clInput1");
+ cl_mem clInput2 = clCreateBuffer(clContext, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, sizeof(cl_double) * testData->inputSize, &testData->input2[0], &clStatus);
+ DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clCreateBuffer::clInput2");
+ cl_mem clInput3 = clCreateBuffer(clContext, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, sizeof(cl_double) * testData->inputSize, &testData->input3[0], &clStatus);
+ DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clCreateBuffer::clInput3");
+ clStatus = clSetKernelArg(clKernel, 0, sizeof(cl_mem), (void*)&clResult);
+ DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clSetKernelArg::clResult");
+ clStatus = clSetKernelArg(clKernel, 1, sizeof(cl_mem), (void*)&clInput0);
+ DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clSetKernelArg::clInput0");
+ clStatus = clSetKernelArg(clKernel, 2, sizeof(cl_mem), (void*)&clInput1);
+ DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clSetKernelArg::clInput1");
+ clStatus = clSetKernelArg(clKernel, 3, sizeof(cl_mem), (void*)&clInput2);
+ DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clSetKernelArg::clInput2");
+ clStatus = clSetKernelArg(clKernel, 4, sizeof(cl_mem), (void*)&clInput3);
+ DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clSetKernelArg::clInput3");
+ size_t globalWS[1] = { testData->outputSize };
+ size_t localSize[1] = { 64 };
+ clStatus = clEnqueueNDRangeKernel(clQueue, clKernel, 1, 0, globalWS, localSize, 0, NULL, NULL);
+ DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clEnqueueNDRangeKernel");
+ clFinish(clQueue);
+ clReleaseMemObject(clInput3);
+ clReleaseMemObject(clInput2);
+ clReleaseMemObject(clInput1);
+ clReleaseMemObject(clInput0);
+ clReleaseMemObject(clResult);
+ clReleaseKernel(clKernel);
+
+ device->score = (void*)new LibreOfficeDeviceScore;
+ ((LibreOfficeDeviceScore*)device->score)->fTime = timerCurrent(&kernelTime);
+ ((LibreOfficeDeviceScore*)device->score)->bNoCLErrors = true;
+ }
+
+ clReleaseProgram(clProgram);
+ clReleaseCommandQueue(clQueue);
+ clReleaseContext(clContext);
+ }
+ }
+ else
+ {
+ /* Evaluating an Native CPU device */
+ SAL_INFO("opencl.device", "Device: \"CPU\" (Native) evaluation...");
+ timer kernelTime;
+ timerStart(&kernelTime);
+
+ LibreOfficeDeviceEvaluationIO* testData = (LibreOfficeDeviceEvaluationIO*)evalData;
+ for (unsigned long j = 0; j < testData->outputSize; j++)
+ {
+ double fAverage = 0.0f;
+ double fMin = DBL_MAX;
+ double fSoP = 0.0f;
+ for (unsigned long i = 0; i < testData->inputSize; i++)
+ {
+ fAverage += testData->input0[i];
+ fMin = ((fMin < testData->input1[i]) ? fMin : testData->input1[i]);
+ fSoP += testData->input2[i] * testData->input3[i];
+ }
+ fAverage /= testData->inputSize;
+ testData->output[j] = fAverage + (fMin * fSoP);
+ }
+
+ // InterpretTail - the S/W fallback is nothing like as efficient
+ // as any good openCL implementation: no SIMD, tons of branching
+ // in the inner loops etc. Generously characterise it as only 10x
+ // slower than the above.
+ float fInterpretTailFactor = 10.0;
+
+ device->score = (void*)new LibreOfficeDeviceScore;
+ ((LibreOfficeDeviceScore*)device->score)->fTime = timerCurrent(&kernelTime);
+ ((LibreOfficeDeviceScore*)device->score)->bNoCLErrors = true;
+
+ ((LibreOfficeDeviceScore*)device->score)->fTime *= fInterpretTailFactor;
+ }
+ return DS_SUCCESS;
+}
+
+/* Pick best device */
+ds_status pickBestDevice(ds_profile* profile, int* bestDeviceIdx)
+{
+ double bestScore = DBL_MAX;
+ *bestDeviceIdx = -1;
+
+ for (unsigned int d = 0; d < profile->numDevices; d++)
+ {
+ ds_device device = profile->devices[d];
+ LibreOfficeDeviceScore *pScore = (LibreOfficeDeviceScore*)device.score;
+
+ // Check blacklist and whitelist for actual devices
+ if (device.type == DS_DEVICE_OPENCL_DEVICE)
+ {
+ // There is a silly impedance mismatch here. Why do we
+ // need two different ways to describe an OpenCL platform
+ // and an OpenCL device driver?
+
+ OpenCLPlatformInfo aPlatform;
+ OpenCLDeviceInfo aDevice;
+
+ // We know that only the below fields are used by checkForKnownBadCompilers()
+ aPlatform.maVendor = OUString(device.oclPlatformVendor, strlen(device.oclPlatformVendor), RTL_TEXTENCODING_UTF8);
+ aDevice.maName = OUString(device.oclDeviceName, strlen(device.oclDeviceName), RTL_TEXTENCODING_UTF8);
+ aDevice.maDriver = OUString(device.oclDriverVersion, strlen(device.oclDriverVersion), RTL_TEXTENCODING_UTF8);
+
+ // If blacklisted or not whitelisted, ignore it
+ if (OpenCLConfig::get().checkImplementation(aPlatform, aDevice))
+ {
+ SAL_INFO("opencl.device", "Device[" << d << "] " << device.oclDeviceName << " is blacklisted or not whitelisted");
+ pScore->fTime = DBL_MAX;
+ pScore->bNoCLErrors = true;
+ }
+ }
+
+ double fScore = DBL_MAX;
+ if (pScore)
+ {
+ fScore = pScore->fTime;
+ }
+ else
+ {
+ SAL_INFO("opencl.device", "Unusual null score");
+ }
+
+ if (DS_DEVICE_OPENCL_DEVICE == device.type)
+ {
+ SAL_INFO("opencl.device", "Device[" << d << "] " << device.oclDeviceName << " (OpenCL) score is " << fScore);
+ }
+ else
+ {
+ SAL_INFO("opencl.device", "Device[" << d << "] CPU (Native) score is " << fScore);
+ }
+ if (fScore < bestScore)
+ {
+ bestScore = fScore;
+ *bestDeviceIdx = d;
+ }
+ }
+ if (DS_DEVICE_OPENCL_DEVICE == profile->devices[*bestDeviceIdx].type)
+ {
+ SAL_INFO("opencl.device", "Selected Device[" << *bestDeviceIdx << "]: " << profile->devices[*bestDeviceIdx].oclDeviceName << "(OpenCL).");
+ }
+ else
+ {
+ SAL_INFO("opencl.device", "Selected Device[" << *bestDeviceIdx << "]: CPU (Native).");
+ }
+
+ return DS_SUCCESS;
+}
+
+/* Return device ID for matching device name */
+int matchDevice(ds_profile* profile, char* deviceName)
+{
+ int deviceMatch = -1;
+ for (unsigned int d = 0; d < profile->numDevices - 1; d++)
+ {
+ if ((std::string(profile->devices[d].oclDeviceName)).find(deviceName) != std::string::npos) deviceMatch = d;
+ }
+ if (std::string("NATIVE_CPU").find(deviceName) != std::string::npos) deviceMatch = profile->numDevices - 1;
+ return deviceMatch;
+}
+
+/*************************************************************************/
+/* EXTERNAL FUNCTIONS */
+/*************************************************************************/
+ds_device getDeviceSelection(const char* sProfilePath, bool bForceSelection)
+{
+ /* Run only if device is not yet selected */
+ if (!bIsDeviceSelected || bForceSelection)
+ {
+ /* Setup */
+ ds_status status;
+ ds_profile* profile = NULL;
+ status = initDSProfile(&profile, "LibreOffice v0.1");
+
+ if (!profile)
+ {
+ // failed to initialize profile.
+ selectedDevice.type = DS_DEVICE_NATIVE_CPU;
+ return selectedDevice;
+ }
+
+ /* Try reading scores from file */
+ std::string tmpStr(sProfilePath);
+ const char* fileName = tmpStr.append("sc_opencl_device_profile.dat").c_str();
+ if (!bForceSelection)
+ {
+ status = readProfileFromFile(profile, deserializeScore, fileName);
+ }
+ else
+ {
+ status = DS_INVALID_PROFILE;
+ SAL_INFO("opencl.device", "Performing forced profiling.");
+ }
+ if (DS_SUCCESS != status)
+ {
+ if (!bForceSelection)
+ {
+ SAL_INFO("opencl.device", "Profile file not available (" << fileName << "); performing profiling.");
+ }
+
+ /* Populate input data for micro-benchmark */
+ boost::scoped_ptr<LibreOfficeDeviceEvaluationIO> testData(new LibreOfficeDeviceEvaluationIO);
+ testData->inputSize = INPUTSIZE;
+ testData->outputSize = OUTPUTSIZE;
+ testData->input0.resize(testData->inputSize);
+ testData->input1.resize(testData->inputSize);
+ testData->input2.resize(testData->inputSize);
+ testData->input3.resize(testData->inputSize);
+ testData->output.resize(testData->outputSize);
+ populateInput(testData.get());
+
+ /* Perform evaluations */
+ unsigned int numUpdates;
+ status = profileDevices(profile, DS_EVALUATE_ALL, evaluateScoreForDevice, (void*)testData.get(), &numUpdates);
+
+ if (DS_SUCCESS == status)
+ {
+ /* Write scores to file */
+ status = writeProfileToFile(profile, serializeScore, fileName);
+ if (DS_SUCCESS == status)
+ {
+ SAL_INFO("opencl.device", "Scores written to file (" << fileName << ").");
+ }
+ else
+ {
+ SAL_INFO("opencl.device", "Error saving scores to file (" << fileName << "); scores not written to file.");
+ }
+ }
+ else
+ {
+ SAL_INFO("opencl.device", "Unable to evaluate performance; scores not written to file.");
+ }
+ }
+ else
+ {
+ SAL_INFO("opencl.device", "Profile read from file (" << fileName << ").");
+ }
+
+ /* Pick best device */
+ int bestDeviceIdx;
+ pickBestDevice(profile, &bestDeviceIdx);
+
+ /* Overide if necessary */
+ char* overrideDeviceStr = getenv("SC_OPENCL_DEVICE_OVERRIDE");
+ if (NULL != overrideDeviceStr)
+ {
+ int overrideDeviceIdx = matchDevice(profile, overrideDeviceStr);
+ if (-1 != overrideDeviceIdx)
+ {
+ SAL_INFO("opencl.device", "Overriding Device Selection (SC_OPENCL_DEVICE_OVERRIDE=" << overrideDeviceStr << ").");
+ bestDeviceIdx = overrideDeviceIdx;
+ if (DS_DEVICE_OPENCL_DEVICE == profile->devices[bestDeviceIdx].type)
+ {
+ SAL_INFO("opencl.device", "Selected Device[" << bestDeviceIdx << "]: " << profile->devices[bestDeviceIdx].oclDeviceName << " (OpenCL).");
+ }
+ else
+ {
+ SAL_INFO("opencl.device", "Selected Device[" << bestDeviceIdx << "]: CPU (Native).");
+ }
+ }
+ else
+ {
+ SAL_INFO("opencl.device", "Ignoring invalid SC_OPENCL_DEVICE_OVERRIDE=" << overrideDeviceStr << ").");
+ }
+ }
+
+ /* Final device selection */
+ selectedDevice = profile->devices[bestDeviceIdx];
+ bIsDeviceSelected = true;
+
+ /* Release profile */
+ releaseDSProfile(profile, releaseScore);
+ }
+ return selectedDevice;
+}
+
+}
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/opencl/source/openclwrapper.cxx b/opencl/source/openclwrapper.cxx
new file mode 100644
index 000000000000..86ba6cd3a4e3
--- /dev/null
+++ b/opencl/source/openclwrapper.cxx
@@ -0,0 +1,804 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#include <config_folders.h>
+
+#include "opencl_device.hxx"
+
+#include <comphelper/string.hxx>
+#include <opencl/openclconfig.hxx>
+#include <opencl/openclwrapper.hxx>
+#include <osl/file.hxx>
+#include <rtl/bootstrap.hxx>
+#include <rtl/digest.h>
+#include <rtl/strbuf.hxx>
+#include <rtl/ustring.hxx>
+#include <sal/config.h>
+
+#include <boost/scoped_array.hpp>
+#include <unicode/regex.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <cmath>
+
+#ifdef _WIN32
+#include <prewin.h>
+#include <postwin.h>
+#define OPENCL_DLL_NAME "OpenCL.dll"
+#elif defined(MACOSX)
+#define OPENCL_DLL_NAME NULL
+#else
+#define OPENCL_DLL_NAME "libOpenCL.so"
+#endif
+
+#define DEVICE_NAME_LENGTH 1024
+#define DRIVER_VERSION_LENGTH 1024
+#define PLATFORM_VERSION_LENGTH 1024
+
+using namespace std;
+
+namespace opencl {
+
+GPUEnv OpenCLDevice::gpuEnv;
+bool OpenCLDevice::bIsInited = false;
+
+namespace {
+
+OString generateMD5(const void* pData, size_t length)
+{
+ sal_uInt8 pBuffer[RTL_DIGEST_LENGTH_MD5];
+ rtlDigestError aError = rtl_digest_MD5(pData, length,
+ pBuffer, RTL_DIGEST_LENGTH_MD5);
+ SAL_WARN_IF(aError != rtl_Digest_E_None, "opencl", "md5 generation failed");
+
+ OStringBuffer aBuffer;
+ const char* pString = "0123456789ABCDEF";
+ for(size_t i = 0; i < RTL_DIGEST_LENGTH_MD5; ++i)
+ {
+ sal_uInt8 val = pBuffer[i];
+ aBuffer.append(pString[val/16]);
+ aBuffer.append(pString[val%16]);
+ }
+ return aBuffer.makeStringAndClear();
+}
+
+OString getCacheFolder()
+{
+ OUString url("${$BRAND_BASE_DIR/" LIBO_ETC_FOLDER "/" SAL_CONFIGFILE("bootstrap") ":UserInstallation}/cache/");
+ rtl::Bootstrap::expandMacros(url);
+
+ osl::Directory::create(url);
+
+ return rtl::OUStringToOString(url, RTL_TEXTENCODING_UTF8);
+}
+
+}
+
+OString OpenCLDevice::maCacheFolder = getCacheFolder();
+
+void OpenCLDevice::setKernelEnv( KernelEnv *envInfo )
+{
+ envInfo->mpkContext = gpuEnv.mpContext;
+ envInfo->mpkCmdQueue = gpuEnv.mpCmdQueue;
+ envInfo->mpkProgram = gpuEnv.mpArryPrograms[0];
+}
+
+namespace {
+
+OString createFileName(cl_device_id deviceId, const char* clFileName)
+{
+ OString fileName(clFileName);
+ sal_Int32 nIndex = fileName.lastIndexOf(".cl");
+ if(nIndex > 0)
+ fileName = fileName.copy(0, nIndex);
+
+ char deviceName[DEVICE_NAME_LENGTH] = {0};
+ clGetDeviceInfo(deviceId, CL_DEVICE_NAME,
+ sizeof(deviceName), deviceName, NULL);
+
+ char driverVersion[DRIVER_VERSION_LENGTH] = {0};
+ clGetDeviceInfo(deviceId, CL_DRIVER_VERSION,
+ sizeof(driverVersion), driverVersion, NULL);
+
+ cl_platform_id platformId;
+ clGetDeviceInfo(deviceId, CL_DEVICE_PLATFORM,
+ sizeof(platformId), &platformId, NULL);
+
+ char platformVersion[PLATFORM_VERSION_LENGTH] = {0};
+ clGetPlatformInfo(platformId, CL_PLATFORM_VERSION, sizeof(platformVersion),
+ platformVersion, NULL);
+
+ // create hash for deviceName + driver version + platform version
+ OString aString = OString(deviceName) + driverVersion + platformVersion;
+ OString aHash = generateMD5(aString.getStr(), aString.getLength());
+
+ return OpenCLDevice::maCacheFolder + fileName + "-" +
+ aHash + ".bin";
+}
+
+}
+
+std::vector<boost::shared_ptr<osl::File> > OpenCLDevice::binaryGenerated( const char * clFileName, cl_context context )
+{
+ size_t numDevices=0;
+
+ std::vector<boost::shared_ptr<osl::File> > aGeneratedFiles;
+ cl_int clStatus = clGetContextInfo( context, CL_CONTEXT_DEVICES,
+ 0, NULL, &numDevices );
+ numDevices /= sizeof(numDevices);
+
+ if(clStatus != CL_SUCCESS)
+ return aGeneratedFiles;
+
+ // grab the handles to all of the devices in the context.
+ boost::scoped_array<cl_device_id> pArryDevsID(new cl_device_id[numDevices]);
+ clStatus = clGetContextInfo( context, CL_CONTEXT_DEVICES,
+ sizeof( cl_device_id ) * numDevices, pArryDevsID.get(), NULL );
+
+ if(clStatus != CL_SUCCESS)
+ return aGeneratedFiles;
+
+ for ( size_t i = 0; i < numDevices; i++ )
+ {
+ if ( pArryDevsID[i] != 0 )
+ {
+ OString fileName = createFileName(gpuEnv.mpArryDevsID[i], clFileName);
+ osl::File* pNewFile = new osl::File(rtl::OStringToOUString(fileName, RTL_TEXTENCODING_UTF8));
+ if(pNewFile->open(osl_File_OpenFlag_Read) == osl::FileBase::E_None)
+ {
+ aGeneratedFiles.push_back(boost::shared_ptr<osl::File>(pNewFile));
+ SAL_INFO("opencl.file", "Opening binary file '" << fileName << "' for reading: success");
+ }
+ else
+ {
+ SAL_INFO("opencl.file", "Opening binary file '" << fileName << "' for reading: FAIL");
+ delete pNewFile;
+ break;
+ }
+ }
+ }
+
+ return aGeneratedFiles;
+}
+
+bool OpenCLDevice::writeBinaryToFile( const OString& rFileName, const char* binary, size_t numBytes )
+{
+ osl::File file(rtl::OStringToOUString(rFileName, RTL_TEXTENCODING_UTF8));
+ osl::FileBase::RC status = file.open(
+ osl_File_OpenFlag_Write | osl_File_OpenFlag_Create );
+
+ if(status != osl::FileBase::E_None)
+ return false;
+
+ sal_uInt64 nBytesWritten = 0;
+ file.write( binary, numBytes, nBytesWritten );
+
+ assert(numBytes == nBytesWritten);
+
+ return true;
+}
+
+bool OpenCLDevice::generatBinFromKernelSource( cl_program program, const char * clFileName )
+{
+ cl_uint numDevices;
+
+ cl_int clStatus = clGetProgramInfo( program, CL_PROGRAM_NUM_DEVICES,
+ sizeof(numDevices), &numDevices, NULL );
+ CHECK_OPENCL( clStatus, "clGetProgramInfo" );
+
+ std::vector<cl_device_id> pArryDevsID(numDevices);
+ /* grab the handles to all of the devices in the program. */
+ clStatus = clGetProgramInfo( program, CL_PROGRAM_DEVICES,
+ sizeof(cl_device_id) * numDevices, &pArryDevsID[0], NULL );
+ CHECK_OPENCL( clStatus, "clGetProgramInfo" );
+
+ /* figure out the sizes of each of the binaries. */
+ std::vector<size_t> binarySizes(numDevices);
+
+ clStatus = clGetProgramInfo( program, CL_PROGRAM_BINARY_SIZES,
+ sizeof(size_t) * numDevices, &binarySizes[0], NULL );
+ CHECK_OPENCL( clStatus, "clGetProgramInfo" );
+
+ /* copy over all of the generated binaries. */
+ boost::scoped_array<char*> binaries(new char*[numDevices]);
+
+ for ( size_t i = 0; i < numDevices; i++ )
+ {
+ if ( binarySizes[i] != 0 )
+ {
+ binaries[i] = new char[binarySizes[i]];
+ }
+ else
+ {
+ binaries[i] = NULL;
+ }
+ }
+
+ clStatus = clGetProgramInfo( program, CL_PROGRAM_BINARIES,
+ sizeof(char *) * numDevices, binaries.get(), NULL );
+ CHECK_OPENCL(clStatus,"clGetProgramInfo");
+
+ /* dump out each binary into its own separate file. */
+ for ( size_t i = 0; i < numDevices; i++ )
+ {
+
+ if ( binarySizes[i] != 0 )
+ {
+ OString fileName = createFileName(pArryDevsID[i], clFileName);
+ if ( !writeBinaryToFile( fileName,
+ binaries[i], binarySizes[i] ) )
+ SAL_INFO("opencl.file", "Writing binary file '" << fileName << "': FAIL");
+ else
+ SAL_INFO("opencl.file", "Writing binary file '" << fileName << "': success");
+ }
+ }
+
+ // Release all resouces and memory
+ for ( size_t i = 0; i < numDevices; i++ )
+ {
+ delete[] binaries[i];
+ }
+
+ return true;
+}
+
+bool OpenCLDevice::initOpenCLAttr( OpenCLEnv * env )
+{
+ if ( gpuEnv.mnIsUserCreated )
+ return true;
+
+ gpuEnv.mpContext = env->mpOclContext;
+ gpuEnv.mpPlatformID = env->mpOclPlatformID;
+ gpuEnv.mpDevID = env->mpOclDevsID;
+ gpuEnv.mpCmdQueue = env->mpOclCmdQueue;
+
+ gpuEnv.mnIsUserCreated = 1;
+
+ return false;
+}
+
+void OpenCLDevice::releaseOpenCLEnv( GPUEnv *gpuInfo )
+{
+ if ( !bIsInited )
+ {
+ return;
+ }
+
+ if ( gpuEnv.mpCmdQueue )
+ {
+ clReleaseCommandQueue( gpuEnv.mpCmdQueue );
+ gpuEnv.mpCmdQueue = NULL;
+ }
+ if ( gpuEnv.mpContext )
+ {
+ clReleaseContext( gpuEnv.mpContext );
+ gpuEnv.mpContext = NULL;
+ }
+ bIsInited = false;
+ gpuInfo->mnIsUserCreated = 0;
+ free( gpuInfo->mpArryDevsID );
+
+ return;
+}
+
+namespace {
+
+bool buildProgram(const char* buildOption, GPUEnv* gpuInfo, int idx)
+{
+ cl_int clStatus;
+ //char options[512];
+ // create a cl program executable for all the devices specified
+ if (!gpuInfo->mnIsUserCreated)
+ {
+ clStatus = clBuildProgram(gpuInfo->mpArryPrograms[idx], 1, gpuInfo->mpArryDevsID,
+ buildOption, NULL, NULL);
+ }
+ else
+ {
+ clStatus = clBuildProgram(gpuInfo->mpArryPrograms[idx], 1, &(gpuInfo->mpDevID),
+ buildOption, NULL, NULL);
+ }
+
+ if ( clStatus != CL_SUCCESS )
+ {
+ size_t length;
+ if ( !gpuInfo->mnIsUserCreated )
+ {
+ clStatus = clGetProgramBuildInfo( gpuInfo->mpArryPrograms[idx], gpuInfo->mpArryDevsID[0],
+ CL_PROGRAM_BUILD_LOG, 0, NULL, &length );
+ }
+ else
+ {
+ clStatus = clGetProgramBuildInfo( gpuInfo->mpArryPrograms[idx], gpuInfo->mpDevID,
+ CL_PROGRAM_BUILD_LOG, 0, NULL, &length);
+ }
+ if ( clStatus != CL_SUCCESS )
+ {
+ return false;
+ }
+
+ boost::scoped_array<char> buildLog(new char[length]);
+ if ( !gpuInfo->mnIsUserCreated )
+ {
+ clStatus = clGetProgramBuildInfo( gpuInfo->mpArryPrograms[idx], gpuInfo->mpArryDevsID[0],
+ CL_PROGRAM_BUILD_LOG, length, buildLog.get(), &length );
+ }
+ else
+ {
+ clStatus = clGetProgramBuildInfo( gpuInfo->mpArryPrograms[idx], gpuInfo->mpDevID,
+ CL_PROGRAM_BUILD_LOG, length, buildLog.get(), &length );
+ }
+ if ( clStatus != CL_SUCCESS )
+ {
+ return false;
+ }
+
+ OString aBuildLogFileURL = OpenCLDevice::maCacheFolder + "kernel-build.log";
+ osl::File aBuildLogFile(rtl::OStringToOUString(aBuildLogFileURL, RTL_TEXTENCODING_UTF8));
+ osl::FileBase::RC status = aBuildLogFile.open(
+ osl_File_OpenFlag_Write | osl_File_OpenFlag_Create );
+
+ if(status != osl::FileBase::E_None)
+ return false;
+
+ sal_uInt64 nBytesWritten = 0;
+ aBuildLogFile.write( buildLog.get(), length, nBytesWritten );
+
+ return false;
+ }
+
+ return true;
+}
+
+}
+
+bool OpenCLDevice::buildProgramFromBinary(const char* buildOption, GPUEnv* gpuInfo, const char* filename, int idx)
+{
+ size_t numDevices;
+ cl_int clStatus = clGetContextInfo( gpuInfo->mpContext, CL_CONTEXT_DEVICES,
+ 0, NULL, &numDevices );
+ numDevices /= sizeof(numDevices);
+ CHECK_OPENCL( clStatus, "clGetContextInfo" );
+
+ std::vector<boost::shared_ptr<osl::File> > aGeneratedFiles = binaryGenerated(
+ filename, gpuInfo->mpContext );
+
+ if (aGeneratedFiles.size() == numDevices)
+ {
+ boost::scoped_array<size_t> length(new size_t[numDevices]);
+ boost::scoped_array<unsigned char*> pBinary(new unsigned char*[numDevices]);
+ for(size_t i = 0; i < numDevices; ++i)
+ {
+ sal_uInt64 nSize;
+ aGeneratedFiles[i]->getSize(nSize);
+ unsigned char* binary = new unsigned char[nSize];
+ sal_uInt64 nBytesRead;
+ aGeneratedFiles[i]->read(binary, nSize, nBytesRead);
+ if(nSize != nBytesRead)
+ assert(false);
+
+ length[i] = nBytesRead;
+
+ pBinary[i] = binary;
+ }
+
+ // grab the handles to all of the devices in the context.
+ boost::scoped_array<cl_device_id> pArryDevsID(new cl_device_id[numDevices]);
+ clStatus = clGetContextInfo( gpuInfo->mpContext, CL_CONTEXT_DEVICES,
+ sizeof( cl_device_id ) * numDevices, pArryDevsID.get(), NULL );
+
+ if(clStatus != CL_SUCCESS)
+ {
+ for(size_t i = 0; i < numDevices; ++i)
+ {
+ delete[] pBinary[i];
+ }
+ return false;
+ }
+
+ cl_int binary_status;
+
+ gpuInfo->mpArryPrograms[idx] = clCreateProgramWithBinary( gpuInfo->mpContext,numDevices,
+ pArryDevsID.get(), length.get(), (const unsigned char**) pBinary.get(),
+ &binary_status, &clStatus );
+ if(clStatus != CL_SUCCESS)
+ {
+ // something went wrong, fall back to compiling from source
+ return false;
+ }
+ for(size_t i = 0; i < numDevices; ++i)
+ {
+ delete[] pBinary[i];
+ }
+ }
+
+ if ( !gpuInfo->mpArryPrograms[idx] )
+ {
+ return false;
+ }
+ return buildProgram(buildOption, gpuInfo, idx);
+}
+
+bool OpenCLDevice::initOpenCLRunEnv( int argc )
+{
+ if ( ( argc > MAX_CLFILE_NUM ) || ( argc < 0 ) )
+ return true;
+
+ if ( !bIsInited )
+ {
+ if ( !gpuEnv.mnIsUserCreated )
+ memset( &gpuEnv, 0, sizeof(gpuEnv) );
+
+ //initialize devices, context, command_queue
+ bool status = initOpenCLRunEnv( &gpuEnv );
+ if ( status )
+ {
+ return true;
+ }
+ //initialize program, kernelName, kernelCount
+ if( getenv( "SC_FLOAT" ) )
+ {
+ gpuEnv.mnKhrFp64Flag = false;
+ gpuEnv.mnAmdFp64Flag = false;
+ }
+ if( gpuEnv.mnKhrFp64Flag )
+ {
+ SAL_INFO("opencl", "Use Khr double");
+ }
+ else if( gpuEnv.mnAmdFp64Flag )
+ {
+ SAL_INFO("opencl", "Use AMD double type");
+ }
+ else
+ {
+ SAL_INFO("opencl", "USE float type");
+ }
+ bIsInited = true;
+ }
+ return false;
+}
+
+namespace {
+
+void checkDeviceForDoubleSupport(cl_device_id deviceId, bool& bKhrFp64, bool& bAmdFp64)
+{
+ bKhrFp64 = false;
+ bAmdFp64 = false;
+
+ // Check device extensions for double type
+ size_t aDevExtInfoSize = 0;
+
+ cl_uint clStatus = clGetDeviceInfo( deviceId, CL_DEVICE_EXTENSIONS, 0, NULL, &aDevExtInfoSize );
+ if( clStatus != CL_SUCCESS )
+ return;
+
+ boost::scoped_array<char> pExtInfo(new char[aDevExtInfoSize]);
+
+ clStatus = clGetDeviceInfo( deviceId, CL_DEVICE_EXTENSIONS,
+ sizeof(char) * aDevExtInfoSize, pExtInfo.get(), NULL);
+
+ if( clStatus != CL_SUCCESS )
+ return;
+
+ if ( strstr( pExtInfo.get(), "cl_khr_fp64" ) )
+ {
+ bKhrFp64 = true;
+ }
+ else
+ {
+ // Check if cl_amd_fp64 extension is supported
+ if ( strstr( pExtInfo.get(), "cl_amd_fp64" ) )
+ bAmdFp64 = true;
+ }
+}
+
+}
+
+bool OpenCLDevice::initOpenCLRunEnv( GPUEnv *gpuInfo )
+{
+ bool bKhrFp64 = false;
+ bool bAmdFp64 = false;
+
+ checkDeviceForDoubleSupport(gpuInfo->mpArryDevsID[0], bKhrFp64, bAmdFp64);
+
+ gpuInfo->mnKhrFp64Flag = bKhrFp64;
+ gpuInfo->mnAmdFp64Flag = bAmdFp64;
+
+ return false;
+}
+
+namespace {
+
+// based on crashes and hanging during kernel compilation
+void createDeviceInfo(cl_device_id aDeviceId, OpenCLPlatformInfo& rPlatformInfo)
+{
+ OpenCLDeviceInfo aDeviceInfo;
+ aDeviceInfo.device = aDeviceId;
+
+ char pName[DEVICE_NAME_LENGTH];
+ cl_int nState = clGetDeviceInfo(aDeviceId, CL_DEVICE_NAME, DEVICE_NAME_LENGTH, pName, NULL);
+ if(nState != CL_SUCCESS)
+ return;
+
+ aDeviceInfo.maName = OUString::createFromAscii(pName);
+
+ char pVendor[DEVICE_NAME_LENGTH];
+ nState = clGetDeviceInfo(aDeviceId, CL_DEVICE_VENDOR, DEVICE_NAME_LENGTH, pVendor, NULL);
+ if(nState != CL_SUCCESS)
+ return;
+
+ aDeviceInfo.maVendor = OUString::createFromAscii(pVendor);
+
+ cl_ulong nMemSize;
+ nState = clGetDeviceInfo(aDeviceId, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(nMemSize), &nMemSize, NULL);
+ if(nState != CL_SUCCESS)
+ return;
+
+ aDeviceInfo.mnMemory = nMemSize;
+
+ cl_uint nClockFrequency;
+ nState = clGetDeviceInfo(aDeviceId, CL_DEVICE_MAX_CLOCK_FREQUENCY, sizeof(nClockFrequency), &nClockFrequency, NULL);
+ if(nState != CL_SUCCESS)
+ return;
+
+ aDeviceInfo.mnFrequency = nClockFrequency;
+
+ cl_uint nComputeUnits;
+ nState = clGetDeviceInfo(aDeviceId, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(nComputeUnits), &nComputeUnits, NULL);
+ if(nState != CL_SUCCESS)
+ return;
+
+ char pDriver[DEVICE_NAME_LENGTH];
+ nState = clGetDeviceInfo(aDeviceId, CL_DRIVER_VERSION, DEVICE_NAME_LENGTH, pDriver, NULL);
+
+ if(nState != CL_SUCCESS)
+ return;
+
+ aDeviceInfo.maDriver = OUString::createFromAscii(pDriver);
+
+ bool bKhrFp64 = false;
+ bool bAmdFp64 = false;
+ checkDeviceForDoubleSupport(aDeviceId, bKhrFp64, bAmdFp64);
+
+ // only list devices that support double
+ if(!bKhrFp64 && !bAmdFp64)
+ return;
+
+ aDeviceInfo.mnComputeUnits = nComputeUnits;
+
+ if(!OpenCLConfig::get().checkImplementation(rPlatformInfo, aDeviceInfo))
+ rPlatformInfo.maDevices.push_back(aDeviceInfo);
+}
+
+bool createPlatformInfo(cl_platform_id nPlatformId, OpenCLPlatformInfo& rPlatformInfo)
+{
+ rPlatformInfo.platform = nPlatformId;
+ char pName[64];
+ cl_int nState = clGetPlatformInfo(nPlatformId, CL_PLATFORM_NAME, 64,
+ pName, NULL);
+ if(nState != CL_SUCCESS)
+ return false;
+ rPlatformInfo.maName = OUString::createFromAscii(pName);
+
+ char pVendor[64];
+ nState = clGetPlatformInfo(nPlatformId, CL_PLATFORM_VENDOR, 64,
+ pVendor, NULL);
+ if(nState != CL_SUCCESS)
+ return false;
+
+ rPlatformInfo.maVendor = OUString::createFromAscii(pVendor);
+
+ cl_uint nDevices;
+ nState = clGetDeviceIDs(nPlatformId, CL_DEVICE_TYPE_ALL, 0, NULL, &nDevices);
+ if(nState != CL_SUCCESS)
+ return false;
+
+ // memory leak that does not matter
+ // memory is stored in static variable that lives through the whole program
+ cl_device_id* pDevices = new cl_device_id[nDevices];
+ nState = clGetDeviceIDs(nPlatformId, CL_DEVICE_TYPE_ALL, nDevices, pDevices, NULL);
+ if(nState != CL_SUCCESS)
+ return false;
+
+ for(size_t i = 0; i < nDevices; ++i)
+ {
+ createDeviceInfo(pDevices[i], rPlatformInfo);
+ }
+
+ return true;
+}
+
+}
+
+const std::vector<OpenCLPlatformInfo>& fillOpenCLInfo()
+{
+ static std::vector<OpenCLPlatformInfo> aPlatforms;
+ if(!aPlatforms.empty())
+ return aPlatforms;
+
+ int status = clewInit(OPENCL_DLL_NAME);
+ if (status < 0)
+ return aPlatforms;
+
+ cl_uint nPlatforms;
+ cl_int nState = clGetPlatformIDs(0, NULL, &nPlatforms);
+
+ if(nState != CL_SUCCESS)
+ return aPlatforms;
+
+ // memory leak that does not matter,
+ // memory is stored in static instance aPlatforms
+ cl_platform_id* pPlatforms = new cl_platform_id[nPlatforms];
+ nState = clGetPlatformIDs(nPlatforms, pPlatforms, NULL);
+
+ if(nState != CL_SUCCESS)
+ return aPlatforms;
+
+ for(size_t i = 0; i < nPlatforms; ++i)
+ {
+ OpenCLPlatformInfo aPlatformInfo;
+ if(createPlatformInfo(pPlatforms[i], aPlatformInfo))
+ aPlatforms.push_back(aPlatformInfo);
+ }
+
+ return aPlatforms;
+}
+
+namespace {
+
+cl_device_id findDeviceIdByDeviceString(const OUString& rString, const std::vector<OpenCLPlatformInfo>& rPlatforms)
+{
+ std::vector<OpenCLPlatformInfo>::const_iterator it = rPlatforms.begin(), itEnd = rPlatforms.end();
+ for(; it != itEnd; ++it)
+ {
+ std::vector<OpenCLDeviceInfo>::const_iterator itr = it->maDevices.begin(), itrEnd = it->maDevices.end();
+ for(; itr != itrEnd; ++itr)
+ {
+ OUString aDeviceId = it->maVendor + " " + itr->maName;
+ if(rString == aDeviceId)
+ {
+ return static_cast<cl_device_id>(itr->device);
+ }
+ }
+ }
+
+ return NULL;
+}
+
+void findDeviceInfoFromDeviceId(cl_device_id aDeviceId, size_t& rDeviceId, size_t& rPlatformId)
+{
+ cl_platform_id platformId;
+ cl_int nState = clGetDeviceInfo(aDeviceId, CL_DEVICE_PLATFORM,
+ sizeof(platformId), &platformId, NULL);
+
+ if(nState != CL_SUCCESS)
+ return;
+
+ const std::vector<OpenCLPlatformInfo>& rPlatforms = fillOpenCLInfo();
+ for(size_t i = 0; i < rPlatforms.size(); ++i)
+ {
+ cl_platform_id platId = static_cast<cl_platform_id>(rPlatforms[i].platform);
+ if(platId != platformId)
+ continue;
+
+ for(size_t j = 0; j < rPlatforms[i].maDevices.size(); ++j)
+ {
+ cl_device_id id = static_cast<cl_device_id>(rPlatforms[i].maDevices[j].device);
+ if(id == aDeviceId)
+ {
+ rDeviceId = j;
+ rPlatformId = i;
+ return;
+ }
+ }
+ }
+}
+
+}
+
+bool switchOpenCLDevice(const OUString* pDevice, bool bAutoSelect, bool bForceEvaluation)
+{
+ if(fillOpenCLInfo().empty())
+ return false;
+
+ cl_device_id pDeviceId = NULL;
+ if(pDevice)
+ pDeviceId = findDeviceIdByDeviceString(*pDevice, fillOpenCLInfo());
+
+ if(!pDeviceId || bAutoSelect)
+ {
+ int status = clewInit(OPENCL_DLL_NAME);
+ if (status < 0)
+ return false;
+
+ OUString url("${$BRAND_BASE_DIR/" LIBO_ETC_FOLDER "/" SAL_CONFIGFILE("bootstrap") ":UserInstallation}/cache/");
+ rtl::Bootstrap::expandMacros(url);
+ OUString path;
+ osl::FileBase::getSystemPathFromFileURL(url,path);
+ OString dsFileName = rtl::OUStringToOString(path, RTL_TEXTENCODING_UTF8);
+ ds_device pSelectedDevice = ::OpenCLDevice::getDeviceSelection(dsFileName.getStr(), bForceEvaluation);
+ pDeviceId = pSelectedDevice.oclDeviceID;
+
+ }
+
+ if(OpenCLDevice::gpuEnv.mpDevID == pDeviceId)
+ {
+ // we don't need to change anything
+ // still the same device
+ return pDeviceId != NULL;
+ }
+
+ cl_platform_id platformId;
+ cl_int nState = clGetDeviceInfo(pDeviceId, CL_DEVICE_PLATFORM,
+ sizeof(platformId), &platformId, NULL);
+
+ cl_context_properties cps[3];
+ cps[0] = CL_CONTEXT_PLATFORM;
+ cps[1] = reinterpret_cast<cl_context_properties>(platformId);
+ cps[2] = 0;
+ cl_context context = clCreateContext( cps, 1, &pDeviceId, NULL, NULL, &nState );
+
+ if(nState != CL_SUCCESS || context == NULL)
+ {
+ if(context != NULL)
+ clReleaseContext(context);
+
+ SAL_WARN("opencl", "failed to set/switch opencl device");
+ return false;
+ }
+
+ cl_command_queue command_queue = clCreateCommandQueue(
+ context, pDeviceId, 0, &nState);
+
+ if(command_queue == NULL || nState != CL_SUCCESS)
+ {
+ if(command_queue != NULL)
+ clReleaseCommandQueue(command_queue);
+
+ clReleaseContext(context);
+ SAL_WARN("opencl", "failed to set/switch opencl device");
+ return false;
+ }
+
+ OpenCLDevice::releaseOpenCLEnv(&OpenCLDevice::gpuEnv);
+ OpenCLEnv env;
+ env.mpOclPlatformID = platformId;
+ env.mpOclContext = context;
+ env.mpOclDevsID = pDeviceId;
+ env.mpOclCmdQueue = command_queue;
+ OpenCLDevice::initOpenCLAttr(&env);
+
+ // why do we need this at all?
+
+ // (Assuming the above question refers to the mpArryDevsID
+ // initialisation below.) Because otherwise the code crashes in
+ // initOpenCLRunEnv(). Confused? You should be.
+
+ OpenCLDevice::gpuEnv.mpArryDevsID = (cl_device_id*) malloc( sizeof(cl_device_id) );
+ OpenCLDevice::gpuEnv.mpArryDevsID[0] = pDeviceId;
+
+ return !OpenCLDevice::initOpenCLRunEnv(0);
+}
+
+void getOpenCLDeviceInfo(size_t& rDeviceId, size_t& rPlatformId)
+{
+ int status = clewInit(OPENCL_DLL_NAME);
+ if (status < 0)
+ return;
+
+ cl_device_id id = OpenCLDevice::gpuEnv.mpDevID;
+ findDeviceInfoFromDeviceId(id, rDeviceId, rPlatformId);
+}
+
+}
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */