diff options
author | Tor Lillqvist <tml@collabora.com> | 2014-11-27 15:13:12 +0200 |
---|---|---|
committer | Tor Lillqvist <tml@collabora.com> | 2014-11-27 15:32:58 +0200 |
commit | a70b717ef872c0ac652883ecd2a82c4cc29763e2 (patch) | |
tree | 0975349b1b8798eb9d444d7c91d97d52a2f6fa4e /opencl/source | |
parent | d83b031346799bff0a3298387f76b16baad2e5cf (diff) |
Move more Calc-independent OpenCL stuff from the sc to the opencl module
No cleanups yet. Just removed the "sc" namespace parts now when this stuff is
no longer Calc-specific. There is still horribly confusing use of the same
OpenCLDevice name for both a class and as a namespace, for instance. And the
OpenCLDevice class has only public static members even, so effectively it acts
as just a namespace anyway... Etc.
Change-Id: Idc5f30a721df0101426c676f04a85e02c5dc8443
Diffstat (limited to 'opencl/source')
-rw-r--r-- | opencl/source/opencl_device.cxx | 598 | ||||
-rw-r--r-- | opencl/source/openclwrapper.cxx | 804 |
2 files changed, 1402 insertions, 0 deletions
diff --git a/opencl/source/opencl_device.cxx b/opencl/source/opencl_device.cxx new file mode 100644 index 000000000000..204ab345b5e0 --- /dev/null +++ b/opencl/source/opencl_device.cxx @@ -0,0 +1,598 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + */ + +#ifdef _WIN32 +#include <prewin.h> +#include <postwin.h> +#elif defined __MACH__ +#include <mach/mach_time.h> +#else +#include <sys/time.h> +#endif + +#include <time.h> +#include <math.h> +#include <float.h> +#include <iostream> +#include <sstream> +#include <vector> + +#include <boost/scoped_ptr.hpp> + +#include <comphelper/random.hxx> +#include <opencl/openclconfig.hxx> +#include <opencl/openclwrapper.hxx> +#include <opencl/platforminfo.hxx> +#include <sal/log.hxx> + +#include "opencl_device.hxx" + +#define INPUTSIZE 15360 +#define OUTPUTSIZE 15360 + +#define STRINGIFY(...) #__VA_ARGS__"\n" + +#define DS_CHECK_STATUS(status, name) \ + if (CL_SUCCESS != status) \ + { \ + SAL_INFO("opencl.device", "Error code is " << status << " at " name); \ + } + +namespace OpenCLDevice { + +bool bIsInited = false; +bool bIsDeviceSelected = false; +ds_device selectedDevice; + +struct LibreOfficeDeviceScore +{ + double fTime; // small time means faster device + bool bNoCLErrors; // were there any opencl errors +}; + +struct LibreOfficeDeviceEvaluationIO +{ + std::vector<double> input0; + std::vector<double> input1; + std::vector<double> input2; + std::vector<double> input3; + std::vector<double> output; + unsigned long inputSize; + unsigned long outputSize; +}; + +struct timer +{ +#ifdef _WIN32 + LARGE_INTEGER start; +#else + long long start; +#endif +}; + +const char* source = STRINGIFY( +\n#if defined(KHR_DP_EXTENSION) +\n#pragma OPENCL EXTENSION cl_khr_fp64 : enable +\n#elif defined(AMD_DP_EXTENSION) +\n#pragma OPENCL EXTENSION cl_amd_fp64 : enable +\n#endif + \n + int isNan(fp_t a) { return a != a; } + fp_t fsum(fp_t a, fp_t b) { return a + b; } + + fp_t fAverage(__global fp_t* input) +{ + fp_t sum = 0; + int count = 0; + for (int i = 0; i < INPUTSIZE; i++) + { + if (!isNan(input[i])) + { + sum = fsum(input[i], sum); + count += 1; + } + } + return sum / (fp_t)count; +} + fp_t fMin(__global fp_t* input) +{ + fp_t min = MAXFLOAT; + for (int i = 0; i < INPUTSIZE; i++) + { + if (!isNan(input[i])) + { + min = fmin(input[i], min); + } + } + return min; +} + fp_t fSoP(__global fp_t* input0, __global fp_t* input1) +{ + fp_t sop = 0.0; + for (int i = 0; i < INPUTSIZE; i++) + { + sop += (isNan(input0[i]) ? 0 : input0[i]) * (isNan(input1[i]) ? 0 : input1[i]); + } + return sop; +} + __kernel void DynamicKernel( + __global fp_t* result, __global fp_t* input0, __global fp_t* input1, __global fp_t* input2, __global fp_t* input3) +{ + int gid0 = get_global_id(0); + fp_t tmp0 = fAverage(input0); + fp_t tmp1 = fMin(input1) * fSoP(input2, input3); + result[gid0] = fsum(tmp0, tmp1); +} + ); + +size_t sourceSize[] = { strlen(source) }; + +/*************************************************************************/ +/* INTERNAL FUNCTIONS */ +/*************************************************************************/ +/* Timer functions - start timer */ +void timerStart(timer* mytimer) +{ +#ifdef _WIN32 + QueryPerformanceCounter(&mytimer->start); +#elif defined __MACH__ + mytimer->start = mach_absolute_time(); +#else + struct timespec s; + clock_gettime(CLOCK_MONOTONIC, &s); + mytimer->start = (long long)s.tv_sec * (long long)1.0E6 + (long long)s.tv_nsec / (long long)1.0E3; +#endif +} + +/* Timer functions - get current value */ +double timerCurrent(timer* mytimer) +{ +#ifdef _WIN32 + LARGE_INTEGER stop, frequency; + QueryPerformanceCounter(&stop); + QueryPerformanceFrequency(&frequency); + double time = ((double)(stop.QuadPart - mytimer->start.QuadPart) / frequency.QuadPart); +#elif defined __MACH__ + static mach_timebase_info_data_t info = { 0, 0 }; + if (info.numer == 0) + mach_timebase_info(&info); + long long stop = mach_absolute_time(); + double time = ((stop - mytimer->start) * (double) info.numer / info.denom) / 1.0E9; +#else + struct timespec s; + long long stop; + clock_gettime(CLOCK_MONOTONIC, &s); + stop = (long long)s.tv_sec * (long long)1.0E6 + (long long)s.tv_nsec / (long long)1.0E3; + double time = ((double)(stop - mytimer->start) / 1.0E6); +#endif + return time; +} + +/* Random number generator */ +double random(double min, double max) +{ + if (min == max) + return min; + return comphelper::rng::uniform_real_distribution(min, max); +} + +/* Populate input */ +void populateInput(LibreOfficeDeviceEvaluationIO* testData) +{ + double* input0 = &testData->input0[0]; + double* input1 = &testData->input1[0]; + double* input2 = &testData->input2[0]; + double* input3 = &testData->input3[0]; + for (unsigned long i = 0; i < testData->inputSize; i++) + { + input0[i] = random(0, i); + input1[i] = random(0, i); + input2[i] = random(0, i); + input3[i] = random(0, i); + } +} +/* Encode score object as byte string */ +ds_status serializeScore(ds_device* device, void** serializedScore, unsigned int* serializedScoreSize) +{ + *serializedScoreSize = sizeof(LibreOfficeDeviceScore); + *serializedScore = (void*)new unsigned char[*serializedScoreSize]; + memcpy(*serializedScore, device->score, *serializedScoreSize); + return DS_SUCCESS; +} + +/* Parses byte string and stores in score object */ +ds_status deserializeScore(ds_device* device, const unsigned char* serializedScore, unsigned int serializedScoreSize) +{ + // check that serializedScoreSize == sizeof(LibreOfficeDeviceScore); + device->score = new LibreOfficeDeviceScore; + memcpy(device->score, serializedScore, serializedScoreSize); + return DS_SUCCESS; +} + +/* Releases memory held by score */ +ds_status releaseScore(void* score) +{ + if (NULL != score) + { + delete (LibreOfficeDeviceScore*)score; + } + return DS_SUCCESS; +} + +/* Evaluate devices */ +ds_status evaluateScoreForDevice(ds_device* device, void* evalData) +{ + if (DS_DEVICE_OPENCL_DEVICE == device->type) + { + /* Evaluating an OpenCL device */ + SAL_INFO("opencl.device", "Device: \"" << device->oclDeviceName << "\" (OpenCL) evaluation..."); + cl_int clStatus; + /* Check for 64-bit float extensions */ + size_t aDevExtInfoSize = 0; + clStatus = clGetDeviceInfo(device->oclDeviceID, CL_DEVICE_EXTENSIONS, 0, NULL, &aDevExtInfoSize); + DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clGetDeviceInfo"); + + char* aExtInfo = new char[aDevExtInfoSize]; + clStatus = clGetDeviceInfo(device->oclDeviceID, CL_DEVICE_EXTENSIONS, sizeof(char) * aDevExtInfoSize, aExtInfo, NULL); + DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clGetDeviceInfo"); + bool bKhrFp64Flag = false; + bool bAmdFp64Flag = false; + const char* buildOption = NULL; + std::string tmpStr("-Dfp_t=double -Dfp_t4=double4 -Dfp_t16=double16 -DINPUTSIZE="); + std::ostringstream tmpOStrStr; + tmpOStrStr << std::dec << INPUTSIZE; + tmpStr.append(tmpOStrStr.str()); + + if ((std::string(aExtInfo)).find("cl_khr_fp64") != std::string::npos) + { + bKhrFp64Flag = true; + //buildOption = "-D KHR_DP_EXTENSION -Dfp_t=double -Dfp_t4=double4 -Dfp_t16=double16"; + tmpStr.append(" -DKHR_DP_EXTENSION"); + buildOption = tmpStr.c_str(); + SAL_INFO("opencl.device", "... has cl_khr_fp64"); + } + else if ((std::string(aExtInfo)).find("cl_amd_fp64") != std::string::npos) + { + bAmdFp64Flag = true; + //buildOption = "-D AMD_DP_EXTENSION -Dfp_t=double -Dfp_t4=double4 -Dfp_t16=double16"; + tmpStr.append(" -DAMD_DP_EXTENSION"); + buildOption = tmpStr.c_str(); + SAL_INFO("opencl.device", "... has cl_amd_fp64"); + } + delete[] aExtInfo; + + if (!bKhrFp64Flag && !bAmdFp64Flag) + { + /* No 64-bit float support */ + device->score = (void*)new LibreOfficeDeviceScore; + ((LibreOfficeDeviceScore*)device->score)->fTime = DBL_MAX; + ((LibreOfficeDeviceScore*)device->score)->bNoCLErrors = true; + SAL_INFO("opencl.device", "... no fp64 support"); + } + else + { + /* 64-bit float support present */ + + /* Create context and command queue */ + cl_context clContext = clCreateContext(NULL, 1, &device->oclDeviceID, NULL, NULL, &clStatus); + DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clCreateContext"); + cl_command_queue clQueue = clCreateCommandQueue(clContext, device->oclDeviceID, 0, &clStatus); + DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clCreateCommandQueue"); + + /* Build program */ + cl_program clProgram = clCreateProgramWithSource(clContext, 1, &source, sourceSize, &clStatus); + DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clCreateProgramWithSource"); + clStatus = clBuildProgram(clProgram, 1, &device->oclDeviceID, buildOption, NULL, NULL); + DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clBuildProgram"); + if (CL_SUCCESS != clStatus) + { + /* Build program failed */ + size_t length; + char* buildLog; + clStatus = clGetProgramBuildInfo(clProgram, device->oclDeviceID, CL_PROGRAM_BUILD_LOG, 0, NULL, &length); + buildLog = (char*)malloc(length); + clGetProgramBuildInfo(clProgram, device->oclDeviceID, CL_PROGRAM_BUILD_LOG, length, buildLog, &length); + SAL_INFO("opencl.device", "Build Errors:\n" << buildLog); + free(buildLog); + + device->score = (void*)new LibreOfficeDeviceScore; + ((LibreOfficeDeviceScore*)device->score)->fTime = DBL_MAX; + ((LibreOfficeDeviceScore*)device->score)->bNoCLErrors = false; + } + else + { + /* Build program succeeded */ + timer kernelTime; + timerStart(&kernelTime); + + /* Run kernel */ + LibreOfficeDeviceEvaluationIO* testData = (LibreOfficeDeviceEvaluationIO*)evalData; + cl_kernel clKernel = clCreateKernel(clProgram, "DynamicKernel", &clStatus); + DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clCreateKernel"); + cl_mem clResult = clCreateBuffer(clContext, CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR, sizeof(cl_double) * testData->outputSize, &testData->output[0], &clStatus); + DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clCreateBuffer::clResult"); + cl_mem clInput0 = clCreateBuffer(clContext, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, sizeof(cl_double) * testData->inputSize, &testData->input0[0], &clStatus); + DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clCreateBuffer::clInput0"); + cl_mem clInput1 = clCreateBuffer(clContext, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, sizeof(cl_double) * testData->inputSize, &testData->input1[0], &clStatus); + DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clCreateBuffer::clInput1"); + cl_mem clInput2 = clCreateBuffer(clContext, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, sizeof(cl_double) * testData->inputSize, &testData->input2[0], &clStatus); + DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clCreateBuffer::clInput2"); + cl_mem clInput3 = clCreateBuffer(clContext, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, sizeof(cl_double) * testData->inputSize, &testData->input3[0], &clStatus); + DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clCreateBuffer::clInput3"); + clStatus = clSetKernelArg(clKernel, 0, sizeof(cl_mem), (void*)&clResult); + DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clSetKernelArg::clResult"); + clStatus = clSetKernelArg(clKernel, 1, sizeof(cl_mem), (void*)&clInput0); + DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clSetKernelArg::clInput0"); + clStatus = clSetKernelArg(clKernel, 2, sizeof(cl_mem), (void*)&clInput1); + DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clSetKernelArg::clInput1"); + clStatus = clSetKernelArg(clKernel, 3, sizeof(cl_mem), (void*)&clInput2); + DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clSetKernelArg::clInput2"); + clStatus = clSetKernelArg(clKernel, 4, sizeof(cl_mem), (void*)&clInput3); + DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clSetKernelArg::clInput3"); + size_t globalWS[1] = { testData->outputSize }; + size_t localSize[1] = { 64 }; + clStatus = clEnqueueNDRangeKernel(clQueue, clKernel, 1, 0, globalWS, localSize, 0, NULL, NULL); + DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clEnqueueNDRangeKernel"); + clFinish(clQueue); + clReleaseMemObject(clInput3); + clReleaseMemObject(clInput2); + clReleaseMemObject(clInput1); + clReleaseMemObject(clInput0); + clReleaseMemObject(clResult); + clReleaseKernel(clKernel); + + device->score = (void*)new LibreOfficeDeviceScore; + ((LibreOfficeDeviceScore*)device->score)->fTime = timerCurrent(&kernelTime); + ((LibreOfficeDeviceScore*)device->score)->bNoCLErrors = true; + } + + clReleaseProgram(clProgram); + clReleaseCommandQueue(clQueue); + clReleaseContext(clContext); + } + } + else + { + /* Evaluating an Native CPU device */ + SAL_INFO("opencl.device", "Device: \"CPU\" (Native) evaluation..."); + timer kernelTime; + timerStart(&kernelTime); + + LibreOfficeDeviceEvaluationIO* testData = (LibreOfficeDeviceEvaluationIO*)evalData; + for (unsigned long j = 0; j < testData->outputSize; j++) + { + double fAverage = 0.0f; + double fMin = DBL_MAX; + double fSoP = 0.0f; + for (unsigned long i = 0; i < testData->inputSize; i++) + { + fAverage += testData->input0[i]; + fMin = ((fMin < testData->input1[i]) ? fMin : testData->input1[i]); + fSoP += testData->input2[i] * testData->input3[i]; + } + fAverage /= testData->inputSize; + testData->output[j] = fAverage + (fMin * fSoP); + } + + // InterpretTail - the S/W fallback is nothing like as efficient + // as any good openCL implementation: no SIMD, tons of branching + // in the inner loops etc. Generously characterise it as only 10x + // slower than the above. + float fInterpretTailFactor = 10.0; + + device->score = (void*)new LibreOfficeDeviceScore; + ((LibreOfficeDeviceScore*)device->score)->fTime = timerCurrent(&kernelTime); + ((LibreOfficeDeviceScore*)device->score)->bNoCLErrors = true; + + ((LibreOfficeDeviceScore*)device->score)->fTime *= fInterpretTailFactor; + } + return DS_SUCCESS; +} + +/* Pick best device */ +ds_status pickBestDevice(ds_profile* profile, int* bestDeviceIdx) +{ + double bestScore = DBL_MAX; + *bestDeviceIdx = -1; + + for (unsigned int d = 0; d < profile->numDevices; d++) + { + ds_device device = profile->devices[d]; + LibreOfficeDeviceScore *pScore = (LibreOfficeDeviceScore*)device.score; + + // Check blacklist and whitelist for actual devices + if (device.type == DS_DEVICE_OPENCL_DEVICE) + { + // There is a silly impedance mismatch here. Why do we + // need two different ways to describe an OpenCL platform + // and an OpenCL device driver? + + OpenCLPlatformInfo aPlatform; + OpenCLDeviceInfo aDevice; + + // We know that only the below fields are used by checkForKnownBadCompilers() + aPlatform.maVendor = OUString(device.oclPlatformVendor, strlen(device.oclPlatformVendor), RTL_TEXTENCODING_UTF8); + aDevice.maName = OUString(device.oclDeviceName, strlen(device.oclDeviceName), RTL_TEXTENCODING_UTF8); + aDevice.maDriver = OUString(device.oclDriverVersion, strlen(device.oclDriverVersion), RTL_TEXTENCODING_UTF8); + + // If blacklisted or not whitelisted, ignore it + if (OpenCLConfig::get().checkImplementation(aPlatform, aDevice)) + { + SAL_INFO("opencl.device", "Device[" << d << "] " << device.oclDeviceName << " is blacklisted or not whitelisted"); + pScore->fTime = DBL_MAX; + pScore->bNoCLErrors = true; + } + } + + double fScore = DBL_MAX; + if (pScore) + { + fScore = pScore->fTime; + } + else + { + SAL_INFO("opencl.device", "Unusual null score"); + } + + if (DS_DEVICE_OPENCL_DEVICE == device.type) + { + SAL_INFO("opencl.device", "Device[" << d << "] " << device.oclDeviceName << " (OpenCL) score is " << fScore); + } + else + { + SAL_INFO("opencl.device", "Device[" << d << "] CPU (Native) score is " << fScore); + } + if (fScore < bestScore) + { + bestScore = fScore; + *bestDeviceIdx = d; + } + } + if (DS_DEVICE_OPENCL_DEVICE == profile->devices[*bestDeviceIdx].type) + { + SAL_INFO("opencl.device", "Selected Device[" << *bestDeviceIdx << "]: " << profile->devices[*bestDeviceIdx].oclDeviceName << "(OpenCL)."); + } + else + { + SAL_INFO("opencl.device", "Selected Device[" << *bestDeviceIdx << "]: CPU (Native)."); + } + + return DS_SUCCESS; +} + +/* Return device ID for matching device name */ +int matchDevice(ds_profile* profile, char* deviceName) +{ + int deviceMatch = -1; + for (unsigned int d = 0; d < profile->numDevices - 1; d++) + { + if ((std::string(profile->devices[d].oclDeviceName)).find(deviceName) != std::string::npos) deviceMatch = d; + } + if (std::string("NATIVE_CPU").find(deviceName) != std::string::npos) deviceMatch = profile->numDevices - 1; + return deviceMatch; +} + +/*************************************************************************/ +/* EXTERNAL FUNCTIONS */ +/*************************************************************************/ +ds_device getDeviceSelection(const char* sProfilePath, bool bForceSelection) +{ + /* Run only if device is not yet selected */ + if (!bIsDeviceSelected || bForceSelection) + { + /* Setup */ + ds_status status; + ds_profile* profile = NULL; + status = initDSProfile(&profile, "LibreOffice v0.1"); + + if (!profile) + { + // failed to initialize profile. + selectedDevice.type = DS_DEVICE_NATIVE_CPU; + return selectedDevice; + } + + /* Try reading scores from file */ + std::string tmpStr(sProfilePath); + const char* fileName = tmpStr.append("sc_opencl_device_profile.dat").c_str(); + if (!bForceSelection) + { + status = readProfileFromFile(profile, deserializeScore, fileName); + } + else + { + status = DS_INVALID_PROFILE; + SAL_INFO("opencl.device", "Performing forced profiling."); + } + if (DS_SUCCESS != status) + { + if (!bForceSelection) + { + SAL_INFO("opencl.device", "Profile file not available (" << fileName << "); performing profiling."); + } + + /* Populate input data for micro-benchmark */ + boost::scoped_ptr<LibreOfficeDeviceEvaluationIO> testData(new LibreOfficeDeviceEvaluationIO); + testData->inputSize = INPUTSIZE; + testData->outputSize = OUTPUTSIZE; + testData->input0.resize(testData->inputSize); + testData->input1.resize(testData->inputSize); + testData->input2.resize(testData->inputSize); + testData->input3.resize(testData->inputSize); + testData->output.resize(testData->outputSize); + populateInput(testData.get()); + + /* Perform evaluations */ + unsigned int numUpdates; + status = profileDevices(profile, DS_EVALUATE_ALL, evaluateScoreForDevice, (void*)testData.get(), &numUpdates); + + if (DS_SUCCESS == status) + { + /* Write scores to file */ + status = writeProfileToFile(profile, serializeScore, fileName); + if (DS_SUCCESS == status) + { + SAL_INFO("opencl.device", "Scores written to file (" << fileName << ")."); + } + else + { + SAL_INFO("opencl.device", "Error saving scores to file (" << fileName << "); scores not written to file."); + } + } + else + { + SAL_INFO("opencl.device", "Unable to evaluate performance; scores not written to file."); + } + } + else + { + SAL_INFO("opencl.device", "Profile read from file (" << fileName << ")."); + } + + /* Pick best device */ + int bestDeviceIdx; + pickBestDevice(profile, &bestDeviceIdx); + + /* Overide if necessary */ + char* overrideDeviceStr = getenv("SC_OPENCL_DEVICE_OVERRIDE"); + if (NULL != overrideDeviceStr) + { + int overrideDeviceIdx = matchDevice(profile, overrideDeviceStr); + if (-1 != overrideDeviceIdx) + { + SAL_INFO("opencl.device", "Overriding Device Selection (SC_OPENCL_DEVICE_OVERRIDE=" << overrideDeviceStr << ")."); + bestDeviceIdx = overrideDeviceIdx; + if (DS_DEVICE_OPENCL_DEVICE == profile->devices[bestDeviceIdx].type) + { + SAL_INFO("opencl.device", "Selected Device[" << bestDeviceIdx << "]: " << profile->devices[bestDeviceIdx].oclDeviceName << " (OpenCL)."); + } + else + { + SAL_INFO("opencl.device", "Selected Device[" << bestDeviceIdx << "]: CPU (Native)."); + } + } + else + { + SAL_INFO("opencl.device", "Ignoring invalid SC_OPENCL_DEVICE_OVERRIDE=" << overrideDeviceStr << ")."); + } + } + + /* Final device selection */ + selectedDevice = profile->devices[bestDeviceIdx]; + bIsDeviceSelected = true; + + /* Release profile */ + releaseDSProfile(profile, releaseScore); + } + return selectedDevice; +} + +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/opencl/source/openclwrapper.cxx b/opencl/source/openclwrapper.cxx new file mode 100644 index 000000000000..86ba6cd3a4e3 --- /dev/null +++ b/opencl/source/openclwrapper.cxx @@ -0,0 +1,804 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + */ + +#include <config_folders.h> + +#include "opencl_device.hxx" + +#include <comphelper/string.hxx> +#include <opencl/openclconfig.hxx> +#include <opencl/openclwrapper.hxx> +#include <osl/file.hxx> +#include <rtl/bootstrap.hxx> +#include <rtl/digest.h> +#include <rtl/strbuf.hxx> +#include <rtl/ustring.hxx> +#include <sal/config.h> + +#include <boost/scoped_array.hpp> +#include <unicode/regex.h> + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include <cmath> + +#ifdef _WIN32 +#include <prewin.h> +#include <postwin.h> +#define OPENCL_DLL_NAME "OpenCL.dll" +#elif defined(MACOSX) +#define OPENCL_DLL_NAME NULL +#else +#define OPENCL_DLL_NAME "libOpenCL.so" +#endif + +#define DEVICE_NAME_LENGTH 1024 +#define DRIVER_VERSION_LENGTH 1024 +#define PLATFORM_VERSION_LENGTH 1024 + +using namespace std; + +namespace opencl { + +GPUEnv OpenCLDevice::gpuEnv; +bool OpenCLDevice::bIsInited = false; + +namespace { + +OString generateMD5(const void* pData, size_t length) +{ + sal_uInt8 pBuffer[RTL_DIGEST_LENGTH_MD5]; + rtlDigestError aError = rtl_digest_MD5(pData, length, + pBuffer, RTL_DIGEST_LENGTH_MD5); + SAL_WARN_IF(aError != rtl_Digest_E_None, "opencl", "md5 generation failed"); + + OStringBuffer aBuffer; + const char* pString = "0123456789ABCDEF"; + for(size_t i = 0; i < RTL_DIGEST_LENGTH_MD5; ++i) + { + sal_uInt8 val = pBuffer[i]; + aBuffer.append(pString[val/16]); + aBuffer.append(pString[val%16]); + } + return aBuffer.makeStringAndClear(); +} + +OString getCacheFolder() +{ + OUString url("${$BRAND_BASE_DIR/" LIBO_ETC_FOLDER "/" SAL_CONFIGFILE("bootstrap") ":UserInstallation}/cache/"); + rtl::Bootstrap::expandMacros(url); + + osl::Directory::create(url); + + return rtl::OUStringToOString(url, RTL_TEXTENCODING_UTF8); +} + +} + +OString OpenCLDevice::maCacheFolder = getCacheFolder(); + +void OpenCLDevice::setKernelEnv( KernelEnv *envInfo ) +{ + envInfo->mpkContext = gpuEnv.mpContext; + envInfo->mpkCmdQueue = gpuEnv.mpCmdQueue; + envInfo->mpkProgram = gpuEnv.mpArryPrograms[0]; +} + +namespace { + +OString createFileName(cl_device_id deviceId, const char* clFileName) +{ + OString fileName(clFileName); + sal_Int32 nIndex = fileName.lastIndexOf(".cl"); + if(nIndex > 0) + fileName = fileName.copy(0, nIndex); + + char deviceName[DEVICE_NAME_LENGTH] = {0}; + clGetDeviceInfo(deviceId, CL_DEVICE_NAME, + sizeof(deviceName), deviceName, NULL); + + char driverVersion[DRIVER_VERSION_LENGTH] = {0}; + clGetDeviceInfo(deviceId, CL_DRIVER_VERSION, + sizeof(driverVersion), driverVersion, NULL); + + cl_platform_id platformId; + clGetDeviceInfo(deviceId, CL_DEVICE_PLATFORM, + sizeof(platformId), &platformId, NULL); + + char platformVersion[PLATFORM_VERSION_LENGTH] = {0}; + clGetPlatformInfo(platformId, CL_PLATFORM_VERSION, sizeof(platformVersion), + platformVersion, NULL); + + // create hash for deviceName + driver version + platform version + OString aString = OString(deviceName) + driverVersion + platformVersion; + OString aHash = generateMD5(aString.getStr(), aString.getLength()); + + return OpenCLDevice::maCacheFolder + fileName + "-" + + aHash + ".bin"; +} + +} + +std::vector<boost::shared_ptr<osl::File> > OpenCLDevice::binaryGenerated( const char * clFileName, cl_context context ) +{ + size_t numDevices=0; + + std::vector<boost::shared_ptr<osl::File> > aGeneratedFiles; + cl_int clStatus = clGetContextInfo( context, CL_CONTEXT_DEVICES, + 0, NULL, &numDevices ); + numDevices /= sizeof(numDevices); + + if(clStatus != CL_SUCCESS) + return aGeneratedFiles; + + // grab the handles to all of the devices in the context. + boost::scoped_array<cl_device_id> pArryDevsID(new cl_device_id[numDevices]); + clStatus = clGetContextInfo( context, CL_CONTEXT_DEVICES, + sizeof( cl_device_id ) * numDevices, pArryDevsID.get(), NULL ); + + if(clStatus != CL_SUCCESS) + return aGeneratedFiles; + + for ( size_t i = 0; i < numDevices; i++ ) + { + if ( pArryDevsID[i] != 0 ) + { + OString fileName = createFileName(gpuEnv.mpArryDevsID[i], clFileName); + osl::File* pNewFile = new osl::File(rtl::OStringToOUString(fileName, RTL_TEXTENCODING_UTF8)); + if(pNewFile->open(osl_File_OpenFlag_Read) == osl::FileBase::E_None) + { + aGeneratedFiles.push_back(boost::shared_ptr<osl::File>(pNewFile)); + SAL_INFO("opencl.file", "Opening binary file '" << fileName << "' for reading: success"); + } + else + { + SAL_INFO("opencl.file", "Opening binary file '" << fileName << "' for reading: FAIL"); + delete pNewFile; + break; + } + } + } + + return aGeneratedFiles; +} + +bool OpenCLDevice::writeBinaryToFile( const OString& rFileName, const char* binary, size_t numBytes ) +{ + osl::File file(rtl::OStringToOUString(rFileName, RTL_TEXTENCODING_UTF8)); + osl::FileBase::RC status = file.open( + osl_File_OpenFlag_Write | osl_File_OpenFlag_Create ); + + if(status != osl::FileBase::E_None) + return false; + + sal_uInt64 nBytesWritten = 0; + file.write( binary, numBytes, nBytesWritten ); + + assert(numBytes == nBytesWritten); + + return true; +} + +bool OpenCLDevice::generatBinFromKernelSource( cl_program program, const char * clFileName ) +{ + cl_uint numDevices; + + cl_int clStatus = clGetProgramInfo( program, CL_PROGRAM_NUM_DEVICES, + sizeof(numDevices), &numDevices, NULL ); + CHECK_OPENCL( clStatus, "clGetProgramInfo" ); + + std::vector<cl_device_id> pArryDevsID(numDevices); + /* grab the handles to all of the devices in the program. */ + clStatus = clGetProgramInfo( program, CL_PROGRAM_DEVICES, + sizeof(cl_device_id) * numDevices, &pArryDevsID[0], NULL ); + CHECK_OPENCL( clStatus, "clGetProgramInfo" ); + + /* figure out the sizes of each of the binaries. */ + std::vector<size_t> binarySizes(numDevices); + + clStatus = clGetProgramInfo( program, CL_PROGRAM_BINARY_SIZES, + sizeof(size_t) * numDevices, &binarySizes[0], NULL ); + CHECK_OPENCL( clStatus, "clGetProgramInfo" ); + + /* copy over all of the generated binaries. */ + boost::scoped_array<char*> binaries(new char*[numDevices]); + + for ( size_t i = 0; i < numDevices; i++ ) + { + if ( binarySizes[i] != 0 ) + { + binaries[i] = new char[binarySizes[i]]; + } + else + { + binaries[i] = NULL; + } + } + + clStatus = clGetProgramInfo( program, CL_PROGRAM_BINARIES, + sizeof(char *) * numDevices, binaries.get(), NULL ); + CHECK_OPENCL(clStatus,"clGetProgramInfo"); + + /* dump out each binary into its own separate file. */ + for ( size_t i = 0; i < numDevices; i++ ) + { + + if ( binarySizes[i] != 0 ) + { + OString fileName = createFileName(pArryDevsID[i], clFileName); + if ( !writeBinaryToFile( fileName, + binaries[i], binarySizes[i] ) ) + SAL_INFO("opencl.file", "Writing binary file '" << fileName << "': FAIL"); + else + SAL_INFO("opencl.file", "Writing binary file '" << fileName << "': success"); + } + } + + // Release all resouces and memory + for ( size_t i = 0; i < numDevices; i++ ) + { + delete[] binaries[i]; + } + + return true; +} + +bool OpenCLDevice::initOpenCLAttr( OpenCLEnv * env ) +{ + if ( gpuEnv.mnIsUserCreated ) + return true; + + gpuEnv.mpContext = env->mpOclContext; + gpuEnv.mpPlatformID = env->mpOclPlatformID; + gpuEnv.mpDevID = env->mpOclDevsID; + gpuEnv.mpCmdQueue = env->mpOclCmdQueue; + + gpuEnv.mnIsUserCreated = 1; + + return false; +} + +void OpenCLDevice::releaseOpenCLEnv( GPUEnv *gpuInfo ) +{ + if ( !bIsInited ) + { + return; + } + + if ( gpuEnv.mpCmdQueue ) + { + clReleaseCommandQueue( gpuEnv.mpCmdQueue ); + gpuEnv.mpCmdQueue = NULL; + } + if ( gpuEnv.mpContext ) + { + clReleaseContext( gpuEnv.mpContext ); + gpuEnv.mpContext = NULL; + } + bIsInited = false; + gpuInfo->mnIsUserCreated = 0; + free( gpuInfo->mpArryDevsID ); + + return; +} + +namespace { + +bool buildProgram(const char* buildOption, GPUEnv* gpuInfo, int idx) +{ + cl_int clStatus; + //char options[512]; + // create a cl program executable for all the devices specified + if (!gpuInfo->mnIsUserCreated) + { + clStatus = clBuildProgram(gpuInfo->mpArryPrograms[idx], 1, gpuInfo->mpArryDevsID, + buildOption, NULL, NULL); + } + else + { + clStatus = clBuildProgram(gpuInfo->mpArryPrograms[idx], 1, &(gpuInfo->mpDevID), + buildOption, NULL, NULL); + } + + if ( clStatus != CL_SUCCESS ) + { + size_t length; + if ( !gpuInfo->mnIsUserCreated ) + { + clStatus = clGetProgramBuildInfo( gpuInfo->mpArryPrograms[idx], gpuInfo->mpArryDevsID[0], + CL_PROGRAM_BUILD_LOG, 0, NULL, &length ); + } + else + { + clStatus = clGetProgramBuildInfo( gpuInfo->mpArryPrograms[idx], gpuInfo->mpDevID, + CL_PROGRAM_BUILD_LOG, 0, NULL, &length); + } + if ( clStatus != CL_SUCCESS ) + { + return false; + } + + boost::scoped_array<char> buildLog(new char[length]); + if ( !gpuInfo->mnIsUserCreated ) + { + clStatus = clGetProgramBuildInfo( gpuInfo->mpArryPrograms[idx], gpuInfo->mpArryDevsID[0], + CL_PROGRAM_BUILD_LOG, length, buildLog.get(), &length ); + } + else + { + clStatus = clGetProgramBuildInfo( gpuInfo->mpArryPrograms[idx], gpuInfo->mpDevID, + CL_PROGRAM_BUILD_LOG, length, buildLog.get(), &length ); + } + if ( clStatus != CL_SUCCESS ) + { + return false; + } + + OString aBuildLogFileURL = OpenCLDevice::maCacheFolder + "kernel-build.log"; + osl::File aBuildLogFile(rtl::OStringToOUString(aBuildLogFileURL, RTL_TEXTENCODING_UTF8)); + osl::FileBase::RC status = aBuildLogFile.open( + osl_File_OpenFlag_Write | osl_File_OpenFlag_Create ); + + if(status != osl::FileBase::E_None) + return false; + + sal_uInt64 nBytesWritten = 0; + aBuildLogFile.write( buildLog.get(), length, nBytesWritten ); + + return false; + } + + return true; +} + +} + +bool OpenCLDevice::buildProgramFromBinary(const char* buildOption, GPUEnv* gpuInfo, const char* filename, int idx) +{ + size_t numDevices; + cl_int clStatus = clGetContextInfo( gpuInfo->mpContext, CL_CONTEXT_DEVICES, + 0, NULL, &numDevices ); + numDevices /= sizeof(numDevices); + CHECK_OPENCL( clStatus, "clGetContextInfo" ); + + std::vector<boost::shared_ptr<osl::File> > aGeneratedFiles = binaryGenerated( + filename, gpuInfo->mpContext ); + + if (aGeneratedFiles.size() == numDevices) + { + boost::scoped_array<size_t> length(new size_t[numDevices]); + boost::scoped_array<unsigned char*> pBinary(new unsigned char*[numDevices]); + for(size_t i = 0; i < numDevices; ++i) + { + sal_uInt64 nSize; + aGeneratedFiles[i]->getSize(nSize); + unsigned char* binary = new unsigned char[nSize]; + sal_uInt64 nBytesRead; + aGeneratedFiles[i]->read(binary, nSize, nBytesRead); + if(nSize != nBytesRead) + assert(false); + + length[i] = nBytesRead; + + pBinary[i] = binary; + } + + // grab the handles to all of the devices in the context. + boost::scoped_array<cl_device_id> pArryDevsID(new cl_device_id[numDevices]); + clStatus = clGetContextInfo( gpuInfo->mpContext, CL_CONTEXT_DEVICES, + sizeof( cl_device_id ) * numDevices, pArryDevsID.get(), NULL ); + + if(clStatus != CL_SUCCESS) + { + for(size_t i = 0; i < numDevices; ++i) + { + delete[] pBinary[i]; + } + return false; + } + + cl_int binary_status; + + gpuInfo->mpArryPrograms[idx] = clCreateProgramWithBinary( gpuInfo->mpContext,numDevices, + pArryDevsID.get(), length.get(), (const unsigned char**) pBinary.get(), + &binary_status, &clStatus ); + if(clStatus != CL_SUCCESS) + { + // something went wrong, fall back to compiling from source + return false; + } + for(size_t i = 0; i < numDevices; ++i) + { + delete[] pBinary[i]; + } + } + + if ( !gpuInfo->mpArryPrograms[idx] ) + { + return false; + } + return buildProgram(buildOption, gpuInfo, idx); +} + +bool OpenCLDevice::initOpenCLRunEnv( int argc ) +{ + if ( ( argc > MAX_CLFILE_NUM ) || ( argc < 0 ) ) + return true; + + if ( !bIsInited ) + { + if ( !gpuEnv.mnIsUserCreated ) + memset( &gpuEnv, 0, sizeof(gpuEnv) ); + + //initialize devices, context, command_queue + bool status = initOpenCLRunEnv( &gpuEnv ); + if ( status ) + { + return true; + } + //initialize program, kernelName, kernelCount + if( getenv( "SC_FLOAT" ) ) + { + gpuEnv.mnKhrFp64Flag = false; + gpuEnv.mnAmdFp64Flag = false; + } + if( gpuEnv.mnKhrFp64Flag ) + { + SAL_INFO("opencl", "Use Khr double"); + } + else if( gpuEnv.mnAmdFp64Flag ) + { + SAL_INFO("opencl", "Use AMD double type"); + } + else + { + SAL_INFO("opencl", "USE float type"); + } + bIsInited = true; + } + return false; +} + +namespace { + +void checkDeviceForDoubleSupport(cl_device_id deviceId, bool& bKhrFp64, bool& bAmdFp64) +{ + bKhrFp64 = false; + bAmdFp64 = false; + + // Check device extensions for double type + size_t aDevExtInfoSize = 0; + + cl_uint clStatus = clGetDeviceInfo( deviceId, CL_DEVICE_EXTENSIONS, 0, NULL, &aDevExtInfoSize ); + if( clStatus != CL_SUCCESS ) + return; + + boost::scoped_array<char> pExtInfo(new char[aDevExtInfoSize]); + + clStatus = clGetDeviceInfo( deviceId, CL_DEVICE_EXTENSIONS, + sizeof(char) * aDevExtInfoSize, pExtInfo.get(), NULL); + + if( clStatus != CL_SUCCESS ) + return; + + if ( strstr( pExtInfo.get(), "cl_khr_fp64" ) ) + { + bKhrFp64 = true; + } + else + { + // Check if cl_amd_fp64 extension is supported + if ( strstr( pExtInfo.get(), "cl_amd_fp64" ) ) + bAmdFp64 = true; + } +} + +} + +bool OpenCLDevice::initOpenCLRunEnv( GPUEnv *gpuInfo ) +{ + bool bKhrFp64 = false; + bool bAmdFp64 = false; + + checkDeviceForDoubleSupport(gpuInfo->mpArryDevsID[0], bKhrFp64, bAmdFp64); + + gpuInfo->mnKhrFp64Flag = bKhrFp64; + gpuInfo->mnAmdFp64Flag = bAmdFp64; + + return false; +} + +namespace { + +// based on crashes and hanging during kernel compilation +void createDeviceInfo(cl_device_id aDeviceId, OpenCLPlatformInfo& rPlatformInfo) +{ + OpenCLDeviceInfo aDeviceInfo; + aDeviceInfo.device = aDeviceId; + + char pName[DEVICE_NAME_LENGTH]; + cl_int nState = clGetDeviceInfo(aDeviceId, CL_DEVICE_NAME, DEVICE_NAME_LENGTH, pName, NULL); + if(nState != CL_SUCCESS) + return; + + aDeviceInfo.maName = OUString::createFromAscii(pName); + + char pVendor[DEVICE_NAME_LENGTH]; + nState = clGetDeviceInfo(aDeviceId, CL_DEVICE_VENDOR, DEVICE_NAME_LENGTH, pVendor, NULL); + if(nState != CL_SUCCESS) + return; + + aDeviceInfo.maVendor = OUString::createFromAscii(pVendor); + + cl_ulong nMemSize; + nState = clGetDeviceInfo(aDeviceId, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(nMemSize), &nMemSize, NULL); + if(nState != CL_SUCCESS) + return; + + aDeviceInfo.mnMemory = nMemSize; + + cl_uint nClockFrequency; + nState = clGetDeviceInfo(aDeviceId, CL_DEVICE_MAX_CLOCK_FREQUENCY, sizeof(nClockFrequency), &nClockFrequency, NULL); + if(nState != CL_SUCCESS) + return; + + aDeviceInfo.mnFrequency = nClockFrequency; + + cl_uint nComputeUnits; + nState = clGetDeviceInfo(aDeviceId, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(nComputeUnits), &nComputeUnits, NULL); + if(nState != CL_SUCCESS) + return; + + char pDriver[DEVICE_NAME_LENGTH]; + nState = clGetDeviceInfo(aDeviceId, CL_DRIVER_VERSION, DEVICE_NAME_LENGTH, pDriver, NULL); + + if(nState != CL_SUCCESS) + return; + + aDeviceInfo.maDriver = OUString::createFromAscii(pDriver); + + bool bKhrFp64 = false; + bool bAmdFp64 = false; + checkDeviceForDoubleSupport(aDeviceId, bKhrFp64, bAmdFp64); + + // only list devices that support double + if(!bKhrFp64 && !bAmdFp64) + return; + + aDeviceInfo.mnComputeUnits = nComputeUnits; + + if(!OpenCLConfig::get().checkImplementation(rPlatformInfo, aDeviceInfo)) + rPlatformInfo.maDevices.push_back(aDeviceInfo); +} + +bool createPlatformInfo(cl_platform_id nPlatformId, OpenCLPlatformInfo& rPlatformInfo) +{ + rPlatformInfo.platform = nPlatformId; + char pName[64]; + cl_int nState = clGetPlatformInfo(nPlatformId, CL_PLATFORM_NAME, 64, + pName, NULL); + if(nState != CL_SUCCESS) + return false; + rPlatformInfo.maName = OUString::createFromAscii(pName); + + char pVendor[64]; + nState = clGetPlatformInfo(nPlatformId, CL_PLATFORM_VENDOR, 64, + pVendor, NULL); + if(nState != CL_SUCCESS) + return false; + + rPlatformInfo.maVendor = OUString::createFromAscii(pVendor); + + cl_uint nDevices; + nState = clGetDeviceIDs(nPlatformId, CL_DEVICE_TYPE_ALL, 0, NULL, &nDevices); + if(nState != CL_SUCCESS) + return false; + + // memory leak that does not matter + // memory is stored in static variable that lives through the whole program + cl_device_id* pDevices = new cl_device_id[nDevices]; + nState = clGetDeviceIDs(nPlatformId, CL_DEVICE_TYPE_ALL, nDevices, pDevices, NULL); + if(nState != CL_SUCCESS) + return false; + + for(size_t i = 0; i < nDevices; ++i) + { + createDeviceInfo(pDevices[i], rPlatformInfo); + } + + return true; +} + +} + +const std::vector<OpenCLPlatformInfo>& fillOpenCLInfo() +{ + static std::vector<OpenCLPlatformInfo> aPlatforms; + if(!aPlatforms.empty()) + return aPlatforms; + + int status = clewInit(OPENCL_DLL_NAME); + if (status < 0) + return aPlatforms; + + cl_uint nPlatforms; + cl_int nState = clGetPlatformIDs(0, NULL, &nPlatforms); + + if(nState != CL_SUCCESS) + return aPlatforms; + + // memory leak that does not matter, + // memory is stored in static instance aPlatforms + cl_platform_id* pPlatforms = new cl_platform_id[nPlatforms]; + nState = clGetPlatformIDs(nPlatforms, pPlatforms, NULL); + + if(nState != CL_SUCCESS) + return aPlatforms; + + for(size_t i = 0; i < nPlatforms; ++i) + { + OpenCLPlatformInfo aPlatformInfo; + if(createPlatformInfo(pPlatforms[i], aPlatformInfo)) + aPlatforms.push_back(aPlatformInfo); + } + + return aPlatforms; +} + +namespace { + +cl_device_id findDeviceIdByDeviceString(const OUString& rString, const std::vector<OpenCLPlatformInfo>& rPlatforms) +{ + std::vector<OpenCLPlatformInfo>::const_iterator it = rPlatforms.begin(), itEnd = rPlatforms.end(); + for(; it != itEnd; ++it) + { + std::vector<OpenCLDeviceInfo>::const_iterator itr = it->maDevices.begin(), itrEnd = it->maDevices.end(); + for(; itr != itrEnd; ++itr) + { + OUString aDeviceId = it->maVendor + " " + itr->maName; + if(rString == aDeviceId) + { + return static_cast<cl_device_id>(itr->device); + } + } + } + + return NULL; +} + +void findDeviceInfoFromDeviceId(cl_device_id aDeviceId, size_t& rDeviceId, size_t& rPlatformId) +{ + cl_platform_id platformId; + cl_int nState = clGetDeviceInfo(aDeviceId, CL_DEVICE_PLATFORM, + sizeof(platformId), &platformId, NULL); + + if(nState != CL_SUCCESS) + return; + + const std::vector<OpenCLPlatformInfo>& rPlatforms = fillOpenCLInfo(); + for(size_t i = 0; i < rPlatforms.size(); ++i) + { + cl_platform_id platId = static_cast<cl_platform_id>(rPlatforms[i].platform); + if(platId != platformId) + continue; + + for(size_t j = 0; j < rPlatforms[i].maDevices.size(); ++j) + { + cl_device_id id = static_cast<cl_device_id>(rPlatforms[i].maDevices[j].device); + if(id == aDeviceId) + { + rDeviceId = j; + rPlatformId = i; + return; + } + } + } +} + +} + +bool switchOpenCLDevice(const OUString* pDevice, bool bAutoSelect, bool bForceEvaluation) +{ + if(fillOpenCLInfo().empty()) + return false; + + cl_device_id pDeviceId = NULL; + if(pDevice) + pDeviceId = findDeviceIdByDeviceString(*pDevice, fillOpenCLInfo()); + + if(!pDeviceId || bAutoSelect) + { + int status = clewInit(OPENCL_DLL_NAME); + if (status < 0) + return false; + + OUString url("${$BRAND_BASE_DIR/" LIBO_ETC_FOLDER "/" SAL_CONFIGFILE("bootstrap") ":UserInstallation}/cache/"); + rtl::Bootstrap::expandMacros(url); + OUString path; + osl::FileBase::getSystemPathFromFileURL(url,path); + OString dsFileName = rtl::OUStringToOString(path, RTL_TEXTENCODING_UTF8); + ds_device pSelectedDevice = ::OpenCLDevice::getDeviceSelection(dsFileName.getStr(), bForceEvaluation); + pDeviceId = pSelectedDevice.oclDeviceID; + + } + + if(OpenCLDevice::gpuEnv.mpDevID == pDeviceId) + { + // we don't need to change anything + // still the same device + return pDeviceId != NULL; + } + + cl_platform_id platformId; + cl_int nState = clGetDeviceInfo(pDeviceId, CL_DEVICE_PLATFORM, + sizeof(platformId), &platformId, NULL); + + cl_context_properties cps[3]; + cps[0] = CL_CONTEXT_PLATFORM; + cps[1] = reinterpret_cast<cl_context_properties>(platformId); + cps[2] = 0; + cl_context context = clCreateContext( cps, 1, &pDeviceId, NULL, NULL, &nState ); + + if(nState != CL_SUCCESS || context == NULL) + { + if(context != NULL) + clReleaseContext(context); + + SAL_WARN("opencl", "failed to set/switch opencl device"); + return false; + } + + cl_command_queue command_queue = clCreateCommandQueue( + context, pDeviceId, 0, &nState); + + if(command_queue == NULL || nState != CL_SUCCESS) + { + if(command_queue != NULL) + clReleaseCommandQueue(command_queue); + + clReleaseContext(context); + SAL_WARN("opencl", "failed to set/switch opencl device"); + return false; + } + + OpenCLDevice::releaseOpenCLEnv(&OpenCLDevice::gpuEnv); + OpenCLEnv env; + env.mpOclPlatformID = platformId; + env.mpOclContext = context; + env.mpOclDevsID = pDeviceId; + env.mpOclCmdQueue = command_queue; + OpenCLDevice::initOpenCLAttr(&env); + + // why do we need this at all? + + // (Assuming the above question refers to the mpArryDevsID + // initialisation below.) Because otherwise the code crashes in + // initOpenCLRunEnv(). Confused? You should be. + + OpenCLDevice::gpuEnv.mpArryDevsID = (cl_device_id*) malloc( sizeof(cl_device_id) ); + OpenCLDevice::gpuEnv.mpArryDevsID[0] = pDeviceId; + + return !OpenCLDevice::initOpenCLRunEnv(0); +} + +void getOpenCLDeviceInfo(size_t& rDeviceId, size_t& rPlatformId) +{ + int status = clewInit(OPENCL_DLL_NAME); + if (status < 0) + return; + + cl_device_id id = OpenCLDevice::gpuEnv.mpDevID; + findDeviceInfoFromDeviceId(id, rDeviceId, rPlatformId); +} + +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ |