diff options
author | Jagan Lokanatha <Jagan.Lokanatha@amd.com> | 2013-11-19 16:36:00 -0500 |
---|---|---|
committer | Kohei Yoshida <kohei.yoshida@collabora.com> | 2013-11-20 09:41:30 -0500 |
commit | 1989677db7492ea2e91efa9ec6b12578106d6e9f (patch) | |
tree | a12b9afe28ce6bc0eb0875e0d528e5bf3c45faa1 /sc | |
parent | d45483489513692be77a61cf343663e839483204 (diff) |
Enable workload-based device selection in OpenCL.
Change-Id: I8af49ccf498cafb48f9b82fabc4910c754ba0f96
Diffstat (limited to 'sc')
-rw-r--r-- | sc/Library_scopencl.mk | 1 | ||||
-rw-r--r-- | sc/source/core/opencl/opencl_device.cxx | 541 | ||||
-rw-r--r-- | sc/source/core/opencl/opencl_device.hxx | 26 | ||||
-rw-r--r-- | sc/source/core/opencl/opencl_device_selection.h | 614 | ||||
-rw-r--r-- | sc/source/core/opencl/openclwrapper.cxx | 26 |
5 files changed, 1191 insertions, 17 deletions
diff --git a/sc/Library_scopencl.mk b/sc/Library_scopencl.mk index 2c0ac1662b43..f4d400bfeb98 100644 --- a/sc/Library_scopencl.mk +++ b/sc/Library_scopencl.mk @@ -37,6 +37,7 @@ $(eval $(call gb_Library_use_libraries,scopencl,\ $(eval $(call gb_Library_add_exception_objects,scopencl,\ sc/source/core/opencl/formulagroupcl \ sc/source/core/opencl/openclwrapper \ + sc/source/core/opencl/opencl_device \ sc/source/core/opencl/opbase \ sc/source/core/opencl/op_financial \ sc/source/core/opencl/op_database \ diff --git a/sc/source/core/opencl/opencl_device.cxx b/sc/source/core/opencl/opencl_device.cxx new file mode 100644 index 000000000000..a1532ba6564a --- /dev/null +++ b/sc/source/core/opencl/opencl_device.cxx @@ -0,0 +1,541 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + */ + +#pragma warning (disable : 4996) + +#ifdef _WIN32 +#include <Windows.h> +#else +#include <sys/time.h> +#endif +#include <time.h> +#include <math.h> +#include <iostream> +#include <sstream> +#include "opencl_device.hxx" + + +#define INPUTSIZE 256*40 +#define OUTPUTSIZE 256*40 + +#define STRINGIFY(...) #__VA_ARGS__"\n" +#define LOG_PRINTF(x) (std::cout << x << std::endl) +//#define LOG_PRINTF(x) + +#define DS_CHECK_STATUS(status, name) \ + if (CL_SUCCESS != status) \ + { \ + LOG_PRINTF("[OCL] Error code is " << status << " at " << name); \ + } + +namespace sc { namespace OpenCLDevice { + +bool bIsInited = false; +bool bIsDeviceSelected = false; +ds_device selectedDevice; + +typedef struct LibreOfficeDeviceScore +{ + double fTime; // small time means faster device + bool bNoCLErrors; // were there any opencl errors +} LibreOfficeDeviceScore; + +typedef struct LibreOfficeDeviceEvaluationIO +{ + double* input0; + double* input1; + double* input2; + double* input3; + double* output; + unsigned long inputSize; + unsigned long outputSize; +} LibreOfficeDeviceEvaluationIO; + +typedef struct timer +{ +#ifdef _WIN32 + LARGE_INTEGER start, stop, frequency; +#else + long long start, stop, frequency; +#endif +} timer; + +const char* source = STRINGIFY( +\n#if defined(KHR_DP_EXTENSION) +\n#pragma OPENCL EXTENSION cl_khr_fp64 : enable +\n#elif defined(AMD_DP_EXTENSION) +\n#pragma OPENCL EXTENSION cl_amd_fp64 : enable +\n#endif + \n + int isNan(fp_t a) { return a != a; } + fp_t fsum(fp_t a, fp_t b) { return a + b; } + + fp_t fAverage(__global fp_t* input) +{ + fp_t sum = 0; + int count = 0; + for (int i = 0; i < INPUTSIZE; i++) + { + if (!isNan(input[i])) + { + sum = fsum(input[i], sum); + count += 1; + } + } + return sum / (fp_t)count; +} + fp_t fMin(__global fp_t* input) +{ + fp_t min = MAXFLOAT; + for (int i = 0; i < INPUTSIZE; i++) + { + if (!isNan(input[i])) + { + min = fmin(input[i], min); + } + } + return min; +} + fp_t fSoP(__global fp_t* input0, __global fp_t* input1) +{ + fp_t sop = 0.0; + for (int i = 0; i < INPUTSIZE; i++) + { + sop += (isNan(input0[i]) ? 0 : input0[i]) * (isNan(input1[i]) ? 0 : input1[i]); + } + return sop; +} + __kernel void DynamicKernel( + __global fp_t* result, __global fp_t* input0, __global fp_t* input1, __global fp_t* input2, __global fp_t* input3) +{ + int gid0 = get_global_id(0); + fp_t tmp0 = fAverage(input0); + fp_t tmp1 = fMin(input1) * fSoP(input2, input3); + result[gid0] = fsum(tmp0, tmp1); +} + ); + +size_t sourceSize[] = { strlen(source) }; + +/*************************************************************************/ +/* INTERNAL FUNCTIONS */ +/*************************************************************************/ +/* Timer functions - start timer */ +void timerStart(timer* mytimer) +{ +#ifdef _WIN32 + QueryPerformanceCounter(&mytimer->start); +#else + struct timespec s; + clock_gettime(CLOCK_MONOTONIC, &s); + mytimer->start = (long long)s.tv_sec * (long long)1.0E6 + (long long)s.tv_nsec / (long long)1.0E3; +#endif +} + +/* Timer functions - stop timer and return difference */ +double timerStop(timer* mytimer) +{ +#ifdef _WIN32 + QueryPerformanceCounter(&mytimer->stop); + QueryPerformanceFrequency(&mytimer->frequency); + double time = ((double)(mytimer->stop.QuadPart - mytimer->start.QuadPart) / mytimer->frequency.QuadPart); +#else + struct timespec s; + clock_gettime(CLOCK_MONOTONIC, &s); + mytimer->stop = (long long)s.tv_sec * (long long)1.0E6 + (long long)s.tv_nsec / (long long)1.0E3; + mytimer->frequency = (long long)1.0E6; + double time = ((double)(mytimer->stop - mytimer->start) / mytimer->frequency); +#endif + return time; +} + +/* Random number generator */ +double random(double min, double max) +{ + return floor(((double)rand() / ((unsigned int)RAND_MAX + 1)) * (max - min + 1) + min); +} + +/* Populate input */ +void populateInput(LibreOfficeDeviceEvaluationIO* testData) +{ + srand((unsigned int)time(NULL)); + double* input0 = testData->input0; + double* input1 = testData->input1; + double* input2 = testData->input2; + double* input3 = testData->input3; + for (unsigned long i = 0; i < testData->inputSize; i++) + { + input0[i] = random(0, i); + input1[i] = random(0, i); + input2[i] = random(0, i); + input3[i] = random(0, i); + } +} +/* Encode score object as byte string */ +ds_status serializeScore(ds_device* device, void** serializedScore, unsigned int* serializedScoreSize) +{ + *serializedScoreSize = sizeof(LibreOfficeDeviceScore); + *serializedScore = (void*)new unsigned char[*serializedScoreSize]; + memcpy(*serializedScore, device->score, *serializedScoreSize); + return DS_SUCCESS; +} + +/* Parses byte string and stores in score object */ +ds_status deserializeScore(ds_device* device, const unsigned char* serializedScore, unsigned int serializedScoreSize) +{ + // check that serializedScoreSize == sizeof(LibreOfficeDeviceScore); + device->score = new LibreOfficeDeviceScore; + memcpy(device->score, serializedScore, serializedScoreSize); + return DS_SUCCESS; +} + +/* Releases memory held by score */ +ds_status releaseScore(void* score) +{ + if (NULL != score) + { + delete (LibreOfficeDeviceScore*)score; + } + return DS_SUCCESS; +} + +/* Evaluate devices */ +ds_status evaluateScoreForDevice(ds_device* device, void* evalData) +{ + if (DS_DEVICE_OPENCL_DEVICE == device->type) + { + /* Evaluating an OpenCL device */ + LOG_PRINTF("[DS] Device: \"" << device->oclDeviceName << "\" (OpenCL) evaluation..."); + cl_int clStatus; + cl_context clContext; + cl_command_queue clQueue; + cl_program clProgram; + cl_kernel clKernel; + + /* Check for 64-bit float extensions */ + size_t aDevExtInfoSize = 0; + clStatus = clGetDeviceInfo(device->oclDeviceID, CL_DEVICE_EXTENSIONS, 0, NULL, &aDevExtInfoSize); + DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clGetDeviceInfo"); + + char* aExtInfo = new char[aDevExtInfoSize]; + clStatus = clGetDeviceInfo(device->oclDeviceID, CL_DEVICE_EXTENSIONS, sizeof(char) * aDevExtInfoSize, aExtInfo, NULL); + DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clGetDeviceInfo"); + + bool bKhrFp64Flag = false; + bool bAmdFp64Flag = false; + const char* buildOption; + std::string tmpStr("-Dfp_t=double -Dfp_t4=double4 -Dfp_t16=double16 -DINPUTSIZE="); + tmpStr.append(dynamic_cast<std::ostringstream&>(std::ostringstream() << std::dec << INPUTSIZE).str()); + + if ((std::string(aExtInfo)).find("cl_khr_fp64") != std::string::npos) + { + bKhrFp64Flag = true; + //buildOption = "-D KHR_DP_EXTENSION -Dfp_t=double -Dfp_t4=double4 -Dfp_t16=double16"; + tmpStr.append(" -DKHR_DP_EXTENSION"); + buildOption = tmpStr.c_str(); + } + else if ((std::string(aExtInfo)).find("cl_amd_fp64") != std::string::npos) + { + bAmdFp64Flag = true; + //buildOption = "-D AMD_DP_EXTENSION -Dfp_t=double -Dfp_t4=double4 -Dfp_t16=double16"; + tmpStr.append(" -DAMD_DP_EXTENSION"); + buildOption = tmpStr.c_str(); + } + delete[] aExtInfo; + + if (!bKhrFp64Flag && !bAmdFp64Flag) + { + /* No 64-bit float support */ + device->score = (void*)new LibreOfficeDeviceScore; + ((LibreOfficeDeviceScore*)device->score)->fTime = DBL_MAX; + ((LibreOfficeDeviceScore*)device->score)->bNoCLErrors = true; + } + else + { + /* 64-bit float support present */ + + /* Create context and command queue */ + clContext = clCreateContext(NULL, 1, &device->oclDeviceID, NULL, NULL, &clStatus); + DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clCreateContext"); + clQueue = clCreateCommandQueue(clContext, device->oclDeviceID, 0, &clStatus); + DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clCreateCommandQueue"); + + /* Build program */ + clProgram = clCreateProgramWithSource(clContext, 1, &source, sourceSize, &clStatus); + DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clCreateProgramWithSource"); + clStatus = clBuildProgram(clProgram, 1, &device->oclDeviceID, buildOption, NULL, NULL); + DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clBuildProgram"); + if (CL_SUCCESS != clStatus) + { + /* Build program failed */ + size_t length; + char* buildLog; + clStatus = clGetProgramBuildInfo(clProgram, device->oclDeviceID, CL_PROGRAM_BUILD_LOG, 0, NULL, &length); + buildLog = (char*)malloc(length); + clStatus = clGetProgramBuildInfo(clProgram, device->oclDeviceID, CL_PROGRAM_BUILD_LOG, length, buildLog, &length); + LOG_PRINTF("[OCL] Build Errors" << std::endl << buildLog); + free(buildLog); + + device->score = (void*)new LibreOfficeDeviceScore; + ((LibreOfficeDeviceScore*)device->score)->fTime = DBL_MAX; + ((LibreOfficeDeviceScore*)device->score)->bNoCLErrors = false; + } + else + { + /* Build program succeeded */ + timer kernelTime; + timerStart(&kernelTime); + + /* Run kernel */ + LibreOfficeDeviceEvaluationIO* testData = (LibreOfficeDeviceEvaluationIO*)evalData; + clKernel = clCreateKernel(clProgram, "DynamicKernel", &clStatus); + DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clCreateKernel"); + cl_mem clResult = clCreateBuffer(clContext, CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR, sizeof(cl_double) * testData->outputSize, testData->output, &clStatus); + DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clCreateBuffer::clResult"); + cl_mem clInput0 = clCreateBuffer(clContext, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, sizeof(cl_double) * testData->inputSize, testData->input0, &clStatus); + DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clCreateBuffer::clInput0"); + cl_mem clInput1 = clCreateBuffer(clContext, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, sizeof(cl_double) * testData->inputSize, testData->input1, &clStatus); + DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clCreateBuffer::clInput1"); + cl_mem clInput2 = clCreateBuffer(clContext, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, sizeof(cl_double) * testData->inputSize, testData->input2, &clStatus); + DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clCreateBuffer::clInput2"); + cl_mem clInput3 = clCreateBuffer(clContext, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, sizeof(cl_double) * testData->inputSize, testData->input3, &clStatus); + DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clCreateBuffer::clInput3"); + clStatus = clSetKernelArg(clKernel, 0, sizeof(cl_mem), (void*)&clResult); + DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clSetKernelArg::clResult"); + clStatus = clSetKernelArg(clKernel, 1, sizeof(cl_mem), (void*)&clInput0); + DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clSetKernelArg::clInput0"); + clStatus = clSetKernelArg(clKernel, 2, sizeof(cl_mem), (void*)&clInput1); + DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clSetKernelArg::clInput1"); + clStatus = clSetKernelArg(clKernel, 3, sizeof(cl_mem), (void*)&clInput2); + DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clSetKernelArg::clInput2"); + clStatus = clSetKernelArg(clKernel, 4, sizeof(cl_mem), (void*)&clInput3); + DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clSetKernelArg::clInput3"); + size_t globalWS[1] = { testData->outputSize }; + size_t localSize[1] = { 64 }; + clStatus = clEnqueueNDRangeKernel(clQueue, clKernel, 1, 0, globalWS, localSize, 0, NULL, NULL); + DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clEnqueueNDRangeKernel"); + clFinish(clQueue); + clReleaseMemObject(clInput3); + clReleaseMemObject(clInput2); + clReleaseMemObject(clInput1); + clReleaseMemObject(clInput0); + clReleaseMemObject(clResult); + clReleaseKernel(clKernel); + + device->score = (void*)new LibreOfficeDeviceScore; + ((LibreOfficeDeviceScore*)device->score)->fTime = timerStop(&kernelTime); + ((LibreOfficeDeviceScore*)device->score)->bNoCLErrors = true; + } + + clReleaseProgram(clProgram); + clReleaseCommandQueue(clQueue); + clReleaseContext(clContext); + } + } + else + { + /* Evaluating an Native CPU device */ + LOG_PRINTF("[DS] Device: \"CPU\" (Native) evaluation..."); + timer kernelTime; + timerStart(&kernelTime); + + LibreOfficeDeviceEvaluationIO* testData = (LibreOfficeDeviceEvaluationIO*)evalData; + for (unsigned long j = 0; j < testData->outputSize; j++) + { + double fAverage = 0.0f; + double fMin = DBL_MAX; + double fSoP = 0.0f; + for (unsigned long i = 0; i < testData->inputSize; i++) + { + fAverage += testData->input0[i]; + fMin = ((fMin < testData->input1[i]) ? fMin : testData->input1[i]); + fSoP += testData->input2[i] * testData->input3[i]; + } + fAverage /= testData->inputSize; + testData->output[j] = fAverage + (fMin * fSoP); + } + + device->score = (void*)new LibreOfficeDeviceScore; + ((LibreOfficeDeviceScore*)device->score)->fTime = timerStop(&kernelTime); + ((LibreOfficeDeviceScore*)device->score)->bNoCLErrors = true; + } + return DS_SUCCESS; +} + +/* Pick best device */ +ds_status pickBestDevice(ds_profile* profile, int* bestDeviceIdx) +{ + double bestScore = DBL_MAX; + *bestDeviceIdx = -1; + + for (unsigned int d = 0; d < profile->numDevices; d++) + { + ds_device device = profile->devices[d]; + LibreOfficeDeviceScore score = *(LibreOfficeDeviceScore*)device.score; + if (DS_DEVICE_OPENCL_DEVICE == device.type) + { + LOG_PRINTF("[DS] Device[" << d << "] " << device.oclDeviceName << " (OpenCL) score is " << score.fTime); + } + else + { + LOG_PRINTF("[DS] Device[" << d << "] CPU (Native) score is " << score.fTime); + } + if (score.fTime < bestScore) + { + bestScore = score.fTime; + *bestDeviceIdx = d; + } + } + if (DS_DEVICE_OPENCL_DEVICE == profile->devices[*bestDeviceIdx].type) + { + LOG_PRINTF("[DS] Selected Device[" << *bestDeviceIdx << "]: " << profile->devices[*bestDeviceIdx].oclDeviceName << "(OpenCL)."); + } + else + { + LOG_PRINTF("[DS] Selected Device[" << *bestDeviceIdx << "]: CPU (Native)."); + } + + return DS_SUCCESS; +} + +/* Return device ID for matching device name */ +int matchDevice(ds_profile* profile, char* deviceName) +{ + int deviceMatch = -1; + for (unsigned int d = 0; d < profile->numDevices - 1; d++) + { + if ((std::string(profile->devices[d].oclDeviceName)).find(deviceName) != std::string::npos) deviceMatch = d; + } + if (std::string("NATIVE_CPU").find(deviceName) != std::string::npos) deviceMatch = profile->numDevices - 1; + return deviceMatch; +} + +/*************************************************************************/ +/* EXTERNAL FUNCTIONS */ +/*************************************************************************/ +ds_device getDeviceSelection(const char* sProfilePath, bool bForceSelection) +{ + /* Run only if device is not yet selected */ + if (!bIsDeviceSelected || bForceSelection) + { + /* Setup */ + ds_status status; + ds_profile* profile; + status = initDSProfile(&profile, "LibreOffice v0.1"); + + /* Try reading scores from file */ + std::string tmpStr(sProfilePath); + const char* fileName = tmpStr.append("sc_opencl_device_profile.dat").c_str(); + if (!bForceSelection) + { + status = readProfileFromFile(profile, deserializeScore, fileName); + } + else + { + status = DS_INVALID_PROFILE; + LOG_PRINTF("[DS] Performing forced profiling."); + } + if (DS_SUCCESS != status) + { + if (!bForceSelection) LOG_PRINTF("[DS] Profile file not available (" << fileName << "); performing profiling."); + + /* Populate input data for micro-benchmark */ + LibreOfficeDeviceEvaluationIO* testData = new LibreOfficeDeviceEvaluationIO; + testData->inputSize = INPUTSIZE; + testData->outputSize = OUTPUTSIZE; + testData->input0 = new double[testData->inputSize]; + testData->input1 = new double[testData->inputSize]; + testData->input2 = new double[testData->inputSize]; + testData->input3 = new double[testData->inputSize]; + testData->output = new double[testData->outputSize]; + populateInput(testData); + + /* Perform evaluations */ + unsigned int numUpdates; + status = profileDevices(profile, DS_EVALUATE_ALL, evaluateScoreForDevice, (void*)testData, &numUpdates); + delete testData->output; + delete testData->input3; + delete testData->input2; + delete testData->input1; + delete testData->input0; + delete testData; + if (DS_SUCCESS == status) + { + /* Write scores to file */ + status = writeProfileToFile(profile, serializeScore, fileName); + if (DS_SUCCESS == status) + { + LOG_PRINTF("[DS] Scores written to file (" << fileName << ")."); + } + else + { + LOG_PRINTF("[DS] Error saving scores to file (" << fileName << "); scores not written to file."); + } + } + else + { + LOG_PRINTF("[DS] Unable to evaluate performance; scores not written to file."); + } + } + else + { + LOG_PRINTF("[DS] Profile read from file (" << fileName << ")."); + } + + /* Pick best device */ + int bestDeviceIdx; + pickBestDevice(profile, &bestDeviceIdx); + + /* Overide if necessary */ + char* overrideDeviceStr = getenv("SC_OPENCL_DEVICE_OVERRIDE"); + if (NULL != overrideDeviceStr) + { + unsigned int overrideDeviceIdx = matchDevice(profile, overrideDeviceStr); + if (-1 != overrideDeviceIdx) + { + LOG_PRINTF("[DS] Overriding Device Selection (SC_OPENCL_DEVICE_OVERRIDE=" << overrideDeviceStr << ")."); + bestDeviceIdx = overrideDeviceIdx; + if (DS_DEVICE_OPENCL_DEVICE == profile->devices[bestDeviceIdx].type) + { + LOG_PRINTF("[DS] Selected Device[" << bestDeviceIdx << "]: " << profile->devices[bestDeviceIdx].oclDeviceName << " (OpenCL)."); + } + else + { + LOG_PRINTF("[DS] Selected Device[" << bestDeviceIdx << "]: CPU (Native)."); + } + } + else + { + LOG_PRINTF("[DS] Ignoring invalid SC_OPENCL_DEVICE_OVERRIDE=" << overrideDeviceStr << ")."); + } + } + + /* Final device selection */ + selectedDevice = profile->devices[bestDeviceIdx]; + bIsDeviceSelected = true; + + /* Release profile */ + status = releaseDSProfile(profile, releaseScore); + } + return selectedDevice; +} + +bool selectedDeviceIsOpenCL(ds_device device) +{ + return (DS_DEVICE_OPENCL_DEVICE == device.type); +} + +bool selectedDeviceIsNativeCPU(ds_device device) +{ + return (DS_DEVICE_NATIVE_CPU == device.type); +} + +}} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/sc/source/core/opencl/opencl_device.hxx b/sc/source/core/opencl/opencl_device.hxx new file mode 100644 index 000000000000..e013943f68c7 --- /dev/null +++ b/sc/source/core/opencl/opencl_device.hxx @@ -0,0 +1,26 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + */ + +#ifndef SC_OPENCL_DEVICE_HXX +#define SC_OPENCL_DEVICE_HXX + +#pragma once +#include "opencl_device_selection.h" + +namespace sc { namespace OpenCLDevice { + +ds_device getDeviceSelection(const char* pFileName, bool bForceSelection = false); +bool selectedDeviceIsOpenCL(ds_device device); +bool selectedDeviceIsNativeCPU(ds_device device); + +}} + +#endif + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/sc/source/core/opencl/opencl_device_selection.h b/sc/source/core/opencl/opencl_device_selection.h new file mode 100644 index 000000000000..6e4eb58b222f --- /dev/null +++ b/sc/source/core/opencl/opencl_device_selection.h @@ -0,0 +1,614 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + */ + +#ifndef SC_DEVICE_SELECTION_H +#define SC_DEVICE_SELECTION_H + + +#ifdef _MSC_VER +#define _CRT_SECURE_NO_WARNINGS +#endif + +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include "clcc/clew.h" + +#define DS_DEVICE_NAME_LENGTH 256 + +typedef enum { + DS_SUCCESS = 0 + ,DS_INVALID_PROFILE = 1000 + ,DS_MEMORY_ERROR + , DS_INVALID_PERF_EVALUATOR_TYPE + , DS_INVALID_PERF_EVALUATOR + , DS_PERF_EVALUATOR_ERROR + , DS_FILE_ERROR + , DS_UNKNOWN_DEVICE_TYPE + , DS_PROFILE_FILE_ERROR + , DS_SCORE_SERIALIZER_ERROR + , DS_SCORE_DESERIALIZER_ERROR +} ds_status; + +// device type +typedef enum { + DS_DEVICE_NATIVE_CPU = 0 + ,DS_DEVICE_OPENCL_DEVICE +} ds_device_type; + + +typedef struct { + ds_device_type type; + cl_device_id oclDeviceID; + char* oclDeviceName; + char* oclDriverVersion; + void* score; // a pointer to the score data, the content/format is application defined +} ds_device; + +typedef struct { + unsigned int numDevices; + ds_device* devices; + const char* version; +} ds_profile; + +// deallocate memory used by score +typedef ds_status(* ds_score_release)(void* score); +static ds_status releaseDSProfile(ds_profile* profile, ds_score_release sr) +{ + ds_status status = DS_SUCCESS; + if (profile != NULL) + { + if (profile->devices != NULL && sr != NULL) + { + unsigned int i; + for (i = 0; i < profile->numDevices; i++) + { + free(profile->devices[i].oclDeviceName); + free(profile->devices[i].oclDriverVersion); + status = sr(profile->devices[i].score); + if (status != DS_SUCCESS) break; + } + free(profile->devices); + } + free(profile); + } + return status; +} + + +static ds_status initDSProfile(ds_profile** p, const char* version) +{ + int numDevices; + cl_uint numPlatforms; + cl_platform_id* platforms = NULL; + cl_device_id* devices = NULL; + ds_status status = DS_SUCCESS; + ds_profile* profile = NULL; + unsigned int next; + unsigned int i; + + if (p == NULL) return DS_INVALID_PROFILE; + + profile = (ds_profile*)malloc(sizeof(ds_profile)); + if (profile == NULL) return DS_MEMORY_ERROR; + + memset(profile, 0, sizeof(ds_profile)); + + clGetPlatformIDs(0, NULL, &numPlatforms); + if (numPlatforms != 0) + { + platforms = (cl_platform_id*)malloc(numPlatforms * sizeof(cl_platform_id)); + if (platforms == NULL) + { + status = DS_MEMORY_ERROR; + goto cleanup; + } + clGetPlatformIDs(numPlatforms, platforms, NULL); + } + + numDevices = 0; + for (i = 0; i < (unsigned int)numPlatforms; i++) + { + cl_uint num; + clGetDeviceIDs(platforms[i], CL_DEVICE_TYPE_ALL, 0, NULL, &num); + numDevices += num; + } + if (numDevices != 0) + { + devices = (cl_device_id*)malloc(numDevices * sizeof(cl_device_id)); + if (devices == NULL) + { + status = DS_MEMORY_ERROR; + goto cleanup; + } + } + + profile->numDevices = numDevices + 1; // +1 to numDevices to include the native CPU + profile->devices = (ds_device*)malloc(profile->numDevices * sizeof(ds_device)); + if (profile->devices == NULL) + { + profile->numDevices = 0; + status = DS_MEMORY_ERROR; + goto cleanup; + } + memset(profile->devices, 0, profile->numDevices * sizeof(ds_device)); + + next = 0; + for (i = 0; i < (unsigned int)numPlatforms; i++) + { + cl_uint num; + unsigned j; + clGetDeviceIDs(platforms[i], CL_DEVICE_TYPE_ALL, numDevices, devices, &num); + for (j = 0; j < num; j++, next++) + { + char buffer[DS_DEVICE_NAME_LENGTH]; + size_t length; + + profile->devices[next].type = DS_DEVICE_OPENCL_DEVICE; + profile->devices[next].oclDeviceID = devices[j]; + + clGetDeviceInfo(profile->devices[next].oclDeviceID, CL_DEVICE_NAME + , DS_DEVICE_NAME_LENGTH, &buffer, NULL); + length = strlen(buffer); + profile->devices[next].oclDeviceName = (char*)malloc(length + 1); + memcpy(profile->devices[next].oclDeviceName, buffer, length + 1); + + clGetDeviceInfo(profile->devices[next].oclDeviceID, CL_DRIVER_VERSION + , DS_DEVICE_NAME_LENGTH, &buffer, NULL); + length = strlen(buffer); + profile->devices[next].oclDriverVersion = (char*)malloc(length + 1); + memcpy(profile->devices[next].oclDriverVersion, buffer, length + 1); + } + } + profile->devices[next].type = DS_DEVICE_NATIVE_CPU; + profile->version = version; + +cleanup: + if (platforms) free(platforms); + if (devices) free(devices); + if (status == DS_SUCCESS) + { + *p = profile; + } + else + { + if (profile) + { + if (profile->devices) free(profile->devices); + free(profile); + } + } + return status; +} + +// Pointer to a function that calculates the score of a device (ex: device->score) +// update the data size of score. The encoding and the format of the score data +// is implementation defined. The function should return DS_SUCCESS if there's no error to be reported. +typedef ds_status(* ds_perf_evaluator)(ds_device* device, void* data); + +typedef enum { + DS_EVALUATE_ALL + , DS_EVALUATE_NEW_ONLY +} ds_evaluation_type; + +static ds_status profileDevices(ds_profile* profile, const ds_evaluation_type type + , ds_perf_evaluator evaluator, void* evaluatorData, unsigned int* numUpdates) +{ + ds_status status = DS_SUCCESS; + unsigned int i; + unsigned int updates = 0; + + if (profile == NULL) + { + return DS_INVALID_PROFILE; + } + if (evaluator == NULL) + { + return DS_INVALID_PERF_EVALUATOR; + } + + for (i = 0; i < profile->numDevices; i++) + { + ds_status evaluatorStatus; + + switch (type) + { + case DS_EVALUATE_NEW_ONLY: + if (profile->devices[i].score != NULL) break; + // else fall through + case DS_EVALUATE_ALL: + evaluatorStatus = evaluator(profile->devices + i, evaluatorData); + if (evaluatorStatus != DS_SUCCESS) + { + status = evaluatorStatus; + return status; + } + updates++; + break; + default: + return DS_INVALID_PERF_EVALUATOR_TYPE; + break; + }; + } + if (numUpdates) *numUpdates = updates; + return status; +} + + +#define DS_TAG_VERSION "<version>" +#define DS_TAG_VERSION_END "</version>" +#define DS_TAG_DEVICE "<device>" +#define DS_TAG_DEVICE_END "</device>" +#define DS_TAG_SCORE "<score>" +#define DS_TAG_SCORE_END "</score>" +#define DS_TAG_DEVICE_TYPE "<type>" +#define DS_TAG_DEVICE_TYPE_END "</type>" +#define DS_TAG_DEVICE_NAME "<name>" +#define DS_TAG_DEVICE_NAME_END "</name>" +#define DS_TAG_DEVICE_DRIVER_VERSION "<driver>" +#define DS_TAG_DEVICE_DRIVER_VERSION_END "</driver>" + +#define DS_DEVICE_NATIVE_CPU_STRING "native_cpu" + + + +typedef ds_status(* ds_score_serializer)(ds_device* device, void** serializedScore, unsigned int* serializedScoreSize); +static ds_status writeProfileToFile(ds_profile* profile, ds_score_serializer serializer, const char* file) +{ + ds_status status = DS_SUCCESS; + FILE* profileFile = NULL; + + + if (profile == NULL) return DS_INVALID_PROFILE; + + profileFile = fopen(file, "wb"); + if (profileFile == NULL) + { + status = DS_FILE_ERROR; + } + else + { + unsigned int i; + + // write version string + fwrite(DS_TAG_VERSION, sizeof(char), strlen(DS_TAG_VERSION), profileFile); + fwrite(profile->version, sizeof(char), strlen(profile->version), profileFile); + fwrite(DS_TAG_VERSION_END, sizeof(char), strlen(DS_TAG_VERSION_END), profileFile); + fwrite("\n", sizeof(char), 1, profileFile); + + for (i = 0; i < profile->numDevices && status == DS_SUCCESS; i++) + { + void* serializedScore; + unsigned int serializedScoreSize; + + fwrite(DS_TAG_DEVICE, sizeof(char), strlen(DS_TAG_DEVICE), profileFile); + + fwrite(DS_TAG_DEVICE_TYPE, sizeof(char), strlen(DS_TAG_DEVICE_TYPE), profileFile); + fwrite(&profile->devices[i].type, sizeof(ds_device_type), 1, profileFile); + fwrite(DS_TAG_DEVICE_TYPE_END, sizeof(char), strlen(DS_TAG_DEVICE_TYPE_END), profileFile); + + switch (profile->devices[i].type) + { + case DS_DEVICE_NATIVE_CPU: + { + // There's no need to emit a device name for the native CPU device. + /* + fwrite(DS_TAG_DEVICE_NAME, sizeof(char), strlen(DS_TAG_DEVICE_NAME), profileFile); + fwrite(DS_DEVICE_NATIVE_CPU_STRING,sizeof(char),strlen(DS_DEVICE_NATIVE_CPU_STRING), profileFile); + fwrite(DS_TAG_DEVICE_NAME_END, sizeof(char), strlen(DS_TAG_DEVICE_NAME_END), profileFile); + */ + } + break; + case DS_DEVICE_OPENCL_DEVICE: + { + fwrite(DS_TAG_DEVICE_NAME, sizeof(char), strlen(DS_TAG_DEVICE_NAME), profileFile); + fwrite(profile->devices[i].oclDeviceName, sizeof(char), strlen(profile->devices[i].oclDeviceName), profileFile); + fwrite(DS_TAG_DEVICE_NAME_END, sizeof(char), strlen(DS_TAG_DEVICE_NAME_END), profileFile); + + fwrite(DS_TAG_DEVICE_DRIVER_VERSION, sizeof(char), strlen(DS_TAG_DEVICE_DRIVER_VERSION), profileFile); + fwrite(profile->devices[i].oclDriverVersion, sizeof(char), strlen(profile->devices[i].oclDriverVersion), profileFile); + fwrite(DS_TAG_DEVICE_DRIVER_VERSION_END, sizeof(char), strlen(DS_TAG_DEVICE_DRIVER_VERSION_END), profileFile); + } + break; + default: + status = DS_UNKNOWN_DEVICE_TYPE; + break; + }; + + fwrite(DS_TAG_SCORE, sizeof(char), strlen(DS_TAG_SCORE), profileFile); + status = serializer(profile->devices + i, &serializedScore, &serializedScoreSize); + if (status == DS_SUCCESS && serializedScore != NULL && serializedScoreSize > 0) + { + fwrite(serializedScore, sizeof(char), serializedScoreSize, profileFile); + free(serializedScore); + } + fwrite(DS_TAG_SCORE_END, sizeof(char), strlen(DS_TAG_SCORE_END), profileFile); + fwrite(DS_TAG_DEVICE_END, sizeof(char), strlen(DS_TAG_DEVICE_END), profileFile); + fwrite("\n", sizeof(char), 1, profileFile); + } + fclose(profileFile); + } + return status; +} + + +static ds_status readProFile(const char* fileName, char** content, size_t* contentSize) +{ + FILE* input = NULL; + size_t size = 0; + char* binary = NULL; + + *contentSize = 0; + *content = NULL; + + input = fopen(fileName, "rb"); + if (input == NULL) + { + return DS_FILE_ERROR; + } + + fseek(input, 0L, SEEK_END); + size = ftell(input); + rewind(input); + binary = (char*)malloc(size); + if (binary == NULL) + { + return DS_FILE_ERROR; + } + fread(binary, sizeof(char), size, input); + fclose(input); + + *contentSize = size; + *content = binary; + return DS_SUCCESS; +} + + +static const char* findString(const char* contentStart, const char* contentEnd, const char* string) +{ + size_t stringLength; + const char* currentPosition; + const char* found; + found = NULL; + stringLength = strlen(string); + currentPosition = contentStart; + for (currentPosition = contentStart; currentPosition < contentEnd; currentPosition++) + { + if (*currentPosition == string[0]) + { + if (currentPosition + stringLength < contentEnd) + { + if (strncmp(currentPosition, string, stringLength) == 0) + { + found = currentPosition; + break; + } + } + } + } + return found; +} + + +typedef ds_status(* ds_score_deserializer)(ds_device* device, const unsigned char* serializedScore, unsigned int serializedScoreSize); +static ds_status readProfileFromFile(ds_profile* profile, ds_score_deserializer deserializer, const char* file) +{ + + ds_status status = DS_SUCCESS; + char* contentStart = NULL; + const char* contentEnd = NULL; + size_t contentSize; + + if (profile == NULL) return DS_INVALID_PROFILE; + + status = readProFile(file, &contentStart, &contentSize); + if (status == DS_SUCCESS) + { + const char* currentPosition; + const char* dataStart; + const char* dataEnd; + size_t versionStringLength; + + contentEnd = contentStart + contentSize; + currentPosition = contentStart; + + + // parse the version string + dataStart = findString(currentPosition, contentEnd, DS_TAG_VERSION); + if (dataStart == NULL) + { + status = DS_PROFILE_FILE_ERROR; + goto cleanup; + } + dataStart += strlen(DS_TAG_VERSION); + + dataEnd = findString(dataStart, contentEnd, DS_TAG_VERSION_END); + if (dataEnd == NULL) + { + status = DS_PROFILE_FILE_ERROR; + goto cleanup; + } + + versionStringLength = strlen(profile->version); + if (versionStringLength != (dataEnd - dataStart) + || strncmp(profile->version, dataStart, versionStringLength) != 0) + { + // version mismatch + status = DS_PROFILE_FILE_ERROR; + goto cleanup; + } + currentPosition = dataEnd + strlen(DS_TAG_VERSION_END); + + // parse the device information + while (1) + { + unsigned int i; + + const char* deviceTypeStart; + const char* deviceTypeEnd; + ds_device_type deviceType; + + const char* deviceNameStart; + const char* deviceNameEnd; + + const char* deviceScoreStart; + const char* deviceScoreEnd; + + const char* deviceDriverStart; + const char* deviceDriverEnd; + + dataStart = findString(currentPosition, contentEnd, DS_TAG_DEVICE); + if (dataStart == NULL) + { + // nothing useful remain, quit... + break; + } + dataStart += strlen(DS_TAG_DEVICE); + dataEnd = findString(dataStart, contentEnd, DS_TAG_DEVICE_END); + if (dataEnd == NULL) + { + status = DS_PROFILE_FILE_ERROR; + goto cleanup; + } + + // parse the device type + deviceTypeStart = findString(dataStart, contentEnd, DS_TAG_DEVICE_TYPE); + if (deviceTypeStart == NULL) + { + status = DS_PROFILE_FILE_ERROR; + goto cleanup; + } + deviceTypeStart += strlen(DS_TAG_DEVICE_TYPE); + deviceTypeEnd = findString(deviceTypeStart, contentEnd, DS_TAG_DEVICE_TYPE_END); + if (deviceTypeEnd == NULL) + { + status = DS_PROFILE_FILE_ERROR; + goto cleanup; + } + memcpy(&deviceType, deviceTypeStart, sizeof(ds_device_type)); + + + // parse the device name + if (deviceType == DS_DEVICE_OPENCL_DEVICE) + { + + deviceNameStart = findString(dataStart, contentEnd, DS_TAG_DEVICE_NAME); + if (deviceNameStart == NULL) + { + status = DS_PROFILE_FILE_ERROR; + goto cleanup; + } + deviceNameStart += strlen(DS_TAG_DEVICE_NAME); + deviceNameEnd = findString(deviceNameStart, contentEnd, DS_TAG_DEVICE_NAME_END); + if (deviceNameEnd == NULL) + { + status = DS_PROFILE_FILE_ERROR; + goto cleanup; + } + + + deviceDriverStart = findString(dataStart, contentEnd, DS_TAG_DEVICE_DRIVER_VERSION); + if (deviceDriverStart == NULL) + { + status = DS_PROFILE_FILE_ERROR; + goto cleanup; + } + deviceDriverStart += strlen(DS_TAG_DEVICE_DRIVER_VERSION); + deviceDriverEnd = findString(deviceDriverStart, contentEnd, DS_TAG_DEVICE_DRIVER_VERSION_END); + if (deviceDriverEnd == NULL) + { + status = DS_PROFILE_FILE_ERROR; + goto cleanup; + } + + + // check if this device is on the system + for (i = 0; i < profile->numDevices; i++) + { + if (profile->devices[i].type == DS_DEVICE_OPENCL_DEVICE) + { + size_t actualDeviceNameLength; + size_t driverVersionLength; + + actualDeviceNameLength = strlen(profile->devices[i].oclDeviceName); + driverVersionLength = strlen(profile->devices[i].oclDriverVersion); + if (actualDeviceNameLength == (deviceNameEnd - deviceNameStart) + && driverVersionLength == (deviceDriverEnd - deviceDriverStart) + && strncmp(profile->devices[i].oclDeviceName, deviceNameStart, actualDeviceNameLength) == 0 + && strncmp(profile->devices[i].oclDriverVersion, deviceDriverStart, driverVersionLength) == 0) + { + + deviceScoreStart = findString(dataStart, contentEnd, DS_TAG_SCORE); + if (deviceNameStart == NULL) + { + status = DS_PROFILE_FILE_ERROR; + goto cleanup; + } + deviceScoreStart += strlen(DS_TAG_SCORE); + deviceScoreEnd = findString(deviceScoreStart, contentEnd, DS_TAG_SCORE_END); + status = deserializer(profile->devices + i, (const unsigned char*)deviceScoreStart, deviceScoreEnd - deviceScoreStart); + if (status != DS_SUCCESS) + { + goto cleanup; + } + } + } + } + + } + else if (deviceType == DS_DEVICE_NATIVE_CPU) + { + for (i = 0; i < profile->numDevices; i++) + { + if (profile->devices[i].type == DS_DEVICE_NATIVE_CPU) + { + deviceScoreStart = findString(dataStart, contentEnd, DS_TAG_SCORE); + if (deviceScoreStart == NULL) + { + status = DS_PROFILE_FILE_ERROR; + goto cleanup; + } + deviceScoreStart += strlen(DS_TAG_SCORE); + deviceScoreEnd = findString(deviceScoreStart, contentEnd, DS_TAG_SCORE_END); + status = deserializer(profile->devices + i, (const unsigned char*)deviceScoreStart, deviceScoreEnd - deviceScoreStart); + if (status != DS_SUCCESS) + { + goto cleanup; + } + } + } + } + + // skip over the current one to find the next device + currentPosition = dataEnd + strlen(DS_TAG_DEVICE_END); + } + } +cleanup: + if (contentStart != NULL) free(contentStart); + return status; +} + +static ds_status getNumDeviceWithEmptyScore(ds_profile* profile, unsigned int* num) +{ + unsigned int i; + if (profile == NULL || num == NULL) return DS_MEMORY_ERROR; + *num = 0; + for (i = 0; i < profile->numDevices; i++) + { + if (profile->devices[i].score == NULL) + { + *num++; + } + } + return DS_SUCCESS; +} + +#endif + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/sc/source/core/opencl/openclwrapper.cxx b/sc/source/core/opencl/openclwrapper.cxx index 9a056aa0fd46..efe52a41ac42 100644 --- a/sc/source/core/opencl/openclwrapper.cxx +++ b/sc/source/core/opencl/openclwrapper.cxx @@ -19,6 +19,7 @@ #include "sal/config.h" #include <osl/file.hxx> +#include "opencl_device.hxx" #include <stdio.h> #include <stdlib.h> @@ -898,23 +899,14 @@ bool switchOpenclDevice(const OUString* pDevice, bool bAutoSelect) if(!pDeviceId || bAutoSelect) { - size_t nComputeUnits = 0; - // clever algorithm - const std::vector<OpenclPlatformInfo>& rPlatform = fillOpenCLInfo(); - for(std::vector<OpenclPlatformInfo>::const_iterator it = - rPlatform.begin(), itEnd = rPlatform.end(); it != itEnd; ++it) - { - for(std::vector<OpenclDeviceInfo>::const_iterator itr = - it->maDevices.begin(), itrEnd = it->maDevices.end(); - itr != itrEnd; ++itr) - { - if(itr->mnComputeUnits > nComputeUnits) - { - pDeviceId = reinterpret_cast<cl_device_id>(itr->device); - nComputeUnits = itr->mnComputeUnits; - } - } - } + OUString url("${$BRAND_BASE_DIR/" LIBO_ETC_FOLDER "/" SAL_CONFIGFILE("bootstrap") ":UserInstallation}/cache/"); + rtl::Bootstrap::expandMacros(url); + OUString path; + osl::FileBase::getSystemPathFromFileURL(url,path); + OString dsFileName = rtl::OUStringToOString(path, RTL_TEXTENCODING_UTF8); + ds_device pSelectedDevice = sc::OpenCLDevice::getDeviceSelection(dsFileName.getStr()); + pDeviceId = pSelectedDevice.oclDeviceID; + } if(OpenclDevice::gpuEnv.mpDevID == pDeviceId) |