summaryrefslogtreecommitdiff
path: root/opencl
diff options
context:
space:
mode:
authorTor Lillqvist <tml@collabora.com>2016-06-29 18:25:26 +0300
committerTor Lillqvist <tml@collabora.com>2016-06-29 16:09:15 +0000
commit68106cbac586f25836d3482c068eee2ffb7c99fc (patch)
treecf13c7747009db86d93087411412586208e295d2 /opencl
parentf4f580f4a4683ab09c94cfd2dfa9d203881e3eb5 (diff)
Don't waste too much time evaluating the "score" for the "native CPU"
This code typically took tens of seconds, which is utterly pointless. Instead run the loop for at most one second, after which estimate how long it would have taken to process all of the data. Don't confuse the "native CPU" term here with a CPU-based device of an actual OpenCL implementation. That is a completely different thing. This "native CPU" thing here is just our way to estimate how much time it takes to do calculations using normal C++ code on the CPU. Change-Id: I92f5eedc06bbaaef6a9b5322fefec9d41f0db505 Reviewed-on: https://gerrit.libreoffice.org/26774 Reviewed-by: Michael Meeks <michael.meeks@collabora.com> Tested-by: Jenkins <ci@libreoffice.org>
Diffstat (limited to 'opencl')
-rw-r--r--opencl/source/opencl_device.cxx17
1 files changed, 14 insertions, 3 deletions
diff --git a/opencl/source/opencl_device.cxx b/opencl/source/opencl_device.cxx
index 0a0d28ee4200..3dc9a29413ad 100644
--- a/opencl/source/opencl_device.cxx
+++ b/opencl/source/opencl_device.cxx
@@ -327,7 +327,8 @@ ds_status evaluateScoreForDevice(ds_device& rDevice, std::unique_ptr<LibreOffice
timer kernelTime;
timerStart(&kernelTime);
- for (unsigned long j = 0; j < testData->outputSize; j++)
+ unsigned long j;
+ for (j = 0; j < testData->outputSize; j++)
{
double fAverage = 0.0f;
double fMin = DBL_MAX;
@@ -340,15 +341,25 @@ ds_status evaluateScoreForDevice(ds_device& rDevice, std::unique_ptr<LibreOffice
}
fAverage /= testData->inputSize;
testData->output[j] = fAverage + (fMin * fSoP);
+ // Don't run for much longer than one second
+ if (j > 0 && j % 100 == 0)
+ {
+ rDevice.fTime = timerCurrent(&kernelTime);
+ if (rDevice.fTime >= 1)
+ break;
+ }
}
+ rDevice.fTime = timerCurrent(&kernelTime);
+
+ // Scale time to how long it would have taken to go all the way to outputSize
+ rDevice.fTime /= ((double) j / testData->outputSize);
+
// InterpretTail - the S/W fallback is nothing like as efficient
// as any good openCL implementation: no SIMD, tons of branching
// in the inner loops etc. Generously characterise it as only 10x
// slower than the above.
float fInterpretTailFactor = 10.0;
-
- rDevice.fTime = timerCurrent(&kernelTime);
rDevice.fTime *= fInterpretTailFactor;
rDevice.bErrors = false;
}