summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorEike Rathke <erack@redhat.com>2018-12-11 23:48:37 +0100
committerEike Rathke <erack@redhat.com>2018-12-12 01:26:01 +0100
commit3c964980da07892a02d5ac721d80558c459532d0 (patch)
tree6aad0644fba0f8372cce7e053c38ba8ed292eadc
parentd1363c39201e93aa83028fd837fe12f48f79f63b (diff)
Use Welford one-pass variance algorithm for DataPilot / pivot table
... instead of the naïve algorithm. Short test case with 4 values: 10000000007 10000000011 10000000013 10000000017 Naïve Var: -21845.3333333333 Welford Var: 17.3333314259847 VAR() two-pass: 17.3333333333333 Change-Id: I2f27ab91166551e96c0e467f41bd6e6d49b50295 Reviewed-on: https://gerrit.libreoffice.org/64993 Reviewed-by: Eike Rathke <erack@redhat.com> Tested-by: Jenkins
-rw-r--r--sc/inc/dptabres.hxx14
-rw-r--r--sc/source/core/data/dptabres.cxx37
2 files changed, 30 insertions, 21 deletions
diff --git a/sc/inc/dptabres.hxx b/sc/inc/dptabres.hxx
index d28e192863c6..0a20f3cffa76 100644
--- a/sc/inc/dptabres.hxx
+++ b/sc/inc/dptabres.hxx
@@ -24,6 +24,7 @@
#include "dpfilteredcache.hxx"
#include "calcmacros.hxx"
#include "dpitemdata.hxx"
+#include "subtotal.hxx"
#include <com/sun/star/sheet/DataPilotFieldOrientation.hpp>
#include <com/sun/star/sheet/DataPilotFieldReference.hpp>
@@ -144,18 +145,19 @@ struct ScDPRelativePos
// Possible values for the nCount member:
// (greater than 0 counts the collected values)
-const long SC_DPAGG_EMPTY = 0; // empty during data collection
-const long SC_DPAGG_DATA_ERROR = -1; // error during data collection
-const long SC_DPAGG_RESULT_EMPTY = -2; // empty result calculated
-const long SC_DPAGG_RESULT_VALID = -3; // valid result calculated
-const long SC_DPAGG_RESULT_ERROR = -4; // error in calculated result
+const sal_Int64 SC_DPAGG_EMPTY = 0; // empty during data collection
+const sal_Int64 SC_DPAGG_DATA_ERROR = -1; // error during data collection
+const sal_Int64 SC_DPAGG_RESULT_EMPTY = -2; // empty result calculated
+const sal_Int64 SC_DPAGG_RESULT_VALID = -3; // valid result calculated
+const sal_Int64 SC_DPAGG_RESULT_ERROR = -4; // error in calculated result
class ScDPAggData
{
private:
+ WelfordRunner maWelford;
double fVal;
double fAux;
- long nCount;
+ sal_Int64 nCount;
std::unique_ptr<ScDPAggData> pChild;
std::vector<double> mSortedValues;
diff --git a/sc/source/core/data/dptabres.cxx b/sc/source/core/data/dptabres.cxx
index 305f6d8ac711..d989875b7359 100644
--- a/sc/source/core/data/dptabres.cxx
+++ b/sc/source/core/data/dptabres.cxx
@@ -428,15 +428,7 @@ void ScDPAggData::Update( const ScDPValue& rNext, ScSubTotalFunc eFunc, const Sc
case SUBTOTAL_FUNC_STDP:
case SUBTOTAL_FUNC_VAR:
case SUBTOTAL_FUNC_VARP:
- {
- // fAux is used to sum up squares
- if ( !SubTotal::SafePlus( fVal, rNext.mfValue ) )
- nCount = -1; // -1 for error
- double fAdd = rNext.mfValue;
- if ( !SubTotal::SafeMult( fAdd, rNext.mfValue ) ||
- !SubTotal::SafePlus( fAux, fAdd ) )
- nCount = -1; // -1 for error
- }
+ maWelford.update( rNext.mfValue);
break;
case SUBTOTAL_FUNC_MED:
{
@@ -486,14 +478,19 @@ void ScDPAggData::Calculate( ScSubTotalFunc eFunc, const ScDPSubTotalState& rSub
case SUBTOTAL_FUNC_MED:
case SUBTOTAL_FUNC_MAX:
case SUBTOTAL_FUNC_MIN:
+ bError = ( nCount <= 0 ); // no data is an error
+ break;
+
case SUBTOTAL_FUNC_STDP:
case SUBTOTAL_FUNC_VARP:
bError = ( nCount <= 0 ); // no data is an error
+ assert(bError || nCount == static_cast<sal_Int64>(maWelford.getCount()));
break;
case SUBTOTAL_FUNC_STD:
case SUBTOTAL_FUNC_VAR:
bError = ( nCount < 2 ); // need at least 2 values
+ assert(bError || nCount == static_cast<sal_Int64>(maWelford.getCount()));
break;
default:
@@ -525,23 +522,33 @@ void ScDPAggData::Calculate( ScSubTotalFunc eFunc, const ScDPSubTotalState& rSub
fResult = fVal / static_cast<double>(nCount);
break;
- //TODO: use safe mul for fVal * fVal
-
case SUBTOTAL_FUNC_STD:
if ( nCount >= 2 )
- fResult = sqrt((fAux - fVal*fVal/static_cast<double>(nCount)) / static_cast<double>(nCount-1));
+ {
+ fResult = maWelford.getVarianceSample();
+ if (fResult < 0.0)
+ bError = true;
+ else
+ fResult = sqrt( fResult);
+ }
break;
case SUBTOTAL_FUNC_VAR:
if ( nCount >= 2 )
- fResult = (fAux - fVal*fVal/static_cast<double>(nCount)) / static_cast<double>(nCount-1);
+ fResult = maWelford.getVarianceSample();
break;
case SUBTOTAL_FUNC_STDP:
if ( nCount > 0 )
- fResult = sqrt((fAux - fVal*fVal/static_cast<double>(nCount)) / static_cast<double>(nCount));
+ {
+ fResult = maWelford.getVariancePopulation();
+ if (fResult < 0.0)
+ bError = true;
+ else
+ fResult = sqrt( fResult);
+ }
break;
case SUBTOTAL_FUNC_VARP:
if ( nCount > 0 )
- fResult = (fAux - fVal*fVal/static_cast<double>(nCount)) / static_cast<double>(nCount);
+ fResult = maWelford.getVariancePopulation();
break;
case SUBTOTAL_FUNC_MED:
{