diff options
author | Eike Rathke <erack@redhat.com> | 2018-12-11 23:48:37 +0100 |
---|---|---|
committer | Eike Rathke <erack@redhat.com> | 2018-12-12 01:26:01 +0100 |
commit | 3c964980da07892a02d5ac721d80558c459532d0 (patch) | |
tree | 6aad0644fba0f8372cce7e053c38ba8ed292eadc | |
parent | d1363c39201e93aa83028fd837fe12f48f79f63b (diff) |
Use Welford one-pass variance algorithm for DataPilot / pivot table
... instead of the naïve algorithm. Short test case with 4 values:
10000000007
10000000011
10000000013
10000000017
Naïve Var: -21845.3333333333
Welford Var: 17.3333314259847
VAR() two-pass: 17.3333333333333
Change-Id: I2f27ab91166551e96c0e467f41bd6e6d49b50295
Reviewed-on: https://gerrit.libreoffice.org/64993
Reviewed-by: Eike Rathke <erack@redhat.com>
Tested-by: Jenkins
-rw-r--r-- | sc/inc/dptabres.hxx | 14 | ||||
-rw-r--r-- | sc/source/core/data/dptabres.cxx | 37 |
2 files changed, 30 insertions, 21 deletions
diff --git a/sc/inc/dptabres.hxx b/sc/inc/dptabres.hxx index d28e192863c6..0a20f3cffa76 100644 --- a/sc/inc/dptabres.hxx +++ b/sc/inc/dptabres.hxx @@ -24,6 +24,7 @@ #include "dpfilteredcache.hxx" #include "calcmacros.hxx" #include "dpitemdata.hxx" +#include "subtotal.hxx" #include <com/sun/star/sheet/DataPilotFieldOrientation.hpp> #include <com/sun/star/sheet/DataPilotFieldReference.hpp> @@ -144,18 +145,19 @@ struct ScDPRelativePos // Possible values for the nCount member: // (greater than 0 counts the collected values) -const long SC_DPAGG_EMPTY = 0; // empty during data collection -const long SC_DPAGG_DATA_ERROR = -1; // error during data collection -const long SC_DPAGG_RESULT_EMPTY = -2; // empty result calculated -const long SC_DPAGG_RESULT_VALID = -3; // valid result calculated -const long SC_DPAGG_RESULT_ERROR = -4; // error in calculated result +const sal_Int64 SC_DPAGG_EMPTY = 0; // empty during data collection +const sal_Int64 SC_DPAGG_DATA_ERROR = -1; // error during data collection +const sal_Int64 SC_DPAGG_RESULT_EMPTY = -2; // empty result calculated +const sal_Int64 SC_DPAGG_RESULT_VALID = -3; // valid result calculated +const sal_Int64 SC_DPAGG_RESULT_ERROR = -4; // error in calculated result class ScDPAggData { private: + WelfordRunner maWelford; double fVal; double fAux; - long nCount; + sal_Int64 nCount; std::unique_ptr<ScDPAggData> pChild; std::vector<double> mSortedValues; diff --git a/sc/source/core/data/dptabres.cxx b/sc/source/core/data/dptabres.cxx index 305f6d8ac711..d989875b7359 100644 --- a/sc/source/core/data/dptabres.cxx +++ b/sc/source/core/data/dptabres.cxx @@ -428,15 +428,7 @@ void ScDPAggData::Update( const ScDPValue& rNext, ScSubTotalFunc eFunc, const Sc case SUBTOTAL_FUNC_STDP: case SUBTOTAL_FUNC_VAR: case SUBTOTAL_FUNC_VARP: - { - // fAux is used to sum up squares - if ( !SubTotal::SafePlus( fVal, rNext.mfValue ) ) - nCount = -1; // -1 for error - double fAdd = rNext.mfValue; - if ( !SubTotal::SafeMult( fAdd, rNext.mfValue ) || - !SubTotal::SafePlus( fAux, fAdd ) ) - nCount = -1; // -1 for error - } + maWelford.update( rNext.mfValue); break; case SUBTOTAL_FUNC_MED: { @@ -486,14 +478,19 @@ void ScDPAggData::Calculate( ScSubTotalFunc eFunc, const ScDPSubTotalState& rSub case SUBTOTAL_FUNC_MED: case SUBTOTAL_FUNC_MAX: case SUBTOTAL_FUNC_MIN: + bError = ( nCount <= 0 ); // no data is an error + break; + case SUBTOTAL_FUNC_STDP: case SUBTOTAL_FUNC_VARP: bError = ( nCount <= 0 ); // no data is an error + assert(bError || nCount == static_cast<sal_Int64>(maWelford.getCount())); break; case SUBTOTAL_FUNC_STD: case SUBTOTAL_FUNC_VAR: bError = ( nCount < 2 ); // need at least 2 values + assert(bError || nCount == static_cast<sal_Int64>(maWelford.getCount())); break; default: @@ -525,23 +522,33 @@ void ScDPAggData::Calculate( ScSubTotalFunc eFunc, const ScDPSubTotalState& rSub fResult = fVal / static_cast<double>(nCount); break; - //TODO: use safe mul for fVal * fVal - case SUBTOTAL_FUNC_STD: if ( nCount >= 2 ) - fResult = sqrt((fAux - fVal*fVal/static_cast<double>(nCount)) / static_cast<double>(nCount-1)); + { + fResult = maWelford.getVarianceSample(); + if (fResult < 0.0) + bError = true; + else + fResult = sqrt( fResult); + } break; case SUBTOTAL_FUNC_VAR: if ( nCount >= 2 ) - fResult = (fAux - fVal*fVal/static_cast<double>(nCount)) / static_cast<double>(nCount-1); + fResult = maWelford.getVarianceSample(); break; case SUBTOTAL_FUNC_STDP: if ( nCount > 0 ) - fResult = sqrt((fAux - fVal*fVal/static_cast<double>(nCount)) / static_cast<double>(nCount)); + { + fResult = maWelford.getVariancePopulation(); + if (fResult < 0.0) + bError = true; + else + fResult = sqrt( fResult); + } break; case SUBTOTAL_FUNC_VARP: if ( nCount > 0 ) - fResult = (fAux - fVal*fVal/static_cast<double>(nCount)) / static_cast<double>(nCount); + fResult = maWelford.getVariancePopulation(); break; case SUBTOTAL_FUNC_MED: { |