diff options
author | Eike Rathke <erack@redhat.com> | 2018-12-21 20:57:08 +0100 |
---|---|---|
committer | Eike Rathke <erack@redhat.com> | 2018-12-21 22:58:04 +0100 |
commit | 0a2533aacc2dc98790510fdafd144aad66f231f2 (patch) | |
tree | fb7af9d8ec46c5fa325ed976fdbefedbccbb4fb5 /sc/source/ui/StatisticsDialogs | |
parent | 6a6a68738114a796e36047f41797da0cacf6dc54 (diff) |
Data -> Statistics: sample random values in random order
The previous implementation sampled random values in the order of
the population data. This may be unexpected and is also not what
other spreadsheet implementations do. Instead, pick the random
values in random order. Keeping order can be made an option as a
future feature. The code is prepared to sample WR (WithReplacement)
as well, in addition to the currently (and previously) implemented
WOR (WithOutReplacement).
Change-Id: I83734d36605b28cf44c0cc2bbc2dfcafaef025f4
Reviewed-on: https://gerrit.libreoffice.org/65559
Reviewed-by: Eike Rathke <erack@redhat.com>
Tested-by: Jenkins
Diffstat (limited to 'sc/source/ui/StatisticsDialogs')
-rw-r--r-- | sc/source/ui/StatisticsDialogs/SamplingDialog.cxx | 82 |
1 files changed, 81 insertions, 1 deletions
```diff
diff --git a/sc/source/ui/StatisticsDialogs/SamplingDialog.cxx b/sc/source/ui/StatisticsDialogs/SamplingDialog.cxx
index 1717baa1282e..b43cef257d4e 100644
--- a/sc/source/ui/StatisticsDialogs/SamplingDialog.cxx
+++ b/sc/source/ui/StatisticsDialogs/SamplingDialog.cxx
@@ -219,6 +219,86 @@ ScRange ScSamplingDialog::PerformPeriodicSampling(ScDocShell* pDocShell)
     return ScRange(mOutputAddress, ScAddress(outTab, outRow, outTab) );
 }
 
+ScRange ScSamplingDialog::PerformRandomSampling(ScDocShell* pDocShell)
+{
+    ScAddress aStart = mInputRange.aStart;
+    ScAddress aEnd = mInputRange.aEnd;
+
+    SCTAB outTab = mOutputAddress.Tab();
+    SCROW outRow = mOutputAddress.Row();
+
+    const sal_Int64 nSampleSize = mpSampleSize->GetValue();
+
+    // This implementation groups by columns. Other options could be grouping
+    // by rows or area.
+    const sal_Int64 nPopulationSize = aEnd.Row() - aStart.Row() + 1;
+
+    /* TODO: the previously existing implementation was WOR, we may want to
+     * additionally offer WR as option. */
+    bool bWithReplacement = false;
+
+    // WOR (WithOutReplacement) can't draw more than population. Catch that in
+    // the caller.
+    assert( bWithReplacement || nSampleSize <= nPopulationSize);
+    if (!bWithReplacement && nSampleSize > nPopulationSize)
+        // Would enter an endless loop below, bail out.
+        return ScRange( mOutputAddress);
+
+    for (SCROW inTab = aStart.Tab(); inTab <= aEnd.Tab(); inTab++)
+    {
+        SCCOL outCol = mOutputAddress.Col();
+        for (SCCOL inCol = aStart.Col(); inCol <= aEnd.Col(); inCol++)
+        {
+            outRow = mOutputAddress.Row();
+            std::vector<bool> vUsed( nPopulationSize, false);
+
+            while ((outRow - mOutputAddress.Row()) < nSampleSize)
+            {
+                // [a,b] *both* inclusive
+                SCROW nRandom = comphelper::rng::uniform_int_distribution( aStart.Row(), aEnd.Row());
+
+                if (!bWithReplacement)
+                {
+                    nRandom -= aStart.Row();
+                    if (vUsed[nRandom])
+                    {
+                        // Find a nearest one, preferring forwards.
+                        // Again: it's essential that the loop is entered only
+                        // if nSampleSize<=nPopulationSize, which is checked
+                        // above.
+                        SCROW nBack = nRandom;
+                        SCROW nForw = nRandom;
+                        do
+                        {
+                            if (nForw < nPopulationSize - 1 && !vUsed[++nForw])
+                            {
+                                nRandom = nForw;
+                                break;
+                            }
+                            if (nBack > 0 && !vUsed[--nBack])
+                            {
+                                nRandom = nBack;
+                                break;
+                            }
+                        }
+                        while (true);
+                    }
+                    vUsed[nRandom] = true;
+                    nRandom += aStart.Row();
+                }
+
+                const double fValue = mDocument->GetValue( ScAddress(inCol, nRandom, inTab) );
+                pDocShell->GetDocFunc().SetValueCell(ScAddress(outCol, outRow, outTab), fValue, true);
+                outRow++;
+            }
+            outCol++;
+        }
+        outTab++;
+    }
+
+    return ScRange(mOutputAddress, ScAddress(outTab, outRow, outTab) );
+}
+
 ScRange ScSamplingDialog::PerformRandomSamplingKeepOrder(ScDocShell* pDocShell)
 {
     ScAddress aStart = mInputRange.aStart;
@@ -277,7 +357,7 @@ void ScSamplingDialog::PerformSampling()
 
     if (mpRandomMethodRadio->IsChecked())
     {
-        aModifiedRange = PerformRandomSamplingKeepOrder(pDocShell);
+        aModifiedRange = PerformRandomSampling(pDocShell);
     }
     else if (mpPeriodicMethodRadio->IsChecked())
     {
```