summaryrefslogtreecommitdiff
path: root/sc/source/ui/StatisticsDialogs
diff options
context:
space:
mode:
authorEike Rathke <erack@redhat.com>2018-12-21 20:57:08 +0100
committerEike Rathke <erack@redhat.com>2018-12-21 22:58:04 +0100
commit0a2533aacc2dc98790510fdafd144aad66f231f2 (patch)
treefb7af9d8ec46c5fa325ed976fdbefedbccbb4fb5 /sc/source/ui/StatisticsDialogs
parent6a6a68738114a796e36047f41797da0cacf6dc54 (diff)
Data -> Statistics: sample random values in random order
The previous implementation sampled random values in the order of the population data. This may be unexpected and is also not what other spreadsheet implementations do. Instead, pick the random values in random order. Keeping the order can be made an option as a future feature. The code is prepared to sample WR (WithReplacement) as well, in addition to the now (and previously) implemented WOR (WithOutReplacement). Change-Id: I83734d36605b28cf44c0cc2bbc2dfcafaef025f4 Reviewed-on: https://gerrit.libreoffice.org/65559 Reviewed-by: Eike Rathke <erack@redhat.com> Tested-by: Jenkins
Diffstat (limited to 'sc/source/ui/StatisticsDialogs')
-rw-r--r--sc/source/ui/StatisticsDialogs/SamplingDialog.cxx82
1 files changed, 81 insertions, 1 deletions
diff --git a/sc/source/ui/StatisticsDialogs/SamplingDialog.cxx b/sc/source/ui/StatisticsDialogs/SamplingDialog.cxx
index 1717baa1282e..b43cef257d4e 100644
--- a/sc/source/ui/StatisticsDialogs/SamplingDialog.cxx
+++ b/sc/source/ui/StatisticsDialogs/SamplingDialog.cxx
@@ -219,6 +219,86 @@ ScRange ScSamplingDialog::PerformPeriodicSampling(ScDocShell* pDocShell)
return ScRange(mOutputAddress, ScAddress(outTab, outRow, outTab) );
}
+/** Sample random values from the input range and write them, in the order
+ *  they were drawn, starting at mOutputAddress.
+ *
+ *  Sampling is grouped by column: for every sheet and every column of
+ *  mInputRange, mpSampleSize->GetValue() rows are drawn at random from the
+ *  rows of the input range and their values are written top-down into the
+ *  corresponding output column.
+ *
+ *  Currently only WOR (WithOutReplacement) is performed; the WR
+ *  (WithReplacement) path exists but is disabled by a local flag.
+ *
+ *  @param pDocShell  document shell whose GetDocFunc() is used to set the
+ *                    output cells.
+ *  @return the range spanned from mOutputAddress to the final values of the
+ *          output column/row/sheet counters (each is one past the last cell
+ *          written in its dimension). If the sample size exceeds the
+ *          population size while sampling WOR, nothing is written and a
+ *          range consisting only of mOutputAddress is returned.
+ */
+ScRange ScSamplingDialog::PerformRandomSampling(ScDocShell* pDocShell)
+{
+    ScAddress aStart = mInputRange.aStart;
+    ScAddress aEnd = mInputRange.aEnd;
+
+    // Output cursor. outCol is declared here (not inside the sheet loop) so
+    // that its final value is available for the returned range.
+    SCTAB outTab = mOutputAddress.Tab();
+    SCCOL outCol = mOutputAddress.Col();
+    SCROW outRow = mOutputAddress.Row();
+
+    const sal_Int64 nSampleSize = mpSampleSize->GetValue();
+
+    // This implementation groups by columns. Other options could be grouping
+    // by rows or area.
+    const sal_Int64 nPopulationSize = aEnd.Row() - aStart.Row() + 1;
+
+    /* TODO: the previously existing implementation was WOR, we may want to
+     * additionally offer WR as option. */
+    const bool bWithReplacement = false;
+
+    // WOR (WithOutReplacement) can't draw more than population. Catch that in
+    // the caller.
+    assert( bWithReplacement || nSampleSize <= nPopulationSize);
+    if (!bWithReplacement && nSampleSize > nPopulationSize)
+        // Would enter an endless loop below, bail out.
+        return ScRange( mOutputAddress);
+
+    // Sheet indices are SCTAB (see outTab above), not SCROW.
+    for (SCTAB inTab = aStart.Tab(); inTab <= aEnd.Tab(); inTab++)
+    {
+        outCol = mOutputAddress.Col();
+        for (SCCOL inCol = aStart.Col(); inCol <= aEnd.Col(); inCol++)
+        {
+            outRow = mOutputAddress.Row();
+            // One flag per population row of this column, indexed by the
+            // row offset relative to aStart.Row().
+            std::vector<bool> vUsed( nPopulationSize, false);
+
+            while ((outRow - mOutputAddress.Row()) < nSampleSize)
+            {
+                // [a,b] *both* inclusive
+                SCROW nRandom = comphelper::rng::uniform_int_distribution( aStart.Row(), aEnd.Row());
+
+                if (!bWithReplacement)
+                {
+                    // Work with a zero-based offset while consulting vUsed.
+                    nRandom -= aStart.Row();
+                    if (vUsed[nRandom])
+                    {
+                        // Find a nearest one, preferring forwards.
+                        // Again: it's essential that the loop is entered only
+                        // if nSampleSize<=nPopulationSize, which is checked
+                        // above.
+                        SCROW nBack = nRandom;
+                        SCROW nForw = nRandom;
+                        do
+                        {
+                            if (nForw < nPopulationSize - 1 && !vUsed[++nForw])
+                            {
+                                nRandom = nForw;
+                                break;
+                            }
+                            if (nBack > 0 && !vUsed[--nBack])
+                            {
+                                nRandom = nBack;
+                                break;
+                            }
+                        }
+                        while (true);
+                    }
+                    vUsed[nRandom] = true;
+                    // Back to an absolute row number.
+                    nRandom += aStart.Row();
+                }
+
+                const double fValue = mDocument->GetValue( ScAddress(inCol, nRandom, inTab) );
+                pDocShell->GetDocFunc().SetValueCell(ScAddress(outCol, outRow, outTab), fValue, true);
+                outRow++;
+            }
+            outCol++;
+        }
+        outTab++;
+    }
+
+    // ScAddress takes (col, row, tab); pass the output column counter rather
+    // than the sheet index as the column of the end address.
+    return ScRange(mOutputAddress, ScAddress(outCol, outRow, outTab) );
+}
+
+
ScRange ScSamplingDialog::PerformRandomSamplingKeepOrder(ScDocShell* pDocShell)
{
ScAddress aStart = mInputRange.aStart;
@@ -277,7 +357,7 @@ void ScSamplingDialog::PerformSampling()
if (mpRandomMethodRadio->IsChecked())
{
- aModifiedRange = PerformRandomSamplingKeepOrder(pDocShell);
+ aModifiedRange = PerformRandomSampling(pDocShell);
}
else if (mpPeriodicMethodRadio->IsChecked())
{