summaryrefslogtreecommitdiff
path: root/sc/source/ui/StatisticsDialogs
diff options
context:
space:
mode:
authorDennis Francis <dennis.francis@collabora.co.uk>2018-06-25 23:42:26 +0530
committerTomaž Vajngerl <quikee@gmail.com>2018-07-02 23:19:40 +0200
commitb7a02f2bb66b990289eb7f4dc80069d1545179a4 (patch)
tree48511f6ff8c4a4b63421ec2e55c934e611777308 /sc/source/ui/StatisticsDialogs
parentfa503091cce61b0288645efeeab0937b11fe5403 (diff)
tdf#109042 : Add support for multivariate regression...
to regression tool. This means we now support more than one X variable(independent variable). One caveat is that all X variable observations needs to be present adjacent to each other in the same table. For example if data is grouped by columns, a valid organization of X variables look like :- X Variables ----> A B C ... XVar1 XVar2 XVar3 ... XVarN | 0.1 0.45 0.32 ... Observations 0.34 0.23 0.54 ... | 0.23 0.56 0.90 ... | 0.32 0.11 0.78 ... V This patch also makes our regression tool output to have similar structure to what Excel and Gnumeric does. This means more statistical measures are added including confidence intervals for all parmeter estimates. We already have support for Logarithmic and Power regression in addition to plain Linear regression. This patch's multivariate support extends to all of these types of regressions. Earlier all regression statistics were computed separately from scratch, which mostly compute the same regression multiple times. This would slow things down if the data-set being analysed is big. This is not true anymore as we use LINEST() formula. LINEST() formula provides all the necessary statistics needed in regression analysis, so here it is called just once and its output components are referenced to compute other statistics(derived). Following are the UI changes for the regression dialog box :- 1. Changed the regression-type selectors from check-boxes to radio-buttons. So only one type of regression can be done at a time. This is because the output of a single regression type itself shows a lot of information and if do all types of regression, it is hard to read and interpret especially for bigger data-sets with lots of X variables. 2. Allow the variable's ranges to have label in them, via a checkbox. If labels are provided, they are used to annotate the variable specific statistics and the user can easily identify the stats corresponding to each variable. 3. More robust input validity checks, with error messages at the bottom of the dialog to let the user know which of their entry is invalid. 4. User can enter the confidence level (default = 95%) for computing the confidence intervals of each estimate. 5. Make residual computations optional via a check-box, as this involves writing a table with all X's and Y with predicted Y and residual for each observation. If the data-set is big, or the user just care about the estimates and confidence intervals, they can avoid this. Finally the patch includes a uitest that tests all 3 types of regressions with a small dataset. The ground truths for the tests were obtained by running regression tool in Gnumeric. Change-Id: I9762b716eae14b9fbd16e2c7228edf9e1930dc93 Reviewed-on: https://gerrit.libreoffice.org/56809 Tested-by: Jenkins Reviewed-by: Michael Meeks <michael.meeks@collabora.com> Reviewed-by: Tomaž Vajngerl <quikee@gmail.com>
Diffstat (limited to 'sc/source/ui/StatisticsDialogs')
-rw-r--r--sc/source/ui/StatisticsDialogs/RegressionDialog.cxx707
-rw-r--r--sc/source/ui/StatisticsDialogs/StatisticsTwoVariableDialog.cxx28
-rw-r--r--sc/source/ui/StatisticsDialogs/TableFillingAndNavigationTools.cxx6
3 files changed, 606 insertions, 135 deletions
diff --git a/sc/source/ui/StatisticsDialogs/RegressionDialog.cxx b/sc/source/ui/StatisticsDialogs/RegressionDialog.cxx
index a07e94286c58..f7bdbd3271ba 100644
--- a/sc/source/ui/StatisticsDialogs/RegressionDialog.cxx
+++ b/sc/source/ui/StatisticsDialogs/RegressionDialog.cxx
@@ -8,13 +8,10 @@
*
*/
-#include <memory>
#include <sfx2/dispatch.hxx>
#include <svl/zforlist.hxx>
#include <svl/undo.hxx>
-#include <formulacell.hxx>
-#include <rangelst.hxx>
#include <scitems.hxx>
#include <docsh.hxx>
#include <document.hxx>
@@ -26,8 +23,90 @@
#include <scresid.hxx>
#include <strings.hrc>
+/*
+ Some regression basics
+ ----------------------
+
+ 1. Linear regression fits using data, a linear function between the dependent variable and the independent variable(s).
+ The basic form of this function is :-
+
+ y = b + m_1*x_1 + m_2*x_2 + ... + m_k*x_k
+
+ where y is the dependent variable
+ x_1, x_2, ..., x_k are the k independent variables
+ b is the intercept
+ m_1, m_2, ..., m_k are the slopes corresponding to the variables x_1, x_2, ..., x_k respectively.
+
+
+ This equation for n observations can be compactly written using matrices as :-
+
+ y = X*A
+
+ where y is the n dimensional column vector containing dependent variable observations.
+ where X is matrix of shape n*(k+1) where a row looks like [ 1 x_1 x_2 ... x_k ]
+ A is the k+1 dimensional column vector [ b m_1 m_2 ... m_k ]
+
+ Calc formula LINEST(Y_array ; X_array) can be used to compute all entries in "A" along with many other statistics.
+
+
+ 2. Logarithmic regression is basically used to find a linear function between the dependent variable and
+ the natural logarithm of the independent variable(s).
+ So the basic form of this functions is :-
+
+ y = b + m_1*ln(x_1) + m_2*ln(x_2) + ... + m_k*ln(x_k)
+
+ This can be again written in a compact matrix form for n observations.
+
+ y = ln(X)*A
+
+ where y is the n dimensional column vector containing dependent variable observations.
+ where X is matrix of shape n*(k+1) where a row looks like [ e x_1 x_2 ... x_k ]
+ A is the k+1 dimensional column vector [ b m_1 m_2 ... m_k ]
+
+ To estimate A, we use the formula =LINEST(Y_array ; LN(X_array))
+
+
+ 3. Power regression is used to fit the following model :-
+
+ y = b * (x_1 ^ m_1) * (x_2 ^ m_2) * ... * (x_k ^ m_k)
+
+ To reduce this to a linear function(so that we can still use LINEST()), we take natural logarithm on both sides
+
+ ln(y) = c + m_1*ln(x_1) + m_2*ln(x_2) + ... + m_k*ln(x_k) ; where c = ln(b)
+
+
+ This again can be written compactly in matrix form as :-
+
+ ln(y) = ln(X)*A
+
+ where y is the n dimensional column vector containing dependent variable observations.
+ where X is matrix of shape n*(k+1) where a row looks like [ e x_1 x_2 ... x_k ]
+ A is the k+1 dimensional column vector [ c m_1 m_2 ... m_k ]
+
+ To estimate A, we use the formula =LINEST(LN(Y_array) ; LN(X_array))
+
+ Once we get A, to get back y from x's we use the formula :-
+
+ y = exp( ln(X)*A )
+
+
+
+ Some references for computing confidence interval for the regression coefficients :-
+
+ [1] https://en.wikipedia.org/wiki/Student%27s_t-test#Slope_of_a_regression_line
+ [2] https://en.wikipedia.org/wiki/Simple_linear_regression#Normality_assumption
+ [3] https://onlinecourses.science.psu.edu/stat414/node/280
+
+ */
+
namespace
{
+ enum class ScRegType {
+ LINEAR,
+ LOGARITHMIC,
+ POWER
+ };
+
const char* constRegressionModel[] =
{
STR_LABEL_LINEAR,
@@ -35,53 +114,51 @@ namespace
STR_LABEL_POWER
};
- OUString constTemplateRSQUARED[] =
- {
- "=RSQ(%VARIABLE2_RANGE% ; %VARIABLE1_RANGE%)",
- "=RSQ(%VARIABLE2_RANGE% ; LN(%VARIABLE1_RANGE%))",
- "=RSQ(LN(%VARIABLE2_RANGE%) ; LN(%VARIABLE1_RANGE%))"
- };
-
- OUString constTemplatesSTDERR[] =
- {
- "=STEYX(%VARIABLE2_RANGE% ; %VARIABLE1_RANGE%)",
- "=STEYX(%VARIABLE2_RANGE% ; LN(%VARIABLE1_RANGE%))",
- "=STEYX(LN(%VARIABLE2_RANGE%) ; LN(%VARIABLE1_RANGE%))"
- };
-
- OUString constTemplatesSLOPE[] =
- {
- "=SLOPE(%VARIABLE2_RANGE% ; %VARIABLE1_RANGE%)",
- "=SLOPE(%VARIABLE2_RANGE% ; LN(%VARIABLE1_RANGE%))",
- "=SLOPE(LN(%VARIABLE2_RANGE%) ; LN(%VARIABLE1_RANGE%))"
- };
-
- OUString constTemplatesINTERCEPT[] =
+ OUString constTemplateLINEST[] =
{
- "=INTERCEPT(%VARIABLE2_RANGE% ; %VARIABLE1_RANGE%)",
- "=INTERCEPT(%VARIABLE2_RANGE% ; LN(%VARIABLE1_RANGE%))",
- "=EXP(INTERCEPT(LN(%VARIABLE2_RANGE%) ; LN(%VARIABLE1_RANGE%)))"
+ "=LINEST(%VARIABLE2_RANGE% ; %VARIABLE1_RANGE% ; TRUE ; TRUE)",
+ "=LINEST(%VARIABLE2_RANGE% ; LN(%VARIABLE1_RANGE%) ; TRUE ; TRUE)",
+ "=LINEST(LN(%VARIABLE2_RANGE%) ; LN(%VARIABLE1_RANGE%) ; TRUE ; TRUE)"
};
OUString constRegressionFormula[] =
{
- "=%A% * %ADDRESS% + %B%",
- "=%A% * LN(%ADDRESS%) + %B%",
- "=%B% * %ADDRESS% ^ %A%"
+ "=MMULT(%XDATAMATRIX_RANGE% ; %SLOPES_RANGE%) + %INTERCEPT_ADDR%",
+ "=MMULT(LN(%XDATAMATRIX_RANGE%) ; %SLOPES_RANGE%) + %INTERCEPT_ADDR%",
+ "=EXP(MMULT(LN(%XDATAMATRIX_RANGE%) ; %SLOPES_RANGE%) + %INTERCEPT_ADDR%)"
};
} // end anonymous namespace
+static size_t lcl_GetNumRowsColsInRange(const ScRange& rRange, bool bRows)
+{
+ if (bRows)
+ return rRange.aEnd.Row() - rRange.aStart.Row() + 1;
+
+ return rRange.aEnd.Col() - rRange.aStart.Col() + 1;
+}
+
ScRegressionDialog::ScRegressionDialog(
SfxBindings* pSfxBindings, SfxChildWindow* pChildWindow,
vcl::Window* pParent, ScViewData* pViewData ) :
ScStatisticsTwoVariableDialog(
pSfxBindings, pChildWindow, pParent, pViewData,
- "RegressionDialog", "modules/scalc/ui/regressiondialog.ui" )
+ "RegressionDialog", "modules/scalc/ui/regressiondialog.ui" ),
+ mbUnivariate(true),
+ mnNumIndependentVars(1),
+ mnNumObservations(0),
+ mbUse3DAddresses(false)
{
- get(mpLinearCheckBox, "linear-check");
- get(mpLogarithmicCheckBox, "logarithmic-check");
- get(mpPowerCheckBox, "power-check");
+ get(mpWithLabelsCheckBox, "withlabels-check");
+ get(mpLinearRadioButton, "linear-radio");
+ get(mpLogarithmicRadioButton, "logarithmic-radio");
+ get(mpPowerRadioButton, "power-radio");
+ get(mpConfidenceLevelField, "confidencelevel-spin");
+ get(mpCalcResidualsCheckBox, "calcresiduals-check");
+ get(mpErrorMessage, "error-message");
+ mpWithLabelsCheckBox->SetToggleHdl(LINK(this, ScRegressionDialog, CheckBoxHdl));
+ mpConfidenceLevelField->SetModifyHdl(LINK(this, ScRegressionDialog, NumericFieldHdl));
+ mpCalcResidualsCheckBox->SetToggleHdl(LINK(this, ScRegressionDialog, CheckBoxHdl));
}
ScRegressionDialog::~ScRegressionDialog()
@@ -96,9 +173,13 @@ bool ScRegressionDialog::Close()
void ScRegressionDialog::dispose()
{
- mpLinearCheckBox.disposeAndClear();
- mpLogarithmicCheckBox.disposeAndClear();
- mpPowerCheckBox.disposeAndClear();
+ mpWithLabelsCheckBox.disposeAndClear();
+ mpLinearRadioButton.disposeAndClear();
+ mpLogarithmicRadioButton.disposeAndClear();
+ mpPowerRadioButton.disposeAndClear();
+ mpConfidenceLevelField.disposeAndClear();
+ mpCalcResidualsCheckBox.disposeAndClear();
+ mpErrorMessage.disposeAndClear();
ScStatisticsTwoVariableDialog::dispose();
}
@@ -112,131 +193,509 @@ ScRange ScRegressionDialog::ApplyOutput(ScDocShell* pDocShell)
AddressWalkerWriter aOutput(mOutputAddress, pDocShell, mDocument,
formula::FormulaGrammar::mergeToGrammar( formula::FormulaGrammar::GRAM_ENGLISH, mAddressDetails.eConv));
FormulaTemplate aTemplate(mDocument);
- aTemplate.autoReplaceUses3D(false);
+ aTemplate.autoReplaceUses3D(mbUse3DAddresses);
+
+ // max col of our output should account for
+ // 1. constant term column,
+ // 2. mnNumIndependentVars columns
+ // 3. Actual Y column
+ // 4. Predicted Y column
+ // 5. Residual Column
+ SCCOL nOutputMaxCol = mOutputAddress.Col() + mnNumIndependentVars + 3;
+
+ ScRange aXDataRange(GetDataRange(mVariable1Range));
+ ScRange aYDataRange(GetDataRange(mVariable2Range));
+
+ aTemplate.autoReplaceRange("%VARIABLE1_RANGE%", aXDataRange);
+ aTemplate.autoReplaceRange("%VARIABLE2_RANGE%", aYDataRange);
+ size_t nRegressionIndex = GetRegressionTypeIndex();
+ ScRegType eRegType = static_cast<ScRegType>(nRegressionIndex);
+ bool bTakeLogX = eRegType == ScRegType::LOGARITHMIC || eRegType == ScRegType::POWER;
+
+ WriteRawRegressionResults(aOutput, aTemplate, nRegressionIndex);
+ WriteRegressionStatistics(aOutput, aTemplate);
+ WriteRegressionANOVAResults(aOutput, aTemplate);
+ WriteRegressionEstimatesWithCI(aOutput, aTemplate, bTakeLogX);
+ if (mpCalcResidualsCheckBox->IsChecked())
+ WritePredictionsWithResiduals(aOutput, aTemplate, nRegressionIndex);
+
+ ScAddress aMaxAddress(aOutput.mMaximumAddress);
+ aMaxAddress.SetCol(std::max(aMaxAddress.Col(), nOutputMaxCol));
+ return ScRange(aOutput.mMinimumAddress, aMaxAddress);
+}
- std::unique_ptr<DataRangeIterator> pVariable1Iterator;
- if (mGroupedBy == BY_COLUMN)
- pVariable1Iterator.reset(new DataRangeByColumnIterator(mVariable1Range));
- else
- pVariable1Iterator.reset(new DataRangeByRowIterator(mVariable1Range));
+bool ScRegressionDialog::InputRangesValid()
+{
+ if (!mVariable1Range.IsValid())
+ {
+ mpErrorMessage->SetText(ScResId(STR_MESSAGE_XINVALID_RANGE));
+ return false;
+ }
- std::unique_ptr<DataRangeIterator> pVariable2Iterator;
- if (mGroupedBy == BY_COLUMN)
- pVariable2Iterator.reset(new DataRangeByColumnIterator(mVariable2Range));
- else
- pVariable2Iterator.reset(new DataRangeByRowIterator(mVariable2Range));
+ if (!mVariable2Range.IsValid())
+ {
+ mpErrorMessage->SetText(ScResId(STR_MESSAGE_YINVALID_RANGE));
+ return false;
+ }
- aTemplate.autoReplaceRange("%VARIABLE1_RANGE%", pVariable1Iterator->get());
- aTemplate.autoReplaceRange("%VARIABLE2_RANGE%", pVariable2Iterator->get());
+ if (!mOutputAddress.IsValid())
+ {
+ mpErrorMessage->SetText(ScResId(STR_MESSAGE_INVALID_OUTPUT_ADDR));
+ return false;
+ }
- aOutput.writeBoldString(ScResId(STR_REGRESSION));
- aOutput.newLine();
- aOutput.newLine();
- aOutput.push();
+ {
+ double fConfidenceLevel = mpConfidenceLevelField->GetValue();
+ if ( fConfidenceLevel <= 0.0 || fConfidenceLevel >= 100.0 )
+ {
+ mpErrorMessage->SetText(ScResId(STR_MESSAGE_INVALID_CONFIDENCE_LEVEL));
+ return false;
+ }
+ }
- // REGRESSION MODEL
- aOutput.writeString(ScResId(STR_LABEL_REGRESSION_MODEL));
- aOutput.nextRow();
+ mVariable1Range.PutInOrder();
+ mVariable2Range.PutInOrder();
- // RSQUARED
- aOutput.writeString(ScResId(STR_LABEL_RSQUARED));
- aOutput.nextRow();
+ bool bGroupedByColumn = mGroupedBy == BY_COLUMN;
- // Standard Error
- aOutput.writeString(ScResId(STRID_CALC_STD_ERROR));
- aOutput.nextRow();
+ bool bYHasSingleDim = (
+ (bGroupedByColumn &&
+ mVariable2Range.aStart.Col() == mVariable2Range.aEnd.Col()) ||
+ (!bGroupedByColumn &&
+ mVariable2Range.aStart.Row() == mVariable2Range.aEnd.Row()));
- aOutput.nextRow();
+ if (!bYHasSingleDim)
+ {
+ if (bGroupedByColumn)
+ mpErrorMessage->SetText(ScResId(STR_MESSAGE_YVARIABLE_MULTI_COLUMN));
+ else
+ mpErrorMessage->SetText(ScResId(STR_MESSAGE_YVARIABLE_MULTI_ROW));
+ return false;
+ }
- // Slope
- aOutput.writeString(ScResId(STR_LABEL_SLOPE));
- aOutput.nextRow();
+ bool bWithLabels = mpWithLabelsCheckBox->IsChecked();
- // Intercept
- aOutput.writeString(ScResId(STR_LABEL_INTERCEPT));
- aOutput.nextRow();
+ size_t nYObs = lcl_GetNumRowsColsInRange(mVariable2Range, bGroupedByColumn);
+ size_t nNumXVars = lcl_GetNumRowsColsInRange(mVariable1Range, !bGroupedByColumn);
+ mbUnivariate = nNumXVars == 1;
+ // Observation count mismatch check
+ if (lcl_GetNumRowsColsInRange(mVariable1Range, bGroupedByColumn) != nYObs)
+ {
+ if (mbUnivariate)
+ mpErrorMessage->SetText(ScResId(STR_MESSAGE_UNIVARIATE_NUMOBS_MISMATCH));
+ else
+ mpErrorMessage->SetText(ScResId(STR_MESSAGE_MULTIVARIATE_NUMOBS_MISMATCH));
+ return false;
+ }
+
+ mnNumIndependentVars = nNumXVars;
+ mnNumObservations = bWithLabels ? nYObs - 1 : nYObs;
+
+ mbUse3DAddresses = mVariable1Range.aStart.Tab() != mOutputAddress.Tab() ||
+ mVariable2Range.aStart.Tab() != mOutputAddress.Tab();
+
+ mpErrorMessage->SetText("");
- aOutput.nextRow();
+ return true;
+}
+
+size_t ScRegressionDialog::GetRegressionTypeIndex()
+{
+ if (mpLinearRadioButton->IsChecked())
+ return 0;
+ if (mpLogarithmicRadioButton->IsChecked())
+ return 1;
+ return 2;
+}
- size_t nVariable1Size = pVariable1Iterator->size();
+ScRange ScRegressionDialog::GetDataRange(const ScRange& rRange)
+{
+ if (!mpWithLabelsCheckBox->IsChecked())
+ return rRange;
- OUString sFormula;
+ ScRange aDataRange(rRange);
if (mGroupedBy == BY_COLUMN)
- sFormula = "=INDEX(%VARIABLE1_RANGE%; %VAR1_CELL_INDEX%; 1)";
+ aDataRange.aStart.IncRow(1);
else
- sFormula = "=INDEX(%VARIABLE1_RANGE%; 1; %VAR1_CELL_INDEX%)";
+ aDataRange.aStart.IncCol(1);
+
+ return aDataRange;
+}
+
+OUString ScRegressionDialog::GetVariableNameFormula(bool bXVar, size_t nIndex, bool bWithLog)
+{
+ if (bXVar && nIndex == 0)
+ return "=\"" + ScResId(STR_LABEL_INTERCEPT) + "\"";
- for (size_t i = 0; i < nVariable1Size; i++)
+ if (mpWithLabelsCheckBox->IsChecked())
{
- aTemplate.setTemplate(sFormula);
- aTemplate.applyNumber("%VAR1_CELL_INDEX%", i + 1);
- aOutput.writeFormula(aTemplate.getTemplate());
- aOutput.nextRow();
+ ScAddress aAddr(bXVar ? mVariable1Range.aStart : mVariable2Range.aStart);
+ if (mGroupedBy == BY_COLUMN)
+ aAddr.IncCol(nIndex - 1);
+ else
+ aAddr.IncRow(nIndex - 1);
+
+ ScRefFlags eAddrFlag = mbUse3DAddresses ? ScRefFlags::ADDR_ABS_3D : ScRefFlags::ADDR_ABS;
+ return bWithLog ? OUString("=CONCAT(\"LN(\";" +
+ aAddr.Format(eAddrFlag, mDocument, mDocument->GetAddressConvention()) + ";\")\")") :
+ OUString("=" + aAddr.Format(eAddrFlag, mDocument, mDocument->GetAddressConvention()));
}
- aOutput.reset();
+ OUString aDefaultVarName;
+
+ if (bXVar)
+ aDefaultVarName = "X" + OUString::number(nIndex);
+ else
+ aDefaultVarName = "Y";
+
+ return bWithLog ? OUString("=\"LN(" + aDefaultVarName + ")\"") :
+ OUString("=\"" + aDefaultVarName + "\"");
+}
+
+OUString ScRegressionDialog::GetXVariableNameFormula(size_t nIndex, bool bWithLog)
+{
+ assert(nIndex <= mnNumIndependentVars);
+ return GetVariableNameFormula(true, nIndex, bWithLog);
+}
- bool aEnabledRegressionTypes[3];
+OUString ScRegressionDialog::GetYVariableNameFormula(bool bWithLog)
+{
+ return GetVariableNameFormula(false, 1, bWithLog);
+}
- aEnabledRegressionTypes[0] = mpLinearCheckBox->IsChecked();
- aEnabledRegressionTypes[1] = mpLogarithmicCheckBox->IsChecked();
- aEnabledRegressionTypes[2] = mpPowerCheckBox->IsChecked();
+void ScRegressionDialog::WriteRawRegressionResults(AddressWalkerWriter& rOutput, FormulaTemplate& rTemplate,
+ size_t nRegressionIndex)
+{
+ rOutput.writeBoldString(ScResId(STR_REGRESSION));
+ rOutput.newLine();
+ // REGRESSION MODEL
+ rOutput.writeString(ScResId(STR_LABEL_REGRESSION_MODEL));
+ rOutput.nextColumn();
+ rOutput.writeString(ScResId(constRegressionModel[nRegressionIndex]));
+ rOutput.newLine();
+ rOutput.newLine();
+
+ rOutput.writeString(ScResId(STR_LINEST_RAW_OUTPUT_TITLE));
+ rOutput.newLine();
+ rOutput.push();
+ rTemplate.setTemplate(constTemplateLINEST[nRegressionIndex]);
+ rOutput.writeMatrixFormula(rTemplate.getTemplate(), 1 + mnNumIndependentVars, 5);
+ // Add LINEST result components to template
+ // 1. Add ranges for coeffients and standard errors for indep. vars and the intercept.
+ // Note that these two are in the reverse order(m_n, m_n-1, ..., m_1, b) w.r.t what we expect.
+ rTemplate.autoReplaceRange("%COEFFICIENTS_REV_RANGE%", ScRange(rOutput.current(), rOutput.current(mnNumIndependentVars)));
+ rTemplate.autoReplaceRange("%SERRORSX_REV_RANGE%", ScRange(rOutput.current(0, 1), rOutput.current(mnNumIndependentVars, 1)));
+
+ // 2. Add R-squared and standard error for y estimate.
+ rTemplate.autoReplaceAddress("%RSQUARED_ADDR%", rOutput.current(0, 2));
+ rTemplate.autoReplaceAddress("%SERRORY_ADDR%", rOutput.current(1, 2));
+
+ // 3. Add F statistic and degrees of freedom
+ rTemplate.autoReplaceAddress("%FSTATISTIC_ADDR%", rOutput.current(0, 3));
+ rTemplate.autoReplaceAddress("%DoFRESID_ADDR%", rOutput.current(1, 3));
+
+ // 4. Add regression sum of squares and residual sum of squares
+ rTemplate.autoReplaceAddress("%SSREG_ADDR%", rOutput.current(0, 4));
+ rTemplate.autoReplaceAddress("%SSRESID_ADDR%", rOutput.current(1, 4));
+
+ rOutput.push(0, 4);
+ rOutput.newLine();
+}
- sal_Int16 nColumn = 0;
+void ScRegressionDialog::WriteRegressionStatistics(AddressWalkerWriter& rOutput, FormulaTemplate& rTemplate)
+{
+ rOutput.newLine();
+ rOutput.writeString(ScResId(STR_LABEL_REGRESSION_STATISTICS));
+ rOutput.newLine();
- for (size_t nRegressionIndex = 0; nRegressionIndex < SAL_N_ELEMENTS(aEnabledRegressionTypes); ++nRegressionIndex)
+ const char* aMeasureNames[] =
{
- if (!aEnabledRegressionTypes[nRegressionIndex])
- continue;
+ STR_LABEL_RSQUARED,
+ STRID_CALC_STD_ERROR,
+ STR_LABEL_XVARIABLES_COUNT,
+ STR_OBSERVATIONS_LABEL,
+ STR_LABEL_ADJUSTED_RSQUARED
+ };
- aOutput.nextColumn();
- nColumn += 1;
+ OUString aMeasureFormulas[] =
+ {
+ "=%RSQUARED_ADDR%",
+ "=%SERRORY_ADDR%",
+ "=" + OUString::number(mnNumIndependentVars),
+ "=" + OUString::number(mnNumObservations),
+ "=1 - (1 - %RSQUARED_ADDR%)*(%NUMOBS_ADDR% - 1)/(%NUMOBS_ADDR% - %NUMXVARS_ADDR% - 1)"
+ };
+
+ rTemplate.autoReplaceAddress("%NUMXVARS_ADDR%", rOutput.current(1, 2));
+ rTemplate.autoReplaceAddress("%NUMOBS_ADDR%", rOutput.current(1, 3));
+
+ for (size_t nIdx = 0; nIdx < SAL_N_ELEMENTS(aMeasureNames); ++nIdx)
+ {
+ rOutput.writeString(ScResId(aMeasureNames[nIdx]));
+ rOutput.nextColumn();
+ rTemplate.setTemplate(aMeasureFormulas[nIdx]);
+ rOutput.writeFormula(rTemplate.getTemplate());
+ rOutput.newLine();
+ }
+}
+
+void ScRegressionDialog::WriteRegressionANOVAResults(AddressWalkerWriter& rOutput, FormulaTemplate& rTemplate)
+{
+ rOutput.newLine();
+ rOutput.writeString(ScResId(STR_LABEL_ANOVA));
+ rOutput.newLine();
- // REGRESSION MODEL
- aOutput.writeString(ScResId(constRegressionModel[nRegressionIndex]));
- aOutput.nextRow();
+ const size_t nColsInTable = 6;
+ const size_t nRowsInTable = 4;
+ OUString aTable[nRowsInTable][nColsInTable] =
+ {
+ {
+ "",
+ ScResId(STR_ANOVA_LABEL_DF),
+ ScResId(STR_ANOVA_LABEL_SS),
+ ScResId(STR_ANOVA_LABEL_MS),
+ ScResId(STR_ANOVA_LABEL_F),
+ ScResId(STR_ANOVA_LABEL_SIGNIFICANCE_F)
+ },
+ {
+ ScResId(STR_REGRESSION),
+ "=%NUMXVARS_ADDR%",
+ "=%SSREG_ADDR%",
+ "=%SSREG_ADDR% / %DoFREG_ADDR%",
+ "=%FSTATISTIC_ADDR%",
+ "=FDIST(%FSTATISTIC_ADDR% ; %DoFREG_ADDR% ; %DoFRESID_ADDR%)"
+ },
+ {
+ ScResId(STR_LABEL_RESIDUAL),
+ "=%DoFRESID_ADDR%",
+ "=%SSRESID_ADDR%",
+ "=%SSRESID_ADDR% / %DoFRESID_ADDR%",
+ "",
+ ""
+ },
+ {
+ ScResId(STR_ANOVA_LABEL_TOTAL),
+ "=%DoFREG_ADDR% + %DoFRESID_ADDR%",
+ "=%SSREG_ADDR% + %SSRESID_ADDR%",
+ "",
+ "",
+ ""
+ }
+ };
+
+ rTemplate.autoReplaceAddress("%DoFREG_ADDR%", rOutput.current(1, 1));
+
+ // Cell getter lambda
+ std::function<CellValueGetter> aCellGetterFunc = [&aTable](size_t nRowIdx, size_t nColIdx) -> const OUString&
+ {
+ return aTable[nRowIdx][nColIdx];
+ };
- // RSQUARED
- aTemplate.setTemplate(constTemplateRSQUARED[nRegressionIndex]);
- aOutput.writeMatrixFormula(aTemplate.getTemplate());
- aTemplate.autoReplaceAddress("%RSQUARED%", aOutput.current());
- aOutput.nextRow();
+ // Cell writer lambda
+ std::function<CellWriter> aCellWriterFunc = [&rOutput, &rTemplate]
+ (const OUString& rContent, size_t /*nRowIdx*/, size_t /*nColIdx*/)
+ {
+ if (!rContent.isEmpty())
+ {
+ if (rContent.startsWith("="))
+ {
+ rTemplate.setTemplate(rContent);
+ rOutput.writeFormula(rTemplate.getTemplate());
+ }
+ else
+ rOutput.writeString(rContent);
+ }
+ };
- // Standard Error
- aTemplate.setTemplate(constTemplatesSTDERR[nRegressionIndex]);
- aOutput.writeMatrixFormula(aTemplate.getTemplate());
- aTemplate.autoReplaceAddress("%STD_ERROR%", aOutput.current());
- aOutput.nextRow();
+ WriteTable(aCellGetterFunc, nRowsInTable, nColsInTable, rOutput, aCellWriterFunc);
- aOutput.nextRow();
+ // User given confidence level
+ rOutput.newLine();
+ rOutput.writeString(ScResId(STR_LABEL_CONFIDENCE_LEVEL));
+ rOutput.nextColumn();
+ rOutput.writeString(OUString::number(mpConfidenceLevelField->GetValue() / 100.0));
+ rTemplate.autoReplaceAddress("%CONFIDENCE_LEVEL_ADDR%", rOutput.current());
+ rOutput.newLine();
+}
- // Slope
- aTemplate.setTemplate(constTemplatesSLOPE[nRegressionIndex]);
- aOutput.writeMatrixFormula(aTemplate.getTemplate());
- aTemplate.autoReplaceAddress("%A%", aOutput.current());
- aOutput.nextRow();
+// Write slopes, intercept, their standard errors, t-statistics, p-value, confidence intervals
+void ScRegressionDialog::WriteRegressionEstimatesWithCI(AddressWalkerWriter& rOutput, FormulaTemplate& rTemplate,
+ bool bTakeLogX)
+{
+ rOutput.newLine();
+ SCROW nLastRow = rOutput.current(0, 1 + mnNumIndependentVars).Row();
+
+ // Coefficients & Std.Errors ranges (column vectors) in this table (yet to populate).
+ rTemplate.autoReplaceRange("%COEFFICIENTS_RANGE%",
+ ScRange(rOutput.current(1, 1),
+ rOutput.current(1, 1 + mnNumIndependentVars)));
+ rTemplate.autoReplaceRange("%SLOPES_RANGE%", // Excludes the intercept
+ ScRange(rOutput.current(1, 2),
+ rOutput.current(1, 1 + mnNumIndependentVars)));
+ rTemplate.autoReplaceAddress("%INTERCEPT_ADDR%", rOutput.current(1, 1));
+ rTemplate.autoReplaceRange("%SERRORSX_RANGE%",
+ ScRange(rOutput.current(2, 1),
+ rOutput.current(2, 1 + mnNumIndependentVars)));
+ // t-Statistics range in this table (yet to populate)
+ rTemplate.autoReplaceRange("%TSTAT_RANGE%",
+ ScRange(rOutput.current(3, 1),
+ rOutput.current(3, 1 + mnNumIndependentVars)));
+
+ const size_t nColsInTable = 7;
+ const size_t nRowsInTable = 2;
+ OUString aTable[nRowsInTable][nColsInTable] =
+ {
+ {
+ "",
+ ScResId(STR_LABEL_COEFFICIENTS),
+ ScResId(STRID_CALC_STD_ERROR),
+ ScResId(STR_LABEL_TSTATISTIC),
+ ScResId(STR_P_VALUE_LABEL),
- // Intercept
- aTemplate.setTemplate(constTemplatesINTERCEPT[nRegressionIndex]);
- aOutput.writeMatrixFormula(aTemplate.getTemplate());
- aTemplate.autoReplaceAddress("%B%", aOutput.current());
- aOutput.nextRow();
+ "=CONCAT(\"" + ScResId(STR_LABEL_LOWER) +
+ " \" ; INT(%CONFIDENCE_LEVEL_ADDR% * 100) ; \"%\")",
- aOutput.nextRow();
+ "=CONCAT(\"" + ScResId(STR_LABEL_UPPER) +
+ " \" ; INT(%CONFIDENCE_LEVEL_ADDR% * 100) ; \"%\")",
+ },
- for (size_t i = 0; i < nVariable1Size; i++)
+ // Following are matrix formulas of size numcols = 1, numrows = (mnNumIndependentVars + 1)
{
- aTemplate.setTemplate(constRegressionFormula[nRegressionIndex]);
- aTemplate.applyAddress("%ADDRESS%", aOutput.current(-nColumn), false);
- aOutput.writeFormula(aTemplate.getTemplate());
+ "",
+ // This puts the coefficients in the reverse order compared to that in LINEST output.
+ "=INDEX(%COEFFICIENTS_REV_RANGE%; 1 ; " + OUString::number(nLastRow + 2) + " - ROW())",
+ // This puts the standard errors in the reverse order compared to that in LINEST output.
+ "=INDEX(%SERRORSX_REV_RANGE%; 1 ; " + OUString::number(nLastRow + 2) + " - ROW())",
+ // t-Statistic
+ "=%COEFFICIENTS_RANGE% / %SERRORSX_RANGE%",
+ // p-Value
+ "=TDIST(ABS(%TSTAT_RANGE%) ; %DoFRESID_ADDR% ; 2 )",
+ // Lower limit of confidence interval
+ "=%COEFFICIENTS_RANGE% - %SERRORSX_RANGE% * "
+ "TINV(1 - %CONFIDENCE_LEVEL_ADDR% ; %DoFRESID_ADDR%)",
+ // Upper limit of confidence interval
+ "=%COEFFICIENTS_RANGE% + %SERRORSX_RANGE% * "
+ "TINV(1 - %CONFIDENCE_LEVEL_ADDR% ; %DoFRESID_ADDR%)"
+ }
+ };
- aOutput.nextRow();
+ // Cell getter lambda
+ std::function<CellValueGetter> aCellGetterFunc = [&aTable](size_t nRowIdx, size_t nColIdx) -> const OUString&
+ {
+ return aTable[nRowIdx][nColIdx];
+ };
+
+ // Cell writer lambda
+ size_t nNumIndependentVars = mnNumIndependentVars;
+ std::function<CellWriter> aCellWriterFunc = [&rOutput, &rTemplate, nNumIndependentVars]
+ (const OUString& rContent, size_t nRowIdx, size_t /*nColIdx*/)
+ {
+ if (!rContent.isEmpty())
+ {
+ if (rContent.startsWith("="))
+ {
+ rTemplate.setTemplate(rContent);
+ if (nRowIdx == 0)
+ rOutput.writeFormula(rTemplate.getTemplate());
+ else
+ rOutput.writeMatrixFormula(rTemplate.getTemplate(), 1, 1 + nNumIndependentVars);
+ }
+ else
+ rOutput.writeString(rContent);
}
+ };
+
+ WriteTable(aCellGetterFunc, nRowsInTable, nColsInTable, rOutput, aCellWriterFunc);
+
+ // Go back to the second row and first column of the table to
+ // fill the names of variables + intercept
+ rOutput.push(0, -1);
- aOutput.resetRow();
+ for (size_t nXvarIdx = 0; nXvarIdx <= mnNumIndependentVars; ++nXvarIdx)
+ {
+ rOutput.writeFormula(GetXVariableNameFormula(nXvarIdx, bTakeLogX));
+ rOutput.newLine();
}
- return ScRange(aOutput.mMinimumAddress, aOutput.mMaximumAddress);
+}
+
+// Re-write all observations in group-by column mode with predictions and residuals
+void ScRegressionDialog::WritePredictionsWithResiduals(AddressWalkerWriter& rOutput, FormulaTemplate& rTemplate,
+ size_t nRegressionIndex)
+{
+ bool bGroupedByColumn = mGroupedBy == BY_COLUMN;
+ rOutput.newLine();
+ rOutput.push();
+
+ // Range of X variables with rows as observations and columns as variables.
+ ScRange aDataMatrixRange(rOutput.current(0, 1), rOutput.current(mnNumIndependentVars - 1, mnNumObservations));
+ rTemplate.autoReplaceRange("%XDATAMATRIX_RANGE%", aDataMatrixRange);
+
+ // Write X variable names
+ for (size_t nXvarIdx = 1; nXvarIdx <= mnNumIndependentVars; ++nXvarIdx)
+ {
+ // Here we write the X variables without any transformation(LN)
+ rOutput.writeFormula(GetXVariableNameFormula(nXvarIdx, false));
+ rOutput.nextColumn();
+ }
+ rOutput.reset();
+
+ // Write the X data matrix
+ rOutput.nextRow();
+ OUString aDataMatrixFormula = bGroupedByColumn ? OUString("=%VARIABLE1_RANGE%") : OUString("=TRANSPOSE(%VARIABLE1_RANGE%)");
+ rTemplate.setTemplate(aDataMatrixFormula);
+ rOutput.writeMatrixFormula(rTemplate.getTemplate(), mnNumIndependentVars, mnNumObservations);
+
+ // Write predicted values
+ rOutput.push(mnNumIndependentVars, -1);
+ rOutput.writeString(ScResId(STR_LABEL_PREDICTEDY));
+ rOutput.nextRow();
+ rTemplate.setTemplate(constRegressionFormula[nRegressionIndex]);
+ rOutput.writeMatrixFormula(rTemplate.getTemplate(), 1, mnNumObservations);
+ rTemplate.autoReplaceRange("%PREDICTEDY_RANGE%", ScRange(rOutput.current(), rOutput.current(0, mnNumObservations - 1)));
+
+ // Write actual Y
+ rOutput.push(1, -1);
+ rOutput.writeFormula(GetYVariableNameFormula(false));
+ rOutput.nextRow();
+ OUString aYVectorFormula = bGroupedByColumn ? OUString("=%VARIABLE2_RANGE%") : OUString("=TRANSPOSE(%VARIABLE2_RANGE%)");
+ rTemplate.setTemplate(aYVectorFormula);
+ rOutput.writeMatrixFormula(rTemplate.getTemplate(), 1, mnNumObservations);
+ rTemplate.autoReplaceRange("%ACTUALY_RANGE%", ScRange(rOutput.current(), rOutput.current(0, mnNumObservations - 1)));
+
+ // Write residual
+ rOutput.push(1, -1);
+ rOutput.writeString(ScResId(STR_LABEL_RESIDUAL));
+ rOutput.nextRow();
+ rTemplate.setTemplate("=%ACTUALY_RANGE% - %PREDICTEDY_RANGE%");
+ rOutput.writeMatrixFormula(rTemplate.getTemplate(), 1, mnNumObservations);
+}
+
+// Generic table writer
+void ScRegressionDialog::WriteTable(std::function<CellValueGetter>& rCellGetter,
+ size_t nRowsInTable, size_t nColsInTable,
+ AddressWalkerWriter& rOutput,
+ std::function<CellWriter>& rFunc)
+{
+ for (size_t nRowIdx = 0; nRowIdx < nRowsInTable; ++nRowIdx)
+ {
+ for (size_t nColIdx = 0; nColIdx < nColsInTable; ++nColIdx)
+ {
+ rFunc(rCellGetter(nRowIdx, nColIdx), nRowIdx, nColIdx);
+ rOutput.nextColumn();
+ }
+ rOutput.newLine();
+ }
+}
+
+IMPL_LINK_NOARG(ScRegressionDialog, CheckBoxHdl, CheckBox&, void)
+{
+ ValidateDialogInput();
+}
+
+IMPL_LINK_NOARG(ScRegressionDialog, NumericFieldHdl, Edit&, void)
+{
+ ValidateDialogInput();
}
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/sc/source/ui/StatisticsDialogs/StatisticsTwoVariableDialog.cxx b/sc/source/ui/StatisticsDialogs/StatisticsTwoVariableDialog.cxx
index 0fa8c96f0333..85fff5b57ad2 100644
--- a/sc/source/ui/StatisticsDialogs/StatisticsTwoVariableDialog.cxx
+++ b/sc/source/ui/StatisticsDialogs/StatisticsTwoVariableDialog.cxx
@@ -199,11 +199,7 @@ void ScStatisticsTwoVariableDialog::SetReference( const ScRange& rReferenceRange
}
}
- // Enable OK if all ranges are set.
- if (mVariable1Range.IsValid() && mVariable2Range.IsValid() && mOutputAddress.IsValid())
- mpButtonOk->Enable();
- else
- mpButtonOk->Disable();
+ ValidateDialogInput();
}
IMPL_LINK_NOARG( ScStatisticsTwoVariableDialog, OkClicked, Button*, void )
@@ -246,6 +242,8 @@ IMPL_LINK_NOARG( ScStatisticsTwoVariableDialog, GroupByChanged, RadioButton&, vo
mGroupedBy = BY_COLUMN;
else if (mpGroupByRowsRadio->IsChecked())
mGroupedBy = BY_ROW;
+
+ ValidateDialogInput();
}
IMPL_LINK_NOARG( ScStatisticsTwoVariableDialog, RefInputModifyHandler, Edit&, void )
@@ -313,11 +311,7 @@ IMPL_LINK_NOARG( ScStatisticsTwoVariableDialog, RefInputModifyHandler, Edit&, vo
}
}
- // Enable OK if all ranges are set.
- if (mVariable1Range.IsValid() && mVariable2Range.IsValid() && mOutputAddress.IsValid())
- mpButtonOk->Enable();
- else
- mpButtonOk->Disable();
+ ValidateDialogInput();
}
void ScStatisticsTwoVariableDialog::CalculateInputAndWriteToOutput()
@@ -333,4 +327,18 @@ void ScStatisticsTwoVariableDialog::CalculateInputAndWriteToOutput()
pDocShell->PostPaint( aOutputRange, PaintPartFlags::Grid );
}
+bool ScStatisticsTwoVariableDialog::InputRangesValid()
+{
+ return mVariable1Range.IsValid() && mVariable2Range.IsValid() && mOutputAddress.IsValid();
+}
+
+void ScStatisticsTwoVariableDialog::ValidateDialogInput()
+{
+ // Enable OK button if all inputs are ok.
+ if (InputRangesValid())
+ mpButtonOk->Enable();
+ else
+ mpButtonOk->Disable();
+}
+
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/sc/source/ui/StatisticsDialogs/TableFillingAndNavigationTools.cxx b/sc/source/ui/StatisticsDialogs/TableFillingAndNavigationTools.cxx
index 5200e541a8f5..16fc8f3c84f7 100644
--- a/sc/source/ui/StatisticsDialogs/TableFillingAndNavigationTools.cxx
+++ b/sc/source/ui/StatisticsDialogs/TableFillingAndNavigationTools.cxx
@@ -162,11 +162,15 @@ void AddressWalkerWriter::writeFormula(const OUString& aFormula)
new ScFormulaCell(mpDocument, mCurrentAddress, aFormula, meGrammar), true);
}
-void AddressWalkerWriter::writeMatrixFormula(const OUString& aFormula)
+void AddressWalkerWriter::writeMatrixFormula(const OUString& aFormula, SCCOL nCols, SCROW nRows)
{
ScRange aRange;
aRange.aStart = mCurrentAddress;
aRange.aEnd = mCurrentAddress;
+ if (nCols > 1)
+ aRange.aEnd.IncCol(nCols - 1);
+ if (nRows > 1)
+ aRange.aEnd.IncRow(nRows - 1);
mpDocShell->GetDocFunc().EnterMatrix(aRange, nullptr, nullptr, aFormula, false, false, OUString(), meGrammar );
}