diff options
Diffstat (limited to 'sc/source')
6 files changed, 655 insertions, 139 deletions
diff --git a/sc/source/ui/StatisticsDialogs/RegressionDialog.cxx b/sc/source/ui/StatisticsDialogs/RegressionDialog.cxx index a07e94286c58..f7bdbd3271ba 100644 --- a/sc/source/ui/StatisticsDialogs/RegressionDialog.cxx +++ b/sc/source/ui/StatisticsDialogs/RegressionDialog.cxx @@ -8,13 +8,10 @@ * */ -#include <memory> #include <sfx2/dispatch.hxx> #include <svl/zforlist.hxx> #include <svl/undo.hxx> -#include <formulacell.hxx> -#include <rangelst.hxx> #include <scitems.hxx> #include <docsh.hxx> #include <document.hxx> @@ -26,8 +23,90 @@ #include <scresid.hxx> #include <strings.hrc> +/* + Some regression basics + ---------------------- + + 1. Linear regression fits using data, a linear function between the dependent variable and the independent variable(s). + The basic form of this function is :- + + y = b + m_1*x_1 + m_2*x_2 + ... + m_k*x_k + + where y is the dependent variable + x_1, x_2, ..., x_k are the k independent variables + b is the intercept + m_1, m_2, ..., m_k are the slopes corresponding to the variables x_1, x_2, ..., x_k respectively. + + + This equation for n observations can be compactly written using matrices as :- + + y = X*A + + where y is the n dimensional column vector containing dependent variable observations. + where X is matrix of shape n*(k+1) where a row looks like [ 1 x_1 x_2 ... x_k ] + A is the k+1 dimensional column vector [ b m_1 m_2 ... m_k ] + + Calc formula LINEST(Y_array ; X_array) can be used to compute all entries in "A" along with many other statistics. + + + 2. Logarithmic regression is basically used to find a linear function between the dependent variable and + the natural logarithm of the independent variable(s). + So the basic form of this functions is :- + + y = b + m_1*ln(x_1) + m_2*ln(x_2) + ... + m_k*ln(x_k) + + This can be again written in a compact matrix form for n observations. + + y = ln(X)*A + + where y is the n dimensional column vector containing dependent variable observations. + where X is matrix of shape n*(k+1) where a row looks like [ e x_1 x_2 ... x_k ] + A is the k+1 dimensional column vector [ b m_1 m_2 ... m_k ] + + To estimate A, we use the formula =LINEST(Y_array ; LN(X_array)) + + + 3. Power regression is used to fit the following model :- + + y = b * (x_1 ^ m_1) * (x_2 ^ m_2) * ... * (x_k ^ m_k) + + To reduce this to a linear function(so that we can still use LINEST()), we take natural logarithm on both sides + + ln(y) = c + m_1*ln(x_1) + m_2*ln(x_2) + ... + m_k*ln(x_k) ; where c = ln(b) + + + This again can be written compactly in matrix form as :- + + ln(y) = ln(X)*A + + where y is the n dimensional column vector containing dependent variable observations. + where X is matrix of shape n*(k+1) where a row looks like [ e x_1 x_2 ... x_k ] + A is the k+1 dimensional column vector [ c m_1 m_2 ... m_k ] + + To estimate A, we use the formula =LINEST(LN(Y_array) ; LN(X_array)) + + Once we get A, to get back y from x's we use the formula :- + + y = exp( ln(X)*A ) + + + + Some references for computing confidence interval for the regression coefficients :- + + [1] https://en.wikipedia.org/wiki/Student%27s_t-test#Slope_of_a_regression_line + [2] https://en.wikipedia.org/wiki/Simple_linear_regression#Normality_assumption + [3] https://onlinecourses.science.psu.edu/stat414/node/280 + + */ + namespace { + enum class ScRegType { + LINEAR, + LOGARITHMIC, + POWER + }; + const char* constRegressionModel[] = { STR_LABEL_LINEAR, @@ -35,53 +114,51 @@ namespace STR_LABEL_POWER }; - OUString constTemplateRSQUARED[] = - { - "=RSQ(%VARIABLE2_RANGE% ; %VARIABLE1_RANGE%)", - "=RSQ(%VARIABLE2_RANGE% ; LN(%VARIABLE1_RANGE%))", - "=RSQ(LN(%VARIABLE2_RANGE%) ; LN(%VARIABLE1_RANGE%))" - }; - - OUString constTemplatesSTDERR[] = - { - "=STEYX(%VARIABLE2_RANGE% ; %VARIABLE1_RANGE%)", - "=STEYX(%VARIABLE2_RANGE% ; LN(%VARIABLE1_RANGE%))", - "=STEYX(LN(%VARIABLE2_RANGE%) ; LN(%VARIABLE1_RANGE%))" - }; - - OUString constTemplatesSLOPE[] = - { - "=SLOPE(%VARIABLE2_RANGE% ; %VARIABLE1_RANGE%)", - "=SLOPE(%VARIABLE2_RANGE% ; LN(%VARIABLE1_RANGE%))", - "=SLOPE(LN(%VARIABLE2_RANGE%) ; LN(%VARIABLE1_RANGE%))" - }; - - OUString constTemplatesINTERCEPT[] = + OUString constTemplateLINEST[] = { - "=INTERCEPT(%VARIABLE2_RANGE% ; %VARIABLE1_RANGE%)", - "=INTERCEPT(%VARIABLE2_RANGE% ; LN(%VARIABLE1_RANGE%))", - "=EXP(INTERCEPT(LN(%VARIABLE2_RANGE%) ; LN(%VARIABLE1_RANGE%)))" + "=LINEST(%VARIABLE2_RANGE% ; %VARIABLE1_RANGE% ; TRUE ; TRUE)", + "=LINEST(%VARIABLE2_RANGE% ; LN(%VARIABLE1_RANGE%) ; TRUE ; TRUE)", + "=LINEST(LN(%VARIABLE2_RANGE%) ; LN(%VARIABLE1_RANGE%) ; TRUE ; TRUE)" }; OUString constRegressionFormula[] = { - "=%A% * %ADDRESS% + %B%", - "=%A% * LN(%ADDRESS%) + %B%", - "=%B% * %ADDRESS% ^ %A%" + "=MMULT(%XDATAMATRIX_RANGE% ; %SLOPES_RANGE%) + %INTERCEPT_ADDR%", + "=MMULT(LN(%XDATAMATRIX_RANGE%) ; %SLOPES_RANGE%) + %INTERCEPT_ADDR%", + "=EXP(MMULT(LN(%XDATAMATRIX_RANGE%) ; %SLOPES_RANGE%) + %INTERCEPT_ADDR%)" }; } // end anonymous namespace +static size_t lcl_GetNumRowsColsInRange(const ScRange& rRange, bool bRows) +{ + if (bRows) + return rRange.aEnd.Row() - rRange.aStart.Row() + 1; + + return rRange.aEnd.Col() - rRange.aStart.Col() + 1; +} + ScRegressionDialog::ScRegressionDialog( SfxBindings* pSfxBindings, SfxChildWindow* pChildWindow, vcl::Window* pParent, ScViewData* pViewData ) : ScStatisticsTwoVariableDialog( pSfxBindings, pChildWindow, pParent, pViewData, - "RegressionDialog", "modules/scalc/ui/regressiondialog.ui" ) + "RegressionDialog", "modules/scalc/ui/regressiondialog.ui" ), + mbUnivariate(true), + mnNumIndependentVars(1), + mnNumObservations(0), + mbUse3DAddresses(false) { - get(mpLinearCheckBox, "linear-check"); - get(mpLogarithmicCheckBox, "logarithmic-check"); - get(mpPowerCheckBox, "power-check"); + get(mpWithLabelsCheckBox, "withlabels-check"); + get(mpLinearRadioButton, "linear-radio"); + get(mpLogarithmicRadioButton, "logarithmic-radio"); + get(mpPowerRadioButton, "power-radio"); + get(mpConfidenceLevelField, "confidencelevel-spin"); + get(mpCalcResidualsCheckBox, "calcresiduals-check"); + get(mpErrorMessage, "error-message"); + mpWithLabelsCheckBox->SetToggleHdl(LINK(this, ScRegressionDialog, CheckBoxHdl)); + mpConfidenceLevelField->SetModifyHdl(LINK(this, ScRegressionDialog, NumericFieldHdl)); + mpCalcResidualsCheckBox->SetToggleHdl(LINK(this, ScRegressionDialog, CheckBoxHdl)); } ScRegressionDialog::~ScRegressionDialog() @@ -96,9 +173,13 @@ bool ScRegressionDialog::Close() void ScRegressionDialog::dispose() { - mpLinearCheckBox.disposeAndClear(); - mpLogarithmicCheckBox.disposeAndClear(); - mpPowerCheckBox.disposeAndClear(); + mpWithLabelsCheckBox.disposeAndClear(); + mpLinearRadioButton.disposeAndClear(); + mpLogarithmicRadioButton.disposeAndClear(); + mpPowerRadioButton.disposeAndClear(); + mpConfidenceLevelField.disposeAndClear(); + mpCalcResidualsCheckBox.disposeAndClear(); + mpErrorMessage.disposeAndClear(); ScStatisticsTwoVariableDialog::dispose(); } @@ -112,131 +193,509 @@ ScRange ScRegressionDialog::ApplyOutput(ScDocShell* pDocShell) AddressWalkerWriter aOutput(mOutputAddress, pDocShell, mDocument, formula::FormulaGrammar::mergeToGrammar( formula::FormulaGrammar::GRAM_ENGLISH, mAddressDetails.eConv)); FormulaTemplate aTemplate(mDocument); - aTemplate.autoReplaceUses3D(false); + aTemplate.autoReplaceUses3D(mbUse3DAddresses); + + // max col of our output should account for + // 1. constant term column, + // 2. mnNumIndependentVars columns + // 3. Actual Y column + // 4. Predicted Y column + // 5. Residual Column + SCCOL nOutputMaxCol = mOutputAddress.Col() + mnNumIndependentVars + 3; + + ScRange aXDataRange(GetDataRange(mVariable1Range)); + ScRange aYDataRange(GetDataRange(mVariable2Range)); + + aTemplate.autoReplaceRange("%VARIABLE1_RANGE%", aXDataRange); + aTemplate.autoReplaceRange("%VARIABLE2_RANGE%", aYDataRange); + size_t nRegressionIndex = GetRegressionTypeIndex(); + ScRegType eRegType = static_cast<ScRegType>(nRegressionIndex); + bool bTakeLogX = eRegType == ScRegType::LOGARITHMIC || eRegType == ScRegType::POWER; + + WriteRawRegressionResults(aOutput, aTemplate, nRegressionIndex); + WriteRegressionStatistics(aOutput, aTemplate); + WriteRegressionANOVAResults(aOutput, aTemplate); + WriteRegressionEstimatesWithCI(aOutput, aTemplate, bTakeLogX); + if (mpCalcResidualsCheckBox->IsChecked()) + WritePredictionsWithResiduals(aOutput, aTemplate, nRegressionIndex); + + ScAddress aMaxAddress(aOutput.mMaximumAddress); + aMaxAddress.SetCol(std::max(aMaxAddress.Col(), nOutputMaxCol)); + return ScRange(aOutput.mMinimumAddress, aMaxAddress); +} - std::unique_ptr<DataRangeIterator> pVariable1Iterator; - if (mGroupedBy == BY_COLUMN) - pVariable1Iterator.reset(new DataRangeByColumnIterator(mVariable1Range)); - else - pVariable1Iterator.reset(new DataRangeByRowIterator(mVariable1Range)); +bool ScRegressionDialog::InputRangesValid() +{ + if (!mVariable1Range.IsValid()) + { + mpErrorMessage->SetText(ScResId(STR_MESSAGE_XINVALID_RANGE)); + return false; + } - std::unique_ptr<DataRangeIterator> pVariable2Iterator; - if (mGroupedBy == BY_COLUMN) - pVariable2Iterator.reset(new DataRangeByColumnIterator(mVariable2Range)); - else - pVariable2Iterator.reset(new DataRangeByRowIterator(mVariable2Range)); + if (!mVariable2Range.IsValid()) + { + mpErrorMessage->SetText(ScResId(STR_MESSAGE_YINVALID_RANGE)); + return false; + } - aTemplate.autoReplaceRange("%VARIABLE1_RANGE%", pVariable1Iterator->get()); - aTemplate.autoReplaceRange("%VARIABLE2_RANGE%", pVariable2Iterator->get()); + if (!mOutputAddress.IsValid()) + { + mpErrorMessage->SetText(ScResId(STR_MESSAGE_INVALID_OUTPUT_ADDR)); + return false; + } - aOutput.writeBoldString(ScResId(STR_REGRESSION)); - aOutput.newLine(); - aOutput.newLine(); - aOutput.push(); + { + double fConfidenceLevel = mpConfidenceLevelField->GetValue(); + if ( fConfidenceLevel <= 0.0 || fConfidenceLevel >= 100.0 ) + { + mpErrorMessage->SetText(ScResId(STR_MESSAGE_INVALID_CONFIDENCE_LEVEL)); + return false; + } + } - // REGRESSION MODEL - aOutput.writeString(ScResId(STR_LABEL_REGRESSION_MODEL)); - aOutput.nextRow(); + mVariable1Range.PutInOrder(); + mVariable2Range.PutInOrder(); - // RSQUARED - aOutput.writeString(ScResId(STR_LABEL_RSQUARED)); - aOutput.nextRow(); + bool bGroupedByColumn = mGroupedBy == BY_COLUMN; - // Standard Error - aOutput.writeString(ScResId(STRID_CALC_STD_ERROR)); - aOutput.nextRow(); + bool bYHasSingleDim = ( + (bGroupedByColumn && + mVariable2Range.aStart.Col() == mVariable2Range.aEnd.Col()) || + (!bGroupedByColumn && + mVariable2Range.aStart.Row() == mVariable2Range.aEnd.Row())); - aOutput.nextRow(); + if (!bYHasSingleDim) + { + if (bGroupedByColumn) + mpErrorMessage->SetText(ScResId(STR_MESSAGE_YVARIABLE_MULTI_COLUMN)); + else + mpErrorMessage->SetText(ScResId(STR_MESSAGE_YVARIABLE_MULTI_ROW)); + return false; + } - // Slope - aOutput.writeString(ScResId(STR_LABEL_SLOPE)); - aOutput.nextRow(); + bool bWithLabels = mpWithLabelsCheckBox->IsChecked(); - // Intercept - aOutput.writeString(ScResId(STR_LABEL_INTERCEPT)); - aOutput.nextRow(); + size_t nYObs = lcl_GetNumRowsColsInRange(mVariable2Range, bGroupedByColumn); + size_t nNumXVars = lcl_GetNumRowsColsInRange(mVariable1Range, !bGroupedByColumn); + mbUnivariate = nNumXVars == 1; + // Observation count mismatch check + if (lcl_GetNumRowsColsInRange(mVariable1Range, bGroupedByColumn) != nYObs) + { + if (mbUnivariate) + mpErrorMessage->SetText(ScResId(STR_MESSAGE_UNIVARIATE_NUMOBS_MISMATCH)); + else + mpErrorMessage->SetText(ScResId(STR_MESSAGE_MULTIVARIATE_NUMOBS_MISMATCH)); + return false; + } + + mnNumIndependentVars = nNumXVars; + mnNumObservations = bWithLabels ? nYObs - 1 : nYObs; + + mbUse3DAddresses = mVariable1Range.aStart.Tab() != mOutputAddress.Tab() || + mVariable2Range.aStart.Tab() != mOutputAddress.Tab(); + + mpErrorMessage->SetText(""); - aOutput.nextRow(); + return true; +} + +size_t ScRegressionDialog::GetRegressionTypeIndex() +{ + if (mpLinearRadioButton->IsChecked()) + return 0; + if (mpLogarithmicRadioButton->IsChecked()) + return 1; + return 2; +} - size_t nVariable1Size = pVariable1Iterator->size(); +ScRange ScRegressionDialog::GetDataRange(const ScRange& rRange) +{ + if (!mpWithLabelsCheckBox->IsChecked()) + return rRange; - OUString sFormula; + ScRange aDataRange(rRange); if (mGroupedBy == BY_COLUMN) - sFormula = "=INDEX(%VARIABLE1_RANGE%; %VAR1_CELL_INDEX%; 1)"; + aDataRange.aStart.IncRow(1); else - sFormula = "=INDEX(%VARIABLE1_RANGE%; 1; %VAR1_CELL_INDEX%)"; + aDataRange.aStart.IncCol(1); + + return aDataRange; +} + +OUString ScRegressionDialog::GetVariableNameFormula(bool bXVar, size_t nIndex, bool bWithLog) +{ + if (bXVar && nIndex == 0) + return "=\"" + ScResId(STR_LABEL_INTERCEPT) + "\""; - for (size_t i = 0; i < nVariable1Size; i++) + if (mpWithLabelsCheckBox->IsChecked()) { - aTemplate.setTemplate(sFormula); - aTemplate.applyNumber("%VAR1_CELL_INDEX%", i + 1); - aOutput.writeFormula(aTemplate.getTemplate()); - aOutput.nextRow(); + ScAddress aAddr(bXVar ? mVariable1Range.aStart : mVariable2Range.aStart); + if (mGroupedBy == BY_COLUMN) + aAddr.IncCol(nIndex - 1); + else + aAddr.IncRow(nIndex - 1); + + ScRefFlags eAddrFlag = mbUse3DAddresses ? ScRefFlags::ADDR_ABS_3D : ScRefFlags::ADDR_ABS; + return bWithLog ? OUString("=CONCAT(\"LN(\";" + + aAddr.Format(eAddrFlag, mDocument, mDocument->GetAddressConvention()) + ";\")\")") : + OUString("=" + aAddr.Format(eAddrFlag, mDocument, mDocument->GetAddressConvention())); } - aOutput.reset(); + OUString aDefaultVarName; + + if (bXVar) + aDefaultVarName = "X" + OUString::number(nIndex); + else + aDefaultVarName = "Y"; + + return bWithLog ? OUString("=\"LN(" + aDefaultVarName + ")\"") : + OUString("=\"" + aDefaultVarName + "\""); +} + +OUString ScRegressionDialog::GetXVariableNameFormula(size_t nIndex, bool bWithLog) +{ + assert(nIndex <= mnNumIndependentVars); + return GetVariableNameFormula(true, nIndex, bWithLog); +} - bool aEnabledRegressionTypes[3]; +OUString ScRegressionDialog::GetYVariableNameFormula(bool bWithLog) +{ + return GetVariableNameFormula(false, 1, bWithLog); +} - aEnabledRegressionTypes[0] = mpLinearCheckBox->IsChecked(); - aEnabledRegressionTypes[1] = mpLogarithmicCheckBox->IsChecked(); - aEnabledRegressionTypes[2] = mpPowerCheckBox->IsChecked(); +void ScRegressionDialog::WriteRawRegressionResults(AddressWalkerWriter& rOutput, FormulaTemplate& rTemplate, + size_t nRegressionIndex) +{ + rOutput.writeBoldString(ScResId(STR_REGRESSION)); + rOutput.newLine(); + // REGRESSION MODEL + rOutput.writeString(ScResId(STR_LABEL_REGRESSION_MODEL)); + rOutput.nextColumn(); + rOutput.writeString(ScResId(constRegressionModel[nRegressionIndex])); + rOutput.newLine(); + rOutput.newLine(); + + rOutput.writeString(ScResId(STR_LINEST_RAW_OUTPUT_TITLE)); + rOutput.newLine(); + rOutput.push(); + rTemplate.setTemplate(constTemplateLINEST[nRegressionIndex]); + rOutput.writeMatrixFormula(rTemplate.getTemplate(), 1 + mnNumIndependentVars, 5); + // Add LINEST result components to template + // 1. Add ranges for coeffients and standard errors for indep. vars and the intercept. + // Note that these two are in the reverse order(m_n, m_n-1, ..., m_1, b) w.r.t what we expect. + rTemplate.autoReplaceRange("%COEFFICIENTS_REV_RANGE%", ScRange(rOutput.current(), rOutput.current(mnNumIndependentVars))); + rTemplate.autoReplaceRange("%SERRORSX_REV_RANGE%", ScRange(rOutput.current(0, 1), rOutput.current(mnNumIndependentVars, 1))); + + // 2. Add R-squared and standard error for y estimate. + rTemplate.autoReplaceAddress("%RSQUARED_ADDR%", rOutput.current(0, 2)); + rTemplate.autoReplaceAddress("%SERRORY_ADDR%", rOutput.current(1, 2)); + + // 3. Add F statistic and degrees of freedom + rTemplate.autoReplaceAddress("%FSTATISTIC_ADDR%", rOutput.current(0, 3)); + rTemplate.autoReplaceAddress("%DoFRESID_ADDR%", rOutput.current(1, 3)); + + // 4. Add regression sum of squares and residual sum of squares + rTemplate.autoReplaceAddress("%SSREG_ADDR%", rOutput.current(0, 4)); + rTemplate.autoReplaceAddress("%SSRESID_ADDR%", rOutput.current(1, 4)); + + rOutput.push(0, 4); + rOutput.newLine(); +} - sal_Int16 nColumn = 0; +void ScRegressionDialog::WriteRegressionStatistics(AddressWalkerWriter& rOutput, FormulaTemplate& rTemplate) +{ + rOutput.newLine(); + rOutput.writeString(ScResId(STR_LABEL_REGRESSION_STATISTICS)); + rOutput.newLine(); - for (size_t nRegressionIndex = 0; nRegressionIndex < SAL_N_ELEMENTS(aEnabledRegressionTypes); ++nRegressionIndex) + const char* aMeasureNames[] = { - if (!aEnabledRegressionTypes[nRegressionIndex]) - continue; + STR_LABEL_RSQUARED, + STRID_CALC_STD_ERROR, + STR_LABEL_XVARIABLES_COUNT, + STR_OBSERVATIONS_LABEL, + STR_LABEL_ADJUSTED_RSQUARED + }; - aOutput.nextColumn(); - nColumn += 1; + OUString aMeasureFormulas[] = + { + "=%RSQUARED_ADDR%", + "=%SERRORY_ADDR%", + "=" + OUString::number(mnNumIndependentVars), + "=" + OUString::number(mnNumObservations), + "=1 - (1 - %RSQUARED_ADDR%)*(%NUMOBS_ADDR% - 1)/(%NUMOBS_ADDR% - %NUMXVARS_ADDR% - 1)" + }; + + rTemplate.autoReplaceAddress("%NUMXVARS_ADDR%", rOutput.current(1, 2)); + rTemplate.autoReplaceAddress("%NUMOBS_ADDR%", rOutput.current(1, 3)); + + for (size_t nIdx = 0; nIdx < SAL_N_ELEMENTS(aMeasureNames); ++nIdx) + { + rOutput.writeString(ScResId(aMeasureNames[nIdx])); + rOutput.nextColumn(); + rTemplate.setTemplate(aMeasureFormulas[nIdx]); + rOutput.writeFormula(rTemplate.getTemplate()); + rOutput.newLine(); + } +} + +void ScRegressionDialog::WriteRegressionANOVAResults(AddressWalkerWriter& rOutput, FormulaTemplate& rTemplate) +{ + rOutput.newLine(); + rOutput.writeString(ScResId(STR_LABEL_ANOVA)); + rOutput.newLine(); - // REGRESSION MODEL - aOutput.writeString(ScResId(constRegressionModel[nRegressionIndex])); - aOutput.nextRow(); + const size_t nColsInTable = 6; + const size_t nRowsInTable = 4; + OUString aTable[nRowsInTable][nColsInTable] = + { + { + "", + ScResId(STR_ANOVA_LABEL_DF), + ScResId(STR_ANOVA_LABEL_SS), + ScResId(STR_ANOVA_LABEL_MS), + ScResId(STR_ANOVA_LABEL_F), + ScResId(STR_ANOVA_LABEL_SIGNIFICANCE_F) + }, + { + ScResId(STR_REGRESSION), + "=%NUMXVARS_ADDR%", + "=%SSREG_ADDR%", + "=%SSREG_ADDR% / %DoFREG_ADDR%", + "=%FSTATISTIC_ADDR%", + "=FDIST(%FSTATISTIC_ADDR% ; %DoFREG_ADDR% ; %DoFRESID_ADDR%)" + }, + { + ScResId(STR_LABEL_RESIDUAL), + "=%DoFRESID_ADDR%", + "=%SSRESID_ADDR%", + "=%SSRESID_ADDR% / %DoFRESID_ADDR%", + "", + "" + }, + { + ScResId(STR_ANOVA_LABEL_TOTAL), + "=%DoFREG_ADDR% + %DoFRESID_ADDR%", + "=%SSREG_ADDR% + %SSRESID_ADDR%", + "", + "", + "" + } + }; + + rTemplate.autoReplaceAddress("%DoFREG_ADDR%", rOutput.current(1, 1)); + + // Cell getter lambda + std::function<CellValueGetter> aCellGetterFunc = [&aTable](size_t nRowIdx, size_t nColIdx) -> const OUString& + { + return aTable[nRowIdx][nColIdx]; + }; - // RSQUARED - aTemplate.setTemplate(constTemplateRSQUARED[nRegressionIndex]); - aOutput.writeMatrixFormula(aTemplate.getTemplate()); - aTemplate.autoReplaceAddress("%RSQUARED%", aOutput.current()); - aOutput.nextRow(); + // Cell writer lambda + std::function<CellWriter> aCellWriterFunc = [&rOutput, &rTemplate] + (const OUString& rContent, size_t /*nRowIdx*/, size_t /*nColIdx*/) + { + if (!rContent.isEmpty()) + { + if (rContent.startsWith("=")) + { + rTemplate.setTemplate(rContent); + rOutput.writeFormula(rTemplate.getTemplate()); + } + else + rOutput.writeString(rContent); + } + }; - // Standard Error - aTemplate.setTemplate(constTemplatesSTDERR[nRegressionIndex]); - aOutput.writeMatrixFormula(aTemplate.getTemplate()); - aTemplate.autoReplaceAddress("%STD_ERROR%", aOutput.current()); - aOutput.nextRow(); + WriteTable(aCellGetterFunc, nRowsInTable, nColsInTable, rOutput, aCellWriterFunc); - aOutput.nextRow(); + // User given confidence level + rOutput.newLine(); + rOutput.writeString(ScResId(STR_LABEL_CONFIDENCE_LEVEL)); + rOutput.nextColumn(); + rOutput.writeString(OUString::number(mpConfidenceLevelField->GetValue() / 100.0)); + rTemplate.autoReplaceAddress("%CONFIDENCE_LEVEL_ADDR%", rOutput.current()); + rOutput.newLine(); +} - // Slope - aTemplate.setTemplate(constTemplatesSLOPE[nRegressionIndex]); - aOutput.writeMatrixFormula(aTemplate.getTemplate()); - aTemplate.autoReplaceAddress("%A%", aOutput.current()); - aOutput.nextRow(); +// Write slopes, intercept, their standard errors, t-statistics, p-value, confidence intervals +void ScRegressionDialog::WriteRegressionEstimatesWithCI(AddressWalkerWriter& rOutput, FormulaTemplate& rTemplate, + bool bTakeLogX) +{ + rOutput.newLine(); + SCROW nLastRow = rOutput.current(0, 1 + mnNumIndependentVars).Row(); + + // Coefficients & Std.Errors ranges (column vectors) in this table (yet to populate). + rTemplate.autoReplaceRange("%COEFFICIENTS_RANGE%", + ScRange(rOutput.current(1, 1), + rOutput.current(1, 1 + mnNumIndependentVars))); + rTemplate.autoReplaceRange("%SLOPES_RANGE%", // Excludes the intercept + ScRange(rOutput.current(1, 2), + rOutput.current(1, 1 + mnNumIndependentVars))); + rTemplate.autoReplaceAddress("%INTERCEPT_ADDR%", rOutput.current(1, 1)); + rTemplate.autoReplaceRange("%SERRORSX_RANGE%", + ScRange(rOutput.current(2, 1), + rOutput.current(2, 1 + mnNumIndependentVars))); + // t-Statistics range in this table (yet to populate) + rTemplate.autoReplaceRange("%TSTAT_RANGE%", + ScRange(rOutput.current(3, 1), + rOutput.current(3, 1 + mnNumIndependentVars))); + + const size_t nColsInTable = 7; + const size_t nRowsInTable = 2; + OUString aTable[nRowsInTable][nColsInTable] = + { + { + "", + ScResId(STR_LABEL_COEFFICIENTS), + ScResId(STRID_CALC_STD_ERROR), + ScResId(STR_LABEL_TSTATISTIC), + ScResId(STR_P_VALUE_LABEL), - // Intercept - aTemplate.setTemplate(constTemplatesINTERCEPT[nRegressionIndex]); - aOutput.writeMatrixFormula(aTemplate.getTemplate()); - aTemplate.autoReplaceAddress("%B%", aOutput.current()); - aOutput.nextRow(); + "=CONCAT(\"" + ScResId(STR_LABEL_LOWER) + + " \" ; INT(%CONFIDENCE_LEVEL_ADDR% * 100) ; \"%\")", - aOutput.nextRow(); + "=CONCAT(\"" + ScResId(STR_LABEL_UPPER) + + " \" ; INT(%CONFIDENCE_LEVEL_ADDR% * 100) ; \"%\")", + }, - for (size_t i = 0; i < nVariable1Size; i++) + // Following are matrix formulas of size numcols = 1, numrows = (mnNumIndependentVars + 1) { - aTemplate.setTemplate(constRegressionFormula[nRegressionIndex]); - aTemplate.applyAddress("%ADDRESS%", aOutput.current(-nColumn), false); - aOutput.writeFormula(aTemplate.getTemplate()); + "", + // This puts the coefficients in the reverse order compared to that in LINEST output. + "=INDEX(%COEFFICIENTS_REV_RANGE%; 1 ; " + OUString::number(nLastRow + 2) + " - ROW())", + // This puts the standard errors in the reverse order compared to that in LINEST output. + "=INDEX(%SERRORSX_REV_RANGE%; 1 ; " + OUString::number(nLastRow + 2) + " - ROW())", + // t-Statistic + "=%COEFFICIENTS_RANGE% / %SERRORSX_RANGE%", + // p-Value + "=TDIST(ABS(%TSTAT_RANGE%) ; %DoFRESID_ADDR% ; 2 )", + // Lower limit of confidence interval + "=%COEFFICIENTS_RANGE% - %SERRORSX_RANGE% * " + "TINV(1 - %CONFIDENCE_LEVEL_ADDR% ; %DoFRESID_ADDR%)", + // Upper limit of confidence interval + "=%COEFFICIENTS_RANGE% + %SERRORSX_RANGE% * " + "TINV(1 - %CONFIDENCE_LEVEL_ADDR% ; %DoFRESID_ADDR%)" + } + }; - aOutput.nextRow(); + // Cell getter lambda + std::function<CellValueGetter> aCellGetterFunc = [&aTable](size_t nRowIdx, size_t nColIdx) -> const OUString& + { + return aTable[nRowIdx][nColIdx]; + }; + + // Cell writer lambda + size_t nNumIndependentVars = mnNumIndependentVars; + std::function<CellWriter> aCellWriterFunc = [&rOutput, &rTemplate, nNumIndependentVars] + (const OUString& rContent, size_t nRowIdx, size_t /*nColIdx*/) + { + if (!rContent.isEmpty()) + { + if (rContent.startsWith("=")) + { + rTemplate.setTemplate(rContent); + if (nRowIdx == 0) + rOutput.writeFormula(rTemplate.getTemplate()); + else + rOutput.writeMatrixFormula(rTemplate.getTemplate(), 1, 1 + nNumIndependentVars); + } + else + rOutput.writeString(rContent); } + }; + + WriteTable(aCellGetterFunc, nRowsInTable, nColsInTable, rOutput, aCellWriterFunc); + + // Go back to the second row and first column of the table to + // fill the names of variables + intercept + rOutput.push(0, -1); - aOutput.resetRow(); + for (size_t nXvarIdx = 0; nXvarIdx <= mnNumIndependentVars; ++nXvarIdx) + { + rOutput.writeFormula(GetXVariableNameFormula(nXvarIdx, bTakeLogX)); + rOutput.newLine(); } - return ScRange(aOutput.mMinimumAddress, aOutput.mMaximumAddress); +} + +// Re-write all observations in group-by column mode with predictions and residuals +void ScRegressionDialog::WritePredictionsWithResiduals(AddressWalkerWriter& rOutput, FormulaTemplate& rTemplate, + size_t nRegressionIndex) +{ + bool bGroupedByColumn = mGroupedBy == BY_COLUMN; + rOutput.newLine(); + rOutput.push(); + + // Range of X variables with rows as observations and columns as variables. + ScRange aDataMatrixRange(rOutput.current(0, 1), rOutput.current(mnNumIndependentVars - 1, mnNumObservations)); + rTemplate.autoReplaceRange("%XDATAMATRIX_RANGE%", aDataMatrixRange); + + // Write X variable names + for (size_t nXvarIdx = 1; nXvarIdx <= mnNumIndependentVars; ++nXvarIdx) + { + // Here we write the X variables without any transformation(LN) + rOutput.writeFormula(GetXVariableNameFormula(nXvarIdx, false)); + rOutput.nextColumn(); + } + rOutput.reset(); + + // Write the X data matrix + rOutput.nextRow(); + OUString aDataMatrixFormula = bGroupedByColumn ? OUString("=%VARIABLE1_RANGE%") : OUString("=TRANSPOSE(%VARIABLE1_RANGE%)"); + rTemplate.setTemplate(aDataMatrixFormula); + rOutput.writeMatrixFormula(rTemplate.getTemplate(), mnNumIndependentVars, mnNumObservations); + + // Write predicted values + rOutput.push(mnNumIndependentVars, -1); + rOutput.writeString(ScResId(STR_LABEL_PREDICTEDY)); + rOutput.nextRow(); + rTemplate.setTemplate(constRegressionFormula[nRegressionIndex]); + rOutput.writeMatrixFormula(rTemplate.getTemplate(), 1, mnNumObservations); + rTemplate.autoReplaceRange("%PREDICTEDY_RANGE%", ScRange(rOutput.current(), rOutput.current(0, mnNumObservations - 1))); + + // Write actual Y + rOutput.push(1, -1); + rOutput.writeFormula(GetYVariableNameFormula(false)); + rOutput.nextRow(); + OUString aYVectorFormula = bGroupedByColumn ? OUString("=%VARIABLE2_RANGE%") : OUString("=TRANSPOSE(%VARIABLE2_RANGE%)"); + rTemplate.setTemplate(aYVectorFormula); + rOutput.writeMatrixFormula(rTemplate.getTemplate(), 1, mnNumObservations); + rTemplate.autoReplaceRange("%ACTUALY_RANGE%", ScRange(rOutput.current(), rOutput.current(0, mnNumObservations - 1))); + + // Write residual + rOutput.push(1, -1); + rOutput.writeString(ScResId(STR_LABEL_RESIDUAL)); + rOutput.nextRow(); + rTemplate.setTemplate("=%ACTUALY_RANGE% - %PREDICTEDY_RANGE%"); + rOutput.writeMatrixFormula(rTemplate.getTemplate(), 1, mnNumObservations); +} + +// Generic table writer +void ScRegressionDialog::WriteTable(std::function<CellValueGetter>& rCellGetter, + size_t nRowsInTable, size_t nColsInTable, + AddressWalkerWriter& rOutput, + std::function<CellWriter>& rFunc) +{ + for (size_t nRowIdx = 0; nRowIdx < nRowsInTable; ++nRowIdx) + { + for (size_t nColIdx = 0; nColIdx < nColsInTable; ++nColIdx) + { + rFunc(rCellGetter(nRowIdx, nColIdx), nRowIdx, nColIdx); + rOutput.nextColumn(); + } + rOutput.newLine(); + } +} + +IMPL_LINK_NOARG(ScRegressionDialog, CheckBoxHdl, CheckBox&, void) +{ + ValidateDialogInput(); +} + +IMPL_LINK_NOARG(ScRegressionDialog, NumericFieldHdl, Edit&, void) +{ + ValidateDialogInput(); } /* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/sc/source/ui/StatisticsDialogs/StatisticsTwoVariableDialog.cxx b/sc/source/ui/StatisticsDialogs/StatisticsTwoVariableDialog.cxx index 0fa8c96f0333..85fff5b57ad2 100644 --- a/sc/source/ui/StatisticsDialogs/StatisticsTwoVariableDialog.cxx +++ b/sc/source/ui/StatisticsDialogs/StatisticsTwoVariableDialog.cxx @@ -199,11 +199,7 @@ void ScStatisticsTwoVariableDialog::SetReference( const ScRange& rReferenceRange } } - // Enable OK if all ranges are set. - if (mVariable1Range.IsValid() && mVariable2Range.IsValid() && mOutputAddress.IsValid()) - mpButtonOk->Enable(); - else - mpButtonOk->Disable(); + ValidateDialogInput(); } IMPL_LINK_NOARG( ScStatisticsTwoVariableDialog, OkClicked, Button*, void ) @@ -246,6 +242,8 @@ IMPL_LINK_NOARG( ScStatisticsTwoVariableDialog, GroupByChanged, RadioButton&, vo mGroupedBy = BY_COLUMN; else if (mpGroupByRowsRadio->IsChecked()) mGroupedBy = BY_ROW; + + ValidateDialogInput(); } IMPL_LINK_NOARG( ScStatisticsTwoVariableDialog, RefInputModifyHandler, Edit&, void ) @@ -313,11 +311,7 @@ IMPL_LINK_NOARG( ScStatisticsTwoVariableDialog, RefInputModifyHandler, Edit&, vo } } - // Enable OK if all ranges are set. - if (mVariable1Range.IsValid() && mVariable2Range.IsValid() && mOutputAddress.IsValid()) - mpButtonOk->Enable(); - else - mpButtonOk->Disable(); + ValidateDialogInput(); } void ScStatisticsTwoVariableDialog::CalculateInputAndWriteToOutput() @@ -333,4 +327,18 @@ void ScStatisticsTwoVariableDialog::CalculateInputAndWriteToOutput() pDocShell->PostPaint( aOutputRange, PaintPartFlags::Grid ); } +bool ScStatisticsTwoVariableDialog::InputRangesValid() +{ + return mVariable1Range.IsValid() && mVariable2Range.IsValid() && mOutputAddress.IsValid(); +} + +void ScStatisticsTwoVariableDialog::ValidateDialogInput() +{ + // Enable OK button if all inputs are ok. + if (InputRangesValid()) + mpButtonOk->Enable(); + else + mpButtonOk->Disable(); +} + /* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/sc/source/ui/StatisticsDialogs/TableFillingAndNavigationTools.cxx b/sc/source/ui/StatisticsDialogs/TableFillingAndNavigationTools.cxx index 5200e541a8f5..16fc8f3c84f7 100644 --- a/sc/source/ui/StatisticsDialogs/TableFillingAndNavigationTools.cxx +++ b/sc/source/ui/StatisticsDialogs/TableFillingAndNavigationTools.cxx @@ -162,11 +162,15 @@ void AddressWalkerWriter::writeFormula(const OUString& aFormula) new ScFormulaCell(mpDocument, mCurrentAddress, aFormula, meGrammar), true); } -void AddressWalkerWriter::writeMatrixFormula(const OUString& aFormula) +void AddressWalkerWriter::writeMatrixFormula(const OUString& aFormula, SCCOL nCols, SCROW nRows) { ScRange aRange; aRange.aStart = mCurrentAddress; aRange.aEnd = mCurrentAddress; + if (nCols > 1) + aRange.aEnd.IncCol(nCols - 1); + if (nRows > 1) + aRange.aEnd.IncRow(nRows - 1); mpDocShell->GetDocFunc().EnterMatrix(aRange, nullptr, nullptr, aFormula, false, false, OUString(), meGrammar ); } diff --git a/sc/source/ui/inc/RegressionDialog.hxx b/sc/source/ui/inc/RegressionDialog.hxx index 254562e93dbc..722fece34279 100644 --- a/sc/source/ui/inc/RegressionDialog.hxx +++ b/sc/source/ui/inc/RegressionDialog.hxx @@ -15,9 +15,18 @@ class ScRegressionDialog : public ScStatisticsTwoVariableDialog { - VclPtr<CheckBox> mpLinearCheckBox; - VclPtr<CheckBox> mpLogarithmicCheckBox; - VclPtr<CheckBox> mpPowerCheckBox; + VclPtr<CheckBox> mpWithLabelsCheckBox; + VclPtr<RadioButton> mpLinearRadioButton; + VclPtr<RadioButton> mpLogarithmicRadioButton; + VclPtr<RadioButton> mpPowerRadioButton; + VclPtr<FixedText> mpErrorMessage; + VclPtr<NumericField> mpConfidenceLevelField; + VclPtr<CheckBox> mpCalcResidualsCheckBox; + + bool mbUnivariate; + size_t mnNumIndependentVars; + size_t mnNumObservations; + bool mbUse3DAddresses; public: ScRegressionDialog( @@ -32,6 +41,40 @@ protected: void dispose() override; virtual const char* GetUndoNameId() override; virtual ScRange ApplyOutput(ScDocShell* pDocShell) override; + virtual bool InputRangesValid() override; + +private: + + using CellValueGetter = const OUString&(size_t, size_t); + using CellWriter = void(const OUString&, size_t, size_t); + + size_t GetRegressionTypeIndex(); + ScRange GetDataRange(const ScRange& rRange); + OUString GetVariableNameFormula(bool bXVar, size_t nIndex, bool bWithLog); + OUString GetXVariableNameFormula(size_t nIndex, bool bWithLog); + OUString GetYVariableNameFormula(bool bWithLog); + + // Helper methods for writing different parts of regression results. + void WriteRawRegressionResults(AddressWalkerWriter& rOutput, + FormulaTemplate& rTemplate, + size_t nRegressionIndex); + void WriteRegressionStatistics(AddressWalkerWriter& rOutput, + FormulaTemplate& rTemplate); + void WriteRegressionANOVAResults(AddressWalkerWriter& rOutput, + FormulaTemplate& rTemplate); + void WriteRegressionEstimatesWithCI(AddressWalkerWriter& rOutput, + FormulaTemplate& rTemplate, + bool bTakeLogX); + void WritePredictionsWithResiduals(AddressWalkerWriter& rOutput, + FormulaTemplate& rTemplate, + size_t nRegressionIndex); + // Generic table writer + void WriteTable(std::function<CellValueGetter>& rCellGetter, size_t nRowsInTable, + size_t nColsInTable, AddressWalkerWriter& rOutput, + std::function<CellWriter>& rFunc); + + DECL_LINK( CheckBoxHdl, CheckBox&, void ); + DECL_LINK( NumericFieldHdl, Edit&, void ); }; diff --git a/sc/source/ui/inc/StatisticsTwoVariableDialog.hxx b/sc/source/ui/inc/StatisticsTwoVariableDialog.hxx index bcfdcb319940..ceb14e4f4e79 100644 --- a/sc/source/ui/inc/StatisticsTwoVariableDialog.hxx +++ b/sc/source/ui/inc/StatisticsTwoVariableDialog.hxx @@ -43,6 +43,8 @@ protected: virtual ScRange ApplyOutput(ScDocShell* pDocShell) = 0; virtual const char* GetUndoNameId() = 0; + virtual bool InputRangesValid(); + virtual void ValidateDialogInput(); // Widgets VclPtr<FixedText> mpVariable1RangeLabel; diff --git a/sc/source/ui/inc/TableFillingAndNavigationTools.hxx b/sc/source/ui/inc/TableFillingAndNavigationTools.hxx index 9c00bf31e351..073c89d98b0d 100644 --- a/sc/source/ui/inc/TableFillingAndNavigationTools.hxx +++ b/sc/source/ui/inc/TableFillingAndNavigationTools.hxx @@ -85,7 +85,7 @@ public: formula::FormulaGrammar::Grammar eGrammar ); void writeFormula(const OUString& aFormula); - void writeMatrixFormula(const OUString& aFormula); + void writeMatrixFormula(const OUString& aFormula, SCCOL nCols = 1, SCROW nRows = 1); void writeString(const OUString& aString); void writeString(const char* aCharArray); void writeBoldString(const OUString& aString); |