summaryrefslogtreecommitdiff
path: root/writerfilter
diff options
context:
space:
mode:
authorMichael Warner <michael.warner.ut+libreoffice@gmail.com>2020-07-03 10:18:33 -0400
committerXisco Fauli <xiscofauli@libreoffice.org>2020-08-31 19:32:31 +0200
commita8dbdf4c41d0299aee6e155323f217e2fa1565c9 (patch)
tree5278863a62ab3a470b958679fee87aa65bba8672 /writerfilter
parent139ce6588a2980e52df9a106a254c16e6f27ea8d (diff)
tdf133647 tdf123386 tdf123389 Improved .docx table formula import
Converts table formula syntax from MS Word to LibreOffice. This version uses the list separator of the document for the formula regular expressions; however, it does not convert the decimal or list separators in the case where the person opening the document is using a different locale from the author.

Change-Id: I9600a0bea060a76705a7ad6b051ed4fdd50b9d40
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/98614
Tested-by: Jenkins
Tested-by: László Németh <nemeth@numbertext.org>
Reviewed-by: László Németh <nemeth@numbertext.org>
(cherry picked from commit 68e74bdf63e992666016c790e8e4cfd5b28d6abe)
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/101676
Reviewed-by: Xisco Fauli <xiscofauli@libreoffice.org>
Diffstat (limited to 'writerfilter')
-rw-r--r--writerfilter/Library_writerfilter.mk4
-rw-r--r--writerfilter/source/dmapper/DomainMapper_Impl.cxx71
-rw-r--r--writerfilter/source/dmapper/DomainMapper_Impl.hxx3
-rw-r--r--writerfilter/source/dmapper/SettingsTable.cxx17
-rw-r--r--writerfilter/source/dmapper/SettingsTable.hxx3
5 files changed, 95 insertions, 3 deletions
diff --git a/writerfilter/Library_writerfilter.mk b/writerfilter/Library_writerfilter.mk
index 61e62d66e10d..cbda03e9dcdd 100644
--- a/writerfilter/Library_writerfilter.mk
+++ b/writerfilter/Library_writerfilter.mk
@@ -51,7 +51,9 @@ $(eval $(call gb_Library_use_libraries,writerfilter,\
$(eval $(call gb_Library_use_externals,writerfilter,\
boost_headers \
- icu_headers \
+ icui18n \
+ icuuc \
+ icu_headers \
libxml2 \
))
diff --git a/writerfilter/source/dmapper/DomainMapper_Impl.cxx b/writerfilter/source/dmapper/DomainMapper_Impl.cxx
index 61b3becc0c68..ddef7048aeb8 100644
--- a/writerfilter/source/dmapper/DomainMapper_Impl.cxx
+++ b/writerfilter/source/dmapper/DomainMapper_Impl.cxx
@@ -88,6 +88,8 @@
#include <map>
#include <tuple>
#include <unordered_map>
+#include <regex>
+#include <algorithm>
#include <officecfg/Office/Common.hxx>
#include <filter/msfilter/util.hxx>
@@ -100,6 +102,9 @@
#include <tools/diagnose_ex.h>
#include <sal/log.hxx>
+#include <unicode/errorcode.h>
+#include <unicode/regex.h>
+
using namespace ::com::sun::star;
using namespace oox;
namespace writerfilter::dmapper{
@@ -4172,6 +4177,70 @@ void DomainMapper_Impl::handleFieldAsk
}
}
+/**
+ * Backslash-escapes every character of rText that has a special meaning in an
+ * ICU regular expression, so that the result matches rText literally both
+ * inside a [...] set and as a plain pattern fragment.
+ * @param rText Arbitrary text (here: the document's list separator)
+ * @return rText with ICU regex metacharacters escaped
+ */
+static icu::UnicodeString lcl_escapeForIcuRegex(const icu::UnicodeString& rText)
+{
+    icu::UnicodeString aEscaped;
+    for (int32_t i = 0; i < rText.length(); ++i)
+    {
+        const char16_t c = rText.charAt(i);
+        switch (c)
+        {
+            case u'\\': case u'^': case u'$': case u'.': case u'|': case u'?':
+            case u'*': case u'+': case u'(': case u')': case u'[': case u']':
+            case u'{': case u'}': case u'-': case u'&':
+                aEscaped.append(u'\\');
+                break;
+            default:
+                break;
+        }
+        aEscaped.append(c);
+    }
+    return aEscaped;
+}
+
+/**
+ * Converts a Microsoft Word field formula into LibreOffice syntax.
+ *
+ * The conversion is purely textual and happens in this order: comparison
+ * operators become keywords (NEQ, LEQ, GEQ, EQ, L, G); AND()/OR()/ROUND() calls
+ * become infix expressions; remaining list separators become '|'; cell
+ * references are wrapped in angle brackets and upper-cased; ranges are merged
+ * into a single <A1:B2> reference; and the brackets are removed again from the
+ * argument of DEFINED().
+ *
+ * @param input The Microsoft Word field formula, with no leading '=' sign
+ * @return An equivalent LibreOffice field formula, or the unchanged input if
+ *         any ICU regex step reports an error
+ */
+OUString DomainMapper_Impl::convertFieldFormula(const OUString& input) {
+
+    OUString listSeparator = m_pSettingsTable->GetListSeparator();
+    /* An empty separator would produce an invalid "[^]" set below; fall back to
+     * the same default the settings table starts with. */
+    if (listSeparator.isEmpty())
+        listSeparator = ",";
+
+    /* Replace logical condition functions with LO equivalent operators.
+     * The space-delimited forms go first so that they do not end up with
+     * doubled blanks; multi-character operators go before their prefixes. */
+    OUString changed = input.replaceAll(" <> ", " NEQ ");
+    changed = changed.replaceAll(" <= ", " LEQ ");
+    changed = changed.replaceAll(" >= ", " GEQ ");
+    changed = changed.replaceAll(" = " , " EQ ");
+    changed = changed.replaceAll(" < " , " L ");
+    changed = changed.replaceAll(" > " , " G ");
+
+    changed = changed.replaceAll("<>", " NEQ ");
+    changed = changed.replaceAll("<=", " LEQ ");
+    changed = changed.replaceAll(">=", " GEQ ");
+    changed = changed.replaceAll("=" , " EQ ");
+    changed = changed.replaceAll("<" , " L ");
+    changed = changed.replaceAll(">" , " G ");
+
+    /* Replace function calls with infix keywords for AND(), OR(), and ROUND(). Nothing needs to be
+     * done for NOT(). This simple regex will work properly with most common cases. However, it may
+     * not work correctly when the arguments are nested subcalls to other functions, like
+     * ROUND(MIN(1,2),MAX(3,4)). See TDF#134765. */
+    icu::ErrorCode status;
+    icu::UnicodeString usInput(changed.getStr());
+    const icu::UnicodeString usListSeparator(listSeparator.getStr());
+    /* The separator comes from the document, so it must be escaped before it is
+     * spliced into the pattern (a separator such as '.' or '|' would otherwise
+     * change the meaning of the regex or make it invalid). */
+    icu::UnicodeString usEscapedSeparator = lcl_escapeForIcuRegex(usListSeparator);
+    const OUString escapedSeparator(usEscapedSeparator.getTerminatedBuffer());
+    const uint32_t rMatcherFlags = UREGEX_CASE_INSENSITIVE;
+    OUString regex = "\\b(AND|OR|ROUND)\\s*\\(\\s*([^" + escapedSeparator + "]+)\\s*" + escapedSeparator + "\\s*([^)]+)\\s*\\)";
+    icu::UnicodeString usRegex(regex.getStr());
+    icu::RegexMatcher rmatch1(usRegex, usInput, rMatcherFlags, status);
+    usInput = rmatch1.replaceAll(icu::UnicodeString("(($2) $1 ($3))"), status);
+
+    /* Assumes any remaining list separators separate arguments to functions that accept lists
+     * (SUM, MIN, MAX, MEAN, etc.) */
+    usInput.findAndReplace(usListSeparator, "|");
+
+    /* Surround single cell references with angle brackets.
+     * If a function name ending in a digit is ever added, this regex will need to be revisited. */
+    icu::RegexMatcher rmatch2("\\b([A-Z]{1,3}[0-9]+)\\b(?![(])", usInput, rMatcherFlags, status);
+    usInput = rmatch2.replaceAll(icu::UnicodeString("<$1>"), status);
+
+    /* Cell references must be upper case. Only ASCII letters are mapped, by hand:
+     * UnicodeString::toUpper() without a locale follows the default locale, which
+     * would turn "i1" into "İ1" (dotted capital I) under Turkish or Azerbaijani. */
+    icu::RegexMatcher rmatch3("<[a-z]{1,3}[0-9]+>", usInput, rMatcherFlags, status);
+    icu::UnicodeString replacedCellRefs;
+    while (rmatch3.find(status) && status.isSuccess()) {
+        icu::UnicodeString cellRef = rmatch3.group(status);
+        for (int32_t i = 0; i < cellRef.length(); ++i) {
+            const char16_t c = cellRef.charAt(i);
+            if (c >= u'a' && c <= u'z')
+                cellRef.setCharAt(i, static_cast<char16_t>(c - (u'a' - u'A')));
+        }
+        rmatch3.appendReplacement(replacedCellRefs, cellRef, status);
+    }
+    rmatch3.appendTail(replacedCellRefs);
+
+    /* Fix up cell ranges */
+    icu::RegexMatcher rmatch4("<([A-Z]{1,3}[0-9]+)>:<([A-Z]{1,3}[0-9]+)>", replacedCellRefs, rMatcherFlags, status);
+    usInput = rmatch4.replaceAll(icu::UnicodeString("<$1:$2>"), status);
+
+    /* Fix up user defined names */
+    icu::RegexMatcher rmatch5("DEFINED\\s*\\(<([A-Z]+[0-9]+)>\\)", usInput, rMatcherFlags, status);
+    usInput = rmatch5.replaceAll(icu::UnicodeString("DEFINED($1)"), status);
+
+    /* Once the status is set to an error the later ICU calls stop doing useful
+     * work, so the text built so far cannot be trusted. Import the formula
+     * unconverted rather than returning a mangled or empty one. */
+    if (status.isFailure()) {
+        SAL_WARN("writerfilter.dmapper", "convertFieldFormula: ICU regex failure: " << status.errorName());
+        return input;
+    }
+
+    return OUString(usInput.getTerminatedBuffer());
+}
+
+
void DomainMapper_Impl::handleFieldFormula
(const FieldContextPtr& pContext,
uno::Reference< beans::XPropertySet > const& xFieldProperties)
@@ -4191,7 +4260,7 @@ void DomainMapper_Impl::handleFieldFormula
return;
// we don't copy the = symbol from the command
- OUString formula = command.copy(1);
+ OUString formula = convertFieldFormula(command.copy(1));
xFieldProperties->setPropertyValue(getPropertyName(PROP_CONTENT), uno::makeAny(formula));
xFieldProperties->setPropertyValue(getPropertyName(PROP_NUMBER_FORMAT), uno::makeAny(sal_Int32(0)));
diff --git a/writerfilter/source/dmapper/DomainMapper_Impl.hxx b/writerfilter/source/dmapper/DomainMapper_Impl.hxx
index db0a03fa7792..dc0693ecae6e 100644
--- a/writerfilter/source/dmapper/DomainMapper_Impl.hxx
+++ b/writerfilter/source/dmapper/DomainMapper_Impl.hxx
@@ -818,7 +818,8 @@ public:
(const FieldContextPtr& pContext,
css::uno::Reference< css::uno::XInterface > & xFieldInterface,
css::uno::Reference< css::beans::XPropertySet > const& xFieldProperties);
- static void handleFieldFormula
+ OUString convertFieldFormula(const OUString& input);
+ void handleFieldFormula
(const FieldContextPtr& pContext,
css::uno::Reference< css::beans::XPropertySet > const& xFieldProperties);
void handleAutoNum
diff --git a/writerfilter/source/dmapper/SettingsTable.cxx b/writerfilter/source/dmapper/SettingsTable.cxx
index e7af3f19e911..412826d6a7cd 100644
--- a/writerfilter/source/dmapper/SettingsTable.cxx
+++ b/writerfilter/source/dmapper/SettingsTable.cxx
@@ -265,6 +265,8 @@ struct SettingsTable_Impl
bool m_bReadOnly;
bool m_bDisplayBackgroundShape;
bool m_bNoLeading = false;
+ OUString m_sDecimalSymbol;
+ OUString m_sListSeparator;
uno::Sequence<beans::PropertyValue> m_pThemeFontLangProps;
@@ -303,6 +305,8 @@ struct SettingsTable_Impl
, m_sRedlineProtectionKey()
, m_bReadOnly(false)
, m_bDisplayBackgroundShape(false)
+ , m_sDecimalSymbol(".")
+ , m_sListSeparator(",")
, m_pThemeFontLangProps(3)
, m_pCurrentCompatSetting(3)
{}
@@ -479,8 +483,10 @@ void SettingsTable::lcl_sprm(Sprm& rSprm)
case NS_ooxml::LN_CT_Settings_doNotIncludeSubdocsInStats: // 92554; // Do Not Include Content in Text Boxes, Footnotes, and Endnotes in Document Statistics)
break;
case NS_ooxml::LN_CT_Settings_decimalSymbol: // 92562;
+ m_pImpl->m_sDecimalSymbol = pValue->getString();
break;
case NS_ooxml::LN_CT_Settings_listSeparator: // 92563;
+ m_pImpl->m_sListSeparator = pValue->getString();
break;
case NS_ooxml::LN_CT_Settings_rsids: // 92549; revision save Ids - probably not necessary
break;
@@ -687,6 +693,17 @@ sal_Int16 SettingsTable::GetHypenationZone() const
return m_pImpl->m_nHyphenationZone;
}
+/// Returns the decimal symbol held by the settings table: the value read from
+/// the LN_CT_Settings_decimalSymbol setting, or the initial "." if none was read.
+OUString SettingsTable::GetDecimalSymbol() const
+{
+    const OUString& rDecimalSymbol = m_pImpl->m_sDecimalSymbol;
+    return rDecimalSymbol;
+}
+
+/// Returns the list separator held by the settings table: the value read from
+/// the LN_CT_Settings_listSeparator setting, or the initial "," if none was read.
+OUString SettingsTable::GetListSeparator() const
+{
+    const OUString& rListSeparator = m_pImpl->m_sListSeparator;
+    return rListSeparator;
+}
+
+
uno::Sequence<beans::PropertyValue> const & SettingsTable::GetThemeFontLangProperties() const
{
return m_pImpl->m_pThemeFontLangProps;
diff --git a/writerfilter/source/dmapper/SettingsTable.hxx b/writerfilter/source/dmapper/SettingsTable.hxx
index 888fe4ad342e..6dd0545add1f 100644
--- a/writerfilter/source/dmapper/SettingsTable.hxx
+++ b/writerfilter/source/dmapper/SettingsTable.hxx
@@ -79,6 +79,9 @@ class SettingsTable : public LoggedProperties, public LoggedTable
bool GetNoHyphenateCaps() const;
sal_Int16 GetHypenationZone() const;
+ OUString GetDecimalSymbol() const;
+ OUString GetListSeparator() const;
+
css::uno::Sequence<css::beans::PropertyValue> const & GetThemeFontLangProperties() const;
css::uno::Sequence<css::beans::PropertyValue> GetCompatSettings() const;