summaryrefslogtreecommitdiff
path: root/writerfilter
diff options
context:
space:
mode:
authorMichael Warner <michael.warner.ut+libreoffice@gmail.com>2020-07-03 10:18:33 -0400
committerLászló Németh <nemeth@numbertext.org>2020-08-17 11:15:25 +0200
commit68e74bdf63e992666016c790e8e4cfd5b28d6abe (patch)
tree903d1bfbe952d5ff6c800d49a2329d1eb2153bf1 /writerfilter
parent3297c44c14d088c83bf729457e3d4ca629dc283c (diff)
tdf133647 tdf123386 tdf123389 Improved .docx table formula import
Converts table formula syntax from MS Word to LibreOffice. This version uses the list separator of the document for the formula regexen; however, it does not convert the decimal or list separators in the case where the person opening the document is using a different locale from the author. Change-Id: I9600a0bea060a76705a7ad6b051ed4fdd50b9d40 Reviewed-on: https://gerrit.libreoffice.org/c/core/+/98614 Tested-by: Jenkins Tested-by: László Németh <nemeth@numbertext.org> Reviewed-by: László Németh <nemeth@numbertext.org>
Diffstat (limited to 'writerfilter')
-rw-r--r--writerfilter/Library_writerfilter.mk4
-rw-r--r--writerfilter/source/dmapper/DomainMapper_Impl.cxx71
-rw-r--r--writerfilter/source/dmapper/DomainMapper_Impl.hxx3
-rw-r--r--writerfilter/source/dmapper/SettingsTable.cxx17
-rw-r--r--writerfilter/source/dmapper/SettingsTable.hxx3
5 files changed, 95 insertions, 3 deletions
diff --git a/writerfilter/Library_writerfilter.mk b/writerfilter/Library_writerfilter.mk
index 61e62d66e10d..cbda03e9dcdd 100644
--- a/writerfilter/Library_writerfilter.mk
+++ b/writerfilter/Library_writerfilter.mk
@@ -51,7 +51,9 @@ $(eval $(call gb_Library_use_libraries,writerfilter,\
$(eval $(call gb_Library_use_externals,writerfilter,\
boost_headers \
- icu_headers \
+ icui18n \
+ icuuc \
+ icu_headers \
libxml2 \
))
diff --git a/writerfilter/source/dmapper/DomainMapper_Impl.cxx b/writerfilter/source/dmapper/DomainMapper_Impl.cxx
index e33a6d753364..4e8afddc8d50 100644
--- a/writerfilter/source/dmapper/DomainMapper_Impl.cxx
+++ b/writerfilter/source/dmapper/DomainMapper_Impl.cxx
@@ -88,6 +88,8 @@
#include <map>
#include <tuple>
#include <unordered_map>
+#include <regex>
+#include <algorithm>
#include <officecfg/Office/Common.hxx>
#include <filter/msfilter/util.hxx>
@@ -100,6 +102,9 @@
#include <tools/diagnose_ex.h>
#include <sal/log.hxx>
+#include <unicode/errorcode.h>
+#include <unicode/regex.h>
+
using namespace ::com::sun::star;
using namespace oox;
namespace writerfilter::dmapper{
@@ -4196,6 +4201,70 @@ void DomainMapper_Impl::handleFieldAsk
}
}
+/**
+ * Converts a Microsoft Word field formula into LibreOffice syntax.
+ *
+ * The conversion is purely textual and runs in this order:
+ *  1. comparison operators become the LO keywords NEQ/LEQ/GEQ/EQ/L/G,
+ *  2. two-argument AND()/OR()/ROUND() calls become infix expressions,
+ *  3. remaining list separators become '|',
+ *  4. cell references are wrapped in angle brackets and upper-cased,
+ *  5. ranges are merged into <A1:B2> and DEFINED() arguments are unwrapped again.
+ *
+ * The list separator is the one stored in the settings table. Neither the decimal symbol nor the
+ * list separator is converted to the locale of the person opening the document.
+ *
+ * @param input The Microsoft Word field formula, with no leading '=' sign
+ * @return An equivalent LibreOffice field formula. The input is returned unchanged when no
+ *         settings table is available or when any ICU regex step reports a failure.
+ */
+OUString DomainMapper_Impl::convertFieldFormula(const OUString& input) {
+
+    /* The list separator lives in the settings table; without one, leave the formula alone
+     * instead of dereferencing a null pointer. */
+    if (!m_pSettingsTable)
+        return input;
+
+    OUString listSeparator = m_pSettingsTable->GetListSeparator();
+    /* An empty separator would produce a malformed character class below; use the same default
+     * the settings table starts with. */
+    if (listSeparator.isEmpty())
+        listSeparator = ",";
+
+    /* The separator comes from the document, so it must not be interpreted as regex syntax.
+     * Backslash-escape every ASCII punctuation character; letters and digits are left alone
+     * because a backslash in front of them would give them a special meaning. */
+    OUString separatorPattern;
+    for (sal_Int32 i = 0; i < listSeparator.getLength(); ++i)
+    {
+        const sal_Unicode c = listSeparator[i];
+        const bool bAsciiAlnum = (c >= '0' && c <= '9') || (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
+        if (c > 0x20 && c < 0x7F && !bAsciiAlnum)
+            separatorPattern += "\\";
+        separatorPattern += OUString(c);
+    }
+
+    /* Replace logical condition functions with LO equivalent operators. The space-delimited forms
+     * go first so that already well-spaced input does not end up with doubled spaces. */
+    OUString changed = input.replaceAll(" <> ", " NEQ ");
+    changed = changed.replaceAll(" <= ", " LEQ ");
+    changed = changed.replaceAll(" >= ", " GEQ ");
+    changed = changed.replaceAll(" = " , " EQ ");
+    changed = changed.replaceAll(" < " , " L ");
+    changed = changed.replaceAll(" > " , " G ");
+
+    changed = changed.replaceAll("<>", " NEQ ");
+    changed = changed.replaceAll("<=", " LEQ ");
+    changed = changed.replaceAll(">=", " GEQ ");
+    changed = changed.replaceAll("=" , " EQ ");
+    changed = changed.replaceAll("<" , " L ");
+    changed = changed.replaceAll(">" , " G ");
+
+    /* Replace function calls with infix keywords for AND(), OR(), and ROUND(). Nothing needs to be
+     * done for NOT(). This simple regex will work properly with most common cases. However, it may
+     * not work correctly when the arguments are nested subcalls to other functions, like
+     * ROUND(MIN(1,2),MAX(3,4)). See TDF#134765. */
+    icu::ErrorCode status;
+    icu::UnicodeString usInput(changed.getStr());
+    const uint32_t rMatcherFlags = UREGEX_CASE_INSENSITIVE;
+    OUString regex = "\\b(AND|OR|ROUND)\\s*\\(\\s*([^" + separatorPattern + "]+)\\s*" + separatorPattern + "\\s*([^)]+)\\s*\\)";
+    icu::UnicodeString usRegex(regex.getStr());
+    icu::RegexMatcher rmatch1(usRegex, usInput, rMatcherFlags, status);
+    usInput = rmatch1.replaceAll(icu::UnicodeString("(($2) $1 ($3))"), status);
+
+    /* Assumes any remaining list separators separate arguments to functions that accept lists
+     * (SUM, MIN, MAX, MEAN, etc.). This is a literal search, so the unescaped separator is used. */
+    usInput.findAndReplace(icu::UnicodeString(listSeparator.getStr()), "|");
+
+    /* Surround single cell references with angle brackets.
+     * If a function whose name ends in a digit is ever added, this regex will need to be revisited. */
+    icu::RegexMatcher rmatch2("\\b([A-Z]{1,3}[0-9]+)\\b(?![(])", usInput, rMatcherFlags, status);
+    usInput = rmatch2.replaceAll(icu::UnicodeString("<$1>"), status);
+
+    /* Cell references must be upper case. The letters are upper-cased by hand (ASCII only) because
+     * UnicodeString::toUpper() without a locale follows the default locale, which maps 'i' to a
+     * dotted capital I under e.g. Turkish. */
+    icu::RegexMatcher rmatch3("<[a-z]{1,3}[0-9]+>", usInput, rMatcherFlags, status);
+    icu::UnicodeString replacedCellRefs;
+    while (rmatch3.find(status) && status.isSuccess()) {
+        icu::UnicodeString cellRef = rmatch3.group(status);
+        for (int32_t i = 0; i < cellRef.length(); ++i) {
+            const UChar c = cellRef.charAt(i);
+            if (c >= 'a' && c <= 'z')
+                cellRef.setCharAt(i, static_cast<UChar>(c - ('a' - 'A')));
+        }
+        rmatch3.appendReplacement(replacedCellRefs, cellRef, status);
+    }
+    rmatch3.appendTail(replacedCellRefs);
+
+    /* Fix up cell ranges */
+    icu::RegexMatcher rmatch4("<([A-Z]{1,3}[0-9]+)>:<([A-Z]{1,3}[0-9]+)>", replacedCellRefs, rMatcherFlags, status);
+    usInput = rmatch4.replaceAll(icu::UnicodeString("<$1:$2>"), status);
+
+    /* Fix up user defined names */
+    icu::RegexMatcher rmatch5("DEFINED\\s*\\(<([A-Z]+[0-9]+)>\\)", usInput, rMatcherFlags, status);
+    usInput = rmatch5.replaceAll(icu::UnicodeString("DEFINED($1)"), status);
+
+    /* Once a step has failed, the later ICU calls stop doing useful work, so usInput can be empty
+     * or only partly converted. Hand back the original formula rather than a damaged one. */
+    if (status.isFailure())
+        return input;
+
+    return OUString(usInput.getTerminatedBuffer());
+}
+
void DomainMapper_Impl::handleFieldFormula
(const FieldContextPtr& pContext,
uno::Reference< beans::XPropertySet > const& xFieldProperties)
@@ -4215,7 +4284,7 @@ void DomainMapper_Impl::handleFieldFormula
return;
// we don't copy the = symbol from the command
- OUString formula = command.copy(1);
+ OUString formula = convertFieldFormula(command.copy(1));
xFieldProperties->setPropertyValue(getPropertyName(PROP_CONTENT), uno::makeAny(formula));
xFieldProperties->setPropertyValue(getPropertyName(PROP_NUMBER_FORMAT), uno::makeAny(sal_Int32(0)));
diff --git a/writerfilter/source/dmapper/DomainMapper_Impl.hxx b/writerfilter/source/dmapper/DomainMapper_Impl.hxx
index 2597f6877f34..54a0f6c7ac25 100644
--- a/writerfilter/source/dmapper/DomainMapper_Impl.hxx
+++ b/writerfilter/source/dmapper/DomainMapper_Impl.hxx
@@ -818,7 +818,8 @@ public:
(const FieldContextPtr& pContext,
css::uno::Reference< css::uno::XInterface > & xFieldInterface,
css::uno::Reference< css::beans::XPropertySet > const& xFieldProperties);
- static void handleFieldFormula
+ OUString convertFieldFormula(const OUString& input);
+ void handleFieldFormula
(const FieldContextPtr& pContext,
css::uno::Reference< css::beans::XPropertySet > const& xFieldProperties);
void handleAutoNum
diff --git a/writerfilter/source/dmapper/SettingsTable.cxx b/writerfilter/source/dmapper/SettingsTable.cxx
index 5ac2400cd493..2042ade7dc66 100644
--- a/writerfilter/source/dmapper/SettingsTable.cxx
+++ b/writerfilter/source/dmapper/SettingsTable.cxx
@@ -265,6 +265,8 @@ struct SettingsTable_Impl
bool m_bReadOnly;
bool m_bDisplayBackgroundShape;
bool m_bNoLeading = false;
+ OUString m_sDecimalSymbol;
+ OUString m_sListSeparator;
uno::Sequence<beans::PropertyValue> m_pThemeFontLangProps;
@@ -303,6 +305,8 @@ struct SettingsTable_Impl
, m_sRedlineProtectionKey()
, m_bReadOnly(false)
, m_bDisplayBackgroundShape(false)
+ , m_sDecimalSymbol(".")
+ , m_sListSeparator(",")
, m_pThemeFontLangProps(3)
, m_pCurrentCompatSetting(3)
{}
@@ -479,8 +483,10 @@ void SettingsTable::lcl_sprm(Sprm& rSprm)
case NS_ooxml::LN_CT_Settings_doNotIncludeSubdocsInStats: // 92554; // Do Not Include Content in Text Boxes, Footnotes, and Endnotes in Document Statistics)
break;
case NS_ooxml::LN_CT_Settings_decimalSymbol: // 92562;
+ m_pImpl->m_sDecimalSymbol = pValue->getString();
break;
case NS_ooxml::LN_CT_Settings_listSeparator: // 92563;
+ m_pImpl->m_sListSeparator = pValue->getString();
break;
case NS_ooxml::LN_CT_Settings_rsids: // 92549; revision save Ids - probably not necessary
break;
@@ -687,6 +693,17 @@ sal_Int16 SettingsTable::GetHypenationZone() const
return m_pImpl->m_nHyphenationZone;
}
+/** Returns the stored decimal symbol: "." unless a decimalSymbol setting replaced it. */
+OUString SettingsTable::GetDecimalSymbol() const
+{
+    const SettingsTable_Impl& rImpl = *m_pImpl;
+    return rImpl.m_sDecimalSymbol;
+}
+
+/** Returns the stored list separator: "," unless a listSeparator setting replaced it. */
+OUString SettingsTable::GetListSeparator() const
+{
+    const SettingsTable_Impl& rImpl = *m_pImpl;
+    return rImpl.m_sListSeparator;
+}
+
+
uno::Sequence<beans::PropertyValue> const & SettingsTable::GetThemeFontLangProperties() const
{
return m_pImpl->m_pThemeFontLangProps;
diff --git a/writerfilter/source/dmapper/SettingsTable.hxx b/writerfilter/source/dmapper/SettingsTable.hxx
index d91db71ab1f2..2ff62576a980 100644
--- a/writerfilter/source/dmapper/SettingsTable.hxx
+++ b/writerfilter/source/dmapper/SettingsTable.hxx
@@ -78,6 +78,9 @@ class SettingsTable : public LoggedProperties, public LoggedTable
bool GetNoHyphenateCaps() const;
sal_Int16 GetHypenationZone() const;
+ OUString GetDecimalSymbol() const;
+ OUString GetListSeparator() const;
+
css::uno::Sequence<css::beans::PropertyValue> const & GetThemeFontLangProperties() const;
css::uno::Sequence<css::beans::PropertyValue> GetCompatSettings() const;