summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMike Kaganski <mike.kaganski@collabora.com>2017-06-23 14:48:03 +0300
committerMike Kaganski <mike.kaganski@collabora.com>2017-06-27 15:43:25 +0200
commita4a1467bc47b81ad68ecad0d5e2e163670582919 (patch)
treec045a2c70fa9441321da29323ed1bcdc82804ba2
parentd5b19100ca4d3670d1b5367e8000739af60a6892 (diff)
tdf#108714: allow <w:br> as direct child of <w:body>
LibreOffice doesn't accept a <w:br> element as a child of <w:body>. ECMA-376-1:2016 17.3.3.1 describes br as an element of run content, and points to CT_Br in §A.1. CT_Br may appear only as part of EG_RunInnerContent. In turn, EG_RunInnerContent may appear only inside CT_R. So, using <w:br> outside of <w:r> produces ill-formed OOXML. The Open XML SDK 2.5 Productivity Tool for Microsoft Office confirms that, showing an OpenXmlUnknownElement error. However, Word accepts it as a direct child of <w:body>. It behaves as if the <w:br> were used as the first element in the first run of the following <w:p> (thus creating a page break after the next paragraph). This is another Word bug that leads third parties to create ill-formed documents, and requires LibreOffice to be bug-to-bug compatible. This commit makes the following changes: 1. Registers a dedicated complex type CT_Br_OutOfOrder to handle those unusual breaks, with a corresponding handler function. 2. In the handler function, saves the gathered property set to the parser state for later use in the next paragraph group handler. This reproduces Word's behaviour. Change-Id: I5df6927e2de9266b58f87807319ad1c4977e45a7 Reviewed-on: https://gerrit.libreoffice.org/39168 Tested-by: Jenkins <ci@libreoffice.org> Reviewed-by: Mike Kaganski <mike.kaganski@collabora.com>
-rw-r--r--sw/qa/extras/ooxmlimport/data/tdf108714.docxbin0 -> 1310 bytes
-rw-r--r--sw/qa/extras/ooxmlimport/ooxmlimport.cxx47
-rw-r--r--writerfilter/source/ooxml/OOXMLFastContextHandler.cxx13
-rw-r--r--writerfilter/source/ooxml/OOXMLFastContextHandler.hxx1
-rw-r--r--writerfilter/source/ooxml/OOXMLParserState.cxx17
-rw-r--r--writerfilter/source/ooxml/OOXMLParserState.hxx4
-rw-r--r--writerfilter/source/ooxml/factoryimpl_ns.py2
-rw-r--r--writerfilter/source/ooxml/model.xml18
8 files changed, 101 insertions, 1 deletions
diff --git a/sw/qa/extras/ooxmlimport/data/tdf108714.docx b/sw/qa/extras/ooxmlimport/data/tdf108714.docx
new file mode 100644
index 000000000000..e564d44a648b
--- /dev/null
+++ b/sw/qa/extras/ooxmlimport/data/tdf108714.docx
Binary files differ
diff --git a/sw/qa/extras/ooxmlimport/ooxmlimport.cxx b/sw/qa/extras/ooxmlimport/ooxmlimport.cxx
index a5f7cf21cd96..97dd9128b9ea 100644
--- a/sw/qa/extras/ooxmlimport/ooxmlimport.cxx
+++ b/sw/qa/extras/ooxmlimport/ooxmlimport.cxx
@@ -27,6 +27,7 @@
#include <com/sun/star/drawing/GraphicExportFilter.hpp>
#include <com/sun/star/drawing/EnhancedCustomShapeAdjustmentValue.hpp>
#include <com/sun/star/lang/XServiceInfo.hpp>
+#include <com/sun/star/style/BreakType.hpp>
#include <com/sun/star/style/XStyleFamiliesSupplier.hpp>
#include <com/sun/star/text/HoriOrientation.hpp>
#include <com/sun/star/text/RelOrientation.hpp>
@@ -1312,6 +1313,52 @@ DECLARE_OOXMLIMPORT_TEST(testVmlAdjustments, "vml-adjustments.docx")
CPPUNIT_ASSERT_EQUAL(sal_Int32(17639), aAdjustmentValue.Value.get<sal_Int32>());
}
+DECLARE_OOXMLIMPORT_TEST(testTdf108714, "tdf108714.docx") // tdf#108714: import of a <w:br> placed directly under <w:body>
+{
+ CPPUNIT_ASSERT_EQUAL(4, getParagraphs());
+ CPPUNIT_ASSERT_EQUAL_MESSAGE("Page break is absent - we lost bug-to-bug compatibility with Word", 3, getPages());
+
+ // The second (empty) paragraph must be on the first page, even though the <w:br> element precedes it.
+ // That's because Word treats such a break as the first element in the first run of the following paragraph:
+ //
+ // <w:br w:type="page"/>
+ // <w:p>
+ // <w:r>
+ // <w:t/>
+ // </w:r>
+ // </w:p>
+ //
+ // is treated the same as
+ //
+ // <w:p>
+ // <w:r>
+ // <w:br w:type="page"/>
+ // </w:r>
+ // </w:p>
+ //
+ // which emits the page break after that empty paragraph.
+
+ uno::Reference< text::XTextRange > paragraph = getParagraph(1);
+ CPPUNIT_ASSERT_EQUAL(OUString("Paragraph 1"), paragraph->getString());
+ style::BreakType breakType = getProperty<style::BreakType>(paragraph, "BreakType");
+ CPPUNIT_ASSERT_EQUAL(style::BreakType_NONE, breakType);
+
+ paragraph = getParagraph(2);
+ CPPUNIT_ASSERT_EQUAL(OUString(), paragraph->getString());
+ breakType = getProperty<style::BreakType>(paragraph, "BreakType");
+ CPPUNIT_ASSERT_EQUAL(style::BreakType_NONE, breakType); // the empty paragraph itself carries no break
+
+ paragraph = getParagraph(3);
+ CPPUNIT_ASSERT_EQUAL(OUString("Paragraph 2"), paragraph->getString());
+ breakType = getProperty<style::BreakType>(paragraph, "BreakType");
+ CPPUNIT_ASSERT_EQUAL(style::BreakType_PAGE_BEFORE, breakType); // the break shows up before the paragraph that follows the empty one
+
+ paragraph = getParagraph(4);
+ CPPUNIT_ASSERT_EQUAL(OUString("Paragraph 3"), paragraph->getString());
+ breakType = getProperty<style::BreakType>(paragraph, "BreakType");
+ CPPUNIT_ASSERT_EQUAL(style::BreakType_PAGE_BEFORE, breakType); // NOTE(review): presumably a second page break in tdf108714.docx (3 pages expected) - confirm against the document
+}
+
// tests should only be added to ooxmlIMPORT *if* they fail round-tripping in ooxmlEXPORT
CPPUNIT_PLUGIN_IMPLEMENT();
diff --git a/writerfilter/source/ooxml/OOXMLFastContextHandler.cxx b/writerfilter/source/ooxml/OOXMLFastContextHandler.cxx
index e1b61a69411b..5c5290d75e45 100644
--- a/writerfilter/source/ooxml/OOXMLFastContextHandler.cxx
+++ b/writerfilter/source/ooxml/OOXMLFastContextHandler.cxx
@@ -386,6 +386,10 @@ void OOXMLFastContextHandler::startParagraphGroup()
{
mpStream->startParagraphGroup();
mpParserState->setInParagraphGroup(true);
+
+ // tdf#108714 : if we have a postponed break information,
+ // then apply it now, before any other paragraph content.
+ mpParserState->resolvePostponedBreak(*mpStream);
}
}
}
@@ -1055,6 +1059,15 @@ void OOXMLFastContextHandlerProperties::handleBreak()
}
}
+// tdf#108714 : accept <w:br> at block level, even though ECMA-376-1:2016 does not allow it there
+void OOXMLFastContextHandlerProperties::handleOutOfOrderBreak()
+{
+    // Nothing is stored unless events are being forwarded.
+    if (!isForwardEvents())
+        return;
+    mpParserState->setPostponedBreak(getPropertySet());
+}
+
void OOXMLFastContextHandlerProperties::handleOLE()
{
OOXMLOLEHandler aOLEHandler(this);
diff --git a/writerfilter/source/ooxml/OOXMLFastContextHandler.hxx b/writerfilter/source/ooxml/OOXMLFastContextHandler.hxx
index ba0ee9a3d66b..d7ec09e48e4d 100644
--- a/writerfilter/source/ooxml/OOXMLFastContextHandler.hxx
+++ b/writerfilter/source/ooxml/OOXMLFastContextHandler.hxx
@@ -277,6 +277,7 @@ public:
void handleComment();
void handlePicture();
void handleBreak();
+ void handleOutOfOrderBreak();
void handleOLE();
void handleFontRel();
void handleHyperlinkURL();
diff --git a/writerfilter/source/ooxml/OOXMLParserState.cxx b/writerfilter/source/ooxml/OOXMLParserState.cxx
index 71d0290b707a..a655488e3194 100644
--- a/writerfilter/source/ooxml/OOXMLParserState.cxx
+++ b/writerfilter/source/ooxml/OOXMLParserState.cxx
@@ -20,6 +20,7 @@
#include <stdio.h>
#include <iostream>
#include "OOXMLParserState.hxx"
+#include "Handler.hxx"
namespace writerfilter {
namespace ooxml
@@ -210,6 +211,22 @@ void OOXMLParserState::setTableProperties(const OOXMLPropertySet::Pointer_t& pPr
}
}
+// tdf#108714 : replay a break stored by setPostponedBreak() into rStream, then drop it
+void OOXMLParserState::resolvePostponedBreak(Stream & rStream)
+{
+    if (!mpPostponedBreak)
+        return; // no break has been postponed
+    // Resolve the stored property set through a break handler bound to rStream, then clear it.
+    OOXMLBreakHandler aBreakHandler(rStream);
+    mpPostponedBreak->resolve(aBreakHandler);
+    mpPostponedBreak.reset();
+}
+
+void OOXMLParserState::setPostponedBreak(const OOXMLPropertySet::Pointer_t & pProps) // tdf#108714 : keep pProps until resolvePostponedBreak() is called
+{
+ mpPostponedBreak = pProps; // replaces any break stored earlier
+}
+
void OOXMLParserState::startTable()
{
OOXMLPropertySet::Pointer_t pCellProps;
diff --git a/writerfilter/source/ooxml/OOXMLParserState.hxx b/writerfilter/source/ooxml/OOXMLParserState.hxx
index 0ba0079d1653..d328b07b2835 100644
--- a/writerfilter/source/ooxml/OOXMLParserState.hxx
+++ b/writerfilter/source/ooxml/OOXMLParserState.hxx
@@ -59,6 +59,7 @@ class OOXMLParserState final
bool savedInCharacterGroup;
bool savedLastParagraphInSection;
std::vector<SavedAlternateState> maSavedAlternateStates;
+ OOXMLPropertySet::Pointer_t mpPostponedBreak;
public:
typedef std::shared_ptr<OOXMLParserState> Pointer_t;
@@ -102,6 +103,9 @@ public:
void setRowProperties(const OOXMLPropertySet::Pointer_t& pProps);
void resolveTableProperties(Stream & rStream);
void setTableProperties(const OOXMLPropertySet::Pointer_t& pProps);
+ // tdf#108714
+ void resolvePostponedBreak(Stream & rStream);
+ void setPostponedBreak(const OOXMLPropertySet::Pointer_t& pProps);
void startTable();
void endTable();
diff --git a/writerfilter/source/ooxml/factoryimpl_ns.py b/writerfilter/source/ooxml/factoryimpl_ns.py
index 80e0c84c691c..73b1245a71c1 100644
--- a/writerfilter/source/ooxml/factoryimpl_ns.py
+++ b/writerfilter/source/ooxml/factoryimpl_ns.py
@@ -428,7 +428,7 @@ def factoryChooseAction(actionNode):
ret.append(" {")
extra_space = " "
- if actionNode.getAttribute("action") in ("handleXNotes", "handleHdrFtr", "handleComment", "handlePicture", "handleBreak", "handleOLE", "handleFontRel", "handleHyperlinkURL"):
+ if actionNode.getAttribute("action") in ("handleXNotes", "handleHdrFtr", "handleComment", "handlePicture", "handleBreak", "handleOutOfOrderBreak", "handleOLE", "handleFontRel", "handleHyperlinkURL"):
ret.append(" %sif (OOXMLFastContextHandlerProperties* pProperties = dynamic_cast<OOXMLFastContextHandlerProperties*>(pHandler))" % extra_space)
ret.append(" %s pProperties->%s();" % (extra_space, actionNode.getAttribute("action")))
elif actionNode.getAttribute("action") == "propagateCharacterPropertiesAsSet":
diff --git a/writerfilter/source/ooxml/model.xml b/writerfilter/source/ooxml/model.xml
index 05fb53f08729..55a9ca47b6ba 100644
--- a/writerfilter/source/ooxml/model.xml
+++ b/writerfilter/source/ooxml/model.xml
@@ -13224,6 +13224,14 @@
<ref name="ST_BrClear"/>
</attribute>
</define>
+ <define name="CT_Br_OutOfOrder"> <!-- tdf#108714 : dedicated complex type for a <w:br> that appears at block level -->
+ <attribute name="type">
+ <ref name="ST_BrType"/>
+ </attribute>
+ <attribute name="clear">
+ <ref name="ST_BrClear"/>
+ </attribute>
+ </define>
<define name="ST_PTabAlignment">
<choice>
<!-- Left -->
@@ -13963,6 +13971,11 @@
<element name="tbl">
<ref name="CT_Tbl"/>
</element>
+ <!-- tdf#108714 : allow <w:br> at block level (even though this is illegal according to ECMA-376-1:2016) - bug-to-bug compatibility with Word -->
+ <element name="br">
+ <ref name="CT_Br_OutOfOrder"/>
+ </element>
+ <!-- end tdf#108714 -->
<ref name="EG_RunLevelElts"/>
</choice>
</define>
@@ -17842,6 +17855,11 @@
<attribute name="clear" tokenid="ooxml:CT_Br_clear"/>
<action name="end" action="handleBreak"/>
</resource>
+ <resource name="CT_Br_OutOfOrder" resource="Properties"> <!-- tdf#108714 : uses the CT_Br token ids; the end action is handleOutOfOrderBreak rather than handleBreak -->
+ <attribute name="type" tokenid="ooxml:CT_Br_type"/>
+ <attribute name="clear" tokenid="ooxml:CT_Br_clear"/>
+ <action name="end" action="handleOutOfOrderBreak"/>
+ </resource>
<resource name="ST_PTabAlignment" resource="List">
<value tokenid="ooxml:Value_ST_PTabAlignment_left">left</value>
<value tokenid="ooxml:Value_ST_PTabAlignment_center">center</value>