summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMike Kaganski <mike.kaganski@collabora.com>2017-07-13 09:08:56 +0300
committerMike Kaganski <mike.kaganski@collabora.com>2017-08-10 06:19:22 +0200
commit67a61e54531801645d51ad89aac30064b8c4b4e8 (patch)
treecaebee6370fc392206294cf94ade4f0519b2f7e0
parent368b583b992f2e9cad46c2362c9529a07c36d7a9 (diff)
tdf#111550: A workaround for out-of-order (in-paragraph) tbl on OOXML
Word allows <w:tbl> to be a direct child of <w:p>, which is illegal according to ECMA-376-1:2016. This allows importing the data in such tables (previously, this text was simply dropped, causing data loss) - bug-to-bug compatibility with Word. Change-Id: I19c17ab19915ea46685727c635476fe5df593212 Reviewed-on: https://gerrit.libreoffice.org/40909 Tested-by: Jenkins <ci@libreoffice.org> Reviewed-by: Mike Kaganski <mike.kaganski@collabora.com>
-rw-r--r--sw/qa/extras/ooxmlimport/data/tdf111550.docxbin0 -> 1362 bytes
-rw-r--r--sw/qa/extras/ooxmlimport/ooxmlimport.cxx66
-rw-r--r--writerfilter/source/ooxml/OOXMLFastContextHandler.cxx19
-rw-r--r--writerfilter/source/ooxml/OOXMLFastContextHandler.hxx4
-rw-r--r--writerfilter/source/ooxml/factoryimpl_ns.py4
-rw-r--r--writerfilter/source/ooxml/model.xml19
6 files changed, 112 insertions, 0 deletions
diff --git a/sw/qa/extras/ooxmlimport/data/tdf111550.docx b/sw/qa/extras/ooxmlimport/data/tdf111550.docx
new file mode 100644
index 000000000000..6e13df351906
--- /dev/null
+++ b/sw/qa/extras/ooxmlimport/data/tdf111550.docx
Binary files differ
diff --git a/sw/qa/extras/ooxmlimport/ooxmlimport.cxx b/sw/qa/extras/ooxmlimport/ooxmlimport.cxx
index bea70605fdae..689c00a6fc3e 100644
--- a/sw/qa/extras/ooxmlimport/ooxmlimport.cxx
+++ b/sw/qa/extras/ooxmlimport/ooxmlimport.cxx
@@ -1470,6 +1470,72 @@ DECLARE_OOXMLIMPORT_TEST( testActiveXCheckbox, "activex_checkbox.docx" )
CPPUNIT_ASSERT_EQUAL(text::TextContentAnchorType_AT_CHARACTER,getProperty<text::TextContentAnchorType>(xPropertySet2,"AnchorType"));
}
+DECLARE_OOXMLIMPORT_TEST(testTdf111550, "tdf111550.docx")
+{
+ // The test document has the following ill-formed structure:
+ //
+ // <w:tbl>
+ // ...
+ // <w:tr>
+ // <w:tc>
+ // <w:p>
+ // <w:r>
+ // <w:t>[outer:A2]</w:t>
+ // <w:br w:type="textWrapping"/>
+ // </w:r>
+ // <w:tbl>
+ // <w:tr>
+ // <w:tc>
+ // <w:p>
+ // <w:r>
+ // <w:t>[inner:A1]</w:t>
+ // </w:r>
+ // </w:p>
+ // </w:tc>
+ // </w:tr>
+ // </w:tbl>
+ // </w:p>
+ // </w:tc>
+ // </w:tr>
+ // </w:tbl>
+ //
+ // i.e., a <w:tbl> as a direct child of <w:p> inside another table.
+ // Word accepts that illegal OOXML, and treats it as equivalent to
+ //
+ // <w:tbl>
+ // ...
+ // <w:tr>
+ // <w:tc>
+ // <w:tbl>
+ // <w:tr>
+ // <w:tc>
+ // <w:p>
+ // <w:r>
+ // <w:t>[outer:A2]</w:t>
+ // <w:br w:type="textWrapping"/>
+ // </w:r>
+ // <w:r>
+ // <w:t>[inner:A1]</w:t>
+ // </w:r>
+ // </w:p>
+ // </w:tc>
+ // </w:tr>
+ // </w:tbl>
+ // </w:tc>
+ // </w:tr>
+ // </w:tbl>
+ //
+ // i.e., it moves all contents of the outer paragraph into the inner table's first paragraph.
+
+ CPPUNIT_ASSERT_EQUAL(2, getParagraphs());
+ // The inner table must be found inside outer cell A2, with both texts merged into its A1.
+ uno::Reference<text::XTextContent> outerTable = getParagraphOrTable(1);
+ getCell(outerTable, "A1", "[outer:A1]");
+ uno::Reference<text::XText> cellA2(getCell(outerTable, "A2"), uno::UNO_QUERY_THROW);
+ uno::Reference<text::XTextContent> innerTable = getParagraphOrTable(1, cellA2);
+ getCell(innerTable, "A1", "[outer:A2]\n[inner:A1]");
+}
+
// tests should only be added to ooxmlIMPORT *if* they fail round-tripping in ooxmlEXPORT
CPPUNIT_PLUGIN_IMPLEMENT();
diff --git a/writerfilter/source/ooxml/OOXMLFastContextHandler.cxx b/writerfilter/source/ooxml/OOXMLFastContextHandler.cxx
index c82b62113876..9bbbce2af961 100644
--- a/writerfilter/source/ooxml/OOXMLFastContextHandler.cxx
+++ b/writerfilter/source/ooxml/OOXMLFastContextHandler.cxx
@@ -1540,6 +1540,25 @@ void OOXMLFastContextHandlerTextTable::lcl_endFastElement
mpParserState->endTable();
}
+// tdf#111550
+void OOXMLFastContextHandlerTextTable::start_P_Tbl()
+{
+ // Normally, when one paragraph ends, and another begins,
+ // in OOXMLFactory_wml::endAction handler for <w:p>,
+ // pHandler->endOfParagraph() is called, which (among other things)
+ // calls TableManager::setHandle() to update current cell's starting point.
+ // Then, in OOXMLFactory_wml::startAction for next <w:p>,
+ // pHandler->startParagraphGroup() is called, which ends previous group,
+ // and there, it pushes cells to row in TableManager::endParagraphGroup()
+ // (cells have correct bounds defined by mCurHandle).
+ // When a table is a child of a <w:p>, that paragraph doesn't end before the nested
+ // paragraph begins. So, pHandler->endOfParagraph() was not (and should not be)
+ // called. But as the next paragraph starts, if the previous group is closed, then
+ // cells will have wrong bounds. Here, we know that we *are* in a paragraph
+ // group, but it should not be finished.
+ mpParserState->setInParagraphGroup(false);
+}
+
/*
class OOXMLFastContextHandlerShape
*/
diff --git a/writerfilter/source/ooxml/OOXMLFastContextHandler.hxx b/writerfilter/source/ooxml/OOXMLFastContextHandler.hxx
index 4909c320ccef..2b52eb16c0a2 100644
--- a/writerfilter/source/ooxml/OOXMLFastContextHandler.hxx
+++ b/writerfilter/source/ooxml/OOXMLFastContextHandler.hxx
@@ -419,6 +419,10 @@ public:
virtual std::string getType() const override { return "TextTable"; }
+ // tdf#111550
+ // Called when <w:tbl> appears as a direct child of <w:p>: this paragraph needs to be rearranged
+ // so that it merges with the table's first paragraph (that's what Word does in this case).
+ void start_P_Tbl();
protected:
virtual void lcl_startFastElement(Token_t Element, const css::uno::Reference< css::xml::sax::XFastAttributeList > & Attribs) override;
diff --git a/writerfilter/source/ooxml/factoryimpl_ns.py b/writerfilter/source/ooxml/factoryimpl_ns.py
index 73b1245a71c1..1134a14cb331 100644
--- a/writerfilter/source/ooxml/factoryimpl_ns.py
+++ b/writerfilter/source/ooxml/factoryimpl_ns.py
@@ -442,6 +442,10 @@ def factoryChooseAction(actionNode):
elif actionNode.getAttribute("action") == "handleGridBefore" or actionNode.getAttribute("action") == "handleGridAfter":
ret.append(" %sif (OOXMLFastContextHandlerTextTableRow* pTextTableRow = dynamic_cast<OOXMLFastContextHandlerTextTableRow*>(pHandler))" % extra_space)
ret.append(" %s pTextTableRow->%s();" % (extra_space, actionNode.getAttribute("action")))
+ # tdf#111550
+ elif actionNode.getAttribute("action") in ("start_P_Tbl"):
+ ret.append(" %sif (OOXMLFastContextHandlerTextTable* pTextTable = dynamic_cast<OOXMLFastContextHandlerTextTable*>(pHandler))" % extra_space)
+ ret.append(" %s pTextTable->%s();" % (extra_space, actionNode.getAttribute("action")))
elif actionNode.getAttribute("action") in ("sendProperty", "handleHyperlink"):
ret.append(" %sif (OOXMLFastContextHandlerStream* pStream = dynamic_cast<OOXMLFastContextHandlerStream*>(pHandler))" % extra_space)
ret.append(" %s pStream->%s();" % (extra_space, actionNode.getAttribute("action")))
diff --git a/writerfilter/source/ooxml/model.xml b/writerfilter/source/ooxml/model.xml
index 386d51d7c364..81cdc3ff9c77 100644
--- a/writerfilter/source/ooxml/model.xml
+++ b/writerfilter/source/ooxml/model.xml
@@ -14174,6 +14174,10 @@
<ref name="CT_Br_OutOfOrder"/>
</element>
<!-- end tdf#108714 -->
+ <!-- tdf#111550 : allow <w:tbl> at paragraph level (although this is illegal according to ECMA-376-1:2016) - bug-to-bug compatibility with Word -->
+ <element name="tbl">
+ <ref name="CT_P_Tbl"/>
+ </element>
</define>
<define name="ST_TblWidth">
<choice>
@@ -14690,6 +14694,17 @@
</element>
<ref name="EG_ContentRowContent"/>
</define>
+ <!-- tdf#111550 : Special element - a copy of the usual CT_Tbl, but used only as a direct child of CT_P -->
+ <define name="CT_P_Tbl">
+ <ref name="EG_RangeMarkupElements"/>
+ <element name="tblPr">
+ <ref name="CT_TblPr"/>
+ </element>
+ <element name="tblGrid">
+ <ref name="CT_TblGrid"/>
+ </element>
+ <ref name="EG_ContentRowContent"/>
+ </define>
<define name="CT_TblLook">
<attribute name="firstRow">
<ref name="ST_OnOff"/>
@@ -18418,6 +18433,10 @@
<element name="tblPrExChange" tokenid="ooxml:CT_TblPrEx_tblPrExChange"/>
</resource>
<resource name="CT_Tbl" resource="TextTable"/>
+ <!-- tdf#111550 : allow <w:tbl> at paragraph level (although this is illegal according to ECMA-376-1:2016) - bug-to-bug compatibility with Word -->
+ <resource name="CT_P_Tbl" resource="TextTable">
+ <action name="start" action="start_P_Tbl"/>
+ </resource>
<resource name="CT_TblLook" resource="Properties">
<attribute name="firstRow" tokenid="ooxml:CT_TblLook_firstRow"/>
<attribute name="lastRow" tokenid="ooxml:CT_TblLook_lastRow"/>