diff options
author | Mike Kaganski <mike.kaganski@collabora.com> | 2017-07-13 09:08:56 +0300 |
---|---|---|
committer | Mike Kaganski <mike.kaganski@collabora.com> | 2017-08-10 06:19:22 +0200 |
commit | 67a61e54531801645d51ad89aac30064b8c4b4e8 (patch) | |
tree | caebee6370fc392206294cf94ade4f0519b2f7e0 | |
parent | 368b583b992f2e9cad46c2362c9529a07c36d7a9 (diff) |
tdf#111550: A workaround for out-of-order (in-paragraph) tbl on OOXML
Word allows <w:tbl> to be direct child of <w:p>, which is illegal
according to ECMA-376-1:2016.
This change allows importing the data in such tables (previously, this text
was simply dropped, causing data loss), for bug-to-bug compatibility
with Word.
Change-Id: I19c17ab19915ea46685727c635476fe5df593212
Reviewed-on: https://gerrit.libreoffice.org/40909
Tested-by: Jenkins <ci@libreoffice.org>
Reviewed-by: Mike Kaganski <mike.kaganski@collabora.com>
-rw-r--r-- | sw/qa/extras/ooxmlimport/data/tdf111550.docx | bin | 0 -> 1362 bytes | |||
-rw-r--r-- | sw/qa/extras/ooxmlimport/ooxmlimport.cxx | 66 | ||||
-rw-r--r-- | writerfilter/source/ooxml/OOXMLFastContextHandler.cxx | 19 | ||||
-rw-r--r-- | writerfilter/source/ooxml/OOXMLFastContextHandler.hxx | 4 | ||||
-rw-r--r-- | writerfilter/source/ooxml/factoryimpl_ns.py | 4 | ||||
-rw-r--r-- | writerfilter/source/ooxml/model.xml | 19 |
6 files changed, 112 insertions, 0 deletions
diff --git a/sw/qa/extras/ooxmlimport/data/tdf111550.docx b/sw/qa/extras/ooxmlimport/data/tdf111550.docx Binary files differnew file mode 100644 index 000000000000..6e13df351906 --- /dev/null +++ b/sw/qa/extras/ooxmlimport/data/tdf111550.docx diff --git a/sw/qa/extras/ooxmlimport/ooxmlimport.cxx b/sw/qa/extras/ooxmlimport/ooxmlimport.cxx index bea70605fdae..689c00a6fc3e 100644 --- a/sw/qa/extras/ooxmlimport/ooxmlimport.cxx +++ b/sw/qa/extras/ooxmlimport/ooxmlimport.cxx @@ -1470,6 +1470,72 @@ DECLARE_OOXMLIMPORT_TEST( testActiveXCheckbox, "activex_checkbox.docx" ) CPPUNIT_ASSERT_EQUAL(text::TextContentAnchorType_AT_CHARACTER,getProperty<text::TextContentAnchorType>(xPropertySet2,"AnchorType")); } +DECLARE_OOXMLIMPORT_TEST(testTdf111550, "tdf111550.docx") +{ + // The test document has following ill-formed structure: + // + // <w:tbl> + // ... + // <w:tr> + // <w:tc> + // <w:p> + // <w:r> + // <w:t>[outer:A2]</w:t> + // <w:br w:type="textWrapping"/> + // </w:r> + // <w:tbl> + // <w:tr> + // <w:tc> + // <w:p> + // <w:r> + // <w:t>[inner:A1]</w:t> + // </w:r> + // </w:p> + // </w:tc> + // </w:tr> + // </w:tbl> + // </w:p> + // </w:tc> + // </w:tr> + // </w:tbl> + // + // i.e., a <w:tbl> as direct child of <w:p> inside another table. + // Word accepts that illegal OOXML, and treats it as equal to + // + // <w:tbl> + // ... + // <w:tr> + // <w:tc> + // <w:tbl> + // <w:tr> + // <w:tc> + // <w:p> + // <w:r> + // <w:t>[outer:A2]</w:t> + // <w:br w:type="textWrapping"/> + // </w:r> + // <w:r> + // <w:t>[inner:A1]</w:t> + // </w:r> + // </w:p> + // </w:tc> + // </w:tr> + // </w:tbl> + // </w:tc> + // </w:tr> + // </w:tbl> + // + // i.e., moves all contents of the outer paragraph into the inner table's first paragraph. 
+ + CPPUNIT_ASSERT_EQUAL(2, getParagraphs()); + + uno::Reference<text::XTextContent> outerTable = getParagraphOrTable(1); + getCell(outerTable, "A1", "[outer:A1]"); + uno::Reference<text::XText> cellA2(getCell(outerTable, "A2"), uno::UNO_QUERY_THROW); + uno::Reference<text::XTextContent> innerTable = getParagraphOrTable(1, cellA2); + getCell(innerTable, "A1", "[outer:A2]\n[inner:A1]"); +} + // tests should only be added to ooxmlIMPORT *if* they fail round-tripping in ooxmlEXPORT CPPUNIT_PLUGIN_IMPLEMENT(); diff --git a/writerfilter/source/ooxml/OOXMLFastContextHandler.cxx b/writerfilter/source/ooxml/OOXMLFastContextHandler.cxx index c82b62113876..9bbbce2af961 100644 --- a/writerfilter/source/ooxml/OOXMLFastContextHandler.cxx +++ b/writerfilter/source/ooxml/OOXMLFastContextHandler.cxx @@ -1540,6 +1540,25 @@ void OOXMLFastContextHandlerTextTable::lcl_endFastElement mpParserState->endTable(); } +// tdf#111550 +void OOXMLFastContextHandlerTextTable::start_P_Tbl() +{ + // Normally, when one paragraph ends, and another begins, + // in OOXMLFactory_wml::endAction handler for <w:p>, + // pHandler->endOfParagraph() is called, which (among other things) + // calls TableManager::setHandle() to update current cell's starting point. + // Then, in OOXMLFactory_wml::startAction for the next <w:p>, + // pHandler->startParagraphGroup() is called, which ends the previous group, + // and there, it pushes cells to row in TableManager::endParagraphGroup() + // (cells have correct bounds defined by mCurHandle). + // When a table is a child of a <w:p>, that paragraph doesn't end before the nested + // paragraph begins. So, pHandler->endOfParagraph() was not (and should not be) + // called. But as the next paragraph starts, if the previous group is closed, then + // cells will have wrong bounds. Here, we know that we *are* in a paragraph + // group, but it should not be finished. 
+ mpParserState->setInParagraphGroup(false); +} + /* class OOXMLFastContextHandlerShape */ diff --git a/writerfilter/source/ooxml/OOXMLFastContextHandler.hxx b/writerfilter/source/ooxml/OOXMLFastContextHandler.hxx index 4909c320ccef..2b52eb16c0a2 100644 --- a/writerfilter/source/ooxml/OOXMLFastContextHandler.hxx +++ b/writerfilter/source/ooxml/OOXMLFastContextHandler.hxx @@ -419,6 +419,10 @@ public: virtual std::string getType() const override { return "TextTable"; } + // tdf#111550 + // when <w:tbl> appears as direct child of <w:p>, we need to rearrange this paragraph + // to merge with the table's first paragraph (that's what Word does in this case) + void start_P_Tbl(); protected: virtual void lcl_startFastElement(Token_t Element, const css::uno::Reference< css::xml::sax::XFastAttributeList > & Attribs) override; diff --git a/writerfilter/source/ooxml/factoryimpl_ns.py b/writerfilter/source/ooxml/factoryimpl_ns.py index 73b1245a71c1..1134a14cb331 100644 --- a/writerfilter/source/ooxml/factoryimpl_ns.py +++ b/writerfilter/source/ooxml/factoryimpl_ns.py @@ -442,6 +442,10 @@ def factoryChooseAction(actionNode): elif actionNode.getAttribute("action") == "handleGridBefore" or actionNode.getAttribute("action") == "handleGridAfter": ret.append(" %sif (OOXMLFastContextHandlerTextTableRow* pTextTableRow = dynamic_cast<OOXMLFastContextHandlerTextTableRow*>(pHandler))" % extra_space) ret.append(" %s pTextTableRow->%s();" % (extra_space, actionNode.getAttribute("action"))) + # tdf#111550 + elif actionNode.getAttribute("action") in ("start_P_Tbl"): + ret.append(" %sif (OOXMLFastContextHandlerTextTable* pTextTable = dynamic_cast<OOXMLFastContextHandlerTextTable*>(pHandler))" % extra_space) + ret.append(" %s pTextTable->%s();" % (extra_space, actionNode.getAttribute("action"))) elif actionNode.getAttribute("action") in ("sendProperty", "handleHyperlink"): ret.append(" %sif (OOXMLFastContextHandlerStream* pStream = dynamic_cast<OOXMLFastContextHandlerStream*>(pHandler))" % 
extra_space) ret.append(" %s pStream->%s();" % (extra_space, actionNode.getAttribute("action"))) diff --git a/writerfilter/source/ooxml/model.xml b/writerfilter/source/ooxml/model.xml index 386d51d7c364..81cdc3ff9c77 100644 --- a/writerfilter/source/ooxml/model.xml +++ b/writerfilter/source/ooxml/model.xml @@ -14174,6 +14174,10 @@ <ref name="CT_Br_OutOfOrder"/> </element> <!-- end tdf#108714 --> + <!-- tdf#111550 : allow <w:tbl> at paragraph level (despite this is illegal according to ECMA-376-1:2016) - bug-to-bug compatibility with Word --> + <element name="tbl"> + <ref name="CT_P_Tbl"/> + </element> </define> <define name="ST_TblWidth"> <choice> @@ -14690,6 +14694,17 @@ </element> <ref name="EG_ContentRowContent"/> </define> + <!-- tdf#111550 : Special element - copy of usual CT_Tbl, but only used as direct child of CT_P --> + <define name="CT_P_Tbl"> + <ref name="EG_RangeMarkupElements"/> + <element name="tblPr"> + <ref name="CT_TblPr"/> + </element> + <element name="tblGrid"> + <ref name="CT_TblGrid"/> + </element> + <ref name="EG_ContentRowContent"/> + </define> <define name="CT_TblLook"> <attribute name="firstRow"> <ref name="ST_OnOff"/> @@ -18418,6 +18433,10 @@ <element name="tblPrExChange" tokenid="ooxml:CT_TblPrEx_tblPrExChange"/> </resource> <resource name="CT_Tbl" resource="TextTable"/> + <!-- tdf#111550 : allow <w:tbl> at paragraph level (despite this is illegal according to ECMA-376-1:2016) - bug-to-bug compatibility with Word --> + <resource name="CT_P_Tbl" resource="TextTable"> + <action name="start" action="start_P_Tbl"/> + </resource> <resource name="CT_TblLook" resource="Properties"> <attribute name="firstRow" tokenid="ooxml:CT_TblLook_firstRow"/> <attribute name="lastRow" tokenid="ooxml:CT_TblLook_lastRow"/> |