diff options
author | László Németh <nemeth@numbertext.org> | 2021-02-10 00:12:52 +0100 |
---|---|---|
committer | László Németh <nemeth@numbertext.org> | 2021-02-12 18:27:31 +0100 |
commit | 9b39ce0e66acfe812e1d50e530dc2ccdef3e1357 (patch) | |
tree | f1bb0d9c24dbe6a40e82837f0ff5632a90799530 | |
parent | 3380163bc0fb9dab7f289cc36b0eeb0c9b3ddaa9 (diff) |
tdf#76260 DOCX import: fix slow footnote import
by parsing footnotes.xml only once instead
of parsing again and again for every footnotes.
This was a serious performance problem for
documents with hundreds of footnotes, where the
footnote import took minutes instead of seconds.
Note: switch off CHECK_NOTMERGED in a debug build
to measure realistic speed-up, e.g. with the
reported document: 2 min 36 sec -> 22 sec, and
not 1 min 40 sec, as with CHECK_NOTMERGED.
Revert commit 60dbe21f59a45889c433727d0862c9a4274d94d2
("tdf#88126: sw_ooxmlexport15: Add unittest"), because
it was a fragile table layout test, worked only by accident.
Change-Id: I9460442cf0c30f2bc5ff393c947a008ca7bba6df
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/110811
Tested-by: Jenkins
Reviewed-by: László Németh <nemeth@numbertext.org>
-rw-r--r-- | sw/qa/extras/ooxmlexport/data/tdf88126.docx | bin | 40642 -> 0 bytes | |||
-rw-r--r-- | sw/qa/extras/ooxmlexport/ooxmlexport15.cxx | 6 | ||||
-rw-r--r-- | writerfilter/source/dmapper/DomainMapper.cxx | 13 | ||||
-rw-r--r-- | writerfilter/source/dmapper/DomainMapper_Impl.cxx | 100 | ||||
-rw-r--r-- | writerfilter/source/dmapper/DomainMapper_Impl.hxx | 7 | ||||
-rw-r--r-- | writerfilter/source/ooxml/OOXMLDocumentImpl.cxx | 7 | ||||
-rw-r--r-- | writerfilter/source/ooxml/OOXMLDocumentImpl.hxx | 1 | ||||
-rw-r--r-- | writerfilter/source/ooxml/OOXMLFastContextHandler.cxx | 24 | ||||
-rw-r--r-- | writerfilter/source/ooxml/OOXMLParserState.cxx | 8 | ||||
-rw-r--r-- | writerfilter/source/ooxml/OOXMLParserState.hxx | 4 |
10 files changed, 153 insertions, 17 deletions
diff --git a/sw/qa/extras/ooxmlexport/data/tdf88126.docx b/sw/qa/extras/ooxmlexport/data/tdf88126.docx Binary files differdeleted file mode 100644 index 9d4d2d5d0f8c..000000000000 --- a/sw/qa/extras/ooxmlexport/data/tdf88126.docx +++ /dev/null diff --git a/sw/qa/extras/ooxmlexport/ooxmlexport15.cxx b/sw/qa/extras/ooxmlexport/ooxmlexport15.cxx index 574b925a07aa..31426770648a 100644 --- a/sw/qa/extras/ooxmlexport/ooxmlexport15.cxx +++ b/sw/qa/extras/ooxmlexport/ooxmlexport15.cxx @@ -130,12 +130,6 @@ DECLARE_OOXMLEXPORT_TEST(testTdf137850_compat15ZOrder, "tdf137850_compat15ZOrder CPPUNIT_ASSERT_EQUAL_MESSAGE("Textbox is in the foreground", true, getProperty<bool>(xShape, "Opaque")); } -DECLARE_OOXMLEXPORT_TEST(testTdf88126, "tdf88126.docx") -{ - // Without the fix in place, this test would have hung - CPPUNIT_ASSERT_EQUAL(11, getPages()); -} - DECLARE_OOXMLEXPORT_EXPORTONLY_TEST(testTdf118701, "tdf118701.docx") { // This was 6, related to moving inline images after the page breaks diff --git a/writerfilter/source/dmapper/DomainMapper.cxx b/writerfilter/source/dmapper/DomainMapper.cxx index 9ea70164c96e..4f58670aefb5 100644 --- a/writerfilter/source/dmapper/DomainMapper.cxx +++ b/writerfilter/source/dmapper/DomainMapper.cxx @@ -2276,7 +2276,7 @@ void DomainMapper::sprmWithProps( Sprm& rSprm, const PropertyMapPtr& rContext ) case NS_ooxml::LN_EG_RPrBase_rStyle: { OUString sConvertedName( m_pImpl->GetStyleSheetTable()->ConvertStyleName( sStringValue, true ) ); - if (m_pImpl->CheckFootnoteStyle()) + if (m_pImpl->CheckFootnoteStyle() && m_pImpl->GetFootnoteContext()) m_pImpl->SetHasFootnoteStyle(m_pImpl->GetFootnoteContext()->GetFootnoteStyle() == sConvertedName); // First check if the style exists in the document. @@ -3370,6 +3370,17 @@ void DomainMapper::lcl_utext(const sal_uInt8 * data_, size_t len) if (len == 1) { + // preload all footnotes in separated footnotes + if (sText[0] == 0x5) + { + if (m_pImpl->GetFootnoteCount() > -1) + { + m_pImpl->PopFootOrEndnote(/*bIsFootnote=*/true); + m_pImpl->PushFootOrEndnote(/*bIsFootnote=*/true); + } + m_pImpl->IncrementFootnoteCount(); + } + // If the footnote contains a Footnote Reference Mark, it can't be a custom footnote // ****** // This code block is wrong, as it should also be in m_pImpl->IsInFootOrEndnote(). diff --git a/writerfilter/source/dmapper/DomainMapper_Impl.cxx b/writerfilter/source/dmapper/DomainMapper_Impl.cxx index c3cbbcb2a2b9..ebfe9d625bd2 100644 --- a/writerfilter/source/dmapper/DomainMapper_Impl.cxx +++ b/writerfilter/source/dmapper/DomainMapper_Impl.cxx @@ -45,6 +45,8 @@ #include <com/sun/star/text/XDocumentIndex.hpp> #include <com/sun/star/text/XDocumentIndexesSupplier.hpp> #include <com/sun/star/text/XFootnote.hpp> +#include <com/sun/star/text/XEndnotesSupplier.hpp> +#include <com/sun/star/text/XFootnotesSupplier.hpp> #include <com/sun/star/text/XLineNumberingProperties.hpp> #include <com/sun/star/style/XStyle.hpp> #include <com/sun/star/text/PageNumberType.hpp> @@ -305,6 +307,7 @@ DomainMapper_Impl::DomainMapper_Impl( m_bHasFootnoteStyle(false), m_bCheckFootnoteStyle(false), m_eSkipFootnoteState(SkipFootnoteSeparator::OFF), + m_nFootnotes(-1), m_bLineNumberingSet( false ), m_bIsInFootnoteProperties( false ), m_bIsParaMarkerChange( false ), @@ -2700,6 +2703,7 @@ void DomainMapper_Impl::CreateRedline(uno::Reference<text::XTextRange> const& xR pRedlineProperties[1].Value <<= ConversionHelper::ConvertDateStringToDateTime( pRedline->m_sDate ); pRedlineProperties[2].Name = getPropertyName( PROP_REDLINE_REVERT_PROPERTIES ); pRedlineProperties[2].Value <<= pRedline->m_aRevertProperties; + // TODO: add !IsInFootOrEndnote(), if loading of endnotes is optimized if (!m_bIsActualParagraphFramed) { uno::Reference < text::XRedline > xRedline( xRange, uno::UNO_QUERY_THROW ); @@ -2712,6 +2716,12 @@ void DomainMapper_Impl::CreateRedline(uno::Reference<text::XTextRange> const& xR aFramedRedlines.push_back( uno::makeAny(sType) ); aFramedRedlines.push_back( uno::makeAny(aRedlineProperties) ); } + else if (IsInFootOrEndnote()) + { + aFootnoteRedlines.push_back( uno::makeAny(xRange) ); + aFootnoteRedlines.push_back( uno::makeAny(sType) ); + aFootnoteRedlines.push_back( uno::makeAny(aRedlineProperties) ); + } } catch( const uno::Exception & ) { @@ -2823,8 +2833,92 @@ void DomainMapper_Impl::PushAnnotation() } -void DomainMapper_Impl::PopFootOrEndnote() +void DomainMapper_Impl::PopFootOrEndnote( bool bIsFootnote ) { + // content of the footnotes were inserted after the first footnote in temporary footnotes, + // restore the content of the actual footnote by copying its content from the first + // (remaining) temporary footnote and remove the temporary footnote. + // FIXME: add footnote IDs to handle possible differences in footnote serialization + uno::Reference< text::XFootnotesSupplier> xFootnotesSupplier( GetTextDocument(), uno::UNO_QUERY ); + if ( bIsFootnote && GetFootnoteCount() > -1 && xFootnotesSupplier.is() ) + { + auto xFootnotes = xFootnotesSupplier->getFootnotes(); + uno::Reference< text::XFootnote > xFootnoteFirst, xFootnoteLast; + xFootnotes->getByIndex(xFootnotes->getCount()-1) >>= xFootnoteLast; + if ( xFootnotes->getCount() > 1 && xFootnoteLast->getLabel().isEmpty() ) + { + // copy content of the first remaining temporary footnote + xFootnotes->getByIndex(1) >>= xFootnoteFirst; + uno::Reference< text::XText > xSrc( xFootnoteFirst, uno::UNO_QUERY_THROW ); + uno::Reference< text::XText > xDest( xFootnoteLast, uno::UNO_QUERY_THROW ); + uno::Reference< text::XTextCopy > xTxt, xTxt2; + xTxt.set( xSrc, uno::UNO_QUERY_THROW ); + xTxt2.set( xDest, uno::UNO_QUERY_THROW ); + xTxt2->copyText( xTxt ); + + // copy its redlines + std::vector<sal_Int32> redPos, redLen; + sal_Int32 nFirstRedline = -1; + for( size_t i = 0; i < aFootnoteRedlines.size(); i+=3) + { + uno::Reference< text::XTextRange > xRange; + aFootnoteRedlines[i] >>= xRange; + + // is this a redline of the temporary footnote? + uno::Reference<text::XTextCursor> xRangeCursor; + try + { + xRangeCursor = xSrc->createTextCursorByRange( xRange ); + } + catch( const uno::Exception& ) + { + } + if (xRangeCursor.is()) + { + nFirstRedline = i; + sal_Int32 nLen = xRange->getString().getLength(); + redLen.push_back(nLen); + xRangeCursor->gotoRange(xSrc->getStart(), true); + redPos.push_back(xRangeCursor->getString().getLength() - nLen); + } + else + { + // we have already found all redlines of the footnote, + // skip checking the redlines of the other footnotes + if (nFirstRedline > -1) + break; + // failed createTextCursorByRange(), for example, table inside the frame + redLen.push_back(-1); + redPos.push_back(-1); + } + } + + // create redlines in the copied footnote + for( size_t i = 0; nFirstRedline > -1 && i <= sal::static_int_cast<size_t>(nFirstRedline); i+=3) + { + OUString sType; + beans::PropertyValues aRedlineProperties( 3 ); + // skip failed createTextCursorByRange() + if (redPos[i/3] == -1) + continue; + aFootnoteRedlines[i+1] >>= sType; + aFootnoteRedlines[i+2] >>= aRedlineProperties; + uno::Reference< text::XTextCursor > xCrsr = xDest->getText()->createTextCursor(); + xCrsr->goRight(redPos[i/3], false); + xCrsr->goRight(redLen[i/3], true); + uno::Reference < text::XRedline > xRedline( xCrsr, uno::UNO_QUERY_THROW ); + xRedline->makeRedline( sType, aRedlineProperties ); + } + + // remove processed redlines + for( size_t i = 0; nFirstRedline > -1 && i <= sal::static_int_cast<size_t>(nFirstRedline) + 2; i++) + aFootnoteRedlines.pop_front(); + + // remove temporary footnote + xFootnoteFirst->getAnchor()->setString(""); + } + } + if (!IsRTFImport()) RemoveLastParagraph(); @@ -7135,6 +7229,8 @@ void DomainMapper_Impl::RemoveTopRedline( ) { if (m_aRedlines.top().empty()) { + if (GetFootnoteCount() > -1) + return; SAL_WARN("writerfilter.dmapper", "RemoveTopRedline called with empty stack"); throw uno::Exception("RemoveTopRedline failed", nullptr); } @@ -7514,7 +7610,7 @@ void DomainMapper_Impl::substream(Id rName, break; case NS_ooxml::LN_footnote: case NS_ooxml::LN_endnote: - PopFootOrEndnote(); + PopFootOrEndnote( NS_ooxml::LN_footnote == rName ); break; case NS_ooxml::LN_annotation : PopAnnotation(); diff --git a/writerfilter/source/dmapper/DomainMapper_Impl.hxx b/writerfilter/source/dmapper/DomainMapper_Impl.hxx index ab29d0a3a185..8441b4012152 100644 --- a/writerfilter/source/dmapper/DomainMapper_Impl.hxx +++ b/writerfilter/source/dmapper/DomainMapper_Impl.hxx @@ -525,6 +525,8 @@ private: bool m_bCheckFootnoteStyle; /// Skip paragraphs from the <w:separator/> footnote SkipFootnoteSeparator m_eSkipFootnoteState; + /// preload footnotes + sal_Int32 m_nFootnotes; bool m_bLineNumberingSet; bool m_bIsInFootnoteProperties; @@ -790,7 +792,7 @@ public: bool IsInTOC() const { return m_bStartTOC; } void PushFootOrEndnote( bool bIsFootnote ); - void PopFootOrEndnote(); + void PopFootOrEndnote( bool bIsFootnote ); bool IsInFootOrEndnote() const { return m_bInFootOrEndnote; } void StartCustomFootnote(const PropertyMapPtr pContext); @@ -804,6 +806,8 @@ public: SkipFootnoteSeparator GetSkipFootnoteState() const { return m_eSkipFootnoteState; } void SetSkipFootnoteState(SkipFootnoteSeparator eId) { m_eSkipFootnoteState = eId; } + sal_Int32 GetFootnoteCount() const { return m_nFootnotes; } + void IncrementFootnoteCount() { ++m_nFootnotes; } void PushAnnotation(); void PopAnnotation(); @@ -1091,6 +1095,7 @@ public: /// store their data, and create them after frame creation bool m_bIsActualParagraphFramed; std::vector<css::uno::Any> aFramedRedlines; + std::deque<css::uno::Any> aFootnoteRedlines; bool IsParaWithInlineObject() const { return m_bParaWithInlineObject; } diff --git a/writerfilter/source/ooxml/OOXMLDocumentImpl.cxx b/writerfilter/source/ooxml/OOXMLDocumentImpl.cxx index 813b9cbc75fb..938d977d3348 100644 --- a/writerfilter/source/ooxml/OOXMLDocumentImpl.cxx +++ b/writerfilter/source/ooxml/OOXMLDocumentImpl.cxx @@ -52,6 +52,7 @@ namespace writerfilter::ooxml OOXMLDocumentImpl::OOXMLDocumentImpl(OOXMLStream::Pointer_t const & pStream, const uno::Reference<task::XStatusIndicator>& xStatusIndicator, bool bSkipImages, const uno::Sequence<beans::PropertyValue>& rDescriptor) : mpStream(pStream) , mxStatusIndicator(xStatusIndicator) + , mpXNoteStream() , mnXNoteId(0) , mbIsSubstream(false) , mbSkipImages(bSkipImages) @@ -270,8 +271,8 @@ void OOXMLDocumentImpl::resolveFootnote(Stream & rStream, Id aType, const sal_Int32 nNoteId) { - writerfilter::Reference<Stream>::Pointer_t pStream = - getXNoteStream(OOXMLStream::FOOTNOTES, nNoteId); + if (!mpXNoteStream) + mpXNoteStream = getXNoteStream(OOXMLStream::FOOTNOTES, nNoteId); Id nId; switch (aType) @@ -285,7 +286,7 @@ void OOXMLDocumentImpl::resolveFootnote(Stream & rStream, break; } - resolveFastSubStreamWithId(rStream, pStream, nId); + resolveFastSubStreamWithId(rStream, mpXNoteStream, nId); } void OOXMLDocumentImpl::resolveEndnote(Stream & rStream, diff --git a/writerfilter/source/ooxml/OOXMLDocumentImpl.hxx b/writerfilter/source/ooxml/OOXMLDocumentImpl.hxx index 1848f8971766..35a77989605b 100644 --- a/writerfilter/source/ooxml/OOXMLDocumentImpl.hxx +++ b/writerfilter/source/ooxml/OOXMLDocumentImpl.hxx @@ -36,6 +36,7 @@ class OOXMLDocumentImpl : public OOXMLDocument { OOXMLStream::Pointer_t mpStream; css::uno::Reference<css::task::XStatusIndicator> mxStatusIndicator; + writerfilter::Reference<Stream>::Pointer_t mpXNoteStream; sal_Int32 mnXNoteId; css::uno::Reference<css::frame::XModel> mxModel; diff --git a/writerfilter/source/ooxml/OOXMLFastContextHandler.cxx b/writerfilter/source/ooxml/OOXMLFastContextHandler.cxx index a115badfe72a..fe02597775bd 100644 --- a/writerfilter/source/ooxml/OOXMLFastContextHandler.cxx +++ b/writerfilter/source/ooxml/OOXMLFastContextHandler.cxx @@ -41,6 +41,7 @@ const sal_Unicode uCR = 0xd; const sal_Unicode uFtnEdnRef = 0x2; const sal_Unicode uFtnEdnSep = 0x3; +const sal_Unicode uFtnSep = 0x5; const sal_Unicode uTab = 0x9; const sal_Unicode uPgNum = 0x0; const sal_Unicode uNoBreakHyphen = 0x2011; @@ -165,12 +166,22 @@ void SAL_CALL OOXMLFastContextHandler::startFastElement mbPreserveSpace = Attribs->getValue(oox::NMSP_xml | oox::XML_space) == "preserve"; mbPreserveSpaceSet = true; } - if (Element == (NMSP_officeMath | XML_oMathPara)) + if (Element == W_TOKEN(footnote)) + { + // send uFtnSep to sign new footnote content, but skip footnote separators + if (!Attribs->hasAttribute(W_TOKEN(type)) || + ( Attribs->getValue(W_TOKEN(type)) != "separator" && + Attribs->getValue(W_TOKEN(type)) != "continuationSeparator" )) + { + mpParserState->setStartFootnote(true); + } + } + else if (Element == (NMSP_officeMath | XML_oMathPara)) { mnMathJcVal = eMathParaJc::CENTER; mbIsMathPara = true; } - if (Element == (NMSP_officeMath | XML_jc) && mpParent && mpParent->mpParent ) + else if (Element == (NMSP_officeMath | XML_jc) && mpParent && mpParent->mpParent ) { mbIsMathPara = true; auto aAttrLst = Attribs->getFastAttributes(); @@ -370,6 +381,11 @@ void OOXMLFastContextHandler::startCharacterGroup() mpStream->startCharacterGroup(); mpParserState->setInCharacterGroup(true); mpParserState->resolveCharacterProperties(*mpStream); + if (mpParserState->isStartFootnote()) + { + mpStream->utext(reinterpret_cast<const sal_uInt8*>(&uFtnSep), 1); + mpParserState->setStartFootnote(false); + } } // tdf#108714 : if we have a postponed break information, @@ -1340,7 +1356,9 @@ void OOXMLFastContextHandlerXNote::lcl_startFastElement mbForwardEventsSaved = isForwardEvents(); // If this is the note we're looking for or this is the footnote separator one. - if (mnMyXNoteId == getXNoteId() || static_cast<sal_uInt32>(mnMyXNoteType) == NS_ooxml::LN_Value_doc_ST_FtnEdn_separator) + if (mnMyXNoteId == getXNoteId() || + static_cast<sal_uInt32>(mnMyXNoteType) == NS_ooxml::LN_Value_doc_ST_FtnEdn_separator || + mpParserState->isStartFootnote()) setForwardEvents(true); else setForwardEvents(false); diff --git a/writerfilter/source/ooxml/OOXMLParserState.cxx b/writerfilter/source/ooxml/OOXMLParserState.cxx index c103566d5331..3d35cbb9deac 100644 --- a/writerfilter/source/ooxml/OOXMLParserState.cxx +++ b/writerfilter/source/ooxml/OOXMLParserState.cxx @@ -40,7 +40,8 @@ OOXMLParserState::OOXMLParserState() : inTxbxContent(false), savedInParagraphGroup(false), savedInCharacterGroup(false), - savedLastParagraphInSection(false) + savedLastParagraphInSection(false), + mbStartFootnote(false) { } @@ -275,6 +276,11 @@ void OOXMLParserState::endTxbxContent() inTxbxContent = false; } +void OOXMLParserState::setStartFootnote(bool bStartFootnote) +{ + mbStartFootnote = bStartFootnote; +} + } /* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/writerfilter/source/ooxml/OOXMLParserState.hxx b/writerfilter/source/ooxml/OOXMLParserState.hxx index 422ba0b776ef..7d094dbd42b7 100644 --- a/writerfilter/source/ooxml/OOXMLParserState.hxx +++ b/writerfilter/source/ooxml/OOXMLParserState.hxx @@ -58,6 +58,7 @@ class OOXMLParserState final : public virtual SvRefBase bool savedLastParagraphInSection; std::vector<SavedAlternateState> maSavedAlternateStates; std::vector<OOXMLPropertySet::Pointer_t> mvPostponedBreaks; + bool mbStartFootnote; public: typedef tools::SvRef<OOXMLParserState> Pointer_t; @@ -112,6 +113,9 @@ public: void startTxbxContent(); void endTxbxContent(); + + void setStartFootnote(bool bStartFootnote); + bool isStartFootnote() const { return mbStartFootnote; } }; } |