summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLászló Németh <nemeth@numbertext.org>2021-02-10 00:12:52 +0100
committerLászló Németh <nemeth@numbertext.org>2021-02-12 18:27:31 +0100
commit9b39ce0e66acfe812e1d50e530dc2ccdef3e1357 (patch)
treef1bb0d9c24dbe6a40e82837f0ff5632a90799530
parent3380163bc0fb9dab7f289cc36b0eeb0c9b3ddaa9 (diff)
tdf#76260 DOCX import: fix slow footnote import
by parsing footnotes.xml only once instead of parsing again and again for every footnotes. This was a serious performance problem for documents with hundreds of footnotes, where the footnote import took minutes instead of seconds. Note: switch off CHECK_NOTMERGED in a debug build to measure realistic speed-up, e.g. with the reported document: 2 min 36 sec -> 22 sec, and not 1 min 40 sec, as with CHECK_NOTMERGED. Revert commit 60dbe21f59a45889c433727d0862c9a4274d94d2 ("tdf#88126: sw_ooxmlexport15: Add unittest"), because it was a fragile table layout test, worked only by accident. Change-Id: I9460442cf0c30f2bc5ff393c947a008ca7bba6df Reviewed-on: https://gerrit.libreoffice.org/c/core/+/110811 Tested-by: Jenkins Reviewed-by: László Németh <nemeth@numbertext.org>
-rw-r--r--sw/qa/extras/ooxmlexport/data/tdf88126.docxbin40642 -> 0 bytes
-rw-r--r--sw/qa/extras/ooxmlexport/ooxmlexport15.cxx6
-rw-r--r--writerfilter/source/dmapper/DomainMapper.cxx13
-rw-r--r--writerfilter/source/dmapper/DomainMapper_Impl.cxx100
-rw-r--r--writerfilter/source/dmapper/DomainMapper_Impl.hxx7
-rw-r--r--writerfilter/source/ooxml/OOXMLDocumentImpl.cxx7
-rw-r--r--writerfilter/source/ooxml/OOXMLDocumentImpl.hxx1
-rw-r--r--writerfilter/source/ooxml/OOXMLFastContextHandler.cxx24
-rw-r--r--writerfilter/source/ooxml/OOXMLParserState.cxx8
-rw-r--r--writerfilter/source/ooxml/OOXMLParserState.hxx4
10 files changed, 153 insertions, 17 deletions
diff --git a/sw/qa/extras/ooxmlexport/data/tdf88126.docx b/sw/qa/extras/ooxmlexport/data/tdf88126.docx
deleted file mode 100644
index 9d4d2d5d0f8c..000000000000
--- a/sw/qa/extras/ooxmlexport/data/tdf88126.docx
+++ /dev/null
Binary files differ
diff --git a/sw/qa/extras/ooxmlexport/ooxmlexport15.cxx b/sw/qa/extras/ooxmlexport/ooxmlexport15.cxx
index 574b925a07aa..31426770648a 100644
--- a/sw/qa/extras/ooxmlexport/ooxmlexport15.cxx
+++ b/sw/qa/extras/ooxmlexport/ooxmlexport15.cxx
@@ -130,12 +130,6 @@ DECLARE_OOXMLEXPORT_TEST(testTdf137850_compat15ZOrder, "tdf137850_compat15ZOrder
CPPUNIT_ASSERT_EQUAL_MESSAGE("Textbox is in the foreground", true, getProperty<bool>(xShape, "Opaque"));
}
-DECLARE_OOXMLEXPORT_TEST(testTdf88126, "tdf88126.docx")
-{
- // Without the fix in place, this test would have hung
- CPPUNIT_ASSERT_EQUAL(11, getPages());
-}
-
DECLARE_OOXMLEXPORT_EXPORTONLY_TEST(testTdf118701, "tdf118701.docx")
{
// This was 6, related to moving inline images after the page breaks
diff --git a/writerfilter/source/dmapper/DomainMapper.cxx b/writerfilter/source/dmapper/DomainMapper.cxx
index 9ea70164c96e..4f58670aefb5 100644
--- a/writerfilter/source/dmapper/DomainMapper.cxx
+++ b/writerfilter/source/dmapper/DomainMapper.cxx
@@ -2276,7 +2276,7 @@ void DomainMapper::sprmWithProps( Sprm& rSprm, const PropertyMapPtr& rContext )
case NS_ooxml::LN_EG_RPrBase_rStyle:
{
OUString sConvertedName( m_pImpl->GetStyleSheetTable()->ConvertStyleName( sStringValue, true ) );
- if (m_pImpl->CheckFootnoteStyle())
+ if (m_pImpl->CheckFootnoteStyle() && m_pImpl->GetFootnoteContext())
m_pImpl->SetHasFootnoteStyle(m_pImpl->GetFootnoteContext()->GetFootnoteStyle() == sConvertedName);
// First check if the style exists in the document.
@@ -3370,6 +3370,17 @@ void DomainMapper::lcl_utext(const sal_uInt8 * data_, size_t len)
if (len == 1)
{
+ // preload all footnotes in separated footnotes
+ if (sText[0] == 0x5)
+ {
+ if (m_pImpl->GetFootnoteCount() > -1)
+ {
+ m_pImpl->PopFootOrEndnote(/*bIsFootnote=*/true);
+ m_pImpl->PushFootOrEndnote(/*bIsFootnote=*/true);
+ }
+ m_pImpl->IncrementFootnoteCount();
+ }
+
// If the footnote contains a Footnote Reference Mark, it can't be a custom footnote
// ******
// This code block is wrong, as it should also be in m_pImpl->IsInFootOrEndnote().
diff --git a/writerfilter/source/dmapper/DomainMapper_Impl.cxx b/writerfilter/source/dmapper/DomainMapper_Impl.cxx
index c3cbbcb2a2b9..ebfe9d625bd2 100644
--- a/writerfilter/source/dmapper/DomainMapper_Impl.cxx
+++ b/writerfilter/source/dmapper/DomainMapper_Impl.cxx
@@ -45,6 +45,8 @@
#include <com/sun/star/text/XDocumentIndex.hpp>
#include <com/sun/star/text/XDocumentIndexesSupplier.hpp>
#include <com/sun/star/text/XFootnote.hpp>
+#include <com/sun/star/text/XEndnotesSupplier.hpp>
+#include <com/sun/star/text/XFootnotesSupplier.hpp>
#include <com/sun/star/text/XLineNumberingProperties.hpp>
#include <com/sun/star/style/XStyle.hpp>
#include <com/sun/star/text/PageNumberType.hpp>
@@ -305,6 +307,7 @@ DomainMapper_Impl::DomainMapper_Impl(
m_bHasFootnoteStyle(false),
m_bCheckFootnoteStyle(false),
m_eSkipFootnoteState(SkipFootnoteSeparator::OFF),
+ m_nFootnotes(-1),
m_bLineNumberingSet( false ),
m_bIsInFootnoteProperties( false ),
m_bIsParaMarkerChange( false ),
@@ -2700,6 +2703,7 @@ void DomainMapper_Impl::CreateRedline(uno::Reference<text::XTextRange> const& xR
pRedlineProperties[1].Value <<= ConversionHelper::ConvertDateStringToDateTime( pRedline->m_sDate );
pRedlineProperties[2].Name = getPropertyName( PROP_REDLINE_REVERT_PROPERTIES );
pRedlineProperties[2].Value <<= pRedline->m_aRevertProperties;
+ // TODO: add !IsInFootOrEndnote(), if loading of endnotes is optimized
if (!m_bIsActualParagraphFramed)
{
uno::Reference < text::XRedline > xRedline( xRange, uno::UNO_QUERY_THROW );
@@ -2712,6 +2716,12 @@ void DomainMapper_Impl::CreateRedline(uno::Reference<text::XTextRange> const& xR
aFramedRedlines.push_back( uno::makeAny(sType) );
aFramedRedlines.push_back( uno::makeAny(aRedlineProperties) );
}
+ else if (IsInFootOrEndnote())
+ {
+ aFootnoteRedlines.push_back( uno::makeAny(xRange) );
+ aFootnoteRedlines.push_back( uno::makeAny(sType) );
+ aFootnoteRedlines.push_back( uno::makeAny(aRedlineProperties) );
+ }
}
catch( const uno::Exception & )
{
@@ -2823,8 +2833,92 @@ void DomainMapper_Impl::PushAnnotation()
}
-void DomainMapper_Impl::PopFootOrEndnote()
+void DomainMapper_Impl::PopFootOrEndnote( bool bIsFootnote )
{
+ // content of the footnotes were inserted after the first footnote in temporary footnotes,
+ // restore the content of the actual footnote by copying its content from the first
+ // (remaining) temporary footnote and remove the temporary footnote.
+ // FIXME: add footnote IDs to handle possible differences in footnote serialization
+ uno::Reference< text::XFootnotesSupplier> xFootnotesSupplier( GetTextDocument(), uno::UNO_QUERY );
+ if ( bIsFootnote && GetFootnoteCount() > -1 && xFootnotesSupplier.is() )
+ {
+ auto xFootnotes = xFootnotesSupplier->getFootnotes();
+ uno::Reference< text::XFootnote > xFootnoteFirst, xFootnoteLast;
+ xFootnotes->getByIndex(xFootnotes->getCount()-1) >>= xFootnoteLast;
+ if ( xFootnotes->getCount() > 1 && xFootnoteLast->getLabel().isEmpty() )
+ {
+ // copy content of the first remaining temporary footnote
+ xFootnotes->getByIndex(1) >>= xFootnoteFirst;
+ uno::Reference< text::XText > xSrc( xFootnoteFirst, uno::UNO_QUERY_THROW );
+ uno::Reference< text::XText > xDest( xFootnoteLast, uno::UNO_QUERY_THROW );
+ uno::Reference< text::XTextCopy > xTxt, xTxt2;
+ xTxt.set( xSrc, uno::UNO_QUERY_THROW );
+ xTxt2.set( xDest, uno::UNO_QUERY_THROW );
+ xTxt2->copyText( xTxt );
+
+ // copy its redlines
+ std::vector<sal_Int32> redPos, redLen;
+ sal_Int32 nFirstRedline = -1;
+ for( size_t i = 0; i < aFootnoteRedlines.size(); i+=3)
+ {
+ uno::Reference< text::XTextRange > xRange;
+ aFootnoteRedlines[i] >>= xRange;
+
+ // is this a redline of the temporary footnote?
+ uno::Reference<text::XTextCursor> xRangeCursor;
+ try
+ {
+ xRangeCursor = xSrc->createTextCursorByRange( xRange );
+ }
+ catch( const uno::Exception& )
+ {
+ }
+ if (xRangeCursor.is())
+ {
+ nFirstRedline = i;
+ sal_Int32 nLen = xRange->getString().getLength();
+ redLen.push_back(nLen);
+ xRangeCursor->gotoRange(xSrc->getStart(), true);
+ redPos.push_back(xRangeCursor->getString().getLength() - nLen);
+ }
+ else
+ {
+ // we have already found all redlines of the footnote,
+ // skip checking the redlines of the other footnotes
+ if (nFirstRedline > -1)
+ break;
+ // failed createTextCursorByRange(), for example, table inside the frame
+ redLen.push_back(-1);
+ redPos.push_back(-1);
+ }
+ }
+
+ // create redlines in the copied footnote
+ for( size_t i = 0; nFirstRedline > -1 && i <= sal::static_int_cast<size_t>(nFirstRedline); i+=3)
+ {
+ OUString sType;
+ beans::PropertyValues aRedlineProperties( 3 );
+ // skip failed createTextCursorByRange()
+ if (redPos[i/3] == -1)
+ continue;
+ aFootnoteRedlines[i+1] >>= sType;
+ aFootnoteRedlines[i+2] >>= aRedlineProperties;
+ uno::Reference< text::XTextCursor > xCrsr = xDest->getText()->createTextCursor();
+ xCrsr->goRight(redPos[i/3], false);
+ xCrsr->goRight(redLen[i/3], true);
+ uno::Reference < text::XRedline > xRedline( xCrsr, uno::UNO_QUERY_THROW );
+ xRedline->makeRedline( sType, aRedlineProperties );
+ }
+
+ // remove processed redlines
+ for( size_t i = 0; nFirstRedline > -1 && i <= sal::static_int_cast<size_t>(nFirstRedline) + 2; i++)
+ aFootnoteRedlines.pop_front();
+
+ // remove temporary footnote
+ xFootnoteFirst->getAnchor()->setString("");
+ }
+ }
+
if (!IsRTFImport())
RemoveLastParagraph();
@@ -7135,6 +7229,8 @@ void DomainMapper_Impl::RemoveTopRedline( )
{
if (m_aRedlines.top().empty())
{
+ if (GetFootnoteCount() > -1)
+ return;
SAL_WARN("writerfilter.dmapper", "RemoveTopRedline called with empty stack");
throw uno::Exception("RemoveTopRedline failed", nullptr);
}
@@ -7514,7 +7610,7 @@ void DomainMapper_Impl::substream(Id rName,
break;
case NS_ooxml::LN_footnote:
case NS_ooxml::LN_endnote:
- PopFootOrEndnote();
+ PopFootOrEndnote( NS_ooxml::LN_footnote == rName );
break;
case NS_ooxml::LN_annotation :
PopAnnotation();
diff --git a/writerfilter/source/dmapper/DomainMapper_Impl.hxx b/writerfilter/source/dmapper/DomainMapper_Impl.hxx
index ab29d0a3a185..8441b4012152 100644
--- a/writerfilter/source/dmapper/DomainMapper_Impl.hxx
+++ b/writerfilter/source/dmapper/DomainMapper_Impl.hxx
@@ -525,6 +525,8 @@ private:
bool m_bCheckFootnoteStyle;
/// Skip paragraphs from the <w:separator/> footnote
SkipFootnoteSeparator m_eSkipFootnoteState;
+ /// preload footnotes
+ sal_Int32 m_nFootnotes;
bool m_bLineNumberingSet;
bool m_bIsInFootnoteProperties;
@@ -790,7 +792,7 @@ public:
bool IsInTOC() const { return m_bStartTOC; }
void PushFootOrEndnote( bool bIsFootnote );
- void PopFootOrEndnote();
+ void PopFootOrEndnote( bool bIsFootnote );
bool IsInFootOrEndnote() const { return m_bInFootOrEndnote; }
void StartCustomFootnote(const PropertyMapPtr pContext);
@@ -804,6 +806,8 @@ public:
SkipFootnoteSeparator GetSkipFootnoteState() const { return m_eSkipFootnoteState; }
void SetSkipFootnoteState(SkipFootnoteSeparator eId) { m_eSkipFootnoteState = eId; }
+ sal_Int32 GetFootnoteCount() const { return m_nFootnotes; }
+ void IncrementFootnoteCount() { ++m_nFootnotes; }
void PushAnnotation();
void PopAnnotation();
@@ -1091,6 +1095,7 @@ public:
/// store their data, and create them after frame creation
bool m_bIsActualParagraphFramed;
std::vector<css::uno::Any> aFramedRedlines;
+ std::deque<css::uno::Any> aFootnoteRedlines;
bool IsParaWithInlineObject() const { return m_bParaWithInlineObject; }
diff --git a/writerfilter/source/ooxml/OOXMLDocumentImpl.cxx b/writerfilter/source/ooxml/OOXMLDocumentImpl.cxx
index 813b9cbc75fb..938d977d3348 100644
--- a/writerfilter/source/ooxml/OOXMLDocumentImpl.cxx
+++ b/writerfilter/source/ooxml/OOXMLDocumentImpl.cxx
@@ -52,6 +52,7 @@ namespace writerfilter::ooxml
OOXMLDocumentImpl::OOXMLDocumentImpl(OOXMLStream::Pointer_t const & pStream, const uno::Reference<task::XStatusIndicator>& xStatusIndicator, bool bSkipImages, const uno::Sequence<beans::PropertyValue>& rDescriptor)
: mpStream(pStream)
, mxStatusIndicator(xStatusIndicator)
+ , mpXNoteStream()
, mnXNoteId(0)
, mbIsSubstream(false)
, mbSkipImages(bSkipImages)
@@ -270,8 +271,8 @@ void OOXMLDocumentImpl::resolveFootnote(Stream & rStream,
Id aType,
const sal_Int32 nNoteId)
{
- writerfilter::Reference<Stream>::Pointer_t pStream =
- getXNoteStream(OOXMLStream::FOOTNOTES, nNoteId);
+ if (!mpXNoteStream)
+ mpXNoteStream = getXNoteStream(OOXMLStream::FOOTNOTES, nNoteId);
Id nId;
switch (aType)
@@ -285,7 +286,7 @@ void OOXMLDocumentImpl::resolveFootnote(Stream & rStream,
break;
}
- resolveFastSubStreamWithId(rStream, pStream, nId);
+ resolveFastSubStreamWithId(rStream, mpXNoteStream, nId);
}
void OOXMLDocumentImpl::resolveEndnote(Stream & rStream,
diff --git a/writerfilter/source/ooxml/OOXMLDocumentImpl.hxx b/writerfilter/source/ooxml/OOXMLDocumentImpl.hxx
index 1848f8971766..35a77989605b 100644
--- a/writerfilter/source/ooxml/OOXMLDocumentImpl.hxx
+++ b/writerfilter/source/ooxml/OOXMLDocumentImpl.hxx
@@ -36,6 +36,7 @@ class OOXMLDocumentImpl : public OOXMLDocument
{
OOXMLStream::Pointer_t mpStream;
css::uno::Reference<css::task::XStatusIndicator> mxStatusIndicator;
+ writerfilter::Reference<Stream>::Pointer_t mpXNoteStream;
sal_Int32 mnXNoteId;
css::uno::Reference<css::frame::XModel> mxModel;
diff --git a/writerfilter/source/ooxml/OOXMLFastContextHandler.cxx b/writerfilter/source/ooxml/OOXMLFastContextHandler.cxx
index a115badfe72a..fe02597775bd 100644
--- a/writerfilter/source/ooxml/OOXMLFastContextHandler.cxx
+++ b/writerfilter/source/ooxml/OOXMLFastContextHandler.cxx
@@ -41,6 +41,7 @@
const sal_Unicode uCR = 0xd;
const sal_Unicode uFtnEdnRef = 0x2;
const sal_Unicode uFtnEdnSep = 0x3;
+const sal_Unicode uFtnSep = 0x5;
const sal_Unicode uTab = 0x9;
const sal_Unicode uPgNum = 0x0;
const sal_Unicode uNoBreakHyphen = 0x2011;
@@ -165,12 +166,22 @@ void SAL_CALL OOXMLFastContextHandler::startFastElement
mbPreserveSpace = Attribs->getValue(oox::NMSP_xml | oox::XML_space) == "preserve";
mbPreserveSpaceSet = true;
}
- if (Element == (NMSP_officeMath | XML_oMathPara))
+ if (Element == W_TOKEN(footnote))
+ {
+ // send uFtnSep to sign new footnote content, but skip footnote separators
+ if (!Attribs->hasAttribute(W_TOKEN(type)) ||
+ ( Attribs->getValue(W_TOKEN(type)) != "separator" &&
+ Attribs->getValue(W_TOKEN(type)) != "continuationSeparator" ))
+ {
+ mpParserState->setStartFootnote(true);
+ }
+ }
+ else if (Element == (NMSP_officeMath | XML_oMathPara))
{
mnMathJcVal = eMathParaJc::CENTER;
mbIsMathPara = true;
}
- if (Element == (NMSP_officeMath | XML_jc) && mpParent && mpParent->mpParent )
+ else if (Element == (NMSP_officeMath | XML_jc) && mpParent && mpParent->mpParent )
{
mbIsMathPara = true;
auto aAttrLst = Attribs->getFastAttributes();
@@ -370,6 +381,11 @@ void OOXMLFastContextHandler::startCharacterGroup()
mpStream->startCharacterGroup();
mpParserState->setInCharacterGroup(true);
mpParserState->resolveCharacterProperties(*mpStream);
+ if (mpParserState->isStartFootnote())
+ {
+ mpStream->utext(reinterpret_cast<const sal_uInt8*>(&uFtnSep), 1);
+ mpParserState->setStartFootnote(false);
+ }
}
// tdf#108714 : if we have a postponed break information,
@@ -1340,7 +1356,9 @@ void OOXMLFastContextHandlerXNote::lcl_startFastElement
mbForwardEventsSaved = isForwardEvents();
// If this is the note we're looking for or this is the footnote separator one.
- if (mnMyXNoteId == getXNoteId() || static_cast<sal_uInt32>(mnMyXNoteType) == NS_ooxml::LN_Value_doc_ST_FtnEdn_separator)
+ if (mnMyXNoteId == getXNoteId() ||
+ static_cast<sal_uInt32>(mnMyXNoteType) == NS_ooxml::LN_Value_doc_ST_FtnEdn_separator ||
+ mpParserState->isStartFootnote())
setForwardEvents(true);
else
setForwardEvents(false);
diff --git a/writerfilter/source/ooxml/OOXMLParserState.cxx b/writerfilter/source/ooxml/OOXMLParserState.cxx
index c103566d5331..3d35cbb9deac 100644
--- a/writerfilter/source/ooxml/OOXMLParserState.cxx
+++ b/writerfilter/source/ooxml/OOXMLParserState.cxx
@@ -40,7 +40,8 @@ OOXMLParserState::OOXMLParserState() :
inTxbxContent(false),
savedInParagraphGroup(false),
savedInCharacterGroup(false),
- savedLastParagraphInSection(false)
+ savedLastParagraphInSection(false),
+ mbStartFootnote(false)
{
}
@@ -275,6 +276,11 @@ void OOXMLParserState::endTxbxContent()
inTxbxContent = false;
}
+void OOXMLParserState::setStartFootnote(bool bStartFootnote)
+{
+ mbStartFootnote = bStartFootnote;
+}
+
}
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/writerfilter/source/ooxml/OOXMLParserState.hxx b/writerfilter/source/ooxml/OOXMLParserState.hxx
index 422ba0b776ef..7d094dbd42b7 100644
--- a/writerfilter/source/ooxml/OOXMLParserState.hxx
+++ b/writerfilter/source/ooxml/OOXMLParserState.hxx
@@ -58,6 +58,7 @@ class OOXMLParserState final : public virtual SvRefBase
bool savedLastParagraphInSection;
std::vector<SavedAlternateState> maSavedAlternateStates;
std::vector<OOXMLPropertySet::Pointer_t> mvPostponedBreaks;
+ bool mbStartFootnote;
public:
typedef tools::SvRef<OOXMLParserState> Pointer_t;
@@ -112,6 +113,9 @@ public:
void startTxbxContent();
void endTxbxContent();
+
+ void setStartFootnote(bool bStartFootnote);
+ bool isStartFootnote() const { return mbStartFootnote; }
};
}