summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Noel Grandin <noel.grandin@collabora.co.uk> 2018-07-19 15:53:00 +0200
committer Noel Grandin <noel.grandin@collabora.co.uk> 2018-07-20 08:58:45 +0200
commit c6acb048e6f40ead4110750a79eeb3d6d6d5865d (patch)
tree a24f88bb0604edecd946c947b3df3a87d9a05646
parent 7ef5d73a82c5e3b57cfbd1bb0c6fd77528345663 (diff)
tdf#79878 perf loading docx file, pendingChars
Use std::vector<char> for pendingCharacters in SAXParser to avoid calling the OUString utf8 conversion routine more than once per character block. We seem to hit multiple characters() callbacks per chunk of text fairly often in loading writer docs. This is only good for about 0.5% performance. Change-Id: I354bb4efe9d883c4bebf49bc96dd44be4f2b1610 Reviewed-on: https://gerrit.libreoffice.org/57731 Tested-by: Jenkins Reviewed-by: Noel Grandin <noel.grandin@collabora.co.uk>
-rw-r--r--sax/source/fastparser/fastparser.cxx63
1 files changed, 36 insertions, 27 deletions
diff --git a/sax/source/fastparser/fastparser.cxx b/sax/source/fastparser/fastparser.cxx
index 4ae2f4fdaf4a..3486dcd53759 100644
--- a/sax/source/fastparser/fastparser.cxx
+++ b/sax/source/fastparser/fastparser.cxx
@@ -273,7 +273,7 @@ private:
Entity *mpTop; /// std::stack::top() is amazingly slow => cache this.
std::stack< Entity > maEntities; /// Entity stack for each call of parseStream().
- OUString pendingCharacters; /// Data from characters() callback that needs to be sent.
+ std::vector<char> pendingCharacters; /// Data from characters() callback that needs to be sent.
};
} // namespace sax_fastparser
@@ -444,8 +444,7 @@ void Entity::startElement( Event const *pEvent )
xContext->startFastElement( nElementToken, xAttr );
}
// swap the reference we own in to avoid referencing thrash.
- maContextStack.top().mxContext.set( xContext.get() );
- xContext.set( nullptr, SAL_NO_ACQUIRE );
+ maContextStack.top().mxContext = std::move( xContext );
}
catch (...)
{
@@ -461,10 +460,10 @@ void Entity::characters( const OUString& sChars )
return;
}
- const Reference< XFastContextHandler >& xContext( maContextStack.top().mxContext );
- if( xContext.is() ) try
+ XFastContextHandler * pContext( maContextStack.top().mxContext.get() );
+ if( pContext ) try
{
- xContext->characters( sChars );
+ pContext->characters( sChars );
}
catch (...)
{
@@ -481,19 +480,20 @@ void Entity::endElement()
}
const SaxContext& aContext = maContextStack.top();
- const Reference< XFastContextHandler >& xContext( aContext.mxContext );
- if( xContext.is() ) try
- {
- sal_Int32 nElementToken = aContext.mnElementToken;
- if( nElementToken != FastToken::DONTKNOW )
- xContext->endFastElement( nElementToken );
- else
- xContext->endUnknownElement( aContext.maNamespace, aContext.maElementName );
- }
- catch (...)
- {
- saveException( ::cppu::getCaughtException() );
- }
+ XFastContextHandler* pContext( aContext.mxContext.get() );
+ if( pContext )
+ try
+ {
+ sal_Int32 nElementToken = aContext.mnElementToken;
+ if( nElementToken != FastToken::DONTKNOW )
+ pContext->endFastElement( nElementToken );
+ else
+ pContext->endUnknownElement( aContext.maNamespace, aContext.maElementName );
+ }
+ catch (...)
+ {
+ saveException( ::cppu::getCaughtException() );
+ }
maContextStack.pop();
}
@@ -1083,7 +1083,7 @@ void FastSaxParserImpl::parse()
void FastSaxParserImpl::callbackStartElement(const xmlChar *localName , const xmlChar* prefix, const xmlChar* URI,
int numNamespaces, const xmlChar** namespaces, int numAttributes, const xmlChar **attributes)
{
- if (!pendingCharacters.isEmpty())
+ if (!pendingCharacters.empty())
sendPendingCharacters();
Entity& rEntity = getEntity();
if( rEntity.maNamespaceCount.empty() )
@@ -1254,7 +1254,7 @@ void FastSaxParserImpl::addUnknownElementWithPrefix(const xmlChar **attributes,
void FastSaxParserImpl::callbackEndElement()
{
- if (!pendingCharacters.isEmpty())
+ if (!pendingCharacters.empty())
sendPendingCharacters();
Entity& rEntity = getEntity();
SAL_WARN_IF(rEntity.maNamespaceCount.empty(), "sax", "Empty NamespaceCount");
@@ -1279,24 +1279,33 @@ void FastSaxParserImpl::callbackCharacters( const xmlChar* s, int nLen )
// simpler FastSaxParser's character callback provides the whole string at once,
// so merge data from possible multiple calls and send them at once (before the element
// ends or another one starts).
- pendingCharacters += OUString( XML_CAST( s ), nLen, RTL_TEXTENCODING_UTF8 );
+ //
+ // We use a std::vector<char> to avoid calling into the OUString constructor more than once when
+ // we have multiple callbackCharacters() calls that we have to merge, which happens surprisingly
+ // often in writer documents.
+ int nOriginalLen = pendingCharacters.size();
+ pendingCharacters.resize(nOriginalLen + nLen);
+ memcpy(pendingCharacters.data() + nOriginalLen, s, nLen);
}
void FastSaxParserImpl::sendPendingCharacters()
{
Entity& rEntity = getEntity();
- Event& rEvent = rEntity.getEvent( CHARACTERS );
- rEvent.msChars = pendingCharacters;
- pendingCharacters.clear();
+ OUString sChars( pendingCharacters.data(), pendingCharacters.size(), RTL_TEXTENCODING_UTF8 );
if (rEntity.mbEnableThreads)
+ {
+ Event& rEvent = rEntity.getEvent( CHARACTERS );
+ rEvent.msChars = sChars;
produce();
+ }
else
- rEntity.characters( rEvent.msChars );
+ rEntity.characters( sChars );
+ pendingCharacters.resize(0);
}
void FastSaxParserImpl::callbackProcessingInstruction( const xmlChar *target, const xmlChar *data )
{
- if (!pendingCharacters.isEmpty())
+ if (!pendingCharacters.empty())
sendPendingCharacters();
Entity& rEntity = getEntity();
Event& rEvent = rEntity.getEvent( PROCESSING_INSTRUCTION );