diff options
author | Noel Grandin <noel.grandin@collabora.co.uk> | 2018-07-05 12:33:51 +0200 |
---|---|---|
committer | Noel Grandin <noel.grandin@collabora.co.uk> | 2018-07-08 11:48:05 +0200 |
commit | a2193f8f33565cc896592acb9d3ab65c756d97fb (patch) | |
tree | f5b8fe7cfe1710b452794dede14451ad5f9abe6e | |
parent | 8164399df0df976784ddc1a76b46939c53ae51ee (diff) |
tdf#79878 perf loading docx file, sax improvements
These are the smaller improvements; together they make about a 5%
difference:
- use std::vector instead of the default std::deque as the underlying container of the std::stack members
- use std::move on pendingCharacters instead of copying
- in FastAttributeList::add, when reallocating the buffer, allocate at least
twice the existing size instead of growing to only what we need
- in FastAttributeList, add getAttributeIndex and the matching *ByIndex
accessors (getAsIntegerByIndex, getAsCharByIndex, getValueByIndex), so we can avoid iterating the attribute list more often than necessary
Change-Id: I3e3380ea50b77c6845b66e83404e245778ec06eb
Reviewed-on: https://gerrit.libreoffice.org/57021
Tested-by: Jenkins
Reviewed-by: Noel Grandin <noel.grandin@collabora.co.uk>
-rw-r--r-- | include/sax/fastattribs.hxx | 11 | ||||
-rw-r--r-- | sax/source/fastparser/fastparser.cxx | 13 | ||||
-rw-r--r-- | sax/source/tools/fastattribs.cxx | 18 | ||||
-rw-r--r-- | writerfilter/source/ooxml/OOXMLFactory.cxx | 32 |
4 files changed, 48 insertions, 26 deletions
diff --git a/include/sax/fastattribs.hxx b/include/sax/fastattribs.hxx index b5fbe225ee28..029f3a12f780 100644 --- a/include/sax/fastattribs.hxx +++ b/include/sax/fastattribs.hxx @@ -93,6 +93,9 @@ public: bool getAsInteger( sal_Int32 nToken, sal_Int32 &rInt) const; bool getAsDouble( sal_Int32 nToken, double &rDouble) const; bool getAsChar( sal_Int32 nToken, const char*& rPos ) const; + sal_Int32 getAsIntegerByIndex( sal_Int32 nTokenIndex ) const; + const char* getAsCharByIndex( sal_Int32 nTokenIndex ) const; + OUString getValueByIndex( sal_Int32 nTokenIndex ) const; // XFastAttributeList virtual sal_Bool SAL_CALL hasAttribute( ::sal_Int32 Token ) override; @@ -103,6 +106,14 @@ public: virtual css::uno::Sequence< css::xml::Attribute > SAL_CALL getUnknownAttributes( ) override; virtual css::uno::Sequence< css::xml::FastAttribute > SAL_CALL getFastAttributes() override; + sal_Int32 getAttributeIndex( ::sal_Int32 Token ) + { + for (size_t i=0; i<maAttributeTokens.size(); ++i) + if (maAttributeTokens[i] == Token) + return i; + return -1; + } + static FastAttributeList* castToFastAttributeList( const css::uno::Reference< css::xml::sax::XFastAttributeList >& xAttrList ) { diff --git a/sax/source/fastparser/fastparser.cxx b/sax/source/fastparser/fastparser.cxx index 2765584a08fc..9477559c12e8 100644 --- a/sax/source/fastparser/fastparser.cxx +++ b/sax/source/fastparser/fastparser.cxx @@ -178,13 +178,13 @@ struct Entity : public ParserData void throwException( const ::rtl::Reference< FastLocatorImpl > &xDocumentLocator, bool mbDuringParse ); - std::stack< NameWithToken > maNamespaceStack; + std::stack< NameWithToken, std::vector<NameWithToken> > maNamespaceStack; /* Context for main thread consuming events. 
* startElement() stores the data, which characters() and endElement() uses */ - std::stack< SaxContext> maContextStack; + std::stack< SaxContext, std::vector<SaxContext> > maContextStack; // Determines which elements of maNamespaceDefines are valid in current context - std::stack< sal_uInt32 > maNamespaceCount; + std::stack< sal_uInt32, std::vector<sal_uInt32> > maNamespaceCount; std::vector< std::shared_ptr< NamespaceDefine > > maNamespaceDefines; @@ -270,7 +270,7 @@ private: ParserData maData; /// Cached parser configuration for next call of parseStream(). Entity *mpTop; /// std::stack::top() is amazingly slow => cache this. - std::stack< Entity > maEntities; /// Entity stack for each call of parseStream(). + std::stack< Entity > maEntities; /// Entity stack for each call of parseStream(). OUString pendingCharacters; /// Data from characters() callback that needs to be sent. }; @@ -674,11 +674,12 @@ sal_Int32 FastSaxParserImpl::GetTokenWithPrefix( const xmlChar* pPrefix, int nPr sal_uInt32 nNamespace = rEntity.maNamespaceCount.top(); while( nNamespace-- ) { - const OString& rPrefix( rEntity.maNamespaceDefines[nNamespace]->maPrefix ); + const auto & rNamespaceDefine = rEntity.maNamespaceDefines[nNamespace]; + const OString& rPrefix( rNamespaceDefine->maPrefix ); if( (rPrefix.getLength() == nPrefixLen) && (strncmp( rPrefix.getStr(), XML_CAST( pPrefix ), nPrefixLen ) == 0 ) ) { - nNamespaceToken = rEntity.maNamespaceDefines[nNamespace]->mnToken; + nNamespaceToken = rNamespaceDefine->mnToken; break; } diff --git a/sax/source/tools/fastattribs.cxx b/sax/source/tools/fastattribs.cxx index a9f0baf7a2c0..30a764368947 100644 --- a/sax/source/tools/fastattribs.cxx +++ b/sax/source/tools/fastattribs.cxx @@ -86,7 +86,7 @@ void FastAttributeList::add( sal_Int32 nToken, const sal_Char* pValue, size_t nV maAttributeValues.push_back( maAttributeValues.back() + nValueLength + 1 ); if (maAttributeValues.back() > mnChunkLength) { - mnChunkLength = maAttributeValues.back(); + 
mnChunkLength = std::max(mnChunkLength * 2, maAttributeValues.back()); mpChunk = static_cast<sal_Char *>(realloc( mpChunk, mnChunkLength )); } strncpy(mpChunk + nWritePosition, pValue, nValueLength); @@ -166,6 +166,11 @@ bool FastAttributeList::getAsInteger( sal_Int32 nToken, sal_Int32 &rInt) const return false; } +sal_Int32 FastAttributeList::getAsIntegerByIndex( sal_Int32 nTokenIndex ) const +{ + return rtl_str_toInt32( getFastAttributeValue(nTokenIndex), 10 ); +} + bool FastAttributeList::getAsDouble( sal_Int32 nToken, double &rDouble) const { rDouble = 0.0; @@ -193,6 +198,12 @@ bool FastAttributeList::getAsChar( sal_Int32 nToken, const char*& rPos ) const return false; } +const char* FastAttributeList::getAsCharByIndex( sal_Int32 nTokenIndex ) const +{ + sal_Int32 nOffset = maAttributeValues[nTokenIndex]; + return mpChunk + nOffset; +} + OUString FastAttributeList::getValue( ::sal_Int32 Token ) { for (size_t i = 0; i < maAttributeTokens.size(); ++i) @@ -202,6 +213,11 @@ OUString FastAttributeList::getValue( ::sal_Int32 Token ) throw SAXException(); } +OUString FastAttributeList::getValueByIndex( ::sal_Int32 nTokenIndex ) const +{ + return OUString( getFastAttributeValue(nTokenIndex), AttributeValueLength(nTokenIndex), RTL_TEXTENCODING_UTF8 ); +} + OUString FastAttributeList::getOptionalValue( ::sal_Int32 Token ) { for (size_t i = 0; i < maAttributeTokens.size(); ++i) diff --git a/writerfilter/source/ooxml/OOXMLFactory.cxx b/writerfilter/source/ooxml/OOXMLFactory.cxx index 91eb7b0e4c96..4c41684cd594 100644 --- a/writerfilter/source/ooxml/OOXMLFactory.cxx +++ b/writerfilter/source/ooxml/OOXMLFactory.cxx @@ -36,7 +36,7 @@ OOXMLFactory_ns::~OOXMLFactory_ns() // class OOXMLFactory void OOXMLFactory::attributes(OOXMLFastContextHandler * pHandler, - const uno::Reference< xml::sax::XFastAttributeList > & Attribs) + const uno::Reference< xml::sax::XFastAttributeList > & xAttribs) { Id nDefine = pHandler->getDefine(); OOXMLFactory_ns::Pointer_t pFactory = 
getFactoryForNamespace(nDefine); @@ -45,7 +45,7 @@ void OOXMLFactory::attributes(OOXMLFastContextHandler * pHandler, return; sax_fastparser::FastAttributeList *pAttribs = - sax_fastparser::FastAttributeList::castToFastAttributeList( Attribs ); + sax_fastparser::FastAttributeList::castToFastAttributeList( xAttribs ); const AttributeInfo *pAttr = pFactory->getAttributeInfoArray(nDefine); if (!pAttr) @@ -54,7 +54,8 @@ void OOXMLFactory::attributes(OOXMLFastContextHandler * pHandler, for (; pAttr->m_nToken != -1; ++pAttr) { sal_Int32 nToken = pAttr->m_nToken; - if (!pAttribs->hasAttribute(nToken)) + sal_Int32 nAttrIndex = pAttribs->getAttributeIndex(nToken); + if (nAttrIndex == -1) continue; Id nId = pFactory->getResourceId(nDefine, nToken); @@ -63,8 +64,7 @@ void OOXMLFactory::attributes(OOXMLFastContextHandler * pHandler, { case ResourceType::Boolean: { - const char *pValue = ""; - pAttribs->getAsChar(nToken, pValue); + const char *pValue = pAttribs->getAsCharByIndex(nAttrIndex); OOXMLValue::Pointer_t xValue(OOXMLBooleanValue::Create(pValue)); pHandler->newProperty(nId, xValue); pFactory->attributeAction(pHandler, nToken, xValue); @@ -72,7 +72,7 @@ void OOXMLFactory::attributes(OOXMLFastContextHandler * pHandler, break; case ResourceType::String: { - OUString aValue(pAttribs->getValue(nToken)); + OUString aValue(pAttribs->getValueByIndex(nAttrIndex)); OOXMLValue::Pointer_t xValue(new OOXMLStringValue(aValue)); pHandler->newProperty(nId, xValue); pFactory->attributeAction(pHandler, nToken, xValue); @@ -80,8 +80,7 @@ void OOXMLFactory::attributes(OOXMLFastContextHandler * pHandler, break; case ResourceType::Integer: { - sal_Int32 nValue; - pAttribs->getAsInteger(nToken,nValue); + sal_Int32 nValue = pAttribs->getAsIntegerByIndex(nAttrIndex); OOXMLValue::Pointer_t xValue = OOXMLIntegerValue::Create(nValue); pHandler->newProperty(nId, xValue); pFactory->attributeAction(pHandler, nToken, xValue); @@ -89,8 +88,7 @@ void OOXMLFactory::attributes(OOXMLFastContextHandler * 
pHandler, break; case ResourceType::Hex: { - const char *pValue = ""; - pAttribs->getAsChar(nToken, pValue); + const char *pValue = pAttribs->getAsCharByIndex(nAttrIndex); OOXMLValue::Pointer_t xValue(new OOXMLHexValue(pValue)); pHandler->newProperty(nId, xValue); pFactory->attributeAction(pHandler, nToken, xValue); @@ -98,8 +96,7 @@ void OOXMLFactory::attributes(OOXMLFastContextHandler * pHandler, break; case ResourceType::HexColor: { - const char *pValue = ""; - pAttribs->getAsChar(nToken, pValue); + const char *pValue = pAttribs->getAsCharByIndex(nAttrIndex); OOXMLValue::Pointer_t xValue(new OOXMLHexColorValue(pValue)); pHandler->newProperty(nId, xValue); pFactory->attributeAction(pHandler, nToken, xValue); @@ -107,8 +104,7 @@ void OOXMLFactory::attributes(OOXMLFastContextHandler * pHandler, break; case ResourceType::TwipsMeasure: { - const char *pValue = ""; - pAttribs->getAsChar(nToken, pValue); + const char *pValue = pAttribs->getAsCharByIndex(nAttrIndex); OOXMLValue::Pointer_t xValue(new OOXMLTwipsMeasureValue(pValue)); pHandler->newProperty(nId, xValue); pFactory->attributeAction(pHandler, nToken, xValue); @@ -116,8 +112,7 @@ void OOXMLFactory::attributes(OOXMLFastContextHandler * pHandler, break; case ResourceType::HpsMeasure: { - const char *pValue = ""; - pAttribs->getAsChar(nToken, pValue); + const char *pValue = pAttribs->getAsCharByIndex(nAttrIndex); OOXMLValue::Pointer_t xValue(new OOXMLHpsMeasureValue(pValue)); pHandler->newProperty(nId, xValue); pFactory->attributeAction(pHandler, nToken, xValue); @@ -125,8 +120,7 @@ void OOXMLFactory::attributes(OOXMLFastContextHandler * pHandler, break; case ResourceType::MeasurementOrPercent: { - const char *pValue = ""; - pAttribs->getAsChar(nToken, pValue); + const char *pValue = pAttribs->getAsCharByIndex(nAttrIndex); OOXMLValue::Pointer_t xValue(new OOXMLMeasurementOrPercentValue(pValue)); pHandler->newProperty(nId, xValue); pFactory->attributeAction(pHandler, nToken, xValue); @@ -135,7 +129,7 @@ void 
OOXMLFactory::attributes(OOXMLFastContextHandler * pHandler, case ResourceType::List: { sal_uInt32 nValue; - if (pFactory->getListValue(pAttr->m_nRef, Attribs->getValue(nToken), nValue)) + if (pFactory->getListValue(pAttr->m_nRef, pAttribs->getValueByIndex(nAttrIndex), nValue)) { OOXMLValue::Pointer_t xValue = OOXMLIntegerValue::Create(nValue); pHandler->newProperty(nId, xValue); |