summary | refs | log | tree | commit | diff
path: root/sax
diff options
context:
space:
mode:
author: Matúš Kukan <matus.kukan@collabora.com> 2014-10-03 23:12:56 +0200
committer: Matúš Kukan <matus.kukan@collabora.com> 2014-10-23 14:30:29 +0200
commit: 0c24faee6b622971d7d8f989da36029200cbd2a5 (patch)
tree: be33db223c5beba6aee04b58207cde009934b6c3 /sax
parent: 143bbb20a3f4d757e2493fc078deea7dbaa1e14f (diff)
FastSerializer: Also use cache for writing to ForMerge if we are inside mark()
To ensure the correct order of calls to the ForMerge methods, always call flush before touching maMarkStack. This was the missing piece in optimizing the write() methods, because writeBytes() was checking each time what to call. E.g. for Calc documents we don't use maMarkStack at all. So, just transfer the output to the proper "ForMerge" when inside mark() and allow optimizations. This commit makes write() methods almost 1/3 as fast.
Change-Id: I96c13888206c81f87e29b998839f78ea9d5570af
Diffstat (limited to 'sax')
-rw-r--r-- sax/source/tools/CachedOutputStream.hxx 37
-rw-r--r-- sax/source/tools/fastserializer.cxx 53
-rw-r--r-- sax/source/tools/fastserializer.hxx 12
3 files changed, 75 insertions, 27 deletions
diff --git a/sax/source/tools/CachedOutputStream.hxx b/sax/source/tools/CachedOutputStream.hxx
index 8877bb779a4e..fc74118a63ea 100644
--- a/sax/source/tools/CachedOutputStream.hxx
+++ b/sax/source/tools/CachedOutputStream.hxx
@@ -17,9 +17,17 @@
#include <cstring>
#include <cstdlib>
+#include <boost/shared_ptr.hpp>
namespace sax_fastparser {
+class ForMergeBase
+{
+public:
+ virtual ~ForMergeBase() {}
+ virtual void append( const css::uno::Sequence<sal_Int8>& rWhat ) = 0;
+};
+
class CachedOutputStream
{
/// When buffer hits this size, it's written to mxOutputStream
@@ -30,11 +38,16 @@ class CachedOutputStream
sal_Int32 mnCacheWrittenSize;
const css::uno::Sequence<sal_Int8> mpCache;
uno_Sequence *pSeq;
+ bool mbWriteToOutStream;
+ /// ForMerge structure is used for sorting elements in Writer
+ boost::shared_ptr< ForMergeBase > mpForMerge;
public:
CachedOutputStream() : mnCacheWrittenSize(0)
, mpCache(mnMaximumSize)
, pSeq(mpCache.get())
+ , mbWriteToOutStream(true)
+ , mpForMerge(NULL)
{}
~CachedOutputStream() {}
@@ -48,6 +61,20 @@ public:
mxOutputStream = xOutputStream;
}
+ void setOutput( boost::shared_ptr< ForMergeBase > pForMerge )
+ {
+ flush();
+ mbWriteToOutStream = false;
+ mpForMerge = pForMerge;
+ }
+
+ void resetOutputToStream()
+ {
+ flush();
+ mbWriteToOutStream = true;
+ mpForMerge.reset();
+ }
+
/// cache string and if limit is hit, flush
void writeBytes( const sal_Int8* pStr, sal_Int32 nLen )
{
@@ -61,7 +88,10 @@ public:
// In that case, just flush data and write immediately.
if (nLen > mnMaximumSize)
{
- mxOutputStream->writeBytes( css::uno::Sequence<sal_Int8>(pStr, nLen) );
+ if (mbWriteToOutStream)
+ mxOutputStream->writeBytes( css::uno::Sequence<sal_Int8>(pStr, nLen) );
+ else
+ mpForMerge->append( css::uno::Sequence<sal_Int8>(pStr, nLen) );
return;
}
}
@@ -75,7 +105,10 @@ public:
{
// resize the Sequence to written size
pSeq->nElements = mnCacheWrittenSize;
- mxOutputStream->writeBytes( mpCache );
+ if (mbWriteToOutStream)
+ mxOutputStream->writeBytes( mpCache );
+ else
+ mpForMerge->append( mpCache );
// and next time write to the beginning
mnCacheWrittenSize = 0;
}
diff --git a/sax/source/tools/fastserializer.cxx b/sax/source/tools/fastserializer.cxx
index 0f05ec9a3ff0..ac8376b3be84 100644
--- a/sax/source/tools/fastserializer.cxx
+++ b/sax/source/tools/fastserializer.cxx
@@ -57,6 +57,7 @@ namespace sax_fastparser {
FastSaxSerializer::FastSaxSerializer( const css::uno::Reference< css::io::XOutputStream >& xOutputStream )
: maCachedOutputStream()
, maMarkStack()
+ , mbMarkStackEmpty(true)
, mpDoubleStr(NULL)
, mnDoubleStrCapacity(RTL_STR_MAX_VALUEOFDOUBLE)
{
@@ -152,6 +153,7 @@ namespace sax_fastparser {
void FastSaxSerializer::endDocument()
{
+ assert(mbMarkStackEmpty && maMarkStack.empty());
maCachedOutputStream.flush();
}
@@ -186,8 +188,11 @@ namespace sax_fastparser {
void FastSaxSerializer::startFastElement( ::sal_Int32 Element, FastAttributeList* pAttrList )
{
- if ( !maMarkStack.empty() )
+ if ( !mbMarkStackEmpty )
+ {
+ maCachedOutputStream.flush();
maMarkStack.top()->setCurrentElement( Element );
+ }
#ifdef DBG_UTIL
m_DebugStartedElements.push(Element);
@@ -222,8 +227,11 @@ namespace sax_fastparser {
void FastSaxSerializer::singleFastElement( ::sal_Int32 Element, FastAttributeList* pAttrList )
{
- if ( !maMarkStack.empty() )
+ if ( !mbMarkStackEmpty )
+ {
+ maCachedOutputStream.flush();
maMarkStack.top()->setCurrentElement( Element );
+ }
writeBytes(sOpeningBracket, N_CHARS(sOpeningBracket));
@@ -303,28 +311,47 @@ namespace sax_fastparser {
{
boost::shared_ptr< ForMerge > pSort( new ForSort( aOrder ) );
maMarkStack.push( pSort );
+ maCachedOutputStream.setOutput( pSort );
}
else
{
boost::shared_ptr< ForMerge > pMerge( new ForMerge( ) );
maMarkStack.push( pMerge );
+ maCachedOutputStream.setOutput( pMerge );
}
+ mbMarkStackEmpty = false;
}
void FastSaxSerializer::mergeTopMarks( sax_fastparser::MergeMarksEnum eMergeType )
{
- if ( maMarkStack.empty() )
+ SAL_WARN_IF(mbMarkStackEmpty, "sax", "Empty mark stack - nothing to merge");
+ if ( mbMarkStackEmpty )
return;
+ // flush, so that we get everything in getData()
+ maCachedOutputStream.flush();
+
if ( maMarkStack.size() == 1 && eMergeType != MERGE_MARKS_IGNORE)
{
- writeOutput( maMarkStack.top()->getData() );
+ Sequence<sal_Int8> aSeq( maMarkStack.top()->getData() );
maMarkStack.pop();
+ mbMarkStackEmpty = true;
+ maCachedOutputStream.resetOutputToStream();
+ maCachedOutputStream.writeBytes( aSeq.getConstArray(), aSeq.getLength() );
return;
}
const Int8Sequence aMerge( maMarkStack.top()->getData() );
maMarkStack.pop();
+ if (maMarkStack.empty())
+ {
+ mbMarkStackEmpty = true;
+ maCachedOutputStream.resetOutputToStream();
+ }
+ else
+ {
+ maCachedOutputStream.setOutput( maMarkStack.top() );
+ }
switch ( eMergeType )
{
@@ -338,26 +365,12 @@ namespace sax_fastparser {
void FastSaxSerializer::writeBytes( const Sequence< sal_Int8 >& rData )
{
- writeBytes( reinterpret_cast<const char*>(rData.getConstArray()), rData.getLength() );
+ maCachedOutputStream.writeBytes( rData.getConstArray(), rData.getLength() );
}
void FastSaxSerializer::writeBytes( const char* pStr, size_t nLen )
{
- if ( maMarkStack.empty() )
- writeOutput( reinterpret_cast<const sal_Int8*>(pStr), nLen );
- else
- maMarkStack.top()->append( Sequence< sal_Int8 >(
- reinterpret_cast<const sal_Int8*>(pStr), nLen) );
- }
-
- void FastSaxSerializer::writeOutput( const Sequence< ::sal_Int8 >& aData )
- {
- writeOutput( aData.getConstArray(), aData.getLength() );
- }
-
- void FastSaxSerializer::writeOutput( const sal_Int8* pStr, size_t nLen )
- {
- maCachedOutputStream.writeBytes( pStr, nLen );
+ maCachedOutputStream.writeBytes( reinterpret_cast<const sal_Int8*>(pStr), nLen );
}
FastSaxSerializer::Int8Sequence& FastSaxSerializer::ForMerge::getData()
diff --git a/sax/source/tools/fastserializer.hxx b/sax/source/tools/fastserializer.hxx
index 5b740cee338b..8500b680f65d 100644
--- a/sax/source/tools/fastserializer.hxx
+++ b/sax/source/tools/fastserializer.hxx
@@ -148,11 +148,14 @@ public:
void mergeTopMarks( sax_fastparser::MergeMarksEnum eMergeType = sax_fastparser::MERGE_MARKS_APPEND );
private:
- /// Helper class to cache data and write in chunks to XOutputStream
+ /** Helper class to cache data and write in chunks to XOutputStream or ForMerge::append.
+ * Its flush method needs to be called before touching maMarkStack
+ * to ensure correct order of ForSort methods.
+ */
CachedOutputStream maCachedOutputStream;
::com::sun::star::uno::Reference< ::com::sun::star::xml::sax::XFastTokenHandler > mxFastTokenHandler;
- class ForMerge
+ class ForMerge : public ForMergeBase
{
Int8Sequence maData;
Int8Sequence maPostponed;
@@ -168,7 +171,7 @@ private:
#endif
virtual void prepend( const Int8Sequence &rWhat );
- virtual void append( const Int8Sequence &rWhat );
+ virtual void append( const Int8Sequence &rWhat ) SAL_OVERRIDE;
void postpone( const Int8Sequence &rWhat );
protected:
@@ -205,6 +208,7 @@ private:
};
::std::stack< boost::shared_ptr< ForMerge > > maMarkStack;
+ bool mbMarkStackEmpty;
// Would be better to use OStringBuffer instead of these two
// but then we couldn't get the rtl_String* member :-(
rtl_String *mpDoubleStr;
@@ -217,8 +221,6 @@ private:
void writeTokenValueList();
void writeFastAttributeList( FastAttributeList* pAttrList );
- void writeOutput( const sal_Int8* pStr, size_t nLen );
- void writeOutput( const css::uno::Sequence< ::sal_Int8 >& aData );
/** Forward the call to the output stream, or write to the stack.