summary refs log tree commit diff
path: root/sax
diff options
context:
space:
mode:
author Mohammed Abdul Azeem <azeemmysore@gmail.com> 2016-08-03 17:27:48 +0530
committer Noel Grandin <noelgrandin@gmail.com> 2016-08-10 11:09:12 +0000
commit 18edd88edc0c45d9c3b8f6faa45bab481ec078f5 (patch)
tree a296122ddec37e8edd77b77306401c481d371fd2 /sax
parent e5d24f50b7e527a5991a1d21f40edcb537eeb72d (diff)
GSoC - Making legacyfastparser use tokens:
This tokenizes some elements, de-tokenizes them while consuming, and emits the elements through the legacy interface. DummyTokenHandler exists only to test correctness. Change-Id: I1ea1e4d806ed4d426215f93b3f6b66a9776f6479 Reviewed-on: https://gerrit.libreoffice.org/27849 Tested-by: Jenkins <ci@libreoffice.org> Reviewed-by: Noel Grandin <noelgrandin@gmail.com>
Diffstat (limited to 'sax')
-rw-r--r-- sax/qa/cppunit/xmlimport.cxx 89
-rw-r--r-- sax/source/fastparser/fastparser.cxx 77
-rw-r--r-- sax/source/fastparser/legacyfastparser.cxx 94
3 files changed, 206 insertions, 54 deletions
diff --git a/sax/qa/cppunit/xmlimport.cxx b/sax/qa/cppunit/xmlimport.cxx
index f7fcd739d4c3..c50a6749fef3 100644
--- a/sax/qa/cppunit/xmlimport.cxx
+++ b/sax/qa/cppunit/xmlimport.cxx
@@ -24,6 +24,7 @@
#include <test/bootstrapfixture.hxx>
#include <cppuhelper/weak.hxx>
#include <cppuhelper/implbase.hxx>
+#include <com/sun/star/beans/Pair.hpp>
#include <com/sun/star/xml/sax/XDocumentHandler.hpp>
#include <com/sun/star/xml/sax/XFastDocumentHandler.hpp>
#include <com/sun/star/xml/sax/XFastAttributeList.hpp>
@@ -40,6 +41,7 @@
#include <osl/conditn.hxx>
#include <unotools/ucbstreamhelper.hxx>
#include <unotools/streamwrap.hxx>
+#include <sax/fastattribs.hxx>
#include <string>
#include <stack>
#include <deque>
@@ -264,6 +266,73 @@ void SAL_CALL NSDocumentHandler::startElement( const OUString& aName, const Refe
CPPUNIT_ASSERT(false);
}
+class DummyTokenHandler : public cppu::WeakImplHelper< XFastTokenHandler >,
+ public sax_fastparser::FastTokenHandlerBase
+{
+public:
+ const static OUString tokens[];
+ const static OUString namespaceURIs[];
+ const static OUString namespacePrefixes[];
+
+ // XFastTokenHandler
+ virtual Sequence< sal_Int8 > SAL_CALL getUTF8Identifier( sal_Int32 nToken )
+ throw (css::uno::RuntimeException, std::exception) override;
+ virtual sal_Int32 SAL_CALL getTokenFromUTF8( const css::uno::Sequence< sal_Int8 >& Identifier )
+ throw (css::uno::RuntimeException, std::exception) override;
+ //FastTokenHandlerBase
+ virtual sal_Int32 getTokenDirect( const char *pToken, sal_Int32 nLength ) const override;
+};
+
+const OUString DummyTokenHandler::tokens[] = { "Signature", "CanonicalizationMethod", "Algorithm", "Type",
+ "DigestMethod", "Reference", "document",
+ "spacing", "Player", "Height" };
+
+const OUString DummyTokenHandler::namespaceURIs[] = { "http://www.w3.org/2000/09/xmldsig#",
+ "http://schemas.openxmlformats.org/wordprocessingml/2006/main/",
+ "xyzsports.com/players/football/" };
+
+const OUString DummyTokenHandler::namespacePrefixes[] = { "", "w", "Player" };
+
+Sequence< sal_Int8 > DummyTokenHandler::getUTF8Identifier( sal_Int32 nToken )
+ throw (uno::RuntimeException, std::exception)
+{
+ OString aUtf8Token;
+ if ( ( ( nToken & 0xffff0000 ) != 0 ) ) //namespace
+ {
+ sal_uInt32 nNamespaceToken = ( nToken >> 16 ) - 1;
+ if ( nNamespaceToken < sizeof( namespacePrefixes ) / sizeof( OUString ) )
+ aUtf8Token = OUStringToOString( namespacePrefixes[ nNamespaceToken ], RTL_TEXTENCODING_UTF8 );
+ }
+ else //element or attribute
+ {
+ sal_uInt32 nElementToken = nToken & 0xffff;
+ if ( nElementToken < sizeof( tokens ) / sizeof( OUString ) )
+ aUtf8Token = OUStringToOString( tokens[ nElementToken ], RTL_TEXTENCODING_UTF8 );
+ }
+ Sequence< sal_Int8 > aSeq = Sequence< sal_Int8 >( reinterpret_cast< const sal_Int8* >(
+ aUtf8Token.getStr() ), aUtf8Token.getLength() );
+ return aSeq;
+}
+
+sal_Int32 DummyTokenHandler::getTokenFromUTF8( const uno::Sequence< sal_Int8 >& rIdentifier )
+ throw (uno::RuntimeException, std::exception)
+{
+ return getTokenDirect( reinterpret_cast< const char* >(
+ rIdentifier.getConstArray() ), rIdentifier.getLength() );
+}
+
+sal_Int32 DummyTokenHandler::getTokenDirect( const char* pToken, sal_Int32 nLength ) const
+{
+ OUString sToken( pToken, nLength, RTL_TEXTENCODING_UTF8 );
+ for( sal_uInt16 i = 0; i < sizeof(tokens)/sizeof(OUString); i++ )
+ {
+ if ( tokens[i] == sToken )
+ return (sal_Int32)i;
+ }
+ return FastToken::DONTKNOW;
+}
+
+
class XMLImportTest : public test::BootstrapFixture
{
private:
@@ -298,6 +367,26 @@ void XMLImportTest::setUp()
m_xLegacyFastParser.set( xContext->getServiceManager()->createInstanceWithContext
( "com.sun.star.xml.sax.LegacyFastParser", xContext ), UNO_QUERY );
m_xLegacyFastParser->setDocumentHandler( m_xDocumentHandler.get() );
+
+ Reference< XFastTokenHandler > xTokenHandler;
+ xTokenHandler.set( new DummyTokenHandler() );
+ uno::Reference<lang::XInitialization> const xInit(m_xLegacyFastParser,
+ uno::UNO_QUERY_THROW);
+ uno::Sequence<uno::Any> args(1);
+ args[0] <<= xTokenHandler;
+ xInit->initialize( args );
+
+ sal_Int32 nNamespaceCount = sizeof( DummyTokenHandler::namespaceURIs ) / sizeof( OUString );
+ uno::Sequence<uno::Any> namespaceArgs( nNamespaceCount + 1 );
+ namespaceArgs[0] <<= OUString( "registerNamespaces" );
+ for (sal_Int32 i = 1; i <= nNamespaceCount; i++ )
+ {
+ css::beans::Pair <OUString, sal_Int32> rPair;
+ rPair = css::beans::Pair<OUString, sal_Int32>( DummyTokenHandler::namespaceURIs[i - 1], i << 16 );
+ namespaceArgs[i] <<= rPair;
+ }
+ xInit->initialize( namespaceArgs );
+
m_sDirPath = m_directories.getPathFromSrc( "/sax/qa/data/" );
}
diff --git a/sax/source/fastparser/fastparser.cxx b/sax/source/fastparser/fastparser.cxx
index 8fa922379f64..24811fd1bc97 100644
--- a/sax/source/fastparser/fastparser.cxx
+++ b/sax/source/fastparser/fastparser.cxx
@@ -434,6 +434,17 @@ void Entity::startElement( Event *pEvent )
{
Reference< XFastAttributeList > xAttr( pEvent->mxAttributes.get() );
Reference< XFastContextHandler > xContext;
+
+ if ( mxNamespaceHandler.is() )
+ {
+ Sequence< xml::Attribute > NSDeclAttribs = pEvent->mxDeclAttributes->getUnknownAttributes();
+ sal_uInt16 len = NSDeclAttribs.getLength();
+ for (sal_uInt16 i = 0; i < len; i++)
+ {
+ mxNamespaceHandler->registerNamespace( NSDeclAttribs[i].Name, NSDeclAttribs[i].Value );
+ }
+ }
+
if( nElementToken == FastToken::DONTKNOW )
{
if( pParentContext )
@@ -441,16 +452,6 @@ void Entity::startElement( Event *pEvent )
else if( mxDocumentHandler.is() )
xContext = mxDocumentHandler->createUnknownChildContext( aNamespace, aElementName, xAttr );
- if ( mxNamespaceHandler.is() )
- {
- Sequence< xml::Attribute > NSDeclAttribs = pEvent->mxDeclAttributes->getUnknownAttributes();
- sal_uInt16 len = NSDeclAttribs.getLength();
- for (sal_uInt16 i = 0; i < len; i++)
- {
- mxNamespaceHandler->registerNamespace( NSDeclAttribs[i].Name, NSDeclAttribs[i].Value );
- }
- }
-
if( xContext.is() )
{
xContext->startUnknownElement( aNamespace, aElementName, xAttr );
@@ -1103,29 +1104,33 @@ void FastSaxParserImpl::callbackStartElement(const xmlChar *localName , const xm
try
{
- if ( rEntity.mxTokenHandler.is() )
- {
- /* #158414# Each element may define new namespaces, also for attribues.
- First, process all namespaces, second, process the attributes after namespaces
- have been initialized. */
+ /* #158414# Each element may define new namespaces, also for attributes.
+ First, process all namespaces, second, process the attributes after namespaces
+ have been initialized. */
- // #158414# first: get namespaces
- for (int i = 0; i < numNamespaces * 2; i += 2)
+ // #158414# first: get namespaces
+ for (int i = 0; i < numNamespaces * 2; i += 2)
+ {
+ // namespaces[] is (prefix/URI)
+ if( namespaces[ i ] != nullptr )
{
- // namespaces[] is (prefix/URI)
- if( namespaces[ i ] != nullptr )
- {
- DefineNamespace( OString( XML_CAST( namespaces[ i ] )),
- OUString( XML_CAST( namespaces[ i + 1 ] ), strlen( XML_CAST( namespaces[ i + 1 ] )), RTL_TEXTENCODING_UTF8 ));
- }
- else
- {
- // default namespace
- sNamespace = OUString( XML_CAST( namespaces[ i + 1 ] ), strlen( XML_CAST( namespaces[ i + 1 ] )), RTL_TEXTENCODING_UTF8 );
- nNamespaceToken = GetNamespaceToken( sNamespace );
- }
+ DefineNamespace( OString( XML_CAST( namespaces[ i ] )),
+ OUString( XML_CAST( namespaces[ i + 1 ] ), strlen( XML_CAST( namespaces[ i + 1 ] )), RTL_TEXTENCODING_UTF8 ));
+ if( rEntity.mxNamespaceHandler.is() )
+ rEvent.mxDeclAttributes->addUnknown( OString( XML_CAST( namespaces[ i ] ) ), OString( XML_CAST( namespaces[ i + 1 ] ) ) );
}
+ else
+ {
+ // default namespace
+ sNamespace = OUString( XML_CAST( namespaces[ i + 1 ] ), strlen( XML_CAST( namespaces[ i + 1 ] )), RTL_TEXTENCODING_UTF8 );
+ nNamespaceToken = GetNamespaceToken( sNamespace );
+ if( rEntity.mxNamespaceHandler.is() )
+ rEvent.mxDeclAttributes->addUnknown( OString( "" ), OString( XML_CAST( namespaces[ i + 1 ] ) ) );
+ }
+ }
+ if ( rEntity.mxTokenHandler.is() )
+ {
// #158414# second: fill attribute list with other attributes
for (int i = 0; i < numAttributes * 5; i += 5)
{
@@ -1159,20 +1164,6 @@ void FastSaxParserImpl::callbackStartElement(const xmlChar *localName , const xm
}
else
{
- for (int i = 0; i < numNamespaces * 2; i += 2)
- {
- if( rEntity.mxNamespaceHandler.is() )
- {
- if( namespaces[ i ] != nullptr )
- rEvent.mxDeclAttributes->addUnknown( OString( XML_CAST( namespaces[ i ] ) ), OString( XML_CAST( namespaces[ i + 1 ] ) ) );
- else
- {
- sNamespace = OUString( XML_CAST( namespaces[ i + 1 ] ), strlen( XML_CAST( namespaces[ i + 1 ] )), RTL_TEXTENCODING_UTF8 );
- rEvent.mxDeclAttributes->addUnknown( OString( "" ), OString( XML_CAST( namespaces[ i + 1 ] ) ) );
- }
- }
- }
-
for (int i = 0; i < numAttributes * 5; i += 5)
{
if( attributes[ i + 1 ] != nullptr )
diff --git a/sax/source/fastparser/legacyfastparser.cxx b/sax/source/fastparser/legacyfastparser.cxx
index 166f740638d6..dbcc03ff97b4 100644
--- a/sax/source/fastparser/legacyfastparser.cxx
+++ b/sax/source/fastparser/legacyfastparser.cxx
@@ -22,6 +22,7 @@
#include <com/sun/star/xml/sax/FastParser.hpp>
#include <com/sun/star/xml/sax/FastToken.hpp>
#include <com/sun/star/lang/XInitialization.hpp>
+#include <com/sun/star/beans/Pair.hpp>
#include <comphelper/attributelist.hxx>
#include <cppuhelper/supportsservice.hxx>
#include <comphelper/processfactory.hxx>
@@ -129,16 +130,24 @@ public:
private:
Reference< XFastParser > m_xParser;
Reference< XDocumentHandler > m_xDocumentHandler;
+ Reference< XFastTokenHandler > m_xTokenHandler;
};
+
class CallbackDocumentHandler : public WeakImplHelper< XFastDocumentHandler >
{
private:
Reference< XDocumentHandler > m_xDocumentHandler;
+ Reference< XFastTokenHandler > m_xTokenHandler;
rtl::Reference< NamespaceHandler > m_aNamespaceHandler;
+ const OUString getNamespacePrefixFromToken( sal_Int32 nToken );
+ const OUString getNameFromToken( sal_Int32 nToken );
+
public:
- CallbackDocumentHandler( Reference< XDocumentHandler > const & xDocumentHandler, rtl::Reference< NamespaceHandler > const & rNamespaceHandler );
+ CallbackDocumentHandler( Reference< XDocumentHandler > const & xDocumentHandler,
+ rtl::Reference< NamespaceHandler > const & rNamespaceHandler,
+ Reference< XFastTokenHandler > const & xTokenHandler);
// XFastDocumentHandler
virtual void SAL_CALL startDocument() throw (SAXException, RuntimeException, exception) override;
@@ -156,10 +165,32 @@ public:
};
-CallbackDocumentHandler::CallbackDocumentHandler( Reference< XDocumentHandler > const & xDocumentHandler, rtl::Reference< NamespaceHandler > const & rNamespaceHandler )
+const OUString CallbackDocumentHandler::getNamespacePrefixFromToken( sal_Int32 nToken )
+{
+ if ( ( nToken & 0xffff0000 ) != 0 )
+ {
+ Sequence< sal_Int8 > aSeq = m_xTokenHandler->getUTF8Identifier( nToken & 0xffff0000 );
+ return OUString( reinterpret_cast< const char* >(
+ aSeq.getConstArray() ), aSeq.getLength(), RTL_TEXTENCODING_UTF8 );
+ }
+ else
+ return OUString();
+}
+
+const OUString CallbackDocumentHandler::getNameFromToken( sal_Int32 nToken )
+{
+ Sequence< sal_Int8 > aSeq = m_xTokenHandler->getUTF8Identifier( nToken & 0xffff );
+ return OUString( reinterpret_cast< const char* >(
+ aSeq.getConstArray() ), aSeq.getLength(), RTL_TEXTENCODING_UTF8 );
+}
+
+CallbackDocumentHandler::CallbackDocumentHandler( Reference< XDocumentHandler > const & xDocumentHandler,
+ rtl::Reference< NamespaceHandler > const & rNamespaceHandler,
+ Reference< XFastTokenHandler > const & xTokenHandler)
{
m_xDocumentHandler.set( xDocumentHandler );
m_aNamespaceHandler.set( rNamespaceHandler.get() );
+ m_xTokenHandler.set( xTokenHandler );
}
void SAL_CALL CallbackDocumentHandler::startDocument()
@@ -183,9 +214,11 @@ void SAL_CALL CallbackDocumentHandler::setDocumentLocator( const Reference< XLoc
m_xDocumentHandler->setDocumentLocator( xLocator );
}
-void SAL_CALL CallbackDocumentHandler::startFastElement( sal_Int32/* nElement */, const Reference< XFastAttributeList >&/* Attribs */ )
+void SAL_CALL CallbackDocumentHandler::startFastElement( sal_Int32 nElement , const Reference< XFastAttributeList >& Attribs )
throw (SAXException, RuntimeException, exception)
{
+ startUnknownElement( CallbackDocumentHandler::getNamespacePrefixFromToken( nElement ),
+ CallbackDocumentHandler::getNameFromToken( nElement ), Attribs );
}
void SAL_CALL CallbackDocumentHandler::startUnknownElement( const OUString& Namespace, const OUString& Name, const Reference< XFastAttributeList >& Attribs )
@@ -201,15 +234,29 @@ void SAL_CALL CallbackDocumentHandler::startUnknownElement( const OUString& Name
else
elementName = Name;
+ Sequence< xml::FastAttribute > fastAttribs = Attribs->getFastAttributes();
+ sal_uInt16 len = fastAttribs.getLength();
+ for (sal_uInt16 i = 0; i < len; i++)
+ {
+ OUString& rAttrValue = fastAttribs[i].Value;
+ sal_Int32 nToken = fastAttribs[i].Token;
+ const OUString& rAttrNamespacePrefix = CallbackDocumentHandler::getNamespacePrefixFromToken( nToken );
+ OUString sAttrName = CallbackDocumentHandler::getNameFromToken( nToken );
+ if ( !rAttrNamespacePrefix.isEmpty() )
+ sAttrName = rAttrNamespacePrefix + ":" + sAttrName;
+
+ rAttrList->AddAttribute( sAttrName, "CDATA", rAttrValue );
+ }
+
Sequence< xml::Attribute > unknownAttribs = Attribs->getUnknownAttributes();
- sal_uInt16 len = unknownAttribs.getLength();
+ len = unknownAttribs.getLength();
for (sal_uInt16 i = 0; i < len; i++)
{
OUString& rAttrValue = unknownAttribs[i].Value;
OUString sAttrName = unknownAttribs[i].Name;
- OUString& rAttrNamespaceURL = unknownAttribs[i].NamespaceURL;
- if ( !rAttrNamespaceURL.isEmpty() )
- sAttrName = rAttrNamespaceURL + ":" + sAttrName;
+ OUString& rAttrNamespacePrefix = unknownAttribs[i].NamespaceURL;
+ if ( !rAttrNamespacePrefix.isEmpty() )
+ sAttrName = rAttrNamespacePrefix + ":" + sAttrName;
rAttrList->AddAttribute( sAttrName, "CDATA", rAttrValue );
}
@@ -217,9 +264,11 @@ void SAL_CALL CallbackDocumentHandler::startUnknownElement( const OUString& Name
}
}
-void SAL_CALL CallbackDocumentHandler::endFastElement( sal_Int32/* nElement */)
+void SAL_CALL CallbackDocumentHandler::endFastElement( sal_Int32 nElement )
throw (SAXException, RuntimeException, exception)
{
+ endUnknownElement( CallbackDocumentHandler::getNamespacePrefixFromToken( nElement ),
+ CallbackDocumentHandler::getNameFromToken( nElement ) );
}
@@ -267,9 +316,30 @@ SaxLegacyFastParser::SaxLegacyFastParser( ) : m_aNamespaceHandler( new Namespace
void SAL_CALL SaxLegacyFastParser::initialize(Sequence< Any > const& rArguments )
throw (RuntimeException, Exception, exception)
{
- uno::Reference<lang::XInitialization> const xInit(m_xParser,
+ if (rArguments.getLength())
+ {
+ Reference< XFastTokenHandler > xTokenHandler;
+ OUString str;
+ if ( ( rArguments[0] >>= xTokenHandler ) && xTokenHandler.is() )
+ {
+ m_xTokenHandler.set( xTokenHandler );
+ }
+ else if ( ( rArguments[0] >>= str ) && "registerNamespaces" == str )
+ {
+ css::beans::Pair< OUString, sal_Int32 > rPair;
+ for (sal_Int32 i = 1; i < rArguments.getLength(); i++ )
+ {
+ rArguments[i] >>= rPair;
+ m_xParser->registerNamespace( rPair.First, rPair.Second );
+ }
+ }
+ else
+ {
+ uno::Reference<lang::XInitialization> const xInit(m_xParser,
uno::UNO_QUERY_THROW);
- xInit->initialize( rArguments );
+ xInit->initialize( rArguments );
+ }
+ }
}
void SaxLegacyFastParser::parseStream( const InputSource& structSource )
@@ -277,7 +347,9 @@ void SaxLegacyFastParser::parseStream( const InputSource& structSource )
IOException,
RuntimeException, exception)
{
- m_xParser->setFastDocumentHandler( new CallbackDocumentHandler( m_xDocumentHandler.get(), m_aNamespaceHandler.get() ) );
+ m_xParser->setFastDocumentHandler( new CallbackDocumentHandler( m_xDocumentHandler.get(),
+ m_aNamespaceHandler.get(), m_xTokenHandler.get() ) );
+ m_xParser->setTokenHandler( m_xTokenHandler );
m_xParser->parseStream( structSource );
}