/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ /* * This file is part of the LibreOffice project. * * This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. * * This file incorporates work covered by the following license notice: * * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed * with this work for additional information regarding copyright * ownership. The ASF licenses this file to you under the Apache * License, Version 2.0 (the "License"); you may not use this file * except in compliance with the License. You may obtain a copy of * the License at http://www.apache.org/licenses/LICENSE-2.0 . */ #include "sax/fastparser.hxx" #include "sax/fastattribs.hxx" #include "xml2utf.hxx" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define PARSER_IMPLEMENTATION_NAME "com.sun.star.comp.extensions.xml.sax.FastParser" #define PARSER_SERVICE_NAME "com.sun.star.xml.sax.FastParser" using namespace ::std; using namespace ::osl; using namespace ::cppu; using namespace ::com::sun::star::uno; using namespace ::com::sun::star::lang; using namespace ::com::sun::star::xml::sax; using namespace ::com::sun::star::io; using namespace com::sun::star; using namespace sax_fastparser; namespace { struct Event; class FastLocatorImpl; struct NamespaceDefine; struct Entity; typedef ::boost::shared_ptr< NamespaceDefine > NamespaceDefineRef; typedef ::boost::unordered_map< OUString, sal_Int32, OUStringHash, ::std::equal_to< OUString > > NamespaceMap; typedef std::vector EventList; enum CallbackType { INVALID, START_ELEMENT, END_ELEMENT, CHARACTERS, DONE, EXCEPTION }; struct Event { CallbackType maType; sal_Int32 mnElementToken; OUString msNamespace; OUString msElementName; rtl::Reference< FastAttributeList > mxAttributes; OUString msChars; }; struct NameWithToken { OUString msName; sal_Int32 mnToken; NameWithToken(const OUString& sName, const sal_Int32& nToken) : msName(sName), mnToken(nToken) {} }; struct SaxContext { ::com::sun::star::uno::Reference< ::com::sun::star::xml::sax::XFastContextHandler > mxContext; sal_Int32 mnElementToken; boost::optional< OUString > maNamespace; boost::optional< OUString > maElementName; SaxContext( sal_Int32 nElementToken, const OUString& aNamespace, const OUString& aElementName ): mnElementToken(nElementToken) { if (nElementToken == FastToken::DONTKNOW) { maNamespace = aNamespace; maElementName = aElementName; } } }; struct ParserData { ::com::sun::star::uno::Reference< ::com::sun::star::xml::sax::XFastDocumentHandler > mxDocumentHandler; ::com::sun::star::uno::Reference< ::com::sun::star::xml::sax::XFastTokenHandler > mxTokenHandler; FastTokenHandlerBase *mpTokenHandler; ::com::sun::star::uno::Reference< ::com::sun::star::xml::sax::XErrorHandler > mxErrorHandler; ::com::sun::star::uno::Reference< ::com::sun::star::xml::sax::XEntityResolver > mxEntityResolver; ::com::sun::star::lang::Locale maLocale; ParserData(); ~ParserData(); }; struct NamespaceDefine { OString maPrefix; sal_Int32 mnToken; OUString maNamespaceURL; NamespaceDefine( const OString& rPrefix, sal_Int32 nToken, const OUString& rNamespaceURL ) : maPrefix( rPrefix ), mnToken( nToken ), maNamespaceURL( rNamespaceURL ) {} }; // Entity binds all information needed for a single file | single call of parseStream struct Entity : public ParserData { // Amount of work producer sends to consumer in one iteration: static const size_t mnEventListSize = 1000; // unique for each Entity instance: // Number of valid events in mpProducedEvents: size_t mnProducedEventsSize; EventList *mpProducedEvents; std::queue< EventList * > maPendingEvents; std::queue< EventList * > maUsedEvents; osl::Mutex maEventProtector; static const size_t mnEventLowWater = 4; static const size_t mnEventHighWater = 8; osl::Condition maConsumeResume; osl::Condition maProduceResume; // Event we use to store data if threading is disabled: Event maSharedEvent; // copied in copy constructor: // Allow to disable threading for small documents: bool mbEnableThreads; ::com::sun::star::xml::sax::InputSource maStructSource; XML_Parser mpParser; ::sax_expatwrap::XMLFile2UTFConverter maConverter; // Exceptions cannot be thrown through the C-XmlParser (possible // resource leaks), therefore any exception thrown by a UNO callback // must be saved somewhere until the C-XmlParser is stopped. ::com::sun::star::uno::Any maSavedException; void saveException( const Exception &e ); void throwException( const ::rtl::Reference< FastLocatorImpl > &xDocumentLocator, bool mbDuringParse ); ::std::stack< NameWithToken > maNamespaceStack; /* Context for main thread consuming events. * startElement() stores the data, which characters() and endElement() uses */ ::std::stack< SaxContext> maContextStack; // Determines which elements of maNamespaceDefines are valid in current context ::std::stack< sal_uInt32 > maNamespaceCount; ::std::vector< NamespaceDefineRef > maNamespaceDefines; explicit Entity( const ParserData& rData ); Entity( const Entity& rEntity ); ~Entity(); void startElement( Event *pEvent ); void characters( const OUString& sChars ); void endElement(); EventList* getEventList(); Event& getEvent( CallbackType aType ); }; } // namespace namespace sax_fastparser { class FastSaxParserImpl { public: FastSaxParserImpl( FastSaxParser* pFront ); ~FastSaxParserImpl(); // The implementation details static ::com::sun::star::uno::Sequence< OUString > getSupportedServiceNames_Static(void); // XFastParser void parseStream( const ::com::sun::star::xml::sax::InputSource& aInputSource ) throw (::com::sun::star::xml::sax::SAXException, ::com::sun::star::io::IOException, ::com::sun::star::uno::RuntimeException); void setFastDocumentHandler( const ::com::sun::star::uno::Reference< ::com::sun::star::xml::sax::XFastDocumentHandler >& Handler ) throw (::com::sun::star::uno::RuntimeException); void setTokenHandler( const ::com::sun::star::uno::Reference< ::com::sun::star::xml::sax::XFastTokenHandler >& Handler ) throw (::com::sun::star::uno::RuntimeException); void registerNamespace( const OUString& NamespaceURL, sal_Int32 NamespaceToken ) throw (::com::sun::star::lang::IllegalArgumentException, ::com::sun::star::uno::RuntimeException); OUString getNamespaceURL( const OUString& rPrefix ) throw(::com::sun::star::lang::IllegalArgumentException, ::com::sun::star::uno::RuntimeException); void setErrorHandler( const ::com::sun::star::uno::Reference< ::com::sun::star::xml::sax::XErrorHandler >& Handler ) throw (::com::sun::star::uno::RuntimeException); void setEntityResolver( const ::com::sun::star::uno::Reference< ::com::sun::star::xml::sax::XEntityResolver >& Resolver ) throw (::com::sun::star::uno::RuntimeException); void setLocale( const ::com::sun::star::lang::Locale& rLocale ) throw (::com::sun::star::uno::RuntimeException); // XServiceInfo OUString getImplementationName( ) throw (::com::sun::star::uno::RuntimeException); sal_Bool supportsService( const OUString& ServiceName ) throw (::com::sun::star::uno::RuntimeException); ::com::sun::star::uno::Sequence< OUString > SAL_CALL getSupportedServiceNames( ) throw (::com::sun::star::uno::RuntimeException); // called by the C callbacks of the expat parser void callbackStartElement( const XML_Char* name, const XML_Char** atts ); void callbackEndElement( const XML_Char* name ); void callbackCharacters( const XML_Char* s, int nLen ); int callbackExternalEntityRef( XML_Parser parser, const XML_Char *openEntityNames, const XML_Char *base, const XML_Char *systemId, const XML_Char *publicId); void callbackEntityDecl(const XML_Char *entityName, int is_parameter_entity, const XML_Char *value, int value_length, const XML_Char *base, const XML_Char *systemId, const XML_Char *publicId, const XML_Char *notationName); void pushEntity( const Entity& rEntity ); void popEntity(); Entity& getEntity(); const Entity& getEntity() const; void parse(); void produce( CallbackType aType ); bool hasNamespaceURL( const OUString& rPrefix ) const; private: bool consume(EventList *); void deleteUsedEvents(); sal_Int32 GetToken( const sal_Char* pToken, sal_Int32 nTokenLen = 0 ); sal_Int32 GetTokenWithPrefix( const sal_Char*pPrefix, int nPrefixLen, const sal_Char* pName, int nNameLen ) throw (::com::sun::star::xml::sax::SAXException); OUString GetNamespaceURL( const OString& rPrefix ) throw (::com::sun::star::xml::sax::SAXException); OUString GetNamespaceURL( const sal_Char*pPrefix, int nPrefixLen ) throw (::com::sun::star::xml::sax::SAXException); sal_Int32 GetNamespaceToken( const OUString& rNamespaceURL ); sal_Int32 GetTokenWithContextNamespace( sal_Int32 nNamespaceToken, const sal_Char* pName, int nNameLen ); void DefineNamespace( const OString& rPrefix, const sal_Char* pNamespaceURL ); void pushContext(); void popContext(); void splitName( const XML_Char *pwName, const XML_Char *&rpPrefix, sal_Int32 &rPrefixLen, const XML_Char *&rpName, sal_Int32 &rNameLen ); private: FastSaxParser* mpFront; osl::Mutex maMutex; ///< Protecting whole parseStream() execution ::rtl::Reference< FastLocatorImpl > mxDocumentLocator; NamespaceMap maNamespaceMap; ParserData maData; /// Cached parser configuration for next call of parseStream(). ::std::stack< Entity > maEntities; /// Entity stack for each call of parseStream(). FastTokenLookup maTokenLookup; }; } // namespace sax_fastparser namespace { class ParserThread: public salhelper::Thread { FastSaxParserImpl *mpParser; public: ParserThread(FastSaxParserImpl *pParser): Thread("Parser"), mpParser(pParser) {} private: virtual void execute() { try { mpParser->parse(); } catch (const Exception &e) { Entity &rEntity = mpParser->getEntity(); rEntity.getEvent( EXCEPTION ); mpParser->produce( EXCEPTION ); } } }; extern "C" { static void call_callbackStartElement(void *userData, const XML_Char *name , const XML_Char **atts) { sax_fastparser::FastSaxParserImpl* pFastParser = reinterpret_cast( userData ); pFastParser->callbackStartElement( name, atts ); } static void call_callbackEndElement(void *userData, const XML_Char *name) { sax_fastparser::FastSaxParserImpl* pFastParser = reinterpret_cast( userData ); pFastParser->callbackEndElement( name ); } static void call_callbackCharacters( void *userData , const XML_Char *s , int nLen ) { sax_fastparser::FastSaxParserImpl* pFastParser = reinterpret_cast( userData ); pFastParser->callbackCharacters( s, nLen ); } static void call_callbackEntityDecl(void *userData, const XML_Char *entityName, int is_parameter_entity, const XML_Char *value, int value_length, const XML_Char *base, const XML_Char *systemId, const XML_Char *publicId, const XML_Char *notationName) { sax_fastparser::FastSaxParserImpl* pFastParser = reinterpret_cast(userData); pFastParser->callbackEntityDecl(entityName, is_parameter_entity, value, value_length, base, systemId, publicId, notationName); } static int call_callbackExternalEntityRef( XML_Parser parser, const XML_Char *openEntityNames, const XML_Char *base, const XML_Char *systemId, const XML_Char *publicId ) { sax_fastparser::FastSaxParserImpl* pFastParser = reinterpret_cast( XML_GetUserData( parser ) ); return pFastParser->callbackExternalEntityRef( parser, openEntityNames, base, systemId, publicId ); } } // -------------------------------------------------------------------- // FastLocatorImpl // -------------------------------------------------------------------- class FastLocatorImpl : public WeakImplHelper1< XLocator > { public: FastLocatorImpl( FastSaxParserImpl *p ) : mpParser(p) {} void dispose() { mpParser = 0; } void checkDispose() throw (RuntimeException) { if( !mpParser ) throw DisposedException(); } //XLocator virtual sal_Int32 SAL_CALL getColumnNumber(void) throw (RuntimeException); virtual sal_Int32 SAL_CALL getLineNumber(void) throw (RuntimeException); virtual OUString SAL_CALL getPublicId(void) throw (RuntimeException); virtual OUString SAL_CALL getSystemId(void) throw (RuntimeException); private: FastSaxParserImpl *mpParser; }; sal_Int32 SAL_CALL FastLocatorImpl::getColumnNumber(void) throw (RuntimeException) { checkDispose(); return XML_GetCurrentColumnNumber( mpParser->getEntity().mpParser ); } // -------------------------------------------------------------------- sal_Int32 SAL_CALL FastLocatorImpl::getLineNumber(void) throw (RuntimeException) { checkDispose(); return XML_GetCurrentLineNumber( mpParser->getEntity().mpParser ); } // -------------------------------------------------------------------- OUString SAL_CALL FastLocatorImpl::getPublicId(void) throw (RuntimeException) { checkDispose(); return mpParser->getEntity().maStructSource.sPublicId; } // -------------------------------------------------------------------- OUString SAL_CALL FastLocatorImpl::getSystemId(void) throw (RuntimeException) { checkDispose(); return mpParser->getEntity().maStructSource.sSystemId; } // -------------------------------------------------------------------- ParserData::ParserData() : mpTokenHandler( NULL ) {} ParserData::~ParserData() {} // -------------------------------------------------------------------- Entity::Entity( const ParserData& rData ) : ParserData( rData ) { mpProducedEvents = 0; } Entity::Entity( const Entity& e ) : ParserData( e ) ,mbEnableThreads(e.mbEnableThreads) ,maStructSource(e.maStructSource) ,mpParser(e.mpParser) ,maConverter(e.maConverter) ,maSavedException(e.maSavedException) ,maNamespaceStack(e.maNamespaceStack) ,maContextStack(e.maContextStack) ,maNamespaceCount(e.maNamespaceCount) ,maNamespaceDefines(e.maNamespaceDefines) { mpProducedEvents = 0; } Entity::~Entity() { } void Entity::startElement( Event *pEvent ) { const sal_Int32& nElementToken = pEvent->mnElementToken; const OUString& aNamespace = pEvent->msNamespace; const OUString& aElementName = pEvent->msElementName; // Use un-wrapped pointers to avoid significant acquire/release overhead XFastContextHandler *pParentContext = NULL; if( !maContextStack.empty() ) { pParentContext = maContextStack.top().mxContext.get(); if( !pParentContext ) { maContextStack.push( SaxContext(nElementToken, aNamespace, aElementName) ); return; } } maContextStack.push( SaxContext( nElementToken, aNamespace, aElementName ) ); try { Reference< XFastAttributeList > xAttr( pEvent->mxAttributes.get() ); Reference< XFastContextHandler > xContext; if( nElementToken == FastToken::DONTKNOW ) { if( pParentContext ) xContext = pParentContext->createUnknownChildContext( aNamespace, aElementName, xAttr ); else if( mxDocumentHandler.is() ) xContext = mxDocumentHandler->createUnknownChildContext( aNamespace, aElementName, xAttr ); if( xContext.is() ) { xContext->startUnknownElement( aNamespace, aElementName, xAttr ); } } else { if( pParentContext ) xContext = pParentContext->createFastChildContext( nElementToken, xAttr ); else if( mxDocumentHandler.is() ) xContext = mxDocumentHandler->createFastChildContext( nElementToken, xAttr ); if( xContext.is() ) xContext->startFastElement( nElementToken, xAttr ); } // swap the reference we own in to avoid referencing thrash. maContextStack.top().mxContext.set( static_cast( xContext.get() ) ); xContext.set( NULL, UNO_REF_NO_ACQUIRE ); } catch (const Exception& e) { saveException( e ); } } void Entity::characters( const OUString& sChars ) { const Reference< XFastContextHandler >& xContext( maContextStack.top().mxContext ); if( xContext.is() ) try { xContext->characters( sChars ); } catch (const Exception& e) { saveException( e ); } } void Entity::endElement() { const SaxContext& aContext = maContextStack.top(); const Reference< XFastContextHandler >& xContext( aContext.mxContext ); if( xContext.is() ) try { sal_Int32 nElementToken = aContext.mnElementToken; if( nElementToken != FastToken::DONTKNOW ) xContext->endFastElement( nElementToken ); else xContext->endUnknownElement( aContext.maNamespace.get(), aContext.maElementName.get() ); } catch (const Exception& e) { saveException( e ); } maContextStack.pop(); } EventList* Entity::getEventList() { if (!mpProducedEvents) { osl::ResettableMutexGuard aGuard(maEventProtector); if (!maUsedEvents.empty()) { mpProducedEvents = maUsedEvents.front(); maUsedEvents.pop(); aGuard.clear(); // unlock mnProducedEventsSize = 0; } if (!mpProducedEvents) { mpProducedEvents = new EventList(); mpProducedEvents->resize(mnEventListSize); mnProducedEventsSize = 0; } } return mpProducedEvents; } Event& Entity::getEvent( CallbackType aType ) { if (!mbEnableThreads) return maSharedEvent; EventList* pEventList = getEventList(); Event& rEvent = (*pEventList)[mnProducedEventsSize++]; rEvent.maType = aType; return rEvent; } OUString lclGetErrorMessage( XML_Error xmlE, const OUString& sSystemId, sal_Int32 nLine ) { const sal_Char* pMessage = ""; switch( xmlE ) { case XML_ERROR_NONE: pMessage = "No"; break; case XML_ERROR_NO_MEMORY: pMessage = "no memory"; break; case XML_ERROR_SYNTAX: pMessage = "syntax"; break; case XML_ERROR_NO_ELEMENTS: pMessage = "no elements"; break; case XML_ERROR_INVALID_TOKEN: pMessage = "invalid token"; break; case XML_ERROR_UNCLOSED_TOKEN: pMessage = "unclosed token"; break; case XML_ERROR_PARTIAL_CHAR: pMessage = "partial char"; break; case XML_ERROR_TAG_MISMATCH: pMessage = "tag mismatch"; break; case XML_ERROR_DUPLICATE_ATTRIBUTE: pMessage = "duplicate attribute"; break; case XML_ERROR_JUNK_AFTER_DOC_ELEMENT: pMessage = "junk after doc element"; break; case XML_ERROR_PARAM_ENTITY_REF: pMessage = "parameter entity reference"; break; case XML_ERROR_UNDEFINED_ENTITY: pMessage = "undefined entity"; break; case XML_ERROR_RECURSIVE_ENTITY_REF: pMessage = "recursive entity reference"; break; case XML_ERROR_ASYNC_ENTITY: pMessage = "async entity"; break; case XML_ERROR_BAD_CHAR_REF: pMessage = "bad char reference"; break; case XML_ERROR_BINARY_ENTITY_REF: pMessage = "binary entity reference"; break; case XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF: pMessage = "attribute external entity reference"; break; case XML_ERROR_MISPLACED_XML_PI: pMessage = "misplaced xml processing instruction"; break; case XML_ERROR_UNKNOWN_ENCODING: pMessage = "unknown encoding"; break; case XML_ERROR_INCORRECT_ENCODING: pMessage = "incorrect encoding"; break; case XML_ERROR_UNCLOSED_CDATA_SECTION: pMessage = "unclosed cdata section"; break; case XML_ERROR_EXTERNAL_ENTITY_HANDLING: pMessage = "external entity reference"; break; case XML_ERROR_NOT_STANDALONE: pMessage = "not standalone"; break; default:; } OUStringBuffer aBuffer( '[' ); aBuffer.append( sSystemId ); aBuffer.append( " line " ); aBuffer.append( nLine ); aBuffer.append( "]: " ); aBuffer.appendAscii( pMessage ); aBuffer.append( " error" ); return aBuffer.makeStringAndClear(); } // throw an exception, but avoid callback if // during a threaded produce void Entity::throwException( const ::rtl::Reference< FastLocatorImpl > &xDocumentLocator, bool mbDuringParse ) { // Error during parsing ! SAXParseException aExcept( lclGetErrorMessage( XML_GetErrorCode( mpParser ), xDocumentLocator->getSystemId(), xDocumentLocator->getLineNumber() ), Reference< XInterface >(), Any( &maSavedException, getCppuType( &maSavedException ) ), xDocumentLocator->getPublicId(), xDocumentLocator->getSystemId(), xDocumentLocator->getLineNumber(), xDocumentLocator->getColumnNumber() ); // error handler is set, it may throw the exception if( !mbDuringParse || !mbEnableThreads ) { if (mxErrorHandler.is() ) mxErrorHandler->fatalError( Any( aExcept ) ); } // error handler has not thrown, but parsing must stop => throw ourselves throw aExcept; } // In the single threaded case we emit events via our C // callbacks, so any exception caught must be queued up until // we can safely re-throw it from our C++ parent of parse() // // If multi-threaded, we need to push an EXCEPTION event, at // which point we transfer ownership of maSavedException to // the consuming thread. void Entity::saveException( const Exception &e ) { // only store the first exception if( !maSavedException.hasValue() ) { maSavedException <<= e; XML_StopParser( mpParser, /* resumable? */ XML_FALSE ); } } } // namespace namespace sax_fastparser { FastSaxParserImpl::FastSaxParserImpl( FastSaxParser* pFront ) : mpFront(pFront) { mxDocumentLocator.set( new FastLocatorImpl( this ) ); } // -------------------------------------------------------------------- FastSaxParserImpl::~FastSaxParserImpl() { if( mxDocumentLocator.is() ) mxDocumentLocator->dispose(); } // -------------------------------------------------------------------- void FastSaxParserImpl::DefineNamespace( const OString& rPrefix, const sal_Char* pNamespaceURL ) { Entity& rEntity = getEntity(); assert(!rEntity.maNamespaceCount.empty()); // need a context! if( !rEntity.maNamespaceCount.empty() ) { sal_uInt32 nOffset = rEntity.maNamespaceCount.top()++; if( rEntity.maNamespaceDefines.size() <= nOffset ) rEntity.maNamespaceDefines.resize( rEntity.maNamespaceDefines.size() + 64 ); const OUString aNamespaceURL( pNamespaceURL, strlen( pNamespaceURL ), RTL_TEXTENCODING_UTF8 ); rEntity.maNamespaceDefines[nOffset].reset( new NamespaceDefine( rPrefix, GetNamespaceToken( aNamespaceURL ), aNamespaceURL ) ); } } // -------------------------------------------------------------------- sal_Int32 FastSaxParserImpl::GetToken( const sal_Char* pToken, sal_Int32 nLen /* = 0 */ ) { return maTokenLookup.getTokenFromChars( getEntity().mxTokenHandler, getEntity().mpTokenHandler, pToken, nLen ); } // -------------------------------------------------------------------- sal_Int32 FastSaxParserImpl::GetTokenWithPrefix( const sal_Char*pPrefix, int nPrefixLen, const sal_Char* pName, int nNameLen ) throw (SAXException) { sal_Int32 nNamespaceToken = FastToken::DONTKNOW; Entity& rEntity = getEntity(); sal_uInt32 nNamespace = rEntity.maNamespaceCount.top(); while( nNamespace-- ) { const OString& rPrefix( rEntity.maNamespaceDefines[nNamespace]->maPrefix ); if( (rPrefix.getLength() == nPrefixLen) && (strncmp( rPrefix.getStr(), pPrefix, nPrefixLen ) == 0 ) ) { nNamespaceToken = rEntity.maNamespaceDefines[nNamespace]->mnToken; break; } if( !nNamespace ) throw SAXException(); // prefix that has no defined namespace url } if( nNamespaceToken != FastToken::DONTKNOW ) { sal_Int32 nNameToken = GetToken( pName, nNameLen ); if( nNameToken != FastToken::DONTKNOW ) return nNamespaceToken | nNameToken; } return FastToken::DONTKNOW; } // -------------------------------------------------------------------- sal_Int32 FastSaxParserImpl::GetNamespaceToken( const OUString& rNamespaceURL ) { NamespaceMap::iterator aIter( maNamespaceMap.find( rNamespaceURL ) ); if( aIter != maNamespaceMap.end() ) return (*aIter).second; else return FastToken::DONTKNOW; } // -------------------------------------------------------------------- OUString FastSaxParserImpl::GetNamespaceURL( const OString& rPrefix ) throw (SAXException) { Entity& rEntity = getEntity(); if( !rEntity.maNamespaceCount.empty() ) { sal_uInt32 nNamespace = rEntity.maNamespaceCount.top(); while( nNamespace-- ) if( rEntity.maNamespaceDefines[nNamespace]->maPrefix == rPrefix ) return rEntity.maNamespaceDefines[nNamespace]->maNamespaceURL; } throw SAXException(); // prefix that has no defined namespace url } OUString FastSaxParserImpl::GetNamespaceURL( const sal_Char*pPrefix, int nPrefixLen ) throw(SAXException) { Entity& rEntity = getEntity(); if( pPrefix && !rEntity.maNamespaceCount.empty() ) { sal_uInt32 nNamespace = rEntity.maNamespaceCount.top(); while( nNamespace-- ) { const OString& rPrefix( rEntity.maNamespaceDefines[nNamespace]->maPrefix ); if( (rPrefix.getLength() == nPrefixLen) && (strncmp( rPrefix.getStr(), pPrefix, nPrefixLen ) == 0 ) ) { return rEntity.maNamespaceDefines[nNamespace]->maNamespaceURL; } } } throw SAXException(); // prefix that has no defined namespace url } // -------------------------------------------------------------------- sal_Int32 FastSaxParserImpl::GetTokenWithContextNamespace( sal_Int32 nNamespaceToken, const sal_Char* pName, int nNameLen ) { if( nNamespaceToken != FastToken::DONTKNOW ) { sal_Int32 nNameToken = GetToken( pName, nNameLen ); if( nNameToken != FastToken::DONTKNOW ) return nNamespaceToken | nNameToken; } return FastToken::DONTKNOW; } // -------------------------------------------------------------------- void FastSaxParserImpl::splitName( const XML_Char *pwName, const XML_Char *&rpPrefix, sal_Int32 &rPrefixLen, const XML_Char *&rpName, sal_Int32 &rNameLen ) { XML_Char *p; for( p = const_cast< XML_Char* >( pwName ), rNameLen = 0, rPrefixLen = 0; *p; p++ ) { if( *p == ':' ) { rPrefixLen = p - pwName; rNameLen = 0; } else { rNameLen++; } } if( rPrefixLen ) { rpPrefix = pwName; rpName = &pwName[ rPrefixLen + 1 ]; } else { rpPrefix = 0; rpName = pwName; } } /*************** * * parseStream does Parser-startup initializations. The FastSaxParser::parse() method does * the file-specific initialization work. (During a parser run, external files may be opened) * ****************/ void FastSaxParserImpl::parseStream( const InputSource& maStructSource) throw (SAXException, IOException, RuntimeException) { // Only one text at one time MutexGuard guard( maMutex ); Entity entity( maData ); entity.maStructSource = maStructSource; if( !entity.maStructSource.aInputStream.is() ) throw SAXException("No input source", Reference< XInterface >(), Any() ); entity.maConverter.setInputStream( entity.maStructSource.aInputStream ); if( !entity.maStructSource.sEncoding.isEmpty() ) entity.maConverter.setEncoding( OUStringToOString( entity.maStructSource.sEncoding, RTL_TEXTENCODING_ASCII_US ) ); // create parser with proper encoding entity.mpParser = XML_ParserCreate( 0 ); if( !entity.mpParser ) throw SAXException("Couldn't create parser", Reference< XInterface >(), Any() ); // set all necessary C-Callbacks XML_SetUserData( entity.mpParser, this ); XML_SetElementHandler( entity.mpParser, call_callbackStartElement, call_callbackEndElement ); XML_SetCharacterDataHandler( entity.mpParser, call_callbackCharacters ); XML_SetEntityDeclHandler(entity.mpParser, call_callbackEntityDecl); XML_SetExternalEntityRefHandler( entity.mpParser, call_callbackExternalEntityRef ); pushEntity( entity ); Entity& rEntity = getEntity(); try { // start the document if( entity.mxDocumentHandler.is() ) { Reference< XLocator > xLoc( mxDocumentLocator.get() ); entity.mxDocumentHandler->setDocumentLocator( xLoc ); entity.mxDocumentHandler->startDocument(); } rEntity.mbEnableThreads = (rEntity.maStructSource.aInputStream->available() > 10000); if (rEntity.mbEnableThreads) { rtl::Reference xParser; xParser = new ParserThread(this); xParser->launch(); bool done = false; do { rEntity.maConsumeResume.wait(); rEntity.maConsumeResume.reset(); osl::ResettableMutexGuard aGuard(rEntity.maEventProtector); while (!rEntity.maPendingEvents.empty()) { if (rEntity.maPendingEvents.size() <= rEntity.mnEventLowWater) rEntity.maProduceResume.set(); // start producer again EventList *pEventList = rEntity.maPendingEvents.front(); rEntity.maPendingEvents.pop(); aGuard.clear(); // unlock if (!consume(pEventList)) done = true; aGuard.reset(); // lock rEntity.maUsedEvents.push(pEventList); } } while (!done); xParser->join(); deleteUsedEvents(); } else { parse(); } // finish document if( entity.mxDocumentHandler.is() ) { entity.mxDocumentHandler->endDocument(); } } catch (const SAXException&) { popEntity(); XML_ParserFree( entity.mpParser ); throw; } catch (const IOException&) { popEntity(); XML_ParserFree( entity.mpParser ); throw; } catch (const RuntimeException&) { popEntity(); XML_ParserFree( entity.mpParser ); throw; } popEntity(); XML_ParserFree( entity.mpParser ); } void FastSaxParserImpl::setFastDocumentHandler( const Reference< XFastDocumentHandler >& Handler ) throw (RuntimeException) { maData.mxDocumentHandler = Handler; } void FastSaxParserImpl::setTokenHandler( const Reference< XFastTokenHandler >& xHandler ) throw (RuntimeException) { maData.mxTokenHandler = xHandler; maData.mpTokenHandler = dynamic_cast< FastTokenHandlerBase *>( xHandler.get() ); } void FastSaxParserImpl::registerNamespace( const OUString& NamespaceURL, sal_Int32 NamespaceToken ) throw (IllegalArgumentException, RuntimeException) { if( NamespaceToken >= FastToken::NAMESPACE ) { if( GetNamespaceToken( NamespaceURL ) == FastToken::DONTKNOW ) { maNamespaceMap[ NamespaceURL ] = NamespaceToken; return; } } throw IllegalArgumentException(); } OUString FastSaxParserImpl::getNamespaceURL( const OUString& rPrefix ) throw(IllegalArgumentException, RuntimeException) { try { return GetNamespaceURL( OUStringToOString( rPrefix, RTL_TEXTENCODING_UTF8 ) ); } catch (const Exception&) { } throw IllegalArgumentException(); } void FastSaxParserImpl::setErrorHandler(const Reference< XErrorHandler > & Handler) throw (RuntimeException) { maData.mxErrorHandler = Handler; } void FastSaxParserImpl::setEntityResolver(const Reference < XEntityResolver > & Resolver) throw (RuntimeException) { maData.mxEntityResolver = Resolver; } void FastSaxParserImpl::setLocale( const Locale & Locale ) throw (RuntimeException) { maData.maLocale = Locale; } Sequence< OUString > FastSaxParserImpl::getSupportedServiceNames_Static(void) { Sequence aRet(1); aRet.getArray()[0] = OUString( PARSER_SERVICE_NAME ); return aRet; } // XServiceInfo OUString FastSaxParserImpl::getImplementationName() throw (RuntimeException) { return OUString( PARSER_IMPLEMENTATION_NAME ); } // XServiceInfo sal_Bool FastSaxParserImpl::supportsService(const OUString& ServiceName) throw (RuntimeException) { return cppu::supportsService(mpFront, ServiceName); } // XServiceInfo Sequence< OUString > FastSaxParserImpl::getSupportedServiceNames(void) throw (RuntimeException) { Sequence seq(1); seq.getArray()[0] = OUString( PARSER_SERVICE_NAME ); return seq; } void FastSaxParserImpl::deleteUsedEvents() { Entity& rEntity = getEntity(); osl::ResettableMutexGuard aGuard(rEntity.maEventProtector); while (!rEntity.maUsedEvents.empty()) { EventList *pEventList = rEntity.maUsedEvents.front(); rEntity.maUsedEvents.pop(); aGuard.clear(); // unlock delete pEventList; aGuard.reset(); // lock } } void FastSaxParserImpl::produce( CallbackType aType ) { Entity& rEntity = getEntity(); if (aType == DONE || aType == EXCEPTION || rEntity.mnProducedEventsSize == rEntity.mnEventListSize) { osl::ResettableMutexGuard aGuard(rEntity.maEventProtector); while (rEntity.maPendingEvents.size() >= rEntity.mnEventHighWater) { // pause parsing for a bit aGuard.clear(); // unlock rEntity.maProduceResume.wait(); rEntity.maProduceResume.reset(); aGuard.reset(); // lock } rEntity.maPendingEvents.push(rEntity.mpProducedEvents); rEntity.mpProducedEvents = 0; aGuard.clear(); // unlock rEntity.maConsumeResume.set(); } } bool FastSaxParserImpl::hasNamespaceURL( const OUString& rPrefix ) const { const Entity& rEntity = getEntity(); if (rEntity.maNamespaceCount.empty()) return false; OString aPrefix = OUStringToOString(rPrefix, RTL_TEXTENCODING_UTF8); sal_uInt32 nNamespace = rEntity.maNamespaceCount.top(); while (nNamespace--) { if (rEntity.maNamespaceDefines[nNamespace]->maPrefix == aPrefix) return true; } return false; } bool FastSaxParserImpl::consume(EventList *pEventList) { Entity& rEntity = getEntity(); for (EventList::iterator aEventIt = pEventList->begin(); aEventIt != pEventList->end(); ++aEventIt) { switch ((*aEventIt).maType) { case START_ELEMENT: rEntity.startElement( &(*aEventIt) ); break; case END_ELEMENT: rEntity.endElement(); break; case CHARACTERS: rEntity.characters( (*aEventIt).msChars ); break; case DONE: return false; case EXCEPTION: rEntity.throwException( mxDocumentLocator, false ); return false; default: assert(false); return false; } } return true; } void FastSaxParserImpl::pushEntity( const Entity& rEntity ) { maEntities.push( rEntity ); } void FastSaxParserImpl::popEntity() { maEntities.pop(); } Entity& FastSaxParserImpl::getEntity() { return maEntities.top(); } const Entity& FastSaxParserImpl::getEntity() const { return maEntities.top(); } // starts parsing with actual parser ! void FastSaxParserImpl::parse() { const int BUFFER_SIZE = 16 * 1024; Sequence< sal_Int8 > seqOut( BUFFER_SIZE ); Entity& rEntity = getEntity(); int nRead = 0; do { nRead = rEntity.maConverter.readAndConvert( seqOut, BUFFER_SIZE ); if( nRead <= 0 ) { XML_Parse( rEntity.mpParser, (const char*) seqOut.getConstArray(), 0, 1 ); break; } bool const bContinue = XML_STATUS_ERROR != XML_Parse(rEntity.mpParser, reinterpret_cast(seqOut.getConstArray()), nRead, 0); // callbacks used inside XML_Parse may have caught an exception if( !bContinue || rEntity.maSavedException.hasValue() ) rEntity.throwException( mxDocumentLocator, true ); } while( nRead > 0 ); rEntity.getEvent( DONE ); if( rEntity.mbEnableThreads ) produce( DONE ); } //------------------------------------------ // // The C-Callbacks // //----------------------------------------- void FastSaxParserImpl::callbackStartElement( const XML_Char* pwName, const XML_Char** awAttributes ) { Entity& rEntity = getEntity(); if( rEntity.maNamespaceCount.empty() ) { rEntity.maNamespaceCount.push(0); DefineNamespace( OString("xml"), "http://www.w3.org/XML/1998/namespace"); } else { rEntity.maNamespaceCount.push( rEntity.maNamespaceCount.top() ); } // create attribute map and process namespace instructions Event& rEvent = getEntity().getEvent( START_ELEMENT ); if (rEvent.mxAttributes.is()) rEvent.mxAttributes->clear(); else rEvent.mxAttributes.set( new FastAttributeList( rEntity.mxTokenHandler, rEntity.mpTokenHandler ) ); sal_Int32 nNameLen, nPrefixLen; const XML_Char *pName; const XML_Char *pPrefix; sal_Int32 nNamespaceToken = FastToken::DONTKNOW; if (!rEntity.maNamespaceStack.empty()) { rEvent.msNamespace = rEntity.maNamespaceStack.top().msName; nNamespaceToken = rEntity.maNamespaceStack.top().mnToken; } try { /* #158414# Each element may define new namespaces, also for attribues. First, process all namespace attributes and cache other attributes in a vector. Second, process the attributes after namespaces have been initialized. */ // #158414# first: get namespaces for (int i = 0; awAttributes[i]; i += 2) { assert(awAttributes[i+1]); if( awAttributes[i][0] != 'x' || strncmp( awAttributes[i], "xmlns", 5) != 0 ) continue; splitName( awAttributes[i], pPrefix, nPrefixLen, pName, nNameLen ); if( nPrefixLen ) { if( (nPrefixLen == 5) && (strncmp( pPrefix, "xmlns", 5 ) == 0) ) { DefineNamespace( OString( pName, nNameLen ), awAttributes[i+1] ); } } else { if( (nNameLen == 5) && (strcmp( pName, "xmlns" ) == 0) ) { // default namespace is the attribute value rEvent.msNamespace = OUString( awAttributes[i+1], strlen( awAttributes[i+1] ), RTL_TEXTENCODING_UTF8 ); nNamespaceToken = GetNamespaceToken( rEvent.msNamespace ); } } } // #158414# second: fill attribute list with other attributes for (int i = 0; awAttributes[i]; i += 2) { splitName( awAttributes[i], pPrefix, nPrefixLen, pName, nNameLen ); if( nPrefixLen ) { if( (nPrefixLen != 5) || (strncmp( pPrefix, "xmlns", 5 ) != 0) ) { sal_Int32 nAttributeToken = GetTokenWithPrefix( pPrefix, nPrefixLen, pName, nNameLen ); if( nAttributeToken != FastToken::DONTKNOW ) rEvent.mxAttributes->add( nAttributeToken, awAttributes[i+1] ); else rEvent.mxAttributes->addUnknown( GetNamespaceURL( pPrefix, nPrefixLen ), OString(pName, nNameLen), awAttributes[i+1] ); } } else { if( (nNameLen != 5) || (strcmp( pName, "xmlns" ) != 0) ) { sal_Int32 nAttributeToken = GetToken( pName, nNameLen ); if( nAttributeToken != FastToken::DONTKNOW ) rEvent.mxAttributes->add( nAttributeToken, awAttributes[i+1] ); else rEvent.mxAttributes->addUnknown( OString(pName, nNameLen), awAttributes[i+1] ); } } } splitName( pwName, pPrefix, nPrefixLen, pName, nNameLen ); if( nPrefixLen > 0 ) rEvent.mnElementToken = GetTokenWithPrefix( pPrefix, nPrefixLen, pName, nNameLen ); else if( !rEvent.msNamespace.isEmpty() ) rEvent.mnElementToken = GetTokenWithContextNamespace( nNamespaceToken, pName, nNameLen ); else rEvent.mnElementToken = GetToken( pName ); if( rEvent.mnElementToken == FastToken::DONTKNOW ) { if( nPrefixLen > 0 ) { rEvent.msNamespace = GetNamespaceURL( pPrefix, nPrefixLen ); nNamespaceToken = GetNamespaceToken( rEvent.msNamespace ); } rEvent.msElementName = OUString( pName, nNameLen, RTL_TEXTENCODING_UTF8 ); } else // token is always preferred. rEvent.msElementName = OUString( "" ); rEntity.maNamespaceStack.push( NameWithToken(rEvent.msNamespace, nNamespaceToken) ); if (rEntity.mbEnableThreads) produce( START_ELEMENT ); else rEntity.startElement( &rEvent ); } catch (const Exception& e) { rEntity.saveException( e ); } } void FastSaxParserImpl::callbackEndElement( SAL_UNUSED_PARAMETER const XML_Char* ) { Entity& rEntity = getEntity(); assert( !rEntity.maNamespaceCount.empty() ); if( !rEntity.maNamespaceCount.empty() ) rEntity.maNamespaceCount.pop(); assert( !rEntity.maNamespaceStack.empty() ); if( !rEntity.maNamespaceStack.empty() ) rEntity.maNamespaceStack.pop(); rEntity.getEvent( END_ELEMENT ); if (rEntity.mbEnableThreads) produce( END_ELEMENT ); else rEntity.endElement(); } void FastSaxParserImpl::callbackCharacters( const XML_Char* s, int nLen ) { Entity& rEntity = getEntity(); Event& rEvent = rEntity.getEvent( CHARACTERS ); rEvent.msChars = OUString(s, nLen, RTL_TEXTENCODING_UTF8); if (rEntity.mbEnableThreads) produce( CHARACTERS ); else rEntity.characters( rEvent.msChars ); } void FastSaxParserImpl::callbackEntityDecl( SAL_UNUSED_PARAMETER const XML_Char * /*entityName*/, SAL_UNUSED_PARAMETER int /*is_parameter_entity*/, const XML_Char *value, SAL_UNUSED_PARAMETER int /*value_length*/, SAL_UNUSED_PARAMETER const XML_Char * /*base*/, SAL_UNUSED_PARAMETER const XML_Char * /*systemId*/, SAL_UNUSED_PARAMETER const XML_Char * /*publicId*/, SAL_UNUSED_PARAMETER const XML_Char * /*notationName*/) { if (value) { // value != 0 means internal entity SAL_INFO("sax", "FastSaxParser: internal entity declaration, stopping"); XML_StopParser(getEntity().mpParser, XML_FALSE); getEntity().saveException( SAXParseException( "FastSaxParser: internal entity declaration, stopping", static_cast(mpFront), Any(), mxDocumentLocator->getPublicId(), mxDocumentLocator->getSystemId(), mxDocumentLocator->getLineNumber(), mxDocumentLocator->getColumnNumber() ) ); } else { SAL_INFO("sax", "FastSaxParser: ignoring external entity declaration"); } } int FastSaxParserImpl::callbackExternalEntityRef( XML_Parser parser, const XML_Char *context, SAL_UNUSED_PARAMETER const XML_Char * /*base*/, const XML_Char *systemId, const XML_Char *publicId ) { bool bOK = true; InputSource source; Entity& rCurrEntity = getEntity(); Entity aNewEntity( rCurrEntity ); if( rCurrEntity.mxEntityResolver.is() ) try { aNewEntity.maStructSource = rCurrEntity.mxEntityResolver->resolveEntity( OUString( publicId, strlen( publicId ), RTL_TEXTENCODING_UTF8 ) , OUString( systemId, strlen( systemId ), RTL_TEXTENCODING_UTF8 ) ); } catch (const SAXParseException & e) { rCurrEntity.saveException( e ); bOK = false; } catch (const SAXException& e) { rCurrEntity.saveException( SAXParseException( e.Message, e.Context, e.WrappedException, mxDocumentLocator->getPublicId(), mxDocumentLocator->getSystemId(), mxDocumentLocator->getLineNumber(), mxDocumentLocator->getColumnNumber() ) ); bOK = false; } if( aNewEntity.maStructSource.aInputStream.is() ) { aNewEntity.mpParser = XML_ExternalEntityParserCreate( parser, context, 0 ); if( !aNewEntity.mpParser ) { return false; } aNewEntity.maConverter.setInputStream( aNewEntity.maStructSource.aInputStream ); pushEntity( aNewEntity ); try { parse(); } catch (const SAXParseException& e) { rCurrEntity.saveException( e ); bOK = false; } catch (const IOException& e) { SAXException aEx; aEx.WrappedException <<= e; rCurrEntity.saveException( aEx ); bOK = false; } catch (const RuntimeException& e) { SAXException aEx; aEx.WrappedException <<= e; rCurrEntity.saveException( aEx ); bOK = false; } popEntity(); XML_ParserFree( aNewEntity.mpParser ); } return bOK; } FastSaxParser::FastSaxParser() : mpImpl(new FastSaxParserImpl(this)) {} FastSaxParser::~FastSaxParser() { delete mpImpl; } uno::Sequence FastSaxParser::getSupportedServiceNames_Static() { return FastSaxParserImpl::getSupportedServiceNames_Static(); } void FastSaxParser::parseStream( const xml::sax::InputSource& aInputSource ) throw (xml::sax::SAXException, io::IOException, uno::RuntimeException) { mpImpl->parseStream(aInputSource); } void FastSaxParser::setFastDocumentHandler( const uno::Reference& Handler ) throw (uno::RuntimeException) { mpImpl->setFastDocumentHandler(Handler); } void FastSaxParser::setTokenHandler( const uno::Reference& Handler ) throw (uno::RuntimeException) { mpImpl->setTokenHandler(Handler); } void FastSaxParser::registerNamespace( const OUString& NamespaceURL, sal_Int32 NamespaceToken ) throw (lang::IllegalArgumentException, uno::RuntimeException) { mpImpl->registerNamespace(NamespaceURL, NamespaceToken); } OUString FastSaxParser::getNamespaceURL( const OUString& rPrefix ) throw(lang::IllegalArgumentException, uno::RuntimeException) { return mpImpl->getNamespaceURL(rPrefix); } void FastSaxParser::setErrorHandler( const uno::Reference< xml::sax::XErrorHandler >& Handler ) throw (uno::RuntimeException) { mpImpl->setErrorHandler(Handler); } void FastSaxParser::setEntityResolver( const uno::Reference< xml::sax::XEntityResolver >& Resolver ) throw (uno::RuntimeException) { mpImpl->setEntityResolver(Resolver); } void FastSaxParser::setLocale( const lang::Locale& rLocale ) throw (uno::RuntimeException) { mpImpl->setLocale(rLocale); } OUString FastSaxParser::getImplementationName() throw (uno::RuntimeException) { return mpImpl->getImplementationName(); } sal_Bool FastSaxParser::supportsService( const OUString& ServiceName ) throw (uno::RuntimeException) { return mpImpl->supportsService(ServiceName); } uno::Sequence FastSaxParser::getSupportedServiceNames() throw (uno::RuntimeException) { return mpImpl->getSupportedServiceNames(); } bool FastSaxParser::hasNamespaceURL( const OUString& rPrefix ) const { return mpImpl->hasNamespaceURL(rPrefix); } } // namespace sax_fastparser Reference< XInterface > SAL_CALL FastSaxParser_CreateInstance( SAL_UNUSED_PARAMETER const Reference< XMultiServiceFactory > & ) throw(Exception) { sax_fastparser::FastSaxParser *p = new sax_fastparser::FastSaxParser; return Reference< XInterface > ( (OWeakObject * ) p ); } extern "C" { SAL_DLLPUBLIC_EXPORT void * SAL_CALL fastsax_component_getFactory( const sal_Char * pImplName, void * pServiceManager, SAL_UNUSED_PARAMETER void * /*pRegistryKey*/ ) { void * pRet = 0; if (pServiceManager ) { Reference< XSingleServiceFactory > xRet; Reference< XMultiServiceFactory > xSMgr( reinterpret_cast< XMultiServiceFactory * > ( pServiceManager ) ); OUString aImplementationName( OUString::createFromAscii( pImplName ) ); if ( aImplementationName == PARSER_IMPLEMENTATION_NAME ) { xRet = createSingleFactory( xSMgr, aImplementationName, FastSaxParser_CreateInstance, sax_fastparser::FastSaxParserImpl::getSupportedServiceNames_Static() ); } if (xRet.is()) { xRet->acquire(); pRet = xRet.get(); } } return pRet; } } /* vim:set shiftwidth=4 softtabstop=4 expandtab: */