diff options
Diffstat (limited to 'sax/source/expatwrap')
-rw-r--r-- | sax/source/expatwrap/attrlistimpl.cxx | 168 | ||||
-rw-r--r-- | sax/source/expatwrap/attrlistimpl.hxx | 85 | ||||
-rw-r--r-- | sax/source/expatwrap/factory.hxx | 34 | ||||
-rw-r--r-- | sax/source/expatwrap/makefile.mk | 76 | ||||
-rw-r--r-- | sax/source/expatwrap/sax_expat.cxx | 1107 | ||||
-rw-r--r-- | sax/source/expatwrap/saxwriter.cxx | 1454 | ||||
-rw-r--r-- | sax/source/expatwrap/xml2utf.cxx | 570 |
7 files changed, 3494 insertions, 0 deletions
diff --git a/sax/source/expatwrap/attrlistimpl.cxx b/sax/source/expatwrap/attrlistimpl.cxx new file mode 100644 index 000000000000..114eb653f648 --- /dev/null +++ b/sax/source/expatwrap/attrlistimpl.cxx @@ -0,0 +1,168 @@ +/************************************************************************* + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * Copyright 2000, 2010 Oracle and/or its affiliates. + * + * OpenOffice.org - a multi-platform office productivity suite + * + * This file is part of OpenOffice.org. + * + * OpenOffice.org is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License version 3 + * only, as published by the Free Software Foundation. + * + * OpenOffice.org is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License version 3 for more details + * (a copy is included in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU Lesser General Public License + * version 3 along with OpenOffice.org. If not, see + * <http://www.openoffice.org/license.html> + * for a copy of the LGPLv3 License. 
+ * + ************************************************************************/ + +#include "attrlistimpl.hxx" + +#include <vector> + +#include <cppuhelper/weak.hxx> + +using namespace ::std; +using namespace ::rtl; +using namespace ::cppu; +using namespace ::com::sun::star::uno; +using namespace ::com::sun::star::util; +using namespace ::com::sun::star::xml::sax; + + +namespace sax_expatwrap { +struct TagAttribute +{ + TagAttribute() + {} + TagAttribute( const OUString &aName, const OUString &aType , const OUString &aValue ) + { + this->sName = aName; + this->sType = aType; + this->sValue = aValue; + } + + OUString sName; + OUString sType; + OUString sValue; +}; + +struct AttributeList_impl +{ + AttributeList_impl() + { + // performance improvement during adding + vecAttribute.reserve(20); + } + vector<struct TagAttribute> vecAttribute; +}; + + + +sal_Int16 AttributeList::getLength(void) throw (RuntimeException) +{ + return static_cast<sal_Int16>(m_pImpl->vecAttribute.size()); +} + + +AttributeList::AttributeList( const AttributeList &r ) : + cppu::WeakImplHelper2<XAttributeList, XCloneable>() +{ + m_pImpl = new AttributeList_impl; + *m_pImpl = *(r.m_pImpl); +} + +OUString AttributeList::getNameByIndex(sal_Int16 i) throw (RuntimeException) +{ + if( std::vector< TagAttribute >::size_type(i) < m_pImpl->vecAttribute.size() ) { + return m_pImpl->vecAttribute[i].sName; + } + return OUString(); +} + + +OUString AttributeList::getTypeByIndex(sal_Int16 i) throw (RuntimeException) +{ + if( std::vector< TagAttribute >::size_type(i) < m_pImpl->vecAttribute.size() ) { + return m_pImpl->vecAttribute[i].sType; + } + return OUString(); +} + +OUString AttributeList::getValueByIndex(sal_Int16 i) throw (RuntimeException) +{ + if( std::vector< TagAttribute >::size_type(i) < m_pImpl->vecAttribute.size() ) { + return m_pImpl->vecAttribute[i].sValue; + } + return OUString(); + +} + +OUString AttributeList::getTypeByName( const OUString& sName ) throw (RuntimeException) +{ + 
vector<struct TagAttribute>::iterator ii = m_pImpl->vecAttribute.begin(); + + for( ; ii != m_pImpl->vecAttribute.end() ; ii ++ ) { + if( (*ii).sName == sName ) { + return (*ii).sType; + } + } + return OUString(); +} + +OUString AttributeList::getValueByName(const OUString& sName) throw (RuntimeException) +{ + vector<struct TagAttribute>::iterator ii = m_pImpl->vecAttribute.begin(); + + for( ; ii != m_pImpl->vecAttribute.end() ; ii ++ ) { + if( (*ii).sName == sName ) { + return (*ii).sValue; + } + } + return OUString(); +} + + +Reference< XCloneable > AttributeList::createClone() throw (RuntimeException) +{ + AttributeList *p = new AttributeList( *this ); + return Reference< XCloneable > ( (XCloneable * ) p ); +} + + + +AttributeList::AttributeList() +{ + m_pImpl = new AttributeList_impl; +} + + + +AttributeList::~AttributeList() +{ + delete m_pImpl; +} + + +void AttributeList::addAttribute( const OUString &sName , + const OUString &sType , + const OUString &sValue ) +{ + m_pImpl->vecAttribute.push_back( TagAttribute( sName , sType , sValue ) ); +} + +void AttributeList::clear() +{ + m_pImpl->vecAttribute.clear(); +} + +} diff --git a/sax/source/expatwrap/attrlistimpl.hxx b/sax/source/expatwrap/attrlistimpl.hxx new file mode 100644 index 000000000000..aaf6cf84b359 --- /dev/null +++ b/sax/source/expatwrap/attrlistimpl.hxx @@ -0,0 +1,85 @@ +/************************************************************************* + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * Copyright 2000, 2010 Oracle and/or its affiliates. + * + * OpenOffice.org - a multi-platform office productivity suite + * + * This file is part of OpenOffice.org. + * + * OpenOffice.org is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License version 3 + * only, as published by the Free Software Foundation. 
+ * + * OpenOffice.org is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License version 3 for more details + * (a copy is included in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU Lesser General Public License + * version 3 along with OpenOffice.org. If not, see + * <http://www.openoffice.org/license.html> + * for a copy of the LGPLv3 License. + * + ************************************************************************/ + +#ifndef _SAX_ATTRLISTIMPL_HXX +#define _SAX_ATTRLISTIMPL_HXX + +#include "sal/config.h" +//#include "sax/saxdllapi.h" + +#include <cppuhelper/implbase2.hxx> + +#include <com/sun/star/uno/RuntimeException.hpp> +#include <com/sun/star/util/XCloneable.hpp> +#include <com/sun/star/xml/sax/XAttributeList.hpp> + +namespace sax_expatwrap +{ + +struct AttributeList_impl; + +//FIXME +class /*SAX_DLLPUBLIC*/ AttributeList : + public ::cppu::WeakImplHelper2< + ::com::sun::star::xml::sax::XAttributeList, + ::com::sun::star::util::XCloneable > +{ +public: + AttributeList(); + AttributeList( const AttributeList & ); + virtual ~AttributeList(); + + void addAttribute( const ::rtl::OUString &sName , + const ::rtl::OUString &sType , const ::rtl::OUString &sValue ); + void clear(); +public: + // XAttributeList + virtual sal_Int16 SAL_CALL getLength(void) + throw(::com::sun::star::uno::RuntimeException); + virtual ::rtl::OUString SAL_CALL getNameByIndex(sal_Int16 i) + throw(::com::sun::star::uno::RuntimeException); + virtual ::rtl::OUString SAL_CALL getTypeByIndex(sal_Int16 i) + throw(::com::sun::star::uno::RuntimeException); + virtual ::rtl::OUString SAL_CALL getTypeByName(const ::rtl::OUString& aName) + throw(::com::sun::star::uno::RuntimeException); + virtual ::rtl::OUString SAL_CALL getValueByIndex(sal_Int16 i) + 
throw(::com::sun::star::uno::RuntimeException); + virtual ::rtl::OUString SAL_CALL getValueByName(const ::rtl::OUString& aName) + throw( ::com::sun::star::uno::RuntimeException); + + // XCloneable + virtual ::com::sun::star::uno::Reference< XCloneable > SAL_CALL + createClone() throw(::com::sun::star::uno::RuntimeException); + +private: + struct AttributeList_impl *m_pImpl; +}; + +} + +#endif + diff --git a/sax/source/expatwrap/factory.hxx b/sax/source/expatwrap/factory.hxx new file mode 100644 index 000000000000..c6a566781044 --- /dev/null +++ b/sax/source/expatwrap/factory.hxx @@ -0,0 +1,34 @@ +/************************************************************************* + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * Copyright 2000, 2010 Oracle and/or its affiliates. + * + * OpenOffice.org - a multi-platform office productivity suite + * + * This file is part of OpenOffice.org. + * + * OpenOffice.org is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License version 3 + * only, as published by the Free Software Foundation. + * + * OpenOffice.org is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License version 3 for more details + * (a copy is included in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU Lesser General Public License + * version 3 along with OpenOffice.org. If not, see + * <http://www.openoffice.org/license.html> + * for a copy of the LGPLv3 License. 
+ * + ************************************************************************/ +namespace sax_expatwrap { +Reference< XInterface > SAL_CALL SaxWriter_CreateInstance( + const Reference< XMultiServiceFactory > & rSMgr ) throw (Exception); +OUString SaxWriter_getServiceName() throw(); +OUString SaxWriter_getImplementationName() throw(); +Sequence< OUString > SaxWriter_getSupportedServiceNames(void) throw(); +} + diff --git a/sax/source/expatwrap/makefile.mk b/sax/source/expatwrap/makefile.mk new file mode 100644 index 000000000000..bcb73b0443ac --- /dev/null +++ b/sax/source/expatwrap/makefile.mk @@ -0,0 +1,76 @@ +#************************************************************************* +# +# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. +# +# Copyright 2000, 2010 Oracle and/or its affiliates. +# +# OpenOffice.org - a multi-platform office productivity suite +# +# This file is part of OpenOffice.org. +# +# OpenOffice.org is free software: you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License version 3 +# only, as published by the Free Software Foundation. +# +# OpenOffice.org is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Lesser General Public License version 3 for more details +# (a copy is included in the LICENSE file that accompanied this code). +# +# You should have received a copy of the GNU Lesser General Public License +# version 3 along with OpenOffice.org. If not, see +# <http://www.openoffice.org/license.html> +# for a copy of the LGPLv3 License. +# +#************************************************************************* +PRJ=..$/.. 

# Project name, name of the library built here, and exception support.
PRJNAME=sax
TARGET=sax.uno
ENABLE_EXCEPTIONS=TRUE

# --- Settings -----------------------------------------------------

.INCLUDE : settings.mk
# NOTE(review): DLLPRE is set to empty after settings.mk; presumably this
# drops the default library-name prefix - confirm against settings.mk.
DLLPRE =

# Tell the sources when the system zlib is used.
.IF "$(SYSTEM_ZLIB)" == "YES"
CFLAGS+=-DSYSTEM_ZLIB
.ENDIF

# With the system expat only SYSTEM_EXPAT is defined; otherwise XML_UNICODE
# is defined, which sax_expat.cxx tests to choose its XML_Char conversion.
.IF "$(SYSTEM_EXPAT)" == "YES"
CFLAGS+=-DSYSTEM_EXPAT
.ELSE
CFLAGS += -DXML_UNICODE
.ENDIF

#-----------------------------------------------------------

# Object files that make up the library.
SLOFILES =\
    $(SLO)$/xml2utf.obj\
    $(SLO)$/attrlistimpl.obj\
    $(SLO)$/sax_expat.obj \
    $(SLO)$/saxwriter.obj

SHL1TARGET= $(TARGET)
SHL1IMPLIB= i$(TARGET)

# Libraries the shared library is linked against.
SHL1STDLIBS= \
    $(SALLIB) \
    $(CPPULIB) \
    $(CPPUHELPERLIB)\
    $(EXPAT3RDLIB)

SHL1DEPN=
SHL1VERSIONMAP= $(SOLARENV)$/src$/component.map
SHL1LIBS= $(SLB)$/$(TARGET).lib
SHL1DEF= $(MISC)$/$(SHL1TARGET).def
DEF1NAME= $(SHL1TARGET)

# --- Targets ------------------------------------------------------

.INCLUDE : target.mk



diff --git a/sax/source/expatwrap/sax_expat.cxx b/sax/source/expatwrap/sax_expat.cxx
new file mode 100644
index 000000000000..4ffebfa3590d
--- /dev/null
+++ b/sax/source/expatwrap/sax_expat.cxx
@@ -0,0 +1,1107 @@
/*************************************************************************
 *
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * Copyright 2000, 2010 Oracle and/or its affiliates.
 *
 * OpenOffice.org - a multi-platform office productivity suite
 *
 * This file is part of OpenOffice.org.
 *
 * OpenOffice.org is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License version 3
 * only, as published by the Free Software Foundation.
 *
 * OpenOffice.org is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License version 3 for more details
 * (a copy is included in the LICENSE file that accompanied this code).
+ * + * You should have received a copy of the GNU Lesser General Public License + * version 3 along with OpenOffice.org. If not, see + * <http://www.openoffice.org/license.html> + * for a copy of the LGPLv3 License. + * + ************************************************************************/ +#include <stdlib.h> +#include <string.h> +#include <sal/alloca.h> +#include <vector> + +#include <osl/diagnose.h> + +#include <com/sun/star/lang/XServiceInfo.hpp> +#include <com/sun/star/util/XCloneable.hpp> +#include <com/sun/star/xml/sax/XExtendedDocumentHandler.hpp> +#include <com/sun/star/xml/sax/XParser.hpp> +#include <com/sun/star/xml/sax/SAXParseException.hpp> +#include <com/sun/star/io/XSeekable.hpp> + +#include <cppuhelper/factory.hxx> +#include <cppuhelper/weak.hxx> +#include <cppuhelper/implbase1.hxx> +#include <cppuhelper/implbase2.hxx> + +#include <expat.h> + +using namespace ::rtl; +using namespace ::std; +using namespace ::osl; +using namespace ::cppu; +using namespace ::com::sun::star::uno; +using namespace ::com::sun::star::lang; +using namespace ::com::sun::star::registry; +using namespace ::com::sun::star::xml::sax; +using namespace ::com::sun::star::util; +using namespace ::com::sun::star::io; + +#include "factory.hxx" +#include "attrlistimpl.hxx" +#include "xml2utf.hxx" + +namespace sax_expatwrap { + +// Useful macros for correct String conversion depending on the choosen expat-mode +#ifdef XML_UNICODE +OUString XmlNChar2OUString( const XML_Char *p , int nLen ) +{ + if( p ) { + if( sizeof( sal_Unicode ) == sizeof( XML_Char ) ) + { + return OUString( (sal_Unicode*)p,nLen); + } + else + { + sal_Unicode *pWchar = (sal_Unicode *)alloca( sizeof( sal_Unicode ) * nLen ); + for( int n = 0 ; n < nLen ; n++ ) { + pWchar[n] = (sal_Unicode) p[n]; + } + return OUString( pWchar , nLen ); + } + } + else { + return OUString(); + } +} + +OUString XmlChar2OUString( const XML_Char *p ) +{ + if( p ) { + int nLen; + for( nLen = 0 ; p[nLen] ; nLen ++ ) + ; + return 
XmlNChar2OUString( p , nLen ); + } + else return OUString(); +} + + +#define XML_CHAR_TO_OUSTRING(x) XmlChar2OUString(x) +#define XML_CHAR_N_TO_USTRING(x,n) XmlNChar2OUString(x,n) +#else +#define XML_CHAR_TO_OUSTRING(x) OUString(x , strlen( x ), RTL_TEXTENCODING_UTF8) +#define XML_CHAR_N_TO_USTRING(x,n) OUString(x,n, RTL_TEXTENCODING_UTF8 ) +#endif + + +/* +* The following macro encapsulates any call to an event handler. +* It ensures, that exceptions thrown by the event handler are +* treated properly. +*/ +#define CALL_ELEMENT_HANDLER_AND_CARE_FOR_EXCEPTIONS(pThis,call) \ + if( ! pThis->bExceptionWasThrown ) { \ + try {\ + pThis->call;\ + }\ + catch( SAXParseException &e ) {\ + pThis->callErrorHandler( pThis , e );\ + }\ + catch( SAXException &e ) {\ + pThis->callErrorHandler( pThis , SAXParseException(\ + e.Message, \ + e.Context, \ + e.WrappedException,\ + pThis->rDocumentLocator->getPublicId(),\ + pThis->rDocumentLocator->getSystemId(),\ + pThis->rDocumentLocator->getLineNumber(),\ + pThis->rDocumentLocator->getColumnNumber()\ + ) );\ + }\ + catch( com::sun::star::uno::RuntimeException &e ) {\ + pThis->bExceptionWasThrown = sal_True; \ + pThis->bRTExceptionWasThrown = sal_True; \ + pImpl->rtexception = e; \ + }\ + }\ + ((void)0) + +#define IMPLEMENTATION_NAME "com.sun.star.comp.extensions.xml.sax.ParserExpat" +#define SERVICE_NAME "com.sun.star.xml.sax.Parser" + +class SaxExpatParser_Impl; + + +// This class implements the external Parser interface +class SaxExpatParser : + public WeakImplHelper2< + XParser, + XServiceInfo + > +{ + +public: + SaxExpatParser(); + ~SaxExpatParser(); + +public: + + // The implementation details + static Sequence< OUString > getSupportedServiceNames_Static(void) throw (); + +public: + // The SAX-Parser-Interface + virtual void SAL_CALL parseStream( const InputSource& structSource) + throw ( SAXException, + IOException, + RuntimeException); + virtual void SAL_CALL setDocumentHandler(const Reference< XDocumentHandler > & xHandler) + 
throw (RuntimeException); + + virtual void SAL_CALL setErrorHandler(const Reference< XErrorHandler > & xHandler) + throw (RuntimeException); + virtual void SAL_CALL setDTDHandler(const Reference < XDTDHandler > & xHandler) + throw (RuntimeException); + virtual void SAL_CALL setEntityResolver(const Reference< XEntityResolver >& xResolver) + throw (RuntimeException); + + virtual void SAL_CALL setLocale( const Locale &locale ) throw (RuntimeException); + +public: // XServiceInfo + OUString SAL_CALL getImplementationName() throw (); + Sequence< OUString > SAL_CALL getSupportedServiceNames(void) throw (); + sal_Bool SAL_CALL supportsService(const OUString& ServiceName) throw (); + +private: + + SaxExpatParser_Impl *m_pImpl; + +}; + +//-------------------------------------- +// the extern interface +//--------------------------------------- +Reference< XInterface > SAL_CALL SaxExpatParser_CreateInstance( + const Reference< XMultiServiceFactory > & ) throw(Exception) +{ + SaxExpatParser *p = new SaxExpatParser; + + return Reference< XInterface > ( (OWeakObject * ) p ); +} + + + +Sequence< OUString > SaxExpatParser::getSupportedServiceNames_Static(void) throw () +{ + Sequence<OUString> aRet(1); + aRet.getArray()[0] = ::rtl::OUString( RTL_CONSTASCII_USTRINGPARAM(SERVICE_NAME) ); + return aRet; +} + + +//--------------------------------------------- +// the implementation part +//--------------------------------------------- + + +// Entity binds all information neede for a single file +struct Entity +{ + InputSource structSource; + XML_Parser pParser; + XMLFile2UTFConverter converter; +}; + + +class SaxExpatParser_Impl +{ +public: // module scope + Mutex aMutex; + + Reference< XDocumentHandler > rDocumentHandler; + Reference< XExtendedDocumentHandler > rExtendedDocumentHandler; + + Reference< XErrorHandler > rErrorHandler; + Reference< XDTDHandler > rDTDHandler; + Reference< XEntityResolver > rEntityResolver; + Reference < XLocator > rDocumentLocator; + + + Reference < 
XAttributeList > rAttrList; + AttributeList *pAttrList; + + // External entity stack + vector<struct Entity> vecEntity; + void pushEntity( const struct Entity &entity ) + { vecEntity.push_back( entity ); } + void popEntity() + { vecEntity.pop_back( ); } + struct Entity &getEntity() + { return vecEntity.back(); } + + + // Exception cannot be thrown through the C-XmlParser (possible resource leaks), + // therefor the exception must be saved somewhere. + SAXParseException exception; + RuntimeException rtexception; + sal_Bool bExceptionWasThrown; + sal_Bool bRTExceptionWasThrown; + + Locale locale; + +public: + // the C-Callbacks for the expat parser + void static callbackStartElement(void *userData, const XML_Char *name , const XML_Char **atts); + void static callbackEndElement(void *userData, const XML_Char *name); + void static callbackCharacters( void *userData , const XML_Char *s , int nLen ); + void static callbackProcessingInstruction( void *userData , + const XML_Char *sTarget , + const XML_Char *sData ); + + void static callbackUnparsedEntityDecl( void *userData , + const XML_Char *entityName, + const XML_Char *base, + const XML_Char *systemId, + const XML_Char *publicId, + const XML_Char *notationName); + + void static callbackNotationDecl( void *userData, + const XML_Char *notationName, + const XML_Char *base, + const XML_Char *systemId, + const XML_Char *publicId); + + int static callbackExternalEntityRef( XML_Parser parser, + const XML_Char *openEntityNames, + const XML_Char *base, + const XML_Char *systemId, + const XML_Char *publicId); + + int static callbackUnknownEncoding(void *encodingHandlerData, + const XML_Char *name, + XML_Encoding *info); + + void static callbackDefault( void *userData, const XML_Char *s, int len); + + void static callbackStartCDATA( void *userData ); + void static callbackEndCDATA( void *userData ); + void static callbackComment( void *userData , const XML_Char *s ); + void static callErrorHandler( SaxExpatParser_Impl *pImpl , 
const SAXParseException &e ); + +public: + void parse(); +}; + +extern "C" +{ + static void call_callbackStartElement(void *userData, const XML_Char *name , const XML_Char **atts) + { + SaxExpatParser_Impl::callbackStartElement(userData,name,atts); + } + static void call_callbackEndElement(void *userData, const XML_Char *name) + { + SaxExpatParser_Impl::callbackEndElement(userData,name); + } + static void call_callbackCharacters( void *userData , const XML_Char *s , int nLen ) + { + SaxExpatParser_Impl::callbackCharacters(userData,s,nLen); + } + static void call_callbackProcessingInstruction(void *userData,const XML_Char *sTarget,const XML_Char *sData ) + { + SaxExpatParser_Impl::callbackProcessingInstruction(userData,sTarget,sData ); + } + static void call_callbackUnparsedEntityDecl(void *userData , + const XML_Char *entityName, + const XML_Char *base, + const XML_Char *systemId, + const XML_Char *publicId, + const XML_Char *notationName) + { + SaxExpatParser_Impl::callbackUnparsedEntityDecl(userData,entityName,base,systemId,publicId,notationName); + } + static void call_callbackNotationDecl(void *userData, + const XML_Char *notationName, + const XML_Char *base, + const XML_Char *systemId, + const XML_Char *publicId) + { + SaxExpatParser_Impl::callbackNotationDecl(userData,notationName,base,systemId,publicId); + } + static int call_callbackExternalEntityRef(XML_Parser parser, + const XML_Char *openEntityNames, + const XML_Char *base, + const XML_Char *systemId, + const XML_Char *publicId) + { + return SaxExpatParser_Impl::callbackExternalEntityRef(parser,openEntityNames,base,systemId,publicId); + } + static int call_callbackUnknownEncoding(void *encodingHandlerData, + const XML_Char *name, + XML_Encoding *info) + { + return SaxExpatParser_Impl::callbackUnknownEncoding(encodingHandlerData,name,info); + } + static void call_callbackDefault( void *userData, const XML_Char *s, int len) + { + SaxExpatParser_Impl::callbackDefault(userData,s,len); + } + static void 
call_callbackStartCDATA( void *userData ) + { + SaxExpatParser_Impl::callbackStartCDATA(userData); + } + static void call_callbackEndCDATA( void *userData ) + { + SaxExpatParser_Impl::callbackEndCDATA(userData); + } + static void call_callbackComment( void *userData , const XML_Char *s ) + { + SaxExpatParser_Impl::callbackComment(userData,s); + } +} + + +//--------------------------------------------- +// LocatorImpl +//--------------------------------------------- +class LocatorImpl : + public WeakImplHelper2< XLocator, com::sun::star::io::XSeekable > + // should use a different interface for stream positions! +{ +public: + LocatorImpl( SaxExpatParser_Impl *p ) + { + m_pParser = p; + } + +public: //XLocator + virtual sal_Int32 SAL_CALL getColumnNumber(void) throw () + { + return XML_GetCurrentColumnNumber( m_pParser->getEntity().pParser ); + } + virtual sal_Int32 SAL_CALL getLineNumber(void) throw () + { + return XML_GetCurrentLineNumber( m_pParser->getEntity().pParser ); + } + virtual OUString SAL_CALL getPublicId(void) throw () + { + return m_pParser->getEntity().structSource.sPublicId; + } + virtual OUString SAL_CALL getSystemId(void) throw () + { + return m_pParser->getEntity().structSource.sSystemId; + } + + // XSeekable (only for getPosition) + + virtual void SAL_CALL seek( sal_Int64 ) throw() + { + } + virtual sal_Int64 SAL_CALL getPosition() throw() + { + return XML_GetCurrentByteIndex( m_pParser->getEntity().pParser ); + } + virtual ::sal_Int64 SAL_CALL getLength() throw() + { + return 0; + } + +private: + + SaxExpatParser_Impl *m_pParser; +}; + + + + +SaxExpatParser::SaxExpatParser( ) +{ + m_pImpl = new SaxExpatParser_Impl; + + LocatorImpl *pLoc = new LocatorImpl( m_pImpl ); + m_pImpl->rDocumentLocator = Reference< XLocator > ( pLoc ); + + // performance-Improvment. Reference is needed when calling the startTag callback. 
+ // Handing out the same object with every call is allowed (see sax-specification) + m_pImpl->pAttrList = new AttributeList; + m_pImpl->rAttrList = Reference< XAttributeList > ( m_pImpl->pAttrList ); + + m_pImpl->bExceptionWasThrown = sal_False; + m_pImpl->bRTExceptionWasThrown = sal_False; +} + +SaxExpatParser::~SaxExpatParser() +{ + delete m_pImpl; +} + + +/*************** +* +* parseStream does Parser-startup initializations. The SaxExpatParser_Impl::parse() method does +* the file-specific initialization work. (During a parser run, external files may be opened) +* +****************/ +void SaxExpatParser::parseStream( const InputSource& structSource) + throw (SAXException, + IOException, + RuntimeException) +{ + // Only one text at one time + MutexGuard guard( m_pImpl->aMutex ); + + + struct Entity entity; + entity.structSource = structSource; + + if( ! entity.structSource.aInputStream.is() ) + { + throw SAXException( OUString::createFromAscii( "No input source" ) , + Reference< XInterface > () , Any() ); + } + + entity.converter.setInputStream( entity.structSource.aInputStream ); + if( entity.structSource.sEncoding.getLength() ) + { + entity.converter.setEncoding( + OUStringToOString( entity.structSource.sEncoding , RTL_TEXTENCODING_ASCII_US ) ); + } + + // create parser with proper encoding + entity.pParser = XML_ParserCreate( 0 ); + if( ! 
entity.pParser ) + { + throw SAXException( OUString::createFromAscii( "Couldn't create parser" ) , + Reference< XInterface > (), Any() ); + } + + // set all necessary C-Callbacks + XML_SetUserData( entity.pParser , m_pImpl ); + XML_SetElementHandler( entity.pParser , + call_callbackStartElement , + call_callbackEndElement ); + XML_SetCharacterDataHandler( entity.pParser , call_callbackCharacters ); + XML_SetProcessingInstructionHandler(entity.pParser , + call_callbackProcessingInstruction ); + XML_SetUnparsedEntityDeclHandler( entity.pParser, + call_callbackUnparsedEntityDecl ); + XML_SetNotationDeclHandler( entity.pParser, call_callbackNotationDecl ); + XML_SetExternalEntityRefHandler( entity.pParser, + call_callbackExternalEntityRef); + XML_SetUnknownEncodingHandler( entity.pParser, call_callbackUnknownEncoding ,0); + + if( m_pImpl->rExtendedDocumentHandler.is() ) { + + // These handlers just delegate calls to the ExtendedHandler. If no extended handler is + // given, these callbacks can be ignored + XML_SetDefaultHandlerExpand( entity.pParser, call_callbackDefault ); + XML_SetCommentHandler( entity.pParser, call_callbackComment ); + XML_SetCdataSectionHandler( entity.pParser , + call_callbackStartCDATA , + call_callbackEndCDATA ); + } + + + m_pImpl->exception = SAXParseException(); + m_pImpl->pushEntity( entity ); + try + { + // start the document + if( m_pImpl->rDocumentHandler.is() ) { + m_pImpl->rDocumentHandler->setDocumentLocator( m_pImpl->rDocumentLocator ); + m_pImpl->rDocumentHandler->startDocument(); + } + + m_pImpl->parse(); + + // finish document + if( m_pImpl->rDocumentHandler.is() ) { + m_pImpl->rDocumentHandler->endDocument(); + } + } +// catch( SAXParseException &e ) +// { +// m_pImpl->popEntity(); +// XML_ParserFree( entity.pParser ); +// Any aAny; +// aAny <<= e; +// throw SAXException( e.Message, e.Context, aAny ); +// } + catch( SAXException & ) + { + m_pImpl->popEntity(); + XML_ParserFree( entity.pParser ); + throw; + } + catch( IOException & 
) + { + m_pImpl->popEntity(); + XML_ParserFree( entity.pParser ); + throw; + } + catch( RuntimeException & ) + { + m_pImpl->popEntity(); + XML_ParserFree( entity.pParser ); + throw; + } + + m_pImpl->popEntity(); + XML_ParserFree( entity.pParser ); +} + +void SaxExpatParser::setDocumentHandler(const Reference< XDocumentHandler > & xHandler) + throw (RuntimeException) +{ + m_pImpl->rDocumentHandler = xHandler; + m_pImpl->rExtendedDocumentHandler = + Reference< XExtendedDocumentHandler >( xHandler , UNO_QUERY ); +} + +void SaxExpatParser::setErrorHandler(const Reference< XErrorHandler > & xHandler) + throw (RuntimeException) +{ + m_pImpl->rErrorHandler = xHandler; +} + +void SaxExpatParser::setDTDHandler(const Reference< XDTDHandler > & xHandler) + throw (RuntimeException) +{ + m_pImpl->rDTDHandler = xHandler; +} + +void SaxExpatParser::setEntityResolver(const Reference < XEntityResolver > & xResolver) + throw (RuntimeException) +{ + m_pImpl->rEntityResolver = xResolver; +} + + +void SaxExpatParser::setLocale( const Locale & locale ) throw (RuntimeException) +{ + m_pImpl->locale = locale; +} + +// XServiceInfo +OUString SaxExpatParser::getImplementationName() throw () +{ + return OUString::createFromAscii( IMPLEMENTATION_NAME ); +} + +// XServiceInfo +sal_Bool SaxExpatParser::supportsService(const OUString& ServiceName) throw () +{ + Sequence< OUString > aSNL = getSupportedServiceNames(); + const OUString * pArray = aSNL.getConstArray(); + + for( sal_Int32 i = 0; i < aSNL.getLength(); i++ ) + if( pArray[i] == ServiceName ) + return sal_True; + + return sal_False; +} + +// XServiceInfo +Sequence< OUString > SaxExpatParser::getSupportedServiceNames(void) throw () +{ + + Sequence<OUString> seq(1); + seq.getArray()[0] = OUString::createFromAscii( SERVICE_NAME ); + return seq; +} + + +/*--------------------------------------- +* +* Helper functions and classes +* +* +*-------------------------------------------*/ +OUString getErrorMessage( XML_Error xmlE, OUString 
sSystemId , sal_Int32 nLine ) +{ + OUString Message; + if( XML_ERROR_NONE == xmlE ) { + Message = OUString::createFromAscii( "No" ); + } + else if( XML_ERROR_NO_MEMORY == xmlE ) { + Message = OUString::createFromAscii( "no memory" ); + } + else if( XML_ERROR_SYNTAX == xmlE ) { + Message = OUString::createFromAscii( "syntax" ); + } + else if( XML_ERROR_NO_ELEMENTS == xmlE ) { + Message = OUString::createFromAscii( "no elements" ); + } + else if( XML_ERROR_INVALID_TOKEN == xmlE ) { + Message = OUString::createFromAscii( "invalid token" ); + } + else if( XML_ERROR_UNCLOSED_TOKEN == xmlE ) { + Message = OUString::createFromAscii( "unclosed token" ); + } + else if( XML_ERROR_PARTIAL_CHAR == xmlE ) { + Message = OUString::createFromAscii( "partial char" ); + } + else if( XML_ERROR_TAG_MISMATCH == xmlE ) { + Message = OUString::createFromAscii( "tag mismatch" ); + } + else if( XML_ERROR_DUPLICATE_ATTRIBUTE == xmlE ) { + Message = OUString::createFromAscii( "duplicate attribute" ); + } + else if( XML_ERROR_JUNK_AFTER_DOC_ELEMENT == xmlE ) { + Message = OUString::createFromAscii( "junk after doc element" ); + } + else if( XML_ERROR_PARAM_ENTITY_REF == xmlE ) { + Message = OUString::createFromAscii( "parameter entity reference" ); + } + else if( XML_ERROR_UNDEFINED_ENTITY == xmlE ) { + Message = OUString::createFromAscii( "undefined entity" ); + } + else if( XML_ERROR_RECURSIVE_ENTITY_REF == xmlE ) { + Message = OUString::createFromAscii( "recursive entity reference" ); + } + else if( XML_ERROR_ASYNC_ENTITY == xmlE ) { + Message = OUString::createFromAscii( "async entity" ); + } + else if( XML_ERROR_BAD_CHAR_REF == xmlE ) { + Message = OUString::createFromAscii( "bad char reference" ); + } + else if( XML_ERROR_BINARY_ENTITY_REF == xmlE ) { + Message = OUString::createFromAscii( "binary entity reference" ); + } + else if( XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF == xmlE ) { + Message = OUString::createFromAscii( "attribute external entity reference" ); + } + else if( 
XML_ERROR_MISPLACED_XML_PI == xmlE ) { + Message = OUString::createFromAscii( "misplaced xml processing instruction" ); + } + else if( XML_ERROR_UNKNOWN_ENCODING == xmlE ) { + Message = OUString::createFromAscii( "unknown encoding" ); + } + else if( XML_ERROR_INCORRECT_ENCODING == xmlE ) { + Message = OUString::createFromAscii( "incorrect encoding" ); + } + else if( XML_ERROR_UNCLOSED_CDATA_SECTION == xmlE ) { + Message = OUString::createFromAscii( "unclosed cdata section" ); + } + else if( XML_ERROR_EXTERNAL_ENTITY_HANDLING == xmlE ) { + Message = OUString::createFromAscii( "external entity reference" ); + } + else if( XML_ERROR_NOT_STANDALONE == xmlE ) { + Message = OUString::createFromAscii( "not standalone" ); + } + + OUString str = OUString::createFromAscii( "[" ); + str += sSystemId; + str += OUString::createFromAscii( " line " ); + str += OUString::valueOf( nLine ); + str += OUString::createFromAscii( "]: " ); + str += Message; + str += OUString::createFromAscii( "error" ); + + return str; +} + + +// starts parsing with actual parser ! +void SaxExpatParser_Impl::parse( ) +{ + const int nBufSize = 16*1024; + + int nRead = nBufSize; + Sequence< sal_Int8 > seqOut(nBufSize); + + while( nRead ) { + nRead = getEntity().converter.readAndConvert( seqOut , nBufSize ); + + if( ! nRead ) { + XML_Parse( getEntity().pParser , + ( const char * ) seqOut.getArray() , + 0 , + 1 ); + break; + } + + sal_Bool bContinue = ( XML_Parse( getEntity().pParser , + (const char *) seqOut.getArray(), + nRead, + 0 ) != 0 ); + + if( ! bContinue || this->bExceptionWasThrown ) { + + if ( this->bRTExceptionWasThrown ) + throw rtexception; + + // Error during parsing ! 
+ XML_Error xmlE = XML_GetErrorCode( getEntity().pParser ); + OUString sSystemId = rDocumentLocator->getSystemId(); + sal_Int32 nLine = rDocumentLocator->getLineNumber(); + + SAXParseException aExcept( + getErrorMessage(xmlE , sSystemId, nLine) , + Reference< XInterface >(), + Any( &exception , getCppuType( &exception) ), + rDocumentLocator->getPublicId(), + rDocumentLocator->getSystemId(), + rDocumentLocator->getLineNumber(), + rDocumentLocator->getColumnNumber() + ); + + if( rErrorHandler.is() ) { + + // error handler is set, so the handler may throw the exception + Any a; + a <<= aExcept; + rErrorHandler->fatalError( a ); + } + + // Error handler has not thrown an exception, but parsing cannot go on, + // so an exception MUST be thrown. + throw aExcept; + } // if( ! bContinue ) + } // while +} + +//------------------------------------------ +// +// The C-Callbacks +// +//----------------------------------------- +void SaxExpatParser_Impl::callbackStartElement( void *pvThis , + const XML_Char *pwName , + const XML_Char **awAttributes ) +{ + // in case of two concurrent threads, there is only the danger of an leak, + // which is neglectable for one string + static OUString g_CDATA( RTL_CONSTASCII_USTRINGPARAM( "CDATA" ) ); + + SaxExpatParser_Impl *pImpl = ((SaxExpatParser_Impl*)pvThis); + + if( pImpl->rDocumentHandler.is() ) { + + int i = 0; + pImpl->pAttrList->clear(); + + while( awAttributes[i] ) { + OSL_ASSERT( awAttributes[i+1] ); + pImpl->pAttrList->addAttribute( + XML_CHAR_TO_OUSTRING( awAttributes[i] ) , + g_CDATA , // expat doesn't know types + XML_CHAR_TO_OUSTRING( awAttributes[i+1] ) ); + i +=2; + } + + CALL_ELEMENT_HANDLER_AND_CARE_FOR_EXCEPTIONS( + pImpl , + rDocumentHandler->startElement( XML_CHAR_TO_OUSTRING( pwName ) , + pImpl->rAttrList ) ); + } +} + +void SaxExpatParser_Impl::callbackEndElement( void *pvThis , const XML_Char *pwName ) +{ + SaxExpatParser_Impl *pImpl = ((SaxExpatParser_Impl*)pvThis); + + if( pImpl->rDocumentHandler.is() ) { + 
CALL_ELEMENT_HANDLER_AND_CARE_FOR_EXCEPTIONS( pImpl, + rDocumentHandler->endElement( XML_CHAR_TO_OUSTRING( pwName ) ) ); + } +} + + +void SaxExpatParser_Impl::callbackCharacters( void *pvThis , const XML_Char *s , int nLen ) +{ + SaxExpatParser_Impl *pImpl = ((SaxExpatParser_Impl*)pvThis); + + if( pImpl->rDocumentHandler.is() ) { + CALL_ELEMENT_HANDLER_AND_CARE_FOR_EXCEPTIONS( pImpl , + rDocumentHandler->characters( XML_CHAR_N_TO_USTRING(s,nLen) ) ); + } +} + +void SaxExpatParser_Impl::callbackProcessingInstruction( void *pvThis, + const XML_Char *sTarget , + const XML_Char *sData ) +{ + SaxExpatParser_Impl *pImpl = ((SaxExpatParser_Impl*)pvThis); + if( pImpl->rDocumentHandler.is() ) { + CALL_ELEMENT_HANDLER_AND_CARE_FOR_EXCEPTIONS( + pImpl , + rDocumentHandler->processingInstruction( XML_CHAR_TO_OUSTRING( sTarget ), + XML_CHAR_TO_OUSTRING( sData ) ) ); + } +} + + +void SaxExpatParser_Impl::callbackUnparsedEntityDecl(void *pvThis , + const XML_Char *entityName, + const XML_Char * /*base*/, + const XML_Char *systemId, + const XML_Char *publicId, + const XML_Char *notationName) +{ + SaxExpatParser_Impl *pImpl = ((SaxExpatParser_Impl*)pvThis); + if( pImpl->rDTDHandler.is() ) { + CALL_ELEMENT_HANDLER_AND_CARE_FOR_EXCEPTIONS( + pImpl , + rDTDHandler->unparsedEntityDecl( + XML_CHAR_TO_OUSTRING( entityName ), + XML_CHAR_TO_OUSTRING( publicId ) , + XML_CHAR_TO_OUSTRING( systemId ) , + XML_CHAR_TO_OUSTRING( notationName ) ) ); + } +} + +void SaxExpatParser_Impl::callbackNotationDecl( void *pvThis, + const XML_Char *notationName, + const XML_Char * /*base*/, + const XML_Char *systemId, + const XML_Char *publicId) +{ + SaxExpatParser_Impl *pImpl = ((SaxExpatParser_Impl*)pvThis); + if( pImpl->rDTDHandler.is() ) { + CALL_ELEMENT_HANDLER_AND_CARE_FOR_EXCEPTIONS( pImpl, + rDTDHandler->notationDecl( XML_CHAR_TO_OUSTRING( notationName ) , + XML_CHAR_TO_OUSTRING( publicId ) , + XML_CHAR_TO_OUSTRING( systemId ) ) ); + } + +} + + + +int SaxExpatParser_Impl::callbackExternalEntityRef( 
XML_Parser parser, + const XML_Char *context, + const XML_Char * /*base*/, + const XML_Char *systemId, + const XML_Char *publicId) +{ + sal_Bool bOK = sal_True; + InputSource source; + SaxExpatParser_Impl *pImpl = ((SaxExpatParser_Impl*)XML_GetUserData( parser )); + + struct Entity entity; + + if( pImpl->rEntityResolver.is() ) { + try + { + entity.structSource = pImpl->rEntityResolver->resolveEntity( + XML_CHAR_TO_OUSTRING( publicId ) , + XML_CHAR_TO_OUSTRING( systemId ) ); + } + catch( SAXParseException & e ) + { + pImpl->exception = e; + bOK = sal_False; + } + catch( SAXException & e ) + { + pImpl->exception = SAXParseException( + e.Message , e.Context , e.WrappedException , + pImpl->rDocumentLocator->getPublicId(), + pImpl->rDocumentLocator->getSystemId(), + pImpl->rDocumentLocator->getLineNumber(), + pImpl->rDocumentLocator->getColumnNumber() ); + bOK = sal_False; + } + } + + if( entity.structSource.aInputStream.is() ) { + entity.pParser = XML_ExternalEntityParserCreate( parser , context, 0 ); + if( ! 
entity.pParser ) + { + return sal_False; + } + + entity.converter.setInputStream( entity.structSource.aInputStream ); + pImpl->pushEntity( entity ); + try + { + pImpl->parse(); + } + catch( SAXParseException & e ) + { + pImpl->exception = e; + bOK = sal_False; + } + catch( IOException &e ) + { + pImpl->exception.WrappedException <<= e; + bOK = sal_False; + } + catch( RuntimeException &e ) + { + pImpl->exception.WrappedException <<=e; + bOK = sal_False; + } + + pImpl->popEntity(); + + XML_ParserFree( entity.pParser ); + } + + return bOK; +} + +int SaxExpatParser_Impl::callbackUnknownEncoding(void * /*encodingHandlerData*/, + const XML_Char * /*name*/, + XML_Encoding * /*info*/) +{ + return 0; +} + +void SaxExpatParser_Impl::callbackDefault( void *pvThis, const XML_Char *s, int len) +{ + SaxExpatParser_Impl *pImpl = ((SaxExpatParser_Impl*)pvThis); + + CALL_ELEMENT_HANDLER_AND_CARE_FOR_EXCEPTIONS( pImpl, + rExtendedDocumentHandler->unknown( XML_CHAR_N_TO_USTRING( s ,len) ) ); +} + +void SaxExpatParser_Impl::callbackComment( void *pvThis , const XML_Char *s ) +{ + SaxExpatParser_Impl *pImpl = ((SaxExpatParser_Impl*)pvThis); + CALL_ELEMENT_HANDLER_AND_CARE_FOR_EXCEPTIONS( pImpl, + rExtendedDocumentHandler->comment( XML_CHAR_TO_OUSTRING( s ) ) ); +} + +void SaxExpatParser_Impl::callbackStartCDATA( void *pvThis ) +{ + SaxExpatParser_Impl *pImpl = ((SaxExpatParser_Impl*)pvThis); + + CALL_ELEMENT_HANDLER_AND_CARE_FOR_EXCEPTIONS( pImpl, rExtendedDocumentHandler->startCDATA() ); +} + + +void SaxExpatParser_Impl::callErrorHandler( SaxExpatParser_Impl *pImpl , + const SAXParseException & e ) +{ + try + { + if( pImpl->rErrorHandler.is() ) { + Any a; + a <<= e; + pImpl->rErrorHandler->error( a ); + } + else { + pImpl->exception = e; + pImpl->bExceptionWasThrown = sal_True; + } + } + catch( SAXParseException & ex ) { + pImpl->exception = ex; + pImpl->bExceptionWasThrown = sal_True; + } + catch( SAXException & ex ) { + pImpl->exception = SAXParseException( + ex.Message, + 
ex.Context, + ex.WrappedException, + pImpl->rDocumentLocator->getPublicId(), + pImpl->rDocumentLocator->getSystemId(), + pImpl->rDocumentLocator->getLineNumber(), + pImpl->rDocumentLocator->getColumnNumber() + ); + pImpl->bExceptionWasThrown = sal_True; + } +} + +void SaxExpatParser_Impl::callbackEndCDATA( void *pvThis ) +{ + SaxExpatParser_Impl *pImpl = ((SaxExpatParser_Impl*)pvThis); + + CALL_ELEMENT_HANDLER_AND_CARE_FOR_EXCEPTIONS(pImpl,rExtendedDocumentHandler->endCDATA() ); +} + +} +using namespace sax_expatwrap; + +extern "C" +{ + +void SAL_CALL component_getImplementationEnvironment( + const sal_Char ** ppEnvTypeName, uno_Environment ** /*ppEnv*/ ) +{ + *ppEnvTypeName = CPPU_CURRENT_LANGUAGE_BINDING_NAME; +} + + +sal_Bool SAL_CALL component_writeInfo( + void * /*pServiceManager*/, void * pRegistryKey ) +{ + if (pRegistryKey) + { + try + { + Reference< XRegistryKey > xKey( + reinterpret_cast< XRegistryKey * >( pRegistryKey ) ); + + Reference< XRegistryKey > xNewKey = xKey->createKey( + OUString::createFromAscii( "/" IMPLEMENTATION_NAME "/UNO/SERVICES" ) ); + xNewKey->createKey( OUString::createFromAscii( SERVICE_NAME ) ); + + xNewKey = xKey->createKey( OUString::createFromAscii("/") + + SaxWriter_getImplementationName()+ + OUString::createFromAscii( "/UNO/SERVICES" ) ); + xNewKey->createKey( SaxWriter_getServiceName() ); + + return sal_True; + } + catch (InvalidRegistryException &) + { + OSL_ENSURE( sal_False, "### InvalidRegistryException!" 
); + } + } + return sal_False; +} + + +void * SAL_CALL component_getFactory( + const sal_Char * pImplName, void * pServiceManager, void * /*pRegistryKey*/ ) +{ + void * pRet = 0; + + if (pServiceManager ) + { + Reference< XSingleServiceFactory > xRet; + Reference< XMultiServiceFactory > xSMgr = + reinterpret_cast< XMultiServiceFactory * > ( pServiceManager ); + + OUString aImplementationName = OUString::createFromAscii( pImplName ); + + if (aImplementationName == + OUString( RTL_CONSTASCII_USTRINGPARAM( IMPLEMENTATION_NAME ) ) ) + { + xRet = createSingleFactory( xSMgr, aImplementationName, + SaxExpatParser_CreateInstance, + SaxExpatParser::getSupportedServiceNames_Static() ); + } + else if ( aImplementationName == SaxWriter_getImplementationName() ) + { + xRet = createSingleFactory( xSMgr, aImplementationName, + SaxWriter_CreateInstance, + SaxWriter_getSupportedServiceNames() ); + } + + if (xRet.is()) + { + xRet->acquire(); + pRet = xRet.get(); + } + } + + return pRet; +} + + +} + diff --git a/sax/source/expatwrap/saxwriter.cxx b/sax/source/expatwrap/saxwriter.cxx new file mode 100644 index 000000000000..92d53700aa86 --- /dev/null +++ b/sax/source/expatwrap/saxwriter.cxx @@ -0,0 +1,1454 @@ +/************************************************************************* + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * Copyright 2000, 2010 Oracle and/or its affiliates. + * + * OpenOffice.org - a multi-platform office productivity suite + * + * This file is part of OpenOffice.org. + * + * OpenOffice.org is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License version 3 + * only, as published by the Free Software Foundation. + * + * OpenOffice.org is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU Lesser General Public License version 3 for more details + * (a copy is included in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU Lesser General Public License + * version 3 along with OpenOffice.org. If not, see + * <http://www.openoffice.org/license.html> + * for a copy of the LGPLv3 License. + * + ************************************************************************/ +#include <string.h> + +#include <com/sun/star/lang/XServiceInfo.hpp> +#include <com/sun/star/util/XCloneable.hpp> +#include <com/sun/star/xml/sax/XExtendedDocumentHandler.hpp> +#include <com/sun/star/xml/sax/XParser.hpp> +#include <com/sun/star/xml/sax/SAXParseException.hpp> +#include <com/sun/star/xml/sax/SAXInvalidCharacterException.hpp> + +#include <com/sun/star/io/XActiveDataSource.hpp> + +#include <cppuhelper/factory.hxx> +#include <cppuhelper/weak.hxx> +#include <cppuhelper/implbase3.hxx> + +#include <rtl/strbuf.hxx> +#include <rtl/byteseq.hxx> +#include <rtl/ustrbuf.hxx> + +using namespace ::rtl; +using namespace ::std; +using namespace ::osl; +using namespace ::cppu; +using namespace ::com::sun::star::uno; +using namespace ::com::sun::star::lang; +using namespace ::com::sun::star::registry; +using namespace ::com::sun::star::xml::sax; +using namespace ::com::sun::star::util; +using namespace ::com::sun::star::io; + +#include "factory.hxx" +#include "xml2utf.hxx" + +#define LINEFEED 10 +#define SEQUENCESIZE 1024 +#define MAXCOLUMNCOUNT 72 + +/****** +* +* +* Character conversion functions +* +* +*****/ + +namespace sax_expatwrap { +/***** +* +* Calculates the length of the sequence after conversion, but the conversion is not done. +* .g. 
&<>"' plus some more are +* special characters in XML that need to be transformed +* +* @param bConvertAll For Attributes it is necessary to convert every symbol (including line feed and tab) +* Set this to true, if you want to perform this special conversion +* @return The returned value is equal to the length of the incoming sequence, when no ++ conversion is necessary, otherwise it is larger than the length of the sequence. +****/ +// inline sal_Int32 CalcXMLLen( const Sequence<sal_Int8> & seq , sal_Bool bConvertAll ) throw() +// { +// sal_Int32 nLen = 0; +// const sal_Int8 *pArray = seq.getConstArray(); + +// for( int i = 0 ; i < seq.getLength() ; i ++ ) { + +// sal_Int8 c = pArray[i]; +// switch( c ) +// { +// case '&': // resemble to & +// nLen +=5; +// break; +// case '<': // < +// case '>': // > +// nLen +=4; +// break; +// case 39: // 39 == ''', ' +// case '"': // " +// case 13: // 
 +// nLen += 6; +// break; + +// case 10: // 
 +// case 9: // 	 +// if( bConvertAll ) +// { +// nLen += 6; // +// } +// break; +// default: +// nLen ++; +// } +// } + +// return nLen; +// } + +enum SaxInvalidCharacterError +{ + SAX_NONE, + SAX_WARNING, + SAX_ERROR +}; + +class SaxWriterHelper +{ + Reference< XOutputStream > m_out; + Sequence < sal_Int8 > m_Sequence; + sal_Int8* mp_Sequence; + + sal_Int32 nLastLineFeedPos; // is negative after writing a sequence + sal_uInt32 nCurrentPos; + sal_Bool m_bStartElementFinished; + + + inline sal_uInt32 writeSequence() throw( SAXException ); + + // use only if to insert the bytes more space in the sequence is needed and + // so the sequence has to write out and reset rPos to 0 + // writes sequence only on overflow, sequence could be full on the end (rPos == SEQUENCESIZE) + inline void AddBytes(sal_Int8* pTarget, sal_uInt32& rPos, + const sal_Int8* pBytes, sal_uInt32 nBytesCount) throw( SAXException ); + inline sal_Bool convertToXML(const sal_Unicode * pStr, + sal_Int32 nStrLen, + sal_Bool bDoNormalization, + sal_Bool bNormalizeWhitespace, + sal_Int8 *pTarget, + sal_uInt32& rPos) throw( SAXException ); + inline void FinishStartElement() throw( SAXException ); +public: + SaxWriterHelper(Reference< XOutputStream > m_TempOut) : + m_out(m_TempOut), + m_Sequence(SEQUENCESIZE), + mp_Sequence(NULL), + nLastLineFeedPos(0), + nCurrentPos(0), + m_bStartElementFinished(sal_True) + { + OSL_ENSURE(SEQUENCESIZE > 50, "Sequence cache size to small"); + mp_Sequence = m_Sequence.getArray(); + } + ~SaxWriterHelper() + { + OSL_ENSURE(!nCurrentPos, "cached Sequence not written"); + OSL_ENSURE(m_bStartElementFinished, "StartElement not complettly written"); + } + + inline void insertIndentation(sal_uInt32 m_nLevel) throw( SAXException ); + +// returns whether it works correct or invalid characters were in the string +// If there are invalid characters in the string it returns sal_False. +// Than the calling method has to throw the needed Exception. 
+ inline sal_Bool writeString(const rtl::OUString& rWriteOutString, + sal_Bool bDoNormalization, + sal_Bool bNormalizeWhitespace) throw( SAXException ); + + sal_uInt32 GetLastColumnCount() { return (sal_uInt32)(nCurrentPos - nLastLineFeedPos); } + + inline void startDocument() throw( SAXException ); + +// returns whether it works correct or invalid characters were in the strings +// If there are invalid characters in one of the strings it returns sal_False. +// Than the calling method has to throw the needed Exception. + inline SaxInvalidCharacterError startElement(const rtl::OUString& rName, const Reference< XAttributeList >& xAttribs) throw( SAXException ); + inline sal_Bool FinishEmptyElement() throw( SAXException ); + +// returns whether it works correct or invalid characters were in the string +// If there are invalid characters in the string it returns sal_False. +// Than the calling method has to throw the needed Exception. + inline sal_Bool endElement(const rtl::OUString& rName) throw( SAXException ); + inline void endDocument() throw( SAXException ); + +// returns whether it works correct or invalid characters were in the strings +// If there are invalid characters in the string it returns sal_False. +// Than the calling method has to throw the needed Exception. + inline sal_Bool processingInstruction(const rtl::OUString& rTarget, const rtl::OUString& rData) throw( SAXException ); + inline void startCDATA() throw( SAXException ); + inline void endCDATA() throw( SAXException ); + +// returns whether it works correct or invalid characters were in the strings +// If there are invalid characters in the string it returns sal_False. +// Than the calling method has to throw the needed Exception. 
+ inline sal_Bool comment(const rtl::OUString& rComment) throw( SAXException ); + + inline void clearBuffer() throw( SAXException ); +}; + +const sal_Bool g_bValidCharsBelow32[32] = +{ +// 0 1 2 3 4 5 6 7 + 0,0,0,0,0,0,0,0, //0 + 0,1,1,0,0,1,0,0, //8 + 0,0,0,0,0,0,0,0, //16 + 0,0,0,0,0,0,0,0 +}; + +inline sal_Bool IsInvalidChar(const sal_Unicode aChar) +{ + sal_Bool bRet(sal_False); + // check first for the most common characters + if( aChar < 32 || aChar >= 0xd800 ) + bRet = ( (aChar < 32 && ! g_bValidCharsBelow32[aChar]) || + aChar == 0xffff || + aChar == 0xfffe ); + return bRet; +} + +/******** +* write through to the output stream +* +*****/ +inline sal_uInt32 SaxWriterHelper::writeSequence() throw( SAXException ) +{ + try + { + m_out->writeBytes( m_Sequence ); + } + catch( IOException & e ) + { + Any a; + a <<= e; + throw SAXException( + OUString::createFromAscii( "io exception during writing" ), + Reference< XInterface > (), + a ); + } + nLastLineFeedPos -= SEQUENCESIZE; + return 0; +} + +inline void SaxWriterHelper::AddBytes(sal_Int8* pTarget, sal_uInt32& rPos, + const sal_Int8* pBytes, sal_uInt32 nBytesCount) throw( SAXException ) +{ + OSL_ENSURE((rPos + nBytesCount) > SEQUENCESIZE, "wrong use of AddBytesMethod"); + sal_uInt32 nCount(SEQUENCESIZE - rPos); + memcpy( &(pTarget[rPos]) , pBytes, nCount); + + OSL_ENSURE(rPos + nCount == SEQUENCESIZE, "the position should be the at the end"); + + rPos = writeSequence(); + sal_uInt32 nRestCount(nBytesCount - nCount); + if ((rPos + nRestCount) <= SEQUENCESIZE) + { + memcpy( &(pTarget[rPos]), &pBytes[nCount], nRestCount); + rPos += nRestCount; + } + else + AddBytes(pTarget, rPos, &pBytes[nCount], nRestCount); +} + +/** Converts an UTF16 string to UTF8 and does XML normalization + + @param pTarget + Pointer to a piece of memory, to where the output should be written. The caller + must call calcXMLByteLength on the same string, to ensure, + that there is enough memory for converting. 
+ */ +inline sal_Bool SaxWriterHelper::convertToXML( const sal_Unicode * pStr, + sal_Int32 nStrLen, + sal_Bool bDoNormalization, + sal_Bool bNormalizeWhitespace, + sal_Int8 *pTarget, + sal_uInt32& rPos ) throw( SAXException ) +{ + sal_Bool bRet(sal_True); + sal_uInt32 nSurrogate = 0; + + for( sal_Int32 i = 0 ; i < nStrLen ; i ++ ) + { + sal_uInt16 c = pStr[i]; + if (IsInvalidChar(c)) + bRet = sal_False; + else if( (c >= 0x0001) && (c <= 0x007F) ) + { + if( bDoNormalization ) + { + switch( c ) + { + case '&': // resemble to & + { + if ((rPos + 5) > SEQUENCESIZE) + AddBytes(pTarget, rPos, (sal_Int8*)"&", 5); + else + { + memcpy( &(pTarget[rPos]) , "&", 5 ); + rPos += 5; + } + } + break; + case '<': + { + if ((rPos + 4) > SEQUENCESIZE) + AddBytes(pTarget, rPos, (sal_Int8*)"<", 4); + else + { + memcpy( &(pTarget[rPos]) , "<" , 4 ); + rPos += 4; // < + } + } + break; + case '>': + { + if ((rPos + 4) > SEQUENCESIZE) + AddBytes(pTarget, rPos, (sal_Int8*)">", 4); + else + { + memcpy( &(pTarget[rPos]) , ">" , 4 ); + rPos += 4; // > + } + } + break; + case 39: // 39 == ''' + { + if ((rPos + 6) > SEQUENCESIZE) + AddBytes(pTarget, rPos, (sal_Int8*)"'", 6); + else + { + memcpy( &(pTarget[rPos]) , "'" , 6 ); + rPos += 6; // ' + } + } + break; + case '"': + { + if ((rPos + 6) > SEQUENCESIZE) + AddBytes(pTarget, rPos, (sal_Int8*)""", 6); + else + { + memcpy( &(pTarget[rPos]) , """ , 6 ); + rPos += 6; // " + } + } + break; + case 13: + { + if ((rPos + 6) > SEQUENCESIZE) + AddBytes(pTarget, rPos, (sal_Int8*)"
", 6); + else + { + memcpy( &(pTarget[rPos]) , "
" , 6 ); + rPos += 6; + } + } + break; + case LINEFEED: + { + if( bNormalizeWhitespace ) + { + if ((rPos + 6) > SEQUENCESIZE) + AddBytes(pTarget, rPos, (sal_Int8*)"
" , 6); + else + { + memcpy( &(pTarget[rPos]) , "
" , 6 ); + rPos += 6; + } + } + else + { + pTarget[rPos] = LINEFEED; + nLastLineFeedPos = rPos; + rPos ++; + } + } + break; + case 9: + { + if( bNormalizeWhitespace ) + { + if ((rPos + 6) > SEQUENCESIZE) + AddBytes(pTarget, rPos, (sal_Int8*)"	" , 6); + else + { + memcpy( &(pTarget[rPos]) , "	" , 6 ); + rPos += 6; + } + } + else + { + pTarget[rPos] = 9; + rPos ++; + } + } + break; + default: + { + pTarget[rPos] = (sal_Int8)c; + rPos ++; + } + break; + } + } + else + { + pTarget[rPos] = (sal_Int8)c; + if ((sal_Int8)c == LINEFEED) + nLastLineFeedPos = rPos; + rPos ++; + } + } + else if( c >= 0xd800 && c < 0xdc00 ) + { + // 1. surrogate: save (until 2. surrogate) + OSL_ENSURE( nSurrogate == 0, "left-over Unicode surrogate" ); + nSurrogate = ( ( c & 0x03ff ) + 0x0040 ); + } + else if( c >= 0xdc00 && c < 0xe000 ) + { + // 2. surrogate: write as UTF-8 + OSL_ENSURE( nSurrogate != 0, "lone 2nd Unicode surrogate" ); + + nSurrogate = ( nSurrogate << 10 ) | ( c & 0x03ff ); + if( nSurrogate >= 0x00010000 && nSurrogate <= 0x0010FFFF ) + { + sal_Int8 aBytes[] = { sal_Int8(0xF0 | ((nSurrogate >> 18) & 0x0F)), + sal_Int8(0x80 | ((nSurrogate >> 12) & 0x3F)), + sal_Int8(0x80 | ((nSurrogate >> 6) & 0x3F)), + sal_Int8(0x80 | ((nSurrogate >> 0) & 0x3F)) }; + if ((rPos + 4) > SEQUENCESIZE) + AddBytes(pTarget, rPos, aBytes, 4); + else + { + pTarget[rPos] = aBytes[0]; + rPos ++; + pTarget[rPos] = aBytes[1]; + rPos ++; + pTarget[rPos] = aBytes[2]; + rPos ++; + pTarget[rPos] = aBytes[3]; + rPos ++; + } + } + else + { + OSL_ENSURE( false, "illegal Unicode character" ); + bRet = sal_False; + } + + // reset surrogate + nSurrogate = 0; + } + else if( c > 0x07FF ) + { + sal_Int8 aBytes[] = { sal_Int8(0xE0 | ((c >> 12) & 0x0F)), + sal_Int8(0x80 | ((c >> 6) & 0x3F)), + sal_Int8(0x80 | ((c >> 0) & 0x3F)) }; + if ((rPos + 3) > SEQUENCESIZE) + AddBytes(pTarget, rPos, aBytes, 3); + else + { + pTarget[rPos] = aBytes[0]; + rPos ++; + pTarget[rPos] = aBytes[1]; + rPos ++; + pTarget[rPos] = aBytes[2]; + 
rPos ++; + } + } + else + { + sal_Int8 aBytes[] = { sal_Int8(0xC0 | ((c >> 6) & 0x1F)), + sal_Int8(0x80 | ((c >> 0) & 0x3F)) }; + if ((rPos + 2) > SEQUENCESIZE) + AddBytes(pTarget, rPos, aBytes, 2); + else + { + pTarget[rPos] = aBytes[0]; + rPos ++; + pTarget[rPos] = aBytes[1]; + rPos ++; + } + } + OSL_ENSURE(rPos <= SEQUENCESIZE, "not reset current position"); + if (rPos == SEQUENCESIZE) + rPos = writeSequence(); + + // reset left-over surrogate + if( ( nSurrogate != 0 ) && !( c >= 0xd800 && c < 0xdc00 ) ) + { + OSL_ENSURE( nSurrogate != 0, "left-over Unicode surrogate" ); + nSurrogate = 0; + bRet = sal_False; + } + } + return bRet; +} + +inline void SaxWriterHelper::FinishStartElement() throw( SAXException ) +{ + if (!m_bStartElementFinished) + { + mp_Sequence[nCurrentPos] = '>'; + nCurrentPos++; + if (nCurrentPos == SEQUENCESIZE) + nCurrentPos = writeSequence(); + m_bStartElementFinished = sal_True; + } +} + +inline void SaxWriterHelper::insertIndentation(sal_uInt32 m_nLevel) throw( SAXException ) +{ + FinishStartElement(); + if (m_nLevel > 0) + { + if ((nCurrentPos + m_nLevel + 1) <= SEQUENCESIZE) + { + mp_Sequence[nCurrentPos] = LINEFEED; + nLastLineFeedPos = nCurrentPos; + nCurrentPos++; + memset( &(mp_Sequence[nCurrentPos]) , 32 , m_nLevel ); + nCurrentPos += m_nLevel; + if (nCurrentPos == SEQUENCESIZE) + nCurrentPos = writeSequence(); + } + else + { + sal_uInt32 nCount(m_nLevel + 1); + sal_Int8* pBytes = new sal_Int8[nCount]; + pBytes[0] = LINEFEED; + memset( &(pBytes[1]), 32, m_nLevel ); + AddBytes(mp_Sequence, nCurrentPos, pBytes, nCount); + delete[] pBytes; + nLastLineFeedPos = nCurrentPos - nCount; + if (nCurrentPos == SEQUENCESIZE) + nCurrentPos = writeSequence(); + } + } + else + { + mp_Sequence[nCurrentPos] = LINEFEED; + nLastLineFeedPos = nCurrentPos; + nCurrentPos++; + if (nCurrentPos == SEQUENCESIZE) + nCurrentPos = writeSequence(); + } +} + +inline sal_Bool SaxWriterHelper::writeString( const rtl::OUString& rWriteOutString, + sal_Bool 
bDoNormalization, + sal_Bool bNormalizeWhitespace ) throw( SAXException ) +{ + FinishStartElement(); + return convertToXML(rWriteOutString.getStr(), + rWriteOutString.getLength(), + bDoNormalization, + bNormalizeWhitespace, + mp_Sequence, + nCurrentPos); +} + +inline void SaxWriterHelper::startDocument() throw( SAXException ) +{ + const char pc[] = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>"; + const int nLen = strlen( pc ); + if ((nCurrentPos + nLen) <= SEQUENCESIZE) + { + memcpy( mp_Sequence, pc , nLen ); + nCurrentPos += nLen; + } + else + { + AddBytes(mp_Sequence, nCurrentPos, (sal_Int8*)pc, nLen); + } + OSL_ENSURE(nCurrentPos <= SEQUENCESIZE, "not reset current position"); + if (nCurrentPos == SEQUENCESIZE) + nCurrentPos = writeSequence(); + mp_Sequence[nCurrentPos] = LINEFEED; + nCurrentPos++; + if (nCurrentPos == SEQUENCESIZE) + nCurrentPos = writeSequence(); +} + +inline SaxInvalidCharacterError SaxWriterHelper::startElement(const rtl::OUString& rName, const Reference< XAttributeList >& xAttribs) throw( SAXException ) +{ + FinishStartElement(); + mp_Sequence[nCurrentPos] = '<'; + nCurrentPos++; + if (nCurrentPos == SEQUENCESIZE) + nCurrentPos = writeSequence(); + + SaxInvalidCharacterError eRet(SAX_NONE); + if (!writeString(rName, sal_False, sal_False)) + eRet = SAX_ERROR; + + sal_Int16 nAttribCount = xAttribs.is() ? 
static_cast<sal_Int16>(xAttribs->getLength()) : 0; + for(sal_Int16 i = 0 ; i < nAttribCount ; i++ ) + { + mp_Sequence[nCurrentPos] = ' '; + nCurrentPos++; + if (nCurrentPos == SEQUENCESIZE) + nCurrentPos = writeSequence(); + + if (!writeString(xAttribs->getNameByIndex( i ), sal_False, sal_False)) + eRet = SAX_ERROR; + + mp_Sequence[nCurrentPos] = '='; + nCurrentPos++; + if (nCurrentPos == SEQUENCESIZE) + nCurrentPos = writeSequence(); + mp_Sequence[nCurrentPos] = '"'; + nCurrentPos++; + if (nCurrentPos == SEQUENCESIZE) + nCurrentPos = writeSequence(); + + if (!writeString(xAttribs->getValueByIndex( i ), sal_True, sal_True) && + !(eRet == SAX_ERROR)) + eRet = SAX_WARNING; + + mp_Sequence[nCurrentPos] = '"'; + nCurrentPos++; + if (nCurrentPos == SEQUENCESIZE) + nCurrentPos = writeSequence(); + } + + m_bStartElementFinished = sal_False; // because the '>' character is not added, + // because it is possible, that the "/>" + // characters have to add + return eRet; +} + +inline sal_Bool SaxWriterHelper::FinishEmptyElement() throw( SAXException ) +{ + if (m_bStartElementFinished) + return sal_False; + + mp_Sequence[nCurrentPos] = '/'; + nCurrentPos++; + if (nCurrentPos == SEQUENCESIZE) + nCurrentPos = writeSequence(); + mp_Sequence[nCurrentPos] = '>'; + nCurrentPos++; + if (nCurrentPos == SEQUENCESIZE) + nCurrentPos = writeSequence(); + + m_bStartElementFinished = sal_True; + + return sal_True; +} + +inline sal_Bool SaxWriterHelper::endElement(const rtl::OUString& rName) throw( SAXException ) +{ + FinishStartElement(); + mp_Sequence[nCurrentPos] = '<'; + nCurrentPos++; + if (nCurrentPos == SEQUENCESIZE) + nCurrentPos = writeSequence(); + mp_Sequence[nCurrentPos] = '/'; + nCurrentPos++; + if (nCurrentPos == SEQUENCESIZE) + nCurrentPos = writeSequence(); + + sal_Bool bRet(writeString( rName, sal_False, sal_False)); + + mp_Sequence[nCurrentPos] = '>'; + nCurrentPos++; + if (nCurrentPos == SEQUENCESIZE) + nCurrentPos = writeSequence(); + + return bRet; +} + +inline void 
SaxWriterHelper::endDocument() throw( SAXException ) +{ + if (nCurrentPos > 0) + { + m_Sequence.realloc(nCurrentPos); + nCurrentPos = writeSequence(); + //m_Sequence.realloc(SEQUENCESIZE); + } +} + +inline void SaxWriterHelper::clearBuffer() throw( SAXException ) +{ + FinishStartElement(); + if (nCurrentPos > 0) + { + m_Sequence.realloc(nCurrentPos); + nCurrentPos = writeSequence(); + m_Sequence.realloc(SEQUENCESIZE); + // Be sure to update the array pointer after the reallocation. + mp_Sequence = m_Sequence.getArray(); + } +} + +inline sal_Bool SaxWriterHelper::processingInstruction(const rtl::OUString& rTarget, const rtl::OUString& rData) throw( SAXException ) +{ + FinishStartElement(); + mp_Sequence[nCurrentPos] = '<'; + nCurrentPos++; + if (nCurrentPos == SEQUENCESIZE) + nCurrentPos = writeSequence(); + mp_Sequence[nCurrentPos] = '?'; + nCurrentPos++; + if (nCurrentPos == SEQUENCESIZE) + nCurrentPos = writeSequence(); + + sal_Bool bRet(writeString( rTarget, sal_False, sal_False )); + + mp_Sequence[nCurrentPos] = ' '; + nCurrentPos++; + if (nCurrentPos == SEQUENCESIZE) + nCurrentPos = writeSequence(); + + if (!writeString( rData, sal_False, sal_False )) + bRet = sal_False; + + mp_Sequence[nCurrentPos] = '?'; + nCurrentPos++; + if (nCurrentPos == SEQUENCESIZE) + nCurrentPos = writeSequence(); + mp_Sequence[nCurrentPos] = '>'; + nCurrentPos++; + if (nCurrentPos == SEQUENCESIZE) + nCurrentPos = writeSequence(); + + return bRet; +} + +inline void SaxWriterHelper::startCDATA() throw( SAXException ) +{ + FinishStartElement(); + if ((nCurrentPos + 9) <= SEQUENCESIZE) + { + memcpy( &(mp_Sequence[nCurrentPos]), "<![CDATA[" , 9 ); + nCurrentPos += 9; + } + else + AddBytes(mp_Sequence, nCurrentPos, (sal_Int8*)"<![CDATA[" , 9); + if (nCurrentPos == SEQUENCESIZE) + nCurrentPos = writeSequence(); +} + +inline void SaxWriterHelper::endCDATA() throw( SAXException ) +{ + FinishStartElement(); + if ((nCurrentPos + 3) <= SEQUENCESIZE) + { + memcpy( &(mp_Sequence[nCurrentPos]), 
"]]>" , 3 ); + nCurrentPos += 3; + } + else + AddBytes(mp_Sequence, nCurrentPos, (sal_Int8*)"]]>" , 3); + if (nCurrentPos == SEQUENCESIZE) + nCurrentPos = writeSequence(); +} + +inline sal_Bool SaxWriterHelper::comment(const rtl::OUString& rComment) throw( SAXException ) +{ + FinishStartElement(); + mp_Sequence[nCurrentPos] = '<'; + nCurrentPos++; + if (nCurrentPos == SEQUENCESIZE) + nCurrentPos = writeSequence(); + mp_Sequence[nCurrentPos] = '!'; + nCurrentPos++; + if (nCurrentPos == SEQUENCESIZE) + nCurrentPos = writeSequence(); + mp_Sequence[nCurrentPos] = '-'; + nCurrentPos++; + if (nCurrentPos == SEQUENCESIZE) + nCurrentPos = writeSequence(); + mp_Sequence[nCurrentPos] = '-'; + nCurrentPos++; + if (nCurrentPos == SEQUENCESIZE) + nCurrentPos = writeSequence(); + + sal_Bool bRet(writeString( rComment, sal_False, sal_False)); + + mp_Sequence[nCurrentPos] = '-'; + nCurrentPos++; + if (nCurrentPos == SEQUENCESIZE) + nCurrentPos = writeSequence(); + mp_Sequence[nCurrentPos] = '-'; + nCurrentPos++; + if (nCurrentPos == SEQUENCESIZE) + nCurrentPos = writeSequence(); + mp_Sequence[nCurrentPos] = '>'; + nCurrentPos++; + if (nCurrentPos == SEQUENCESIZE) + nCurrentPos = writeSequence(); + + return bRet; +} + +inline sal_Int32 calcXMLByteLength( const sal_Unicode *pStr, sal_Int32 nStrLen, + sal_Bool bDoNormalization, + sal_Bool bNormalizeWhitespace ) +{ + sal_Int32 nOutputLength = 0; + sal_uInt32 nSurrogate = 0; + + for( sal_Int32 i = 0 ; i < nStrLen ; i++ ) + { + sal_uInt16 c = pStr[i]; + if( !IsInvalidChar(c) && (c >= 0x0001) && (c <= 0x007F) ) + { + if( bDoNormalization ) + { + switch( c ) + { + case '&': // resemble to & + nOutputLength +=5; + break; + case '<': // < + case '>': // > + nOutputLength +=4; + break; + case 39: // 39 == ''', ' + case '"': // " + case 13: // 
 + nOutputLength += 6; + break; + + case 10: // 
 + case 9: // 	 + if( bNormalizeWhitespace ) + { + nOutputLength += 6; // + } + else + { + nOutputLength ++; + } + break; + default: + nOutputLength ++; + } + } + else + { + nOutputLength ++; + } + } + else if( c >= 0xd800 && c < 0xdc00 ) + { + // save surrogate + nSurrogate = ( ( c & 0x03ff ) + 0x0040 ); + } + else if( c >= 0xdc00 && c < 0xe000 ) + { + // 2. surrogate: write as UTF-8 (if range is OK + nSurrogate = ( nSurrogate << 10 ) | ( c & 0x03ff ); + if( nSurrogate >= 0x00010000 && nSurrogate <= 0x0010FFFF ) + nOutputLength += 4; + nSurrogate = 0; + } + else if( c > 0x07FF ) + { + nOutputLength += 3; + } + else + { + nOutputLength += 2; + } + + // surrogate processing + if( ( nSurrogate != 0 ) && !( c >= 0xd800 && c < 0xdc00 ) ) + nSurrogate = 0; + } + + return nOutputLength; +} + +/** returns position of first ascii 10 within the string, -1 when no 10 in string. + */ +static inline sal_Int32 getFirstLineBreak( const OUString & str ) throw () +{ + const sal_Unicode *pSource = str.getStr(); + sal_Int32 nLen = str.getLength(); + + for( int n = 0; n < nLen ; n ++ ) + { + if( LINEFEED == pSource[n] ) { + return n; + } + } + return -1; +} + +/** returns position of last ascii 10 within sequence, -1 when no 10 in string. 
+ */ +static inline sal_Int32 getLastLineBreak( const Sequence<sal_Int8> & seq) throw () +{ + const sal_Int8 *pSource = seq.getConstArray(); + sal_Int32 nLen = seq.getLength(); + + for( int n = nLen-1; n >= 0 ; n -- ) + { + if( LINEFEED == pSource[n] ) { + return n; + } + } + return -1; +} + + +class SAXWriter : + public WeakImplHelper3< + XActiveDataSource, + XExtendedDocumentHandler, + XServiceInfo > +{ +public: + SAXWriter( ) : + m_seqStartElement(), + mp_SaxWriterHelper( NULL ), + m_bForceLineBreak(sal_False), + m_bAllowLineBreak(sal_False) + {} + ~SAXWriter() + { + delete mp_SaxWriterHelper; + } + +public: // XActiveDataSource + virtual void SAL_CALL setOutputStream(const Reference< XOutputStream > & aStream) + throw (RuntimeException) + { + // temporary: set same stream again to clear buffer + if ( m_out == aStream && mp_SaxWriterHelper && m_bDocStarted ) + mp_SaxWriterHelper->clearBuffer(); + else + { + + m_out = aStream; + delete mp_SaxWriterHelper; + mp_SaxWriterHelper = new SaxWriterHelper(m_out); + m_bDocStarted = sal_False; + m_nLevel = 0; + m_bIsCDATA = sal_False; + + } + } + virtual Reference< XOutputStream > SAL_CALL getOutputStream(void) + throw(RuntimeException) + { return m_out; } + +public: // XDocumentHandler + virtual void SAL_CALL startDocument(void) + throw(SAXException, RuntimeException); + + virtual void SAL_CALL endDocument(void) + throw(SAXException, RuntimeException); + + virtual void SAL_CALL startElement(const OUString& aName, + const Reference< XAttributeList > & xAttribs) + throw (SAXException, RuntimeException); + + virtual void SAL_CALL endElement(const OUString& aName) + throw(SAXException, RuntimeException); + + virtual void SAL_CALL characters(const OUString& aChars) + throw(SAXException, RuntimeException); + + virtual void SAL_CALL ignorableWhitespace(const OUString& aWhitespaces) + throw(SAXException, RuntimeException); + virtual void SAL_CALL processingInstruction(const OUString& aTarget, + const OUString& aData) + 
throw(SAXException, RuntimeException); + virtual void SAL_CALL setDocumentLocator(const Reference< XLocator > & xLocator) + throw(SAXException, RuntimeException); + +public: // XExtendedDocumentHandler + virtual void SAL_CALL startCDATA(void) throw(SAXException, RuntimeException); + virtual void SAL_CALL endCDATA(void) throw(RuntimeException); + virtual void SAL_CALL comment(const OUString& sComment) + throw(SAXException, RuntimeException); + virtual void SAL_CALL unknown(const OUString& sString) + throw(SAXException, RuntimeException); + virtual void SAL_CALL allowLineBreak(void) + throw(SAXException,RuntimeException); + +public: // XServiceInfo + OUString SAL_CALL getImplementationName() throw(); + Sequence< OUString > SAL_CALL getSupportedServiceNames(void) throw(); + sal_Bool SAL_CALL supportsService(const OUString& ServiceName) throw(); + +private: + + void writeSequence( const Sequence<sal_Int8> & seq ); + sal_Int32 getIndentPrefixLength( sal_Int32 nFirstLineBreakOccurence ) throw(); + + Reference< XOutputStream > m_out; + Sequence < sal_Int8 > m_seqStartElement; + SaxWriterHelper* mp_SaxWriterHelper; + + // Status information + sal_Bool m_bDocStarted : 1; + sal_Bool m_bIsCDATA : 1; + sal_Bool m_bForceLineBreak : 1; + sal_Bool m_bAllowLineBreak : 1; + sal_Int32 m_nLevel; +}; + + +//-------------------------------------- +// the extern interface +//--------------------------------------- +Reference < XInterface > SAL_CALL SaxWriter_CreateInstance( + const Reference < XMultiServiceFactory > & ) + throw (Exception) +{ + SAXWriter *p = new SAXWriter; + return Reference< XInterface > ( SAL_STATIC_CAST(OWeakObject *, p ) ); +} + +OUString SaxWriter_getServiceName() throw() +{ + return OUString::createFromAscii( "com.sun.star.xml.sax.Writer" ); +} + +OUString SaxWriter_getImplementationName() throw() +{ + return OUString::createFromAscii( "com.sun.star.extensions.xml.sax.Writer" ); +} + +Sequence< OUString > SaxWriter_getSupportedServiceNames(void) throw() +{ + 
Sequence<OUString> aRet(1); + aRet.getArray()[0] = SaxWriter_getServiceName(); + return aRet; +} + + +sal_Int32 SAXWriter::getIndentPrefixLength( sal_Int32 nFirstLineBreakOccurence ) throw() +{ + sal_Int32 nLength =-1; + if (mp_SaxWriterHelper) + { + if ( m_bForceLineBreak || + (m_bAllowLineBreak && + ((nFirstLineBreakOccurence + mp_SaxWriterHelper->GetLastColumnCount()) > MAXCOLUMNCOUNT)) ) + nLength = m_nLevel; + } + m_bForceLineBreak = sal_False; + m_bAllowLineBreak = sal_False; + return nLength; +} + +static inline sal_Bool isFirstCharWhitespace( const sal_Unicode *p ) throw() +{ + return *p == ' '; +} + + +// XServiceInfo +OUString SAXWriter::getImplementationName() throw() +{ + return SaxWriter_getImplementationName(); +} + +// XServiceInfo +sal_Bool SAXWriter::supportsService(const OUString& ServiceName) throw() +{ + Sequence< OUString > aSNL = getSupportedServiceNames(); + const OUString * pArray = aSNL.getConstArray(); + + for( sal_Int32 i = 0; i < aSNL.getLength(); i++ ) + if( pArray[i] == ServiceName ) + return sal_True; + + return sal_False; +} + +// XServiceInfo +Sequence< OUString > SAXWriter::getSupportedServiceNames(void) throw () +{ + Sequence<OUString> seq(1); + seq.getArray()[0] = SaxWriter_getServiceName(); + return seq; +} + + + +void SAXWriter::startDocument() throw(SAXException, RuntimeException ) +{ + if( m_bDocStarted || ! m_out.is() || !mp_SaxWriterHelper ) { + throw SAXException(); + } + m_bDocStarted = sal_True; + mp_SaxWriterHelper->startDocument(); +} + + +void SAXWriter::endDocument(void) throw(SAXException, RuntimeException) +{ + if( ! 
m_bDocStarted ) + { + throw SAXException( + OUString::createFromAscii( "endDocument called before startDocument" ), + Reference< XInterface >() , Any() ); + } + if( m_nLevel ) { + throw SAXException( + OUString::createFromAscii( "unexpected end of document" ), + Reference< XInterface >() , Any() ); + } + mp_SaxWriterHelper->endDocument(); + try + { + m_out->closeOutput(); + } + catch( IOException & e ) + { + Any a; + a <<= e; + throw SAXException( + OUString::createFromAscii( "IO exception during closing the IO Stream" ), + Reference< XInterface > (), + a ); + } +} + + +void SAXWriter::startElement(const OUString& aName, const Reference< XAttributeList >& xAttribs) + throw(SAXException, RuntimeException) +{ + if( ! m_bDocStarted ) + { + SAXException except; + except.Message = OUString( RTL_CONSTASCII_USTRINGPARAM( "startElement called before startDocument" )); + throw except; + } + if( m_bIsCDATA ) + { + SAXException except; + except.Message = OUString( RTL_CONSTASCII_USTRINGPARAM( "startElement call not allowed with CDATA sections" )); + throw except; + } + + sal_Int32 nLength(0); + if (m_bAllowLineBreak) + { + sal_Int32 nAttribCount = xAttribs.is() ? xAttribs->getLength() : 0; + + nLength ++; // "<" + nLength += calcXMLByteLength( aName.getStr() , aName.getLength(), + sal_False, sal_False ); // the tag name + + sal_Int16 n; + for( n = 0 ; n < static_cast<sal_Int16>(nAttribCount) ; n ++ ) { + nLength ++; // " " + OUString tmp = xAttribs->getNameByIndex( n ); + + nLength += calcXMLByteLength( tmp.getStr() , tmp.getLength() , sal_False, sal_False ); + + nLength += 2; // =" + + tmp = xAttribs->getValueByIndex( n ); + + nLength += calcXMLByteLength( tmp.getStr(), tmp.getLength(), sal_True, sal_True ); + + nLength += 1; // " + } + + nLength ++; // '>' + } + + // Is there a new indentation necesarry ? 
+ sal_Int32 nPrefix(getIndentPrefixLength( nLength )); + + // write into sequence + if( nPrefix >= 0 ) + mp_SaxWriterHelper->insertIndentation( nPrefix ); + + SaxInvalidCharacterError eRet(mp_SaxWriterHelper->startElement(aName, xAttribs)); + + m_nLevel++; + + if (eRet == SAX_WARNING) + { + SAXInvalidCharacterException except; + except.Message = OUString( RTL_CONSTASCII_USTRINGPARAM( "Invalid charcter during XML-Export in a attribute value" ) ); + throw except; + } + else if (eRet == SAX_ERROR) + { + SAXException except; + except.Message = OUString( RTL_CONSTASCII_USTRINGPARAM( "Invalid charcter during XML-Export" ) ); + throw except; + } +} + +void SAXWriter::endElement(const OUString& aName) throw (SAXException, RuntimeException) +{ + if( ! m_bDocStarted ) { + throw SAXException (); + } + m_nLevel --; + + if( m_nLevel < 0 ) { + throw SAXException(); + } + sal_Bool bRet(sal_True); + + if( mp_SaxWriterHelper->FinishEmptyElement() ) + m_bForceLineBreak = sal_False; + else + { + // only ascii chars allowed + sal_Int32 nLength(0); + if (m_bAllowLineBreak) + nLength = 3 + calcXMLByteLength( aName.getStr(), aName.getLength(), sal_False, sal_False ); + sal_Int32 nPrefix = getIndentPrefixLength( nLength ); + + if( nPrefix >= 0 ) + mp_SaxWriterHelper->insertIndentation( nPrefix ); + + bRet = mp_SaxWriterHelper->endElement(aName); + } + + if (!bRet) + { + SAXException except; + except.Message = OUString( RTL_CONSTASCII_USTRINGPARAM( "Invalid charcter during XML-Export" ) ); + throw except; + } +} + +void SAXWriter::characters(const OUString& aChars) throw(SAXException, RuntimeException) +{ + if( ! 
m_bDocStarted ) + { + SAXException except; + except.Message = OUString( RTL_CONSTASCII_USTRINGPARAM( "characters method called before startDocument" ) ); + throw except; + } + + sal_Bool bThrowException(sal_False); + if( aChars.getLength() ) + { + if( m_bIsCDATA ) + bThrowException = !mp_SaxWriterHelper->writeString( aChars, sal_False, sal_False ); + else + { + // Note : nFirstLineBreakOccurence is not exact, because we don't know, how + // many 2 and 3 byte chars are inbetween. However this whole stuff + // is eitherway for pretty printing only, so it does not need to be exact. + sal_Int32 nLength(0); + sal_Int32 nIndentPrefix(-1); + if (m_bAllowLineBreak) + { + sal_Int32 nFirstLineBreakOccurence = getFirstLineBreak( aChars ); + + nLength = calcXMLByteLength( aChars.getStr(), aChars.getLength(), + ! m_bIsCDATA , sal_False ); + nIndentPrefix = getIndentPrefixLength( + nFirstLineBreakOccurence >= 0 ? nFirstLineBreakOccurence : nLength ); + } + else + nIndentPrefix = getIndentPrefixLength(nLength); + + // insert indentation + if( nIndentPrefix >= 0 ) + { + if( isFirstCharWhitespace( aChars.getStr() ) ) + mp_SaxWriterHelper->insertIndentation( nIndentPrefix - 1 ); + else + mp_SaxWriterHelper->insertIndentation( nIndentPrefix ); + } + bThrowException = !mp_SaxWriterHelper->writeString(aChars, sal_True , sal_False); + } + } + if (bThrowException) + { + SAXInvalidCharacterException except; + except.Message = OUString( RTL_CONSTASCII_USTRINGPARAM( "Invalid charcter during XML-Export" ) ); + throw except; + } +} + + +void SAXWriter::ignorableWhitespace(const OUString&) throw(SAXException, RuntimeException) +{ + if( ! m_bDocStarted ) + { + throw SAXException (); + } + + m_bForceLineBreak = sal_True; +} + +void SAXWriter::processingInstruction(const OUString& aTarget, const OUString& aData) + throw (SAXException, RuntimeException) +{ + if( ! 
m_bDocStarted || m_bIsCDATA ) + { + throw SAXException(); + } + + sal_Int32 nLength(0); + if (m_bAllowLineBreak) + { + nLength = 2; // "<?" + nLength += calcXMLByteLength( aTarget.getStr(), aTarget.getLength(), sal_False, sal_False ); + + nLength += 1; // " " + + nLength += calcXMLByteLength( aData.getStr(), aData.getLength(), sal_False, sal_False ); + + nLength += 2; // "?>" + } + + sal_Int32 nPrefix = getIndentPrefixLength( nLength ); + + if( nPrefix >= 0 ) + mp_SaxWriterHelper->insertIndentation( nPrefix ); + + if (!mp_SaxWriterHelper->processingInstruction(aTarget, aData)) + { + SAXException except; + except.Message = OUString( RTL_CONSTASCII_USTRINGPARAM( "Invalid charcter during XML-Export" ) ); + throw except; + } +} + + +void SAXWriter::setDocumentLocator(const Reference< XLocator >&) + throw (SAXException, RuntimeException) +{ + +} + +void SAXWriter::startCDATA(void) throw(SAXException, RuntimeException) +{ + if( ! m_bDocStarted || m_bIsCDATA) + { + throw SAXException (); + } + + sal_Int32 nLength = 9; + sal_Int32 nPrefix = getIndentPrefixLength( nLength ); + if( nPrefix >= 0 ) + mp_SaxWriterHelper->insertIndentation( nPrefix ); + + mp_SaxWriterHelper->startCDATA(); + + m_bIsCDATA = sal_True; +} + +void SAXWriter::endCDATA(void) throw (RuntimeException) +{ + if( ! m_bDocStarted | ! m_bIsCDATA) + { + SAXException except; + except.Message = OUString( RTL_CONSTASCII_USTRINGPARAM( "endCDATA was called without startCDATA" ) ); + throw except; + } + + sal_Int32 nLength = 3; + sal_Int32 nPrefix = getIndentPrefixLength( nLength ); + if( nPrefix >= 0 ) + mp_SaxWriterHelper->insertIndentation( nPrefix ); + + mp_SaxWriterHelper->endCDATA(); + + m_bIsCDATA = sal_False; +} + + +void SAXWriter::comment(const OUString& sComment) throw(SAXException, RuntimeException) +{ + if( ! 
m_bDocStarted || m_bIsCDATA ) + { + throw SAXException(); + } + + sal_Int32 nLength(0); + if (m_bAllowLineBreak) + { + nLength = 4; // "<!--" + nLength += calcXMLByteLength( sComment.getStr(), sComment.getLength(), sal_False, sal_False); + + nLength += 3; + } + + sal_Int32 nPrefix = getIndentPrefixLength( nLength ); + if( nPrefix >= 0 ) + mp_SaxWriterHelper->insertIndentation( nPrefix ); + + if (!mp_SaxWriterHelper->comment(sComment)) + { + SAXException except; + except.Message = OUString( RTL_CONSTASCII_USTRINGPARAM( "Invalid charcter during XML-Export" ) ); + throw except; + } +} + + +void SAXWriter::allowLineBreak( ) throw ( SAXException , RuntimeException) +{ + if( ! m_bDocStarted || m_bAllowLineBreak ) { + throw SAXException(); + } + + m_bAllowLineBreak = sal_True; +} + +void SAXWriter::unknown(const OUString& sString) throw (SAXException, RuntimeException) +{ + + if( ! m_bDocStarted ) + { + throw SAXException (); + } + if( m_bIsCDATA ) + { + throw SAXException(); + } + + if( sString.matchAsciiL( "<?xml", 5 ) ) + return; + + sal_Int32 nLength(0); + if (m_bAllowLineBreak) + nLength = calcXMLByteLength( sString.getStr(), sString.getLength(), sal_False, sal_False ); + + sal_Int32 nPrefix = getIndentPrefixLength( nLength ); + if( nPrefix >= 0 ) + mp_SaxWriterHelper->insertIndentation( nPrefix ); + + if (!mp_SaxWriterHelper->writeString( sString, sal_False, sal_False)) + { + SAXException except; + except.Message = OUString( RTL_CONSTASCII_USTRINGPARAM( "Invalid charcter during XML-Export" ) ); + throw except; + } +} + +} + diff --git a/sax/source/expatwrap/xml2utf.cxx b/sax/source/expatwrap/xml2utf.cxx new file mode 100644 index 000000000000..bbd72b2a0d8b --- /dev/null +++ b/sax/source/expatwrap/xml2utf.cxx @@ -0,0 +1,570 @@ +/************************************************************************* + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * Copyright 2000, 2010 Oracle and/or its affiliates. 
+ * + * OpenOffice.org - a multi-platform office productivity suite + * + * This file is part of OpenOffice.org. + * + * OpenOffice.org is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License version 3 + * only, as published by the Free Software Foundation. + * + * OpenOffice.org is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License version 3 for more details + * (a copy is included in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU Lesser General Public License + * version 3 along with OpenOffice.org. If not, see + * <http://www.openoffice.org/license.html> + * for a copy of the LGPLv3 License. + * + ************************************************************************/ +#include <string.h> + +#include <sal/types.h> + +#include <rtl/textenc.h> +#include <rtl/tencinfo.h> + + +#include <com/sun/star/io/XInputStream.hpp> + +using namespace rtl; +using namespace ::com::sun::star::uno; +using namespace ::com::sun::star::io; + +#include "xml2utf.hxx" + +namespace sax_expatwrap { + +sal_Int32 XMLFile2UTFConverter::readAndConvert( Sequence<sal_Int8> &seq , sal_Int32 nMaxToRead ) + throw ( IOException, NotConnectedException , BufferSizeExceededException , RuntimeException ) +{ + + Sequence<sal_Int8> seqIn; + + if( ! m_in.is() ) { + throw NotConnectedException(); + } + if( ! m_bStarted ) { + nMaxToRead = Max( 512 , nMaxToRead ); // it should be possible to find the encoding attribute + // within the first 512 bytes == 128 chars in UCS-4 + } + + sal_Int32 nRead; + Sequence< sal_Int8 > seqStart; + while( sal_True ) + { + nRead = m_in->readSomeBytes( seq , nMaxToRead ); + + if( nRead + seqStart.getLength()) + { + // if nRead is 0, the file is already eof. + if( ! 
m_bStarted && nRead ) + { + // ensure that enough data is available to parse encoding + if( seqStart.getLength() ) + { + // prefix with what we had so far. + sal_Int32 nLength = seq.getLength(); + seq.realloc( seqStart.getLength() + nLength ); + + memmove (seq.getArray() + seqStart.getLength(), + seq.getConstArray(), + nLength); + memcpy (seq.getArray(), + seqStart.getConstArray(), + seqStart.getLength()); + } + + // autodetection with the first bytes + if( ! isEncodingRecognizable( seq ) ) + { + // remember what we have so far. + seqStart = seq; + + // read more ! + continue; + } + if( scanForEncoding( seq ) || m_sEncoding.getLength() ) { + // initialize decoding + initializeDecoding(); + } + nRead = seq.getLength(); + seqStart = Sequence < sal_Int8 > (); + } + + // do the encoding + if( m_pText2Unicode && m_pUnicode2Text && + m_pText2Unicode->canContinue() && m_pUnicode2Text->canContinue() ) { + + Sequence<sal_Unicode> seqUnicode = m_pText2Unicode->convert( seq ); + seq = m_pUnicode2Text->convert( seqUnicode.getConstArray(), seqUnicode.getLength() ); + } + + if( ! m_bStarted ) + { + // it must now be ensured, that no encoding attribute exist anymore + // ( otherwise the expat-Parser will crash ) + // This must be done after decoding ! + // ( e.g. Files decoded in ucs-4 cannot be read properly ) + m_bStarted = sal_True; + removeEncoding( seq ); + } + nRead = seq.getLength(); + } + + break; + } + return nRead; +} + + +XMLFile2UTFConverter::~XMLFile2UTFConverter() +{ + if( m_pText2Unicode ) + delete m_pText2Unicode; + if( m_pUnicode2Text ) + delete m_pUnicode2Text; +} + + +void XMLFile2UTFConverter::removeEncoding( Sequence<sal_Int8> &seq ) +{ + const sal_Int8 *pSource = seq.getArray(); + if( ! 
strncmp( (const char * ) pSource , "<?xml" , 4) ) + { + + // scan for encoding + OString str( (sal_Char * ) pSource , seq.getLength() ); + + // cut sequence to first line break + // find first line break; + int nMax = str.indexOf( 10 ); + if( nMax >= 0 ) + { + str = str.copy( 0 , nMax ); + } + + int nFound = str.indexOf( " encoding" ); + if( nFound >= 0 ) { + int nStop; + int nStart = str.indexOf( "\"" , nFound ); + if( nStart < 0 || str.indexOf( "'" , nFound ) < nStart ) + { + nStart = str.indexOf( "'" , nFound ); + nStop = str.indexOf( "'" , nStart +1 ); + } + else + { + nStop = str.indexOf( "\"" , nStart +1); + } + + if( nStart >= 0 && nStop >= 0 && nStart+1 < nStop ) + { + // remove encoding tag from file + memmove( &( seq.getArray()[nFound] ) , + &( seq.getArray()[nStop+1]) , + seq.getLength() - nStop -1); + seq.realloc( seq.getLength() - ( nStop+1 - nFound ) ); +// str = String( (char * ) seq.getArray() , seq.getLen() ); + } + } + } +} + +// Checks, if enough data has been accumulated to recognize the encoding +sal_Bool XMLFile2UTFConverter::isEncodingRecognizable( const Sequence< sal_Int8 > &seq) +{ + const sal_Int8 *pSource = seq.getConstArray(); + sal_Bool bCheckIfFirstClosingBracketExsists = sal_False; + + if( seq.getLength() < 8 ) { + // no recognition possible, when less than 8 bytes are available + return sal_False; + } + + if( ! strncmp( (const char * ) pSource , "<?xml" , 4 ) ) { + // scan if the <?xml tag finishes within this buffer + bCheckIfFirstClosingBracketExsists = sal_True; + } + else if( ('<' == pSource[0] || '<' == pSource[2] ) && + ( ('?' == pSource[4] || '?' == pSource[6] ) ) ) + { + // check for utf-16 + bCheckIfFirstClosingBracketExsists = sal_True; + } + else if( ( '<' == pSource[1] || '<' == pSource[3] ) && + ( '?' == pSource[5] || '?' 
== pSource[7] ) ) + { + // check for + bCheckIfFirstClosingBracketExsists = sal_True; + } + + if( bCheckIfFirstClosingBracketExsists ) + { + for( sal_Int32 i = 0; i < seq.getLength() ; i ++ ) + { + // whole <?xml tag is valid + if( '>' == pSource[ i ] ) + { + return sal_True; + } + } + return sal_False; + } + + // No <? tag in front, no need for a bigger buffer + return sal_True; +} + +sal_Bool XMLFile2UTFConverter::scanForEncoding( Sequence< sal_Int8 > &seq ) +{ + const sal_uInt8 *pSource = reinterpret_cast<const sal_uInt8*>( seq.getConstArray() ); + sal_Bool bReturn = sal_True; + + if( seq.getLength() < 4 ) { + // no recognition possible, when less than 4 bytes are available + return sal_False; + } + + // first level : detect possible file formats + if( ! strncmp( (const char * ) pSource , "<?xml" , 4 ) ) { + + // scan for encoding + OString str( (const sal_Char *) pSource , seq.getLength() ); + + // cut sequence to first line break + //find first line break; + int nMax = str.indexOf( 10 ); + if( nMax >= 0 ) + { + str = str.copy( 0 , nMax ); + } + + int nFound = str.indexOf( " encoding" ); + if( nFound < str.getLength() ) { + int nStop; + int nStart = str.indexOf( "\"" , nFound ); + if( nStart < 0 || str.indexOf( "'" , nFound ) < nStart ) + { + nStart = str.indexOf( "'" , nFound ); + nStop = str.indexOf( "'" , nStart +1 ); + } + else + { + nStop = str.indexOf( "\"" , nStart +1); + } + if( nStart >= 0 && nStop >= 0 && nStart+1 < nStop ) + { + // encoding found finally + m_sEncoding = str.copy( nStart+1 , nStop - nStart - 1 ); + } + } + } + else if( 0xFE == pSource[0] && + 0xFF == pSource[1] ) { + // UTF-16 big endian + // conversion is done so that encoding information can be easily extracted + m_sEncoding = "utf-16"; + } + else if( 0xFF == pSource[0] && + 0xFE == pSource[1] ) { + // UTF-16 little endian + // conversion is done so that encoding information can be easily extracted + m_sEncoding = "utf-16"; + } + else if( 0x00 == pSource[0] && 0x3c == pSource[1] && 
0x00 == pSource[2] && 0x3f == pSource[3] ) { + // UTF-16 big endian without byte order mark (this is (strictly speaking) an error.) + // The byte order mark is simply added + + // simply add the byte order mark ! + seq.realloc( seq.getLength() + 2 ); + memmove( &( seq.getArray()[2] ) , seq.getArray() , seq.getLength() - 2 ); + ((sal_uInt8*)seq.getArray())[0] = 0xFE; + ((sal_uInt8*)seq.getArray())[1] = 0xFF; + + m_sEncoding = "utf-16"; + } + else if( 0x3c == pSource[0] && 0x00 == pSource[1] && 0x3f == pSource[2] && 0x00 == pSource[3] ) { + // UTF-16 little endian without byte order mark (this is (strictly speaking) an error.) + // The byte order mark is simply added + + seq.realloc( seq.getLength() + 2 ); + memmove( &( seq.getArray()[2] ) , seq.getArray() , seq.getLength() - 2 ); + ((sal_uInt8*)seq.getArray())[0] = 0xFF; + ((sal_uInt8*)seq.getArray())[1] = 0xFE; + + m_sEncoding = "utf-16"; + } + else if( 0xEF == pSource[0] && + 0xBB == pSource[1] && + 0xBF == pSource[2] ) + { + // UTF-8 BOM (byte order mark); signifies utf-8, and not byte order + // The BOM is removed. + memmove( seq.getArray(), &( seq.getArray()[3] ), seq.getLength()-3 ); + seq.realloc( seq.getLength() - 3 ); + m_sEncoding = "utf-8"; + } + else if( 0x00 == pSource[0] && 0x00 == pSource[1] && 0x00 == pSource[2] && 0x3c == pSource[3] ) { + // UCS-4 big endian + m_sEncoding = "ucs-4"; + } + else if( 0x3c == pSource[0] && 0x00 == pSource[1] && 0x00 == pSource[2] && 0x00 == pSource[3] ) { + // UCS-4 little endian + m_sEncoding = "ucs-4"; + } + else if( 0x4c == pSource[0] && 0x6f == pSource[1] && + 0xa7 == static_cast<unsigned char> (pSource[2]) && + 0x94 == static_cast<unsigned char> (pSource[3]) ) { + // EBCDIC + bReturn = sal_False; // must be extended + } + else { + // other + // UTF8 is directly recognized by the parser. 
+ bReturn = sal_False; + } + + return bReturn; +} + +void XMLFile2UTFConverter::initializeDecoding() +{ + + if( m_sEncoding.getLength() ) + { + rtl_TextEncoding encoding = rtl_getTextEncodingFromMimeCharset( m_sEncoding.getStr() ); + if( encoding != RTL_TEXTENCODING_UTF8 ) + { + m_pText2Unicode = new Text2UnicodeConverter( m_sEncoding ); + m_pUnicode2Text = new Unicode2TextConverter( RTL_TEXTENCODING_UTF8 ); + } + } +} + + +//---------------------------------------------- +// +// Text2UnicodeConverter +// +//---------------------------------------------- +Text2UnicodeConverter::Text2UnicodeConverter( const OString &sEncoding ) +{ + rtl_TextEncoding encoding = rtl_getTextEncodingFromMimeCharset( sEncoding.getStr() ); + if( RTL_TEXTENCODING_DONTKNOW == encoding ) + { + m_bCanContinue = sal_False; + m_bInitialized = sal_False; + } + else + { + init( encoding ); + } +} + +Text2UnicodeConverter::~Text2UnicodeConverter() +{ + if( m_bInitialized ) + { + rtl_destroyTextToUnicodeContext( m_convText2Unicode , m_contextText2Unicode ); + rtl_destroyUnicodeToTextConverter( m_convText2Unicode ); + } +} + +void Text2UnicodeConverter::init( rtl_TextEncoding encoding ) +{ + m_bCanContinue = sal_True; + m_bInitialized = sal_True; + + m_convText2Unicode = rtl_createTextToUnicodeConverter(encoding); + m_contextText2Unicode = rtl_createTextToUnicodeContext( m_convText2Unicode ); + m_rtlEncoding = encoding; +} + + +Sequence<sal_Unicode> Text2UnicodeConverter::convert( const Sequence<sal_Int8> &seqText ) +{ + sal_uInt32 uiInfo; + sal_Size nSrcCvtBytes = 0; + sal_Size nTargetCount = 0; + sal_Size nSourceCount = 0; + + // the whole source size + sal_Int32 nSourceSize = seqText.getLength() + m_seqSource.getLength(); + Sequence<sal_Unicode> seqUnicode ( nSourceSize ); + + const sal_Int8 *pbSource = seqText.getConstArray(); + sal_Int8 *pbTempMem = 0; + + if( m_seqSource.getLength() ) { + // put old rest and new byte sequence into one array + pbTempMem = new sal_Int8[ nSourceSize ]; + memcpy( 
pbTempMem , m_seqSource.getConstArray() , m_seqSource.getLength() ); + memcpy( &(pbTempMem[ m_seqSource.getLength() ]) , seqText.getConstArray() , seqText.getLength() ); + pbSource = pbTempMem; + + // set to zero again + m_seqSource = Sequence< sal_Int8 >(); + } + + while( sal_True ) { + + /* All invalid characters are transformed to the unicode undefined char */ + nTargetCount += rtl_convertTextToUnicode( + m_convText2Unicode, + m_contextText2Unicode, + ( const sal_Char * ) &( pbSource[nSourceCount] ), + nSourceSize - nSourceCount , + &( seqUnicode.getArray()[ nTargetCount ] ), + seqUnicode.getLength() - nTargetCount, + RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_DEFAULT | + RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_DEFAULT | + RTL_TEXTTOUNICODE_FLAGS_INVALID_DEFAULT, + &uiInfo, + &nSrcCvtBytes ); + nSourceCount += nSrcCvtBytes; + + if( uiInfo & RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL ) { + // save necessary bytes for next conversion + seqUnicode.realloc( seqUnicode.getLength() * 2 ); + continue; + } + break; + } + if( uiInfo & RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL ) { + m_seqSource.realloc( nSourceSize - nSourceCount ); + memcpy( m_seqSource.getArray() , &(pbSource[nSourceCount]) , nSourceSize-nSourceCount ); + } + + + if( pbTempMem ) { + delete [] pbTempMem; + } + + // set to correct unicode size + seqUnicode.realloc( nTargetCount ); + + return seqUnicode; +} + + + +//---------------------------------------------- +// +// Unicode2TextConverter +// +//---------------------------------------------- +Unicode2TextConverter::Unicode2TextConverter( rtl_TextEncoding encoding ) +{ + init( encoding ); +} + + +Unicode2TextConverter::~Unicode2TextConverter() +{ + if( m_bInitialized ) { + rtl_destroyUnicodeToTextContext( m_convUnicode2Text , m_contextUnicode2Text ); + rtl_destroyUnicodeToTextConverter( m_convUnicode2Text ); + } +} + + +Sequence<sal_Int8> Unicode2TextConverter::convert(const sal_Unicode *puSource , sal_Int32 nSourceSize) +{ + sal_Unicode *puTempMem = 0; + + if( 
m_seqSource.getLength() ) { + // For surrogates ! + // put old rest and new byte sequence into one array + // In general when surrogates are used, they should be rarely + // cut off between two convert()-calls. So this code is used + // rarely and the extra copy is acceptable. + puTempMem = new sal_Unicode[ nSourceSize + m_seqSource.getLength()]; + memcpy( puTempMem , + m_seqSource.getConstArray() , + m_seqSource.getLength() * sizeof( sal_Unicode ) ); + memcpy( + &(puTempMem[ m_seqSource.getLength() ]) , + puSource , + nSourceSize*sizeof( sal_Unicode ) ); + puSource = puTempMem; + nSourceSize += m_seqSource.getLength(); + + m_seqSource = Sequence< sal_Unicode > (); + } + + + sal_Size nTargetCount = 0; + sal_Size nSourceCount = 0; + + sal_uInt32 uiInfo; + sal_Size nSrcCvtChars; + + // take nSourceSize * 3 as preference + // this is an upper boundary for converting to utf8, + // which most often used as the target. + sal_Int32 nSeqSize = nSourceSize * 3; + + Sequence<sal_Int8> seqText( nSeqSize ); + sal_Char *pTarget = (sal_Char *) seqText.getArray(); + while( sal_True ) { + + nTargetCount += rtl_convertUnicodeToText( + m_convUnicode2Text, + m_contextUnicode2Text, + &( puSource[nSourceCount] ), + nSourceSize - nSourceCount , + &( pTarget[nTargetCount] ), + nSeqSize - nTargetCount, + RTL_UNICODETOTEXT_FLAGS_UNDEFINED_DEFAULT | + RTL_UNICODETOTEXT_FLAGS_INVALID_DEFAULT , + &uiInfo, + &nSrcCvtChars); + nSourceCount += nSrcCvtChars; + + if( uiInfo & RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL ) { + nSeqSize = nSeqSize *2; + seqText.realloc( nSeqSize ); // double array size + pTarget = ( sal_Char * ) seqText.getArray(); + continue; + } + break; + } + + // for surrogates + if( uiInfo & RTL_UNICODETOTEXT_INFO_SRCBUFFERTOSMALL ) { + m_seqSource.realloc( nSourceSize - nSourceCount ); + memcpy( m_seqSource.getArray() , + &(puSource[nSourceCount]), + (nSourceSize - nSourceCount) * sizeof( sal_Unicode ) ); + } + + if( puTempMem ) { + delete [] puTempMem; + } + + // reduce the size 
of the buffer (fast, no copy necessary) + seqText.realloc( nTargetCount ); + + return seqText; +} + +void Unicode2TextConverter::init( rtl_TextEncoding encoding ) +{ + m_bCanContinue = sal_True; + m_bInitialized = sal_True; + + m_convUnicode2Text = rtl_createUnicodeToTextConverter( encoding ); + m_contextUnicode2Text = rtl_createUnicodeToTextContext( m_convUnicode2Text ); + m_rtlEncoding = encoding; +}; + + +} |