diff options
Diffstat (limited to 'svtools/source/svrtf/svparser.cxx')
-rw-r--r-- | svtools/source/svrtf/svparser.cxx | 726 |
1 files changed, 0 insertions, 726 deletions
diff --git a/svtools/source/svrtf/svparser.cxx b/svtools/source/svrtf/svparser.cxx deleted file mode 100644 index 3eec85cd93..0000000000 --- a/svtools/source/svrtf/svparser.cxx +++ /dev/null @@ -1,726 +0,0 @@ -/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ -/************************************************************************* - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * Copyright 2000, 2010 Oracle and/or its affiliates. - * - * OpenOffice.org - a multi-platform office productivity suite - * - * This file is part of OpenOffice.org. - * - * OpenOffice.org is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License version 3 - * only, as published by the Free Software Foundation. - * - * OpenOffice.org is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License version 3 for more details - * (a copy is included in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU Lesser General Public License - * version 3 along with OpenOffice.org. If not, see - * <http://www.openoffice.org/license.html> - * for a copy of the LGPLv3 License. - * - ************************************************************************/ - -// MARKER(update_precomp.py): autogen include statement, do not remove -#include "precompiled_svtools.hxx" - -#include <stdio.h> -#include <svtools/svparser.hxx> -#include <tools/stream.hxx> -#include <tools/debug.hxx> -#define _SVSTDARR_USHORTS -#include <svl/svstdarr.hxx> -#include <rtl/textcvt.h> -#include <rtl/tencinfo.h> - -#define SVPAR_CSM_ - -#define SVPAR_CSM_ANSI 0x0001U -#define SVPAR_CSM_UTF8 0x0002U -#define SVPAR_CSM_UCS2B 0x0004U -#define SVPAR_CSM_UCS2L 0x0008U -#define SVPAR_CSM_SWITCH 0x8000U - -// Struktur, um sich die akt. Daten zumerken -struct SvParser_Impl -{ - String aToken; // gescanntes Token - sal_uLong nFilePos; // akt. Position im Stream - sal_uLong nlLineNr; // akt. Zeilen Nummer - sal_uLong nlLinePos; // akt. Spalten Nummer - long nTokenValue; // zusaetzlicher Wert (RTF) - sal_Bool bTokenHasValue; // indicates whether nTokenValue is valid - int nToken; // akt. Token - sal_Unicode nNextCh; // akt. Zeichen - - int nSaveToken; // das Token vom Continue - - rtl_TextToUnicodeConverter hConv; - rtl_TextToUnicodeContext hContext; - -#ifdef DBG_UTIL - SvFileStream aOut; -#endif - - SvParser_Impl() : - nSaveToken(0), hConv( 0 ), hContext( (rtl_TextToUnicodeContext)1 ) - { - } - -}; - - - -// Konstruktor -SvParser::SvParser( SvStream& rIn, sal_uInt8 nStackSize ) - : rInput( rIn ) - , nlLineNr( 1 ) - , nlLinePos( 1 ) - , pImplData( 0 ) - , nTokenValue( 0 ) - , bTokenHasValue( false ) - , eState( SVPAR_NOTSTARTED ) - , eSrcEnc( RTL_TEXTENCODING_DONTKNOW ) - , bDownloadingFile( sal_False ) - , nTokenStackSize( nStackSize ) - , nTokenStackPos( 0 ) -{ - bUCS2BSrcEnc = bSwitchToUCS2 = sal_False; - eState = SVPAR_NOTSTARTED; - if( nTokenStackSize < 3 ) - nTokenStackSize = 3; - pTokenStack = new TokenStackType[ nTokenStackSize ]; - pTokenStackPos = pTokenStack; - -#ifdef DBG_UTIL - - // wenn die Datei schon existiert, dann Anhaengen: - if( !pImplData ) - pImplData = new SvParser_Impl; - pImplData->aOut.Open( String::CreateFromAscii( "\\parser.dmp" ), - STREAM_STD_WRITE | STREAM_NOCREATE ); - if( pImplData->aOut.GetError() || !pImplData->aOut.IsOpen() ) - pImplData->aOut.Close(); - else - { - pImplData->aOut.Seek( STREAM_SEEK_TO_END ); - pImplData->aOut << "\x0c\n\n >>>>>>>>>>>>>>> Dump Start <<<<<<<<<<<<<<<\n"; - } -#endif -} - -SvParser::~SvParser() -{ -#ifdef DBG_UTIL - if( pImplData->aOut.IsOpen() ) - pImplData->aOut << "\n\n >>>>>>>>>>>>>>> Dump Ende <<<<<<<<<<<<<<<\n"; - pImplData->aOut.Close(); -#endif - - if( pImplData && pImplData->hConv ) - { - rtl_destroyTextToUnicodeContext( pImplData->hConv, - pImplData->hContext ); - rtl_destroyTextToUnicodeConverter( pImplData->hConv ); - } - - delete pImplData; - - delete [] pTokenStack; -} - -void SvParser::ClearTxtConvContext() -{ - if( pImplData && pImplData->hConv ) - rtl_resetTextToUnicodeContext( pImplData->hConv, pImplData->hContext ); -} - -void SvParser::SetSrcEncoding( rtl_TextEncoding eEnc ) -{ - - if( eEnc != eSrcEnc ) - { - if( pImplData && pImplData->hConv ) - { - rtl_destroyTextToUnicodeContext( pImplData->hConv, - pImplData->hContext ); - rtl_destroyTextToUnicodeConverter( pImplData->hConv ); - pImplData->hConv = 0; - pImplData->hContext = (rtl_TextToUnicodeContext )1; - } - - if( rtl_isOctetTextEncoding(eEnc) || - RTL_TEXTENCODING_UCS2 == eEnc ) - { - eSrcEnc = eEnc; - if( !pImplData ) - pImplData = new SvParser_Impl; - pImplData->hConv = rtl_createTextToUnicodeConverter( eSrcEnc ); - DBG_ASSERT( pImplData->hConv, - "SvParser::SetSrcEncoding: no converter for source encoding" ); - if( !pImplData->hConv ) - eSrcEnc = RTL_TEXTENCODING_DONTKNOW; - else - pImplData->hContext = - rtl_createTextToUnicodeContext( pImplData->hConv ); - } - else - { - DBG_ASSERT( !this, - "SvParser::SetSrcEncoding: invalid source encoding" ); - eSrcEnc = RTL_TEXTENCODING_DONTKNOW; - } - } -} - -void SvParser::RereadLookahead() -{ - rInput.Seek(nNextChPos); - nNextCh = GetNextChar(); -} - -sal_Unicode SvParser::GetNextChar() -{ - sal_Unicode c = 0U; - - // When reading muliple bytes, we don't have to care about the file - // position when we run inti the pending state. The file position is - // maintained by SaveState/RestoreState. - sal_Bool bErr; - if( bSwitchToUCS2 && 0 == rInput.Tell() ) - { - sal_uChar c1, c2; - sal_Bool bSeekBack = sal_True; - - rInput >> c1; - bErr = rInput.IsEof() || rInput.GetError(); - if( !bErr ) - { - if( 0xff == c1 || 0xfe == c1 ) - { - rInput >> c2; - bErr = rInput.IsEof() || rInput.GetError(); - if( !bErr ) - { - if( 0xfe == c1 && 0xff == c2 ) - { - eSrcEnc = RTL_TEXTENCODING_UCS2; - bUCS2BSrcEnc = sal_True; - bSeekBack = sal_False; - } - else if( 0xff == c1 && 0xfe == c2 ) - { - eSrcEnc = RTL_TEXTENCODING_UCS2; - bUCS2BSrcEnc = sal_False; - bSeekBack = sal_False; - } - } - } - } - if( bSeekBack ) - rInput.Seek( 0 ); - - bSwitchToUCS2 = sal_False; - } - - nNextChPos = rInput.Tell(); - - if( RTL_TEXTENCODING_UCS2 == eSrcEnc ) - { - sal_Unicode cUC = USHRT_MAX; - sal_uChar c1, c2; - - rInput >> c1 >> c2; - if( 2 == rInput.Tell() && - !(rInput.IsEof() || rInput.GetError()) && - ( (bUCS2BSrcEnc && 0xfe == c1 && 0xff == c2) || - (!bUCS2BSrcEnc && 0xff == c1 && 0xfe == c2) ) ) - rInput >> c1 >> c2; - - bErr = rInput.IsEof() || rInput.GetError(); - if( !bErr ) - { - if( bUCS2BSrcEnc ) - cUC = (sal_Unicode(c1) << 8) | c2; - else - cUC = (sal_Unicode(c2) << 8) | c1; - } - - if( !bErr ) - { - c = cUC; - } - } - else - { - sal_Size nChars = 0; - do - { - sal_Char c1; // signed, that's the text converter expects - rInput >> c1; - bErr = rInput.IsEof() || rInput.GetError(); - if( !bErr ) - { - if ( - RTL_TEXTENCODING_DONTKNOW == eSrcEnc || - RTL_TEXTENCODING_SYMBOL == eSrcEnc - ) - { - // no convserion shall take place - c = (sal_Unicode)c1; - nChars = 1; - } - else - { - DBG_ASSERT( pImplData && pImplData->hConv, - "no text converter!" ); - - sal_Unicode cUC; - sal_uInt32 nInfo = 0; - sal_Size nCvtBytes; - nChars = rtl_convertTextToUnicode( - pImplData->hConv, pImplData->hContext, - &c1, 1, &cUC, 1, - RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_ERROR| - RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_ERROR| - RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR, - &nInfo, &nCvtBytes); - if( (nInfo&RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL) != 0 ) - { - // The conversion wasn't successfull because we haven't - // read enough characters. - if( pImplData->hContext != (rtl_TextToUnicodeContext)1 ) - { - while( (nInfo&RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL) != 0 ) - { - rInput >> c1; - bErr = rInput.IsEof() || rInput.GetError(); - if( bErr ) - break; - - nChars = rtl_convertTextToUnicode( - pImplData->hConv, pImplData->hContext, - &c1, 1, &cUC, 1, - RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_ERROR| - RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_ERROR| - RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR, - &nInfo, &nCvtBytes); - } - if( !bErr ) - { - if( 1 == nChars && 0 == nInfo ) - { - c = cUC; - } - else if( 0 != nChars || 0 != nInfo ) - { - DBG_ASSERT( (nInfo&RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL) == 0, - "source buffer is to small" ); - DBG_ASSERT( (nInfo&~(RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL)) == 0, - "there is a conversion error" ); - DBG_ASSERT( 0 == nChars, - "there is a converted character, but an error" ); - // There are still errors, but nothing we can - // do - c = (sal_Unicode)'?'; - nChars = 1; - } - } - } - else - { - sal_Char sBuffer[10]; - sBuffer[0] = c1; - sal_uInt16 nLen = 1; - while( (nInfo&RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL) != 0 && - nLen < 10 ) - { - rInput >> c1; - bErr = rInput.IsEof() || rInput.GetError(); - if( bErr ) - break; - - sBuffer[nLen++] = c1; - nChars = rtl_convertTextToUnicode( - pImplData->hConv, 0, sBuffer, nLen, &cUC, 1, - RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_ERROR| - RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_ERROR| - RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR, - &nInfo, &nCvtBytes); - } - if( !bErr ) - { - if( 1 == nChars && 0 == nInfo ) - { - DBG_ASSERT( nCvtBytes == nLen, - "no all bytes have been converted!" ); - c = cUC; - } - else - { - DBG_ASSERT( (nInfo&RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL) == 0, - "source buffer is to small" ); - DBG_ASSERT( (nInfo&~(RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL)) == 0, - "there is a conversion error" ); - DBG_ASSERT( 0 == nChars, - "there is a converted character, but an error" ); - - // There are still errors, so we use the first - // character and restart after that. - c = (sal_Unicode)sBuffer[0]; - rInput.SeekRel( -(nLen-1) ); - nChars = 1; - } - } - } - } - else if( 1 == nChars && 0 == nInfo ) - { - // The conversion was successfull - DBG_ASSERT( nCvtBytes == 1, - "no all bytes have been converted!" ); - c = cUC; - } - else if( 0 != nChars || 0 != nInfo ) - { - DBG_ASSERT( 0 == nChars, - "there is a converted character, but an error" ); - DBG_ASSERT( 0 != nInfo, - "there is no converted character and no error" ); - // #73398#: If the character could not be converted, - // because a conversion is not available, do no conversion at all. - c = (sal_Unicode)c1; - nChars = 1; - - } - } - } - } - while( 0 == nChars && !bErr ); - } - if( bErr ) - { - if( ERRCODE_IO_PENDING == rInput.GetError() ) - { - eState = SVPAR_PENDING; - return c; - } - else - return sal_Unicode(EOF); - } - -#ifdef DBG_UTIL - if( pImplData->aOut.IsOpen() ) - pImplData->aOut << ByteString::ConvertFromUnicode( c, - RTL_TEXTENCODING_MS_1251 ); -#endif - - if( c == '\n' ) - { - IncLineNr(); - SetLinePos( 1L ); - } - else - IncLinePos(); - return c; -} - -int SvParser::GetNextToken() -{ - int nRet = 0; - - if( !nTokenStackPos ) - { - aToken.Erase(); // Token-Buffer loeschen - nTokenValue = -1; // Kennzeichen fuer kein Value gelesen - bTokenHasValue = false; - - nRet = _GetNextToken(); - if( SVPAR_PENDING == eState ) - return nRet; - } - - ++pTokenStackPos; - if( pTokenStackPos == pTokenStack + nTokenStackSize ) - pTokenStackPos = pTokenStack; - - // vom Stack holen ?? - if( nTokenStackPos ) - { - --nTokenStackPos; - nTokenValue = pTokenStackPos->nTokenValue; - bTokenHasValue = pTokenStackPos->bTokenHasValue; - aToken = pTokenStackPos->sToken; - nRet = pTokenStackPos->nTokenId; - } - // nein, dann das aktuelle auf den Stack - else if( SVPAR_WORKING == eState ) - { - pTokenStackPos->sToken = aToken; - pTokenStackPos->nTokenValue = nTokenValue; - pTokenStackPos->bTokenHasValue = bTokenHasValue; - pTokenStackPos->nTokenId = nRet; - } - else if( SVPAR_ACCEPTED != eState && SVPAR_PENDING != eState ) - eState = SVPAR_ERROR; // irgend ein Fehler - - return nRet; -} - -int SvParser::SkipToken( short nCnt ) // n Tokens zurueck "skippen" -{ - pTokenStackPos = GetStackPtr( nCnt ); - short nTmp = nTokenStackPos - nCnt; - if( nTmp < 0 ) - nTmp = 0; - else if( nTmp > nTokenStackSize ) - nTmp = nTokenStackSize; - nTokenStackPos = sal_uInt8(nTmp); - - // und die Werte zurueck - aToken = pTokenStackPos->sToken; - nTokenValue = pTokenStackPos->nTokenValue; - bTokenHasValue = pTokenStackPos->bTokenHasValue; - - return pTokenStackPos->nTokenId; -} - -SvParser::TokenStackType* SvParser::GetStackPtr( short nCnt ) -{ - sal_uInt8 nAktPos = sal_uInt8(pTokenStackPos - pTokenStack ); - if( nCnt > 0 ) - { - if( nCnt >= nTokenStackSize ) - nCnt = (nTokenStackSize-1); - if( nAktPos + nCnt < nTokenStackSize ) - nAktPos = sal::static_int_cast< sal_uInt8 >(nAktPos + nCnt); - else - nAktPos = sal::static_int_cast< sal_uInt8 >( - nAktPos + (nCnt - nTokenStackSize)); - } - else if( nCnt < 0 ) - { - if( -nCnt >= nTokenStackSize ) - nCnt = -nTokenStackSize+1; - if( -nCnt <= nAktPos ) - nAktPos = sal::static_int_cast< sal_uInt8 >(nAktPos + nCnt); - else - nAktPos = sal::static_int_cast< sal_uInt8 >( - nAktPos + (nCnt + nTokenStackSize)); - } - return pTokenStack + nAktPos; -} - -// wird fuer jedes Token gerufen, das in CallParser erkannt wird -void SvParser::NextToken( int ) -{ -} - - -// fuers asynchrone lesen aus dem SvStream - -int SvParser::GetSaveToken() const -{ - return pImplData ? pImplData->nSaveToken : 0; -} - -void SvParser::SaveState( int nToken ) -{ - // aktuellen Status merken - if( !pImplData ) - { - pImplData = new SvParser_Impl; - pImplData->nSaveToken = 0; - } - - pImplData->nFilePos = rInput.Tell(); - pImplData->nToken = nToken; - - pImplData->aToken = aToken; - pImplData->nlLineNr = nlLineNr; - pImplData->nlLinePos = nlLinePos; - pImplData->nTokenValue= nTokenValue; - pImplData->bTokenHasValue = bTokenHasValue; - pImplData->nNextCh = nNextCh; -} - -void SvParser::RestoreState() -{ - // alten Status wieder zurueck setzen - if( pImplData ) - { - if( ERRCODE_IO_PENDING == rInput.GetError() ) - rInput.ResetError(); - aToken = pImplData->aToken; - nlLineNr = pImplData->nlLineNr; - nlLinePos = pImplData->nlLinePos; - nTokenValue= pImplData->nTokenValue; - bTokenHasValue=pImplData->bTokenHasValue; - nNextCh = pImplData->nNextCh; - - pImplData->nSaveToken = pImplData->nToken; - - rInput.Seek( pImplData->nFilePos ); - } -} - -void SvParser::Continue( int ) -{ -} - -void SvParser::BuildWhichTbl( SvUShorts &rWhichMap, - sal_uInt16 *pWhichIds, - sal_uInt16 nWhichIds ) -{ - sal_uInt16 aNewRange[2]; - - for( sal_uInt16 nCnt = 0; nCnt < nWhichIds; ++nCnt, ++pWhichIds ) - if( *pWhichIds ) - { - aNewRange[0] = aNewRange[1] = *pWhichIds; - sal_Bool bIns = sal_True; - - // Position suchen - for ( sal_uInt16 nOfs = 0; rWhichMap[nOfs]; nOfs += 2 ) - { - if( *pWhichIds < rWhichMap[nOfs] - 1 ) - { - // neuen Range davor - rWhichMap.Insert( aNewRange, 2, nOfs ); - bIns = sal_False; - break; - } - else if( *pWhichIds == rWhichMap[nOfs] - 1 ) - { - // diesen Range nach unten erweitern - rWhichMap[nOfs] = *pWhichIds; - bIns = sal_False; - break; - } - else if( *pWhichIds == rWhichMap[nOfs+1] + 1 ) - { - if( rWhichMap[nOfs+2] != 0 && rWhichMap[nOfs+2] == *pWhichIds + 1 ) - { - // mit dem naechsten Bereich mergen - rWhichMap[nOfs+1] = rWhichMap[nOfs+3]; - rWhichMap.Remove( nOfs+2, 2 ); - } - else - // diesen Range nach oben erweitern - rWhichMap[nOfs+1] = *pWhichIds; - bIns = sal_False; - break; - } - } - - // einen Range hinten anhaengen - if( bIns ) - rWhichMap.Insert( aNewRange, 2, rWhichMap.Count()-1 ); - } -} - - -IMPL_STATIC_LINK( SvParser, NewDataRead, void*, EMPTYARG ) -{ - switch( pThis->eState ) - { - case SVPAR_PENDING: - // Wenn gerade ein File geladen wird duerfen wir nicht weiterlaufen, - // sondern muessen den Aufruf ignorieren. - if( pThis->IsDownloadingFile() ) - break; - - pThis->eState = SVPAR_WORKING; - pThis->RestoreState(); - - pThis->Continue( pThis->pImplData->nToken ); - - if( ERRCODE_IO_PENDING == pThis->rInput.GetError() ) - pThis->rInput.ResetError(); - - if( SVPAR_PENDING != pThis->eState ) - pThis->ReleaseRef(); // ansonsten sind wir fertig! - break; - - case SVPAR_WAITFORDATA: - pThis->eState = SVPAR_WORKING; - break; - - case SVPAR_NOTSTARTED: - case SVPAR_WORKING: - break; - - default: - pThis->ReleaseRef(); // ansonsten sind wir fertig! - break; - } - - return 0; -} - -/*======================================================================== - * - * SvKeyValueIterator. - * - *======================================================================*/ -SV_DECL_PTRARR_DEL(SvKeyValueList_Impl, SvKeyValue*, 0, 4) -SV_IMPL_PTRARR(SvKeyValueList_Impl, SvKeyValue*); - -/* - * SvKeyValueIterator. - */ -SvKeyValueIterator::SvKeyValueIterator (void) - : m_pList (new SvKeyValueList_Impl), - m_nPos (0) -{ -} - -/* - * ~SvKeyValueIterator. - */ -SvKeyValueIterator::~SvKeyValueIterator (void) -{ - delete m_pList; -} - -/* - * GetFirst. - */ -sal_Bool SvKeyValueIterator::GetFirst (SvKeyValue &rKeyVal) -{ - m_nPos = m_pList->Count(); - return GetNext (rKeyVal); -} - -/* - * GetNext. - */ -sal_Bool SvKeyValueIterator::GetNext (SvKeyValue &rKeyVal) -{ - if (m_nPos > 0) - { - rKeyVal = *m_pList->GetObject(--m_nPos); - return sal_True; - } - else - { - // Nothing to do. - return sal_False; - } -} - -/* - * Append. - */ -void SvKeyValueIterator::Append (const SvKeyValue &rKeyVal) -{ - SvKeyValue *pKeyVal = new SvKeyValue (rKeyVal); - m_pList->C40_INSERT(SvKeyValue, pKeyVal, m_pList->Count()); -} - -/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ |