diff options
author | Eike Rathke <erack@redhat.com> | 2012-04-10 18:30:07 +0200 |
---|---|---|
committer | Eike Rathke <erack@redhat.com> | 2012-04-10 19:32:09 +0200 |
commit | 8cd05e9cf1152b21528c6f1a5bda3d949dc49791 (patch) | |
tree | 0f37b5ff5447c3d6088b64a01d75e4801cdab847 | |
parent | bf0629e09d176555aaa10f60061b206103cc0295 (diff) |
resolved fdo#48501 enable line size >64k in SvStream::Read*Line()
CSV and other text formats may come with line sizes >64k that so far were
truncated due to limitations in ByteString/UniString/String, even if a line
consists of several fields that are each <64k.
Introduced additional SvStream methods that read into rtl::OString and
rtl::OUString, and made SvStream::ReadUniOrByteStringLine() fill only an
rtl::OUString.
Made Calc CSV import use those.
-rw-r--r-- | sc/source/filter/dif/difimp.cxx | 60 | ||||
-rw-r--r-- | sc/source/filter/inc/dif.hxx | 5 | ||||
-rw-r--r-- | sc/source/ui/dbgui/asciiopt.cxx | 12 | ||||
-rw-r--r-- | sc/source/ui/dbgui/csvgrid.cxx | 25 | ||||
-rw-r--r-- | sc/source/ui/dbgui/csvtablebox.cxx | 4 | ||||
-rw-r--r-- | sc/source/ui/dbgui/scuiasciiopt.cxx | 4 | ||||
-rw-r--r-- | sc/source/ui/docshell/impex.cxx | 125 | ||||
-rw-r--r-- | sc/source/ui/inc/asciiopt.hxx | 6 | ||||
-rw-r--r-- | sc/source/ui/inc/csvcontrol.hxx | 2 | ||||
-rw-r--r-- | sc/source/ui/inc/csvgrid.hxx | 4 | ||||
-rw-r--r-- | sc/source/ui/inc/csvtablebox.hxx | 2 | ||||
-rw-r--r-- | sc/source/ui/inc/impex.hxx | 9 | ||||
-rw-r--r-- | sc/source/ui/inc/scuiasciiopt.hxx | 4 | ||||
-rw-r--r-- | sc/source/ui/vba/vbarange.cxx | 6 | ||||
-rw-r--r-- | tools/inc/tools/stream.hxx | 67 | ||||
-rw-r--r-- | tools/source/stream/stream.cxx | 51 |
16 files changed, 267 insertions, 119 deletions
diff --git a/sc/source/filter/dif/difimp.cxx b/sc/source/filter/dif/difimp.cxx index 19189a19e807..1ffae6a1cb40 100644 --- a/sc/source/filter/dif/difimp.cxx +++ b/sc/source/filter/dif/difimp.cxx @@ -312,7 +312,7 @@ TOPIC DifParser::GetNextTopic( void ) }; STATE eS = S_START; - String aLine; + rtl::OUString aLine; nVector = 0; nVal = 0; @@ -360,7 +360,7 @@ TOPIC DifParser::GetNextTopic( void ) break; case S_VectorVal: { - const sal_Unicode* pCur = aLine.GetBuffer(); + const sal_Unicode* pCur = aLine.getStr(); pCur = ScanIntVal( pCur, nVector ); @@ -375,10 +375,11 @@ TOPIC DifParser::GetNextTopic( void ) } break; case S_Data: - OSL_ENSURE( aLine.Len() >= 2, + OSL_ENSURE( aLine.getLength() >= 2, "+GetNextTopic(): <String> ist zu kurz!" ); - if( aLine.Len() > 2 ) - aData = aLine.Copy( 1, aLine.Len() - 2 ); + OSL_ENSURE( aLine.getLength() - 2 <= STRING_MAXLEN, "GetNextTopic(): line doesn't fit into data"); + if( aLine.getLength() > 2 ) + aData = aLine.copy( 1, aLine.getLength() - 2 ); else aData.Erase(); eS = S_END; @@ -448,16 +449,16 @@ DATASET DifParser::GetNumberDataset( const sal_Unicode* pPossibleNumericData ) return eRet; } -bool DifParser::ReadNextLine( String& rStr ) +bool DifParser::ReadNextLine( rtl::OUString& rStr ) { - if( aLookAheadLine.Len() == 0 ) + if( aLookAheadLine.isEmpty() ) { return rIn.ReadUniOrByteStringLine( rStr, rIn.GetStreamCharSet() ); } else { rStr = aLookAheadLine; - aLookAheadLine.Erase(); + aLookAheadLine = rtl::OUString(); return true; } } @@ -469,10 +470,10 @@ bool DifParser::LookAhead() const sal_Unicode* pAktBuffer; bool bValidStructure = false; - OSL_ENSURE( aLookAheadLine.Len() == 0, "*DifParser::LookAhead(): LookAhead called twice in a row" ); + OSL_ENSURE( aLookAheadLine.isEmpty(), "*DifParser::LookAhead(): LookAhead called twice in a row" ); rIn.ReadUniOrByteStringLine( aLookAheadLine, rIn.GetStreamCharSet() ); - pAktBuffer = aLookAheadLine.GetBuffer(); + pAktBuffer = aLookAheadLine.getStr(); switch( *pAktBuffer ) { @@ -493,7 
+494,7 @@ bool DifParser::LookAhead() } break; case '1': // String Data - if( Is1_0( aLookAheadLine.GetBuffer() ) ) + if( Is1_0( aLookAheadLine.getStr() ) ) { bValidStructure = true; } @@ -505,12 +506,12 @@ bool DifParser::LookAhead() DATASET DifParser::GetNextDataset( void ) { DATASET eRet = D_UNKNOWN; - String aLine; + rtl::OUString aLine; const sal_Unicode* pAktBuffer; ReadNextLine( aLine ); - pAktBuffer = aLine.GetBuffer(); + pAktBuffer = aLine.getStr(); switch( *pAktBuffer ) { @@ -520,9 +521,9 @@ DATASET DifParser::GetNextDataset( void ) if( Is1_0( pAktBuffer ) ) { ReadNextLine( aLine ); - if( IsBOT( aLine.GetBuffer() ) ) + if( IsBOT( aLine.getStr() ) ) eRet = D_BOT; - else if( IsEOD( aLine.GetBuffer() ) ) + else if( IsEOD( aLine.getStr() ) ) eRet = D_EOD; } break; @@ -532,25 +533,32 @@ DATASET DifParser::GetNextDataset( void ) { pAktBuffer++; eRet = GetNumberDataset(pAktBuffer); - ReadNextLine( aData ); + rtl::OUString aTmpLine; + ReadNextLine( aTmpLine ); if ( eRet == D_SYNT_ERROR ) { // for broken records write "#ERR: data" to cell String aTmp( RTL_CONSTASCII_USTRINGPARAM( "#ERR: " )); aTmp += pAktBuffer; aTmp.AppendAscii( " (" ); - aTmp += aData; + OSL_ENSURE( aTmpLine.getLength() <= STRING_MAXLEN - aTmp.Len() - 1, "GetNextDataset(): line doesn't fit into data"); + aTmp += aTmpLine; aTmp += sal_Unicode(')'); aData = aTmp; eRet = D_STRING; } + else + { + OSL_ENSURE( aTmpLine.getLength() <= STRING_MAXLEN, "GetNextDataset(): line doesn't fit into data"); + aData = aTmpLine; + } } break; case '1': // String Data - if( Is1_0( aLine.GetBuffer() ) ) + if( Is1_0( aLine.getStr() ) ) { ReadNextLine( aLine ); - xub_StrLen nLineLength = aLine.Len(); - const sal_Unicode* pLine = aLine.GetBuffer(); + sal_Int32 nLineLength = aLine.getLength(); + const sal_Unicode* pLine = aLine.getStr(); if( nLineLength >= 1 && *pLine == '"' ) { @@ -562,7 +570,8 @@ DATASET DifParser::GetNextDataset( void ) // Single line string if( nLineLength >= 2 && pLine[nLineLength - 1] == '"' ) { - 
aData = aLine.Copy( 1, nLineLength - 2 ); + OSL_ENSURE( aLine.getLength() - 2 <= STRING_MAXLEN, "GetNextDataset(): line doesn't fit into data"); + aData = aLine.copy( 1, nLineLength - 2 ); lcl_DeEscapeQuotesDif( aData ); eRet = D_STRING; } @@ -570,7 +579,8 @@ DATASET DifParser::GetNextDataset( void ) else { // Multiline string - aData = aLine.Copy( 1 ); + OSL_ENSURE( aLine.getLength() - 1 <= STRING_MAXLEN, "GetNextDataset(): line doesn't fit into data"); + aData = aLine.copy( 1 ); bool bContinue = true; while ( bContinue ) { @@ -578,17 +588,19 @@ DATASET DifParser::GetNextDataset( void ) bContinue = !rIn.IsEof() && ReadNextLine( aLine ); if( bContinue ) { - nLineLength = aLine.Len(); + nLineLength = aLine.getLength(); if( nLineLength >= 1 ) { - pLine = aLine.GetBuffer(); + pLine = aLine.getStr(); bContinue = !LookAhead(); if( bContinue ) { + OSL_ENSURE( aLine.getLength() <= STRING_MAXLEN - aData.Len(), "GetNextDataset(): line doesn't fit into data"); aData.Append( aLine ); } else if( pLine[nLineLength - 1] == '"' ) { + OSL_ENSURE( nLineLength - 1 <= STRING_MAXLEN - aData.Len(), "GetNextDataset(): line doesn't fit into data"); aData.Append( pLine, nLineLength - 1 ); lcl_DeEscapeQuotesDif( aData ); eRet = D_STRING; diff --git a/sc/source/filter/inc/dif.hxx b/sc/source/filter/inc/dif.hxx index 374ea301b976..2e1ca38ba672 100644 --- a/sc/source/filter/inc/dif.hxx +++ b/sc/source/filter/inc/dif.hxx @@ -32,6 +32,7 @@ #include <boost/ptr_container/ptr_vector.hpp> #include <tools/string.hxx> +#include <rtl/ustring.hxx> #include "address.hxx" #include "global.hxx" @@ -80,9 +81,9 @@ private: SvNumberFormatter* pNumFormatter; SvStream& rIn; sal_Bool bPlain; - String aLookAheadLine; + rtl::OUString aLookAheadLine; - bool ReadNextLine( String& rStr ); + bool ReadNextLine( rtl::OUString& rStr ); bool LookAhead(); DATASET GetNumberDataset( const sal_Unicode* pPossibleNumericData ); static inline sal_Bool IsBOT( const sal_Unicode* pRef ); diff --git 
a/sc/source/ui/dbgui/asciiopt.cxx b/sc/source/ui/dbgui/asciiopt.cxx index 60e0db4c185a..0b6ce50e4527 100644 --- a/sc/source/ui/dbgui/asciiopt.cxx +++ b/sc/source/ui/dbgui/asciiopt.cxx @@ -81,7 +81,7 @@ ScAsciiOptions::ScAsciiOptions(const ScAsciiOptions& rOpt) : { if (nInfoCount) { - pColStart = new xub_StrLen[nInfoCount]; + pColStart = new sal_Int32[nInfoCount]; pColFormat = new sal_uInt8[nInfoCount]; for (sal_uInt16 i=0; i<nInfoCount; i++) { @@ -104,7 +104,7 @@ ScAsciiOptions::~ScAsciiOptions() } -void ScAsciiOptions::SetColInfo( sal_uInt16 nCount, const xub_StrLen* pStart, const sal_uInt8* pFormat ) +void ScAsciiOptions::SetColInfo( sal_uInt16 nCount, const sal_Int32* pStart, const sal_uInt8* pFormat ) { delete[] pColStart; delete[] pColFormat; @@ -113,7 +113,7 @@ void ScAsciiOptions::SetColInfo( sal_uInt16 nCount, const xub_StrLen* pStart, co if (nInfoCount) { - pColStart = new xub_StrLen[nInfoCount]; + pColStart = new sal_Int32[nInfoCount]; pColFormat = new sal_uInt8[nInfoCount]; for (sal_uInt16 i=0; i<nInfoCount; i++) { @@ -139,7 +139,7 @@ void ScAsciiOptions::SetColumnInfo( const ScCsvExpDataVec& rDataVec ) nInfoCount = static_cast< sal_uInt16 >( rDataVec.size() ); if( nInfoCount ) { - pColStart = new xub_StrLen[ nInfoCount ]; + pColStart = new sal_Int32[ nInfoCount ]; pColFormat = new sal_uInt8[ nInfoCount ]; for( sal_uInt16 nIx = 0; nIx < nInfoCount; ++nIx ) { @@ -276,11 +276,11 @@ void ScAsciiOptions::ReadFromString( const String& rString ) nInfoCount = nSub / 2; if (nInfoCount) { - pColStart = new xub_StrLen[nInfoCount]; + pColStart = new sal_Int32[nInfoCount]; pColFormat = new sal_uInt8[nInfoCount]; for (sal_uInt16 nInfo=0; nInfo<nInfoCount; nInfo++) { - pColStart[nInfo] = (xub_StrLen) aToken.GetToken( 2*nInfo, '/' ).ToInt32(); + pColStart[nInfo] = (sal_Int32) aToken.GetToken( 2*nInfo, '/' ).ToInt32(); pColFormat[nInfo] = (sal_uInt8) aToken.GetToken( 2*nInfo+1, '/' ).ToInt32(); } } diff --git a/sc/source/ui/dbgui/csvgrid.cxx 
b/sc/source/ui/dbgui/csvgrid.cxx index 83cbad623b6d..09d3651e6fde 100644 --- a/sc/source/ui/dbgui/csvgrid.cxx +++ b/sc/source/ui/dbgui/csvgrid.cxx @@ -537,7 +537,7 @@ void ScCsvGrid::FillColumnDataSep( ScAsciiOptions& rOptions ) const if( GetColumnType( nColIx ) != CSV_TYPE_DEFAULT ) // 1-based column index aDataVec.push_back( ScCsvExpData( - static_cast< xub_StrLen >( nColIx + 1 ), + static_cast< sal_Int32 >( nColIx + 1 ), lcl_GetExtColumnType( GetColumnType( nColIx ) ) ) ); } rOptions.SetColumnInfo( aDataVec ); @@ -551,11 +551,10 @@ void ScCsvGrid::FillColumnDataFix( ScAsciiOptions& rOptions ) const for( sal_uInt32 nColIx = 0; nColIx < nCount; ++nColIx ) { ScCsvExpData& rData = aDataVec[ nColIx ]; - rData.mnIndex = static_cast< xub_StrLen >( - Min( static_cast< sal_Int32 >( STRING_MAXLEN ), GetColumnPos( nColIx ) ) ); + rData.mnIndex = static_cast< sal_Int32 >( GetColumnPos( nColIx ) ); rData.mnType = lcl_GetExtColumnType( GetColumnType( nColIx ) ); } - aDataVec[ nCount ].mnIndex = STRING_MAXLEN; + aDataVec[ nCount ].mnIndex = SAL_MAX_INT32; aDataVec[ nCount ].mnType = SC_COL_SKIP; rOptions.SetColumnInfo( aDataVec ); } @@ -730,7 +729,7 @@ void ScCsvGrid::DoSelectAction( sal_uInt32 nColIndex, sal_uInt16 nModifier ) // cell contents -------------------------------------------------------------- void ScCsvGrid::ImplSetTextLineSep( - sal_Int32 nLine, const String& rTextLine, + sal_Int32 nLine, const rtl::OUString& rTextLine, const String& rSepChars, sal_Unicode cTextSep, bool bMergeSep ) { if( nLine < GetFirstVisLine() ) return; @@ -744,7 +743,7 @@ void ScCsvGrid::ImplSetTextLineSep( // scan for separators String aCellText; const sal_Unicode* pSepChars = rSepChars.GetBuffer(); - const sal_Unicode* pChar = rTextLine.GetBuffer(); + const sal_Unicode* pChar = rTextLine.getStr(); sal_uInt32 nColIx = 0; while( *pChar && (nColIx < sal::static_int_cast<sal_uInt32>(CSV_MAXCOLCOUNT)) ) @@ -787,11 +786,11 @@ void ScCsvGrid::ImplSetTextLineSep( InvalidateGfx(); } -void 
ScCsvGrid::ImplSetTextLineFix( sal_Int32 nLine, const String& rTextLine ) +void ScCsvGrid::ImplSetTextLineFix( sal_Int32 nLine, const rtl::OUString& rTextLine ) { if( nLine < GetFirstVisLine() ) return; - sal_Int32 nChars = rTextLine.Len(); + sal_Int32 nChars = rTextLine.getLength(); if( nChars > GetPosCount() ) Execute( CSVCMD_SETPOSCOUNT, nChars ); @@ -802,13 +801,13 @@ void ScCsvGrid::ImplSetTextLineFix( sal_Int32 nLine, const String& rTextLine ) StringVec& rStrVec = maTexts[ nLineIx ]; rStrVec.clear(); sal_uInt32 nColCount = GetColumnCount(); - xub_StrLen nStrLen = rTextLine.Len(); - xub_StrLen nStrIx = 0; + sal_Int32 nStrLen = rTextLine.getLength(); + sal_Int32 nStrIx = 0; for( sal_uInt32 nColIx = 0; (nColIx < nColCount) && (nStrIx < nStrLen); ++nColIx ) { - xub_StrLen nColWidth = static_cast< xub_StrLen >( GetColumnWidth( nColIx ) ); - rStrVec.push_back( rTextLine.Copy( nStrIx, Max( nColWidth, CSV_MAXSTRLEN ) ) ); - nStrIx = sal::static_int_cast<xub_StrLen>( nStrIx + nColWidth ); + sal_Int32 nColWidth = GetColumnWidth( nColIx ); + rStrVec.push_back( rTextLine.copy( nStrIx, Max( nColWidth, static_cast<sal_Int32>(CSV_MAXSTRLEN) ) ) ); + nStrIx = nStrIx + nColWidth; } InvalidateGfx(); } diff --git a/sc/source/ui/dbgui/csvtablebox.cxx b/sc/source/ui/dbgui/csvtablebox.cxx index 74777a09021a..bb55aec394ec 100644 --- a/sc/source/ui/dbgui/csvtablebox.cxx +++ b/sc/source/ui/dbgui/csvtablebox.cxx @@ -191,14 +191,14 @@ void ScCsvTableBox::MakePosVisible( sal_Int32 nPos ) // cell contents -------------------------------------------------------------- void ScCsvTableBox::SetUniStrings( - const String* pTextLines, const String& rSepChars, + const rtl::OUString* pTextLines, const String& rSepChars, sal_Unicode cTextSep, bool bMergeSep ) { // assuming that pTextLines is a string array with size CSV_PREVIEW_LINES // -> will be dynamic sometime DisableRepaint(); sal_Int32 nEndLine = GetFirstVisLine() + CSV_PREVIEW_LINES; - const String* pString = pTextLines; + const 
rtl::OUString* pString = pTextLines; for( sal_Int32 nLine = GetFirstVisLine(); nLine < nEndLine; ++nLine, ++pString ) { if( mbFixedMode ) diff --git a/sc/source/ui/dbgui/scuiasciiopt.cxx b/sc/source/ui/dbgui/scuiasciiopt.cxx index fefb2557677e..11b2bb3533d8 100644 --- a/sc/source/ui/dbgui/scuiasciiopt.cxx +++ b/sc/source/ui/dbgui/scuiasciiopt.cxx @@ -441,7 +441,7 @@ ScImportAsciiDlg::~ScImportAsciiDlg() // ---------------------------------------------------------------------------- -bool ScImportAsciiDlg::GetLine( sal_uLong nLine, String &rText ) +bool ScImportAsciiDlg::GetLine( sal_uLong nLine, rtl::OUString &rText ) { if (nLine >= ASCIIDLG_MAXROWS || !mpDatStream) return false; @@ -704,7 +704,7 @@ IMPL_LINK_NOARG(ScImportAsciiDlg, UpdateTextHdl) break; } for (; i < CSV_PREVIEW_LINES; i++) - maPreviewLine[i].Erase(); + maPreviewLine[i] = rtl::OUString(); maTableBox.Execute( CSVCMD_SETLINECOUNT, mnRowPosCount); bool bMergeSep = (aCkbAsOnce.IsChecked() == sal_True); diff --git a/sc/source/ui/docshell/impex.cxx b/sc/source/ui/docshell/impex.cxx index eecae51acbfd..418eda7ac728 100644 --- a/sc/source/ui/docshell/impex.cxx +++ b/sc/source/ui/docshell/impex.cxx @@ -87,6 +87,13 @@ class StarBASIC; //======================================================================== +// We don't want to end up with 2GB read in one line just because of malformed +// multiline fields, so chop it _somewhere_, which is twice supported columns +// times maximum cell content length, 2*1024*64K=128M, and because it's +// sal_Unicode that's 256MB. If it's 2GB of data without LF we're out of luck +// anyway. +static const sal_Int32 nArbitraryLineLengthLimit = 2 * MAXCOLCOUNT * STRING_MAXLEN; + namespace { const char SYLK_LF[] = "\x1b :"; @@ -562,6 +569,30 @@ void ScImportExport::WriteUnicodeOrByteEndl( SvStream& rStrm ) } +/** Append characters of [p1,p2) to rField. 
+ + @returns TRUE if ok; FALSE if data overflow, truncated + */ +static bool lcl_appendLineData( String& rField, const sal_Unicode* p1, const sal_Unicode* p2 ) +{ + OSL_ENSURE( rField.Len() + (p2 - p1) <= STRING_MAXLEN, "lcl_appendLineData: data overflow"); + if (rField.Len() + (p2 - p1) <= STRING_MAXLEN) + { + rField.Append( p1, sal::static_int_cast<xub_StrLen>( p2 - p1 ) ); + return true; + } + else + { + // If STRING_MAXLEN is passed as length, then String attempts to + // determine the length of the string and comes up with an overflow + // casted to xub_StrLen again ... so pass max-1, data will be truncated + // anyway. + rField.Append( p1, (rField.Len() ? STRING_MAXLEN - rField.Len() : STRING_MAXLEN - 1) ); + return false; + } +} + + enum DoubledQuoteMode { DQM_KEEP, // both are taken @@ -613,7 +644,12 @@ static const sal_Unicode* lcl_ScanString( const sal_Unicode* p, String& rString, p++; } if ( p0 < p ) - rString.Append( p0, sal::static_int_cast<xub_StrLen>( ((*p || *(p-1) == cStr) ? p-1 : p) - p0 ) ); + { + if (!lcl_appendLineData( rString, p0, ((*p || *(p-1) == cStr) ? 
p-1 : p))) + { + /* TODO: warning at UI, data truncated */ + } + } } while ( bCont ); return p; } @@ -785,16 +821,17 @@ sal_Bool ScImportExport::Text2Doc( SvStream& rStrm ) while( bOk ) { - String aLine, aCell; + rtl::OUString aLine; + String aCell; SCROW nRow = nStartRow; rStrm.Seek( nOldPos ); for( ;; ) { - rStrm.ReadUniOrByteStringLine( aLine, rStrm.GetStreamCharSet() ); + rStrm.ReadUniOrByteStringLine( aLine, rStrm.GetStreamCharSet(), nArbitraryLineLengthLimit ); if( rStrm.IsEof() ) break; SCCOL nCol = nStartCol; - const sal_Unicode* p = aLine.GetBuffer(); + const sal_Unicode* p = aLine.getStr(); while( *p ) { aCell.Erase(); @@ -811,7 +848,10 @@ sal_Bool ScImportExport::Text2Doc( SvStream& rStrm ) const sal_Unicode* q = p; while( *p && *p != cSep ) p++; - aCell.Assign( q, sal::static_int_cast<xub_StrLen>( p - q ) ); + if (!lcl_appendLineData( aCell, q, p)) + { + /* TODO: warning at UI, data truncated */ + } if( *p ) p++; } @@ -1104,25 +1144,31 @@ static bool lcl_PutString( } -String lcl_GetFixed( const String& rLine, xub_StrLen nStart, xub_StrLen nNext, bool& rbIsQuoted ) +String lcl_GetFixed( const rtl::OUString& rLine, sal_Int32 nStart, sal_Int32 nNext, bool& rbIsQuoted ) { - xub_StrLen nLen = rLine.Len(); + sal_Int32 nLen = rLine.getLength(); if (nNext > nLen) nNext = nLen; if ( nNext <= nStart ) return EMPTY_STRING; - const sal_Unicode* pStr = rLine.GetBuffer(); + const sal_Unicode* pStr = rLine.getStr(); - xub_StrLen nSpace = nNext; + sal_Int32 nSpace = nNext; while ( nSpace > nStart && pStr[nSpace-1] == ' ' ) --nSpace; rbIsQuoted = (pStr[nStart] == sal_Unicode('"') && pStr[nSpace-1] == sal_Unicode('"')); if (rbIsQuoted) - return rLine.Copy(nStart+1, nSpace-nStart-2); + { + OSL_ENSURE( nSpace - nStart - 3 <= STRING_MAXLEN, "lcl_GetFixed: line doesn't fit into data"); + return rLine.copy(nStart+1, nSpace-nStart-2); + } else - return rLine.Copy(nStart, nSpace-nStart); + { + OSL_ENSURE( nSpace - nStart <= STRING_MAXLEN, "lcl_GetFixed: line doesn't fit into 
data"); + return rLine.copy(nStart, nSpace-nStart); + } } sal_Bool ScImportExport::ExtText2Doc( SvStream& rStrm ) @@ -1144,12 +1190,12 @@ sal_Bool ScImportExport::ExtText2Doc( SvStream& rStrm ) SCROW nStartRow = aRange.aStart.Row(); SCTAB nTab = aRange.aStart.Tab(); - sal_Bool bFixed = pExtOptions->IsFixedLen(); - const String& rSeps = pExtOptions->GetFieldSeps(); - const sal_Unicode* pSeps = rSeps.GetBuffer(); - sal_Bool bMerge = pExtOptions->IsMergeSeps(); - sal_uInt16 nInfoCount = pExtOptions->GetInfoCount(); - const xub_StrLen* pColStart = pExtOptions->GetColStart(); + sal_Bool bFixed = pExtOptions->IsFixedLen(); + const String& rSeps = pExtOptions->GetFieldSeps(); + const sal_Unicode* pSeps = rSeps.GetBuffer(); + sal_Bool bMerge = pExtOptions->IsMergeSeps(); + sal_uInt16 nInfoCount = pExtOptions->GetInfoCount(); + const sal_Int32* pColStart = pExtOptions->GetColStart(); const sal_uInt8* pColFormat = pExtOptions->GetColFormat(); long nSkipLines = pExtOptions->GetStartRow(); @@ -1176,7 +1222,8 @@ sal_Bool ScImportExport::ExtText2Doc( SvStream& rStrm ) MsLangId::convertLanguageToLocale( LANGUAGE_ENGLISH_US ) ); } - String aLine, aCell; + rtl::OUString aLine; + String aCell; sal_uInt16 i; SCROW nRow = nStartRow; @@ -1209,7 +1256,7 @@ sal_Bool ScImportExport::ExtText2Doc( SvStream& rStrm ) if ( rStrm.IsEof() ) break; - xub_StrLen nLineLen = aLine.Len(); + sal_Int32 nLineLen = aLine.getLength(); SCCOL nCol = nStartCol; bool bMultiLine = false; if ( bFixed ) // Feste Satzlaenge @@ -1227,8 +1274,8 @@ sal_Bool ScImportExport::ExtText2Doc( SvStream& rStrm ) bOverflow = sal_True; // display warning on import else if (!bDetermineRange) { - xub_StrLen nStart = pColStart[i]; - xub_StrLen nNext = ( i+1 < nInfoCount ) ? pColStart[i+1] : nLineLen; + sal_Int32 nStart = pColStart[i]; + sal_Int32 nNext = ( i+1 < nInfoCount ) ? 
pColStart[i+1] : nLineLen; bool bIsQuoted = false; aCell = lcl_GetFixed( aLine, nStart, nNext, bIsQuoted ); if (bIsQuoted && bQuotedAsText) @@ -1247,7 +1294,7 @@ sal_Bool ScImportExport::ExtText2Doc( SvStream& rStrm ) { SCCOL nSourceCol = 0; sal_uInt16 nInfoStart = 0; - const sal_Unicode* p = aLine.GetBuffer(); + const sal_Unicode* p = aLine.getStr(); // Yes, the check is nCol<=MAXCOL+1, +1 because it is only an // overflow if there is really data following to be put behind // the last column, which doesn't happen if info is @@ -1378,7 +1425,12 @@ const sal_Unicode* ScImportExport::ScanNextFieldFromString( const sal_Unicode* p // Append remaining unquoted and undelimited data (dirty, dirty) to // this field. if (p > p1) - rField.Append( p1, sal::static_int_cast<xub_StrLen>( p - p1 ) ); + { + if (!lcl_appendLineData( rField, p1, p)) + { + /* TODO: warning at UI, data truncated */ + } + } if( *p ) p++; } @@ -1387,7 +1439,10 @@ const sal_Unicode* ScImportExport::ScanNextFieldFromString( const sal_Unicode* p const sal_Unicode* p0 = p; while ( *p && !ScGlobal::UnicodeStrChr( pSeps, *p ) ) p++; - rField.Append( p0, sal::static_int_cast<xub_StrLen>( p - p0 ) ); + if (!lcl_appendLineData( rField, p0, p)) + { + /* TODO: warning at UI, data truncated */ + } if( *p ) p++; } @@ -2135,12 +2190,12 @@ inline const sal_Unicode* lcl_UnicodeStrChr( const sal_Unicode* pStr, return 0; } -String ReadCsvLine(SvStream &rStream, sal_Bool bEmbeddedLineBreak, +rtl::OUString ReadCsvLine(SvStream &rStream, sal_Bool bEmbeddedLineBreak, const String& rFieldSeparators, sal_Unicode cFieldQuote, sal_Bool bAllowBackslashEscape) { - String aStr; - rStream.ReadUniOrByteStringLine(aStr, rStream.GetStreamCharSet()); + rtl::OUString aStr; + rStream.ReadUniOrByteStringLine(aStr, rStream.GetStreamCharSet(), nArbitraryLineLengthLimit); if (bEmbeddedLineBreak) { @@ -2149,13 +2204,13 @@ String ReadCsvLine(SvStream &rStream, sal_Bool bEmbeddedLineBreak, // See if the separator(s) include tab. 
bool bTabSep = lcl_UnicodeStrChr(pSeps, '\t') != NULL; - xub_StrLen nLastOffset = 0; - xub_StrLen nQuotes = 0; - while (!rStream.IsEof() && aStr.Len() < STRING_MAXLEN) + sal_Int32 nLastOffset = 0; + sal_Int32 nQuotes = 0; + while (!rStream.IsEof() && aStr.getLength() < nArbitraryLineLengthLimit) { bool bBackslashEscaped = false; const sal_Unicode *p, *pStart; - p = pStart = aStr.GetBuffer(); + p = pStart = aStr.getStr(); p += nLastOffset; while (*p) { @@ -2193,10 +2248,10 @@ String ReadCsvLine(SvStream &rStream, sal_Bool bEmbeddedLineBreak, break; else { - nLastOffset = aStr.Len(); - String aNext; - rStream.ReadUniOrByteStringLine(aNext, rStream.GetStreamCharSet()); - aStr += sal_Unicode(_LF); + nLastOffset = aStr.getLength(); + rtl::OUString aNext; + rStream.ReadUniOrByteStringLine(aNext, rStream.GetStreamCharSet(), nArbitraryLineLengthLimit); + aStr += rtl::OUString( sal_Unicode(_LF)); aStr += aNext; } } diff --git a/sc/source/ui/inc/asciiopt.hxx b/sc/source/ui/inc/asciiopt.hxx index 7fb3c937538d..53a22dd6dec5 100644 --- a/sc/source/ui/inc/asciiopt.hxx +++ b/sc/source/ui/inc/asciiopt.hxx @@ -59,7 +59,7 @@ private: sal_Bool bCharSetSystem; long nStartRow; sal_uInt16 nInfoCount; - xub_StrLen* pColStart; //! TODO replace with vector + sal_Int32* pColStart; //! TODO replace with vector sal_uInt8* pColFormat; //! 
TODO replace with vector public: @@ -87,7 +87,7 @@ public: sal_Unicode GetTextSep() const { return cTextSep; } sal_Bool IsFixedLen() const { return bFixedLen; } sal_uInt16 GetInfoCount() const { return nInfoCount; } - const xub_StrLen* GetColStart() const { return pColStart; } + const sal_Int32* GetColStart() const { return pColStart; } const sal_uInt8* GetColFormat() const { return pColFormat; } long GetStartRow() const { return nStartRow; } LanguageType GetLanguage() const { return eLang; } @@ -103,7 +103,7 @@ public: void SetStartRow( long nRow) { nStartRow= nRow; } void SetLanguage(LanguageType e) { eLang = e; } - void SetColInfo( sal_uInt16 nCount, const xub_StrLen* pStart, const sal_uInt8* pFormat ); + void SetColInfo( sal_uInt16 nCount, const sal_Int32* pStart, const sal_uInt8* pFormat ); void SetColumnInfo( const ScCsvExpDataVec& rDataVec ); }; diff --git a/sc/source/ui/inc/csvcontrol.hxx b/sc/source/ui/inc/csvcontrol.hxx index 68a97150e8e4..6a47b677ba14 100644 --- a/sc/source/ui/inc/csvcontrol.hxx +++ b/sc/source/ui/inc/csvcontrol.hxx @@ -83,7 +83,7 @@ const sal_uInt8 SC_COL_ENGLISH = 10; /** Exported data of a column (data used in the dialog). */ struct ScCsvExpData { - xub_StrLen mnIndex; /// Index of a column. + sal_Int32 mnIndex; /// Index of a column. sal_uInt8 mnType; /// External type of the column. inline ScCsvExpData() : mnIndex( 0 ), mnType( SC_COL_STANDARD ) {} diff --git a/sc/source/ui/inc/csvgrid.hxx b/sc/source/ui/inc/csvgrid.hxx index acb21731528e..8056943a1ce0 100644 --- a/sc/source/ui/inc/csvgrid.hxx +++ b/sc/source/ui/inc/csvgrid.hxx @@ -277,10 +277,10 @@ private: public: /** Fills all cells of a line with the passed text (separators mode). */ void ImplSetTextLineSep( - sal_Int32 nLine, const String& rTextLine, + sal_Int32 nLine, const rtl::OUString& rTextLine, const String& rSepChars, sal_Unicode cTextSep, bool bMergeSep ); /** Fills all cells of a line with the passed text (fixed width mode). 
*/ - void ImplSetTextLineFix( sal_Int32 nLine, const String& rTextLine ); + void ImplSetTextLineFix( sal_Int32 nLine, const rtl::OUString& rTextLine ); /** Returns the text of the specified cell. */ const String& GetCellText( sal_uInt32 nColIndex, sal_Int32 nLine ) const; diff --git a/sc/source/ui/inc/csvtablebox.hxx b/sc/source/ui/inc/csvtablebox.hxx index 22bb380e38c2..68f7e7efe7c2 100644 --- a/sc/source/ui/inc/csvtablebox.hxx +++ b/sc/source/ui/inc/csvtablebox.hxx @@ -108,7 +108,7 @@ private: public: /** Fills all cells of all lines with the passed texts (Unicode strings). */ void SetUniStrings( - const String* pTextLines, const String& rSepChars, + const rtl::OUString* pTextLines, const String& rSepChars, sal_Unicode cTextSep, bool bMergeSep ); // column settings -------------------------------------------------------- diff --git a/sc/source/ui/inc/impex.hxx b/sc/source/ui/inc/impex.hxx index 51bd610aaa1c..aaaebac17853 100644 --- a/sc/source/ui/inc/impex.hxx +++ b/sc/source/ui/inc/impex.hxx @@ -225,8 +225,8 @@ public: @ATTENTION Note that the string returned may be truncated even inside - a quoted field if STRING_MAXLEN was reached. There - currently is no way to exactly determine the conditions, + a quoted field if some (arbitrary) maximum length was reached. + There currently is no way to exactly determine the conditions, whether this was at a line end, or whether open quotes would have closed the field before the line end, as even a ReadUniOrByteStringLine() may return prematurely but the @@ -235,11 +235,12 @@ public: length and bytes read don't necessarily match, and resyncing to a previous position matching the string's length isn't always possible. As a result, a logical line - with embedded line breaks and more than STRING_MAXLEN + with embedded line breaks and more than the maximum length characters will be spoiled, and a subsequent ReadCsvLine() may start under false preconditions. 
+ */ -SC_DLLPUBLIC String ReadCsvLine(SvStream &rStream, sal_Bool bEmbeddedLineBreak, +SC_DLLPUBLIC rtl::OUString ReadCsvLine(SvStream &rStream, sal_Bool bEmbeddedLineBreak, const String& rFieldSeparators, sal_Unicode cFieldQuote, sal_Bool bAllowBackslashEscape = sal_False); diff --git a/sc/source/ui/inc/scuiasciiopt.hxx b/sc/source/ui/inc/scuiasciiopt.hxx index c07f6f4f10c8..c45b9acd1d08 100644 --- a/sc/source/ui/inc/scuiasciiopt.hxx +++ b/sc/source/ui/inc/scuiasciiopt.hxx @@ -44,7 +44,7 @@ class ScImportAsciiDlg : public ModalDialog sal_uLong* mpRowPosArray; sal_uLong mnRowPosCount; - String maPreviewLine[ CSV_PREVIEW_LINES ]; + rtl::OUString maPreviewLine[ CSV_PREVIEW_LINES ]; FixedLine aFlFieldOpt; FixedText aFtCharSet; @@ -116,7 +116,7 @@ private: void SetupSeparatorCtrls(); - bool GetLine( sal_uLong nLine, String &rText ); + bool GetLine( sal_uLong nLine, rtl::OUString &rText ); void UpdateVertical(); inline bool Seek( sal_uLong nPos ); // synced to and from mnStreamPos diff --git a/sc/source/ui/vba/vbarange.cxx b/sc/source/ui/vba/vbarange.cxx index 27de1e8bf5da..def7b63bc360 100644 --- a/sc/source/ui/vba/vbarange.cxx +++ b/sc/source/ui/vba/vbarange.cxx @@ -4996,12 +4996,12 @@ ScVbaRange::TextToColumns( const css::uno::Any& Destination, const css::uno::Any // Parse the value of parameter FieldInfo. 
sal_uInt16 nRealCount = 0; - xub_StrLen* pColumns = NULL; + sal_Int32* pColumns = NULL; sal_uInt8* pFormats = NULL; if ( sFieldInfo.getLength() > 0 ) { sal_uInt16 nCount = sFieldInfo.getLength(); - pColumns = new xub_StrLen[nCount]; + pColumns = new sal_Int32[nCount]; pFormats = new sal_uInt8[nCount]; sal_uInt16 nFormat = 1; uno::Reference< script::XTypeConverter > xConverter = getTypeConverter( mxContext ); @@ -5012,7 +5012,7 @@ ScVbaRange::TextToColumns( const css::uno::Any& Destination, const css::uno::Any nFormat = 1; try { - uno::Any aConverted = xConverter->convertTo( sFieldInfo[nIndex][0], getCppuType((xub_StrLen*)0) ); + uno::Any aConverted = xConverter->convertTo( sFieldInfo[nIndex][0], getCppuType((sal_Int32*)0) ); aConverted >>= pColumns[nRealCount]; aConverted = xConverter->convertTo( sFieldInfo[nIndex][1], getCppuType((sal_uInt16*)0) ); aConverted >>= nFormat; diff --git a/tools/inc/tools/stream.hxx b/tools/inc/tools/stream.hxx index 5b5c443ac354..dc4505a06a35 100644 --- a/tools/inc/tools/stream.hxx +++ b/tools/inc/tools/stream.hxx @@ -380,9 +380,37 @@ public: // next Tell() <= nSize sal_Bool SetStreamSize( sal_Size nSize ); - sal_Bool ReadLine( rtl::OString& rStr ); + /** Read a line of bytes. + + @param nMaxBytesToRead + Maximum of bytes to read, if line is longer it will be + truncated. + + NOTE that the default is one character less than + STRING_MAXLEN to prevent problems after conversion to + String that may be lurking in various places doing + something like + for (sal_uInt16 i=0; i < aString.Len(); ++i) + causing endless loops ... + */ + sal_Bool ReadLine( rtl::OString& rStr, sal_Int32 nMaxBytesToRead = 0xFFFE ); sal_Bool WriteLine( const rtl::OString& rStr ); + /** Read a line of bytes. + + @param nMaxBytesToRead + Maximum of bytes to read, if line is longer it will be + truncated. 
+ + NOTE that the default is one character less than + STRING_MAXLEN to prevent problems after conversion to + String that may be lurking in various places doing + something like + for (sal_uInt16 i=0; i < aString.Len(); ++i) + causing endless loops ... + */ + sal_Bool ReadByteStringLine( rtl::OUString& rStr, rtl_TextEncoding eSrcCharSet, + sal_Int32 nMaxBytesToRead = 0xFFFE ); sal_Bool ReadByteStringLine( String& rStr, rtl_TextEncoding eSrcCharSet ); sal_Bool WriteByteStringLine( const String& rStr, rtl_TextEncoding eDestCharSet ); @@ -403,17 +431,44 @@ public: */ sal_Bool StartReadingUnicodeText( rtl_TextEncoding eReadBomCharSet ); - /// Read a line of Unicode - sal_Bool ReadUniStringLine( String& rStr ); + /** Read a line of Unicode. + + @param nMaxCodepointsToRead + Maximum of codepoints (UCS-2 or UTF-16 pairs, not + bytes) to read, if line is longer it will be truncated. + + NOTE that the default is one character less than + STRING_MAXLEN to prevent problems after conversion to + String that may be lurking in various places doing + something like + for (sal_uInt16 i=0; i < aString.Len(); ++i) + causing endless loops ... 
+ */ + sal_Bool ReadUniStringLine( rtl::OUString& rStr, sal_Int32 nMaxCodepointsToRead = 0xFFFE ); /// Read a 32bit length prefixed sequence of utf-16 if eSrcCharSet==RTL_TEXTENCODING_UNICODE, /// otherwise read a 16bit length prefixed sequence of bytes and convert from eSrcCharSet rtl::OUString ReadUniOrByteString(rtl_TextEncoding eSrcCharSet); /// Write a 32bit length prefixed sequence of utf-16 if eSrcCharSet==RTL_TEXTENCODING_UNICODE, /// otherwise convert to eSrcCharSet and write a 16bit length prefixed sequence of bytes SvStream& WriteUniOrByteString( const rtl::OUString& rStr, rtl_TextEncoding eDestCharSet ); - /// Read a line of Unicode if eSrcCharSet==RTL_TEXTENCODING_UNICODE, - /// otherwise read a line of Bytecode and convert from eSrcCharSet - sal_Bool ReadUniOrByteStringLine( String& rStr, rtl_TextEncoding eSrcCharSet ); + + /** Read a line of Unicode if eSrcCharSet==RTL_TEXTENCODING_UNICODE, + otherwise read a line of Bytecode and convert from eSrcCharSet + + @param nMaxCodepointsToRead + Maximum of codepoints (2 bytes if Unicode, bytes if not + Unicode) to read, if line is longer it will be + truncated. + + NOTE that the default is one character less than + STRING_MAXLEN to prevent problems after conversion to + String that may be lurking in various places doing + something like + for (sal_uInt16 i=0; i < aString.Len(); ++i) + causing endless loops ... 
+ */ + sal_Bool ReadUniOrByteStringLine( rtl::OUString& rStr, rtl_TextEncoding eSrcCharSet, + sal_Int32 nMaxCodepointsToRead = 0xFFFE ); /// Write a sequence of Unicode characters if eDestCharSet==RTL_TEXTENCODING_UNICODE, /// otherwise write a sequence of Bytecodes converted to eDestCharSet sal_Bool WriteUnicodeOrByteText( const String& rStr, rtl_TextEncoding eDestCharSet ); diff --git a/tools/source/stream/stream.cxx b/tools/source/stream/stream.cxx index 1da4096a92c7..96cabc266c34 100644 --- a/tools/source/stream/stream.cxx +++ b/tools/source/stream/stream.cxx @@ -647,6 +647,15 @@ void SvStream::ResetError() |* *************************************************************************/ +sal_Bool SvStream::ReadByteStringLine( rtl::OUString& rStr, rtl_TextEncoding eSrcCharSet, + sal_Int32 nMaxBytesToRead ) +{ + rtl::OString aStr; + sal_Bool bRet = ReadLine( aStr, nMaxBytesToRead); + rStr = rtl::OStringToOUString(aStr, eSrcCharSet); + return bRet; +} + sal_Bool SvStream::ReadByteStringLine( String& rStr, rtl_TextEncoding eSrcCharSet ) { rtl::OString aStr; @@ -655,7 +664,7 @@ sal_Bool SvStream::ReadByteStringLine( String& rStr, rtl_TextEncoding eSrcCharSe return bRet; } -sal_Bool SvStream::ReadLine(rtl::OString& rStr) +sal_Bool SvStream::ReadLine( rtl::OString& rStr, sal_Int32 nMaxBytesToRead ) { sal_Char buf[256+1]; sal_Bool bEnd = sal_False; @@ -663,7 +672,7 @@ sal_Bool SvStream::ReadLine(rtl::OString& rStr) sal_Char c = 0; sal_Size nTotalLen = 0; - rtl::OStringBuffer aBuf; + rtl::OStringBuffer aBuf(4096); while( !bEnd && !GetError() ) // !!! nicht auf EOF testen, // !!! weil wir blockweise // !!! 
lesen @@ -695,8 +704,15 @@ sal_Bool SvStream::ReadLine(rtl::OString& rStr) buf[n] = c; ++n; } - aBuf.append(buf, n); nTotalLen += j; + if (nTotalLen > static_cast<sal_Size>(nMaxBytesToRead)) + { + n -= nTotalLen - nMaxBytesToRead; + nTotalLen = nMaxBytesToRead; + bEnd = sal_True; + } + if ( n ) + aBuf.append(buf, n); } if ( !bEnd && !GetError() && aBuf.getLength() ) @@ -723,7 +739,7 @@ sal_Bool SvStream::ReadLine(rtl::OString& rStr) return bEnd; } -sal_Bool SvStream::ReadUniStringLine( String& rStr ) +sal_Bool SvStream::ReadUniStringLine( rtl::OUString& rStr, sal_Int32 nMaxCodepointsToRead ) { sal_Unicode buf[256+1]; sal_Bool bEnd = sal_False; @@ -733,7 +749,7 @@ sal_Bool SvStream::ReadUniStringLine( String& rStr ) DBG_ASSERT( sizeof(sal_Unicode) == sizeof(sal_uInt16), "ReadUniStringLine: swapping sizeof(sal_Unicode) not implemented" ); - rStr.Erase(); + rtl::OUStringBuffer aBuf(4096); while( !bEnd && !GetError() ) // !!! nicht auf EOF testen, // !!! weil wir blockweise // !!! lesen @@ -742,10 +758,11 @@ sal_Bool SvStream::ReadUniStringLine( String& rStr ) nLen /= sizeof(sal_Unicode); if ( !nLen ) { - if ( rStr.Len() == 0 ) + if ( aBuf.getLength() == 0 ) { // der allererste Blockread hat fehlgeschlagen -> Abflug bIsEof = sal_True; + rStr = rtl::OUString(); return sal_False; } else @@ -774,12 +791,18 @@ sal_Bool SvStream::ReadUniStringLine( String& rStr ) ++n; } } - if ( n ) - rStr.Append( buf, n ); nTotalLen += j; + if (nTotalLen > static_cast<sal_Size>(nMaxCodepointsToRead)) + { + n -= nTotalLen - nMaxCodepointsToRead; + nTotalLen = nMaxCodepointsToRead; + bEnd = sal_True; + } + if ( n ) + aBuf.append( buf, n ); } - if ( !bEnd && !GetError() && rStr.Len() ) + if ( !bEnd && !GetError() && aBuf.getLength() ) bEnd = sal_True; nOldFilePos += nTotalLen * sizeof(sal_Unicode); @@ -799,20 +822,22 @@ sal_Bool SvStream::ReadUniStringLine( String& rStr ) if ( bEnd ) bIsEof = sal_False; + rStr = aBuf.makeStringAndClear(); return bEnd; } -sal_Bool 
SvStream::ReadUniOrByteStringLine( String& rStr, rtl_TextEncoding eSrcCharSet ) +sal_Bool SvStream::ReadUniOrByteStringLine( rtl::OUString& rStr, rtl_TextEncoding eSrcCharSet, + sal_Int32 nMaxCodepointsToRead ) { if ( eSrcCharSet == RTL_TEXTENCODING_UNICODE ) - return ReadUniStringLine( rStr ); + return ReadUniStringLine( rStr, nMaxCodepointsToRead ); else - return ReadByteStringLine( rStr, eSrcCharSet ); + return ReadByteStringLine( rStr, eSrcCharSet, nMaxCodepointsToRead ); } rtl::OString read_zeroTerminated_uInt8s_ToOString(SvStream& rStream) { - rtl::OStringBuffer aOutput; + rtl::OStringBuffer aOutput(256); sal_Char buf[ 256 + 1 ]; sal_Bool bEnd = sal_False; |