summary | refs | log | tree | commit | diff
path: root/tools
diff options
context:
space:
mode:
author Eike Rathke <erack@redhat.com> 2012-04-10 18:30:07 +0200
committer Eike Rathke <erack@redhat.com> 2012-04-10 19:32:09 +0200
commit 8cd05e9cf1152b21528c6f1a5bda3d949dc49791 (patch)
tree 0f37b5ff5447c3d6088b64a01d75e4801cdab847 /tools
parent bf0629e09d176555aaa10f60061b206103cc0295 (diff)
resolved fdo#48501 enable line size >64k in SvStream::Read*Line()
CSV and other text formats may come with line sizes >64k that so far were truncated due to limitations in ByteString/UniString/String, even if one line consists of several fields that each are <64k. Introduced additional SvStream methods that read into rtl::OString and rtl::OUString and let SvStream::ReadUniOrByteStringLine() fill solely an rtl::OUString. Made Calc CSV import use those.
Diffstat (limited to 'tools')
-rw-r--r-- tools/inc/tools/stream.hxx 67
-rw-r--r-- tools/source/stream/stream.cxx 51
2 files changed, 99 insertions, 19 deletions
diff --git a/tools/inc/tools/stream.hxx b/tools/inc/tools/stream.hxx
index 5b5c443ac354..dc4505a06a35 100644
--- a/tools/inc/tools/stream.hxx
+++ b/tools/inc/tools/stream.hxx
@@ -380,9 +380,37 @@ public:
// next Tell() <= nSize
sal_Bool SetStreamSize( sal_Size nSize );
- sal_Bool ReadLine( rtl::OString& rStr );
+ /** Read a line of bytes.
+
+ @param nMaxBytesToRead
+ Maximum of bytes to read, if line is longer it will be
+ truncated.
+
+ NOTE that the default is one character less than
+ STRING_MAXLEN to prevent problems after conversion to
+ String that may be lurking in various places doing
+ something like
+ for (sal_uInt16 i=0; i < aString.Len(); ++i)
+ causing endless loops ...
+ */
+ sal_Bool ReadLine( rtl::OString& rStr, sal_Int32 nMaxBytesToRead = 0xFFFE );
sal_Bool WriteLine( const rtl::OString& rStr );
+ /** Read a line of bytes.
+
+ @param nMaxBytesToRead
+ Maximum of bytes to read, if line is longer it will be
+ truncated.
+
+ NOTE that the default is one character less than
+ STRING_MAXLEN to prevent problems after conversion to
+ String that may be lurking in various places doing
+ something like
+ for (sal_uInt16 i=0; i < aString.Len(); ++i)
+ causing endless loops ...
+ */
+ sal_Bool ReadByteStringLine( rtl::OUString& rStr, rtl_TextEncoding eSrcCharSet,
+ sal_Int32 nMaxBytesToRead = 0xFFFE );
sal_Bool ReadByteStringLine( String& rStr, rtl_TextEncoding eSrcCharSet );
sal_Bool WriteByteStringLine( const String& rStr, rtl_TextEncoding eDestCharSet );
@@ -403,17 +431,44 @@ public:
*/
sal_Bool StartReadingUnicodeText( rtl_TextEncoding eReadBomCharSet );
- /// Read a line of Unicode
- sal_Bool ReadUniStringLine( String& rStr );
+ /** Read a line of Unicode.
+
+ @param nMaxCodepointsToRead
+ Maximum of codepoints (UCS-2 or UTF-16 pairs, not
+ bytes) to read, if line is longer it will be truncated.
+
+ NOTE that the default is one character less than
+ STRING_MAXLEN to prevent problems after conversion to
+ String that may be lurking in various places doing
+ something like
+ for (sal_uInt16 i=0; i < aString.Len(); ++i)
+ causing endless loops ...
+ */
+ sal_Bool ReadUniStringLine( rtl::OUString& rStr, sal_Int32 nMaxCodepointsToRead = 0xFFFE );
/// Read a 32bit length prefixed sequence of utf-16 if eSrcCharSet==RTL_TEXTENCODING_UNICODE,
/// otherwise read a 16bit length prefixed sequence of bytes and convert from eSrcCharSet
rtl::OUString ReadUniOrByteString(rtl_TextEncoding eSrcCharSet);
/// Write a 32bit length prefixed sequence of utf-16 if eSrcCharSet==RTL_TEXTENCODING_UNICODE,
/// otherwise convert to eSrcCharSet and write a 16bit length prefixed sequence of bytes
SvStream& WriteUniOrByteString( const rtl::OUString& rStr, rtl_TextEncoding eDestCharSet );
- /// Read a line of Unicode if eSrcCharSet==RTL_TEXTENCODING_UNICODE,
- /// otherwise read a line of Bytecode and convert from eSrcCharSet
- sal_Bool ReadUniOrByteStringLine( String& rStr, rtl_TextEncoding eSrcCharSet );
+
+ /** Read a line of Unicode if eSrcCharSet==RTL_TEXTENCODING_UNICODE,
+ otherwise read a line of Bytecode and convert from eSrcCharSet
+
+ @param nMaxCodepointsToRead
+ Maximum of codepoints (2 bytes if Unicode, bytes if not
+ Unicode) to read, if line is longer it will be
+ truncated.
+
+ NOTE that the default is one character less than
+ STRING_MAXLEN to prevent problems after conversion to
+ String that may be lurking in various places doing
+ something like
+ for (sal_uInt16 i=0; i < aString.Len(); ++i)
+ causing endless loops ...
+ */
+ sal_Bool ReadUniOrByteStringLine( rtl::OUString& rStr, rtl_TextEncoding eSrcCharSet,
+ sal_Int32 nMaxCodepointsToRead = 0xFFFE );
/// Write a sequence of Unicode characters if eDestCharSet==RTL_TEXTENCODING_UNICODE,
/// otherwise write a sequence of Bytecodes converted to eDestCharSet
sal_Bool WriteUnicodeOrByteText( const String& rStr, rtl_TextEncoding eDestCharSet );
diff --git a/tools/source/stream/stream.cxx b/tools/source/stream/stream.cxx
index 1da4096a92c7..96cabc266c34 100644
--- a/tools/source/stream/stream.cxx
+++ b/tools/source/stream/stream.cxx
@@ -647,6 +647,15 @@ void SvStream::ResetError()
|*
*************************************************************************/
+sal_Bool SvStream::ReadByteStringLine( rtl::OUString& rStr, rtl_TextEncoding eSrcCharSet,
+ sal_Int32 nMaxBytesToRead )
+{
+ rtl::OString aStr;
+ sal_Bool bRet = ReadLine( aStr, nMaxBytesToRead);
+ rStr = rtl::OStringToOUString(aStr, eSrcCharSet);
+ return bRet;
+}
+
sal_Bool SvStream::ReadByteStringLine( String& rStr, rtl_TextEncoding eSrcCharSet )
{
rtl::OString aStr;
@@ -655,7 +664,7 @@ sal_Bool SvStream::ReadByteStringLine( String& rStr, rtl_TextEncoding eSrcCharSe
return bRet;
}
-sal_Bool SvStream::ReadLine(rtl::OString& rStr)
+sal_Bool SvStream::ReadLine( rtl::OString& rStr, sal_Int32 nMaxBytesToRead )
{
sal_Char buf[256+1];
sal_Bool bEnd = sal_False;
@@ -663,7 +672,7 @@ sal_Bool SvStream::ReadLine(rtl::OString& rStr)
sal_Char c = 0;
sal_Size nTotalLen = 0;
- rtl::OStringBuffer aBuf;
+ rtl::OStringBuffer aBuf(4096);
while( !bEnd && !GetError() ) // !!! nicht auf EOF testen,
// !!! weil wir blockweise
// !!! lesen
@@ -695,8 +704,15 @@ sal_Bool SvStream::ReadLine(rtl::OString& rStr)
buf[n] = c;
++n;
}
- aBuf.append(buf, n);
nTotalLen += j;
+ if (nTotalLen > static_cast<sal_Size>(nMaxBytesToRead))
+ {
+ n -= nTotalLen - nMaxBytesToRead;
+ nTotalLen = nMaxBytesToRead;
+ bEnd = sal_True;
+ }
+ if ( n )
+ aBuf.append(buf, n);
}
if ( !bEnd && !GetError() && aBuf.getLength() )
@@ -723,7 +739,7 @@ sal_Bool SvStream::ReadLine(rtl::OString& rStr)
return bEnd;
}
-sal_Bool SvStream::ReadUniStringLine( String& rStr )
+sal_Bool SvStream::ReadUniStringLine( rtl::OUString& rStr, sal_Int32 nMaxCodepointsToRead )
{
sal_Unicode buf[256+1];
sal_Bool bEnd = sal_False;
@@ -733,7 +749,7 @@ sal_Bool SvStream::ReadUniStringLine( String& rStr )
DBG_ASSERT( sizeof(sal_Unicode) == sizeof(sal_uInt16), "ReadUniStringLine: swapping sizeof(sal_Unicode) not implemented" );
- rStr.Erase();
+ rtl::OUStringBuffer aBuf(4096);
while( !bEnd && !GetError() ) // !!! nicht auf EOF testen,
// !!! weil wir blockweise
// !!! lesen
@@ -742,10 +758,11 @@ sal_Bool SvStream::ReadUniStringLine( String& rStr )
nLen /= sizeof(sal_Unicode);
if ( !nLen )
{
- if ( rStr.Len() == 0 )
+ if ( aBuf.getLength() == 0 )
{
// der allererste Blockread hat fehlgeschlagen -> Abflug
bIsEof = sal_True;
+ rStr = rtl::OUString();
return sal_False;
}
else
@@ -774,12 +791,18 @@ sal_Bool SvStream::ReadUniStringLine( String& rStr )
++n;
}
}
- if ( n )
- rStr.Append( buf, n );
nTotalLen += j;
+ if (nTotalLen > static_cast<sal_Size>(nMaxCodepointsToRead))
+ {
+ n -= nTotalLen - nMaxCodepointsToRead;
+ nTotalLen = nMaxCodepointsToRead;
+ bEnd = sal_True;
+ }
+ if ( n )
+ aBuf.append( buf, n );
}
- if ( !bEnd && !GetError() && rStr.Len() )
+ if ( !bEnd && !GetError() && aBuf.getLength() )
bEnd = sal_True;
nOldFilePos += nTotalLen * sizeof(sal_Unicode);
@@ -799,20 +822,22 @@ sal_Bool SvStream::ReadUniStringLine( String& rStr )
if ( bEnd )
bIsEof = sal_False;
+ rStr = aBuf.makeStringAndClear();
return bEnd;
}
-sal_Bool SvStream::ReadUniOrByteStringLine( String& rStr, rtl_TextEncoding eSrcCharSet )
+sal_Bool SvStream::ReadUniOrByteStringLine( rtl::OUString& rStr, rtl_TextEncoding eSrcCharSet,
+ sal_Int32 nMaxCodepointsToRead )
{
if ( eSrcCharSet == RTL_TEXTENCODING_UNICODE )
- return ReadUniStringLine( rStr );
+ return ReadUniStringLine( rStr, nMaxCodepointsToRead );
else
- return ReadByteStringLine( rStr, eSrcCharSet );
+ return ReadByteStringLine( rStr, eSrcCharSet, nMaxCodepointsToRead );
}
rtl::OString read_zeroTerminated_uInt8s_ToOString(SvStream& rStream)
{
- rtl::OStringBuffer aOutput;
+ rtl::OStringBuffer aOutput(256);
sal_Char buf[ 256 + 1 ];
sal_Bool bEnd = sal_False;