summaryrefslogtreecommitdiff
path: root/tools
diff options
context:
space:
mode:
author Jens-Heiner Rechtien <hr@openoffice.org> 2004-10-11 11:27:31 +0000
committer Jens-Heiner Rechtien <hr@openoffice.org> 2004-10-11 11:27:31 +0000
commit f915712a80cc48e8bb987893adf3358d276d24b5 (patch)
tree c8ecc8d556814344de2351efe20cfc0656b2ed1c /tools
parent 5efbdbb0840264d1155c26f9d5a8dba0ce9c8039 (diff)
INTEGRATION: CWS csvio (1.16.34); FILE MERGED
2004/09/14 14:00:57 er 1.16.34.2: #b4996294# ReadUniStringLine: reading little-endian Unicode with DOS (U+000D, U+000A) line ends on a big-endian machine resulted in a blank line every second line
2004/09/13 15:59:26 er 1.16.34.1: #i834# add SvStream::ReadCsvLine() method
Diffstat (limited to 'tools')
-rw-r--r-- tools/source/stream/stream.cxx 87
1 files changed, 82 insertions, 5 deletions
diff --git a/tools/source/stream/stream.cxx b/tools/source/stream/stream.cxx
index e5ab5ac1d889..ce3d3601d4e4 100644
--- a/tools/source/stream/stream.cxx
+++ b/tools/source/stream/stream.cxx
@@ -2,9 +2,9 @@
*
* $RCSfile: stream.cxx,v $
*
- * $Revision: 1.17 $
+ * $Revision: 1.18 $
*
- * last change: $Author: hr $ $Date: 2004-09-08 16:01:59 $
+ * last change: $Author: hr $ $Date: 2004-10-11 12:27:31 $
*
* The Contents of this file are made available subject to the terms of
* either of the following licenses
@@ -870,6 +870,8 @@ sal_Bool SvStream::ReadUniStringLine( String& rStr )
{
sal_Unicode cTemp;
Read( (char*)&cTemp, sizeof(cTemp) );
+ if ( bSwap )
+ SwapUShort( cTemp );
if( cTemp == c || (cTemp != '\n' && cTemp != '\r') )
Seek( nOldFilePos );
}
@@ -1084,8 +1086,9 @@ sal_Bool SvStream::WriteUniOrByteChar( sal_Unicode ch, rtl_TextEncoding eDestCha
sal_Bool SvStream::StartWritingUnicodeText()
{
SetEndianSwap( FALSE ); // write native format
- // some convention (whose? MS?)
- // upon read: 0xfeff(-257) => no swap; 0xfffe(-2) => swap
+ // BOM, Byte Order Mark, U+FEFF, see
+ // http://www.unicode.org/faq/utf_bom.html#BOM
+ // Upon read: 0xfeff (-257 as signed 16-bit) => no swap; byte-reversed 0xfffe (-2) => swap
*this << sal_uInt16( 0xfeff );
return nError == SVSTREAM_OK;
}
@@ -1109,7 +1112,81 @@ sal_Bool SvStream::StartReadingUnicodeText()
SetEndianSwap( !bSwap );
break;
default:
- SeekRel( -((sal_Size)sizeof(nFlag)) ); // no flag, pure data
+ SeekRel( -((sal_Size)sizeof(nFlag)) ); // no BOM, pure data
+ }
+ return nError == SVSTREAM_OK;
+}
+
+/*************************************************************************
+|*
+|* Stream::ReadCsvLine()
+|*
+*************************************************************************/
+
+// Returns a pointer to the first occurrence of c in the 0-terminated array
+// pStr, or 0 if c does not occur. Precondition: pStr must be non-NULL.
+inline const sal_Unicode* lcl_UnicodeStrChr( const sal_Unicode* pStr,
+ sal_Unicode c )
+{
+ while (*pStr) // the terminating 0 itself is never compared, so c == 0 yields 0
+ {
+ if (*pStr == c)
+ return pStr;
+ ++pStr;
+ }
+ return 0; // reached the terminator without a match
+}
+
+sal_Bool SvStream::ReadCsvLine( String& rStr, sal_Bool bEmbeddedLineBreak, // reads one CSV record into rStr
+ const String& rFieldSeparators, sal_Unicode cFieldQuote, // separators and quote char used to detect quoted fields
+ sal_Bool bAllowBackslashEscape) // if set, a quote directly after an unescaped backslash is not counted
+{ // returns whether nError == SVSTREAM_OK; the results of the individual line reads are not checked
+ ReadUniOrByteStringLine( rStr); // first physical line of the record
+
+ if (bEmbeddedLineBreak) // otherwise the first line is returned unchanged
+ {
+ const sal_Unicode* pSeps = rFieldSeparators.GetBuffer();
+ xub_StrLen nLastOffset = 0; // offset in rStr at which the next scan pass resumes
+ xub_StrLen nQuotes = 0; // 0: no quoted field started yet; then odd: quote open, even: all closed
+ while (!IsEof() && rStr.Len() < STRING_MAXLEN) // one pass per physical line appended to rStr
+ {
+ bool bBackslashEscaped = false; // escape state starts fresh for every pass
+ const sal_Unicode *p, *pStart;
+ p = pStart = rStr.GetBuffer(); // refetched each pass, rStr is appended to below
+ p += nLastOffset; // skip what earlier passes already scanned
+ while (*p)
+ {
+ if (nQuotes) // never reset to 0, so from here on every unescaped quote is counted
+ {
+ if (*p == cFieldQuote && !bBackslashEscaped)
+ ++nQuotes;
+ else if (bAllowBackslashEscape)
+ {
+ if (*p == '\\')
+ bBackslashEscaped = !bBackslashEscaped; // a second backslash cancels the first
+ else
+ bBackslashEscaped = false; // any other character ends the escape
+ }
+ }
+ else if (*p == cFieldQuote && (p == pStart ||
+ lcl_UnicodeStrChr( pSeps, p[-1]))) // first quote must open a field: at line start or right after a separator
+ nQuotes = 1;
+ // A quote character inside a field content does not start
+ // a quote.
+ ++p;
+ }
+
+ if (nQuotes % 2 == 0) // no quoted field left open: the record is complete
+ break;
+ else
+ {
+ nLastOffset = rStr.Len(); // next pass resumes at the _LF appended below
+ String aNext;
+ ReadUniOrByteStringLine( aNext); // continuation line of the still-open quoted field
+ rStr += sal_Unicode(_LF); // the embedded line break is stored as _LF
+ rStr += aNext;
+ }
+ }
+ }
+ return nError == SVSTREAM_OK;
+}