summary refs log tree commit diff
diff options
context:
space:
mode:
author Caolán McNamara <caolanm@redhat.com> 2012-01-13 16:38:27 +0000
committer Caolán McNamara <caolanm@redhat.com> 2012-01-13 16:46:34 +0000
commit 17fe34ec569f3e14f35f3958cc5885a00bd6cff9 (patch)
tree 1e3069d3f51148bd173f57e0c2b5f3e92ed18b0b
parent e5ac3524f2d72bf3113c571de105f8aa52a558d2 (diff)
here calc, take ownership of this foul monstrosity only you use
SvStream::ReadCsvLine doesn't need to be a member of Stream and the subcomment about what's wrong with the method is longer than the body of the method. Only used by calc, so can go into calc. foul monstrosity back
-rw-r--r-- sc/source/ui/dbgui/scuiasciiopt.cxx 4
-rw-r--r-- sc/source/ui/docshell/impex.cxx 85
-rw-r--r-- sc/source/ui/inc/impex.hxx 53
-rw-r--r-- tools/inc/tools/stream.hxx 54
-rw-r--r-- tools/source/stream/stream.cxx 87
5 files changed, 138 insertions, 145 deletions
diff --git a/sc/source/ui/dbgui/scuiasciiopt.cxx b/sc/source/ui/dbgui/scuiasciiopt.cxx
index fdb1fa88121e..834d0cc17f9e 100644
--- a/sc/source/ui/dbgui/scuiasciiopt.cxx
+++ b/sc/source/ui/dbgui/scuiasciiopt.cxx
@@ -476,7 +476,7 @@ bool ScImportAsciiDlg::GetLine( sal_uLong nLine, String &rText )
bRet = false;
break;
}
- mpDatStream->ReadCsvLine( rText, !bFixed, maFieldSeparators,
+ ReadCsvLine(*mpDatStream, rText, !bFixed, maFieldSeparators,
mcTextSep);
mnStreamPos = mpDatStream->Tell();
mpRowPosArray[++mnRowPosCount] = mnStreamPos;
@@ -494,7 +494,7 @@ bool ScImportAsciiDlg::GetLine( sal_uLong nLine, String &rText )
else
{
Seek( mpRowPosArray[nLine]);
- mpDatStream->ReadCsvLine( rText, !bFixed, maFieldSeparators, mcTextSep);
+ ReadCsvLine(*mpDatStream, rText, !bFixed, maFieldSeparators, mcTextSep);
mnStreamPos = mpDatStream->Tell();
}
diff --git a/sc/source/ui/docshell/impex.cxx b/sc/source/ui/docshell/impex.cxx
index a10ab3906e1a..b7922b3840e7 100644
--- a/sc/source/ui/docshell/impex.cxx
+++ b/sc/source/ui/docshell/impex.cxx
@@ -1180,7 +1180,7 @@ sal_Bool ScImportExport::ExtText2Doc( SvStream& rStrm )
while(--nSkipLines>0)
{
- rStrm.ReadCsvLine( aLine, !bFixed, rSeps, cStr); // content is ignored
+ ReadCsvLine(rStrm, aLine, !bFixed, rSeps, cStr); // content is ignored
if ( rStrm.IsEof() )
break;
}
@@ -1203,7 +1203,7 @@ sal_Bool ScImportExport::ExtText2Doc( SvStream& rStrm )
{
for( ;; )
{
- rStrm.ReadCsvLine( aLine, !bFixed, rSeps, cStr);
+ ReadCsvLine(rStrm, aLine, !bFixed, rSeps, cStr);
if ( rStrm.IsEof() )
break;
@@ -2109,4 +2109,85 @@ ScFormatFilterPlugin &ScFormatFilter::Get()
return *plugin;
}
+// Returns a pointer to the first occurrence of c in pStr, or 0 if c is not
+// found. Precondition: pStr is non-NULL and points to a 0-terminated array.
+inline const sal_Unicode* lcl_UnicodeStrChr( const sal_Unicode* pStr,
+ sal_Unicode c )
+{
+ while (*pStr) // stop at the 0 terminator; the terminator itself never matches
+ {
+ if (*pStr == c)
+ return pStr;
+ ++pStr;
+ }
+ return 0;
+}
+
+void ReadCsvLine(SvStream &rStream, String& rStr, sal_Bool bEmbeddedLineBreak,
+ const String& rFieldSeparators, sal_Unicode cFieldQuote,
+ sal_Bool bAllowBackslashEscape)
+{ // Reads one line into rStr; if bEmbeddedLineBreak is set, appends further lines while a quoted field is still open.
+ rStream.ReadUniOrByteStringLine(rStr, rStream.GetStreamCharSet()); // first line is read unconditionally
+
+ if (bEmbeddedLineBreak)
+ {
+ const sal_Unicode* pSeps = rFieldSeparators.GetBuffer();
+
+ // Check whether tab is one of the field separators (used below).
+ bool bTabSep = lcl_UnicodeStrChr(pSeps, '\t') != NULL;
+
+ xub_StrLen nLastOffset = 0; // offset in rStr at which the next scan pass starts
+ xub_StrLen nQuotes = 0; // 0 until an opening quote is seen; afterwards the running count of quote characters (odd = inside quotes)
+ while (!rStream.IsEof() && rStr.Len() < STRING_MAXLEN) // stop at end of stream or once rStr has reached STRING_MAXLEN
+ {
+ bool bBackslashEscaped = false; // reset on every pass, so it is not carried across an appended line
+ const sal_Unicode *p, *pStart;
+ p = pStart = rStr.GetBuffer();
+ p += nLastOffset;
+ while (*p)
+ {
+ if (nQuotes)
+ {
+ if (bTabSep && *p == '\t' && (nQuotes % 2) != 0)
+ {
+ // When tab-delimited, a tab character ends the quoted
+ // sequence even if the end quote has not been reached.
+ // This keeps malformed rows from damaging other,
+ // well-formed rows.
+ nQuotes = 0;
+ break;
+ }
+
+ if (*p == cFieldQuote && !bBackslashEscaped) // unescaped quote flips the parity; a doubled quote adds two and leaves it unchanged
+ ++nQuotes;
+ else if (bAllowBackslashEscape)
+ {
+ if (*p == '\\')
+ bBackslashEscaped = !bBackslashEscaped; // toggle: a second consecutive backslash cancels the escape
+ else
+ bBackslashEscaped = false;
+ }
+ }
+ else if (*p == cFieldQuote && (p == pStart ||
+ lcl_UnicodeStrChr( pSeps, p[-1])))
+ nQuotes = 1;
+ // Otherwise: a quote character inside field content does not
+ // open a quoted sequence.
+ ++p;
+ }
+
+ if (nQuotes % 2 == 0) // even count: no quoted sequence is open, so the logical line is complete
+ break;
+ else
+ {
+ nLastOffset = rStr.Len(); // the next pass resumes at the LF appended below
+ String aNext;
+ rStream.ReadUniOrByteStringLine(aNext, rStream.GetStreamCharSet());
+ rStr += sal_Unicode(_LF);
+ rStr += aNext;
+ }
+ }
+ }
+}
+
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/sc/source/ui/inc/impex.hxx b/sc/source/ui/inc/impex.hxx
index 302cf4e5c627..8ccfdd96403e 100644
--- a/sc/source/ui/inc/impex.hxx
+++ b/sc/source/ui/inc/impex.hxx
@@ -189,6 +189,59 @@ public:
}
};
+/** Read a CSV (comma separated values) data line using
+ ReadUniOrByteStringLine().
+
+ @param bEmbeddedLineBreak
+ If sal_True and a line-break occurs inside a field of data,
+ a line feed LF '\n' and the next line are appended. Repeats
+ until a line-break is not in a field. A field is determined
+ by delimiting rFieldSeparators and optionally surrounded by
+ a pair of cFieldQuote characters. For a line-break to be
+ within a field, the field content MUST be surrounded by
+ cFieldQuote characters, and the opening cFieldQuote MUST be
+ at the very start of a line or follow right behind a field
+ separator with no extra characters in between. Anything,
+ including field separators and escaped quotes (by doubling
+ them, or preceding them with a backslash if
+ bAllowBackslashEscape==sal_True) may appear in a quoted
+ field.
+
+ If bEmbeddedLineBreak==sal_False, nothing is parsed and the
+ string returned is simply one ReadUniOrByteStringLine().
+
+ @param rFieldSeparators
+ A list of characters that each may act as a field separator.
+
+ @param cFieldQuote
+ The quote character used.
+
+ @param bAllowBackslashEscape
+ If sal_True, an embedded quote character inside a quoted
+ field may also be escaped with a preceding backslash.
+ Normally, quotes are escaped by doubling them.
+
+ Check rStream.good() after the call to detect I/O problems during the read.
+
+ @ATTENTION
+ Note that the string returned may be truncated even inside
+ a quoted field if STRING_MAXLEN was reached. There
+ currently is no way to exactly determine the conditions,
+ whether this was at a line end, or whether open quotes
+ would have closed the field before the line end, as even a
+ ReadUniOrByteStringLine() may return prematurely but the
+ stream was positioned ahead until the real end of line.
+ Additionally, due to character encoding conversions, string
+ length and bytes read don't necessarily match, and
+ resyncing to a previous position matching the string's
+ length isn't always possible. As a result, a logical line
+ with embedded line breaks and more than STRING_MAXLEN
+ characters will be spoiled, and a subsequent ReadCsvLine()
+ may start under false preconditions.
+ */
+SC_DLLPUBLIC void ReadCsvLine(SvStream &rStream, String& rStr, sal_Bool bEmbeddedLineBreak,
+ const String& rFieldSeparators, sal_Unicode cFieldQuote,
+ sal_Bool bAllowBackslashEscape = sal_False);
#endif
diff --git a/tools/inc/tools/stream.hxx b/tools/inc/tools/stream.hxx
index 85d5f124adc9..4f06610e2f00 100644
--- a/tools/inc/tools/stream.hxx
+++ b/tools/inc/tools/stream.hxx
@@ -432,60 +432,6 @@ public:
sal_Bool WriteUniOrByteChar( sal_Unicode ch )
{ return WriteUniOrByteChar( ch, GetStreamCharSet() ); }
- /** Read a CSV (comma separated values) data line using
- ReadUniOrByteStringLine().
-
- @param bEmbeddedLineBreak
- If sal_True and a line-break occurs inside a field of data,
- a line feed LF '\n' and the next line are appended. Repeats
- until a line-break is not in a field. A field is determined
- by delimiting rFieldSeparators and optionally surrounded by
- a pair of cFieldQuote characters. For a line-break to be
- within a field, the field content MUST be surrounded by
- cFieldQuote characters, and the opening cFieldQuote MUST be
- at the very start of a line or follow right behind a field
- separator with no extra characters in between. Anything,
- including field separators and escaped quotes (by doubling
- them, or preceding them with a backslash if
- bAllowBackslashEscape==sal_True) may appear in a quoted
- field.
-
- If bEmbeddedLineBreak==sal_False, nothing is parsed and the
- string returned is simply one ReadUniOrByteStringLine().
-
- @param rFieldSeparators
- A list of characters that each may act as a field separator.
-
- @param cFieldQuote
- The quote character used.
-
- @param bAllowBackslashEscape
- If sal_True, an embedded quote character inside a quoted
- field may also be escaped with a preceding backslash.
- Normally, quotes are escaped by doubling them.
-
- check Stream::good() to detect IO problems during read
-
- @ATTENTION
- Note that the string returned may be truncated even inside
- a quoted field if STRING_MAXLEN was reached. There
- currently is no way to exactly determine the conditions,
- whether this was at a line end, or whether open quotes
- would have closed the field before the line end, as even a
- ReadUniOrByteStringLine() may return prematurely but the
- stream was positioned ahead until the real end of line.
- Additionally, due to character encoding conversions, string
- length and bytes read don't necessarily match, and
- resyncing to a previous position matching the string's
- length isn't always possible. As a result, a logical line
- with embedded line breaks and more than STRING_MAXLEN
- characters will be spoiled, and a subsequent ReadCsvLine()
- may start under false preconditions.
- */
- void ReadCsvLine( String& rStr, sal_Bool bEmbeddedLineBreak,
- const String& rFieldSeparators, sal_Unicode cFieldQuote,
- sal_Bool bAllowBackslashEscape = sal_False);
-
void SetBufferSize( sal_uInt16 nBufSize );
sal_uInt16 GetBufferSize() const { return nBufSize; }
diff --git a/tools/source/stream/stream.cxx b/tools/source/stream/stream.cxx
index 8c0efb66c6f2..fea6025e5263 100644
--- a/tools/source/stream/stream.cxx
+++ b/tools/source/stream/stream.cxx
@@ -1019,93 +1019,6 @@ sal_Bool SvStream::StartReadingUnicodeText( rtl_TextEncoding eReadBomCharSet )
/*************************************************************************
|*
-|* Stream::ReadCsvLine()
-|*
-*************************************************************************/
-
-// Precondition: pStr is guaranteed to be non-NULL and points to a 0-terminated
-// array.
-inline const sal_Unicode* lcl_UnicodeStrChr( const sal_Unicode* pStr,
- sal_Unicode c )
-{
- while (*pStr)
- {
- if (*pStr == c)
- return pStr;
- ++pStr;
- }
- return 0;
-}
-
-void SvStream::ReadCsvLine( String& rStr, sal_Bool bEmbeddedLineBreak,
- const String& rFieldSeparators, sal_Unicode cFieldQuote,
- sal_Bool bAllowBackslashEscape)
-{
- ReadUniOrByteStringLine(rStr, GetStreamCharSet());
-
- if (bEmbeddedLineBreak)
- {
- const sal_Unicode* pSeps = rFieldSeparators.GetBuffer();
-
- // See if the separator(s) include tab.
- bool bTabSep = lcl_UnicodeStrChr(pSeps, '\t') != NULL;
-
- xub_StrLen nLastOffset = 0;
- xub_StrLen nQuotes = 0;
- while (!IsEof() && rStr.Len() < STRING_MAXLEN)
- {
- bool bBackslashEscaped = false;
- const sal_Unicode *p, *pStart;
- p = pStart = rStr.GetBuffer();
- p += nLastOffset;
- while (*p)
- {
- if (nQuotes)
- {
- if (bTabSep && *p == '\t' && (nQuotes % 2) != 0)
- {
- // When tab-delimited, tab char ends quoted sequence
- // even if we haven't reached the end quote. Doing
- // this helps keep mal-formed rows from damaging
- // other, well-formed rows.
- nQuotes = 0;
- break;
- }
-
- if (*p == cFieldQuote && !bBackslashEscaped)
- ++nQuotes;
- else if (bAllowBackslashEscape)
- {
- if (*p == '\\')
- bBackslashEscaped = !bBackslashEscaped;
- else
- bBackslashEscaped = false;
- }
- }
- else if (*p == cFieldQuote && (p == pStart ||
- lcl_UnicodeStrChr( pSeps, p[-1])))
- nQuotes = 1;
- // A quote character inside a field content does not start
- // a quote.
- ++p;
- }
-
- if (nQuotes % 2 == 0)
- break;
- else
- {
- nLastOffset = rStr.Len();
- String aNext;
- ReadUniOrByteStringLine(aNext, GetStreamCharSet());
- rStr += sal_Unicode(_LF);
- rStr += aNext;
- }
- }
- }
-}
-
-/*************************************************************************
-|*
|* Stream::SeekRel()
|*
*************************************************************************/