summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKohei Yoshida <kyoshida@novell.com>2010-11-10 00:41:49 -0500
committerKohei Yoshida <kyoshida@novell.com>2010-11-10 00:43:58 -0500
commit322cbc3818b0553254aab2dfb3c5b196fe814097 (patch)
tree24d7950098fe98e9a2077650de9a138a59456ce8
parente080757dc9977a02958a6c4e0ee0840f1b3a44d5 (diff)
Import mal-formed csv files gracefully.
When importing a document with mal-formed rows, try our best to keep them from damaging other, well-formed rows. Previously, Calc would eat well-formed rows occurring after a mal-formed row. (n#507322)
-rw-r--r--tools/source/stream/stream.cxx13
1 files changed, 13 insertions, 0 deletions
diff --git a/tools/source/stream/stream.cxx b/tools/source/stream/stream.cxx
index ee1c513adba6..ac660cbffbcc 100644
--- a/tools/source/stream/stream.cxx
+++ b/tools/source/stream/stream.cxx
@@ -1108,6 +1108,9 @@ sal_Bool SvStream::ReadCsvLine( String& rStr, sal_Bool bEmbeddedLineBreak,
if (bEmbeddedLineBreak)
{
+ // See if the separator(s) include tab.
+ bool bTabSep = lcl_UnicodeStrChr(pSeps, '\t') != NULL;
+
const sal_Unicode* pSeps = rFieldSeparators.GetBuffer();
xub_StrLen nLastOffset = 0;
xub_StrLen nQuotes = 0;
@@ -1121,6 +1124,16 @@ sal_Bool SvStream::ReadCsvLine( String& rStr, sal_Bool bEmbeddedLineBreak,
{
if (nQuotes)
{
+ if (bTabSep && *p == '\t')
+ {
+ // When tab-delimited, tab char ends quoted sequence
+ // even if we haven't reached the end quote. Doing
+ // this helps keep mal-formed rows from damaging
+ // other, well-formed rows.
+ nQuotes = 0;
+ break;
+ }
+
if (*p == cFieldQuote && !bBackslashEscaped)
++nQuotes;
else if (bAllowBackslashEscape)