summaryrefslogtreecommitdiff
path: root/connectivity
diff options
context:
space:
mode:
authorDamjan Jovanovic <damjan@apache.org>2016-04-17 16:44:43 +0000
committerEike Rathke <erack@redhat.com>2016-05-17 12:38:44 +0000
commit2049e55f507b00cf70f72706900e75d20ff3bb30 (patch)
tree505e843698f2d642e14096b53550747bdba5cc6b /connectivity
parentd94b827c404e2801797c97a830eecfc6d1489202 (diff)
Make CSV line parsers consistent with CSV field parsers.
Our CSV field parsing algorithm treats fields starting with a quote (immediately at the beginning of the row, or after the field delimiter) as quoted. A quoted field ends at the corresponding closing quote, and any remaining text between the closing quote and the next field delimiter or end of line is appended to the text already extracted from the field, but not processed further. Any quotes in this extra text are taken verbatim - they do not quote anything. Our CSV line parsers were big hacks - they essentially read and concatenate lines until an even number of quote characters is found, and then feed this through the CSV field parsers. This patch rewrites the line parsers to work exactly how the field parsers work. Text such as: "another" ",something else is now correctly parsed by both Calc and Base as: [another "],[something else] instead of breaking all further parsing. Patch by: me (cherry picked from commit 60e93b8b5b6bc4220d66e95cd234a37f3c8f8fd7) Change-Id: Iced60fad9371e17a2e5640cd7169804b18cf5103 Reviewed-on: https://gerrit.libreoffice.org/24999 Tested-by: Jenkins <ci@libreoffice.org> Reviewed-by: Eike Rathke <erack@redhat.com> Tested-by: Eike Rathke <erack@redhat.com>
Diffstat (limited to 'connectivity')
-rw-r--r--connectivity/source/drivers/flat/ETable.cxx59
1 files changed, 53 insertions, 6 deletions
diff --git a/connectivity/source/drivers/flat/ETable.cxx b/connectivity/source/drivers/flat/ETable.cxx
index a79ded6cc85c..323e7095de10 100644
--- a/connectivity/source/drivers/flat/ETable.cxx
+++ b/connectivity/source/drivers/flat/ETable.cxx
@@ -890,14 +890,61 @@ bool OFlatTable::readLine(sal_Int32 * const pEndPos, sal_Int32 * const pStartPos
return false;
QuotedTokenizedString sLine = m_aCurrentLine; // check if the string continues on next line
- while( (comphelper::string::getTokenCount(sLine.GetString(), m_cStringDelimiter) % 2) != 1 )
+ sal_Int32 nLastOffset = 0;
+ bool isQuoted = false;
+ bool isFieldStarting = true;
+ while (true)
{
- m_pFileStream->ReadByteStringLine(sLine,nEncoding);
- if ( !m_pFileStream->IsEof() )
+ bool wasQuote = false;
+ const sal_Unicode *p = sLine.GetString().getStr() + nLastOffset;
+ while (*p)
{
- OUString aStr = m_aCurrentLine.GetString() + "\n" + sLine.GetString();
- m_aCurrentLine.SetString(aStr);
- sLine = m_aCurrentLine;
+ if (isQuoted)
+ {
+ if (*p == m_cStringDelimiter)
+ wasQuote = !wasQuote;
+ else
+ {
+ if (wasQuote)
+ {
+ wasQuote = false;
+ isQuoted = false;
+ if (*p == m_cFieldDelimiter)
+ isFieldStarting = true;
+ }
+ }
+ }
+ else
+ {
+ if (isFieldStarting)
+ {
+ isFieldStarting = false;
+ if (*p == m_cStringDelimiter)
+ isQuoted = true;
+ else if (*p == m_cFieldDelimiter)
+ isFieldStarting = true;
+ }
+ else if (*p == m_cFieldDelimiter)
+ isFieldStarting = true;
+ }
+ ++p;
+ }
+
+ if (wasQuote)
+ isQuoted = false;
+
+ if (isQuoted)
+ {
+ nLastOffset = sLine.Len();
+ m_pFileStream->ReadByteStringLine(sLine,nEncoding);
+ if ( !m_pFileStream->IsEof() )
+ {
+ OUString aStr = m_aCurrentLine.GetString() + "\n" + sLine.GetString();
+ m_aCurrentLine.SetString(aStr);
+ sLine = m_aCurrentLine;
+ }
+ else
+ break;
}
else
break;