tdf#117115 dbahsql: respect unicode in columns

Change-Id: I6a1dcba0afda88eaf083f0d4c73c1e74b0c78f56 Reviewed-on: https://gerrit.libreoffice.org/54297 Tested-by: Jenkins <ci@libreoffice.org> Reviewed-by: Tamás Bunth <btomi96@gmail.com>
author: Tamas Bunth <tamas.bunth@collabora.co.uk> 2018-05-14 09:19:56 +0200
committer: Tamás Bunth <btomi96@gmail.com> 2018-05-25 15:27:20 +0200
commit: ded4dcbbce875efeffba7e894a6dea1f584e8e9b (patch)
tree: 8e41dd89ef9f1b197549c6a870814e0139619784 /dbaccess
parent: 2f11ce0670749b9da20dfb29269b176cb09fb01e (diff)
1 files changed, 38 insertions, 1 deletions
diff --git a/dbaccess/source/filter/hsqldb/createparser.cxx b/dbaccess/source/filter/hsqldb/createparser.cxx
index 893cfa93330a..9054a2f9bf72 100644
--- a/dbaccess/source/filter/hsqldb/createparser.cxx
+++ b/dbaccess/source/filter/hsqldb/createparser.cxx
@@ -27,6 +27,42 @@ using namespace css::sdbc;
 
 namespace
 {
+/// Find an ASCII-escaped UTF-16 code unit of the form "\uXXXX", where XXXX
+/// are exactly four hexadecimal digits.
+///
+/// @param rSource text to search.
+/// @param nFrom   index at which the search starts.
+/// @return index of the backslash of the first such escape at or after nFrom,
+///         or -1 if there is none.
+sal_Int32 lcl_IndexOfUnicode(const OString& rSource, const sal_Int32 nFrom = 0)
+{
+    const OString sHexDigits = "0123456789abcdefABCDEF";
+    sal_Int32 nIndex = rSource.indexOf("\\u", nFrom);
+    // The "\u" prefix and its four digits must fit into rSource; otherwise
+    // indexing below would read past the end of the string. If this
+    // candidate does not fit, no later one can fit either.
+    while (nIndex != -1 && nIndex + 6 <= rSource.getLength())
+    {
+        bool bIsUnicode = true;
+        for (sal_Int32 nDist = 2; nDist <= 5 && bIsUnicode; ++nDist)
+        {
+            bIsUnicode = sHexDigits.indexOf(rSource[nIndex + nDist]) != -1;
+        }
+        if (bIsUnicode)
+        {
+            return nIndex;
+        }
+        // Not a real escape (e.g. "\user"): keep looking further right
+        // instead of giving up on the whole string.
+        nIndex = rSource.indexOf("\\u", nIndex + 2);
+    }
+    return -1;
+}
+
+/// Decode text that contains ASCII-escaped UTF-16 code units ("\uXXXX").
+///
+/// @param rText UTF-8 encoded text, possibly containing "\uXXXX" escapes.
+/// @return rText as OUString, with every escape located by
+///         lcl_IndexOfUnicode replaced by the character it denotes.
+OUString lcl_ConvertToUTF8(const OString& rText)
+{
+    OString sResult = rText;
+    sal_Int32 nIndex = lcl_IndexOfUnicode(sResult);
+    while (nIndex != -1)
+    {
+        // Collect the whole run of directly adjacent escapes, so that a
+        // surrogate pair written as two escapes is converted as one character
+        // instead of two unpaired halves that cannot be encoded in UTF-8.
+        OUString sUnits;
+        sal_Int32 nEnd = nIndex;
+        do
+        {
+            const OString sHex = sResult.copy(nEnd + 2, 4);
+            const sal_Unicode cDec
+                = static_cast<sal_Unicode>(strtol(sHex.getStr(), nullptr, 16));
+            sUnits += OUString(&cDec, 1);
+            nEnd += 6; // length of "\uXXXX"
+        } while (lcl_IndexOfUnicode(sResult, nEnd) == nEnd);
+
+        // Replace exactly the run that was found, not every textual match.
+        const OString sNewChars = OUStringToOString(sUnits, RTL_TEXTENCODING_UTF8);
+        sResult = sResult.replaceAt(nIndex, nEnd - nIndex, sNewChars);
+        // Resume behind the inserted text: a decoded backslash followed by
+        // literal "uXXXX" must not be mistaken for another escape.
+        nIndex = lcl_IndexOfUnicode(sResult, nIndex + sNewChars.getLength());
+    }
+    return OStringToOUString(sResult, RTL_TEXTENCODING_UTF8);
+}
+
 /// Returns substring of sSql from the first occurrence of '(' until the
 /// last occurrence of ')' (excluding the parenthesis)
 OUString lcl_getColumnPart(const OUString& sSql)
@@ -192,7 +228,7 @@ void CreateStmtParser::parseColumnPart(const OUString& sColumnPart)
         // to fetch the whole column name, including quotes
         auto nEndColumnName
             = bIsQuoteUsedForColumnName ? sColumn.indexOf("\"", 1) : sColumn.indexOf(" ");
-        const OUString& rColumnName
+        OUString rColumnName
             = sColumn.copy(0, bIsQuoteUsedForColumnName ? nEndColumnName + 1 : nEndColumnName);
 
         // create a buffer which begins on column type
@@ -232,6 +268,7 @@ void CreateStmtParser::parseColumnPart(const OUString& sColumnPart)
         }
 
         bool bCaseInsensitive = sTypeName.indexOf("IGNORECASE") >= 0;
+        rColumnName = lcl_ConvertToUTF8(OUStringToOString(rColumnName, RTL_TEXTENCODING_UTF8));
         bool isPrimaryKey = lcl_isPrimaryKey(sColumn);
 
         if (isPrimaryKey)
author	Tamas Bunth <tamas.bunth@collabora.co.uk>	2018-05-14 09:19:56 +0200
committer	Tamás Bunth <btomi96@gmail.com>	2018-05-25 15:27:20 +0200
commit	ded4dcbbce875efeffba7e894a6dea1f584e8e9b (patch)
tree	8e41dd89ef9f1b197549c6a870814e0139619784 /dbaccess
parent	2f11ce0670749b9da20dfb29269b176cb09fb01e (diff)