summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Jan Holesovsky <kendy@collabora.com> 2016-11-03 22:27:12 +0100
committer: Jan Holesovsky <kendy@collabora.com> 2016-11-08 10:00:31 +0100
commit: 2b65c716bfe4b71248abb40564ce043f7d316732 (patch)
tree: 6455f089346e50c4957c4b52dc1d8bafed3c4ded
parent: 116ca8bb673c5f13b9c1aea9be48c68e83ba2f78 (diff)
tdf#88821: Implement support for <meta charset="..."> for HTML import.
The editengine HTML import was not handling it at all, and consequently not setting the right encoding when importing HTML in Calc. Change-Id: I3ca3dd20f36cfb579fb7ae4cd3da63a69d97601e (cherry picked from commit 84400eae86d7ae8e66f8247f4c4f3a717d90f8c0)
-rw-r--r-- include/svtools/htmlkywd.hxx 1
-rw-r--r-- include/svtools/htmltokn.h 1
-rw-r--r-- sc/qa/unit/bugfix-test.cxx 13
-rw-r--r-- sc/qa/unit/data/html/tdf88821-2.html 19
-rw-r--r-- svtools/source/svhtml/htmlkywd.cxx 1
-rw-r--r-- svtools/source/svhtml/parhtml.cxx 5
6 files changed, 39 insertions, 1 deletions
diff --git a/include/svtools/htmlkywd.hxx b/include/svtools/htmlkywd.hxx
index 54309a7867d1..4cc24949fe2d 100644
--- a/include/svtools/htmlkywd.hxx
+++ b/include/svtools/htmlkywd.hxx
@@ -422,6 +422,7 @@
#define OOO_STRING_SVTOOLS_HTML_O_alt "alt"
#define OOO_STRING_SVTOOLS_HTML_O_axis "axis"
#define OOO_STRING_SVTOOLS_HTML_O_char "char"
+#define OOO_STRING_SVTOOLS_HTML_O_charset "charset"
#define OOO_STRING_SVTOOLS_HTML_O_class "class"
#define OOO_STRING_SVTOOLS_HTML_O_code "code"
#define OOO_STRING_SVTOOLS_HTML_O_codetype "codetype"
diff --git a/include/svtools/htmltokn.h b/include/svtools/htmltokn.h
index eeea777477f2..37ca30e5f61d 100644
--- a/include/svtools/htmltokn.h
+++ b/include/svtools/htmltokn.h
@@ -308,6 +308,7 @@ HTML_OPTION_STRING_START = HTML_OPTION_BOOL_END,
HTML_O_ALT,
HTML_O_AXIS,
HTML_O_CHAR, // HTML3 Table Model Draft
+ HTML_O_CHARSET,
HTML_O_CLASS,
HTML_O_CODE, // HotJava
HTML_O_CODETYPE,
diff --git a/sc/qa/unit/bugfix-test.cxx b/sc/qa/unit/bugfix-test.cxx
index 5cebdeb09a35..0ea47b791ff1 100644
--- a/sc/qa/unit/bugfix-test.cxx
+++ b/sc/qa/unit/bugfix-test.cxx
@@ -88,6 +88,7 @@ public:
// void testTdf40110();
void testTdf98657();
void testTdf88821();
+ void testTdf88821_2();
CPPUNIT_TEST_SUITE(ScFiltersTest);
CPPUNIT_TEST(testTdf64229);
@@ -98,6 +99,7 @@ public:
// CPPUNIT_TEST(testTdf40110);
CPPUNIT_TEST(testTdf98657);
CPPUNIT_TEST(testTdf88821);
+ CPPUNIT_TEST(testTdf88821_2);
CPPUNIT_TEST_SUITE_END();
private:
uno::Reference<uno::XInterface> m_xCalcComponent;
@@ -256,6 +258,17 @@ void ScFiltersTest::testTdf88821()
xDocSh->DoClose();
}
+void ScFiltersTest::testTdf88821_2()
+{
+ ScDocShellRef xDocSh = loadDoc("tdf88821-2.", FORMAT_HTML);
+ ScDocument& rDoc = xDocSh->GetDocument();
+
+ // Regression check for tdf#88821 (<meta charset="..."> on HTML import): A2 should be 'ABCabcČŠŽčšž', not the mis-decoded 'ABCabcČŠŽÄヘšž'
+ CPPUNIT_ASSERT_EQUAL(OStringToOUString("ABCabc\xC4\x8C\xC5\xA0\xC5\xBD\xC4\x8D\xC5\xA1\xC5\xBE", RTL_TEXTENCODING_UTF8), rDoc.GetString(0, 1, 0));
+
+ xDocSh->DoClose();
+}
+
ScFiltersTest::ScFiltersTest()
: ScBootstrapFixture( "/sc/qa/unit/data" )
{
diff --git a/sc/qa/unit/data/html/tdf88821-2.html b/sc/qa/unit/data/html/tdf88821-2.html
new file mode 100644
index 000000000000..e71094aba364
--- /dev/null
+++ b/sc/qa/unit/data/html/tdf88821-2.html
@@ -0,0 +1,19 @@
+<html lang="en">
+<head>
+<meta charset="UTF-8">
+</head>
+<body>
+ <table border="1">
+ <tr>
+ <td>Text</td>
+ <td>Decimal</td>
+ <td>Date</td>
+ </tr>
+ <tr>
+ <td>ABCabcČŠŽčšž</td>
+ <td>10,50</td>
+ <td>30.1.2015</td>
+ </tr>
+ </table>
+</body>
+</html>
diff --git a/svtools/source/svhtml/htmlkywd.cxx b/svtools/source/svhtml/htmlkywd.cxx
index 166fb5c0ea73..b4d7aaf24ad9 100644
--- a/svtools/source/svhtml/htmlkywd.cxx
+++ b/svtools/source/svhtml/htmlkywd.cxx
@@ -576,6 +576,7 @@ static HTML_TokenEntry aHTMLOptionTab[] = {
{{OOO_STRING_SVTOOLS_HTML_O_alt}, HTML_O_ALT},
{{OOO_STRING_SVTOOLS_HTML_O_axis}, HTML_O_AXIS},
{{OOO_STRING_SVTOOLS_HTML_O_char}, HTML_O_CHAR}, // HTML 3 Table Model Draft
+ {{OOO_STRING_SVTOOLS_HTML_O_charset}, HTML_O_CHARSET},
{{OOO_STRING_SVTOOLS_HTML_O_class}, HTML_O_CLASS},
{{OOO_STRING_SVTOOLS_HTML_O_code}, HTML_O_CODE}, // HotJava
{{OOO_STRING_SVTOOLS_HTML_O_codetype}, HTML_O_CODETYPE},
diff --git a/svtools/source/svhtml/parhtml.cxx b/svtools/source/svhtml/parhtml.cxx
index c38982fb069c..03ece20f2052 100644
--- a/svtools/source/svhtml/parhtml.cxx
+++ b/svtools/source/svhtml/parhtml.cxx
@@ -1942,6 +1942,10 @@ bool HTMLParser::ParseMetaOptionsImpl(
case HTML_O_CONTENT:
aContent = aOption.GetString();
break;
+ case HTML_O_CHARSET:
+ OString sValue(OUStringToOString(aOption.GetString(), RTL_TEXTENCODING_ASCII_US));
+ o_rEnc = GetExtendedCompatibilityTextEncoding(rtl_getTextEncodingFromMimeCharset(sValue.getStr()));
+ break;
}
}
@@ -1957,7 +1961,6 @@ bool HTMLParser::ParseMetaOptionsImpl(
aContent = convertLineEnd(aContent, GetSystemLineEnd());
}
-
if ( bHTTPEquiv && i_pHTTPHeader )
{
// Netscape seems to just ignore a closing ", so we do too