diff options
author | Jan Holesovsky <kendy@collabora.com> | 2016-11-03 22:27:12 +0100 |
---|---|---|
committer | Jan Holesovsky <kendy@collabora.com> | 2016-11-08 10:00:31 +0100 |
commit | 2b65c716bfe4b71248abb40564ce043f7d316732 (patch) | |
tree | 6455f089346e50c4957c4b52dc1d8bafed3c4ded | |
parent | 116ca8bb673c5f13b9c1aea9be48c68e83ba2f78 (diff) |
tdf#88821: Implement support for <meta charset="..."> for HTML import.
The editengine HTML import did not handle the <meta charset="..."> tag at all, and
consequently did not set the right encoding when importing HTML in Calc.
Change-Id: I3ca3dd20f36cfb579fb7ae4cd3da63a69d97601e
(cherry picked from commit 84400eae86d7ae8e66f8247f4c4f3a717d90f8c0)
-rw-r--r-- | include/svtools/htmlkywd.hxx | 1 | ||||
-rw-r--r-- | include/svtools/htmltokn.h | 1 | ||||
-rw-r--r-- | sc/qa/unit/bugfix-test.cxx | 13 | ||||
-rw-r--r-- | sc/qa/unit/data/html/tdf88821-2.html | 19 | ||||
-rw-r--r-- | svtools/source/svhtml/htmlkywd.cxx | 1 | ||||
-rw-r--r-- | svtools/source/svhtml/parhtml.cxx | 5 |
6 files changed, 39 insertions, 1 deletion
diff --git a/include/svtools/htmlkywd.hxx b/include/svtools/htmlkywd.hxx index 54309a7867d1..4cc24949fe2d 100644 --- a/include/svtools/htmlkywd.hxx +++ b/include/svtools/htmlkywd.hxx @@ -422,6 +422,7 @@ #define OOO_STRING_SVTOOLS_HTML_O_alt "alt" #define OOO_STRING_SVTOOLS_HTML_O_axis "axis" #define OOO_STRING_SVTOOLS_HTML_O_char "char" +#define OOO_STRING_SVTOOLS_HTML_O_charset "charset" #define OOO_STRING_SVTOOLS_HTML_O_class "class" #define OOO_STRING_SVTOOLS_HTML_O_code "code" #define OOO_STRING_SVTOOLS_HTML_O_codetype "codetype" diff --git a/include/svtools/htmltokn.h b/include/svtools/htmltokn.h index eeea777477f2..37ca30e5f61d 100644 --- a/include/svtools/htmltokn.h +++ b/include/svtools/htmltokn.h @@ -308,6 +308,7 @@ HTML_OPTION_STRING_START = HTML_OPTION_BOOL_END, HTML_O_ALT, HTML_O_AXIS, HTML_O_CHAR, // HTML3 Table Model Draft + HTML_O_CHARSET, HTML_O_CLASS, HTML_O_CODE, // HotJava HTML_O_CODETYPE, diff --git a/sc/qa/unit/bugfix-test.cxx b/sc/qa/unit/bugfix-test.cxx index 5cebdeb09a35..0ea47b791ff1 100644 --- a/sc/qa/unit/bugfix-test.cxx +++ b/sc/qa/unit/bugfix-test.cxx @@ -88,6 +88,7 @@ public: // void testTdf40110(); void testTdf98657(); void testTdf88821(); + void testTdf88821_2(); CPPUNIT_TEST_SUITE(ScFiltersTest); CPPUNIT_TEST(testTdf64229); @@ -98,6 +99,7 @@ public: // CPPUNIT_TEST(testTdf40110); CPPUNIT_TEST(testTdf98657); CPPUNIT_TEST(testTdf88821); + CPPUNIT_TEST(testTdf88821_2); CPPUNIT_TEST_SUITE_END(); private: uno::Reference<uno::XInterface> m_xCalcComponent; @@ -256,6 +258,17 @@ void ScFiltersTest::testTdf88821() xDocSh->DoClose(); } +void ScFiltersTest::testTdf88821_2() +{ + ScDocShellRef xDocSh = loadDoc("tdf88821-2.", FORMAT_HTML); + ScDocument& rDoc = xDocSh->GetDocument(); + + // A2 should be 'ABCabcČŠŽčšž', not 'ABCabcČŠŽÄヘšž' + CPPUNIT_ASSERT_EQUAL(OStringToOUString("ABCabc\xC4\x8C\xC5\xA0\xC5\xBD\xC4\x8D\xC5\xA1\xC5\xBE", RTL_TEXTENCODING_UTF8), rDoc.GetString(0, 1, 0)); + + xDocSh->DoClose(); +} + ScFiltersTest::ScFiltersTest() : 
ScBootstrapFixture( "/sc/qa/unit/data" ) { diff --git a/sc/qa/unit/data/html/tdf88821-2.html b/sc/qa/unit/data/html/tdf88821-2.html new file mode 100644 index 000000000000..e71094aba364 --- /dev/null +++ b/sc/qa/unit/data/html/tdf88821-2.html @@ -0,0 +1,19 @@ +<html lang="en"> +<head> +<meta charset="UTF-8"> +</head> +<body> + <table border="1"> + <tr> + <td>Text</td> + <td>Decimal</td> + <td>Date</td> + </tr> + <tr> + <td>ABCabcČŠŽčšž</td> + <td>10,50</td> + <td>30.1.2015</td> + </tr> + </table> +</body> +</html> diff --git a/svtools/source/svhtml/htmlkywd.cxx b/svtools/source/svhtml/htmlkywd.cxx index 166fb5c0ea73..b4d7aaf24ad9 100644 --- a/svtools/source/svhtml/htmlkywd.cxx +++ b/svtools/source/svhtml/htmlkywd.cxx @@ -576,6 +576,7 @@ static HTML_TokenEntry aHTMLOptionTab[] = { {{OOO_STRING_SVTOOLS_HTML_O_alt}, HTML_O_ALT}, {{OOO_STRING_SVTOOLS_HTML_O_axis}, HTML_O_AXIS}, {{OOO_STRING_SVTOOLS_HTML_O_char}, HTML_O_CHAR}, // HTML 3 Table Model Draft + {{OOO_STRING_SVTOOLS_HTML_O_charset}, HTML_O_CHARSET}, {{OOO_STRING_SVTOOLS_HTML_O_class}, HTML_O_CLASS}, {{OOO_STRING_SVTOOLS_HTML_O_code}, HTML_O_CODE}, // HotJava {{OOO_STRING_SVTOOLS_HTML_O_codetype}, HTML_O_CODETYPE}, diff --git a/svtools/source/svhtml/parhtml.cxx b/svtools/source/svhtml/parhtml.cxx index c38982fb069c..03ece20f2052 100644 --- a/svtools/source/svhtml/parhtml.cxx +++ b/svtools/source/svhtml/parhtml.cxx @@ -1942,6 +1942,10 @@ bool HTMLParser::ParseMetaOptionsImpl( case HTML_O_CONTENT: aContent = aOption.GetString(); break; + case HTML_O_CHARSET: + OString sValue(OUStringToOString(aOption.GetString(), RTL_TEXTENCODING_ASCII_US)); + o_rEnc = GetExtendedCompatibilityTextEncoding(rtl_getTextEncodingFromMimeCharset(sValue.getStr())); + break; } } @@ -1957,7 +1961,6 @@ bool HTMLParser::ParseMetaOptionsImpl( aContent = convertLineEnd(aContent, GetSystemLineEnd()); } - if ( bHTTPEquiv && i_pHTTPHeader ) { // Netscape seems to just ignore a closing ", so we do too |