diff options
author | Rohit Deshmukh <rohit.deshmukh@synerzip.com> | 2013-12-06 15:42:53 +0530 |
---|---|---|
committer | Eike Rathke <erack@redhat.com> | 2014-01-08 19:48:27 +0000 |
commit | 2421317990d00e14325298f34db3c60735527697 (patch) | |
tree | aaa3835c1baac1271697f1a975f02abf30781029 | |
parent | 515c6cf7a3832bfc7a6eeed65704bc9eee96adc1 (diff) |
fdo#72219: Fix for corruption of symbols in docx
Issue:
OUString uses UTF-16, so a Unicode character outside the BMP is stored as a
surrogate pair, i.e. 2 UTF-16 code units, not just 1.
As a result, an assertion fails in the "rtl_uString_iterateCodePoints" function.
erAck: Underlying cause was that the dictionary breakiterator misused
UTF-16 positions as Unicode code point positions.
Change-Id: I923485f56c2d879b63687adaea2b489a3479991c
Reviewed-on: https://gerrit.libreoffice.org/6955
Reviewed-by: Eike Rathke <erack@redhat.com>
Tested-by: Eike Rathke <erack@redhat.com>
(cherry picked from commit d8fd15875901d584a4bbcc07c927fa20332e4841)
Reviewed-on: https://gerrit.libreoffice.org/7322
(cherry picked from commit 994d0c9e7aa8d1a7602e61b770991da980c1cde5)
Reviewed-on: https://gerrit.libreoffice.org/7324
-rw-r--r-- | i18npool/qa/cppunit/test_breakiterator.cxx | 19 | ||||
-rw-r--r-- | i18npool/source/breakiterator/xdictionary.cxx | 6 |
2 files changed, 22 insertions, 3 deletions
diff --git a/i18npool/qa/cppunit/test_breakiterator.cxx b/i18npool/qa/cppunit/test_breakiterator.cxx
index 41e40779de88..b4174bae17d8 100644
--- a/i18npool/qa/cppunit/test_breakiterator.cxx
+++ b/i18npool/qa/cppunit/test_breakiterator.cxx
@@ -44,7 +44,7 @@ public:
 #endif
     void testKhmer();
     void testJapanese();
-
+    void testChinese();
     CPPUNIT_TEST_SUITE(TestBreakIterator);
     CPPUNIT_TEST(testLineBreaking);
     CPPUNIT_TEST(testGraphemeIteration);
@@ -60,6 +60,7 @@ public:
     CPPUNIT_TEST(testKhmer);
 #endif
     CPPUNIT_TEST(testJapanese);
+    CPPUNIT_TEST(testChinese);
     CPPUNIT_TEST_SUITE_END();
 private:
     uno::Reference<i18n::XBreakIterator> m_xBreak;
@@ -909,6 +910,22 @@ void TestBreakIterator::testJapanese()
     }
 }
 
+void TestBreakIterator::testChinese()
+{
+    lang::Locale aLocale;
+    aLocale.Language = "zh";
+    aLocale.Country = "CN";
+    i18n::Boundary aBounds;
+
+    {
+        const sal_Unicode CHINESE[] = { 0x6A35, 0x6A30, 0x69FE, 0x8919, 0xD867, 0xDEDB };
+
+        OUString aTest(CHINESE, SAL_N_ELEMENTS(CHINESE));
+        aBounds = m_xBreak->getWordBoundary(aTest, 4, aLocale,
+            i18n::WordType::DICTIONARY_WORD, true);
+        CPPUNIT_ASSERT(aBounds.startPos == 4 && aBounds.endPos == 6);
+    }
+}
 void TestBreakIterator::setUp()
 {
     BootstrapFixtureBase::setUp();
diff --git a/i18npool/source/breakiterator/xdictionary.cxx b/i18npool/source/breakiterator/xdictionary.cxx
index 72da09f87629..3b43fa31e4e2 100644
--- a/i18npool/source/breakiterator/xdictionary.cxx
+++ b/i18npool/source/breakiterator/xdictionary.cxx
@@ -383,9 +383,11 @@ Boundary xdictionary::getWordBoundary(const OUString& rText, sal_Int32 anyPos, s
             if (u_isWhitespace(ch))
                 i--;
         }
+
         boundary.endPos = boundary.startPos;
-        rText.iterateCodePoints(&boundary.endPos, aCache.wordboundary[i]);
-        rText.iterateCodePoints(&boundary.startPos, aCache.wordboundary[i-1]);
+        boundary.endPos += aCache.wordboundary[i];
+        boundary.startPos += aCache.wordboundary[i-1];
+
     } else {
         boundary.startPos = anyPos;
         if (anyPos < len) rText.iterateCodePoints(&anyPos, 1);