summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Rohit Deshmukh <rohit.deshmukh@synerzip.com> 2013-12-06 15:42:53 +0530
committer: Eike Rathke <erack@redhat.com> 2014-01-08 19:46:11 +0000
commit: 994d0c9e7aa8d1a7602e61b770991da980c1cde5 (patch)
tree: 66c7fb9ecbb1405ea87dbc2dcde4e6a3a296a1cf
parent: 1657e20e663c0cc65168cd193dd19ff78f478e03 (diff)
fdo#72219: Fix for corruption of symbols in docx
Issue: OUString uses UTF-16, so a Unicode character encoded as a surrogate pair is stored as 2 values, not just 1. As a result, we were getting an assertion failure in the "rtl_uString_iterateCodePoints" method.

erAck: Underlying cause was that the dictionary breakiterator misused UTF-16 positions as Unicode code point positions.

Change-Id: I923485f56c2d879b63687adaea2b489a3479991c
Reviewed-on: https://gerrit.libreoffice.org/6955
Reviewed-by: Eike Rathke <erack@redhat.com>
Tested-by: Eike Rathke <erack@redhat.com>
(cherry picked from commit d8fd15875901d584a4bbcc07c927fa20332e4841)
Reviewed-on: https://gerrit.libreoffice.org/7322
-rw-r--r-- i18npool/qa/cppunit/test_breakiterator.cxx | 19
-rw-r--r-- i18npool/source/breakiterator/xdictionary.cxx | 6
2 files changed, 22 insertions, 3 deletions
diff --git a/i18npool/qa/cppunit/test_breakiterator.cxx b/i18npool/qa/cppunit/test_breakiterator.cxx
index 15ce8a14318c..2c595cf9940a 100644
--- a/i18npool/qa/cppunit/test_breakiterator.cxx
+++ b/i18npool/qa/cppunit/test_breakiterator.cxx
@@ -45,7 +45,7 @@ public:
#endif
void testKhmer();
void testJapanese();
-
+ void testChinese();
CPPUNIT_TEST_SUITE(TestBreakIterator);
CPPUNIT_TEST(testLineBreaking);
CPPUNIT_TEST(testGraphemeIteration);
@@ -64,6 +64,7 @@ public:
CPPUNIT_TEST(testLao);
#endif
CPPUNIT_TEST(testJapanese);
+ CPPUNIT_TEST(testChinese);
CPPUNIT_TEST_SUITE_END();
private:
uno::Reference<i18n::XBreakIterator> m_xBreak;
@@ -938,6 +939,22 @@ void TestBreakIterator::testJapanese()
}
}
+void TestBreakIterator::testChinese()
+{
+ lang::Locale aLocale;
+ aLocale.Language = "zh";
+ aLocale.Country = "CN";
+ i18n::Boundary aBounds;
+
+ {
+ const sal_Unicode CHINESE[] = { 0x6A35, 0x6A30, 0x69FE, 0x8919, 0xD867, 0xDEDB };
+
+ OUString aTest(CHINESE, SAL_N_ELEMENTS(CHINESE));
+ aBounds = m_xBreak->getWordBoundary(aTest, 4, aLocale,
+ i18n::WordType::DICTIONARY_WORD, true);
+ CPPUNIT_ASSERT(aBounds.startPos == 4 && aBounds.endPos == 6);
+ }
+}
void TestBreakIterator::setUp()
{
BootstrapFixtureBase::setUp();
diff --git a/i18npool/source/breakiterator/xdictionary.cxx b/i18npool/source/breakiterator/xdictionary.cxx
index 1200535f38cf..ab2dfd9a94e8 100644
--- a/i18npool/source/breakiterator/xdictionary.cxx
+++ b/i18npool/source/breakiterator/xdictionary.cxx
@@ -387,9 +387,11 @@ Boundary xdictionary::getWordBoundary(const OUString& rText, sal_Int32 anyPos, s
if (u_isWhitespace(ch))
i--;
}
+
boundary.endPos = boundary.startPos;
- rText.iterateCodePoints(&boundary.endPos, aCache.wordboundary[i]);
- rText.iterateCodePoints(&boundary.startPos, aCache.wordboundary[i-1]);
+ boundary.endPos += aCache.wordboundary[i];
+ boundary.startPos += aCache.wordboundary[i-1];
+
} else {
boundary.startPos = anyPos;
if (anyPos < len) rText.iterateCodePoints(&anyPos, 1);