From 8ad1d4443e67784a8c0d3c1a3a72f089cb0cd3ec Mon Sep 17 00:00:00 2001
From: Caolán McNamara
Date: Fri, 13 Jul 2012 09:49:02 +0100
Subject: Resolves: fdo#52020 ICU breakiterator not used for Khmer

Change-Id: I4c99129cabe70f17aa223cf8ec0ae1529188b6b7
---
Reviewer notes (text in this section is not part of the commit message):

* test_breakiterator.cxx: declares TestBreakIterator::testKhmer and registers
  it in the CPPUNIT suite, unconditionally (outside the "#if TODO" guard that
  wraps testNorthernThai).
* testKhmer builds a five-code-unit string (U+17B2 U+17D2 U+1799 U+1782
  U+17C1) and calls getWordBoundary with WordType::DICTIONARY_WORD under
  locale Language "km" / Country "KH". It expects startPos 0 / endPos 3 for
  the query at position 0, then startPos 3 / endPos 5 for the query at the
  previously returned endPos.
* breakiterator_unicode.cxx: in loadICUBreakIterator the "else if" guard that
  used to skip only Language "th" now skips "km" as well; the inline comment
  states the intent is to use ICU's own break iterator for both languages.
* NOTE(review): this copy was recovered from a whitespace-collapsed export.
  Leading indentation of context lines and the template argument of
  uno::Reference were reconstructed, and the author e-mail address is missing
  from the From: header - verify against the tree before applying.

 i18npool/qa/cppunit/test_breakiterator.cxx         | 24 ++++++++++++++++++++++
 .../source/breakiterator/breakiterator_unicode.cxx |  2 +-
 2 files changed, 25 insertions(+), 1 deletion(-)

diff --git a/i18npool/qa/cppunit/test_breakiterator.cxx b/i18npool/qa/cppunit/test_breakiterator.cxx
index c191bbcce0b7..68dc1ef27e97 100644
--- a/i18npool/qa/cppunit/test_breakiterator.cxx
+++ b/i18npool/qa/cppunit/test_breakiterator.cxx
@@ -59,6 +59,7 @@ public:
 #if TODO
     void testNorthernThai();
 #endif
+    void testKhmer();
 
     CPPUNIT_TEST_SUITE(TestBreakIterator);
     CPPUNIT_TEST(testLineBreaking);
@@ -69,6 +70,7 @@ public:
 #if TODO
     CPPUNIT_TEST(testNorthernThai);
 #endif
+    CPPUNIT_TEST(testKhmer);
     CPPUNIT_TEST_SUITE_END();
 private:
     uno::Reference<i18n::XBreakIterator> m_xBreak;
@@ -341,6 +343,28 @@ void TestBreakIterator::testNorthernThai()
 }
 #endif
 
+//A test to ensure that our khmer word boundary detection is useful
+//https://bugs.freedesktop.org/show_bug.cgi?id=52020
+void TestBreakIterator::testKhmer()
+{
+    lang::Locale aLocale;
+    aLocale.Language = ::rtl::OUString(RTL_CONSTASCII_USTRINGPARAM("km"));
+    aLocale.Country = ::rtl::OUString(RTL_CONSTASCII_USTRINGPARAM("KH"));
+
+    const sal_Unicode KHMER1[] = { 0x17B2, 0x17D2, 0x1799, 0x1782, 0x17C1 };
+
+    ::rtl::OUString aTest(KHMER1, SAL_N_ELEMENTS(KHMER1));
+    i18n::Boundary aBounds = m_xBreak->getWordBoundary(aTest, 0, aLocale,
+        i18n::WordType::DICTIONARY_WORD, true);
+
+    CPPUNIT_ASSERT(aBounds.startPos == 0 && aBounds.endPos == 3);
+
+    aBounds = m_xBreak->getWordBoundary(aTest, aBounds.endPos, aLocale,
+        i18n::WordType::DICTIONARY_WORD, true);
+
+    CPPUNIT_ASSERT(aBounds.startPos == 3 && aBounds.endPos == 5);
+}
+
 void TestBreakIterator::setUp()
 {
     BootstrapFixtureBase::setUp();
diff --git a/i18npool/source/breakiterator/breakiterator_unicode.cxx b/i18npool/source/breakiterator/breakiterator_unicode.cxx
index 2e32656dae77..0669c0128f30 100644
--- a/i18npool/source/breakiterator/breakiterator_unicode.cxx
+++ b/i18npool/source/breakiterator/breakiterator_unicode.cxx
@@ -133,7 +133,7 @@ void SAL_CALL BreakIterator_Unicode::loadICUBreakIterator(const com::sun::star::
                 rbi = new OOoRuleBasedBreakIterator(udata_open("OpenOffice", "brk",
                     OUStringToOString(breakRules[breakType], RTL_TEXTENCODING_ASCII_US).getStr(), &status), status);
             }
-            else if ( rLocale.Language != "th" ) //use icu's breakiterator for Thai
+            else if (rLocale.Language != "th" && rLocale.Language != "km") //use icu's breakiterator for Thai and Khmer
             {
                 status = U_ZERO_ERROR;
                 OStringBuffer aUDName(64);
-- 
cgit v1.2.3