summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Caolán McNamara <caolanm@redhat.com> 2012-02-14 16:07:10 +0000
committer Eike Rathke <erack@redhat.com> 2012-02-17 13:44:06 +0100
commit dab92ff8a19533db8633ae0a8c146f06c0e26932 (patch)
tree 45d49a16005d641389830ef47fea9e2badb38eca
parent a9c91748166a54520c82fd2d155e41081a400af4 (diff)
use icu's breakiterator for Thai, not our customized generic rules
(cherry picked from commit 475d0c59c66fb7752d230f76130b17145aad0c12)
Conflicts:
i18npool/qa/cppunit/test_breakiterator.cxx
i18npool/source/breakiterator/breakiterator_unicode.cxx
Signed-off-by: Eike Rathke <erack@redhat.com>
-rw-r--r-- i18npool/qa/cppunit/test_breakiterator.cxx 35
-rw-r--r-- i18npool/source/breakiterator/breakiterator_unicode.cxx 7
2 files changed, 39 insertions, 3 deletions
diff --git a/i18npool/qa/cppunit/test_breakiterator.cxx b/i18npool/qa/cppunit/test_breakiterator.cxx
index 661e46acc417..820e57b4549c 100644
--- a/i18npool/qa/cppunit/test_breakiterator.cxx
+++ b/i18npool/qa/cppunit/test_breakiterator.cxx
@@ -41,7 +41,8 @@
#include "cppunit/plugin/TestPlugIn.h"
#include <com/sun/star/i18n/XBreakIterator.hpp>
#include <com/sun/star/i18n/CharacterIteratorMode.hpp>
-#include <com/sun/star/i18n/ScriptType.hdl>
+#include <com/sun/star/i18n/ScriptType.hpp>
+#include <com/sun/star/i18n/WordType.hpp>
#include <rtl/strbuf.hxx>
@@ -62,12 +63,14 @@ public:
void testGraphemeIteration();
void testWeak();
void testAsian();
+ void testThai();
CPPUNIT_TEST_SUITE(TestBreakIterator);
CPPUNIT_TEST(testLineBreaking);
CPPUNIT_TEST(testGraphemeIteration);
CPPUNIT_TEST(testWeak);
CPPUNIT_TEST(testAsian);
+ CPPUNIT_TEST(testThai);
CPPUNIT_TEST_SUITE_END();
private:
@@ -249,6 +252,36 @@ void TestBreakIterator::testAsian()
}
}
+//A test to ensure that our Thai word boundary detection is useful: for a th-TH locale, a DICTIONARY_WORD boundary query at position 0 must cover the whole test string
+//Background discussion: http://lists.freedesktop.org/archives/libreoffice/2012-February/025959.html
+void TestBreakIterator::testThai()
+{
+ lang::Locale aLocale; // locale passed to every getWordBoundary call below
+ aLocale.Language = ::rtl::OUString(RTL_CONSTASCII_USTRINGPARAM("th"));
+ aLocale.Country = ::rtl::OUString(RTL_CONSTASCII_USTRINGPARAM("TH"));
+
+ i18n::Boundary aBounds;
+ {
+ const sal_Unicode THAI1[] = { 0x0E01, 0x0E38, 0x0E2B, 0x0E25, 0x0E32, 0x0E1A }; // six code units from the Unicode Thai block (U+0E00..U+0E7F)
+ ::rtl::OUString aTest(THAI1, SAL_N_ELEMENTS(THAI1));
+ aBounds = m_xBreak->getWordBoundary(aTest, 0, aLocale, // query at position 0 of the test string
+ i18n::WordType::DICTIONARY_WORD, true); // NOTE(review): trailing 'true' is presumably the direction flag -- confirm against XBreakIterator
+ CPPUNIT_ASSERT_MESSAGE("Should skip full word",
+ aBounds.startPos == 0 && aBounds.endPos == aTest.getLength()); // the reported word must span the entire string
+ }
+
+#ifdef TODO
+ { // compiled only when TODO is defined; same check as above on a variant input
+ const sal_Unicode NORTHERN_THAI1[] = { 0x0E01, 0x0E38, 0x0E4A, 0x0E2B, 0x0E25, 0x0E32, 0x0E1A }; // THAI1 with U+0E4A inserted after the second code unit
+ ::rtl::OUString aTest(NORTHERN_THAI1, SAL_N_ELEMENTS(NORTHERN_THAI1));
+ aBounds = m_xBreak->getWordBoundary(aTest, 0, aLocale, // query at position 0 of the test string
+ i18n::WordType::DICTIONARY_WORD, true);
+ CPPUNIT_ASSERT_MESSAGE("Should skip full word",
+ aBounds.startPos == 0 && aBounds.endPos == aTest.getLength()); // the reported word must span the entire string
+ }
+#endif
+}
+
TestBreakIterator::TestBreakIterator()
{
m_xContext = cppu::defaultBootstrap_InitialComponentContext();
diff --git a/i18npool/source/breakiterator/breakiterator_unicode.cxx b/i18npool/source/breakiterator/breakiterator_unicode.cxx
index 78ba7a654420..aa5e1d8adf12 100644
--- a/i18npool/source/breakiterator/breakiterator_unicode.cxx
+++ b/i18npool/source/breakiterator/breakiterator_unicode.cxx
@@ -129,10 +129,13 @@ void SAL_CALL BreakIterator_Unicode::loadICUBreakIterator(const com::sun::star::
OOoRuleBasedBreakIterator *rbi = NULL;
- if (breakRules.getLength() > breakType && breakRules[breakType].getLength() > 0) {
+ if (breakRules.getLength() > breakType && !breakRules[breakType].isEmpty())
+ {
rbi = new OOoRuleBasedBreakIterator(udata_open("OpenOffice", "brk",
OUStringToOString(breakRules[breakType], RTL_TEXTENCODING_ASCII_US).getStr(), &status), status);
- } else {
+ }
+ else if (!rLocale.Language.equalsAsciiL(RTL_CONSTASCII_STRINGPARAM("th"))) //use icu's breakiterator for Thai
+ {
status = U_ZERO_ERROR;
OStringBuffer aUDName(64);
aUDName.append(rule);