summary refs log tree commit diff
diff options
context:
space:
mode:
author Caolán McNamara <caolanm@redhat.com> 2012-04-12 09:35:33 +0100
committer Caolán McNamara <caolanm@redhat.com> 2012-04-12 09:46:23 +0100
commit 16cd97480d0681d37f86e89366e1f9964ec16ef8 (patch)
tree ae59ddb24e040ebd0bef49cf6c30b4eb4ab2a773
parent f0a5e147eb9ae4a343f51eda20126488347e66f8 (diff)
Resolves: fdo#40292 Tamil grapheme cluster rules
-rw-r--r-- i18npool/qa/cppunit/test_breakiterator.cxx 73
-rw-r--r-- i18npool/source/breakiterator/data/char.txt 7
-rw-r--r-- i18npool/source/breakiterator/data/char_in.txt 7
3 files changed, 67 insertions, 20 deletions
diff --git a/i18npool/qa/cppunit/test_breakiterator.cxx b/i18npool/qa/cppunit/test_breakiterator.cxx
index 820e57b4549c..7a70f010210e 100644
--- a/i18npool/qa/cppunit/test_breakiterator.cxx
+++ b/i18npool/qa/cppunit/test_breakiterator.cxx
@@ -80,10 +80,10 @@ private:
uno::Reference<i18n::XBreakIterator> m_xBreak;
};
-//See https://bugs.freedesktop.org/show_bug.cgi?id=31271 for motivation
+//See https://bugs.freedesktop.org/show_bug.cgi?id=31271
void TestBreakIterator::testLineBreaking()
{
- ::rtl::OUString aTest1(RTL_CONSTASCII_USTRINGPARAM("(some text here)"));
+ ::rtl::OUString aTest(RTL_CONSTASCII_USTRINGPARAM("(some text here)"));
i18n::LineBreakHyphenationOptions aHyphOptions;
i18n::LineBreakUserOptions aUserOptions;
@@ -94,18 +94,19 @@ void TestBreakIterator::testLineBreaking()
{
//Here we want the line break to leave text here) on the next line
- i18n::LineBreakResults aResult = m_xBreak->getLineBreak(aTest1, strlen("(some tex"), aLocale, 0, aHyphOptions, aUserOptions);
+ i18n::LineBreakResults aResult = m_xBreak->getLineBreak(aTest, strlen("(some tex"), aLocale, 0, aHyphOptions, aUserOptions);
CPPUNIT_ASSERT_MESSAGE("Expected a break at the the start of the word", aResult.breakIndex == 6);
}
{
//Here we want the line break to leave "here)" on the next line
- i18n::LineBreakResults aResult = m_xBreak->getLineBreak(aTest1, strlen("(some text here"), aLocale, 0, aHyphOptions, aUserOptions);
+ i18n::LineBreakResults aResult = m_xBreak->getLineBreak(aTest, strlen("(some text here"), aLocale, 0, aHyphOptions, aUserOptions);
CPPUNIT_ASSERT_MESSAGE("Expected a break at the the start of the word", aResult.breakIndex == 11);
}
}
-//See http://qa.openoffice.org/issues/show_bug.cgi?id=111152 for motivation
+//See http://qa.openoffice.org/issues/show_bug.cgi?id=111152
+//See https://bugs.freedesktop.org/show_bug.cgi?id=40292
void TestBreakIterator::testGraphemeIteration()
{
lang::Locale aLocale;
@@ -114,46 +115,90 @@ void TestBreakIterator::testGraphemeIteration()
{
const sal_Unicode BA_HALANT_LA[] = { 0x09AC, 0x09CD, 0x09AF };
- ::rtl::OUString aTest1(BA_HALANT_LA, SAL_N_ELEMENTS(BA_HALANT_LA));
+ ::rtl::OUString aTest(BA_HALANT_LA, SAL_N_ELEMENTS(BA_HALANT_LA));
sal_Int32 nDone=0;
sal_Int32 nPos;
- nPos = m_xBreak->nextCharacters(aTest1, 0, aLocale,
+ nPos = m_xBreak->nextCharacters(aTest, 0, aLocale,
i18n::CharacterIteratorMode::SKIPCELL, 1, nDone);
CPPUNIT_ASSERT_MESSAGE("Should skip full grapheme", nPos == SAL_N_ELEMENTS(BA_HALANT_LA));
- nPos = m_xBreak->previousCharacters(aTest1, SAL_N_ELEMENTS(BA_HALANT_LA), aLocale,
+ nPos = m_xBreak->previousCharacters(aTest, SAL_N_ELEMENTS(BA_HALANT_LA), aLocale,
i18n::CharacterIteratorMode::SKIPCELL, 1, nDone);
CPPUNIT_ASSERT_MESSAGE("Should skip full grapheme", nPos == 0);
}
{
const sal_Unicode HA_HALANT_NA_VOWELSIGNI[] = { 0x09B9, 0x09CD, 0x09A3, 0x09BF };
- ::rtl::OUString aTest1(HA_HALANT_NA_VOWELSIGNI, SAL_N_ELEMENTS(HA_HALANT_NA_VOWELSIGNI));
+ ::rtl::OUString aTest(HA_HALANT_NA_VOWELSIGNI, SAL_N_ELEMENTS(HA_HALANT_NA_VOWELSIGNI));
sal_Int32 nDone=0;
sal_Int32 nPos;
- nPos = m_xBreak->nextCharacters(aTest1, 0, aLocale,
+ nPos = m_xBreak->nextCharacters(aTest, 0, aLocale,
i18n::CharacterIteratorMode::SKIPCELL, 1, nDone);
CPPUNIT_ASSERT_MESSAGE("Should skip full grapheme", nPos == SAL_N_ELEMENTS(HA_HALANT_NA_VOWELSIGNI));
- nPos = m_xBreak->previousCharacters(aTest1, SAL_N_ELEMENTS(HA_HALANT_NA_VOWELSIGNI), aLocale,
+ nPos = m_xBreak->previousCharacters(aTest, SAL_N_ELEMENTS(HA_HALANT_NA_VOWELSIGNI), aLocale,
i18n::CharacterIteratorMode::SKIPCELL, 1, nDone);
CPPUNIT_ASSERT_MESSAGE("Should skip full grapheme", nPos == 0);
}
{
const sal_Unicode TA_HALANT_MA_HALANT_YA [] = { 0x09A4, 0x09CD, 0x09AE, 0x09CD, 0x09AF };
- ::rtl::OUString aTest1(TA_HALANT_MA_HALANT_YA, SAL_N_ELEMENTS(TA_HALANT_MA_HALANT_YA));
+ ::rtl::OUString aTest(TA_HALANT_MA_HALANT_YA, SAL_N_ELEMENTS(TA_HALANT_MA_HALANT_YA));
sal_Int32 nDone=0;
sal_Int32 nPos;
- nPos = m_xBreak->nextCharacters(aTest1, 0, aLocale,
+ nPos = m_xBreak->nextCharacters(aTest, 0, aLocale,
i18n::CharacterIteratorMode::SKIPCELL, 1, nDone);
CPPUNIT_ASSERT_MESSAGE("Should skip full grapheme", nPos == SAL_N_ELEMENTS(TA_HALANT_MA_HALANT_YA));
- nPos = m_xBreak->previousCharacters(aTest1, SAL_N_ELEMENTS(TA_HALANT_MA_HALANT_YA), aLocale,
+ nPos = m_xBreak->previousCharacters(aTest, SAL_N_ELEMENTS(TA_HALANT_MA_HALANT_YA), aLocale,
i18n::CharacterIteratorMode::SKIPCELL, 1, nDone);
CPPUNIT_ASSERT_MESSAGE("Should skip full grapheme", nPos == 0);
}
+ aLocale.Language = ::rtl::OUString(RTL_CONSTASCII_USTRINGPARAM("ta"));
+ aLocale.Country = ::rtl::OUString(RTL_CONSTASCII_USTRINGPARAM("IN"));
+
+ {
+ const sal_Unicode KA_VIRAMA_SSA[] = { 0x0B95, 0x0BCD, 0x0BB7 };
+ ::rtl::OUString aTest(KA_VIRAMA_SSA, SAL_N_ELEMENTS(KA_VIRAMA_SSA));
+
+ sal_Int32 nDone=0;
+ sal_Int32 nPos = 0;
+
+ nPos = m_xBreak->nextCharacters(aTest, 0, aLocale,
+ i18n::CharacterIteratorMode::SKIPCELL, 1, nDone);
+ CPPUNIT_ASSERT_MESSAGE("Should skip full grapheme", nPos == SAL_N_ELEMENTS(KA_VIRAMA_SSA));
+ nPos = m_xBreak->previousCharacters(aTest, SAL_N_ELEMENTS(KA_VIRAMA_SSA), aLocale,
+ i18n::CharacterIteratorMode::SKIPCELL, 1, nDone);
+ CPPUNIT_ASSERT_MESSAGE("Should skip full grapheme", nPos == 0);
+ }
+
+ {
+ const sal_Unicode CA_VOWELSIGNI_TA_VIRAMA_TA_VOWELSIGNI_RA_VOWELSIGNAI[] =
+ { 0x0B9A, 0x0BBF, 0x0BA4, 0x0BCD, 0x0BA4, 0x0BBF, 0x0BB0, 0x0BC8 };
+ ::rtl::OUString aTest(CA_VOWELSIGNI_TA_VIRAMA_TA_VOWELSIGNI_RA_VOWELSIGNAI,
+ SAL_N_ELEMENTS(CA_VOWELSIGNI_TA_VIRAMA_TA_VOWELSIGNI_RA_VOWELSIGNAI));
+
+ sal_Int32 nDone=0;
+ sal_Int32 nPos=0;
+
+ for (sal_Int32 i = 0; i < 4; ++i)
+ {
+ sal_Int32 nOldPos = nPos;
+ nPos = m_xBreak->nextCharacters(aTest, nPos, aLocale,
+ i18n::CharacterIteratorMode::SKIPCELL, 1, nDone);
+ CPPUNIT_ASSERT_MESSAGE("Should skip 2 units", nPos == nOldPos+2);
+ }
+
+ for (sal_Int32 i = 0; i < 4; ++i)
+ {
+ sal_Int32 nOldPos = nPos;
+ nPos = m_xBreak->previousCharacters(aTest, nPos, aLocale,
+ i18n::CharacterIteratorMode::SKIPCELL, 1, nDone);
+ CPPUNIT_ASSERT_MESSAGE("Should skip 2 units", nPos == nOldPos-2);
+ }
+ }
+
{
const sal_Unicode ALEF_QAMATS [] = { 0x05D0, 0x05B8 };
::rtl::OUString aText(ALEF_QAMATS, SAL_N_ELEMENTS(ALEF_QAMATS));
diff --git a/i18npool/source/breakiterator/data/char.txt b/i18npool/source/breakiterator/data/char.txt
index 8e49a565ed8c..6ab8803681ee 100644
--- a/i18npool/source/breakiterator/data/char.txt
+++ b/i18npool/source/breakiterator/data/char.txt
@@ -40,8 +40,9 @@ $OriyaLetter = [\u0B05-\u0B39 \u0B5C-\u0B61 \u0B71];
$OriyaSignVirama = \u0B4D;
$GurmukhiLetter = [\u0A05-\u0A39 \u0A59-\u0A5E];
$GurmukhiSignVirama = \u0A4D;
-$TamilLetter = [\u0B85-\u0BB9];
+$TamilKa = \u0B95;
$TamilSignVirama = \u0BCD;
+$TamilSsa = \u0BB7;
$TeluguLetter = [\u0C05-\u0C39 \u0C58-\u0C61];
$TeluguSignVirama = \u0C4D;
@@ -70,7 +71,7 @@ $KannadaLetter ($KannadaSignVirama $KannadaLetter?)+;
$MalayalamLetter ($MalayalamSignVirama $MalayalamLetter?)+;
$OriyaLetter ($OriyaSignVirama $OriyaLetter?)+;
$GurmukhiLetter ($GurmukhiSignVirama $GurmukhiLetter?)+;
-$TamilLetter ($TamilSignVirama $TamilLetter?)+;
+$TamilKa $TamilSignVirama $TamilSsa;
$TeluguLetter ($TeluguSignVirama $TeluguLetter?)+;
$L ($L | $V | $LV | $LVT);
@@ -95,7 +96,7 @@ $LF $CR;
($MalayalamLetter? $MalayalamSignVirama)+ $MalayalamLetter;
($OriyaLetter? $OriyaSignVirama)+ $OriyaLetter;
($GurmukhiLetter? $GurmukhiSignVirama)+ $GurmukhiLetter;
-($TamilLetter? $TamilSignVirama)+ $TamilLetter;
+$TamilSsa $TamilSignVirama $TamilKa;
($TeluguLetter? $TeluguSignVirama)+ $TeluguLetter;
($L | $V | $LV | $LVT) $L;
($V | $T) ($LV | $V);
diff --git a/i18npool/source/breakiterator/data/char_in.txt b/i18npool/source/breakiterator/data/char_in.txt
index 5e1ed67596c0..e791055b4ef8 100644
--- a/i18npool/source/breakiterator/data/char_in.txt
+++ b/i18npool/source/breakiterator/data/char_in.txt
@@ -32,8 +32,9 @@ $OriyaLetter = [\u0B05-\u0B39 \u0B5C-\u0B61 \u0B71];
$OriyaSignVirama = \u0B4D;
$GurmukhiLetter = [\u0A05-\u0A39 \u0A59-\u0A5E];
$GurmukhiSignVirama = \u0A4D;
-$TamilLetter = [\u0B85-\u0BB9];
+$TamilKa = \u0B95;
$TamilSignVirama = \u0BCD;
+$TamilSsa = \u0BB7;
$TeluguLetter = [\u0C05-\u0C39 \u0C58-\u0C61];
$TeluguSignVirama = \u0C4D;
@@ -62,7 +63,7 @@ $KannadaLetter ($KannadaSignVirama $KannadaLetter?)+;
$MalayalamLetter ($MalayalamSignVirama $MalayalamLetter?)+;
$OriyaLetter ($OriyaSignVirama $OriyaLetter?)+;
$GurmukhiLetter ($GurmukhiSignVirama $GurmukhiLetter?)+;
-$TamilLetter ($TamilSignVirama $TamilLetter?)+;
+$TamilKa $TamilSignVirama $TamilSsa;
$TeluguLetter ($TeluguSignVirama $TeluguLetter?)+;
$L ($L | $V | $LV | $LVT);
@@ -86,7 +87,7 @@ $LF $CR;
($MalayalamLetter? $MalayalamSignVirama)+ $MalayalamLetter;
($OriyaLetter? $OriyaSignVirama)+ $OriyaLetter;
($GurmukhiLetter? $GurmukhiSignVirama)+ $GurmukhiLetter;
-($TamilLetter? $TamilSignVirama)+ $TamilLetter;
+$TamilSsa $TamilSignVirama $TamilKa;
($TeluguLetter? $TeluguSignVirama)+ $TeluguLetter;
($L | $V | $LV | $LVT) $L;
($V | $T) ($LV | $V);