diff options
Diffstat (limited to 'i18npool/qa/cppunit')
-rw-r--r-- | i18npool/qa/cppunit/indexentry.cxx | 70 | ||||
-rw-r--r-- | i18npool/qa/cppunit/test_breakiterator.cxx | 731 | ||||
-rw-r--r-- | i18npool/qa/cppunit/test_calendar.cxx | 130 | ||||
-rw-r--r-- | i18npool/qa/cppunit/test_characterclassification.cxx | 210 | ||||
-rw-r--r-- | i18npool/qa/cppunit/test_defaultnumberingprovider.cxx | 60 | ||||
-rw-r--r-- | i18npool/qa/cppunit/test_textsearch.cxx | 140 | ||||
-rw-r--r-- | i18npool/qa/cppunit/transliteration.cxx | 192 |
7 files changed, 1417 insertions, 116 deletions
diff --git a/i18npool/qa/cppunit/indexentry.cxx b/i18npool/qa/cppunit/indexentry.cxx new file mode 100644 index 000000000000..a95bca547a98 --- /dev/null +++ b/i18npool/qa/cppunit/indexentry.cxx @@ -0,0 +1,70 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; fill-column: 100 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + */ + +#include <sal/config.h> + +#include <cppunit/TestFixture.h> +#include <cppunit/extensions/HelperMacros.h> +#include <cppunit/plugin/TestPlugIn.h> + +#include <com/sun/star/i18n/IndexEntrySupplier.hpp> +#include <com/sun/star/i18n/XExtendedIndexEntrySupplier.hpp> +#include <com/sun/star/lang/Locale.hpp> +#include <com/sun/star/uno/Reference.hxx> +#include <comphelper/sequence.hxx> +#include <cppuhelper/bootstrap.hxx> + +namespace +{ +class IndexEntry : public CppUnit::TestFixture +{ +public: + void setUp() + { + supplier_ = css::i18n::IndexEntrySupplier::create( + cppu::defaultBootstrap_InitialComponentContext()); + } + + void testJapanese() + { + css::lang::Locale loc("ja", "JP", ""); + auto const s = supplier_->getAlgorithmList(loc); + CPPUNIT_ASSERT_EQUAL(sal_Int32(4), s.getLength()); + CPPUNIT_ASSERT( + comphelper::findValue(s, "phonetic (alphanumeric first) (grouped by consonant)") != -1); + CPPUNIT_ASSERT( + comphelper::findValue(s, "phonetic (alphanumeric first) (grouped by syllable)") != -1); + CPPUNIT_ASSERT( + comphelper::findValue(s, "phonetic (alphanumeric last) (grouped by consonant)") != -1); + CPPUNIT_ASSERT( + comphelper::findValue(s, "phonetic (alphanumeric last) (grouped by syllable)") != -1); + CPPUNIT_ASSERT(supplier_->loadAlgorithm( + loc, "phonetic (alphanumeric first) (grouped by consonant)", 0)); + CPPUNIT_ASSERT(supplier_->loadAlgorithm( + loc, "phonetic (alphanumeric 
first) (grouped by syllable)", 0)); + CPPUNIT_ASSERT(supplier_->loadAlgorithm( + loc, "phonetic (alphanumeric last) (grouped by consonant)", 0)); + CPPUNIT_ASSERT( + supplier_->loadAlgorithm(loc, "phonetic (alphanumeric last) (grouped by syllable)", 0)); + } + + CPPUNIT_TEST_SUITE(IndexEntry); + CPPUNIT_TEST(testJapanese); + CPPUNIT_TEST_SUITE_END(); + +private: + css::uno::Reference<css::i18n::XExtendedIndexEntrySupplier> supplier_; +}; + +CPPUNIT_TEST_SUITE_REGISTRATION(IndexEntry); +} + +CPPUNIT_PLUGIN_IMPLEMENT(); + +/* vim:set shiftwidth=4 softtabstop=4 expandtab cinoptions=b1,g0,N-s cinkeys+=0=break: */ diff --git a/i18npool/qa/cppunit/test_breakiterator.cxx b/i18npool/qa/cppunit/test_breakiterator.cxx index 936649bb537e..1e5fd8f025af 100644 --- a/i18npool/qa/cppunit/test_breakiterator.cxx +++ b/i18npool/qa/cppunit/test_breakiterator.cxx @@ -31,36 +31,49 @@ public: void testLineBreaking(); void testWordBoundaries(); + void testSentenceBoundaries(); void testGraphemeIteration(); void testWeak(); void testAsian(); void testThai(); -#if (U_ICU_VERSION_MAJOR_NUM > 51) void testLao(); #ifdef TODO void testNorthernThai(); void testKhmer(); #endif -#endif void testJapanese(); void testChinese(); + void testLegacyDictWordPrepostDash_de_DE(); + void testLegacyDictWordPrepostDash_nds_DE(); + void testLegacyDictWordPrepostDash_nl_NL(); + void testLegacyDictWordPrepostDash_sv_SE(); + void testLegacyHebrewQuoteInsideWord(); + void testLegacySurrogatePairs(); + void testLegacyWordCountCompat(); + CPPUNIT_TEST_SUITE(TestBreakIterator); CPPUNIT_TEST(testLineBreaking); CPPUNIT_TEST(testWordBoundaries); + CPPUNIT_TEST(testSentenceBoundaries); CPPUNIT_TEST(testGraphemeIteration); CPPUNIT_TEST(testWeak); CPPUNIT_TEST(testAsian); CPPUNIT_TEST(testThai); -#if (U_ICU_VERSION_MAJOR_NUM > 51) CPPUNIT_TEST(testLao); #ifdef TODO CPPUNIT_TEST(testKhmer); CPPUNIT_TEST(testNorthernThai); #endif -#endif CPPUNIT_TEST(testJapanese); CPPUNIT_TEST(testChinese); + 
CPPUNIT_TEST(testLegacyDictWordPrepostDash_de_DE); + CPPUNIT_TEST(testLegacyDictWordPrepostDash_nds_DE); + CPPUNIT_TEST(testLegacyDictWordPrepostDash_nl_NL); + CPPUNIT_TEST(testLegacyDictWordPrepostDash_sv_SE); + CPPUNIT_TEST(testLegacyHebrewQuoteInsideWord); + CPPUNIT_TEST(testLegacySurrogatePairs); + CPPUNIT_TEST(testLegacyWordCountCompat); CPPUNIT_TEST_SUITE_END(); private: @@ -96,7 +109,7 @@ void TestBreakIterator::testLineBreaking() //See https://bugs.libreoffice.org/show_bug.cgi?id=49849 { - static constexpr OUStringLiteral aWord = u"\u05DE\u05D9\u05DC\u05D9\u05DD"; + static constexpr OUString aWord = u"\u05DE\u05D9\u05DC\u05D9\u05DD"_ustr; OUString aTest(aWord + " " + aWord); aLocale.Language = "he"; @@ -122,6 +135,173 @@ void TestBreakIterator::testLineBreaking() } } + // i#22602: writer breaks word after dot immediately followed by a letter + { + aLocale.Language = "en"; + aLocale.Country = "US"; + + { + //Here we want the line break to leave ./bar/baz clumped together on the next line + i18n::LineBreakResults aResult = m_xBreak->getLineBreak( + "foo ./bar/baz", strlen("foo ./bar/ba"), aLocale, 0, aHyphOptions, aUserOptions); + CPPUNIT_ASSERT_EQUAL_MESSAGE("Expected a break at the first period", + static_cast<sal_Int32>(4), aResult.breakIndex); + } + } + + // i#81448: slash and backslash make non-breaking spaces of preceding spaces + { + aLocale.Language = "en"; + aLocale.Country = "US"; + + { + // Per the bug, the line break should leave ...BE clumped together on the next line. + // However, the current behavior does not wrap the string at all. This test asserts the + // current behavior as a point of reference. + i18n::LineBreakResults aResult = m_xBreak->getLineBreak( + "THIS... ...BE", strlen("THIS... 
...B"), aLocale, 0, aHyphOptions, aUserOptions); + CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(0), aResult.breakIndex); + } + } + + // i#81448: slash and backslash make non-breaking spaces of preceding spaces + { + aLocale.Language = "en"; + aLocale.Country = "US"; + + { + // The line break should leave /BE clumped together on the next line. + i18n::LineBreakResults aResult = m_xBreak->getLineBreak( + "THIS... /BE", strlen("THIS... /B"), aLocale, 0, aHyphOptions, aUserOptions); + CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(8), aResult.breakIndex); + } + } + + // i#80548: Bad word wrap between dash and word + { + aLocale.Language = "fi"; + aLocale.Country = "FI"; + + { + // Per the bug, the line break should leave -bar clumped together on the next line. + // However, this change was reverted at some point. This test asserts the new behavior. + i18n::LineBreakResults aResult = m_xBreak->getLineBreak( + "foo -bar", strlen("foo -ba"), aLocale, 0, aHyphOptions, aUserOptions); + CPPUNIT_ASSERT_EQUAL_MESSAGE("Expected a break at the first dash", + static_cast<sal_Int32>(5), aResult.breakIndex); + } + } + + // i#80645: Line erroneously breaks at backslash + { + aLocale.Language = "en"; + aLocale.Country = "US"; + + { + // Here we want the line break to leave C:\Program Files\ on the first line + i18n::LineBreakResults aResult = m_xBreak->getLineBreak( + "C:\\Program Files\\LibreOffice", strlen("C:\\Program Files\\Libre"), aLocale, 0, + aHyphOptions, aUserOptions); + CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(17), aResult.breakIndex); + } + } + + // i#80841: Words separated by hyphens will always break to next line + { + aLocale.Language = "en"; + aLocale.Country = "US"; + + { + // Here we want the line break to leave toll- on the first line + i18n::LineBreakResults aResult = m_xBreak->getLineBreak( + "toll-free", strlen("toll-fr"), aLocale, 0, aHyphOptions, aUserOptions); + CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(5), aResult.breakIndex); + } + } + + // i#83464: Line 
break between letter and $ + { + aLocale.Language = "en"; + aLocale.Country = "US"; + + { + // Here we want the line break to leave US$ clumped on the next line. + i18n::LineBreakResults aResult = m_xBreak->getLineBreak( + "word US$ 123", strlen("word U"), aLocale, 0, aHyphOptions, aUserOptions); + CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(5), aResult.breakIndex); + } + } + + // Unknown bug number: "fix line break problem of dot after letter and before number" + { + aLocale.Language = "en"; + aLocale.Country = "US"; + + { + // Here we want the line break to leave US$ clumped on the next line. + i18n::LineBreakResults aResult = m_xBreak->getLineBreak( + "word L.5 word", strlen("word L"), aLocale, 0, aHyphOptions, aUserOptions); + CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(5), aResult.breakIndex); + } + } + + // i#83229: Wrong line break when word contains a hyphen + { + aLocale.Language = "en"; + aLocale.Country = "US"; + + { + // Here we want the line break to leave 100- clumped on the first line. + i18n::LineBreakResults aResult = m_xBreak->getLineBreak( + "word 100-199 word", strlen("word 100-1"), aLocale, 0, aHyphOptions, aUserOptions); + CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(9), aResult.breakIndex); + } + } + + // i#83649: Line break should be between typographical quote and left bracket + { + aLocale.Language = "de"; + aLocale.Country = "DE"; + + { + // Here we want the line break to leave »angetan werden« on the first line + const OUString str = u"»angetan werden« [Passiv]"_ustr; + i18n::LineBreakResults aResult = m_xBreak->getLineBreak( + str, strlen("Xangetan werdenX ["), aLocale, 0, aHyphOptions, aUserOptions); + CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(17), aResult.breakIndex); + } + } + + // i#72868: Writer/Impress line does not break after Chinese punctuation and Latin letters + { + aLocale.Language = "zh"; + aLocale.Country = "HK"; + + { + // Per the bug, this should break at the ideographic comma. 
However, this change has + // been reverted at some point. This test only verifies current behavior. + const OUString str = u"word word、word word"_ustr; + i18n::LineBreakResults aResult = m_xBreak->getLineBreak( + str, strlen("word wordXwor"), aLocale, 0, aHyphOptions, aUserOptions); + CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(13), aResult.breakIndex); + } + } + + // i#80891: Character in the forbidden list sometimes appears at the start of line + { + aLocale.Language = "zh"; + aLocale.Country = "HK"; + + { + // Per the bug, the ideographic two-dot leader should be a forbidden character. However, + // this change seems to have been reverted or broken at some point. + const OUString str = u"電話︰電話"_ustr; + i18n::LineBreakResults aResult + = m_xBreak->getLineBreak(str, 2, aLocale, 0, aHyphOptions, aUserOptions); + CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(2), aResult.breakIndex); + } + } + //See https://bz.apache.org/ooo/show_bug.cgi?id=19716 { aLocale.Language = "en"; @@ -152,8 +332,8 @@ void TestBreakIterator::testLineBreaking() //See https://bugs.documentfoundation.org/show_bug.cgi?id=96197 { - static constexpr OUStringLiteral aTest = u"\uc560\uad6D\uac00\uc758 \uac00" - "\uc0ac\ub294"; + static constexpr OUString aTest = u"\uc560\uad6D\uac00\uc758 \uac00" + "\uc0ac\ub294"_ustr; aLocale.Language = "ko"; aLocale.Country = "KR"; @@ -164,6 +344,20 @@ void TestBreakIterator::testLineBreaking() CPPUNIT_ASSERT_EQUAL_MESSAGE("Expected a break don't split the Korean word!", static_cast<sal_Int32>(5), aResult.breakIndex); } } + + // i#65267: Comma is badly broken at end of line + // - The word should be wrapped along with the comma + { + aLocale.Language = "de"; + aLocale.Country = "DE"; + + { + auto res = m_xBreak->getLineBreak("Wort -prinzessinnen, wort", + strlen("Wort -prinzessinnen,"), aLocale, 0, + aHyphOptions, aUserOptions); + CPPUNIT_ASSERT_EQUAL(sal_Int32{ 6 }, res.breakIndex); + } + } } //See https://bugs.libreoffice.org/show_bug.cgi?id=49629 @@ -230,7 +424,7 
@@ void TestBreakIterator::testWordBoundaries() //See https://bz.apache.org/ooo/show_bug.cgi?id=14904 { - static constexpr OUStringLiteral aTest = + static constexpr OUString aTest = u"Working \u201CWords" " starting wit" "h quotes\u201D Work" @@ -238,7 +432,7 @@ void TestBreakIterator::testWordBoundaries() "?Spanish? doe" "sn\u2019t work. No" "t even \u00BFreal? " - "Spanish"; + "Spanish"_ustr; aBounds = m_xBreak->getWordBoundary(aTest, 4, aLocale, i18n::WordType::DICTIONARY_WORD, false); CPPUNIT_ASSERT_EQUAL(sal_Int32(0), aBounds.startPos); @@ -274,9 +468,9 @@ void TestBreakIterator::testWordBoundaries() for (int mode = i18n::WordType::ANY_WORD; mode <= i18n::WordType::WORD_COUNT; ++mode) { //make sure that in all cases isBeginWord and isEndWord matches getWordBoundary - for (size_t i = 0; i < SAL_N_ELEMENTS(aBreakTests); ++i) + for (auto const& i: aBreakTests) { - OUString aTest = "Word" + OUStringChar(aBreakTests[i]) + "Word"; + OUString aTest = "Word" + OUStringChar(i) + "Word"; aBounds = m_xBreak->getWordBoundary(aTest, 0, aLocale, mode, true); switch (mode) { @@ -307,9 +501,9 @@ void TestBreakIterator::testWordBoundaries() for (int mode = i18n::WordType::ANY_WORD; mode <= i18n::WordType::WORD_COUNT; ++mode) { //make sure that in all cases isBeginWord and isEndWord matches getWordBoundary - for (size_t i = 0; i < SAL_N_ELEMENTS(aJoinTests); ++i) + for (auto const& p: aJoinTests) { - OUString aTest = "Word" + OUStringChar(aJoinTests[i]) + "Word"; + OUString aTest = "Word" + OUStringChar(p) + "Word"; aBounds = m_xBreak->getWordBoundary(aTest, 0, aLocale, mode, true); switch (mode) { @@ -338,7 +532,7 @@ void TestBreakIterator::testWordBoundaries() //See https://bz.apache.org/ooo/show_bug.cgi?id=13494 { - const OUString aBase("xxAAxxBBxxCCxx"); + constexpr OUString aBase(u"xxAAxxBBxxCCxx"_ustr); const sal_Unicode aTests[] = { '\'', ';', ',', '.', '!', '@', '#', '%', '&', '*', @@ -347,21 +541,21 @@ void TestBreakIterator::testWordBoundaries() }; const sal_Int32 
aDoublePositions[] = {0, 2, 4, 6, 8, 10, 12, 14}; - for (size_t j = 0; j < SAL_N_ELEMENTS(aTests); ++j) + for (auto const& r: aTests) { - OUString aTest = aBase.replace('x', aTests[j]); + OUString aTest = aBase.replace('x', r); sal_Int32 nPos = -1; size_t i = 0; do { - CPPUNIT_ASSERT(i < SAL_N_ELEMENTS(aDoublePositions)); + CPPUNIT_ASSERT(i < std::size(aDoublePositions)); nPos = m_xBreak->nextWord(aTest, nPos, aLocale, i18n::WordType::ANYWORD_IGNOREWHITESPACES).startPos; CPPUNIT_ASSERT_EQUAL(aDoublePositions[i], nPos); ++i; } while (nPos < aTest.getLength()); nPos = aTest.getLength(); - i = SAL_N_ELEMENTS(aDoublePositions)-1; + i = std::size(aDoublePositions)-1; do { nPos = m_xBreak->previousWord(aTest, nPos, aLocale, i18n::WordType::ANYWORD_IGNOREWHITESPACES).startPos; @@ -372,21 +566,21 @@ void TestBreakIterator::testWordBoundaries() } const sal_Int32 aSinglePositions[] = {0, 1, 3, 4, 6, 7, 9, 10}; - for (size_t j = 1; j < SAL_N_ELEMENTS(aTests); ++j) + for (size_t j = 1; j < std::size(aTests); ++j) { OUString aTest = aBase.replaceAll("xx", OUStringChar(aTests[j])); sal_Int32 nPos = -1; size_t i = 0; do { - CPPUNIT_ASSERT(i < SAL_N_ELEMENTS(aSinglePositions)); + CPPUNIT_ASSERT(i < std::size(aSinglePositions)); nPos = m_xBreak->nextWord(aTest, nPos, aLocale, i18n::WordType::ANYWORD_IGNOREWHITESPACES).startPos; CPPUNIT_ASSERT_EQUAL(aSinglePositions[i], nPos); ++i; } while (nPos < aTest.getLength()); nPos = aTest.getLength(); - i = SAL_N_ELEMENTS(aSinglePositions)-1; + i = std::size(aSinglePositions)-1; do { nPos = m_xBreak->previousWord(aTest, nPos, aLocale, i18n::WordType::ANYWORD_IGNOREWHITESPACES).startPos; @@ -404,14 +598,14 @@ void TestBreakIterator::testWordBoundaries() size_t i = 0; do { - CPPUNIT_ASSERT(i < SAL_N_ELEMENTS(aSingleQuotePositions)); + CPPUNIT_ASSERT(i < std::size(aSingleQuotePositions)); nPos = m_xBreak->nextWord(aTest, nPos, aLocale, i18n::WordType::ANYWORD_IGNOREWHITESPACES).startPos; CPPUNIT_ASSERT_EQUAL(aSingleQuotePositions[i], nPos); 
++i; } while (nPos < aTest.getLength()); nPos = aTest.getLength(); - i = SAL_N_ELEMENTS(aSingleQuotePositions)-1; + i = std::size(aSingleQuotePositions)-1; do { nPos = m_xBreak->previousWord(aTest, nPos, aLocale, i18n::WordType::ANYWORD_IGNOREWHITESPACES).startPos; @@ -434,14 +628,14 @@ void TestBreakIterator::testWordBoundaries() size_t i = 0; do { - CPPUNIT_ASSERT(i < SAL_N_ELEMENTS(aExpected)); + CPPUNIT_ASSERT(i < std::size(aExpected)); nPos = m_xBreak->getWordBoundary(aTest, nPos, aLocale, i18n::WordType::DICTIONARY_WORD, true).endPos; CPPUNIT_ASSERT_EQUAL(aExpected[i], nPos); ++i; } while (nPos++ < aTest.getLength()); - CPPUNIT_ASSERT_EQUAL(SAL_N_ELEMENTS(aExpected), i); + CPPUNIT_ASSERT_EQUAL(std::size(aExpected), i); } //See https://bz.apache.org/ooo/show_bug.cgi?id=85411 @@ -466,22 +660,22 @@ void TestBreakIterator::testWordBoundaries() break; } - static constexpr OUStringLiteral aTest = - u"I\u200Bwant\u200Bto\u200Bgo"; + static constexpr OUString aTest = + u"I\u200Bwant\u200Bto\u200Bgo"_ustr; sal_Int32 nPos = 0; sal_Int32 aExpected[] = {1, 6, 9, 12}; size_t i = 0; do { - CPPUNIT_ASSERT(i < SAL_N_ELEMENTS(aExpected)); + CPPUNIT_ASSERT(i < std::size(aExpected)); nPos = m_xBreak->getWordBoundary(aTest, nPos, aLocale, i18n::WordType::DICTIONARY_WORD, true).endPos; CPPUNIT_ASSERT_EQUAL(aExpected[i], nPos); ++i; } while (nPos++ < aTest.getLength()); - CPPUNIT_ASSERT_EQUAL(SAL_N_ELEMENTS(aExpected), i); + CPPUNIT_ASSERT_EQUAL(std::size(aExpected), i); } //https://bz.apache.org/ooo/show_bug.cgi?id=21290 @@ -502,25 +696,25 @@ void TestBreakIterator::testWordBoundaries() break; } - static constexpr OUStringLiteral aTest = + static constexpr OUString aTest = u"\u1F0C\u03BD\u03B4\u03C1\u03B1 \u1F00" "\u03C1\u03BD\u1F7B\u03BC\u03B5\u03BD\u03BF" "\u03C2 \u1F00\u03BB\u03BB \u1F24" - "\u03C3\u03B8\u03B9\u03BF\u03BD"; + "\u03C3\u03B8\u03B9\u03BF\u03BD"_ustr; sal_Int32 nPos = 0; sal_Int32 aExpected[] = {5, 15, 19, 26}; size_t i = 0; do { - CPPUNIT_ASSERT(i < 
SAL_N_ELEMENTS(aExpected)); + CPPUNIT_ASSERT(i < std::size(aExpected)); nPos = m_xBreak->getWordBoundary(aTest, nPos, aLocale, i18n::WordType::DICTIONARY_WORD, true).endPos; CPPUNIT_ASSERT_EQUAL(aExpected[i], nPos); ++i; } while (nPos++ < aTest.getLength()); - CPPUNIT_ASSERT_EQUAL(SAL_N_ELEMENTS(aExpected), i); + CPPUNIT_ASSERT_EQUAL(std::size(aExpected), i); } //See https://bz.apache.org/ooo/show_bug.cgi?id=58513 @@ -537,14 +731,14 @@ void TestBreakIterator::testWordBoundaries() size_t i = 0; do { - CPPUNIT_ASSERT(i < SAL_N_ELEMENTS(aExpected)); + CPPUNIT_ASSERT(i < std::size(aExpected)); nPos = m_xBreak->getWordBoundary(aTest, nPos, aLocale, i18n::WordType::WORD_COUNT, true).endPos; CPPUNIT_ASSERT_EQUAL(aExpected[i], nPos); ++i; } while (nPos++ < aTest.getLength()); - CPPUNIT_ASSERT_EQUAL(SAL_N_ELEMENTS(aExpected), i); + CPPUNIT_ASSERT_EQUAL(std::size(aExpected), i); } { @@ -554,7 +748,7 @@ void TestBreakIterator::testWordBoundaries() size_t i = 0; do { - CPPUNIT_ASSERT(i < SAL_N_ELEMENTS(aExpected)); + CPPUNIT_ASSERT(i < std::size(aExpected)); aBounds = m_xBreak->getWordBoundary(aTest, nPos, aLocale, i18n::WordType::DICTIONARY_WORD, true); CPPUNIT_ASSERT_EQUAL(aExpected[i], aBounds.startPos); @@ -564,7 +758,7 @@ void TestBreakIterator::testWordBoundaries() nPos = aBounds.endPos; } while (nPos++ < aTest.getLength()); - CPPUNIT_ASSERT_EQUAL(SAL_N_ELEMENTS(aExpected), i); + CPPUNIT_ASSERT_EQUAL(std::size(aExpected), i); } } @@ -573,8 +767,8 @@ void TestBreakIterator::testWordBoundaries() aLocale.Language = "en"; aLocale.Country = "US"; - static constexpr OUStringLiteral aTest = - u"ru\uFB00le \uFB01sh"; + static constexpr OUString aTest = + u"ru\uFB00le \uFB01sh"_ustr; aBounds = m_xBreak->getWordBoundary(aTest, 1, aLocale, i18n::WordType::DICTIONARY_WORD, false); CPPUNIT_ASSERT_EQUAL(sal_Int32(0), aBounds.startPos); @@ -590,8 +784,8 @@ void TestBreakIterator::testWordBoundaries() aLocale.Language = "en"; aLocale.Country = "US"; - static constexpr OUStringLiteral 
aTest = - u"a\u2013b\u2014c"; + static constexpr OUString aTest = + u"a\u2013b\u2014c"_ustr; aBounds = m_xBreak->getWordBoundary(aTest, 0, aLocale, i18n::WordType::DICTIONARY_WORD, true); CPPUNIT_ASSERT_EQUAL(sal_Int32(0), aBounds.startPos); @@ -605,6 +799,174 @@ void TestBreakIterator::testWordBoundaries() CPPUNIT_ASSERT_EQUAL(sal_Int32(4), aBounds.startPos); CPPUNIT_ASSERT_EQUAL(sal_Int32(5), aBounds.endPos); } + + // i#55778: Words containing numbers get broken up + { + aLocale.Language = "en"; + aLocale.Country = "US"; + + static constexpr OUString aTest = u"first i18n third"_ustr; + + aBounds + = m_xBreak->getWordBoundary(aTest, 8, aLocale, i18n::WordType::DICTIONARY_WORD, false); + CPPUNIT_ASSERT_EQUAL(sal_Int32(6), aBounds.startPos); + CPPUNIT_ASSERT_EQUAL(sal_Int32(10), aBounds.endPos); + } + + // i#56347: "BreakIterator patch for Hungarian" + // Rules for Hungarian affixes after numbers and certain symbols + { + auto mode = i18n::WordType::DICTIONARY_WORD; + aLocale.Language = "hu"; + aLocale.Country = "HU"; + + OUString aTest = u"szavak 15 15-tel 15%-kal €-val szavak"_ustr; + + aBounds = m_xBreak->getWordBoundary(aTest, 2, aLocale, mode, true); + CPPUNIT_ASSERT_EQUAL(sal_Int32(0), aBounds.startPos); + CPPUNIT_ASSERT_EQUAL(sal_Int32(6), aBounds.endPos); + + aBounds = m_xBreak->getWordBoundary(aTest, 7, aLocale, mode, true); + CPPUNIT_ASSERT_EQUAL(sal_Int32(7), aBounds.startPos); + CPPUNIT_ASSERT_EQUAL(sal_Int32(9), aBounds.endPos); + + aBounds = m_xBreak->getWordBoundary(aTest, 11, aLocale, mode, true); + CPPUNIT_ASSERT_EQUAL(sal_Int32(10), aBounds.startPos); + CPPUNIT_ASSERT_EQUAL(sal_Int32(16), aBounds.endPos); + + aBounds = m_xBreak->getWordBoundary(aTest, 18, aLocale, mode, true); + CPPUNIT_ASSERT_EQUAL(sal_Int32(17), aBounds.startPos); + CPPUNIT_ASSERT_EQUAL(sal_Int32(24), aBounds.endPos); + + aBounds = m_xBreak->getWordBoundary(aTest, 25, aLocale, mode, true); + CPPUNIT_ASSERT_EQUAL(sal_Int32(25), aBounds.startPos); + 
CPPUNIT_ASSERT_EQUAL(sal_Int32(30), aBounds.endPos); + + aBounds = m_xBreak->getWordBoundary(aTest, 27, aLocale, mode, true); + CPPUNIT_ASSERT_EQUAL(sal_Int32(25), aBounds.startPos); + CPPUNIT_ASSERT_EQUAL(sal_Int32(30), aBounds.endPos); + + aBounds = m_xBreak->getWordBoundary(aTest, 34, aLocale, mode, true); + CPPUNIT_ASSERT_EQUAL(sal_Int32(31), aBounds.startPos); + CPPUNIT_ASSERT_EQUAL(sal_Int32(37), aBounds.endPos); + } + + // i#56348: Special chars in first pos not handled by spell checking in Writer (Hungarian) + // Rules for Hungarian affixes after numbers and certain symbols in edit mode. + // The patch was merged, but the original bug was never closed and the current behavior seems + // identical to the ICU default behavior. Added this test to ensure that doesn't change. + { + auto mode = i18n::WordType::ANY_WORD; + aLocale.Language = "hu"; + aLocale.Country = "HU"; + + OUString aTest = u"szavak 15 15-tel 15%-kal €-val szavak"_ustr; + + aBounds = m_xBreak->getWordBoundary(aTest, 2, aLocale, mode, true); + CPPUNIT_ASSERT_EQUAL(sal_Int32(0), aBounds.startPos); + CPPUNIT_ASSERT_EQUAL(sal_Int32(6), aBounds.endPos); + + aBounds = m_xBreak->getWordBoundary(aTest, 7, aLocale, mode, true); + CPPUNIT_ASSERT_EQUAL(sal_Int32(7), aBounds.startPos); + CPPUNIT_ASSERT_EQUAL(sal_Int32(9), aBounds.endPos); + + aBounds = m_xBreak->getWordBoundary(aTest, 11, aLocale, mode, true); + CPPUNIT_ASSERT_EQUAL(sal_Int32(10), aBounds.startPos); + CPPUNIT_ASSERT_EQUAL(sal_Int32(12), aBounds.endPos); + + aBounds = m_xBreak->getWordBoundary(aTest, 11, aLocale, mode, true); + CPPUNIT_ASSERT_EQUAL(sal_Int32(10), aBounds.startPos); + CPPUNIT_ASSERT_EQUAL(sal_Int32(12), aBounds.endPos); + + aBounds = m_xBreak->getWordBoundary(aTest, 12, aLocale, mode, true); + CPPUNIT_ASSERT_EQUAL(sal_Int32(12), aBounds.startPos); + CPPUNIT_ASSERT_EQUAL(sal_Int32(13), aBounds.endPos); + + aBounds = m_xBreak->getWordBoundary(aTest, 13, aLocale, mode, true); + CPPUNIT_ASSERT_EQUAL(sal_Int32(13), 
aBounds.startPos); + CPPUNIT_ASSERT_EQUAL(sal_Int32(16), aBounds.endPos); + + aBounds = m_xBreak->getWordBoundary(aTest, 16, aLocale, mode, true); + CPPUNIT_ASSERT_EQUAL(sal_Int32(16), aBounds.startPos); + CPPUNIT_ASSERT_EQUAL(sal_Int32(17), aBounds.endPos); + + aBounds = m_xBreak->getWordBoundary(aTest, 17, aLocale, mode, true); + CPPUNIT_ASSERT_EQUAL(sal_Int32(17), aBounds.startPos); + CPPUNIT_ASSERT_EQUAL(sal_Int32(19), aBounds.endPos); + + aBounds = m_xBreak->getWordBoundary(aTest, 19, aLocale, mode, true); + CPPUNIT_ASSERT_EQUAL(sal_Int32(19), aBounds.startPos); + CPPUNIT_ASSERT_EQUAL(sal_Int32(20), aBounds.endPos); + + aBounds = m_xBreak->getWordBoundary(aTest, 20, aLocale, mode, true); + CPPUNIT_ASSERT_EQUAL(sal_Int32(20), aBounds.startPos); + CPPUNIT_ASSERT_EQUAL(sal_Int32(21), aBounds.endPos); + + aBounds = m_xBreak->getWordBoundary(aTest, 21, aLocale, mode, true); + CPPUNIT_ASSERT_EQUAL(sal_Int32(21), aBounds.startPos); + CPPUNIT_ASSERT_EQUAL(sal_Int32(24), aBounds.endPos); + + aBounds = m_xBreak->getWordBoundary(aTest, 24, aLocale, mode, true); + CPPUNIT_ASSERT_EQUAL(sal_Int32(24), aBounds.startPos); + CPPUNIT_ASSERT_EQUAL(sal_Int32(25), aBounds.endPos); + + aBounds = m_xBreak->getWordBoundary(aTest, 25, aLocale, mode, true); + CPPUNIT_ASSERT_EQUAL(sal_Int32(25), aBounds.startPos); + CPPUNIT_ASSERT_EQUAL(sal_Int32(26), aBounds.endPos); + + aBounds = m_xBreak->getWordBoundary(aTest, 26, aLocale, mode, true); + CPPUNIT_ASSERT_EQUAL(sal_Int32(26), aBounds.startPos); + CPPUNIT_ASSERT_EQUAL(sal_Int32(27), aBounds.endPos); + + aBounds = m_xBreak->getWordBoundary(aTest, 27, aLocale, mode, true); + CPPUNIT_ASSERT_EQUAL(sal_Int32(27), aBounds.startPos); + CPPUNIT_ASSERT_EQUAL(sal_Int32(30), aBounds.endPos); + + aBounds = m_xBreak->getWordBoundary(aTest, 30, aLocale, mode, true); + CPPUNIT_ASSERT_EQUAL(sal_Int32(30), aBounds.startPos); + CPPUNIT_ASSERT_EQUAL(sal_Int32(31), aBounds.endPos); + + aBounds = m_xBreak->getWordBoundary(aTest, 31, aLocale, mode, true); + 
CPPUNIT_ASSERT_EQUAL(sal_Int32(31), aBounds.startPos); + CPPUNIT_ASSERT_EQUAL(sal_Int32(37), aBounds.endPos); + } +} + +void TestBreakIterator::testSentenceBoundaries() +{ + lang::Locale aLocale; + aLocale.Language = "en"; + aLocale.Country = "US"; + + // Trivial characteristic test for sentence boundary detection + { + OUString aTest("This is a sentence. This is a different sentence."); + + CPPUNIT_ASSERT_EQUAL(sal_Int32(0), m_xBreak->beginOfSentence(aTest, 5, aLocale)); + CPPUNIT_ASSERT_EQUAL(sal_Int32(19), m_xBreak->endOfSentence(aTest, 5, aLocale)); + CPPUNIT_ASSERT_EQUAL(sal_Int32(20), m_xBreak->beginOfSentence(aTest, 31, aLocale)); + CPPUNIT_ASSERT_EQUAL(sal_Int32(49), m_xBreak->endOfSentence(aTest, 31, aLocale)); + } + + // i#24098: i18n API beginOfSentence/endOfSentence + // fix beginOfSentence, ... when cursor is on the beginning of the sentence + { + OUString aTest("This is a sentence. This is a different sentence."); + + CPPUNIT_ASSERT_EQUAL(sal_Int32(20), m_xBreak->beginOfSentence(aTest, 20, aLocale)); + CPPUNIT_ASSERT_EQUAL(sal_Int32(49), m_xBreak->endOfSentence(aTest, 20, aLocale)); + } + + // i#24098: i18n API beginOfSentence/endOfSentence + // "skip preceding space for beginOfSentence" + { + OUString aTest("This is a sentence. 
This is a different sentence."); + + CPPUNIT_ASSERT_EQUAL(sal_Int32(0), m_xBreak->beginOfSentence(aTest, 20, aLocale)); + CPPUNIT_ASSERT_EQUAL(sal_Int32(19), m_xBreak->endOfSentence(aTest, 20, aLocale)); + CPPUNIT_ASSERT_EQUAL(sal_Int32(24), m_xBreak->beginOfSentence(aTest, 26, aLocale)); + CPPUNIT_ASSERT_EQUAL(sal_Int32(53), m_xBreak->endOfSentence(aTest, 26, aLocale)); + } } //See https://bugs.libreoffice.org/show_bug.cgi?id=40292 @@ -618,7 +980,7 @@ void TestBreakIterator::testGraphemeIteration() aLocale.Country = "IN"; { - static constexpr OUStringLiteral aTest = u"\u09AC\u09CD\u09AF"; // BA HALANT LA + static constexpr OUString aTest = u"\u09AC\u09CD\u09AF"_ustr; // BA HALANT LA sal_Int32 nDone=0; sal_Int32 nPos; @@ -631,7 +993,7 @@ void TestBreakIterator::testGraphemeIteration() } { - static constexpr OUStringLiteral aTest = u"\u09B9\u09CD\u09A3\u09BF"; + static constexpr OUString aTest = u"\u09B9\u09CD\u09A3\u09BF"_ustr; // HA HALANT NA VOWELSIGNI sal_Int32 nDone=0; @@ -645,7 +1007,7 @@ void TestBreakIterator::testGraphemeIteration() } { - static constexpr OUStringLiteral aTest = u"\u09A4\u09CD\u09AE\u09CD\u09AF"; + static constexpr OUString aTest = u"\u09A4\u09CD\u09AE\u09CD\u09AF"_ustr; // TA HALANT MA HALANT YA sal_Int32 nDone=0; @@ -662,7 +1024,7 @@ void TestBreakIterator::testGraphemeIteration() aLocale.Country = "IN"; { - static constexpr OUStringLiteral aTest = u"\u0B9A\u0BBF\u0BA4\u0BCD\u0BA4\u0BBF\u0BB0\u0BC8"; // CA VOWELSIGNI TA VIRAMA TA VOWELSIGNI RA VOWELSIGNAI + static constexpr OUString aTest = u"\u0B9A\u0BBF\u0BA4\u0BCD\u0BA4\u0BBF\u0BB0\u0BC8"_ustr; // CA VOWELSIGNI TA VIRAMA TA VOWELSIGNI RA VOWELSIGNAI sal_Int32 nDone=0; sal_Int32 nPos = 0; @@ -687,7 +1049,7 @@ void TestBreakIterator::testGraphemeIteration() } { - static constexpr OUStringLiteral aTest = u"\u0B95\u0BC1"; // KA VOWELSIGNU + static constexpr OUString aTest = u"\u0B95\u0BC1"_ustr; // KA VOWELSIGNU sal_Int32 nDone=0; sal_Int32 nPos = 0; @@ -701,8 +1063,8 @@ void 
TestBreakIterator::testGraphemeIteration() } { - static constexpr OUStringLiteral aTest = - u"\u0B9A\u0BBF\u0BA4\u0BCD\u0BA4\u0BBF\u0BB0\u0BC8"; + static constexpr OUString aTest = + u"\u0B9A\u0BBF\u0BA4\u0BCD\u0BA4\u0BBF\u0BB0\u0BC8"_ustr; // CA VOWELSIGNI TA VIRAMA TA VOWELSIGNI RA VOWELSIGNAI sal_Int32 nDone=0; @@ -726,7 +1088,7 @@ void TestBreakIterator::testGraphemeIteration() } { - static constexpr OUStringLiteral aText = u"\u05D0\u05B8"; // ALEF QAMATS + static constexpr OUString aText = u"\u05D0\u05B8"_ustr; // ALEF QAMATS sal_Int32 nGraphemeCount = 0; @@ -746,7 +1108,7 @@ void TestBreakIterator::testGraphemeIteration() aLocale.Country = "IN"; { - static constexpr OUStringLiteral aTest = u"\u0936\u0940"; // SHA VOWELSIGNII + static constexpr OUString aTest = u"\u0936\u0940"_ustr; // SHA VOWELSIGNII sal_Int32 nDone=0; sal_Int32 nPos = 0; @@ -758,6 +1120,48 @@ void TestBreakIterator::testGraphemeIteration() i18n::CharacterIteratorMode::SKIPCELL, 1, nDone); CPPUNIT_ASSERT_EQUAL_MESSAGE("Should skip full grapheme", static_cast<sal_Int32>(0), nPos); } + + // tdf#49885: Replace custom Thai implementation with ICU + { + aLocale.Language = "th"; + aLocale.Country = "TH"; + + static constexpr OUString aTest = u"กำ"_ustr; + + CPPUNIT_ASSERT_EQUAL(sal_Int32{ 2 }, aTest.getLength()); + + sal_Int32 nDone = 0; + sal_Int32 nPos = 0; + + nPos = m_xBreak->nextCharacters(aTest, 0, aLocale, i18n::CharacterIteratorMode::SKIPCELL, 1, + nDone); + CPPUNIT_ASSERT_EQUAL(aTest.getLength(), nPos); + + nPos = m_xBreak->previousCharacters(aTest, aTest.getLength(), aLocale, + i18n::CharacterIteratorMode::SKIPCELL, 1, nDone); + CPPUNIT_ASSERT_EQUAL(sal_Int32{ 0 }, nPos); + } + + // Korean may also use grapheme clusters for character composition + { + aLocale.Language = "ko"; + aLocale.Country = "KR"; + + static constexpr OUString aTest = u"각"_ustr; + + CPPUNIT_ASSERT_EQUAL(sal_Int32{ 3 }, aTest.getLength()); + + sal_Int32 nDone = 0; + sal_Int32 nPos = 0; + + nPos = 
m_xBreak->nextCharacters(aTest, 0, aLocale, i18n::CharacterIteratorMode::SKIPCELL, 1, + nDone); + CPPUNIT_ASSERT_EQUAL(aTest.getLength(), nPos); + + nPos = m_xBreak->previousCharacters(aTest, aTest.getLength(), aLocale, + i18n::CharacterIteratorMode::SKIPCELL, 1, nDone); + CPPUNIT_ASSERT_EQUAL(sal_Int32{ 0 }, nPos); + } } //A test to ensure that certain ranges and codepoints that are categorized as @@ -770,7 +1174,7 @@ void TestBreakIterator::testWeak() aLocale.Country = "US"; { - static constexpr OUStringLiteral aWeaks = + static constexpr OUString aWeaks = u"\u0001\u0002" " \u00A0" "\u0300\u036F" //Combining Diacritical Marks @@ -786,7 +1190,7 @@ void TestBreakIterator::testWeak() "\u2100\u214F" //Letterlike Symbols "\u2308\u230B" //Miscellaneous technical "\u25A0\u25FF" //Geometric Shapes - "\u2B30\u2B4C"; //Miscellaneous Symbols and Arrows + "\u2B30\u2B4C"_ustr; //Miscellaneous Symbols and Arrows for (sal_Int32 i = 0; i < aWeaks.getLength(); ++i) { @@ -812,7 +1216,7 @@ void TestBreakIterator::testAsian() aLocale.Country = "US"; { - static constexpr OUStringLiteral aAsians = + static constexpr OUString aAsians = //some typical CJK chars u"\u4E00\u62FF" //The full HalfWidth and FullWidth block has historically been @@ -822,7 +1226,7 @@ void TestBreakIterator::testAsian() "\uFF10\uFF19" //HalfWidth and FullWidth forms of ASCII A-z, categorized under //UAX25 as "Latin", i.e. 
by that logic LATIN - "\uFF21\uFF5A"; + "\uFF21\uFF5A"_ustr; for (sal_Int32 i = 0; i < aAsians.getLength(); ++i) { @@ -837,7 +1241,6 @@ void TestBreakIterator::testAsian() } } -#if (U_ICU_VERSION_MAJOR_NUM > 51) //A test to ensure that our Lao word boundary detection is useful void TestBreakIterator::testLao() { @@ -845,7 +1248,7 @@ void TestBreakIterator::testLao() aLocale.Language = "lo"; aLocale.Country = "LA"; - static constexpr OUStringLiteral aTest = u"\u0e8d\u0eb4\u0e99\u0e94\u0eb5\u0e95\u0ec9\u0ead\u0e99\u0eae\u0eb1\u0e9a"; + static constexpr OUString aTest = u"\u0e8d\u0eb4\u0e99\u0e94\u0eb5\u0e95\u0ec9\u0ead\u0e99\u0eae\u0eb1\u0e9a"_ustr; i18n::Boundary aBounds = m_xBreak->getWordBoundary(aTest, 0, aLocale, i18n::WordType::DICTIONARY_WORD, true); @@ -856,9 +1259,20 @@ void TestBreakIterator::testLao() i18n::WordType::DICTIONARY_WORD, true); CPPUNIT_ASSERT_EQUAL(sal_Int32(5), aBounds.startPos); +#if (U_ICU_VERSION_MAJOR_NUM < 70) CPPUNIT_ASSERT_EQUAL(sal_Int32(9), aBounds.endPos); -} +#else + // FIXME: + // In ICU 70/71 for yet unknown reason the word boundary 9 is not detected and + // instead the length 12 is returned as endpos. 
+ // Deep in + // icu_70::RuleBasedBreakIterator::BreakCache::next() + // icu_70::RuleBasedBreakIterator::BreakCache::following() + // icu_70::RuleBasedBreakIterator::following() + // i18npool::BreakIterator_Unicode::getWordBoundary() + CPPUNIT_ASSERT_EQUAL(sal_Int32(12), aBounds.endPos); #endif +} //A test to ensure that our thai word boundary detection is useful void TestBreakIterator::testThai() @@ -869,7 +1283,7 @@ void TestBreakIterator::testThai() //See http://lists.freedesktop.org/archives/libreoffice/2012-February/025959.html { - static constexpr OUStringLiteral aTest = u"\u0E01\u0E38\u0E2B\u0E25\u0E32\u0E1A"; + static constexpr OUString aTest = u"\u0E01\u0E38\u0E2B\u0E25\u0E32\u0E1A"_ustr; i18n::Boundary aBounds = m_xBreak->getWordBoundary(aTest, 0, aLocale, i18n::WordType::DICTIONARY_WORD, true); CPPUNIT_ASSERT_EQUAL_MESSAGE("Should skip full word", @@ -881,13 +1295,13 @@ void TestBreakIterator::testThai() //See https://bz.apache.org/ooo/show_bug.cgi?id=29548 //make sure forwards and back are consistent { - static constexpr OUStringLiteral aTest = + static constexpr OUString aTest = u"\u0E2D\u0E38\u0E17\u0E22\u0E32\u0E19\u0E41" "\u0E2B\u0E48\u0E07\u0E0A\u0E32\u0E15\u0E34" "\u0E19\u0E49\u0E33\u0E2B\u0E19\u0E32\u0E27" "\u0E2D\u0E38\u0E17\u0E22\u0E32\u0E19\u0E41" "\u0E2B\u0E48\u0E07\u0E0A\u0E32\u0E15\u0E34" - "\u0E19\u0E49\u0E33\u0E2B\u0E19\u0E32\u0E27"; + "\u0E19\u0E49\u0E33\u0E2B\u0E19\u0E32\u0E27"_ustr; std::stack<sal_Int32> aPositions; sal_Int32 nPos = -1; @@ -912,7 +1326,7 @@ void TestBreakIterator::testThai() // tdf#113694 { - static constexpr OUStringLiteral aTest = u"\U00010000"; + static constexpr OUString aTest = u"\U00010000"_ustr; sal_Int32 nDone=0; sal_Int32 nPos; @@ -995,7 +1409,7 @@ void TestBreakIterator::doTestJapanese(uno::Reference< i18n::XBreakIterator > co } { - static constexpr OUStringLiteral aTest = u"\u9EBB\u306E\u8449\u9EBB\u306E\u8449"; + static constexpr OUString aTest = u"\u9EBB\u306E\u8449\u9EBB\u306E\u8449"_ustr; aBounds = 
xBreak->getWordBoundary(aTest, 1, aLocale, i18n::WordType::DICTIONARY_WORD, true); @@ -1037,6 +1451,199 @@ void TestBreakIterator::testChinese() CPPUNIT_ASSERT_EQUAL(sal_Int32(6), aBounds.endPos); } } + +void TestBreakIterator::testLegacyDictWordPrepostDash_de_DE() +{ + lang::Locale aLocale; + aLocale.Language = "de"; + aLocale.Country = "DE"; + + { + auto aTest = u"Arbeits- -nehmer"_ustr; + + i18n::Boundary aBounds + = m_xBreak->getWordBoundary(aTest, 3, aLocale, i18n::WordType::DICTIONARY_WORD, false); + CPPUNIT_ASSERT_EQUAL(sal_Int32(0), aBounds.startPos); + CPPUNIT_ASSERT_EQUAL(sal_Int32(8), aBounds.endPos); + + aBounds + = m_xBreak->getWordBoundary(aTest, 13, aLocale, i18n::WordType::DICTIONARY_WORD, false); + CPPUNIT_ASSERT_EQUAL(sal_Int32(9), aBounds.startPos); + CPPUNIT_ASSERT_EQUAL(sal_Int32(16), aBounds.endPos); + } +} + +void TestBreakIterator::testLegacyDictWordPrepostDash_nds_DE() +{ + lang::Locale aLocale; + aLocale.Language = "nds"; + aLocale.Country = "DE"; + + { + auto aTest = u"Arbeits- -nehmer"_ustr; + + i18n::Boundary aBounds + = m_xBreak->getWordBoundary(aTest, 3, aLocale, i18n::WordType::DICTIONARY_WORD, false); + CPPUNIT_ASSERT_EQUAL(sal_Int32(0), aBounds.startPos); + CPPUNIT_ASSERT_EQUAL(sal_Int32(8), aBounds.endPos); + + aBounds + = m_xBreak->getWordBoundary(aTest, 13, aLocale, i18n::WordType::DICTIONARY_WORD, false); + CPPUNIT_ASSERT_EQUAL(sal_Int32(9), aBounds.startPos); + CPPUNIT_ASSERT_EQUAL(sal_Int32(16), aBounds.endPos); + } +} + +void TestBreakIterator::testLegacyDictWordPrepostDash_nl_NL() +{ + lang::Locale aLocale; + aLocale.Language = "nl"; + aLocale.Country = "NL"; + + { + auto aTest = u"Arbeits- -nehmer"_ustr; + + i18n::Boundary aBounds + = m_xBreak->getWordBoundary(aTest, 3, aLocale, i18n::WordType::DICTIONARY_WORD, false); + CPPUNIT_ASSERT_EQUAL(sal_Int32(0), aBounds.startPos); + CPPUNIT_ASSERT_EQUAL(sal_Int32(8), aBounds.endPos); + + aBounds + = m_xBreak->getWordBoundary(aTest, 13, aLocale, i18n::WordType::DICTIONARY_WORD, 
false); + CPPUNIT_ASSERT_EQUAL(sal_Int32(9), aBounds.startPos); + CPPUNIT_ASSERT_EQUAL(sal_Int32(16), aBounds.endPos); + } +} + +void TestBreakIterator::testLegacyDictWordPrepostDash_sv_SE() +{ + lang::Locale aLocale; + aLocale.Language = "sv"; + aLocale.Country = "SE"; + + { + auto aTest = u"Arbeits- -nehmer"_ustr; + + i18n::Boundary aBounds + = m_xBreak->getWordBoundary(aTest, 3, aLocale, i18n::WordType::DICTIONARY_WORD, false); + CPPUNIT_ASSERT_EQUAL(sal_Int32(0), aBounds.startPos); + CPPUNIT_ASSERT_EQUAL(sal_Int32(8), aBounds.endPos); + + aBounds + = m_xBreak->getWordBoundary(aTest, 13, aLocale, i18n::WordType::DICTIONARY_WORD, false); + CPPUNIT_ASSERT_EQUAL(sal_Int32(9), aBounds.startPos); + CPPUNIT_ASSERT_EQUAL(sal_Int32(16), aBounds.endPos); + } +} + +void TestBreakIterator::testLegacyHebrewQuoteInsideWord() +{ + lang::Locale aLocale; + + aLocale.Language = "he"; + aLocale.Country = "IL"; + + { + auto aTest = u"פַּרְדּ״ס פַּרְדּ\"ס"_ustr; + + i18n::Boundary aBounds + = m_xBreak->getWordBoundary(aTest, 3, aLocale, i18n::WordType::DICTIONARY_WORD, false); + CPPUNIT_ASSERT_EQUAL(sal_Int32(0), aBounds.startPos); + CPPUNIT_ASSERT_EQUAL(sal_Int32(9), aBounds.endPos); + + aBounds + = m_xBreak->getWordBoundary(aTest, 13, aLocale, i18n::WordType::DICTIONARY_WORD, false); + CPPUNIT_ASSERT_EQUAL(sal_Int32(10), aBounds.startPos); + CPPUNIT_ASSERT_EQUAL(sal_Int32(19), aBounds.endPos); + } +} + +void TestBreakIterator::testLegacySurrogatePairs() +{ + lang::Locale aLocale; + + aLocale.Language = "ja"; + aLocale.Country = "JP"; + + // i#75632: [surrogate pair] Japanese word break does not work properly for surrogate pairs. + // and many others to address bugs: i#75631 i#75633 i#75412 etc. + // + // BreakIterator supports surrogate pairs (UTF-16). This is a simple characteristic test. 
+ { + const sal_Unicode buf[] = { u"X 𠮟 X" }; + OUString aTest(buf, SAL_N_ELEMENTS(buf)); + + auto aBounds + = m_xBreak->getWordBoundary(aTest, 1, aLocale, i18n::WordType::DICTIONARY_WORD, false); + CPPUNIT_ASSERT_EQUAL(sal_Int32(0), aBounds.startPos); + CPPUNIT_ASSERT_EQUAL(sal_Int32(1), aBounds.endPos); + + aBounds + = m_xBreak->getWordBoundary(aTest, 2, aLocale, i18n::WordType::DICTIONARY_WORD, false); + CPPUNIT_ASSERT_EQUAL(sal_Int32(2), aBounds.startPos); + CPPUNIT_ASSERT_EQUAL(sal_Int32(5), aBounds.endPos); + + aBounds + = m_xBreak->getWordBoundary(aTest, 5, aLocale, i18n::WordType::DICTIONARY_WORD, false); + CPPUNIT_ASSERT_EQUAL(sal_Int32(5), aBounds.startPos); + CPPUNIT_ASSERT_EQUAL(sal_Int32(6), aBounds.endPos); + } +} + +void TestBreakIterator::testLegacyWordCountCompat() +{ + lang::Locale aLocale; + + aLocale.Language = "en"; + aLocale.Country = "US"; + + // i#80815: "Word count differs from MS Word" + // This is a characteristic test for word count using test data from the linked bug. 
+ { + const OUString str = u"" + "test data for word count issue #80815\n" + "fo\\\'sforos\n" + "archipi\\\'elago\n" + "do\\^me\n" + "f**k\n" + "\n" + "battery-driven\n" + "and/or\n" + "apple(s)\n" + "money+opportunity\n" + "Micro$oft\n" + "\n" + "300$\n" + "I(not you)\n" + "a****n\n" + "1+3=4\n" + "\n" + "aaaaaaa.aaaaaaa\n" + "aaaaaaa,aaaaaaa\n" + "aaaaaaa;aaaaaaa\n"_ustr; + + int num_words = 0; + sal_Int32 next_pos = 0; + int iter_guard = 0; + while (true) + { + CPPUNIT_ASSERT_MESSAGE("Tripped infinite loop check", ++iter_guard < 100); + + auto aBounds = m_xBreak->nextWord(str, next_pos, aLocale, i18n::WordType::WORD_COUNT); + + if (aBounds.endPos < next_pos) + { + break; + } + + next_pos = aBounds.endPos; + ++num_words; + } + + CPPUNIT_ASSERT_EQUAL(23, num_words); + } +} + void TestBreakIterator::setUp() { BootstrapFixtureBase::setUp(); diff --git a/i18npool/qa/cppunit/test_calendar.cxx b/i18npool/qa/cppunit/test_calendar.cxx new file mode 100644 index 000000000000..a522a4a74236 --- /dev/null +++ b/i18npool/qa/cppunit/test_calendar.cxx @@ -0,0 +1,130 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; fill-column: 100 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ */ +#include <calendar_hijri.hxx> +#include <unotest/bootstrapfixturebase.hxx> + +using namespace com::sun::star; + +class TestCalendar : public test::BootstrapFixtureBase +{ +public: + void testHijriGregorian(); + void testGetGregorianJulianDay(); + + CPPUNIT_TEST_SUITE(TestCalendar); + CPPUNIT_TEST(testHijriGregorian); + CPPUNIT_TEST(testGetGregorianJulianDay); + CPPUNIT_TEST_SUITE_END(); +}; + +void TestCalendar::testHijriGregorian() +{ + // 21-7-1443 (Hijri) == 22-2-2022 (Gregorian) + sal_Int32 day = 22, month = 2, year = 2022; + i18npool::Calendar_hijri::getHijri(&day, &month, &year); + CPPUNIT_ASSERT_EQUAL(sal_Int32(21), day); + CPPUNIT_ASSERT_EQUAL(sal_Int32(7), month); + CPPUNIT_ASSERT_EQUAL(sal_Int32(1443), year); + + i18npool::Calendar_hijri::ToGregorian(&day, &month, &year); + CPPUNIT_ASSERT_EQUAL(sal_Int32(22), day); + CPPUNIT_ASSERT_EQUAL(sal_Int32(2), month); + CPPUNIT_ASSERT_EQUAL(sal_Int32(2022), year); + + // 1-1-1 (Hijri) == 15-7-622 (Gregorian) + // NOTE: The calculated date is 15-7-622, as it was with the + // previous version of i18npool::Calendar_hijri::ToGregorian() + // but in some articles, 16-7-622 is considered the equivalent date + // https://en.wikipedia.org/wiki/622 + // This article states that 15-7-622 is correct: + // "On the Origins of the Hijrī Calendar: A Multi-Faceted Perspective + // Based on the Covenants of the Prophet and Specific Date Verification" + // https://www.mdpi.com/2077-1444/12/1/42/htm + day = 15; + month = 7; + year = 622; + i18npool::Calendar_hijri::getHijri(&day, &month, &year); + CPPUNIT_ASSERT_EQUAL(sal_Int32(1), day); + CPPUNIT_ASSERT_EQUAL(sal_Int32(1), month); + CPPUNIT_ASSERT_EQUAL(sal_Int32(1), year); + + i18npool::Calendar_hijri::ToGregorian(&day, &month, &year); + CPPUNIT_ASSERT_EQUAL(sal_Int32(15), day); + CPPUNIT_ASSERT_EQUAL(sal_Int32(7), month); + CPPUNIT_ASSERT_EQUAL(sal_Int32(622), year); + + // 1-1-100 (Hijri) == 2-8-718 (Gregorian) + // https://habibur.com/hijri/100/ + day = 2; + month = 8; +
year = 718; + i18npool::Calendar_hijri::getHijri(&day, &month, &year); + CPPUNIT_ASSERT_EQUAL(sal_Int32(1), day); + CPPUNIT_ASSERT_EQUAL(sal_Int32(1), month); + CPPUNIT_ASSERT_EQUAL(sal_Int32(100), year); + + i18npool::Calendar_hijri::ToGregorian(&day, &month, &year); + CPPUNIT_ASSERT_EQUAL(sal_Int32(2), day); + CPPUNIT_ASSERT_EQUAL(sal_Int32(8), month); + CPPUNIT_ASSERT_EQUAL(sal_Int32(718), year); + + // 1-1-1000 (Hijri) == 19-10-1591 (Gregorian) + // NOTE: The calculated date is 18-10-1591, but there is inconsistency + // with this website, as it states it should be 19-10-1591 + // https://habibur.com/hijri/1000/ + day = 18; + month = 10; + year = 1591; + i18npool::Calendar_hijri::getHijri(&day, &month, &year); + CPPUNIT_ASSERT_EQUAL(sal_Int32(1), day); + CPPUNIT_ASSERT_EQUAL(sal_Int32(1), month); + CPPUNIT_ASSERT_EQUAL(sal_Int32(1000), year); + + i18npool::Calendar_hijri::ToGregorian(&day, &month, &year); + CPPUNIT_ASSERT_EQUAL(sal_Int32(18), day); + CPPUNIT_ASSERT_EQUAL(sal_Int32(10), month); + CPPUNIT_ASSERT_EQUAL(sal_Int32(1591), year); + + // 1-1-2000 (Hijri) == 7-1-2562 (Gregorian) + // NOTE: The calculated date is 7-1-2562, but there is inconsistency + // with this website, as it states it should be 8-1-2562 + // https://habibur.com/hijri/2000/ + day = 7; + month = 1; + year = 2562; + i18npool::Calendar_hijri::getHijri(&day, &month, &year); + CPPUNIT_ASSERT_EQUAL(sal_Int32(1), day); + CPPUNIT_ASSERT_EQUAL(sal_Int32(1), month); + CPPUNIT_ASSERT_EQUAL(sal_Int32(2000), year); + + i18npool::Calendar_hijri::ToGregorian(&day, &month, &year); + CPPUNIT_ASSERT_EQUAL(sal_Int32(7), day); + CPPUNIT_ASSERT_EQUAL(sal_Int32(1), month); + CPPUNIT_ASSERT_EQUAL(sal_Int32(2562), year); +} + +void TestCalendar::testGetGregorianJulianDay() +{ + // Julian day for 22-2-2022 (Gregorian) == 2459633 + // https://core2.gsfc.nasa.gov/time/julian.html + sal_Int32 lJulianDay, day = 22, month = 2, year = 2022; + lJulianDay = i18npool::Calendar_hijri::getJulianDay(day, month, year); + 
CPPUNIT_ASSERT_EQUAL(sal_Int32(2459633), lJulianDay); + + i18npool::Calendar_hijri::getGregorianDay(lJulianDay, &day, &month, &year); + CPPUNIT_ASSERT_EQUAL(sal_Int32(22), day); + CPPUNIT_ASSERT_EQUAL(sal_Int32(2), month); + CPPUNIT_ASSERT_EQUAL(sal_Int32(2022), year); +} + +CPPUNIT_TEST_SUITE_REGISTRATION(TestCalendar); + +CPPUNIT_PLUGIN_IMPLEMENT(); + +/* vim:set shiftwidth=4 softtabstop=4 expandtab cinoptions=b1,g0,N-s cinkeys+=0=break: */ diff --git a/i18npool/qa/cppunit/test_characterclassification.cxx b/i18npool/qa/cppunit/test_characterclassification.cxx index 846477d615f1..06f6095a376a 100644 --- a/i18npool/qa/cppunit/test_characterclassification.cxx +++ b/i18npool/qa/cppunit/test_characterclassification.cxx @@ -19,21 +19,14 @@ public: virtual void setUp() override; virtual void tearDown() override; - void testTitleCase(); - void testStringType(); - - CPPUNIT_TEST_SUITE(TestCharacterClassification); - CPPUNIT_TEST(testTitleCase); - CPPUNIT_TEST(testStringType); - CPPUNIT_TEST_SUITE_END(); -private: +protected: uno::Reference<i18n::XCharacterClassification> m_xCC; }; //A test to ensure that our Title Case functionality is working //http://lists.freedesktop.org/archives/libreoffice/2012-June/032767.html //https://bz.apache.org/ooo/show_bug.cgi?id=30863 -void TestCharacterClassification::testTitleCase() +CPPUNIT_TEST_FIXTURE(TestCharacterClassification, testTitleCase) { lang::Locale aLocale; aLocale.Language = "en"; @@ -52,7 +45,7 @@ void TestCharacterClassification::testTitleCase() { //tricky one - static constexpr OUStringLiteral aTest = u"\u01F3"; // LATIN SMALL LETTER DZ + static constexpr OUString aTest = u"\u01F3"_ustr; // LATIN SMALL LETTER DZ OUString sTitleCase = m_xCC->toTitle(aTest, 0, aTest.getLength(), aLocale); CPPUNIT_ASSERT_EQUAL_MESSAGE("Should be title", sal_Int32(1), sTitleCase.getLength()); CPPUNIT_ASSERT_EQUAL_MESSAGE("Should be title", u'\u01F2', sTitleCase[0]); @@ -66,7 +59,7 @@ void TestCharacterClassification::testTitleCase() } 
//https://bugs.libreoffice.org/show_bug.cgi?id=69641 -void TestCharacterClassification::testStringType() +CPPUNIT_TEST_FIXTURE(TestCharacterClassification, testStringType) { lang::Locale aLocale; aLocale.Language = "en"; @@ -81,13 +74,204 @@ void TestCharacterClassification::testStringType() { //tricky case - static constexpr OUStringLiteral sTest = u"\U0001D703"; // MATHEMATICAL ITALIC SMALL THETA + static constexpr OUString sTest = u"\U0001D703"_ustr; // MATHEMATICAL ITALIC SMALL THETA sal_Int32 nResult = m_xCC->getStringType(sTest, 0, sTest.getLength(), aLocale); CPPUNIT_ASSERT_EQUAL(sal_Int32(228), nResult); } } +CPPUNIT_TEST_FIXTURE(TestCharacterClassification, testSigma) +{ + { + // From upper case + OUString sTest(u"ὈΔΥΣΣΕΎΣ"_ustr); + OUString sLowerCase = m_xCC->toLower(sTest, 0, sTest.getLength(), {}); + CPPUNIT_ASSERT_EQUAL_MESSAGE("Should be lower", u"ὀδυσσεύς"_ustr, sLowerCase); + OUString sUpperCase = m_xCC->toUpper(sLowerCase, 0, sLowerCase.getLength(), {}); + CPPUNIT_ASSERT_EQUAL_MESSAGE("Should be upper", sTest, sUpperCase); + } + + { + // From lower case + OUString sTest(u"ὀδυσσεύς"_ustr); + OUString sTitleCase = m_xCC->toTitle(sTest, 0, sTest.getLength(), {}); + CPPUNIT_ASSERT_EQUAL_MESSAGE("Should be title", u"Ὀδυσσεύς"_ustr, sTitleCase); + OUString sUpperCase = m_xCC->toUpper(sTest, 0, sTest.getLength(), {}); + CPPUNIT_ASSERT_EQUAL_MESSAGE("Should be upper", u"ὈΔΥΣΣΕΎΣ"_ustr, sUpperCase); + OUString sLowerCase = m_xCC->toLower(sUpperCase, 0, sUpperCase.getLength(), {}); + CPPUNIT_ASSERT_EQUAL_MESSAGE("Should be lower", sTest, sLowerCase); + } + + { + // From title case + OUString sTest(u"Ὀδυσσεύς"_ustr); + OUString sTitleCase = m_xCC->toTitle(sTest, 0, sTest.getLength(), {}); + CPPUNIT_ASSERT_EQUAL_MESSAGE("Should be title", u"Ὀδυσσεύς"_ustr, sTitleCase); + OUString sUpperCase = m_xCC->toUpper(sTest, 0, sTest.getLength(), {}); + CPPUNIT_ASSERT_EQUAL_MESSAGE("Should be upper", u"ὈΔΥΣΣΕΎΣ"_ustr, sUpperCase); + OUString sLowerCase = 
m_xCC->toLower(sTest, 0, sTest.getLength(), {}); + CPPUNIT_ASSERT_EQUAL_MESSAGE("Should be lower", u"ὀδυσσεύς"_ustr, sLowerCase); + } +} + +CPPUNIT_TEST_FIXTURE(TestCharacterClassification, testTdf96343) +{ + { + // From upper case + OUString sTest(u"ꙊꙌꙖ"_ustr); + OUString sLowerCase = m_xCC->toLower(sTest, 0, sTest.getLength(), {}); + CPPUNIT_ASSERT_EQUAL_MESSAGE("Should be lower", u"ꙋꙍꙗ"_ustr, sLowerCase); + OUString sUpperCase = m_xCC->toUpper(sLowerCase, 0, sLowerCase.getLength(), {}); + CPPUNIT_ASSERT_EQUAL_MESSAGE("Should be upper", sTest, sUpperCase); + } + + { + // From lower case + OUString sTest(u"ꙋꙍꙗ"_ustr); + OUString sTitleCase = m_xCC->toTitle(sTest, 0, sTest.getLength(), {}); + CPPUNIT_ASSERT_EQUAL_MESSAGE("Should be title", u"Ꙋꙍꙗ"_ustr, sTitleCase); + OUString sUpperCase = m_xCC->toUpper(sTest, 0, sTest.getLength(), {}); + CPPUNIT_ASSERT_EQUAL_MESSAGE("Should be upper", u"ꙊꙌꙖ"_ustr, sUpperCase); + OUString sLowerCase = m_xCC->toLower(sUpperCase, 0, sUpperCase.getLength(), {}); + CPPUNIT_ASSERT_EQUAL_MESSAGE("Should be lower", sTest, sLowerCase); + } + + { + // From title case + OUString sTest(u"Ꙋꙍꙗ"_ustr); + OUString sTitleCase = m_xCC->toTitle(sTest, 0, sTest.getLength(), {}); + CPPUNIT_ASSERT_EQUAL_MESSAGE("Should be title", u"Ꙋꙍꙗ"_ustr, sTitleCase); + OUString sUpperCase = m_xCC->toUpper(sTest, 0, sTest.getLength(), {}); + CPPUNIT_ASSERT_EQUAL_MESSAGE("Should be upper", u"ꙊꙌꙖ"_ustr, sUpperCase); + OUString sLowerCase = m_xCC->toLower(sTest, 0, sTest.getLength(), {}); + CPPUNIT_ASSERT_EQUAL_MESSAGE("Should be lower", u"ꙋꙍꙗ"_ustr, sLowerCase); + } +} + +CPPUNIT_TEST_FIXTURE(TestCharacterClassification, testTdf134766) +{ + { + // From upper case + OUString sTest(u"QꞋORBꞋAL"_ustr); + OUString sLowerCase = m_xCC->toLower(sTest, 0, sTest.getLength(), {}); + CPPUNIT_ASSERT_EQUAL_MESSAGE("Should be lower", u"qꞌorbꞌal"_ustr, sLowerCase); + OUString sUpperCase = m_xCC->toUpper(sLowerCase, 0, sLowerCase.getLength(), {}); + 
CPPUNIT_ASSERT_EQUAL_MESSAGE("Should be upper", sTest, sUpperCase); + } + + { + // From lower case + OUString sTest(u"qꞌorbꞌal"_ustr); + OUString sTitleCase = m_xCC->toTitle(sTest, 0, sTest.getLength(), {}); + CPPUNIT_ASSERT_EQUAL_MESSAGE("Should be title", u"Qꞌorbꞌal"_ustr, sTitleCase); + OUString sUpperCase = m_xCC->toUpper(sTest, 0, sTest.getLength(), {}); + CPPUNIT_ASSERT_EQUAL_MESSAGE("Should be upper", u"QꞋORBꞋAL"_ustr, sUpperCase); + OUString sLowerCase = m_xCC->toLower(sUpperCase, 0, sUpperCase.getLength(), {}); + CPPUNIT_ASSERT_EQUAL_MESSAGE("Should be lower", sTest, sLowerCase); + } + + { + // From title case + OUString sTest(u"Qꞌorbꞌal"_ustr); + OUString sTitleCase = m_xCC->toTitle(sTest, 0, sTest.getLength(), {}); + CPPUNIT_ASSERT_EQUAL_MESSAGE("Should be title", u"Qꞌorbꞌal"_ustr, sTitleCase); + OUString sUpperCase = m_xCC->toUpper(sTest, 0, sTest.getLength(), {}); + CPPUNIT_ASSERT_EQUAL_MESSAGE("Should be upper", u"QꞋORBꞋAL"_ustr, sUpperCase); + OUString sLowerCase = m_xCC->toLower(sTest, 0, sTest.getLength(), {}); + CPPUNIT_ASSERT_EQUAL_MESSAGE("Should be lower", u"qꞌorbꞌal"_ustr, sLowerCase); + } +} + +CPPUNIT_TEST_FIXTURE(TestCharacterClassification, testTdf97152) +{ + { + // From upper case + OUString sTest(u"ͲͰϽϾϿͿϏϹ"_ustr); + OUString sLowerCase = m_xCC->toLower(sTest, 0, sTest.getLength(), {}); + CPPUNIT_ASSERT_EQUAL_MESSAGE("Should be lower", u"ͳͱͻͼͽϳϗϲ"_ustr, sLowerCase); + OUString sUpperCase = m_xCC->toUpper(sLowerCase, 0, sLowerCase.getLength(), {}); + CPPUNIT_ASSERT_EQUAL_MESSAGE("Should be upper", sTest, sUpperCase); + } + + { + // From lower case + OUString sTest(u"ͳͱͻͼͽϳϗϲ"_ustr); + OUString sTitleCase = m_xCC->toTitle(sTest, 0, sTest.getLength(), {}); + CPPUNIT_ASSERT_EQUAL_MESSAGE("Should be title", u"Ͳͱͻͼͽϳϗϲ"_ustr, sTitleCase); + OUString sUpperCase = m_xCC->toUpper(sTest, 0, sTest.getLength(), {}); + CPPUNIT_ASSERT_EQUAL_MESSAGE("Should be upper", u"ͲͰϽϾϿͿϏϹ"_ustr, sUpperCase); + OUString sLowerCase = m_xCC->toLower(sUpperCase, 0, 
sUpperCase.getLength(), {}); + CPPUNIT_ASSERT_EQUAL_MESSAGE("Should be lower", sTest, sLowerCase); + } + + { + // From title case + OUString sTest(u"Ͳͱͻͼͽϳϗϲ"_ustr); + OUString sTitleCase = m_xCC->toTitle(sTest, 0, sTest.getLength(), {}); + CPPUNIT_ASSERT_EQUAL_MESSAGE("Should be title", u"Ͳͱͻͼͽϳϗϲ"_ustr, sTitleCase); + OUString sUpperCase = m_xCC->toUpper(sTest, 0, sTest.getLength(), {}); + CPPUNIT_ASSERT_EQUAL_MESSAGE("Should be upper", u"ͲͰϽϾϿͿϏϹ"_ustr, sUpperCase); + OUString sLowerCase = m_xCC->toLower(sTest, 0, sTest.getLength(), {}); + CPPUNIT_ASSERT_EQUAL_MESSAGE("Should be lower", u"ͳͱͻͼͽϳϗϲ"_ustr, sLowerCase); + } +} + +CPPUNIT_TEST_FIXTURE(TestCharacterClassification, testSurrogatePairs) +{ + { + // No case mapping + OUString sTest(u"\U0001F600"_ustr); + OUString sLowerCase = m_xCC->toLower(sTest, 0, sTest.getLength(), {}); + CPPUNIT_ASSERT_EQUAL_MESSAGE("Should be lower", u"\U0001F600"_ustr, sLowerCase); + OUString sUpperCase = m_xCC->toUpper(sLowerCase, 0, sLowerCase.getLength(), {}); + CPPUNIT_ASSERT_EQUAL_MESSAGE("Should be upper", sTest, sUpperCase); + } + + { + // Case mapping + OUString sTest(u"\U00010400"_ustr); + OUString sLowerCase = m_xCC->toLower(sTest, 0, sTest.getLength(), {}); + CPPUNIT_ASSERT_EQUAL_MESSAGE("Should be lower", u"\U00010428"_ustr, sLowerCase); + OUString sUpperCase = m_xCC->toUpper(sLowerCase, 0, sLowerCase.getLength(), {}); + CPPUNIT_ASSERT_EQUAL_MESSAGE("Should be upper", sTest, sUpperCase); + } +} + +CPPUNIT_TEST_FIXTURE(TestCharacterClassification, testAdlam) +{ + OUString sUpper(u"𞤀𞤁𞤂𞤃𞤄𞤅𞤆𞤇𞤈𞤉𞤊𞤋𞤌𞤍𞤎𞤏𞤐𞤑𞤒𞤓𞤔𞤕𞤖𞤗𞤘𞤙𞤚𞤛𞤜𞤝𞤞𞤟𞤠𞤡"_ustr); + OUString sLower(u"𞤢𞤣𞤤𞤥𞤦𞤧𞤨𞤩𞤪𞤫𞤬𞤭𞤮𞤯𞤰𞤱𞤲𞤳𞤴𞤵𞤶𞤷𞤸𞤹𞤺𞤻𞤼𞤽𞤾𞤿𞥀𞥁𞥂𞥃"_ustr); + OUString sTitle = sLower; // Adlam doesn’t have title case? 
+ { + // From upper case + OUString sLowerRes = m_xCC->toLower(sUpper, 0, sUpper.getLength(), {}); + CPPUNIT_ASSERT_EQUAL_MESSAGE("Should be lower", sLower, sLowerRes); + OUString sUpperRes = m_xCC->toUpper(sLowerRes, 0, sLower.getLength(), {}); + CPPUNIT_ASSERT_EQUAL_MESSAGE("Should be upper", sUpper, sUpperRes); + } + + { + // From lower case + OUString sTitleRes = m_xCC->toTitle(sLower, 0, sLower.getLength(), {}); + CPPUNIT_ASSERT_EQUAL_MESSAGE("Should be title", sTitle, sTitleRes); + OUString sUpperRes = m_xCC->toUpper(sLower, 0, sLower.getLength(), {}); + CPPUNIT_ASSERT_EQUAL_MESSAGE("Should be upper", sUpper, sUpperRes); + OUString sLowerRes = m_xCC->toLower(sUpperRes, 0, sUpperRes.getLength(), {}); + CPPUNIT_ASSERT_EQUAL_MESSAGE("Should be lower", sLower, sLowerRes); + } + + { + // From title case + OUString sTitleRes = m_xCC->toTitle(sTitle, 0, sTitle.getLength(), {}); + CPPUNIT_ASSERT_EQUAL_MESSAGE("Should be title", sTitle, sTitleRes); + OUString sUpperRes = m_xCC->toUpper(sTitle, 0, sTitle.getLength(), {}); + CPPUNIT_ASSERT_EQUAL_MESSAGE("Should be upper", sUpper, sUpperRes); + OUString sLowerRes = m_xCC->toLower(sTitle, 0, sTitle.getLength(), {}); + CPPUNIT_ASSERT_EQUAL_MESSAGE("Should be lower", sLower, sLowerRes); + } +} + void TestCharacterClassification::setUp() { BootstrapFixtureBase::setUp(); @@ -100,8 +284,6 @@ void TestCharacterClassification::tearDown() m_xCC.clear(); } -CPPUNIT_TEST_SUITE_REGISTRATION(TestCharacterClassification); - CPPUNIT_PLUGIN_IMPLEMENT(); /* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/i18npool/qa/cppunit/test_defaultnumberingprovider.cxx b/i18npool/qa/cppunit/test_defaultnumberingprovider.cxx index 1bf0ab521079..7e37f1c28ff7 100644 --- a/i18npool/qa/cppunit/test_defaultnumberingprovider.cxx +++ b/i18npool/qa/cppunit/test_defaultnumberingprovider.cxx @@ -63,7 +63,7 @@ CPPUNIT_TEST_FIXTURE(I18npoolDefaultnumberingproviderTest, testNumberingIdentifi { aFail.emplace_back( "Numbering: " + OString::number(i) + 
" \"" + aID.toUtf8() + "\"" - + (aID.isEmpty() ? "" + + (aID.isEmpty() ? ""_ostr : OString(" duplicate of " + OString::number(aMap[aID]))) + "\n"); } @@ -72,7 +72,7 @@ CPPUNIT_TEST_FIXTURE(I18npoolDefaultnumberingproviderTest, testNumberingIdentifi if (!aFail.empty()) { - OString aMsg("Not unique numbering identifiers:\n"); + OString aMsg("Not unique numbering identifiers:\n"_ostr); for (auto const& r : aFail) aMsg += r; CPPUNIT_ASSERT_MESSAGE(aMsg.getStr(), false); @@ -197,7 +197,7 @@ CPPUNIT_TEST_FIXTURE(I18npoolDefaultnumberingproviderTest, testKoreanCounting) OUString aActual = xFormatter->makeNumberingString(aProperties, aLocale); // Without the accompanying fix in place, this test would have failed with a // lang.IllegalArgumentException, support for NUMBER_HANGUL_KO was missing. - CPPUNIT_ASSERT_EQUAL(OUString(u"\uc77c"), aActual); + CPPUNIT_ASSERT_EQUAL(u"\uc77c"_ustr, aActual); // 10 -> "십" aProperties = { @@ -206,7 +206,7 @@ CPPUNIT_TEST_FIXTURE(I18npoolDefaultnumberingproviderTest, testKoreanCounting) comphelper::makePropertyValue("Value", static_cast<sal_Int32>(10)), }; aActual = xFormatter->makeNumberingString(aProperties, aLocale); - CPPUNIT_ASSERT_EQUAL(OUString(u"\uc2ed"), aActual); + CPPUNIT_ASSERT_EQUAL(u"\uc2ed"_ustr, aActual); // 100 -> "백" aProperties = { @@ -215,7 +215,7 @@ CPPUNIT_TEST_FIXTURE(I18npoolDefaultnumberingproviderTest, testKoreanCounting) comphelper::makePropertyValue("Value", static_cast<sal_Int32>(100)), }; aActual = xFormatter->makeNumberingString(aProperties, aLocale); - CPPUNIT_ASSERT_EQUAL(OUString(u"\ubc31"), aActual); + CPPUNIT_ASSERT_EQUAL(u"\ubc31"_ustr, aActual); } CPPUNIT_TEST_FIXTURE(I18npoolDefaultnumberingproviderTest, testKoreanLegal) @@ -232,7 +232,7 @@ CPPUNIT_TEST_FIXTURE(I18npoolDefaultnumberingproviderTest, testKoreanLegal) OUString aActual = xFormatter->makeNumberingString(aProperties, aLocale); // Without the accompanying fix in place, this test would have failed with a // lang.IllegalArgumentException, 
support for NUMBER_LEGAL_KO was missing. - CPPUNIT_ASSERT_EQUAL(OUString(u"\ud558\ub098"), aActual); + CPPUNIT_ASSERT_EQUAL(u"\ud558\ub098"_ustr, aActual); // 2 -> "둘" aProperties = { @@ -241,7 +241,7 @@ CPPUNIT_TEST_FIXTURE(I18npoolDefaultnumberingproviderTest, testKoreanLegal) comphelper::makePropertyValue("Value", static_cast<sal_Int32>(2)), }; aActual = xFormatter->makeNumberingString(aProperties, aLocale); - CPPUNIT_ASSERT_EQUAL(OUString(u"\ub458"), aActual); + CPPUNIT_ASSERT_EQUAL(u"\ub458"_ustr, aActual); // 3 -> "셋" aProperties = { @@ -250,7 +250,7 @@ CPPUNIT_TEST_FIXTURE(I18npoolDefaultnumberingproviderTest, testKoreanLegal) comphelper::makePropertyValue("Value", static_cast<sal_Int32>(3)), }; aActual = xFormatter->makeNumberingString(aProperties, aLocale); - CPPUNIT_ASSERT_EQUAL(OUString(u"\uc14b"), aActual); + CPPUNIT_ASSERT_EQUAL(u"\uc14b"_ustr, aActual); // 4 -> "넷" aProperties = { @@ -259,7 +259,7 @@ CPPUNIT_TEST_FIXTURE(I18npoolDefaultnumberingproviderTest, testKoreanLegal) comphelper::makePropertyValue("Value", static_cast<sal_Int32>(4)), }; aActual = xFormatter->makeNumberingString(aProperties, aLocale); - CPPUNIT_ASSERT_EQUAL(OUString(u"\ub137"), aActual); + CPPUNIT_ASSERT_EQUAL(u"\ub137"_ustr, aActual); // 5 -> "다섯" aProperties = { @@ -268,7 +268,7 @@ CPPUNIT_TEST_FIXTURE(I18npoolDefaultnumberingproviderTest, testKoreanLegal) comphelper::makePropertyValue("Value", static_cast<sal_Int32>(5)), }; aActual = xFormatter->makeNumberingString(aProperties, aLocale); - CPPUNIT_ASSERT_EQUAL(OUString(u"\ub2e4\uc12f"), aActual); + CPPUNIT_ASSERT_EQUAL(u"\ub2e4\uc12f"_ustr, aActual); // 6 -> "여섯" aProperties = { comphelper::makePropertyValue( @@ -276,7 +276,7 @@ CPPUNIT_TEST_FIXTURE(I18npoolDefaultnumberingproviderTest, testKoreanLegal) comphelper::makePropertyValue("Value", static_cast<sal_Int32>(6)), }; aActual = xFormatter->makeNumberingString(aProperties, aLocale); - CPPUNIT_ASSERT_EQUAL(OUString(u"\uc5ec\uc12f"), aActual); +
CPPUNIT_ASSERT_EQUAL(u"\uc5ec\uc12f"_ustr, aActual); // 7 -> "일곱" aProperties = { comphelper::makePropertyValue( @@ -284,7 +284,7 @@ CPPUNIT_TEST_FIXTURE(I18npoolDefaultnumberingproviderTest, testKoreanLegal) comphelper::makePropertyValue("Value", static_cast<sal_Int32>(7)), }; aActual = xFormatter->makeNumberingString(aProperties, aLocale); - CPPUNIT_ASSERT_EQUAL(OUString(u"\uc77c\uacf1"), aActual); + CPPUNIT_ASSERT_EQUAL(u"\uc77c\uacf1"_ustr, aActual); // 8 -> "여덟" aProperties = { @@ -293,7 +293,7 @@ CPPUNIT_TEST_FIXTURE(I18npoolDefaultnumberingproviderTest, testKoreanLegal) comphelper::makePropertyValue("Value", static_cast<sal_Int32>(8)), }; aActual = xFormatter->makeNumberingString(aProperties, aLocale); - CPPUNIT_ASSERT_EQUAL(OUString(u"\uc5ec\ub35f"), aActual); + CPPUNIT_ASSERT_EQUAL(u"\uc5ec\ub35f"_ustr, aActual); // 9 -> "아홉" aProperties = { @@ -302,7 +302,7 @@ CPPUNIT_TEST_FIXTURE(I18npoolDefaultnumberingproviderTest, testKoreanLegal) comphelper::makePropertyValue("Value", static_cast<sal_Int32>(9)), }; aActual = xFormatter->makeNumberingString(aProperties, aLocale); - CPPUNIT_ASSERT_EQUAL(OUString(u"\uc544\ud649"), aActual); + CPPUNIT_ASSERT_EQUAL(u"\uc544\ud649"_ustr, aActual); // 10 -> "열" aProperties = { @@ -311,7 +311,7 @@ CPPUNIT_TEST_FIXTURE(I18npoolDefaultnumberingproviderTest, testKoreanLegal) comphelper::makePropertyValue("Value", static_cast<sal_Int32>(10)), }; aActual = xFormatter->makeNumberingString(aProperties, aLocale); - CPPUNIT_ASSERT_EQUAL(OUString(u"\uc5f4"), aActual); + CPPUNIT_ASSERT_EQUAL(u"\uc5f4"_ustr, aActual); // 21 -> "스물하나" aProperties = { @@ -320,7 +320,7 @@ CPPUNIT_TEST_FIXTURE(I18npoolDefaultnumberingproviderTest, testKoreanLegal) comphelper::makePropertyValue("Value", static_cast<sal_Int32>(21)), }; aActual = xFormatter->makeNumberingString(aProperties, aLocale); - CPPUNIT_ASSERT_EQUAL(OUString(u"\uc2a4\ubb3c\ud558\ub098"), aActual); + CPPUNIT_ASSERT_EQUAL(u"\uc2a4\ubb3c\ud558\ub098"_ustr, aActual); // 32 -> "서른둘" 
aProperties = { @@ -329,7 +329,7 @@ CPPUNIT_TEST_FIXTURE(I18npoolDefaultnumberingproviderTest, testKoreanLegal) comphelper::makePropertyValue("Value", static_cast<sal_Int32>(32)), }; aActual = xFormatter->makeNumberingString(aProperties, aLocale); - CPPUNIT_ASSERT_EQUAL(OUString(u"\uc11c\ub978\ub458"), aActual); + CPPUNIT_ASSERT_EQUAL(u"\uc11c\ub978\ub458"_ustr, aActual); // 43 -> "마흔셋" aProperties = { @@ -338,7 +338,7 @@ CPPUNIT_TEST_FIXTURE(I18npoolDefaultnumberingproviderTest, testKoreanLegal) comphelper::makePropertyValue("Value", static_cast<sal_Int32>(43)), }; aActual = xFormatter->makeNumberingString(aProperties, aLocale); - CPPUNIT_ASSERT_EQUAL(OUString(u"\ub9c8\ud754\uc14b"), aActual); + CPPUNIT_ASSERT_EQUAL(u"\ub9c8\ud754\uc14b"_ustr, aActual); // 54 -> "쉰넷" aProperties = { @@ -347,7 +347,7 @@ CPPUNIT_TEST_FIXTURE(I18npoolDefaultnumberingproviderTest, testKoreanLegal) comphelper::makePropertyValue("Value", static_cast<sal_Int32>(54)), }; aActual = xFormatter->makeNumberingString(aProperties, aLocale); - CPPUNIT_ASSERT_EQUAL(OUString(u"\uc270\ub137"), aActual); + CPPUNIT_ASSERT_EQUAL(u"\uc270\ub137"_ustr, aActual); // 65 -> "예순다섯" aProperties = { @@ -356,7 +356,7 @@ CPPUNIT_TEST_FIXTURE(I18npoolDefaultnumberingproviderTest, testKoreanLegal) comphelper::makePropertyValue("Value", static_cast<sal_Int32>(65)), }; aActual = xFormatter->makeNumberingString(aProperties, aLocale); - CPPUNIT_ASSERT_EQUAL(OUString(u"\uc608\uc21c\ub2e4\uc12f"), aActual); + CPPUNIT_ASSERT_EQUAL(u"\uc608\uc21c\ub2e4\uc12f"_ustr, aActual); // 76 -> "일흔여섯" aProperties = { @@ -365,7 +365,7 @@ CPPUNIT_TEST_FIXTURE(I18npoolDefaultnumberingproviderTest, testKoreanLegal) comphelper::makePropertyValue("Value", static_cast<sal_Int32>(76)), }; aActual = xFormatter->makeNumberingString(aProperties, aLocale); - CPPUNIT_ASSERT_EQUAL(OUString(u"\uc77c\ud754\uc5ec\uc12f"), aActual); + CPPUNIT_ASSERT_EQUAL(u"\uc77c\ud754\uc5ec\uc12f"_ustr, aActual); // 87 -> "여든일곱" aProperties = { @@ -374,7 +374,7 @@ 
CPPUNIT_TEST_FIXTURE(I18npoolDefaultnumberingproviderTest, testKoreanLegal) comphelper::makePropertyValue("Value", static_cast<sal_Int32>(87)), }; aActual = xFormatter->makeNumberingString(aProperties, aLocale); - CPPUNIT_ASSERT_EQUAL(OUString(u"\uc5ec\ub4e0\uc77c\uacf1"), aActual); + CPPUNIT_ASSERT_EQUAL(u"\uc5ec\ub4e0\uc77c\uacf1"_ustr, aActual); // 98 -> "아흔여덟" aProperties = { @@ -383,7 +383,7 @@ CPPUNIT_TEST_FIXTURE(I18npoolDefaultnumberingproviderTest, testKoreanLegal) comphelper::makePropertyValue("Value", static_cast<sal_Int32>(98)), }; aActual = xFormatter->makeNumberingString(aProperties, aLocale); - CPPUNIT_ASSERT_EQUAL(OUString(u"\uc544\ud754\uc5ec\ub35f"), aActual); + CPPUNIT_ASSERT_EQUAL(u"\uc544\ud754\uc5ec\ub35f"_ustr, aActual); // 99 -> "아흔아홉" aProperties = { @@ -392,7 +392,7 @@ CPPUNIT_TEST_FIXTURE(I18npoolDefaultnumberingproviderTest, testKoreanLegal) comphelper::makePropertyValue("Value", static_cast<sal_Int32>(99)), }; aActual = xFormatter->makeNumberingString(aProperties, aLocale); - CPPUNIT_ASSERT_EQUAL(OUString(u"\uc544\ud754\uc544\ud649"), aActual); + CPPUNIT_ASSERT_EQUAL(u"\uc544\ud754\uc544\ud649"_ustr, aActual); } CPPUNIT_TEST_FIXTURE(I18npoolDefaultnumberingproviderTest, testKoreanDigital) @@ -409,7 +409,7 @@ CPPUNIT_TEST_FIXTURE(I18npoolDefaultnumberingproviderTest, testKoreanDigital) OUString aActual = xFormatter->makeNumberingString(aProperties, aLocale); // Without the accompanying fix in place, this test would have failed with a // lang.IllegalArgumentException, support for NUMBER_DIGITAL_KO was missing. 
- CPPUNIT_ASSERT_EQUAL(OUString(u"\uc77c"), aActual); + CPPUNIT_ASSERT_EQUAL(u"\uc77c"_ustr, aActual); // 10 -> "일영" aProperties = { @@ -418,7 +418,7 @@ CPPUNIT_TEST_FIXTURE(I18npoolDefaultnumberingproviderTest, testKoreanDigital) comphelper::makePropertyValue("Value", static_cast<sal_Int32>(10)), }; aActual = xFormatter->makeNumberingString(aProperties, aLocale); - CPPUNIT_ASSERT_EQUAL(OUString(u"\uc77c\uc601"), aActual); + CPPUNIT_ASSERT_EQUAL(u"\uc77c\uc601"_ustr, aActual); // 100 -> "일영영" aProperties = { @@ -427,7 +427,7 @@ CPPUNIT_TEST_FIXTURE(I18npoolDefaultnumberingproviderTest, testKoreanDigital) comphelper::makePropertyValue("Value", static_cast<sal_Int32>(100)), }; aActual = xFormatter->makeNumberingString(aProperties, aLocale); - CPPUNIT_ASSERT_EQUAL(OUString(u"\uc77c\uc601\uc601"), aActual); + CPPUNIT_ASSERT_EQUAL(u"\uc77c\uc601\uc601"_ustr, aActual); } CPPUNIT_TEST_FIXTURE(I18npoolDefaultnumberingproviderTest, testKoreanDigital2) @@ -444,7 +444,7 @@ CPPUNIT_TEST_FIXTURE(I18npoolDefaultnumberingproviderTest, testKoreanDigital2) OUString aActual = xFormatter->makeNumberingString(aProperties, aLocale); // Without the accompanying fix in place, this test would have failed with a // lang.IllegalArgumentException, support for NUMBER_DIGITAL2_KO was missing. 
- CPPUNIT_ASSERT_EQUAL(OUString(u"\u4e00"), aActual); + CPPUNIT_ASSERT_EQUAL(u"\u4e00"_ustr, aActual); // 10 -> "一零" aProperties = { @@ -453,7 +453,7 @@ CPPUNIT_TEST_FIXTURE(I18npoolDefaultnumberingproviderTest, testKoreanDigital2) comphelper::makePropertyValue("Value", static_cast<sal_Int32>(10)), }; aActual = xFormatter->makeNumberingString(aProperties, aLocale); - CPPUNIT_ASSERT_EQUAL(OUString(u"\u4e00\u96f6"), aActual); + CPPUNIT_ASSERT_EQUAL(u"\u4e00\u96f6"_ustr, aActual); // 100 -> "一零零" aProperties = { @@ -462,7 +462,7 @@ CPPUNIT_TEST_FIXTURE(I18npoolDefaultnumberingproviderTest, testKoreanDigital2) comphelper::makePropertyValue("Value", static_cast<sal_Int32>(100)), }; aActual = xFormatter->makeNumberingString(aProperties, aLocale); - CPPUNIT_ASSERT_EQUAL(OUString(u"\u4e00\u96f6\u96f6"), aActual); + CPPUNIT_ASSERT_EQUAL(u"\u4e00\u96f6\u96f6"_ustr, aActual); } CPPUNIT_PLUGIN_IMPLEMENT(); diff --git a/i18npool/qa/cppunit/test_textsearch.cxx b/i18npool/qa/cppunit/test_textsearch.cxx index 31c66d94cd8d..2efb2b9cdc37 100644 --- a/i18npool/qa/cppunit/test_textsearch.cxx +++ b/i18npool/qa/cppunit/test_textsearch.cxx @@ -27,7 +27,6 @@ #include <unicode/regex.h> using namespace ::com::sun::star; -typedef U_ICU_NAMESPACE::UnicodeString IcuUniString; class TestTextSearch : public test::BootstrapFixtureBase { @@ -39,12 +38,14 @@ public: void testSearches(); void testWildcardSearch(); void testApostropheSearch(); + void testTdf138410(); CPPUNIT_TEST_SUITE(TestTextSearch); CPPUNIT_TEST(testICU); CPPUNIT_TEST(testSearches); CPPUNIT_TEST(testWildcardSearch); CPPUNIT_TEST(testApostropheSearch); + CPPUNIT_TEST(testTdf138410); CPPUNIT_TEST_SUITE_END(); private: uno::Reference<util::XTextSearch> m_xSearch; @@ -59,11 +60,11 @@ void TestTextSearch::testICU() OUString aString( "abcdefgh" ); OUString aPattern( "e" ); - IcuUniString aSearchPat( reinterpret_cast<const UChar*>(aPattern.getStr()), aPattern.getLength() ); + icu::UnicodeString aSearchPat( reinterpret_cast<const 
UChar*>(aPattern.getStr()), aPattern.getLength() ); std::unique_ptr<icu::RegexMatcher> pRegexMatcher(new icu::RegexMatcher( aSearchPat, nSearchFlags, nErr )); - IcuUniString aSource( reinterpret_cast<const UChar*>(aString.getStr()), aString.getLength() ); + icu::UnicodeString aSource( reinterpret_cast<const UChar*>(aString.getStr()), aString.getLength() ); pRegexMatcher->reset( aSource ); CPPUNIT_ASSERT( pRegexMatcher->find( 0, nErr ) ); @@ -76,10 +77,10 @@ void TestTextSearch::testICU() OUString aString2( "acababaabcababadcdaa" ); OUString aPattern2( "a" ); - IcuUniString aSearchPat2( reinterpret_cast<const UChar*>(aPattern2.getStr()), aPattern2.getLength() ); + icu::UnicodeString aSearchPat2( reinterpret_cast<const UChar*>(aPattern2.getStr()), aPattern2.getLength() ); pRegexMatcher.reset(new icu::RegexMatcher( aSearchPat2, nSearchFlags, nErr )); - IcuUniString aSource2( reinterpret_cast<const UChar*>(aString2.getStr()), aString2.getLength() ); + icu::UnicodeString aSource2( reinterpret_cast<const UChar*>(aString2.getStr()), aString2.getLength() ); pRegexMatcher->reset( aSource2 ); CPPUNIT_ASSERT( pRegexMatcher->find( 0, nErr ) ); @@ -293,7 +294,7 @@ void TestTextSearch::testWildcardSearch() void TestTextSearch::testApostropheSearch() { // A) find typographic apostrophes also by using ASCII apostrophe in searchString - OUString str( u"It\u2019s an apostrophe." 
); + OUString str( u"It\u2019s an apostrophe."_ustr ); sal_Int32 startPos = 0, endPos = str.getLength(); // set options @@ -354,7 +355,7 @@ void TestTextSearch::testApostropheSearch() CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32>(2), aRes.endOffset[0] ); // C) search typographic apostrophe in a text with ASCII apostrophes (no result) - aOptions.searchString = OUString(u"\u2019"); + aOptions.searchString = u"\u2019"_ustr; m_xSearch->setOptions( aOptions ); aRes = m_xSearch->searchForward( str, startPos, endPos ); @@ -379,9 +380,9 @@ void TestTextSearch::testApostropheSearch() CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32>(2), aRes.endOffset[0] ); // E) search mixed apostrophes in a text with mixed apostrophes: - aOptions.searchString = OUString(u"'\u2019"); + aOptions.searchString = u"'\u2019"_ustr; m_xSearch->setOptions( aOptions ); - str = u"test: \u2019'"; + str = u"test: \u2019'"_ustr; // search forward aRes = m_xSearch->searchForward( str, startPos, str.getLength()); @@ -392,7 +393,7 @@ void TestTextSearch::testApostropheSearch() CPPUNIT_ASSERT( aRes.subRegExpressions > 0 ); // F) search mixed apostrophes in a text with ASCII apostrophes: - str = u"test: ''"; + str = u"test: ''"_ustr; // search forward aRes = m_xSearch->searchForward( str, startPos, str.getLength()); @@ -403,6 +404,125 @@ void TestTextSearch::testApostropheSearch() CPPUNIT_ASSERT( aRes.subRegExpressions > 0 ); } +void TestTextSearch::testTdf138410() +{ + OUString str(u"\u0643\u064f\u062a\u064f\u0628 \u0643\u062a\u0628"_ustr); + sal_Int32 startPos = 0, endPos = str.getLength(); + + util::SearchOptions aOptions; + aOptions.algorithmType = util::SearchAlgorithms_ABSOLUTE; + + util::SearchResult aRes; + + // A) base alone + // The search string will be found whether it is followed by a mark in the + // text or not, and whether IGNORE_DIACRITICS_CTL is set or not. 
+ + // set options + aOptions.searchString = u"\u0643"_ustr; + aOptions.transliterateFlags = 0; + m_xSearch->setOptions(aOptions); + + // search forward + aRes = m_xSearch->searchForward(str, startPos, endPos); + CPPUNIT_ASSERT(aRes.subRegExpressions > 0); + CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(0), aRes.startOffset[0]); + CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(1), aRes.endOffset[0]); + + // search backwards + aRes = m_xSearch->searchBackward(str, endPos, startPos); + CPPUNIT_ASSERT(aRes.subRegExpressions > 0); + CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(7), aRes.startOffset[0]); + CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(6), aRes.endOffset[0]); + + // check with transliteration + aOptions.transliterateFlags = static_cast<int>(TransliterationFlags::IGNORE_DIACRITICS_CTL); + m_xSearch->setOptions(aOptions); + + // search forward + aRes = m_xSearch->searchForward(str, startPos, endPos); + CPPUNIT_ASSERT(aRes.subRegExpressions > 0); + CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(0), aRes.startOffset[0]); + CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(1), aRes.endOffset[0]); + + // search backwards + aRes = m_xSearch->searchBackward(str, endPos, startPos); + CPPUNIT_ASSERT(aRes.subRegExpressions > 0); + CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(7), aRes.startOffset[0]); + CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(6), aRes.endOffset[0]); + + // B) base+mark + // The search string will be found when followed by a mark in the text, or + // when IGNORE_DIACRITICS_CTL is set whether it is followed by a mark or + // not.
+ + // set options + aOptions.searchString = u"\u0643\u064f"_ustr; + aOptions.transliterateFlags = 0; + m_xSearch->setOptions(aOptions); + + // search forward + aRes = m_xSearch->searchForward(str, startPos, endPos); + CPPUNIT_ASSERT(aRes.subRegExpressions > 0); + CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(0), aRes.startOffset[0]); + CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(2), aRes.endOffset[0]); + + // search backwards + aRes = m_xSearch->searchBackward(str, endPos, startPos); + CPPUNIT_ASSERT(aRes.subRegExpressions > 0); + CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(2), aRes.startOffset[0]); + CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(0), aRes.endOffset[0]); + + // check with transliteration + aOptions.transliterateFlags = static_cast<int>(TransliterationFlags::IGNORE_DIACRITICS_CTL); + m_xSearch->setOptions(aOptions); + + // search forward + aRes = m_xSearch->searchForward(str, startPos, endPos); + CPPUNIT_ASSERT(aRes.subRegExpressions > 0); + CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(0), aRes.startOffset[0]); + CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(1), aRes.endOffset[0]); + + // search backwards + aRes = m_xSearch->searchBackward(str, endPos, startPos); + CPPUNIT_ASSERT(aRes.subRegExpressions > 0); + CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(7), aRes.startOffset[0]); + CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(6), aRes.endOffset[0]); + + // C) mark alone + // The search string will be found only when IGNORE_DIACRITICS_CTL is not + // set.
+ + // set options + aOptions.searchString = u"\u064f"_ustr; + aOptions.transliterateFlags = 0; + m_xSearch->setOptions(aOptions); + + // search forward + aRes = m_xSearch->searchForward(str, startPos, endPos); + CPPUNIT_ASSERT(aRes.subRegExpressions > 0); + CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(1), aRes.startOffset[0]); + CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(2), aRes.endOffset[0]); + + // search backwards + aRes = m_xSearch->searchBackward(str, endPos, startPos); + CPPUNIT_ASSERT(aRes.subRegExpressions > 0); + CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(4), aRes.startOffset[0]); + CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(3), aRes.endOffset[0]); + + // with ignore marks the mark will not be found + aOptions.transliterateFlags = static_cast<int>(TransliterationFlags::IGNORE_DIACRITICS_CTL); + m_xSearch->setOptions(aOptions); + + // search forward + aRes = m_xSearch->searchForward(str, startPos, endPos); + CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(0), aRes.subRegExpressions); + + // search backwards + aRes = m_xSearch->searchBackward(str, endPos, startPos); + CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(0), aRes.subRegExpressions); +} + void TestTextSearch::setUp() { BootstrapFixtureBase::setUp(); diff --git a/i18npool/qa/cppunit/transliteration.cxx b/i18npool/qa/cppunit/transliteration.cxx new file mode 100644 index 000000000000..cc8eccfbb5c2 --- /dev/null +++ b/i18npool/qa/cppunit/transliteration.cxx @@ -0,0 +1,192 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; fill-column: 100 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ */ + +#include <sal/config.h> + +#include <cppunit/TestFixture.h> +#include <cppunit/extensions/HelperMacros.h> +#include <cppunit/plugin/TestPlugIn.h> + +#include <com/sun/star/i18n/Transliteration.hpp> +#include <com/sun/star/i18n/TransliterationModulesNew.hpp> +#include <com/sun/star/i18n/XExtendedTransliteration.hpp> +#include <com/sun/star/uno/Reference.hxx> +#include <cppuhelper/bootstrap.hxx> + +namespace +{ +class Transliteration : public CppUnit::TestFixture +{ +public: + void setUp() + { + transliteration_ + = css::i18n::Transliteration::create(cppu::defaultBootstrap_InitialComponentContext()); + } + + void testLoadModuleNew() + { + // Verify that loading succeeds without throwing an exception, for each possible + // TransliterationModulesNew value (TODO: there is an upper limit of maxCascade 27 in + // i18npool/inc/transliterationImpl.hxx for the length of the passed + // TransliterationModulesNew value, so pass each one individually rather than all 65 at + // once): + transliteration_->loadModuleNew( + { css::i18n::TransliterationModulesNew_UPPERCASE_LOWERCASE }, {}); + transliteration_->loadModuleNew( + { css::i18n::TransliterationModulesNew_LOWERCASE_UPPERCASE }, {}); + transliteration_->loadModuleNew( + { css::i18n::TransliterationModulesNew_HALFWIDTH_FULLWIDTH }, {}); + transliteration_->loadModuleNew( + { css::i18n::TransliterationModulesNew_FULLWIDTH_HALFWIDTH }, {}); + transliteration_->loadModuleNew({ css::i18n::TransliterationModulesNew_KATAKANA_HIRAGANA }, + {}); + transliteration_->loadModuleNew({ css::i18n::TransliterationModulesNew_HIRAGANA_KATAKANA }, + {}); + transliteration_->loadModuleNew({ css::i18n::TransliterationModulesNew_IGNORE_CASE }, {}); + transliteration_->loadModuleNew({ css::i18n::TransliterationModulesNew_IGNORE_KANA }, {}); + transliteration_->loadModuleNew({ css::i18n::TransliterationModulesNew_IGNORE_WIDTH }, {}); + transliteration_->loadModuleNew( + { css::i18n::TransliterationModulesNew_IgnoreTraditionalKanji_ja_JP 
}, {}); + transliteration_->loadModuleNew( + { css::i18n::TransliterationModulesNew_IgnoreTraditionalKana_ja_JP }, {}); + transliteration_->loadModuleNew( + { css::i18n::TransliterationModulesNew_IgnoreMinusSign_ja_JP }, {}); + transliteration_->loadModuleNew( + { css::i18n::TransliterationModulesNew_IgnoreIterationMark_ja_JP }, {}); + transliteration_->loadModuleNew( + { css::i18n::TransliterationModulesNew_IgnoreSeparator_ja_JP }, {}); + transliteration_->loadModuleNew({ css::i18n::TransliterationModulesNew_IgnoreZiZu_ja_JP }, + {}); + transliteration_->loadModuleNew({ css::i18n::TransliterationModulesNew_IgnoreBaFa_ja_JP }, + {}); + transliteration_->loadModuleNew({ css::i18n::TransliterationModulesNew_IgnoreTiJi_ja_JP }, + {}); + transliteration_->loadModuleNew({ css::i18n::TransliterationModulesNew_IgnoreHyuByu_ja_JP }, + {}); + transliteration_->loadModuleNew({ css::i18n::TransliterationModulesNew_IgnoreSeZe_ja_JP }, + {}); + transliteration_->loadModuleNew( + { css::i18n::TransliterationModulesNew_IgnoreIandEfollowedByYa_ja_JP }, {}); + transliteration_->loadModuleNew( + { css::i18n::TransliterationModulesNew_IgnoreKiKuFollowedBySa_ja_JP }, {}); + transliteration_->loadModuleNew({ css::i18n::TransliterationModulesNew_IgnoreSize_ja_JP }, + {}); + transliteration_->loadModuleNew( + { css::i18n::TransliterationModulesNew_IgnoreProlongedSoundMark_ja_JP }, {}); + transliteration_->loadModuleNew( + { css::i18n::TransliterationModulesNew_IgnoreMiddleDot_ja_JP }, {}); + transliteration_->loadModuleNew({ css::i18n::TransliterationModulesNew_IgnoreSpace_ja_JP }, + {}); + transliteration_->loadModuleNew({ css::i18n::TransliterationModulesNew_SmallToLarge_ja_JP }, + {}); + transliteration_->loadModuleNew({ css::i18n::TransliterationModulesNew_LargeToSmall_ja_JP }, + {}); + transliteration_->loadModuleNew( + { css::i18n::TransliterationModulesNew_NumToTextLower_zh_CN }, {}); + transliteration_->loadModuleNew( + { css::i18n::TransliterationModulesNew_NumToTextUpper_zh_CN 
}, {}); + transliteration_->loadModuleNew( + { css::i18n::TransliterationModulesNew_NumToTextLower_zh_TW }, {}); + transliteration_->loadModuleNew( + { css::i18n::TransliterationModulesNew_NumToTextUpper_zh_TW }, {}); + transliteration_->loadModuleNew( + { css::i18n::TransliterationModulesNew_NumToTextFormalHangul_ko }, {}); + transliteration_->loadModuleNew( + { css::i18n::TransliterationModulesNew_NumToTextFormalLower_ko }, {}); + transliteration_->loadModuleNew( + { css::i18n::TransliterationModulesNew_NumToTextFormalUpper_ko }, {}); + transliteration_->loadModuleNew( + { css::i18n::TransliterationModulesNew_NumToTextInformalHangul_ko }, {}); + transliteration_->loadModuleNew( + { css::i18n::TransliterationModulesNew_NumToTextInformalLower_ko }, {}); + transliteration_->loadModuleNew( + { css::i18n::TransliterationModulesNew_NumToTextInformalUpper_ko }, {}); + transliteration_->loadModuleNew( + { css::i18n::TransliterationModulesNew_NumToCharLower_zh_CN }, {}); + transliteration_->loadModuleNew( + { css::i18n::TransliterationModulesNew_NumToCharUpper_zh_CN }, {}); + transliteration_->loadModuleNew( + { css::i18n::TransliterationModulesNew_NumToCharLower_zh_TW }, {}); + transliteration_->loadModuleNew( + { css::i18n::TransliterationModulesNew_NumToCharUpper_zh_TW }, {}); + transliteration_->loadModuleNew({ css::i18n::TransliterationModulesNew_NumToCharHangul_ko }, + {}); + transliteration_->loadModuleNew({ css::i18n::TransliterationModulesNew_NumToCharLower_ko }, + {}); + transliteration_->loadModuleNew({ css::i18n::TransliterationModulesNew_NumToCharUpper_ko }, + {}); + transliteration_->loadModuleNew({ css::i18n::TransliterationModulesNew_NumToCharFullwidth }, + {}); + transliteration_->loadModuleNew( + { css::i18n::TransliterationModulesNew_NumToCharKanjiShort_ja_JP }, {}); + transliteration_->loadModuleNew( + { css::i18n::TransliterationModulesNew_TextToNumLower_zh_CN }, {}); + transliteration_->loadModuleNew( + { 
css::i18n::TransliterationModulesNew_TextToNumUpper_zh_CN }, {}); + transliteration_->loadModuleNew( + { css::i18n::TransliterationModulesNew_TextToNumLower_zh_TW }, {}); + transliteration_->loadModuleNew( + { css::i18n::TransliterationModulesNew_TextToNumUpper_zh_TW }, {}); + transliteration_->loadModuleNew( + { css::i18n::TransliterationModulesNew_TextToNumFormalHangul_ko }, {}); + transliteration_->loadModuleNew( + { css::i18n::TransliterationModulesNew_TextToNumFormalLower_ko }, {}); + transliteration_->loadModuleNew( + { css::i18n::TransliterationModulesNew_TextToNumFormalUpper_ko }, {}); + transliteration_->loadModuleNew( + { css::i18n::TransliterationModulesNew_TextToNumInformalHangul_ko }, {}); + transliteration_->loadModuleNew( + { css::i18n::TransliterationModulesNew_TextToNumInformalLower_ko }, {}); + transliteration_->loadModuleNew( + { css::i18n::TransliterationModulesNew_TextToNumInformalUpper_ko }, {}); + transliteration_->loadModuleNew( + { css::i18n::TransliterationModulesNew_CharToNumLower_zh_CN }, {}); + transliteration_->loadModuleNew( + { css::i18n::TransliterationModulesNew_CharToNumUpper_zh_CN }, {}); + transliteration_->loadModuleNew( + { css::i18n::TransliterationModulesNew_CharToNumLower_zh_TW }, {}); + transliteration_->loadModuleNew( + { css::i18n::TransliterationModulesNew_CharToNumUpper_zh_TW }, {}); + transliteration_->loadModuleNew({ css::i18n::TransliterationModulesNew_CharToNumHangul_ko }, + {}); + transliteration_->loadModuleNew({ css::i18n::TransliterationModulesNew_CharToNumLower_ko }, + {}); + transliteration_->loadModuleNew({ css::i18n::TransliterationModulesNew_CharToNumUpper_ko }, + {}); + } + + void testTextToChuyin_zh_TW() + { + // Verify that loading succeeds: + transliteration_->loadModuleByImplName("TextToChuyin_zh_TW", {}); + } + + void testTextToPinyin_zh_CN() + { + // Verify that loading succeeds: + transliteration_->loadModuleByImplName("TextToPinyin_zh_CN", {}); + } + + CPPUNIT_TEST_SUITE(Transliteration); + 
CPPUNIT_TEST(testLoadModuleNew); + CPPUNIT_TEST(testTextToChuyin_zh_TW); + CPPUNIT_TEST(testTextToPinyin_zh_CN); + CPPUNIT_TEST_SUITE_END(); + +private: + css::uno::Reference<css::i18n::XExtendedTransliteration> transliteration_; +}; + +CPPUNIT_TEST_SUITE_REGISTRATION(Transliteration); +} + +CPPUNIT_PLUGIN_IMPLEMENT(); + +/* vim:set shiftwidth=4 softtabstop=4 expandtab cinoptions=b1,g0,N-s cinkeys+=0=break: */ |