From 5dc52ee00102cbf4262805d6e8f338bf0a88f470 Mon Sep 17 00:00:00 2001 From: Khaled Hosny Date: Mon, 21 May 2018 14:28:25 +0200 Subject: tdf#113694 Fix BreakIterator_CTL surrogate pairs BreakIterator_CTL in the non CharacterIteratorMode::SKIPCELL mode did not handle UTF-16 surrogate pairs at all, causing backspace to delete lone surrogates which is really bad. Just copied the corresponding code from BreakIterator_Unicode. Additionally, BreakIterator_th was not correctly skipping non-Thai text and always treating one character as Thai. Change-Id: Ia379327e042ff602fc19a485c4cbd1a3683f9230 Reviewed-on: https://gerrit.libreoffice.org/54631 Tested-by: Jenkins Reviewed-by: Eike Rathke --- i18npool/qa/cppunit/test_breakiterator.cxx | 23 ++++++++++++++++++++++ .../source/breakiterator/breakiterator_ctl.cxx | 8 ++++---- i18npool/source/breakiterator/breakiterator_th.cxx | 2 +- 3 files changed, 28 insertions(+), 5 deletions(-) (limited to 'i18npool') diff --git a/i18npool/qa/cppunit/test_breakiterator.cxx b/i18npool/qa/cppunit/test_breakiterator.cxx index 552274864035..0c132acf3a43 100644 --- a/i18npool/qa/cppunit/test_breakiterator.cxx +++ b/i18npool/qa/cppunit/test_breakiterator.cxx @@ -919,6 +919,29 @@ void TestBreakIterator::testThai() } while (nPos > 0); } + + // tdf#113694 + { + const sal_Unicode NON_BMP[] = { 0xD800, 0xDC00 }; + OUString aTest(NON_BMP, SAL_N_ELEMENTS(NON_BMP)); + + sal_Int32 nDone=0; + sal_Int32 nPos; + + nPos = m_xBreak->nextCharacters(aTest, 0, aLocale, + i18n::CharacterIteratorMode::SKIPCELL, 1, nDone); + CPPUNIT_ASSERT_EQUAL_MESSAGE("Should skip full surrogate pair", static_cast<sal_Int32>(SAL_N_ELEMENTS(NON_BMP)), nPos); + nPos = m_xBreak->previousCharacters(aTest, SAL_N_ELEMENTS(NON_BMP), aLocale, + i18n::CharacterIteratorMode::SKIPCELL, 1, nDone); + CPPUNIT_ASSERT_EQUAL_MESSAGE("Should skip full surrogate pair", static_cast<sal_Int32>(0), nPos); + + nPos = m_xBreak->nextCharacters(aTest, 0, aLocale, + i18n::CharacterIteratorMode::SKIPCHARACTER, 1, nDone); + 
CPPUNIT_ASSERT_EQUAL_MESSAGE("Should skip full surrogate pair", static_cast<sal_Int32>(SAL_N_ELEMENTS(NON_BMP)), nPos); + nPos = m_xBreak->previousCharacters(aTest, SAL_N_ELEMENTS(NON_BMP), aLocale, + i18n::CharacterIteratorMode::SKIPCHARACTER, 1, nDone); + CPPUNIT_ASSERT_EQUAL_MESSAGE("Should skip full surrogate pair", static_cast<sal_Int32>(0), nPos); + } } #ifdef TODO diff --git a/i18npool/source/breakiterator/breakiterator_ctl.cxx b/i18npool/source/breakiterator/breakiterator_ctl.cxx index b307c37fd9ac..932fb1b77182 100644 --- a/i18npool/source/breakiterator/breakiterator_ctl.cxx +++ b/i18npool/source/breakiterator/breakiterator_ctl.cxx @@ -71,8 +71,8 @@ sal_Int32 SAL_CALL BreakIterator_CTL::previousCharacters( const OUString& Text, } else nStartPos = 0; } else { // for BS to delete one char. - nDone = std::min(nStartPos, nCount); - nStartPos -= nDone; + for (nDone = 0; nDone < nCount && nStartPos > 0; nDone++) + Text.iterateCodePoints(&nStartPos, -1); } return nStartPos; @@ -98,8 +98,8 @@ sal_Int32 SAL_CALL BreakIterator_CTL::nextCharacters(const OUString& Text, } else nStartPos = len; } else { - nDone = std::min(len - nStartPos, nCount); - nStartPos += nDone; + for (nDone = 0; nDone < nCount && nStartPos < Text.getLength(); nDone++) + Text.iterateCodePoints(&nStartPos); } return nStartPos; diff --git a/i18npool/source/breakiterator/breakiterator_th.cxx b/i18npool/source/breakiterator/breakiterator_th.cxx index 36ced38daba8..49f81047247c 100644 --- a/i18npool/source/breakiterator/breakiterator_th.cxx +++ b/i18npool/source/breakiterator/breakiterator_th.cxx @@ -127,7 +127,7 @@ void BreakIterator_th::makeIndex(const OUString& Text, sal_Int32 const nStartPos sal_Int32 startPos = nStartPos; while (startPos > 0 && is_Thai(str[startPos-1])) startPos--; - sal_Int32 endPos = std::min(len, nStartPos+1); + sal_Int32 endPos = nStartPos; while (endPos < len && is_Thai(str[endPos])) endPos++; sal_Int32 start, end, pos; -- cgit v1.2.3