summaryrefslogtreecommitdiff
path: root/i18npool
diff options
context:
space:
mode:
authorAndreas Heinisch <andreas.heinisch@yahoo.de>2022-07-29 09:29:33 +0200
committerAndreas Heinisch <andreas.heinisch@yahoo.de>2022-08-21 14:12:43 +0200
commit70c99eef1643a17b2c1f0dba38b55a58cdb9aafc (patch)
tree44b9d9820254de4472aa03b51df5bb3ece2f2e2d /i18npool
parent6c0b8669e1f09a8301f3ebd1da21855d84abb2b1 (diff)
tdf#135451 - Change the skipSpace implementation of the break iterator
Change the ICU whitespace function of the break iterator from u_isWhitespace to u_isUWhiteSpace to include no-break spaces.

u_isWhitespace includes Java isWhitespace; Z + whitespace ISO controls, but excludes no-break spaces.

u_isUWhiteSpace includes all code points with the Unicode White_Space property; most of general categories "Z" (separators) + most whitespace ISO controls (including no-break spaces, but excluding IS1..IS4).

See https://unicode-org.github.io/icu-docs/apidoc/dev/icu4c/uchar_8h.html for further details.

Change-Id: I21fddefaf2149096824908f644310a59d6e2f38d
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/137582
Tested-by: Jenkins
Reviewed-by: Andreas Heinisch <andreas.heinisch@yahoo.de>
Diffstat (limited to 'i18npool')
-rw-r--r--i18npool/source/breakiterator/breakiteratorImpl.cxx4
-rw-r--r--i18npool/source/breakiterator/breakiterator_unicode.cxx15
2 files changed, 11 insertions, 8 deletions
diff --git a/i18npool/source/breakiterator/breakiteratorImpl.cxx b/i18npool/source/breakiterator/breakiteratorImpl.cxx
index 71f371a7eddc..71d794e2c9d4 100644
--- a/i18npool/source/breakiterator/breakiteratorImpl.cxx
+++ b/i18npool/source/breakiterator/breakiteratorImpl.cxx
@@ -82,7 +82,7 @@ static sal_Int32 skipSpace(const OUString& Text, sal_Int32 nPos, sal_Int32 len,
while (nPos < len)
{
ch = Text.iterateCodePoints(&pos);
- if (!u_isWhitespace(ch) && !isZWSP(ch))
+ if (!u_isUWhiteSpace(ch) && !isZWSP(ch))
break;
nPos = pos;
}
@@ -90,7 +90,7 @@ static sal_Int32 skipSpace(const OUString& Text, sal_Int32 nPos, sal_Int32 len,
while (nPos > 0)
{
ch = Text.iterateCodePoints(&pos, -1);
- if (!u_isWhitespace(ch) && !isZWSP(ch))
+ if (!u_isUWhiteSpace(ch) && !isZWSP(ch))
break;
nPos = pos;
}
diff --git a/i18npool/source/breakiterator/breakiterator_unicode.cxx b/i18npool/source/breakiterator/breakiterator_unicode.cxx
index a4a00c46a1ad..cfac4ddea89d 100644
--- a/i18npool/source/breakiterator/breakiterator_unicode.cxx
+++ b/i18npool/source/breakiterator/breakiterator_unicode.cxx
@@ -379,9 +379,10 @@ Boundary SAL_CALL BreakIterator_Unicode::nextWord( const OUString& Text, sal_Int
if( rv.startPos >= Text.getLength() || rv.startPos == icu::BreakIterator::DONE )
rv.endPos = result.startPos;
else {
- if ( (rWordType == WordType::ANYWORD_IGNOREWHITESPACES ||
- rWordType == WordType::DICTIONARY_WORD ) &&
- u_isWhitespace(Text.iterateCodePoints(&rv.startPos, 0)) )
+ if ((rWordType == WordType::ANYWORD_IGNOREWHITESPACES
+ && u_isUWhiteSpace(Text.iterateCodePoints(&rv.startPos, 0)))
+ || (rWordType == WordType::DICTIONARY_WORD
+ && u_isWhitespace(Text.iterateCodePoints(&rv.startPos, 0))))
rv.startPos = icuBI->mpValue->mpBreakIterator->following(rv.startPos);
rv.endPos = icuBI->mpValue->mpBreakIterator->following(rv.startPos);
@@ -402,9 +403,11 @@ Boundary SAL_CALL BreakIterator_Unicode::previousWord(const OUString& Text, sal_
if( rv.startPos < 0)
rv.endPos = rv.startPos;
else {
- if ( (rWordType == WordType::ANYWORD_IGNOREWHITESPACES ||
- rWordType == WordType::DICTIONARY_WORD) &&
- u_isWhitespace(Text.iterateCodePoints(&rv.startPos, 0)) )
+
+ if ((rWordType == WordType::ANYWORD_IGNOREWHITESPACES
+ && u_isUWhiteSpace(Text.iterateCodePoints(&rv.startPos, 0)))
+ || (rWordType == WordType::DICTIONARY_WORD
+ && u_isWhitespace(Text.iterateCodePoints(&rv.startPos, 0))))
rv.startPos = icuBI->mpValue->mpBreakIterator->preceding(rv.startPos);
rv.endPos = icuBI->mpValue->mpBreakIterator->following(rv.startPos);