diff options
author | Rüdiger Timm <rt@openoffice.org> | 2007-07-26 08:08:51 +0000 |
---|---|---|
committer | Rüdiger Timm <rt@openoffice.org> | 2007-07-26 08:08:51 +0000 |
commit | 2791553b4e3fc5e04b96d0b2fd119d9fba1946bc (patch) | |
tree | c0f1d7d36b9b13f61a31b1b995e1dee39a65b612 /i18npool/source/breakiterator/xdictionary.cxx | |
parent | 1c79a2bf1e89ac4eb409922ab7eb8ad3cacc688a (diff) |
INTEGRATION: CWS i18n31 (1.14.60); FILE MERGED
2007/07/16 22:18:44 khong 1.14.60.4: i75631 i75632 i75633 i75412 handle surrogate pair characters
2007/07/13 20:37:32 khong 1.14.60.3: #i75632# use ICU characters properties
2007/07/04 01:17:22 khong 1.14.60.2: i75631 i75632 i75633 i75412 handle surrogate pair characters
2007/06/27 04:33:11 khong 1.14.60.1: i75631 i75632 i75633 i75412 handle surrogate pair characters
Diffstat (limited to 'i18npool/source/breakiterator/xdictionary.cxx')
-rw-r--r-- | i18npool/source/breakiterator/xdictionary.cxx | 45 |
1 files changed, 27 insertions, 18 deletions
diff --git a/i18npool/source/breakiterator/xdictionary.cxx b/i18npool/source/breakiterator/xdictionary.cxx index 39574bf9d36a..6b854a0ba7f1 100644 --- a/i18npool/source/breakiterator/xdictionary.cxx +++ b/i18npool/source/breakiterator/xdictionary.cxx @@ -4,9 +4,9 @@ * * $RCSfile: xdictionary.cxx,v $ * - * $Revision: 1.14 $ + * $Revision: 1.15 $ * - * last change: $Author: obo $ $Date: 2006-09-17 09:14:44 $ + * last change: $Author: rt $ $Date: 2007-07-26 09:08:51 $ * * The Contents of this file are made available subject to * the terms of GNU Lesser General Public License Version 2.1. @@ -47,7 +47,7 @@ #include <com/sun/star/i18n/WordType.hpp> #include <xdictionary.hxx> -#include <i18nutil/unicode.hxx> +#include <unicode/uchar.h> #include <string.h> #include <breakiteratorImpl.hxx> @@ -168,13 +168,13 @@ sal_Bool SAL_CALL xdictionary::seekSegment(const sal_Unicode *text, sal_Int32 po sal_Int32 len, Boundary& segBoundary) { for (segBoundary.startPos = pos - 1; segBoundary.startPos >= 0 && - (unicode::isWhiteSpace(text[segBoundary.startPos]) || exists(text[segBoundary.startPos])); + (u_isWhitespace((sal_uInt32)text[segBoundary.startPos]) || exists(text[segBoundary.startPos])); segBoundary.startPos--); segBoundary.startPos++; for (segBoundary.endPos = pos; segBoundary.endPos < len && - (unicode::isWhiteSpace(text[segBoundary.endPos]) || exists(text[segBoundary.endPos])); + (u_isWhitespace((sal_uInt32)text[segBoundary.endPos]) || exists(text[segBoundary.endPos])); segBoundary.endPos++); return segBoundary.endPos > segBoundary.startPos + 1; @@ -224,7 +224,7 @@ WordBreakCache& SAL_CALL xdictionary::getCache(const sal_Unicode *text, Boundary while (aCache.wordboundary[i] < aCache.length) { len = 0; // look the continuous white space as one word and cashe it - while (unicode::isWhiteSpace(text[wordBoundary.startPos + aCache.wordboundary[i] + len])) + while (u_isWhitespace((sal_uInt32)text[wordBoundary.startPos + aCache.wordboundary[i] + len])) len ++; if (len == 0) { @@ 
-272,25 +272,33 @@ WordBreakCache& SAL_CALL xdictionary::getCache(const sal_Unicode *text, Boundary return aCache; } -Boundary SAL_CALL xdictionary::previousWord(const sal_Unicode *text, sal_Int32 anyPos, sal_Int32 len, sal_Int16 wordType) +Boundary SAL_CALL xdictionary::previousWord(const OUString& rText, sal_Int32 anyPos, sal_Int16 wordType) { // looking for the first non-whitespace character from anyPos - while (unicode::isWhiteSpace(text[anyPos - 1])) anyPos --; - return getWordBoundary(text, anyPos - 1, len, wordType, true); + sal_uInt32 ch = rText.iterateCodePoints(&anyPos, -1); + + while (anyPos > 0 && u_isWhitespace(ch)) ch = rText.iterateCodePoints(&anyPos, -1); + + return getWordBoundary(rText, anyPos, wordType, true); } -Boundary SAL_CALL xdictionary::nextWord(const sal_Unicode *text, sal_Int32 anyPos, sal_Int32 len, sal_Int16 wordType) +Boundary SAL_CALL xdictionary::nextWord(const OUString& rText, sal_Int32 anyPos, sal_Int16 wordType) { - boundary = getWordBoundary(text, anyPos, len, wordType, true); - // looknig for the first non-whitespace character from anyPos + boundary = getWordBoundary(rText, anyPos, wordType, true); anyPos = boundary.endPos; - while (unicode::isWhiteSpace(text[anyPos])) anyPos ++; + if (anyPos < rText.getLength()) { + // looknig for the first non-whitespace character from anyPos + sal_uInt32 ch = rText.iterateCodePoints(&anyPos, 0); + while (u_isWhitespace(ch)) ch=rText.iterateCodePoints(&anyPos, 1); + } - return getWordBoundary(text, anyPos, len, wordType, true); + return getWordBoundary(rText, anyPos, wordType, true); } -Boundary SAL_CALL xdictionary::getWordBoundary(const sal_Unicode *text, sal_Int32 anyPos, sal_Int32 len, sal_Int16 wordType, sal_Bool bDirection) +Boundary SAL_CALL xdictionary::getWordBoundary(const OUString& rText, sal_Int32 anyPos, sal_Int16 wordType, sal_Bool bDirection) { + const sal_Unicode *text=rText.getStr(); + sal_Int32 len=rText.getLength(); if (anyPos >= len || anyPos < 0) { boundary.startPos = 
boundary.endPos = anyPos < 0 ? 0 : len; } else if (seekSegment(text, anyPos, len, boundary)) { // character in dict @@ -302,17 +310,18 @@ Boundary SAL_CALL xdictionary::getWordBoundary(const sal_Unicode *text, sal_Int3 sal_Int32 startPos = aCache.wordboundary[i - 1]; // if bDirection is false if (!bDirection && startPos > 0 && startPos == (anyPos - boundary.startPos) && - unicode::isWhiteSpace(text[anyPos - 1])) + u_isWhitespace((sal_uInt32) text[anyPos - 1])) i--; boundary.endPos = aCache.wordboundary[i] + boundary.startPos; boundary.startPos += aCache.wordboundary[i - 1]; } else { - boundary.startPos = anyPos++; + boundary.startPos = anyPos; + if (anyPos < len) rText.iterateCodePoints(&anyPos, 1); boundary.endPos = anyPos < len ? anyPos : len; } if (wordType == WordType::WORD_COUNT) { // skip punctuation for word count. - while (boundary.endPos < len && unicode::isPunctuation(text[boundary.endPos])) + while (boundary.endPos < len && u_ispunct((sal_uInt32)text[boundary.endPos])) boundary.endPos++; } |