summaryrefslogtreecommitdiff
path: root/i18npool
diff options
context:
space:
mode:
authorJens-Heiner Rechtien <hr@openoffice.org>2004-03-08 16:16:04 +0000
committerJens-Heiner Rechtien <hr@openoffice.org>2004-03-08 16:16:04 +0000
commitf0939f43315a21f5134cd631773ddae7cfef4493 (patch)
treeb0860de030413d6eac7ebfca4d46ad0bf3a60b04 /i18npool
parentc0f5a2a1e88c224316b9a295d90dce55ba639188 (diff)
INTEGRATION: CWS i18n09 (1.12.2); FILE MERGED
2003/12/09 19:35:48 khong 1.12.2.4: #112021# fix word boundary problem on beginning and end of the string 2003/12/08 23:47:26 khong 1.12.2.3: #i21907# fix isBeginWord and isEndWord problem 2003/11/18 22:55:08 khong 1.12.2.2: #i21290# #i22530# #i14640# extend CTL script support, extend Greek script type 2003/11/18 22:31:59 khong 1.12.2.1: #i21290# #i22530# #i14640# extend CTL script support, extend Greek script type
Diffstat (limited to 'i18npool')
-rw-r--r--i18npool/source/breakiterator/breakiteratorImpl.cxx87
1 files changed, 34 insertions, 53 deletions
diff --git a/i18npool/source/breakiterator/breakiteratorImpl.cxx b/i18npool/source/breakiterator/breakiteratorImpl.cxx
index 7777c79beb1d..ff3076f85299 100644
--- a/i18npool/source/breakiterator/breakiteratorImpl.cxx
+++ b/i18npool/source/breakiterator/breakiteratorImpl.cxx
@@ -2,9 +2,9 @@
*
* $RCSfile: breakiteratorImpl.cxx,v $
*
- * $Revision: 1.13 $
+ * $Revision: 1.14 $
*
- * last change: $Author: rt $ $Date: 2004-01-20 13:20:28 $
+ * last change: $Author: hr $ $Date: 2004-03-08 17:16:04 $
*
* The Contents of this file are made available subject to the terms of
* either of the following licenses
@@ -60,7 +60,7 @@
************************************************************************/
#include <breakiteratorImpl.hxx>
-#include <unicode.hxx>
+#include <i18nutil/unicode.hxx>
#include <rtl/ustrbuf.hxx>
using namespace ::com::sun::star::uno;
@@ -201,17 +201,17 @@ Boundary SAL_CALL BreakIteratorImpl::getWordBoundary( const OUString& Text, sal_
prev = skipSpace(Text, nPos, len, rWordType, sal_False);
if (prev == 0 && next == len) {
result.endPos = result.startPos = nPos;
+ } else if (prev == 0 && ! bDirection) {
+ result.endPos = result.startPos = 0;
+ } else if (next == len && bDirection) {
+ result.endPos = result.startPos = len;
} else {
- if (next == nPos) {
- bDirection = sal_True;
- nPos = next;
+ if (next != prev) {
+ if (next == nPos && next != len)
+ bDirection = sal_True;
+ else
+ nPos = bDirection ? next : prev;
}
- else if (prev == nPos) {
- bDirection = sal_False;
- nPos = prev;
- }
- else
- nPos = bDirection ? next : prev;
result = LBI->getWordBoundary(Text, nPos, rLocale, rWordType, bDirection);
}
}
@@ -405,47 +405,28 @@ sal_Int16 SAL_CALL BreakIteratorImpl::getWordType( const OUString& Text,
}
static ScriptTypeList typeList[] = {
- { UnicodeScript_kBasicLatin, ScriptType::LATIN }, // 0,
- { UnicodeScript_kLatin1Supplement, ScriptType::LATIN }, // 1,
- { UnicodeScript_kLatinExtendedA, ScriptType::LATIN }, // 2,
- { UnicodeScript_kLatinExtendedB, ScriptType::LATIN }, // 3,
- { UnicodeScript_kIPAExtension, ScriptType::LATIN }, // 4,
- { UnicodeScript_kSpacingModifier, ScriptType::LATIN }, // 5,
- { UnicodeScript_kCombiningDiacritical, ScriptType::LATIN }, // 6,
- { UnicodeScript_kGreek, ScriptType::LATIN }, // 7,
- { UnicodeScript_kCyrillic, ScriptType::LATIN }, // 8,
-
- { UnicodeScript_kHebrew, ScriptType::COMPLEX }, // 10,
- { UnicodeScript_kArabic, ScriptType::COMPLEX }, // 11,
- { UnicodeScript_kDevanagari, ScriptType::COMPLEX }, // 14,
- { UnicodeScript_kThai, ScriptType::COMPLEX }, // 24,
-
- { UnicodeScript_kTibetan, ScriptType::LATIN }, // 26,
-
- { UnicodeScript_kCJKRadicalsSupplement, ScriptType::ASIAN }, // 57,
- { UnicodeScript_kKangxiRadicals, ScriptType::ASIAN }, // 58,
- { UnicodeScript_kIdeographicDescriptionCharacters, ScriptType::ASIAN }, // 59,
- { UnicodeScript_kCJKSymbolPunctuation, ScriptType::ASIAN }, // 60,
- { UnicodeScript_kHiragana, ScriptType::ASIAN }, // 61,
- { UnicodeScript_kKatakana, ScriptType::ASIAN }, // 62,
- { UnicodeScript_kBopomofo, ScriptType::ASIAN }, // 63,
- { UnicodeScript_kHangulCompatibilityJamo, ScriptType::ASIAN }, // 64,
- { UnicodeScript_kKanbun, ScriptType::ASIAN }, // 65,
- { UnicodeScript_kBopomofoExtended, ScriptType::ASIAN }, // 66,
- { UnicodeScript_kEnclosedCJKLetterMonth, ScriptType::ASIAN }, // 67,
- { UnicodeScript_kCJKCompatibility, ScriptType::ASIAN }, // 68,
- { UnicodeScript_k_CJKUnifiedIdeographsExtensionA, ScriptType::ASIAN }, // 69,
- { UnicodeScript_kCJKUnifiedIdeograph, ScriptType::ASIAN }, // 70,
- { UnicodeScript_kYiSyllables, ScriptType::ASIAN }, // 71,
- { UnicodeScript_kYiRadicals, ScriptType::ASIAN }, // 72,
- { UnicodeScript_kHangulSyllable, ScriptType::ASIAN }, // 73,
- { UnicodeScript_kCJKCompatibilityIdeograph, ScriptType::ASIAN }, // 78,
- { UnicodeScript_kCombiningHalfMark, ScriptType::ASIAN }, // 81,
- { UnicodeScript_kCJKCompatibilityForm, ScriptType::ASIAN }, // 82,
- { UnicodeScript_kSmallFormVariant, ScriptType::ASIAN }, // 83,
- { UnicodeScript_kHalfwidthFullwidthForm, ScriptType::ASIAN }, // 86,
-
- { UnicodeScript_kScriptCount, ScriptType::WEAK } // 88
+ { UnicodeScript_kBasicLatin, UnicodeScript_kArmenian, ScriptType::LATIN }, // 0-9,
+ { UnicodeScript_kHebrew, UnicodeScript_kMyanmar, ScriptType::COMPLEX }, // 10-27,
+ { UnicodeScript_kGeorgian, UnicodeScript_kGeorgian, ScriptType::LATIN }, // 28,
+ { UnicodeScript_kHangulJamo, UnicodeScript_kHangulJamo, ScriptType::ASIAN }, // 29,
+ { UnicodeScript_kEthiopic, UnicodeScript_kRunic, ScriptType::LATIN }, // 30-34,
+ { UnicodeScript_kKhmer, UnicodeScript_kMongolian, ScriptType::COMPLEX }, // 35-36,
+ { UnicodeScript_kLatinExtendedAdditional,
+ UnicodeScript_kGreekExtended, ScriptType::LATIN }, // 37-38,
+ { UnicodeScript_kCJKRadicalsSupplement,
+ UnicodeScript_kHangulSyllable, ScriptType::ASIAN }, // 57-73,
+ { UnicodeScript_kCJKCompatibilityIdeograph,
+ UnicodeScript_kCJKCompatibilityIdeograph, ScriptType::ASIAN }, // 78,
+ { UnicodeScript_kArabicPresentationA,
+ UnicodeScript_kArabicPresentationA, ScriptType::COMPLEX }, // 80,
+ { UnicodeScript_kCJKCompatibilityForm,
+ UnicodeScript_kCJKCompatibilityForm, ScriptType::ASIAN }, // 82,
+ { UnicodeScript_kArabicPresentationB,
+ UnicodeScript_kArabicPresentationB, ScriptType::COMPLEX }, // 84,
+ { UnicodeScript_kHalfwidthFullwidthForm,
+ UnicodeScript_kHalfwidthFullwidthForm, ScriptType::ASIAN }, // 86,
+ { UnicodeScript_kScriptCount,
+ UnicodeScript_kScriptCount, ScriptType::WEAK } // 88
};
sal_Int16 BreakIteratorImpl::getScriptClass(sal_Unicode currentChar )