diff options
author | Ivo Hinkelmann <ihi@openoffice.org> | 2009-09-16 14:55:33 +0000 |
---|---|---|
committer | Ivo Hinkelmann <ihi@openoffice.org> | 2009-09-16 14:55:33 +0000 |
commit | e260d2a64d6de337cc0aa991e577a1c00af4a936 (patch) | |
tree | eb0858a0f45a8037b16e07d8106599841d46c4cb /i18npool/source/breakiterator/xdictionary.cxx | |
parent | e66e25555bc9d6c95144d349235216db4420c099 (diff) |
CWS-TOOLING: integrate CWS locales32
2009-09-06 19:13:15 +0200 er r275860 : #i104308# Lower and Upper Sorbian
2009-09-03 01:40:09 +0200 erack r275732 : CWS-TOOLING: rebase CWS locales32 to trunk@275331 (milestone: DEV300:m56)
2009-08-17 21:48:26 +0200 erack r275072 : * #i102920# use OUString::iterateCodePoints() in xdictionary; patch from <cmc>
* Adapted local iterateCodePoints() in breakiteratorImpl.cxx to cope with
surrogates at text end.
* Use OUString::iterateCodePoints() in BreakIterator_CJK::getLineBreak()
2009-08-09 23:14:39 +0200 erack r274792 : mergeinfo
2009-08-09 23:09:31 +0200 erack r274791 : re-adding this again, merge from tag didn't work; SVN IS A PIECE OF CRAP
2009-08-09 22:45:02 +0200 erack r274790 : don't re-add file, merge instead
2009-08-09 22:09:49 +0200 erack r274789 : cws rebase ERRONEOUSLY REMOVED THIS FILE!
2009-08-09 22:01:02 +0200 erack r274788 : adapt to new postset.mk completelangiso content; get rid of /i modifier ugliness
2009-08-09 19:40:46 +0200 erack r274787 : #i99823# sort currency list ISO,symbol,language/country; removed unused STR_EUROPE
2009-08-09 01:23:35 +0200 erack r274786 : make AZM currency legacyOnly to avoid duplicate listing in number formatter because of the identical 'man.' currency symbol
2009-08-09 01:11:38 +0200 erack r274785 : #i94445# make ROL currency legacyOnly to avoid duplicate listing in number formatter because of the identical 'lei' currency symbol
2009-08-06 19:10:34 +0200 erack r274743 : CWS-TOOLING: rebase CWS locales32 to trunk@274622 (milestone: DEV300:m54)
2009-08-04 21:26:24 +0200 erack r274634 : #i94445# new currency RON
2009-08-04 21:06:55 +0200 erack r274633 : #i103193# corrected data; from <calibaashi>
2009-07-18 23:56:55 +0200 erack r274104 : #i103408# ignore an empty LANGUAGE variable; patch from <cmc>
2009-07-07 00:21:02 +0200 erack r273764 : Langpack.ulf is gone
2009-07-07 00:02:39 +0200 erack r273763 : #i103358# add 'is' Icelandic
2009-07-04 02:10:55 +0200 erack r273720 : #i101173# typographic quotation marks
2009-07-04 01:07:44 +0200 erack r273719 : #i65127# make it compile with OSL_DEBUG_LEVEL>2 again
2009-06-26 22:50:49 +0200 erack r273444 : #i97602# add Asturian_Spain [ast-ES]; locale data contributed by <astur>/<it46>
2009-06-26 22:01:25 +0200 erack r273443 : #i101173# add Oromo [om-ET] locale data; contributed by <barreessaa>
2009-06-26 21:55:41 +0200 erack r273442 : #i101173# add Oromo [om-ET] locale data; contributed by <barreessaa>
2009-06-26 21:11:46 +0200 erack r273441 : #i102991# linguistic corrections
2009-06-26 21:02:45 +0200 erack r273440 : #i102986# add Somali 'so'
2009-06-22 00:04:35 +0200 erack r273190 : #i101235# add Uyghur_China [ug-CN] locale; contributed by <sahran>/<it46>
2009-06-21 23:35:21 +0200 erack r273189 : blah
2009-06-21 23:22:40 +0200 erack r273188 : #i99972# add Quechua (Ecuador) [qu-EC]
2009-06-21 21:57:29 +0200 erack r273187 : #i97791# add Yiddish [yi-IL]
2009-06-21 21:24:29 +0200 erack r273186 : #i102186# add Greek, Ancient [grc-GR]
2009-06-21 21:00:59 +0200 erack r273185 : #i98489# add Arabic (Oman) [ar-OM] locale data; contributed by <zayed2001>/<it46>
2009-06-21 20:54:07 +0200 erack r273184 : check ListSeparator for ';' semicolon, for consistency
2009-06-21 20:20:13 +0200 erack r273183 : ISO 4217 checks only if not legacy (e.g. Macau Pound 'P')
2009-06-21 20:00:27 +0200 erack r273182 : check CurrencyID and BankSymbol for ISO 4217
2009-06-21 19:10:36 +0200 erack r273181 : #i73118# Bokmål instead of Bokmal
2009-06-21 19:00:47 +0200 erack r273180 : #i99827# add Sardinian locale data; contributed by <valterubuntu>
2009-06-21 18:20:05 +0200 erack r273179 : give URL of svn instead of legacy cvs
2009-06-21 17:45:31 +0200 erack r273178 : #i87907# add Oromo [om-ET]
2009-06-21 17:23:14 +0200 erack r273177 : grep in completelangiso of postset.mk
2009-03-24 00:22:16 +0100 erack r269901 : #i100368# parentheses, parentheses, parentheses ...
2009-03-23 23:31:22 +0100 erack r269899 : #i99712# call OutlineNumberingLevel_Impl dtor
2009-03-23 22:37:31 +0100 erack r269897 : #i98347# register NumToCharHalfwidth
2009-03-22 18:07:40 +0100 erack r269852 : definitely ignore any output resulting from the cd command in list of rules, which may have led to a spurious rule file
Diffstat (limited to 'i18npool/source/breakiterator/xdictionary.cxx')
-rw-r--r-- | i18npool/source/breakiterator/xdictionary.cxx | 77 |
1 files changed, 52 insertions, 25 deletions
diff --git a/i18npool/source/breakiterator/xdictionary.cxx b/i18npool/source/breakiterator/xdictionary.cxx index f286dd2449ac..add22f39d58e 100644 --- a/i18npool/source/breakiterator/xdictionary.cxx +++ b/i18npool/source/breakiterator/xdictionary.cxx @@ -126,8 +126,9 @@ void xdictionary::setJapaneseWordBreak() japaneseWordBreak = sal_True; } -sal_Bool xdictionary::exists(const sal_Unicode c) { - sal_Bool exist = existMark ? sal::static_int_cast<sal_Bool>((existMark[c>>3] & (1<<(c&0x07))) != 0) : sal_False; +sal_Bool xdictionary::exists(const sal_uInt32 c) { + // 0x1FFF is the hardcoded limit in gendict for existMarks + sal_Bool exist = (existMark && ((c>>3) < 0x1FFF)) ? sal::static_int_cast<sal_Bool>((existMark[c>>3] & (1<<(c&0x07))) != 0) : sal_False; if (!exist && japaneseWordBreak) return BreakIteratorImpl::getScriptClass(c) == ScriptType::ASIAN; else @@ -197,20 +198,35 @@ sal_Bool WordBreakCache::equals(const sal_Unicode* str, Boundary& boundary) { * @param pos : Position of the given character. * @return true if CJK. 
*/ -sal_Bool xdictionary::seekSegment(const sal_Unicode *text, sal_Int32 pos, - sal_Int32 len, Boundary& segBoundary) { - for (segBoundary.startPos = pos - 1; - segBoundary.startPos >= 0 && - (u_isWhitespace((sal_uInt32)text[segBoundary.startPos]) || exists(text[segBoundary.startPos])); - segBoundary.startPos--) ; - segBoundary.startPos++; - - for (segBoundary.endPos = pos; - segBoundary.endPos < len && - (u_isWhitespace((sal_uInt32)text[segBoundary.endPos]) || exists(text[segBoundary.endPos])); - segBoundary.endPos++) ; - - return segBoundary.endPos > segBoundary.startPos + 1; +sal_Bool xdictionary::seekSegment(const rtl::OUString &rText, sal_Int32 pos, + Boundary& segBoundary) +{ + sal_Int32 indexUtf16; + segBoundary.endPos = segBoundary.startPos = pos; + + indexUtf16 = pos; + while (indexUtf16 > 0) + { + sal_uInt32 ch = rText.iterateCodePoints(&indexUtf16, -1); + if (u_isWhitespace(ch) || exists(ch)) + segBoundary.startPos = indexUtf16; + else + break; + } + + indexUtf16 = pos; + while (indexUtf16 < rText.getLength()) + { + sal_uInt32 ch = rText.iterateCodePoints(&indexUtf16, 1); + if (u_isWhitespace(ch) || exists(ch)) + segBoundary.endPos = indexUtf16; + else + break; + } + + indexUtf16 = segBoundary.startPos; + rText.iterateCodePoints(&indexUtf16, 1); + return segBoundary.endPos > indexUtf16; } #define KANJA 1 @@ -340,19 +356,24 @@ Boundary xdictionary::getWordBoundary(const OUString& rText, sal_Int32 anyPos, s sal_Int32 len=rText.getLength(); if (anyPos >= len || anyPos < 0) { boundary.startPos = boundary.endPos = anyPos < 0 ? 
0 : len; - } else if (seekSegment(text, anyPos, len, boundary)) { // character in dict + } else if (seekSegment(rText, anyPos, boundary)) { // character in dict WordBreakCache& aCache = getCache(text, boundary); sal_Int32 i = 0; - while (aCache.wordboundary[i] <= (sal_Int32)anyPos - boundary.startPos) i++; + while (aCache.wordboundary[i] <= anyPos - boundary.startPos) i++; sal_Int32 startPos = aCache.wordboundary[i - 1]; // if bDirection is false - if (!bDirection && startPos > 0 && startPos == (anyPos - boundary.startPos) && - u_isWhitespace((sal_uInt32) text[anyPos - 1])) - i--; - boundary.endPos = aCache.wordboundary[i] + boundary.startPos; - boundary.startPos += aCache.wordboundary[i - 1]; + if (!bDirection && startPos > 0 && startPos == (anyPos - boundary.startPos)) + { + sal_Int32 indexUtf16 = anyPos-1; + sal_uInt32 ch = rText.iterateCodePoints(&indexUtf16, 1); + if (u_isWhitespace(ch)) + i--; + } + boundary.endPos = boundary.startPos; + rText.iterateCodePoints(&boundary.endPos, aCache.wordboundary[i]); + rText.iterateCodePoints(&boundary.startPos, aCache.wordboundary[i-1]); } else { boundary.startPos = anyPos; if (anyPos < len) rText.iterateCodePoints(&anyPos, 1); @@ -360,8 +381,14 @@ Boundary xdictionary::getWordBoundary(const OUString& rText, sal_Int32 anyPos, s } if (wordType == WordType::WORD_COUNT) { // skip punctuation for word count. - while (boundary.endPos < len && u_ispunct((sal_uInt32)text[boundary.endPos])) - boundary.endPos++; + while (boundary.endPos < len) + { + sal_Int32 indexUtf16 = boundary.endPos; + if (u_ispunct(rText.iterateCodePoints(&indexUtf16, 1))) + boundary.endPos = indexUtf16; + else + break; + } } return boundary; |