diff options
author | Oliver Bolte <obo@openoffice.org> | 2004-03-17 08:02:48 +0000 |
---|---|---|
committer | Oliver Bolte <obo@openoffice.org> | 2004-03-17 08:02:48 +0000 |
commit | b32f3ef00bda12dcbaf2da6d60a29f1334d433d1 (patch) | |
tree | 00111906ac2123e881d40722543635ec8c9794dd /i18npool | |
parent | 170eb11a6d894ace40b2e4708264b0142ac4e603 (diff) |
INTEGRATION: CWS i18n11 (1.3.74); FILE MERGED
2004/01/06 22:30:12 khong 1.3.74.1: #105745# #107978# #109715# #110835# #112014# #112068# #112094# #112102# #112282# fix Japanese ambiguous search problems
Diffstat (limited to 'i18npool')
-rw-r--r-- | i18npool/source/search/textsearch.cxx | 322 |
1 files changed, 265 insertions, 57 deletions
diff --git a/i18npool/source/search/textsearch.cxx b/i18npool/source/search/textsearch.cxx index 7b45029f8a71..128bbfeae7a3 100644 --- a/i18npool/source/search/textsearch.cxx +++ b/i18npool/source/search/textsearch.cxx @@ -2,9 +2,9 @@ * * $RCSfile: textsearch.cxx,v $ * - * $Revision: 1.3 $ + * $Revision: 1.4 $ * - * last change: $Author: vg $ $Date: 2003-04-24 11:08:05 $ + * last change: $Author: obo $ $Date: 2004-03-17 09:02:48 $ * * The Contents of this file are made available subject to the terms of * either of the following licenses @@ -59,7 +59,6 @@ * ************************************************************************/ - #include "textsearch.hxx" #include "levdis.hxx" #include <regexp/reclass.hxx> @@ -115,10 +114,28 @@ using namespace ::com::sun::star::lang; using namespace ::com::sun::star::i18n; using namespace ::rtl; +static sal_Int32 COMPLEX_TRANS_MASK_TMP = + TransliterationModules_ignoreBaFa_ja_JP | + TransliterationModules_ignoreIterationMark_ja_JP | + TransliterationModules_ignoreTiJi_ja_JP | + TransliterationModules_ignoreHyuByu_ja_JP | + TransliterationModules_ignoreSeZe_ja_JP | + TransliterationModules_ignoreIandEfollowedByYa_ja_JP | + TransliterationModules_ignoreKiKuFollowedBySa_ja_JP | + TransliterationModules_ignoreProlongedSoundMark_ja_JP; +static const sal_Int32 SIMPLE_TRANS_MASK = 0xffffffff ^ COMPLEX_TRANS_MASK_TMP; +static const sal_Int32 COMPLEX_TRANS_MASK = + COMPLEX_TRANS_MASK_TMP | + TransliterationModules_IGNORE_KANA | + TransliterationModules_IGNORE_WIDTH; + // Above 2 transliteration is simple but need to take effect in + // complex transliteration + TextSearch::TextSearch(const Reference < XMultiServiceFactory > & rxMSF) : pRegExp( 0 ) , pWLD( 0 ) , pJumpTable( 0 ) + , pJumpTable2( 0 ) , xMSF( rxMSF ) { SearchOptions aOpt; @@ -133,6 +150,7 @@ TextSearch::~TextSearch() delete pRegExp; delete pWLD; delete pJumpTable; + delete pJumpTable2; } void TextSearch::setOptions( const SearchOptions& rOptions ) throw( RuntimeException ) @@ -142,9 +160,10 @@ void TextSearch::setOptions( const SearchOptions& rOptions ) throw( RuntimeExcep delete pRegExp, pRegExp = 0; delete pWLD, pWLD = 0; delete pJumpTable, pJumpTable = 0; + delete pJumpTable2, pJumpTable2 = 0; // Create Transliteration class - if( aSrchPara.transliterateFlags ) + if( aSrchPara.transliterateFlags & SIMPLE_TRANS_MASK ) { if( !xTranslit.is() ) { @@ -156,16 +175,35 @@ void TextSearch::setOptions( const SearchOptions& rOptions ) throw( RuntimeExcep (const Reference< XExtendedTransliteration >*)0)) >>= xTranslit; } - // Load transliteration module if( xTranslit.is() ) xTranslit->loadModule( - (TransliterationModules)aSrchPara.transliterateFlags, + (TransliterationModules)( aSrchPara.transliterateFlags & SIMPLE_TRANS_MASK ), aSrchPara.Locale); } else if( xTranslit.is() ) xTranslit = 0; + // Create Transliteration for 2<->1, 2<->2 transliteration + if ( aSrchPara.transliterateFlags & COMPLEX_TRANS_MASK ) + { + if( !xTranslit2.is() ) + { + Reference < XInterface > xI = xMSF->createInstance( + OUString::createFromAscii( + "com.sun.star.i18n.Transliteration")); + if ( xI.is() ) + xI->queryInterface( ::getCppuType( + (const Reference< XExtendedTransliteration >*)0)) + >>= xTranslit2; + } + // Load transliteration module + if( xTranslit2.is() ) + xTranslit2->loadModule( + (TransliterationModules)( aSrchPara.transliterateFlags & COMPLEX_TRANS_MASK ), + aSrchPara.Locale); + } + if ( !xBreak.is() ) { Reference < XInterface > xI = xMSF->createInstance( @@ -180,10 +218,15 @@ void TextSearch::setOptions( const SearchOptions& rOptions ) throw( RuntimeExcep // use transliteration here, but only if not RegEx, which does it different if ( aSrchPara.algorithmType != SearchAlgorithms_REGEXP && xTranslit.is() && - aSrchPara.transliterateFlags ) + aSrchPara.transliterateFlags & SIMPLE_TRANS_MASK ) sSrchStr = xTranslit->transliterateString2String( aSrchPara.searchString, 0, aSrchPara.searchString.getLength()); + if ( aSrchPara.algorithmType != SearchAlgorithms_REGEXP && xTranslit2.is() && + aSrchPara.transliterateFlags & COMPLEX_TRANS_MASK ) + sSrchStr2 = xTranslit2->transliterateString2String( + aSrchPara.searchString, 0, aSrchPara.searchString.getLength()); + // When start or end of search string is a complex script type, we need to // make sure the result boundary is not located in the middle of cell. checkCTLStart = (xBreak.is() && (xBreak->getScriptType(sSrchStr, 0) == @@ -240,42 +283,86 @@ SearchResult TextSearch::searchForward( const OUString& searchStr, sal_Int32 sta SearchResult sres; OUString in_str(searchStr); + sal_Int32 newStartPos = startPos; + sal_Int32 newEndPos = endPos; + + bUsePrimarySrchStr = true; if ( xTranslit.is() ) { + // apply normal transliteration (1<->1, 1<->0) com::sun::star::uno::Sequence <sal_Int32> offset( in_str.getLength()); - - in_str = xTranslit->transliterate( searchStr, 0, in_str.getLength(), offset); + in_str = xTranslit->transliterate( searchStr, 0, in_str.getLength(), offset ); // JP 20.6.2001: also the start and end positions must be corrected! if( startPos ) - startPos = FindPosInSeq_Impl( offset, startPos ); + newStartPos = FindPosInSeq_Impl( offset, startPos ); if( endPos < searchStr.getLength() ) - endPos = FindPosInSeq_Impl( offset, endPos ); + newEndPos = FindPosInSeq_Impl( offset, endPos ); else - endPos = in_str.getLength(); + newEndPos = in_str.getLength(); - sres = (this->*fnForward)( in_str, startPos, endPos ); + sres = (this->*fnForward)( in_str, newStartPos, newEndPos ); for ( int k = 0; k < sres.startOffset.getLength(); k++ ) { if (sres.startOffset[k]) - sres.startOffset[k] = offset[sres.startOffset[k]-1] + 1; + sres.startOffset[k] = offset[sres.startOffset[k]]; // JP 20.6.2001: end is ever exclusive and then don't return // the position of the next character - return the // next position behind the last found character! // "a b c" find "b" must return 2,3 and not 2,4!!! if (sres.endOffset[k]) - sres.endOffset[k] = offset[sres.endOffset[k]-1] + 1; + sres.endOffset[k] = offset[sres.endOffset[k]-1] + 1; } - } else { sres = (this->*fnForward)( in_str, startPos, endPos ); } + if ( xTranslit2.is() && aSrchPara.algorithmType != SearchAlgorithms_REGEXP) + { + SearchResult sres2; + + in_str = OUString(searchStr); + com::sun::star::uno::Sequence <sal_Int32> offset( in_str.getLength()); + + in_str = xTranslit2->transliterate( searchStr, 0, in_str.getLength(), offset ); + + if( startPos ) + startPos = FindPosInSeq_Impl( offset, startPos ); + + if( endPos < searchStr.getLength() ) + endPos = FindPosInSeq_Impl( offset, endPos ); + else + endPos = in_str.getLength(); + + bUsePrimarySrchStr = false; + sres2 = (this->*fnForward)( in_str, startPos, endPos ); + + for ( int k = 0; k < sres2.startOffset.getLength(); k++ ) + { + if (sres2.startOffset[k]) + sres2.startOffset[k] = offset[sres2.startOffset[k]-1] + 1; + if (sres2.endOffset[k]) + sres2.endOffset[k] = offset[sres2.endOffset[k]-1] + 1; + } + + // pick first and long one + if ( sres.subRegExpressions == 0) + return sres2; + if ( sres2.subRegExpressions == 1) + { + if ( sres.startOffset[0] > sres2.startOffset[0]) + return sres2; + else if ( sres.startOffset[0] == sres2.startOffset[0] && + sres.endOffset[0] < sres2.endOffset[0]) + return sres2; + } + } + return sres; } @@ -285,34 +372,38 @@ SearchResult TextSearch::searchBackward( const OUString& searchStr, sal_Int32 st SearchResult sres; OUString in_str(searchStr); + sal_Int32 newStartPos = startPos; + sal_Int32 newEndPos = endPos; + + bUsePrimarySrchStr = true; if ( xTranslit.is() ) { + // apply only simple 1<->1 transliteration here com::sun::star::uno::Sequence <sal_Int32> offset( in_str.getLength()); + in_str = xTranslit->transliterate( searchStr, 0, in_str.getLength(), offset ); - in_str = xTranslit->transliterate(searchStr, 0, in_str.getLength(), offset); - - // JP 20.6.2001: the start and end positions must be corrected too! + // JP 20.6.2001: also the start and end positions must be corrected! if( startPos < searchStr.getLength() ) - startPos = FindPosInSeq_Impl( offset, startPos ); - else - startPos = in_str.getLength(); + newStartPos = FindPosInSeq_Impl( offset, startPos ); + else + newStartPos = in_str.getLength(); if( endPos ) - endPos = FindPosInSeq_Impl( offset, endPos ); + newEndPos = FindPosInSeq_Impl( offset, endPos ); - sres = (this->*fnBackward)( in_str, startPos, endPos ); + sres = (this->*fnBackward)( in_str, newStartPos, newEndPos ); - for( int k = 0; k < sres.startOffset.getLength(); k++ ) + for ( int k = 0; k < sres.startOffset.getLength(); k++ ) { - // JP 20.6.2001: start is ever exclusive and then don't return - // the position of the prev character - return the - // prev position before the first found character! - // "a b c" find "b" must return 3,2 and not 4,2!!! if (sres.startOffset[k]) - sres.startOffset[k] = offset[sres.startOffset[k]-1]+1; + sres.startOffset[k] = offset[sres.startOffset[k] - 1] + 1; + // JP 20.6.2001: end is ever exclusive and then don't return + // the position of the next character - return the + // next position behind the last found character! + // "a b c" find "b" must return 2,3 and not 2,4!!! if (sres.endOffset[k]) - sres.endOffset[k] = offset[sres.endOffset[k]-1]+1; + sres.endOffset[k] = offset[sres.endOffset[k]]; } } else @@ -320,6 +411,47 @@ SearchResult TextSearch::searchBackward( const OUString& searchStr, sal_Int32 st sres = (this->*fnBackward)( in_str, startPos, endPos ); } + if ( xTranslit2.is() && aSrchPara.algorithmType != SearchAlgorithms_REGEXP ) + { + SearchResult sres2; + + in_str = OUString(searchStr); + com::sun::star::uno::Sequence <sal_Int32> offset( in_str.getLength()); + + in_str = xTranslit2->transliterate(searchStr, 0, in_str.getLength(), offset); + + if( startPos < searchStr.getLength() ) + startPos = FindPosInSeq_Impl( offset, startPos ); + else + startPos = in_str.getLength(); + + if( endPos ) + endPos = FindPosInSeq_Impl( offset, endPos ); + + bUsePrimarySrchStr = false; + sres2 = (this->*fnBackward)( in_str, startPos, endPos ); + + for( int k = 0; k < sres2.startOffset.getLength(); k++ ) + { + if (sres2.startOffset[k]) + sres2.startOffset[k] = offset[sres2.startOffset[k]-1]+1; + if (sres2.endOffset[k]) + sres2.endOffset[k] = offset[sres2.endOffset[k]-1]+1; + } + + // pick last and long one + if ( sres.subRegExpressions == 0 ) + return sres2; + if ( sres2.subRegExpressions == 1 ) + { + if ( sres.startOffset[0] < sres2.startOffset[0] ) + return sres2; + if ( sres.startOffset[0] == sres2.startOffset[0] && + sres.endOffset[0] > sres2.endOffset[0] ) + return sres2; + } + } + return sres; } @@ -357,6 +489,7 @@ bool TextSearch::IsDelimiter( const OUString& rStr, sal_Int32 nPos ) const // --------- methods for the kind of boyer-morre search ------------------ + void TextSearch::MakeForwardTab() { // create the jumptable for the search text @@ -375,7 +508,8 @@ void TextSearch::MakeForwardTab() { sal_Unicode cCh = sSrchStr[n]; sal_Int32 nDiff = nLen - n - 1; - TextSearchJumpTable::value_type aEntry( cCh, nDiff ); + TextSearchJumpTable::value_type aEntry( cCh, nDiff ); + ::std::pair< TextSearchJumpTable::iterator, bool > aPair = pJumpTable->insert( aEntry ); if ( !aPair.second ) @@ -383,6 +517,33 @@ void TextSearch::MakeForwardTab() } } +void TextSearch::MakeForwardTab2() +{ + // create the jumptable for the search text + if( pJumpTable2 ) + { + if( bIsForwardTab ) + return ; // the jumpTable is ok + delete pJumpTable2; + } + bIsForwardTab = true; + + sal_Int32 n, nLen = sSrchStr2.getLength(); + pJumpTable2 = new TextSearchJumpTable; + + for( n = 0; n < nLen - 1; ++n ) + { + sal_Unicode cCh = sSrchStr2[n]; + sal_Int32 nDiff = nLen - n - 1; + + TextSearchJumpTable::value_type aEntry( cCh, nDiff ); + ::std::pair< TextSearchJumpTable::iterator, bool > aPair = + pJumpTable2->insert( aEntry ); + if ( !aPair.second ) + (*(aPair.first)).second = nDiff; + } +} + void TextSearch::MakeBackwardTab() { // create the jumptable for the search text @@ -408,11 +569,47 @@ void TextSearch::MakeBackwardTab() } } +void TextSearch::MakeBackwardTab2() +{ + // create the jumptable for the search text + if( pJumpTable2 ) + { + if( !bIsForwardTab ) + return ; // the jumpTable is ok + delete pJumpTable2; + } + bIsForwardTab = false; + + sal_Int32 n, nLen = sSrchStr2.getLength(); + pJumpTable2 = new TextSearchJumpTable; + + for( n = nLen-1; n > 0; --n ) + { + sal_Unicode cCh = sSrchStr2[n]; + TextSearchJumpTable::value_type aEntry( cCh, n ); + ::std::pair< TextSearchJumpTable::iterator, bool > aPair = + pJumpTable2->insert( aEntry ); + if ( !aPair.second ) + (*(aPair.first)).second = n; + } +} + sal_Int32 TextSearch::GetDiff( const sal_Unicode cChr ) const { - TextSearchJumpTable::const_iterator iLook = pJumpTable->find( cChr ); - if ( iLook == pJumpTable->end() ) - return sSrchStr.getLength(); + TextSearchJumpTable *pJump; + OUString sSearchKey; + + if ( bUsePrimarySrchStr ) { + pJump = pJumpTable; + sSearchKey = sSrchStr; + } else { + pJump = pJumpTable2; + sSearchKey = sSrchStr2; + } + + TextSearchJumpTable::const_iterator iLook = pJump->find( cChr ); + if ( iLook == pJump->end() ) + return sSearchKey.getLength(); return (*iLook).second; } @@ -423,37 +620,42 @@ SearchResult TextSearch::NSrchFrwrd( const OUString& searchStr, sal_Int32 startP SearchResult aRet; aRet.subRegExpressions = 0; + OUString sSearchKey = bUsePrimarySrchStr ? sSrchStr : sSrchStr2; + OUString aStr( searchStr ); sal_Int32 nSuchIdx = aStr.getLength(); sal_Int32 nEnde = endPos; - if( !nSuchIdx || !sSrchStr.getLength() || sSrchStr.getLength() > nSuchIdx ) + if( !nSuchIdx || !sSearchKey.getLength() || sSearchKey.getLength() > nSuchIdx ) return aRet; - if( nEnde < sSrchStr.getLength() ) // position inside the search region ? + if( nEnde < sSearchKey.getLength() ) // position inside the search region ? return aRet; - nEnde -= sSrchStr.getLength(); + nEnde -= sSearchKey.getLength(); - MakeForwardTab(); // create the jumptable + if (bUsePrimarySrchStr) + MakeForwardTab(); // create the jumptable + else + MakeForwardTab2(); for (sal_Int32 nCmpIdx = startPos; // start position for the search nCmpIdx <= nEnde; - nCmpIdx += GetDiff( aStr[nCmpIdx + sSrchStr.getLength()-1])) + nCmpIdx += GetDiff( aStr[nCmpIdx + sSearchKey.getLength()-1])) { // if the match would be the completed cells, skip it. if ( (checkCTLStart && !isCellStart( aStr, nCmpIdx )) || (checkCTLEnd - && !isCellStart( aStr, nCmpIdx + sSrchStr.getLength())) ) + && !isCellStart( aStr, nCmpIdx + sSearchKey.getLength())) ) continue; - nSuchIdx = sSrchStr.getLength() - 1; - while( nSuchIdx >= 0 && sSrchStr[nSuchIdx] == aStr[nCmpIdx + nSuchIdx]) + nSuchIdx = sSearchKey.getLength() - 1; + while( nSuchIdx >= 0 && sSearchKey[nSuchIdx] == aStr[nCmpIdx + nSuchIdx]) { if( nSuchIdx == 0 ) { if( SearchFlags::NORM_WORD_ONLY & aSrchPara.searchFlag ) { - sal_Int32 nFndEnd = nCmpIdx + sSrchStr.getLength(); + sal_Int32 nFndEnd = nCmpIdx + sSearchKey.getLength(); bool bAtStart = !nCmpIdx; bool bAtEnd = nFndEnd == endPos; bool bDelimBefore = bAtStart || IsDelimiter( aStr, nCmpIdx-1 ); @@ -473,7 +675,8 @@ SearchResult TextSearch::NSrchFrwrd( const OUString& searchStr, sal_Int32 startP aRet.startOffset.realloc( 1 ); aRet.startOffset[ 0 ] = nCmpIdx; aRet.endOffset.realloc( 1 ); - aRet.endOffset[ 0 ] = nCmpIdx + sSrchStr.getLength(); + aRet.endOffset[ 0 ] = nCmpIdx + sSearchKey.getLength(); + return aRet; } else @@ -489,18 +692,23 @@ SearchResult TextSearch::NSrchBkwrd( const OUString& searchStr, sal_Int32 startP SearchResult aRet; aRet.subRegExpressions = 0; + OUString sSearchKey = bUsePrimarySrchStr ? sSrchStr : sSrchStr2; + OUString aStr( searchStr ); sal_Int32 nSuchIdx = aStr.getLength(); sal_Int32 nEnde = endPos; - if( nSuchIdx == 0 || sSrchStr.getLength() == 0 || sSrchStr.getLength() > nSuchIdx) + if( nSuchIdx == 0 || sSearchKey.getLength() == 0 || sSearchKey.getLength() > nSuchIdx) return aRet; - MakeBackwardTab(); // create the jumptable + if (bUsePrimarySrchStr) + MakeBackwardTab(); // create the jumptable + else + MakeBackwardTab2(); if( nEnde == nSuchIdx ) // end position for the search - nEnde = sSrchStr.getLength(); + nEnde = sSearchKey.getLength(); else - nEnde += sSrchStr.getLength(); + nEnde += sSearchKey.getLength(); sal_Int32 nCmpIdx = startPos; // start position for the search @@ -508,18 +716,18 @@ SearchResult TextSearch::NSrchBkwrd( const OUString& searchStr, sal_Int32 startP { // if the match would be the completed cells, skip it. if ( (!checkCTLStart || isCellStart( aStr, nCmpIdx - - sSrchStr.getLength() )) && (!checkCTLEnd || + sSearchKey.getLength() )) && (!checkCTLEnd || isCellStart( aStr, nCmpIdx))) { nSuchIdx = 0; - while( nSuchIdx < sSrchStr.getLength() && sSrchStr[nSuchIdx] == - aStr[nCmpIdx + nSuchIdx - sSrchStr.getLength()] ) + while( nSuchIdx < sSearchKey.getLength() && sSearchKey[nSuchIdx] == + aStr[nCmpIdx + nSuchIdx - sSearchKey.getLength()] ) nSuchIdx++; - if( nSuchIdx >= sSrchStr.getLength() ) + if( nSuchIdx >= sSearchKey.getLength() ) { if( SearchFlags::NORM_WORD_ONLY & aSrchPara.searchFlag ) { - sal_Int32 nFndStt = nCmpIdx - sSrchStr.getLength(); + sal_Int32 nFndStt = nCmpIdx - sSearchKey.getLength(); bool bAtStart = !nFndStt; bool bAtEnd = nCmpIdx == startPos; bool bDelimBehind = IsDelimiter( aStr, nCmpIdx ); @@ -538,7 +746,7 @@ SearchResult TextSearch::NSrchBkwrd( const OUString& searchStr, sal_Int32 startP aRet.startOffset.realloc( 1 ); aRet.startOffset[ 0 ] = nCmpIdx; aRet.endOffset.realloc( 1 ); - aRet.endOffset[ 0 ] = nCmpIdx - sSrchStr.getLength(); + aRet.endOffset[ 0 ] = nCmpIdx - sSearchKey.getLength(); return aRet; } } @@ -548,12 +756,12 @@ SearchResult TextSearch::NSrchBkwrd( const OUString& searchStr, sal_Int32 startP aRet.startOffset.realloc( 1 ); aRet.startOffset[ 0 ] = nCmpIdx; aRet.endOffset.realloc( 1 ); - aRet.endOffset[ 0 ] = nCmpIdx - sSrchStr.getLength(); + aRet.endOffset[ 0 ] = nCmpIdx - sSearchKey.getLength(); return aRet; } } } - nSuchIdx = GetDiff( aStr[nCmpIdx - sSrchStr.getLength()] ); + nSuchIdx = GetDiff( aStr[nCmpIdx - sSearchKey.getLength()] ); if( nCmpIdx < nSuchIdx ) return aRet; nCmpIdx -= nSuchIdx; |