diff options
author | Eike Rathke <erack@redhat.com> | 2013-03-08 21:36:44 +0100 |
---|---|---|
committer | Fridrich Strba <fridrich@documentfoundation.org> | 2013-03-12 13:19:29 +0000 |
commit | 5dd1542bb372627167b44f1b020eab5a405fa6fa (patch) | |
tree | 66bb4901496f595cd9caea4d0c75ed00edefce4f | |
parent | d4b3e398f95225951f86d3e8f7474f7a0fb86b92 (diff) |
more regex fixes, fdo#60259 related
fdo#60259 prevent crash when searching backward for $ anchor regex
Old code wasn't prepared that searching for $ may actually return a
result set pointing behind the search string which it does with the ICU
regex engine.
(cherry picked from commit c00601dab0f5533b152cd63cec0a89bfec1ba95f)
Conflicts:
i18npool/source/search/textsearch.cxx
regex: handle zero-length matches, fdo#60259 related
Also in backward search ignore all zero-length matches except the text
end single $ anchor search. The anchor search is a valid match, treat it
as such in Writer.
This still doesn't solve the backward $ backward search, the convoluted
Writer code in that place apparently never worked, someone more familiar
with those internals should straighten out the mess.
(cherry picked from commit 3bc5cb3c485d67f1ce0541d349d11637f52ebda5)
regex: don't loop 10000 identical matches to find a single $ anchor
(cherry picked from commit ccc349d3abb70ef38cd2b7706da51b060a385908)
make forward replacement of $ work again, fdo#60259 related
broken with 3bc5cb3c485d67f1ce0541d349d11637f52ebda5
(cherry picked from commit d8dcfa0e5dbecf77c4d6a8d49caf61b339cf9b43)
Change-Id: I6b5eb28d0a54ceecb6873a3c05f18f70312ab1a2
Reviewed-on: https://gerrit.libreoffice.org/2679
Reviewed-by: Fridrich Strba <fridrich@documentfoundation.org>
Tested-by: Fridrich Strba <fridrich@documentfoundation.org>
-rw-r--r-- | i18npool/source/search/textsearch.cxx | 99 | ||||
-rw-r--r-- | sw/source/core/crsr/findtxt.cxx | 13 |
2 files changed, 79 insertions, 33 deletions
diff --git a/i18npool/source/search/textsearch.cxx b/i18npool/source/search/textsearch.cxx index dceb4d759915..997b01dd6afa 100644 --- a/i18npool/source/search/textsearch.cxx +++ b/i18npool/source/search/textsearch.cxx @@ -124,14 +124,12 @@ void TextSearch::setOptions( const SearchOptions& rOptions ) throw( RuntimeExcep sSrchStr = aSrchPara.searchString; // use transliteration here - if ( xTranslit.is() && - aSrchPara.transliterateFlags & SIMPLE_TRANS_MASK ) + if ( xTranslit.is() && aSrchPara.transliterateFlags & SIMPLE_TRANS_MASK ) sSrchStr = xTranslit->transliterateString2String( - aSrchPara.searchString, 0, aSrchPara.searchString.getLength()); + aSrchPara.searchString, 0, aSrchPara.searchString.getLength()); - if ( xTranslit2.is() && - aSrchPara.transliterateFlags & COMPLEX_TRANS_MASK ) - sSrchStr2 = xTranslit2->transliterateString2String( + if ( xTranslit2.is() && aSrchPara.transliterateFlags & COMPLEX_TRANS_MASK ) + sSrchStr2 = xTranslit2->transliterateString2String( aSrchPara.searchString, 0, aSrchPara.searchString.getLength()); // When start or end of search string is a complex script type, we need to @@ -204,22 +202,32 @@ SearchResult TextSearch::searchForward( const OUString& searchStr, sal_Int32 sta newStartPos = FindPosInSeq_Impl( offset, startPos ); if( endPos < searchStr.getLength() ) - newEndPos = FindPosInSeq_Impl( offset, endPos ); + newEndPos = FindPosInSeq_Impl( offset, endPos ); else newEndPos = in_str.getLength(); sres = (this->*fnForward)( in_str, newStartPos, newEndPos ); - for ( int k = 0; k < sres.startOffset.getLength(); k++ ) + // Map offsets back to untransliterated string. + const sal_Int32 nOffsets = offset.getLength(); + if (nOffsets) { - if (sres.startOffset[k]) - sres.startOffset[k] = offset[sres.startOffset[k]]; - // JP 20.6.2001: end is ever exclusive and then don't return - // the position of the next character - return the - // next position behind the last found character! - // "a b c" find "b" must return 2,3 and not 2,4!!! - if (sres.endOffset[k]) - sres.endOffset[k] = offset[sres.endOffset[k]-1] + 1; + // For regex nGroups is the number of groups+1 with group 0 being + // the entire match. + const sal_Int32 nGroups = sres.startOffset.getLength(); + for ( sal_Int32 k = 0; k < nGroups; k++ ) + { + const sal_Int32 nStart = sres.startOffset[k]; + if (nStart > 0) + sres.startOffset[k] = (nStart < nOffsets ? offset[nStart] : (offset[nOffsets - 1] + 1)); + // JP 20.6.2001: end is ever exclusive and then don't return + // the position of the next character - return the + // next position behind the last found character! + // "a b c" find "b" must return 2,3 and not 2,4!!! + const sal_Int32 nStop = sres.endOffset[k]; + if (nStop > 0) + sres.endOffset[k] = offset[(nStop <= nOffsets ? nStop : nOffsets) - 1] + 1; + } } } else @@ -291,24 +299,34 @@ SearchResult TextSearch::searchBackward( const OUString& searchStr, sal_Int32 st // JP 20.6.2001: also the start and end positions must be corrected! if( startPos < searchStr.getLength() ) newStartPos = FindPosInSeq_Impl( offset, startPos ); - else - newStartPos = in_str.getLength(); + else + newStartPos = in_str.getLength(); if( endPos ) - newEndPos = FindPosInSeq_Impl( offset, endPos ); + newEndPos = FindPosInSeq_Impl( offset, endPos ); sres = (this->*fnBackward)( in_str, newStartPos, newEndPos ); - for ( int k = 0; k < sres.startOffset.getLength(); k++ ) + // Map offsets back to untransliterated string. + const sal_Int32 nOffsets = offset.getLength(); + if (nOffsets) { - if (sres.startOffset[k]) - sres.startOffset[k] = offset[sres.startOffset[k] - 1] + 1; - // JP 20.6.2001: end is ever exclusive and then don't return - // the position of the next character - return the - // next position behind the last found character! - // "a b c" find "b" must return 2,3 and not 2,4!!! - if (sres.endOffset[k]) - sres.endOffset[k] = offset[sres.endOffset[k]]; + // For regex nGroups is the number of groups+1 with group 0 being + // the entire match. + const sal_Int32 nGroups = sres.startOffset.getLength(); + for ( sal_Int32 k = 0; k < nGroups; k++ ) + { + const sal_Int32 nStart = sres.startOffset[k]; + if (nStart > 0) + sres.startOffset[k] = offset[(nStart <= nOffsets ? nStart : nOffsets) - 1] + 1; + // JP 20.6.2001: end is ever exclusive and then don't return + // the position of the next character - return the + // next position behind the last found character! + // "a b c" find "b" must return 2,3 and not 2,4!!! + const sal_Int32 nStop = sres.endOffset[k]; + if (nStop > 0) + sres.endOffset[k] = (nStop < nOffsets ? offset[nStop] : (offset[nOffsets - 1] + 1)); + } } } else @@ -734,6 +752,11 @@ SearchResult TextSearch::RESrchFrwrd( const OUString& searchStr, int nEndOfs = pRegexMatcher->end( nIcuErr); if( nStartOfs < nEndOfs) break; + // If the zero-length match is behind the string, do not match it again + // and again until startPos reaches there. A match behind the string is + // a "$" anchor. + if (nStartOfs == endPos) + break; // try at next position if there was a zero-length match if( ++startPos >= endPos) return aRet; @@ -779,17 +802,35 @@ SearchResult TextSearch::RESrchBkwrd( const OUString& searchStr, // find the last match int nLastPos = 0; int nFoundEnd = 0; + int nGoodPos = 0, nGoodEnd = 0; + bool bFirst = true; do { nLastPos = pRegexMatcher->start( nIcuErr); nFoundEnd = pRegexMatcher->end( nIcuErr); + if (nLastPos < nFoundEnd) + { + // remember last non-zero-length match + nGoodPos = nLastPos; + nGoodEnd = nFoundEnd; + } if( nFoundEnd >= startPos) break; + bFirst = false; if( nFoundEnd == nLastPos) ++nFoundEnd; } while( pRegexMatcher->find( nFoundEnd, nIcuErr)); + // Ignore all zero-length matches except "$" anchor on first match. + if (nGoodPos == nGoodEnd) + { + if (bFirst && nLastPos == startPos) + nGoodPos = nLastPos; + else + return aRet; + } + // find last match again to get its details - pRegexMatcher->find( nLastPos, nIcuErr); + pRegexMatcher->find( nGoodPos, nIcuErr); // fill in the details of the last match const int nGroupCount = pRegexMatcher->groupCount(); diff --git a/sw/source/core/crsr/findtxt.cxx b/sw/source/core/crsr/findtxt.cxx index 101b002d728e..427c4fab9efe 100644 --- a/sw/source/core/crsr/findtxt.cxx +++ b/sw/source/core/crsr/findtxt.cxx @@ -446,8 +446,9 @@ bool SwPaM::DoSearch( const SearchOptions& rSearchOpt, utl::TextSearch& rSTxt, } xub_StrLen nStringEnd = nEnd; - while ( (bSrchForward && nStart < nStringEnd) || - (! bSrchForward && nStart > nStringEnd) ) + bool bZeroMatch = false; // zero-length match, i.e. only $ anchor as regex + while ( ((bSrchForward && nStart < nStringEnd) || + (! bSrchForward && nStart > nStringEnd)) && !bZeroMatch ) { // SearchAlgorithms_APPROXIMATE works on a per word base so we have to // provide the text searcher with the correct locale, because it uses @@ -475,7 +476,8 @@ bool SwPaM::DoSearch( const SearchOptions& rSearchOpt, utl::TextSearch& rSTxt, } if( nSearchScript == nCurrScript && - (rSTxt.*fnMove->fnSearch)( sCleanStr, &nStart, &nEnd, 0 )) + (rSTxt.*fnMove->fnSearch)( sCleanStr, &nStart, &nEnd, 0 ) && + !(bZeroMatch = (nStart == nEnd))) { // set section correctly *GetPoint() = *pPam->GetPoint(); @@ -518,11 +520,14 @@ bool SwPaM::DoSearch( const SearchOptions& rSearchOpt, utl::TextSearch& rSTxt, if ( bFound ) return true; - else if( ( bChkEmptyPara && !nStart && !nTxtLen ) || bChkParaEnd ) + else if( ( bChkEmptyPara && !nStart && !nTxtLen ) || bChkParaEnd) { *GetPoint() = *pPam->GetPoint(); GetPoint()->nContent = bChkParaEnd ? nTxtLen : 0; SetMark(); + /* FIXME: this condition does not work for !bSrchForward backward + * search, it probably never did. (pSttNd != &rNdIdx.GetNode()) + * is never true in this case. */ if( (bSrchForward || pSttNd != &rNdIdx.GetNode()) && Move( fnMoveForward, fnGoCntnt ) && (!bSrchForward || pSttNd != &GetPoint()->nNode.GetNode()) && |