From afae8d1e9eb37265bd356773caa77d6d8ac481bc Mon Sep 17 00:00:00 2001 From: Michael Stahl Date: Tue, 10 Mar 2015 23:19:18 +0100 Subject: i18npool: fix spurious regex ^ matching in TextSearch::searchForward() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Thanks to Eike for finding this: The anchors ^ and $ now anchor at the selection boundary because the only text the regex matcher gets passed is the selected text. This in two paragraphs aaa bbb aaa bbb aaa bbb aaa bbb when the selection spans from the second aaa to the third bbb, for "^aaa" finds the second aaa, where previously it found the third aaa at the real paragraph start. This may not be expected by the user, because the behavior of ^ is described as "Match at the beginning of a line" (or paragraph in our case), which the previous implementation did. (regression from 806ced87cfe3da72df0d8e4faf5b82535fc7d1b7) Unfortunately it's not obvious how to implement the same in searchBackward(). Change-Id: I07f7a8476b672d9511fa74ca473c32eea427698f (cherry picked from commit 9aae521b451269007f03527c83645b8b935eb419) Reviewed-on: https://gerrit.libreoffice.org/14829 Reviewed-by: Caolán McNamara Tested-by: Caolán McNamara --- i18npool/source/search/textsearch.cxx | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) (limited to 'i18npool') diff --git a/i18npool/source/search/textsearch.cxx b/i18npool/source/search/textsearch.cxx index 959227dfa817..094b5543e5d1 100644 --- a/i18npool/source/search/textsearch.cxx +++ b/i18npool/source/search/textsearch.cxx @@ -242,13 +242,26 @@ SearchResult TextSearch::searchForward( const OUString& searchStr, sal_Int32 sta in_str = xTranslit->transliterate( searchStr, startPos, endPos - startPos, offset ); // JP 20.6.2001: also the start and end positions must be corrected! - sal_Int32 const newStartPos = + sal_Int32 newStartPos = (startPos == 0) ? 0 : FindPosInSeq_Impl( offset, startPos ); - sal_Int32 const newEndPos = (endPos < searchStr.getLength()) + sal_Int32 newEndPos = (endPos < searchStr.getLength()) ? FindPosInSeq_Impl( offset, endPos ) : in_str.getLength(); + sal_Int32 nExtraOffset = 0; + if (pRegexMatcher && startPos > 0) + { + // avoid matching ^ here - in_str omits a prefix of the searchStr + // this is a really lame way to do it, but ICU only offers + // useAnchoringBounds() to disable *both* bounds but what is needed + // here is to disable only one bound and respect the other + in_str = "X" + in_str; + nExtraOffset = 1; + newStartPos += nExtraOffset; + newEndPos += nExtraOffset; + } + sres = (this->*fnForward)( in_str, newStartPos, newEndPos ); // Map offsets back to untransliterated string. @@ -260,14 +273,14 @@ SearchResult TextSearch::searchForward( const OUString& searchStr, sal_Int32 sta const sal_Int32 nGroups = sres.startOffset.getLength(); for ( sal_Int32 k = 0; k < nGroups; k++ ) { - const sal_Int32 nStart = sres.startOffset[k]; + const sal_Int32 nStart = sres.startOffset[k] - nExtraOffset; if (startPos > 0 || nStart > 0) sres.startOffset[k] = (nStart < nOffsets ? offset[nStart] : (offset[nOffsets - 1] + 1)); // JP 20.6.2001: end is ever exclusive and then don't return // the position of the next character - return the // next position behind the last found character! // "a b c" find "b" must return 2,3 and not 2,4!!! - const sal_Int32 nStop = sres.endOffset[k]; + const sal_Int32 nStop = sres.endOffset[k] - nExtraOffset; if (startPos > 0 || nStop > 0) sres.endOffset[k] = offset[(nStop <= nOffsets ? nStop : nOffsets) - 1] + 1; } @@ -345,6 +358,10 @@ SearchResult TextSearch::searchBackward( const OUString& searchStr, sal_Int32 st sal_Int32 const newEndPos = (endPos == 0) ? 0 : FindPosInSeq_Impl( offset, endPos ); + // TODO: this would need nExtraOffset handling to avoid $ matching + // if (pRegexMatcher && startPos < searchStr.getLength()) + // but that appears to be impossible with ICU regex + sres = (this->*fnBackward)( in_str, newStartPos, newEndPos ); // Map offsets back to untransliterated string. -- cgit v1.2.3