summaryrefslogtreecommitdiff
path: root/i18npool
diff options
context:
space:
mode:
authorHerbert Dürr <hdu@apache.org>2012-09-07 12:27:55 +0000
committerEike Rathke <erack@redhat.com>2013-03-07 19:44:31 +0100
commit8c26876fea085a1bc847abba63dffa97a9499c1d (patch)
treeea4e6ab12eb7930dc66ce0e03091a75f9b5eeb2e /i18npool
parent4cbce6f183a8ecda72b8071417615c5b8057b8bd (diff)
i#120598 better emulation of regexp word-start and word-end operators
The emulation of the word-start and word-end operators provided by the previous regexp engine can be approximated much better by using the ICU regexp engine's powerful look-around feature. Patch-by: Herbert Duerr Found-by: ldgolds33@yahoo.com (cherry picked from commit ec7ef30693f10315ce80a8f5d7325a0e40855e66) Change-Id: If375d6d5bb93b3873f657673f7581f0884b3b35e
Diffstat (limited to 'i18npool')
-rw-r--r--i18npool/source/search/textsearch.cxx21
1 files changed, 14 insertions, 7 deletions
diff --git a/i18npool/source/search/textsearch.cxx b/i18npool/source/search/textsearch.cxx
index 075cd218b81c..072c0bedda6b 100644
--- a/i18npool/source/search/textsearch.cxx
+++ b/i18npool/source/search/textsearch.cxx
@@ -681,13 +681,20 @@ void TextSearch::RESrchPrepare( const ::com::sun::star::util::SearchOptions& rOp
IcuUniString aIcuSearchPatStr( (const UChar*)rPatternStr.getStr(), rPatternStr.getLength());
#ifndef DISABLE_WORDBOUND_EMULATION
// for convenience, specific syntax elements of the old regex engine are emulated
- // by using regular word boundary matching \b to replace \< and \>
- static const IcuUniString aChevronPattern( "\\\\<|\\\\>", -1, IcuUniString::kInvariant);
- static const IcuUniString aChevronReplace( "\\\\b", -1, IcuUniString::kInvariant);
- static RegexMatcher aChevronMatcher( aChevronPattern, 0, nIcuErr);
- aChevronMatcher.reset( aIcuSearchPatStr);
- aIcuSearchPatStr = aChevronMatcher.replaceAll( aChevronReplace, nIcuErr);
- aChevronMatcher.reset();
+ // - by replacing \< with "word-break followed by a look-ahead word-char"
+ static const IcuUniString aChevronPatternB( "\\\\<", -1, IcuUniString::kInvariant);
+ static const IcuUniString aChevronReplaceB( "\\\\b(?=\\\\w)", -1, IcuUniString::kInvariant);
+ static RegexMatcher aChevronMatcherB( aChevronPatternB, 0, nIcuErr);
+ aChevronMatcherB.reset( aIcuSearchPatStr);
+ aIcuSearchPatStr = aChevronMatcherB.replaceAll( aChevronReplaceB, nIcuErr);
+ aChevronMatcherB.reset();
+ // - by replacing \> with "look-behind word-char followed by a word-break"
+ static const IcuUniString aChevronPatternE( "\\\\>", -1, IcuUniString::kInvariant);
+ static const IcuUniString aChevronReplaceE( "(?<=\\\\w)\\\\b", -1, IcuUniString::kInvariant);
+ static RegexMatcher aChevronMatcherE( aChevronPatternE, 0, nIcuErr);
+ aChevronMatcherE.reset( aIcuSearchPatStr);
+ aIcuSearchPatStr = aChevronMatcherE.replaceAll( aChevronReplaceE, nIcuErr);
+ aChevronMatcherE.reset();
#endif
pRegexMatcher = new RegexMatcher( aIcuSearchPatStr, nIcuSearchFlags, nIcuErr);
if( nIcuErr)