From e7e4d6778661d039d6ee9cba1da1a22cea542f3e Mon Sep 17 00:00:00 2001 From: Michael Meeks Date: Mon, 10 Dec 2012 20:41:38 +0000 Subject: fdo#55570 - use a hash instead of set initially until sorting needed This rather substantially accelerates the first use of autocorrection. Interestingly, it also appears to accelerate the sorting of the items; potentially inserting sorted items into a set is a pathological balancing case, that is avoided by the hash algorithm's randomness. --- editeng/inc/editeng/svxacorr.hxx | 37 ++++++++----- editeng/source/misc/svxacorr.cxx | 109 ++++++++++++++++++++++++++++----------- 2 files changed, 105 insertions(+), 41 deletions(-) (limited to 'editeng') diff --git a/editeng/inc/editeng/svxacorr.hxx b/editeng/inc/editeng/svxacorr.hxx index 1aec424afa8a..79f8e9df7778 100644 --- a/editeng/inc/editeng/svxacorr.hxx +++ b/editeng/inc/editeng/svxacorr.hxx @@ -33,6 +33,7 @@ #include #include +#include #include class CharClass; @@ -132,7 +133,7 @@ public: const String& GetShort() const { return sShort; } const String& GetLong() const { return sLong; } - sal_Bool IsTextOnly() const { return bIsTxtOnly; } + sal_Bool IsTextOnly() const { return bIsTxtOnly; } }; struct CompareSvxAutocorrWordList @@ -140,20 +141,32 @@ struct CompareSvxAutocorrWordList bool operator()( SvxAutocorrWord* const& lhs, SvxAutocorrWord* const& rhs ) const; }; -typedef std::set SvxAutocorrWordList_Base; -class EDITENG_DLLPUBLIC SvxAutocorrWordList : SvxAutocorrWordList_Base +typedef std::set SvxAutocorrWordList_Set; +typedef ::boost::unordered_map< ::rtl::OUString, SvxAutocorrWord *, + ::rtl::OUStringHash > SvxAutocorrWordList_Hash; + +class EDITENG_DLLPUBLIC SvxAutocorrWordList { + // only one of these contains the data + mutable SvxAutocorrWordList_Set maSet; + mutable SvxAutocorrWordList_Hash maHash; // key is 'Short' + + bool WordMatches(const SvxAutocorrWord *pFnd, + const String &rTxt, + xub_StrLen &rStt, + xub_StrLen nEndPos) const; public: - typedef SvxAutocorrWordList_Base::iterator iterator; + // free any objects still in the set + ~SvxAutocorrWordList(); + void DeleteAndDestroyAll(); + bool Insert(SvxAutocorrWord *pWord); + SvxAutocorrWord* FindAndRemove(SvxAutocorrWord *pWord); + void LoadEntry(String sWrong, String sRight, sal_Bool bOnlyTxt); + bool empty() const; + typedef std::vector Content; - // free any objects still in the set - ~SvxAutocorrWordList(); - void DeleteAndDestroyAll(); - bool Insert(SvxAutocorrWord *pWord); - SvxAutocorrWord *FindAndRemove(SvxAutocorrWord *pWord); - void LoadEntry(String sWrong, String sRight, sal_Bool bOnlyTxt); - bool empty() const; - Content getSortedContent() const; + Content getSortedContent() const; + const SvxAutocorrWord* SearchWordsInList(const String& rTxt, xub_StrLen& rStt, xub_StrLen nEndPos) const; }; diff --git a/editeng/source/misc/svxacorr.cxx b/editeng/source/misc/svxacorr.cxx index 349f6299bf51..aaee9533c603 100644 --- a/editeng/source/misc/svxacorr.cxx +++ b/editeng/source/misc/svxacorr.cxx @@ -2657,38 +2657,63 @@ SvxAutocorrWordList::~SvxAutocorrWordList() void SvxAutocorrWordList::DeleteAndDestroyAll() { - for( const_iterator it = SvxAutocorrWordList_Base::begin(); - it != SvxAutocorrWordList_Base::end(); ++it ) - delete *it; - clear(); + for( SvxAutocorrWordList_Hash::const_iterator it = maHash.begin(); it != maHash.end(); ++it ) + delete it->second; + maHash.clear(); + + for( SvxAutocorrWordList_Set::const_iterator it2 = maSet.begin(); it2 != maSet.end(); ++it2 ) + delete *it2; + maSet.clear(); } +// returns true if inserted bool SvxAutocorrWordList::Insert(SvxAutocorrWord *pWord) { - return SvxAutocorrWordList_Base::insert( pWord ).second; + if ( maSet.size() == 0 ) // use the hash + { + rtl::OUString aShort( pWord->GetShort() ); + bool bThere = maHash.find( aShort ) != maHash.end(); + if (!bThere) + maHash.insert( std::pair( aShort, pWord ) ); + return !bThere; + } + else + return maSet.insert( pWord ).second; } void SvxAutocorrWordList::LoadEntry(String sWrong, String sRight, sal_Bool bOnlyTxt) { SvxAutocorrWord* pNew = new SvxAutocorrWord( sWrong, sRight, bOnlyTxt ); - if( !Insert( pNew ) ) delete pNew; } bool SvxAutocorrWordList::empty() const { - return SvxAutocorrWordList_Base::empty(); + return maHash.empty() && maSet.empty(); } SvxAutocorrWord *SvxAutocorrWordList::FindAndRemove(SvxAutocorrWord *pWord) { SvxAutocorrWord *pMatch = NULL; - SvxAutocorrWordList::iterator it = find( pWord ); - if( it != end() ) + + if ( maSet.size() == 0 ) // use the hash { - pMatch = *it; - erase (it); + SvxAutocorrWordList_Hash::iterator it = maHash.find( pWord->GetShort() ); + if( it != maHash.end() ) + { + pMatch = it->second; + maHash.erase (it); + } + } + else + { + SvxAutocorrWordList_Set::iterator it = maSet.find( pWord ); + if( it != maSet.end() ) + { + pMatch = *it; + maSet.erase (it); + } } return pMatch; } @@ -2697,35 +2722,61 @@ SvxAutocorrWord *SvxAutocorrWordList::FindAndRemove(SvxAutocorrWord *pWord) SvxAutocorrWordList::Content SvxAutocorrWordList::getSortedContent() const { Content aContent; - for( const_iterator it = begin(); it != SvxAutocorrWordList_Base::end(); ++it ) + + // convert from hash to set permanantly + if ( maSet.size() == 0 ) + { + // This beasty has some O(N log(N)) in a terribly slow ICU collate fn. + for( SvxAutocorrWordList_Hash::const_iterator it = maHash.begin(); it != maHash.end(); ++it ) + maSet.insert( it->second ); + maHash.clear(); + } + for( SvxAutocorrWordList_Set::const_iterator it = maSet.begin(); it != maSet.end(); ++it ) aContent.push_back( *it ); + return aContent; } -const SvxAutocorrWord* SvxAutocorrWordList::SearchWordsInList(const String& rTxt, xub_StrLen& rStt, - xub_StrLen nEndPos) const +bool SvxAutocorrWordList::WordMatches(const SvxAutocorrWord *pFnd, + const String &rTxt, + xub_StrLen &rStt, + xub_StrLen nEndPos) const { - TransliterationWrapper& rCmp = GetIgnoreTranslWrapper(); - for( SvxAutocorrWordList::const_iterator it = begin(); it != SvxAutocorrWordList_Base::end(); ++it ) + const String& rChk = pFnd->GetShort(); + if( nEndPos >= rChk.Len() ) { - const SvxAutocorrWord* pFnd = *it; - const String& rChk = pFnd->GetShort(); - if( nEndPos >= rChk.Len() ) + xub_StrLen nCalcStt = nEndPos - rChk.Len(); + if( ( !nCalcStt || nCalcStt == rStt || + ( nCalcStt < rStt && + IsWordDelim( rTxt.GetChar( nCalcStt - 1 ) ))) ) { - xub_StrLen nCalcStt = nEndPos - rChk.Len(); - if( ( !nCalcStt || nCalcStt == rStt || - ( nCalcStt < rStt && - IsWordDelim( rTxt.GetChar(nCalcStt - 1 ) ))) ) + TransliterationWrapper& rCmp = GetIgnoreTranslWrapper(); + + rtl::OUString sWord(rTxt.GetBuffer() + nCalcStt, rChk.Len()); + if( rCmp.isEqual( rChk, sWord )) { - rtl::OUString sWord(rTxt.GetBuffer() + nCalcStt, rChk.Len()); - if( rCmp.isEqual( rChk, sWord )) - { - rStt = nCalcStt; - return pFnd; - } + rStt = nCalcStt; + return true; } } } + return false; +} + +const SvxAutocorrWord* SvxAutocorrWordList::SearchWordsInList(const String& rTxt, xub_StrLen& rStt, + xub_StrLen nEndPos) const +{ + for( SvxAutocorrWordList_Hash::const_iterator it = maHash.begin(); it != maHash.end(); ++it ) + { + if( WordMatches( it->second, rTxt, rStt, nEndPos ) ) + return it->second; + } + + for( SvxAutocorrWordList_Set::const_iterator it2 = maSet.begin(); it2 != maSet.end(); ++it2 ) + { + if( WordMatches( *it2, rTxt, rStt, nEndPos ) ) + return *it2; + } return 0; } -- cgit v1.2.3