From 44ead04eb5fc61a3f56f783adb1509fab440e212 Mon Sep 17 00:00:00 2001 From: "Matthew J. Francis" Date: Thu, 11 Sep 2014 00:42:49 +0800 Subject: fdo#81272 Speed up break iterators MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Change-Id: I7a45a61465f71a1cc6f583b9ffc68157d19e51fa Reviewed-on: https://gerrit.libreoffice.org/11386 Reviewed-by: Caolán McNamara Tested-by: Caolán McNamara --- i18npool/inc/xdictionary.hxx | 2 ++ .../source/breakiterator/breakiterator_unicode.cxx | 2 +- i18npool/source/breakiterator/xdictionary.cxx | 21 +++++++++++++++++++++ 3 files changed, 24 insertions(+), 1 deletion(-) diff --git a/i18npool/inc/xdictionary.hxx b/i18npool/inc/xdictionary.hxx index 57f5238ac853..f7c0110d39fd 100644 --- a/i18npool/inc/xdictionary.hxx +++ b/i18npool/inc/xdictionary.hxx @@ -76,6 +76,8 @@ public: private: WordBreakCache cache[CACHE_MAX]; + OUString segmentCachedString; + Boundary segmentCachedBoundary; bool seekSegment(const OUString& rText, sal_Int32 pos, Boundary& boundary); WordBreakCache& getCache(const sal_Unicode *text, Boundary& boundary); diff --git a/i18npool/source/breakiterator/breakiterator_unicode.cxx b/i18npool/source/breakiterator/breakiterator_unicode.cxx index 5270b1db5bad..658d4ff861ea 100644 --- a/i18npool/source/breakiterator/breakiterator_unicode.cxx +++ b/i18npool/source/breakiterator/breakiterator_unicode.cxx @@ -185,7 +185,7 @@ void SAL_CALL BreakIterator_Unicode::loadICUBreakIterator(const com::sun::star:: } } - if (newBreak || !icuBI->aICUText.equals(rText)) + if (newBreak || icuBI->aICUText.pData != rText.pData) { // UChar != sal_Unicode in MinGW const UChar *pText = reinterpret_cast<const UChar *>(rText.getStr()); diff --git a/i18npool/source/breakiterator/xdictionary.cxx b/i18npool/source/breakiterator/xdictionary.cxx index 1067e33bcecc..b930f46d9840 100644 --- a/i18npool/source/breakiterator/xdictionary.cxx +++ b/i18npool/source/breakiterator/xdictionary.cxx @@ -271,6 +271,23 @@ bool 
xdictionary::seekSegment(const OUString &rText, sal_Int32 pos, Boundary& segBoundary) { sal_Int32 indexUtf16; + + if (segmentCachedString.pData != rText.pData) { + // Cache the passed text so we can avoid regenerating the segment if it's the same + // (pData is refcounted and assigning the OUString references it, which ensures that + // the object is the same if we get the same pointer back later) + segmentCachedString = rText; + } else { + // If pos is within the cached boundary, use that boundary + if (pos >= segmentCachedBoundary.startPos && pos <= segmentCachedBoundary.endPos) { + segBoundary.startPos = segmentCachedBoundary.startPos; + segBoundary.endPos = segmentCachedBoundary.endPos; + indexUtf16 = segmentCachedBoundary.startPos; + rText.iterateCodePoints(&indexUtf16, 1); + return segmentCachedBoundary.endPos > indexUtf16; + } + } + segBoundary.endPos = segBoundary.startPos = pos; indexUtf16 = pos; @@ -293,6 +310,10 @@ bool xdictionary::seekSegment(const OUString &rText, sal_Int32 pos, break; } + // Cache the calculated boundary + segmentCachedBoundary.startPos = segBoundary.startPos; + segmentCachedBoundary.endPos = segBoundary.endPos; + indexUtf16 = segBoundary.startPos; rText.iterateCodePoints(&indexUtf16, 1); return segBoundary.endPos > indexUtf16; -- cgit v1.2.3