diff options
author | Caolán McNamara <caolanm@redhat.com> | 2012-08-28 17:10:35 +0100 |
---|---|---|
committer | Caolán McNamara <caolanm@redhat.com> | 2012-08-29 09:02:50 +0100 |
commit | 42a15f45ff4e02f98229de02efd0d8c19f10bcd5 (patch) | |
tree | 67031948d50d251825c1d05d5547a499a1c5e51b /sw/source/core/txtnode | |
parent | 02f6e55231c8b1646cbafc0e3e591da8122e2bf1 (diff) |
Resolves: fdo#38983 allow extra word boundary characters
i.e. Word overrides emdash and endash to be word boundary characters
for the purposes of counting words. And there are some who want
to treat characters such as = and - similarly.
Default to a configuration that gives the same results as Word for
word counting.
Change-Id: Ia8ce6ac12011a1d6e547f11644c76163c4c993c5
Diffstat (limited to 'sw/source/core/txtnode')
-rw-r--r-- | sw/source/core/txtnode/txtedt.cxx | 79 |
1 files changed, 59 insertions, 20 deletions
diff --git a/sw/source/core/txtnode/txtedt.cxx b/sw/source/core/txtnode/txtedt.cxx index e37dada60149..40e6dc28f42a 100644 --- a/sw/source/core/txtnode/txtedt.cxx +++ b/sw/source/core/txtnode/txtedt.cxx @@ -37,6 +37,7 @@ #include <editeng/hangulhanja.hxx> #include <SwSmartTagMgr.hxx> #include <linguistic/lngprops.hxx> +#include <officecfg/Office/Writer.hxx> #include <unotools/transliterationwrapper.hxx> #include <unotools/charclass.hxx> #include <dlelstnr.hxx> @@ -655,12 +656,44 @@ XubString SwTxtNode::GetCurWord( xub_StrLen nPos ) const SwScanner::SwScanner( const SwTxtNode& rNd, const rtl::OUString& rTxt, const LanguageType* pLang, const ModelToViewHelper& rConvMap, sal_uInt16 nType, sal_Int32 nStart, sal_Int32 nEnde, sal_Bool bClp ) - : rNode( rNd ), aText( rTxt), pLanguage( pLang ), rConversionMap( rConvMap ), nLen( 0 ), nWordType( nType ), bClip( bClp ) + : rNode( rNd ) + , aPreDashReplacementText(rTxt) + , pLanguage( pLang ) + , rConversionMap( rConvMap ) + , nLen( 0 ) + , nOverriddenDashCount( 0 ) + , nWordType( nType ) + , bClip( bClp ) { - OSL_ENSURE( !aText.isEmpty(), "SwScanner: EmptyString" ); + OSL_ENSURE( !aPreDashReplacementText.isEmpty(), "SwScanner: EmptyString" ); nStartPos = nBegin = nStart; nEndPos = nEnde; + //MSWord f.e has special emdash and endash behaviour in that they break + //words for the purposes of word counting, while a hyphen etc. doesn't. 
+ // + //The default configuration treats emdash/endash as a word break, but + //additional ones can be added in under tools->options + if (nWordType == i18n::WordType::WORD_COUNT) + { + rtl::OUString sDashes = officecfg::Office::Writer::WordCount::AdditionalSeperators::get(); + rtl::OUStringBuffer aBuf(aPreDashReplacementText); + for (sal_Int32 i = nStartPos; i < nEndPos; ++i) + { + sal_Unicode cChar = aBuf[i]; + if (sDashes.indexOf(cChar) != -1) + { + aBuf[i] = ' '; + ++nOverriddenDashCount; + } + } + aText = aBuf.makeStringAndClear(); + } + else + aText = aPreDashReplacementText; + + assert(aPreDashReplacementText.getLength() == aText.getLength()); + if ( pLanguage ) { aCurrLang = *pLanguage; @@ -836,7 +869,7 @@ sal_Bool SwScanner::NextWord() if ( nWordType == i18n::WordType::WORD_COUNT ) nLen = forceEachAsianCodePointToWord(aText, nBegin, nLen); - aWord = aText.copy( nBegin, nLen ); + aWord = aPreDashReplacementText.copy( nBegin, nLen ); return sal_True; } @@ -1892,30 +1925,35 @@ void SwTxtNode::CountWords( SwDocStat& rStat, sal_uInt32 nTmpCharsExcludingSpaces = 0; // all non-white chars // count words in masked and expanded text: - if (!aExpandText.isEmpty() && pBreakIt->GetBreakIter().is()) + if (!aExpandText.isEmpty()) { - // zero is NULL for pLanguage -----------v last param = true for clipping - SwScanner aScanner( *this, aExpandText, 0, aConversionMap, i18n::WordType::WORD_COUNT, - nExpandBegin, nExpandEnd, true ); + if (pBreakIt->GetBreakIter().is()) + { + // zero is NULL for pLanguage -----------v last param = true for clipping + SwScanner aScanner( *this, aExpandText, 0, aConversionMap, i18n::WordType::WORD_COUNT, + nExpandBegin, nExpandEnd, true ); - // used to filter out scanner returning almost empty strings (len=1; unichar=0x0001) - const rtl::OUString aBreakWord( CH_TXTATR_BREAKWORD ); + // used to filter out scanner returning almost empty strings (len=1; unichar=0x0001) + const rtl::OUString aBreakWord( CH_TXTATR_BREAKWORD ); - while ( 
aScanner.NextWord() ) - { - // 1 is len(CH_TXTATR_BREAKWORD) : match returns length of match - if( 1 != aExpandText.match(aBreakWord, aScanner.GetBegin() )) + while ( aScanner.NextWord() ) { - ++nTmpWords; - const rtl::OUString &rWord = aScanner.GetWord(); - if (pBreakIt->GetBreakIter()->getScriptType(rWord, 0) == i18n::ScriptType::ASIAN) - ++nTmpAsianWords; - nTmpCharsExcludingSpaces += pBreakIt->getGraphemeCount(rWord); + // 1 is len(CH_TXTATR_BREAKWORD) : match returns length of match + if( 1 != aExpandText.match(aBreakWord, aScanner.GetBegin() )) + { + ++nTmpWords; + const rtl::OUString &rWord = aScanner.GetWord(); + if (pBreakIt->GetBreakIter()->getScriptType(rWord, 0) == i18n::ScriptType::ASIAN) + ++nTmpAsianWords; + nTmpCharsExcludingSpaces += pBreakIt->getGraphemeCount(rWord); + } } + + nTmpCharsExcludingSpaces += aScanner.getOverriddenDashCount(); } - } - nTmpChars = pBreakIt->getGraphemeCount(aExpandText, nExpandBegin, nExpandEnd); + nTmpChars = pBreakIt->getGraphemeCount(aExpandText, nExpandBegin, nExpandEnd); + } // no nTmpCharsExcludingSpaces adjust needed neither for blanked out MaskedChars // nor for mid-word selection - set scanner bClip = true at creation @@ -1938,6 +1976,7 @@ void SwTxtNode::CountWords( SwDocStat& rStat, nTmpCharsExcludingSpaces += pBreakIt->getGraphemeCount(rWord); } + nTmpCharsExcludingSpaces += aScanner.getOverriddenDashCount(); nTmpChars += pBreakIt->getGraphemeCount(sNumString); } else if ( bHasBullet ) |