Fixes char overcount when selection ends in middle of word

Patch in sw changes only one object: sw/source/core/txtedt.cxx
author: John LeMoyne Castle <jlc@mail2lee.com> 2010-11-02 00:08:15 -0600
committer: Cédric Bosdonnat <cedricbosdo@openoffice.org> 2010-11-04 14:57:37 +0100
commit: 335534df4946437a12cd3c18b4a24beee188317b (patch)
tree: df0098580358b82ccc827cf708644c877a88eba5
parent: 66c52b972ac2d04012c79fbef00ab216976d68db (diff)
1 files changed, 110 insertions, 96 deletions
diff --git a/sw/source/core/txtnode/txtedt.cxx b/sw/source/core/txtnode/txtedt.cxx
index 9f053464f3..c03ab0573b 100644
--- a/sw/source/core/txtnode/txtedt.cxx
+++ b/sw/source/core/txtnode/txtedt.cxx
@@ -1864,116 +1864,130 @@ void SwTxtNode::ReplaceTextOnly( xub_StrLen nPos, xub_StrLen nLen,
 void SwTxtNode::CountWords( SwDocStat& rStat,
                             xub_StrLen nStt, xub_StrLen nEnd ) const
 {
+    sal_Bool isCountAll = ( (0 == nStt) && (GetTxt().Len() == nEnd) );
+
     ++rStat.nAllPara; // #i93174#: count _all_ paragraphs
-    if( nStt < nEnd )
+    if( nStt >= nEnd )
+    {   // empty node or empty selection or bad call
+        return;
+    }
+    if ( IsHidden() )
+    {   // not counting hidden paras
+        return;
+    }
+    // Shortcut when counting whole paragraph and current count is clean
+    if ( isCountAll && !IsWordCountDirty() )
     {
-        if ( !IsHidden() )
-        {
-            ++rStat.nPara;
-            ULONG nTmpWords = 0;
-            ULONG nTmpChars = 0;
-            ULONG nTmpCharsExcludingSpaces = 0;  // Number of characters in actual words (i.e. excluding spaces)
-
-            // Shortcut: Whole paragraph should be considered and cached values
-            // are valid:
-            if ( 0 == nStt && GetTxt().Len() == nEnd && !IsWordCountDirty() )
-            {
-                nTmpWords = GetParaNumberOfWords();
-                nTmpChars = GetParaNumberOfChars();
-                nTmpCharsExcludingSpaces = GetParaNumberOfCharsExcludingSpaces();
-            }
-            else
-            {
-                String aOldStr( m_Text );
-                String& rCastStr = const_cast<String&>(m_Text);
-
-                // fills the deleted redlines and hidden ranges with cChar:
-                const xub_Unicode cChar(' ');
-                const USHORT nNumOfMaskedChars =
-                        lcl_MaskRedlinesAndHiddenText( *this, rCastStr, nStt, nEnd, cChar, false );
-
-                // expand fields
-                rtl::OUString aExpandText;
-                const ModelToViewHelper::ConversionMap* pConversionMap =
-                        BuildConversionMap( aExpandText );
+        // accumulate into DocStat record to return the values
+        rStat.nWord += GetParaNumberOfWords();
+        rStat.nChar += GetParaNumberOfChars();
+        rStat.nCharExcludingSpaces += GetParaNumberOfCharsExcludingSpaces();
+        return;
+    }
 
-                const sal_uInt32 nExpandBegin = ModelToViewHelper::ConvertToViewPosition( pConversionMap, nStt );
-                const sal_uInt32 nExpandEnd   = ModelToViewHelper::ConvertToViewPosition( pConversionMap, nEnd );
+    // make a copy of the text
+    String& rTextCopy = const_cast<String&>(m_Text);
 
-                const bool bCount = aExpandText.getLength() > 0;
+    // mask out the redlined and hidden text with ' '
+    const xub_Unicode cChar(' ');
+    const USHORT nNumOfMaskedChars = lcl_MaskRedlinesAndHiddenText( *this, rTextCopy, nStt, nEnd, cChar, false );
 
-                // count words in 'regular' text:
-                if( bCount && pBreakIt->GetBreakIter().is() )
-                {
-                    const String aScannerText( aExpandText );
-                    SwScanner aScanner( *this, aScannerText, 0, pConversionMap,
-                                        i18n::WordType::WORD_COUNT,
-                                        (xub_StrLen)nExpandBegin, (xub_StrLen)nExpandEnd );
+    // expand text into pConversionMap for scanner
+    rtl::OUString aExpandText;
+    const ModelToViewHelper::ConversionMap* pConversionMap = BuildConversionMap( aExpandText );
 
-                    const rtl::OUString aBreakWord( CH_TXTATR_BREAKWORD );
+    // map start and end points onto the ConversionMap
+    const sal_uInt32 nExpandBegin = ModelToViewHelper::ConvertToViewPosition( pConversionMap, nStt );
+    const sal_uInt32 nExpandEnd   = ModelToViewHelper::ConvertToViewPosition( pConversionMap, nEnd );
 
-                    while ( aScanner.NextWord() )
-                    {
-                        if( aScanner.GetLen()  > 1 ||
-                            CH_TXTATR_BREAKWORD != aExpandText.match(aBreakWord, aScanner.GetBegin() ))
-                            ++nTmpWords;
+    if ( aExpandText.getLength() <= 0 )
+    {
+        OSL_ENSURE(aExpandText.getLength() >= 0, "Node text expansion error: length < 0." );
+        return;
+    }
 
-                        if( CH_TXTATR_BREAKWORD != aExpandText.match(aBreakWord, aScanner.GetBegin() ))
-                            nTmpCharsExcludingSpaces += aScanner.GetLen();
+    //do the count
+    // all counts exclude hidden paras and hidden+redlined within para
+    // definition of space/white chars in SwScanner (and BreakIter!)
+    // uses both lcl_IsSkippableWhiteSpace and BreakIter getWordBoundary in SwScanner
+    sal_uInt32 nTmpWords = 0;        // count of all contiguous blocks of non-white chars
+    sal_uInt32 nTmpChars = 0;        // count of all chars
+    sal_uInt32 nTmpCharsExcludingSpaces = 0;  // all non-white chars
 
-                    }
-                }
+    ++rStat.nPara;      // count of non-empty paras
 
-                ASSERT( aExpandText.getLength() >= nNumOfMaskedChars,
-                        "More characters hidden that characters in string!" )
-                nTmpChars = nExpandEnd - nExpandBegin - nNumOfMaskedChars;
+    // count words in masked and expanded text:
+    if( pBreakIt->GetBreakIter().is() )
+    {
+        const String aScannerText( aExpandText );
+        // zero is NULL for pLanguage -----------v               last param = true for clipping
+        SwScanner aScanner( *this, aScannerText, 0, pConversionMap, i18n::WordType::WORD_COUNT,
+                            (xub_StrLen)nExpandBegin, (xub_StrLen)nExpandEnd, true );
 
-                // count words in numbering string:
-                if ( nStt == 0 && bCount )
-                {
-                    // add numbering label
-                    const String aNumString = GetNumString();
-                    const xub_StrLen nNumStringLen = aNumString.Len();
-                    if ( nNumStringLen > 0 )
-                    {
-                        LanguageType aLanguage = GetLang( 0 );
+        // used to filter out scanner returning almost empty strings (len=1; unichar=0x0001)
+        const rtl::OUString aBreakWord( CH_TXTATR_BREAKWORD );
 
-                        SwScanner aScanner( *this, aNumString, &aLanguage, 0,
-                                            i18n::WordType::WORD_COUNT,
-                                            0, nNumStringLen );
+        while ( aScanner.NextWord() )
+        {
+            //  1 is len(CH_TXTATR_BREAKWORD) : match returns length of match
+            if( 1 != aExpandText.match(aBreakWord, aScanner.GetBegin() ))
+            {
+                ++nTmpWords;
+                nTmpCharsExcludingSpaces += aScanner.GetLen();
+            }
+        }
+    }
 
-                        while ( aScanner.NextWord() )
-                            ++nTmpWords;
+    nTmpChars = nExpandEnd - nExpandBegin - nNumOfMaskedChars;
+    OSL_ENSURE( nTmpChars >= 0, "Less than zero total characters found!" );
 
-                        nTmpChars += nNumStringLen;
-                    }
-                    else if ( HasBullet() )
-                    {
-                        ++nTmpWords;
-                        ++nTmpChars;
-                    }
-                }
+    // no nTmpCharsExcludingSpaces adjust needed neither for blanked out MaskedChars
+    // nor for mid-word selection - set scanner bClip = true at creation
 
-                delete pConversionMap;
+    // count words in numbering string if started at beginning of para:
+    if ( nStt == 0 )
+    {
+        // count outline number label - ? no expansion into map
+        // always counts all of number-ish label
+        const String aNumString = GetNumString();
+        const xub_StrLen nNumStringLen = aNumString.Len();
+        if ( nNumStringLen > 0 )
+        {
+            LanguageType aLanguage = GetLang( 0 );
 
-                rCastStr = aOldStr;
+            SwScanner aScanner( *this, aNumString, &aLanguage, 0,
+                                i18n::WordType::WORD_COUNT, 0, nNumStringLen, true );
 
-                // If the whole paragraph has been calculated, update cached
-                // values:
-                if ( 0 == nStt && GetTxt().Len() == nEnd )
-                {
-                    SetParaNumberOfWords( nTmpWords );
-                    SetParaNumberOfChars( nTmpChars );
-                    SetParaNumberOfCharsExcludingSpaces( nTmpCharsExcludingSpaces );
-                    SetWordCountDirty( false );
-                }
+            while ( aScanner.NextWord() )
+            {
+                ++nTmpWords;
+                nTmpCharsExcludingSpaces += aScanner.GetLen();
             }
 
-            rStat.nWord += nTmpWords;
-            rStat.nChar += nTmpChars;
-            rStat.nCharExcludingSpaces += nTmpCharsExcludingSpaces;
+            nTmpChars += nNumStringLen;
         }
+        else if ( HasBullet() )
+        {
+            ++nTmpWords;
+            ++nTmpChars;
+            ++nTmpCharsExcludingSpaces;
+        }
+    }
+
+    delete pConversionMap;
+
+    // If counting the whole para then update cached values and mark clean
+    if ( isCountAll )
+    {
+        SetParaNumberOfWords( nTmpWords );
+        SetParaNumberOfChars( nTmpChars );
+        SetParaNumberOfCharsExcludingSpaces( nTmpCharsExcludingSpaces );
+        SetWordCountDirty( false );
     }
+    // accumulate into DocStat record to return the values
+    rStat.nWord += nTmpWords;
+    rStat.nChar += nTmpChars;
+    rStat.nCharExcludingSpaces += nTmpCharsExcludingSpaces;
 }
 
 //
@@ -1981,17 +1995,17 @@ void SwTxtNode::CountWords( SwDocStat& rStat,
 //
 struct SwParaIdleData_Impl
 {
-    SwWrongList* pWrong;            // for spell checking
+    SwWrongList* pWrong;                // for spell checking
     SwGrammarMarkUp* pGrammarCheck;     // for grammar checking /  proof reading
     SwWrongList* pSmartTags;
     ULONG nNumberOfWords;
     ULONG nNumberOfChars;
     ULONG nNumberOfCharsExcludingSpaces;
-    bool bWordCountDirty        : 1;
-    bool bWrongDirty            : 1;    // Ist das Wrong-Feld auf invalid?
-    bool bGrammarCheckDirty     : 1;
-    bool bSmartTagDirty         : 1;
-    bool bAutoComplDirty        : 1;    // die ACompl-Liste muss angepasst werden
+    bool bWordCountDirty;
+    bool bWrongDirty;                   // Ist das Wrong-Feld auf invalid?
+    bool bGrammarCheckDirty;
+    bool bSmartTagDirty;
+    bool bAutoComplDirty;               // die ACompl-Liste muss angepasst werden
 
     SwParaIdleData_Impl() :
         pWrong              ( 0 ),
@@ -1999,7 +2013,7 @@ struct SwParaIdleData_Impl
         pSmartTags          ( 0 ),
         nNumberOfWords      ( 0 ),
         nNumberOfChars      ( 0 ),
-        nNumberOfCharsExcludingSpaces      ( 0 ),
+        nNumberOfCharsExcludingSpaces ( 0 ),
         bWordCountDirty     ( true ),
         bWrongDirty         ( true ),
         bGrammarCheckDirty  ( true ),
author	John LeMoyne Castle <jlc@mail2lee.com>	2010-11-02 00:08:15 -0600
committer	Cédric Bosdonnat <cedricbosdo@openoffice.org>	2010-11-04 14:57:37 +0100
commit	335534df4946437a12cd3c18b4a24beee188317b (patch)
tree	df0098580358b82ccc827cf708644c877a88eba5
parent	66c52b972ac2d04012c79fbef00ab216976d68db (diff)