summaryrefslogtreecommitdiff
path: root/sw/source/core/doc/doccomp.cxx
diff options
context:
space:
mode:
Diffstat (limited to 'sw/source/core/doc/doccomp.cxx')
-rw-r--r--sw/source/core/doc/doccomp.cxx932
1 files changed, 881 insertions, 51 deletions
diff --git a/sw/source/core/doc/doccomp.cxx b/sw/source/core/doc/doccomp.cxx
index bd3ee2cdc3dd..1da77046d5cf 100644
--- a/sw/source/core/doc/doccomp.cxx
+++ b/sw/source/core/doc/doccomp.cxx
@@ -32,7 +32,9 @@
#include <editeng/crsditem.hxx>
#include <editeng/colritem.hxx>
#include <editeng/boxitem.hxx>
+#include <editeng/svxenum.hxx>
#include <editeng/udlnitem.hxx>
+#include <swmodule.hxx>
#include <doc.hxx>
#include <IDocumentUndoRedo.hxx>
#include <docary.hxx>
@@ -49,6 +51,9 @@
#include <vector>
+#include <set>
+#include <cctype>
+
using namespace ::com::sun::star;
using ::std::vector;
@@ -189,6 +194,147 @@ public:
Compare( sal_uLong nDiff, CompareData& rData1, CompareData& rData2 );
};
+class ArrayComparator
+{
+public:
+ virtual bool Compare( int nIdx1, int nIdx2 ) const = 0;
+ virtual int GetLen1() const = 0;
+ virtual int GetLen2() const = 0;
+};
+
+// Consider two lines equal if similar enough (e.g. look like different
+// versions of the same paragraph)
+class LineArrayComparator : public ArrayComparator
+{
+private:
+ int nLen1, nLen2;
+ const CompareData &rData1, &rData2;
+ int nFirst1, nFirst2;
+
+public:
+ LineArrayComparator( const CompareData &rD1, const CompareData &rD2,
+ int nStt1, int nEnd1, int nStt2, int nEnd2 );
+
+ virtual bool Compare( int nIdx1, int nIdx2 ) const;
+ virtual int GetLen1() const { return nLen1; }
+ virtual int GetLen2() const { return nLen2; }
+};
+
+class WordArrayComparator : public ArrayComparator
+{
+private:
+ const SwTxtNode *pTxtNd1, *pTxtNd2;
+ int *pPos1, *pPos2;
+ int nCnt1, nCnt2; // number of words
+ const unsigned nMul;
+
+ void CalcPositions( int *pPos, const SwTxtNode *pTxtNd, int &nCnt );
+
+public:
+ WordArrayComparator( const SwTxtNode *pNode1, const SwTxtNode *pNode2 );
+ ~WordArrayComparator();
+
+ virtual bool Compare( int nIdx1, int nIdx2 ) const;
+ virtual int GetLen1() const { return nCnt1; }
+ virtual int GetLen2() const { return nCnt2; }
+ int GetCharSequence( const int *pWordLcs1, const int *pWordLcs2,
+ int *pSubseq1, int *pSubseq2, int nLcsLen );
+};
+
+class CharArrayComparator : public ArrayComparator
+{
+private:
+ const SwTxtNode *pTxtNd1, *pTxtNd2;
+
+public:
+ CharArrayComparator( const SwTxtNode *pNode1, const SwTxtNode *pNode2 )
+ : pTxtNd1( pNode1 ), pTxtNd2( pNode2 )
+ {
+ }
+
+ virtual bool Compare( int nIdx1, int nIdx2 ) const;
+ virtual int GetLen1() const { return pTxtNd1->GetTxt().Len(); }
+ virtual int GetLen2() const { return pTxtNd2->GetTxt().Len(); }
+};
+
+// Options set in Tools->Options->Writer->Comparison
+struct CmpOptionsContainer
+{
+ SvxCompareMode eCmpMode;
+ int nIgnoreLen;
+ bool bUseRsid;
+} CmpOptions;
+
+class CommonSubseq
+{
+private:
+ int *pData;
+ int nSize;
+
+protected:
+ ArrayComparator &rCmp;
+
+ CommonSubseq( ArrayComparator &rComparator, int nMaxSize )
+ : nSize( nMaxSize ), rCmp( rComparator )
+ {
+ pData = new int[ nSize ];
+ }
+
+ ~CommonSubseq()
+ {
+ delete[] pData;
+ }
+
+ int FindLCS( int *pLcs1 = 0, int *pLcs2 = 0, int nStt1 = 0,
+ int nEnd1 = 0, int nStt2 = 0, int nEnd2 = 0 );
+
+public:
+ int IgnoreIsolatedPieces( int *pLcs1, int *pLcs2, int nLen1, int nLen2,
+ int nLcsLen, int nPieceLen );
+};
+
+// Use Hirschberg's algrithm to find LCS in linear space
+class LgstCommonSubseq: public CommonSubseq
+{
+private:
+ static const int CUTOFF = 1<<20; // Stop recursion at this value
+
+ int *pL1, *pL2;
+ int *pBuff1, *pBuff2;
+
+ void FindL( int *pL, int nStt1, int nEnd1, int nStt2, int nEnd2 );
+ int HirschbergLCS( int *pLcs1, int *pLcs2, int nStt1, int nEnd1,
+ int nStt2, int nEnd2 );
+
+public:
+ LgstCommonSubseq( ArrayComparator &rComparator );
+ ~LgstCommonSubseq();
+
+ int Find( int *pSubseq1, int *pSubseq2 );
+};
+
+// Find a common subsequence in linear time
+class FastCommonSubseq: private CommonSubseq
+{
+private:
+ static const int CUTOFF = 2056;
+
+ int FindFastCS( int *pSeq1, int *pSeq2, int nStt1, int nEnd1,
+ int nStt2, int nEnd2 );
+
+public:
+ FastCommonSubseq( ArrayComparator &rComparator )
+ : CommonSubseq( rComparator, CUTOFF )
+ {
+ }
+
+ int Find( int *pSubseq1, int *pSubseq2 )
+ {
+ return FindFastCS( pSubseq1, pSubseq2, 0, rCmp.GetLen1(),
+ 0, rCmp.GetLen2() );
+ }
+};
+
CompareLine::~CompareLine() {}
CompareData::CompareData()
@@ -250,20 +396,16 @@ sal_uLong CompareData::ShowDiffs( const CompareData& rData )
{
if( rData.GetChanged( nStt1 ) || GetChanged( nStt2 ) )
{
+ // Find a region of different lines between two pairs of identical
+ // lines.
sal_uLong nSav1 = nStt1, nSav2 = nStt2;
while( nStt1 < nLen1 && rData.GetChanged( nStt1 )) ++nStt1;
while( nStt2 < nLen2 && GetChanged( nStt2 )) ++nStt2;
- // rData is the original,
- // "this" recieves the changes
- if( nSav2 != nStt2 && nSav1 != nStt1 )
- CheckForChangesInLine( rData, nSav1, nStt1, nSav2, nStt2 );
+ // Check if there are changed lines (only slightly different) and
+ // compare them in detail.
+ CheckForChangesInLine( rData, nSav1, nStt1, nSav2, nStt2 );
- if( nSav2 != nStt2 )
- ShowInsert( nSav2, nStt2 );
-
- if( nSav1 != nStt1 )
- ShowDelete( rData, nSav1, nStt1, nStt2 );
++nCnt;
}
++nStt1, ++nStt2;
@@ -968,7 +1110,8 @@ sal_Bool SwCompareLine::CompareNode( const SwNode& rDstNd, const SwNode& rSrcNd
switch( rDstNd.GetNodeType() )
{
case ND_TEXTNODE:
- bRet = CompareTxtNd( (SwTxtNode&)rDstNd, (SwTxtNode&)rSrcNd );
+ bRet = CompareTxtNd( (SwTxtNode&)rDstNd, (SwTxtNode&)rSrcNd )
+ && ( !CmpOptions.bUseRsid || ((SwTxtNode&)rDstNd).CompareParRsid( (SwTxtNode&)rSrcNd ) );
break;
case ND_TABLENODE:
@@ -1136,66 +1279,126 @@ sal_Bool SwCompareLine::ChangesInLine( const SwCompareLine& rLine,
SwPaM *& rpInsRing, SwPaM*& rpDelRing ) const
{
sal_Bool bRet = sal_False;
+
+ // Only compare textnodes
if( ND_TEXTNODE == rNode.GetNodeType() &&
ND_TEXTNODE == rLine.GetNode().GetNodeType() )
{
- SwTxtNode& rDestNd = *(SwTxtNode*)rNode.GetTxtNode();
+ SwTxtNode& rDstNd = *(SwTxtNode*)rNode.GetTxtNode();
const SwTxtNode& rSrcNd = *rLine.GetNode().GetTxtNode();
+ SwDoc* pDstDoc = rDstNd.GetDoc();
- xub_StrLen nDEnd = rDestNd.GetTxt().Len(), nSEnd = rSrcNd.GetTxt().Len();
- xub_StrLen nStt;
- xub_StrLen nEnd;
+ int nLcsLen = 0;
- for( nStt = 0, nEnd = Min( nDEnd, nSEnd ); nStt < nEnd; ++nStt )
- if( rDestNd.GetTxt().GetChar( nStt ) !=
- rSrcNd.GetTxt().GetChar( nStt ) )
- break;
+ int nDstLen = rDstNd.GetTxt().Len();
+ int nSrcLen = rSrcNd.GetTxt().Len();
- while( nStt < nDEnd && nStt < nSEnd )
+ int nMinLen = std::min( nDstLen , nSrcLen );
+ int nAvgLen = ( nDstLen + nSrcLen )/2;
+
+ int *pLcsDst = new int[ nMinLen + 1 ];
+ int *pLcsSrc = new int[ nMinLen + 1 ];
+
+ if( CmpOptions.eCmpMode == SVX_CMP_BY_WORD )
{
- --nDEnd, --nSEnd;
- if( rDestNd.GetTxt().GetChar( nDEnd ) !=
- rSrcNd.GetTxt().GetChar( nSEnd ) )
+ int *pTmpLcsDst = new int[ nMinLen + 1 ];
+ int *pTmpLcsSrc = new int[ nMinLen + 1 ];
+
+ WordArrayComparator aCmp( &rDstNd, &rSrcNd );
+
+ LgstCommonSubseq aSeq( aCmp );
+
+ nLcsLen = aSeq.Find( pTmpLcsDst, pTmpLcsSrc );
+
+ if( CmpOptions.nIgnoreLen )
{
- ++nDEnd, ++nSEnd;
- break;
+ nLcsLen = aSeq.IgnoreIsolatedPieces( pTmpLcsDst, pTmpLcsSrc,
+ aCmp.GetLen1(), aCmp.GetLen2(),
+ nLcsLen, CmpOptions.nIgnoreLen );
}
+
+ nLcsLen = aCmp.GetCharSequence( pTmpLcsDst, pTmpLcsSrc,
+ pLcsDst, pLcsSrc, nLcsLen );
+
+ delete[] pTmpLcsDst;
+ delete[] pTmpLcsSrc;
}
+ else
+ {
+ CharArrayComparator aCmp( &rDstNd, &rSrcNd );
+ LgstCommonSubseq aSeq( aCmp );
+
+ nLcsLen = aSeq.Find( pLcsDst, pLcsSrc );
- if( nStt || !nDEnd || !nSEnd || nDEnd < rDestNd.GetTxt().Len() ||
- nSEnd < rSrcNd.GetTxt().Len() )
+ if( CmpOptions.nIgnoreLen )
+ {
+ nLcsLen = aSeq.IgnoreIsolatedPieces( pLcsDst, pLcsSrc, nDstLen,
+ nSrcLen, nLcsLen,
+ CmpOptions.nIgnoreLen );
+ }
+ }
+
+ // find the sum of the squares of the continuous substrings
+ int nSqSum = 0;
+ int nCnt = 1;
+ for( int i = 0; i < nLcsLen; i++ )
{
- // The newly inserted is now between nStt and nDEnd
- // and the deleted is between nStt and nSEnd
- SwDoc* pDoc = rDestNd.GetDoc();
- SwPaM aPam( rDestNd, nDEnd );
- if( nStt != nDEnd )
+ if( i != nLcsLen - 1 && pLcsDst[i] + 1 == pLcsDst[i + 1]
+ && pLcsSrc[i] + 1 == pLcsSrc[i + 1] )
+ {
+ nCnt++;
+ }
+ else
+ {
+ nSqSum += nCnt*nCnt;
+ nCnt = 1;
+ }
+ }
+
+ // Don't compare if there aren't enough similarities
+ if ( nAvgLen >= 8 && nSqSum*32 < nAvgLen*nAvgLen )
+ {
+ return sal_False;
+ }
+
+ // Show the differences
+ int nSkip = 0;
+ for( int i = 0; i <= nLcsLen; i++ )
+ {
+ int nDstFrom = i ? (pLcsDst[i - 1] + 1) : 0;
+ int nDstTo = ( i == nLcsLen ) ? nDstLen : pLcsDst[i];
+ int nSrcFrom = i ? (pLcsSrc[i - 1] + 1) : 0;
+ int nSrcTo = ( i == nLcsLen ) ? nSrcLen : pLcsSrc[i];
+
+ SwPaM aPam( rDstNd, nDstTo + nSkip );
+
+ if ( nDstFrom < nDstTo )
{
SwPaM* pTmp = new SwPaM( *aPam.GetPoint(), rpInsRing );
if( !rpInsRing )
rpInsRing = pTmp;
-
pTmp->SetMark();
- pTmp->GetMark()->nContent = nStt;
+ pTmp->GetMark()->nContent = nDstFrom + nSkip;
}
- if( nStt != nSEnd )
+ if ( nSrcFrom < nSrcTo )
{
- {
- ::sw::UndoGuard const ug(pDoc->GetIDocumentUndoRedo());
- SwPaM aCpyPam( rSrcNd, nStt );
- aCpyPam.SetMark();
- aCpyPam.GetPoint()->nContent = nSEnd;
- aCpyPam.GetDoc()->CopyRange( aCpyPam, *aPam.GetPoint(),
- false );
- }
+ sal_Bool bUndo = pDstDoc->GetIDocumentUndoRedo().DoesUndo();
+ pDstDoc->GetIDocumentUndoRedo().DoUndo( sal_False );
+ SwPaM aCpyPam( rSrcNd, nSrcFrom );
+ aCpyPam.SetMark();
+ aCpyPam.GetPoint()->nContent = nSrcTo;
+ aCpyPam.GetDoc()->CopyRange( aCpyPam, *aPam.GetPoint(),
+ false );
+ pDstDoc->GetIDocumentUndoRedo().DoUndo( bUndo );
SwPaM* pTmp = new SwPaM( *aPam.GetPoint(), rpDelRing );
if( !rpDelRing )
rpDelRing = pTmp;
pTmp->SetMark();
- pTmp->GetMark()->nContent = nDEnd;
+ pTmp->GetMark()->nContent = nDstTo + nSkip;
+ nSkip += nSrcTo - nSrcFrom;
if( rpInsRing )
{
@@ -1204,9 +1407,14 @@ sal_Bool SwCompareLine::ChangesInLine( const SwCompareLine& rLine,
*pCorr->GetPoint() = *pTmp->GetMark();
}
}
- bRet = sal_True;
}
+
+ delete[] pLcsDst;
+ delete[] pLcsSrc;
+
+ bRet = sal_True;
}
+
return bRet;
}
@@ -1381,15 +1589,52 @@ void SwCompareData::CheckForChangesInLine( const CompareData& rData,
sal_uLong& rStt, sal_uLong& rEnd,
sal_uLong& rThisStt, sal_uLong& rThisEnd )
{
- while( rStt < rEnd && rThisStt < rThisEnd )
+ LineArrayComparator aCmp( (CompareData&)*this, rData, rThisStt, rThisEnd,
+ rStt, rEnd );
+
+ int nMinLen = std::min( aCmp.GetLen1(), aCmp.GetLen2() );
+ int *pLcsDst = new int[ nMinLen ];
+ int *pLcsSrc = new int[ nMinLen ];
+
+ FastCommonSubseq subseq( aCmp );
+ int nLcsLen = subseq.Find( pLcsDst, pLcsSrc );
+ for (int i = 0; i <= nLcsLen; i++)
{
- SwCompareLine* pDstLn = (SwCompareLine*)GetLine( rThisStt );
- SwCompareLine* pSrcLn = (SwCompareLine*)rData.GetLine( rStt );
- if( !pDstLn->ChangesInLine( *pSrcLn, pInsRing, pDelRing ) )
- break;
+ // Beginning of inserted lines (inclusive)
+ int nDstFrom = i ? pLcsDst[i - 1] + 1 : 0;
+ // End of inserted lines (exclusive)
+ int nDstTo = ( i == nLcsLen ) ? aCmp.GetLen1() : pLcsDst[i];
+ // Begining of deleted lines (inclusive)
+ int nSrcFrom = i ? pLcsSrc[i - 1] + 1 : 0;
+ // End of deleted lines (exclusive)
+ int nSrcTo = ( i == nLcsLen ) ? aCmp.GetLen2() : pLcsSrc[i];
+
+ if( i )
+ {
+ SwCompareLine* pDstLn = (SwCompareLine*)GetLine( rThisStt + nDstFrom - 1 );
+ SwCompareLine* pSrcLn = (SwCompareLine*)rData.GetLine( rStt + nSrcFrom - 1 );
+
+ // Show differences in detail for lines that
+ // were matched as only slightly different
+ if( !pDstLn->ChangesInLine( *pSrcLn, pInsRing, pDelRing ) )
+ {
+ ShowInsert( rThisStt + nDstFrom - 1, rThisStt + nDstFrom );
+ ShowDelete( rData, rStt + nSrcFrom - 1, rStt + nSrcFrom,
+ rThisStt + nDstFrom );
+ }
+ }
+
+ // Lines missing from source are inserted
+ if( nDstFrom != nDstTo )
+ {
+ ShowInsert( rThisStt + nDstFrom, rThisStt + nDstTo );
+ }
- ++rStt;
- ++rThisStt;
+ // Lines missing from destination are deleted
+ if( nSrcFrom != nSrcTo )
+ {
+ ShowDelete( rData, rStt + nSrcFrom, rStt + nSrcTo, rThisStt + nDstTo );
+ }
}
}
@@ -1537,6 +1782,29 @@ long SwDoc::CompareDoc( const SwDoc& rDoc )
long nRet = 0;
+ // Get comparison options
+ CmpOptions.eCmpMode = SW_MOD()->GetCompareMode();
+ if( CmpOptions.eCmpMode == SVX_CMP_AUTO )
+ {
+ if( getRsidRoot() == rDoc.getRsidRoot() )
+ {
+ CmpOptions.eCmpMode = SVX_CMP_BY_CHAR;
+ CmpOptions.bUseRsid = true;
+ CmpOptions.nIgnoreLen = 2;
+ }
+ else
+ {
+ CmpOptions.eCmpMode = SVX_CMP_BY_WORD;
+ CmpOptions.bUseRsid = false;
+ CmpOptions.nIgnoreLen = 3;
+ }
+ }
+ else
+ {
+ CmpOptions.bUseRsid = getRsidRoot() == rDoc.getRsidRoot() && SW_MOD()->IsUseRsid();
+ CmpOptions.nIgnoreLen = SW_MOD()->IsIgnorePieces() ? SW_MOD()->GetPieceLen() : 0;
+ }
+
GetIDocumentUndoRedo().StartUndo(UNDO_EMPTY, NULL);
sal_Bool bDocWasModified = IsModified();
SwDoc& rSrcDoc = (SwDoc&)rDoc;
@@ -1831,4 +2099,566 @@ long SwDoc::MergeDoc( const SwDoc& rDoc )
return nRet;
}
+LineArrayComparator::LineArrayComparator( const CompareData &rD1,
+ const CompareData &rD2, int nStt1,
+ int nEnd1, int nStt2, int nEnd2 )
+ : rData1( rD1 ), rData2( rD2 ), nFirst1( nStt1 ), nFirst2( nStt2 )
+{
+ nLen1 = nEnd1 - nStt1;
+ nLen2 = nEnd2 - nStt2;
+}
+
+bool LineArrayComparator::Compare( int nIdx1, int nIdx2 ) const
+{
+ if( nIdx1 < 0 || nIdx2 < 0 || nIdx1 >= nLen1 || nIdx2 >= nLen2 )
+ {
+ OSL_ENSURE( 0, "Index out of range!" );
+ return false;
+ }
+
+ const SwTxtNode *pTxtNd1 = ( ( SwCompareLine* )rData1.GetLine( nFirst1 + nIdx1 ) )->GetNode().GetTxtNode();
+ const SwTxtNode *pTxtNd2 = ( ( SwCompareLine* )rData2.GetLine( nFirst2 + nIdx2 ) )->GetNode().GetTxtNode();
+
+ if( !pTxtNd1 || !pTxtNd2
+ || ( CmpOptions.bUseRsid && !pTxtNd1->CompareParRsid( *pTxtNd2 ) ) )
+ {
+ return false;
+ }
+
+ int nPar1Len = pTxtNd1->Len();
+ int nPar2Len = pTxtNd2->Len();
+
+ if( std::min( nPar1Len, nPar2Len ) * 3 < std::max( nPar1Len, nPar2Len ) )
+ {
+ return false;
+ }
+
+ int nBorderLen = ( nPar1Len + nPar2Len )/16;
+
+ if( nBorderLen < 3 )
+ {
+ nBorderLen = std::min( 3, std::min( nPar1Len, nPar2Len ) );
+ }
+
+ std::set<unsigned> aHashes;
+ unsigned nHash = 0;
+ unsigned nMul = 251;
+ unsigned nPow = 1;
+ int i;
+
+ for( i = 0; i < nBorderLen - 1; i++ )
+ {
+ nPow *= nMul;
+ }
+ for( i = 0; i < nBorderLen; i++ )
+ {
+ nHash = nHash*nMul + pTxtNd1->GetTxt().GetChar( i );
+ }
+ aHashes.insert( nHash );
+ for( ; i < nPar1Len; i++ )
+ {
+ nHash = nHash - nPow*pTxtNd1->GetTxt().GetChar( i - nBorderLen );
+ nHash = nHash*nMul + pTxtNd1->GetTxt().GetChar( i );
+
+ aHashes.insert( nHash );
+ }
+
+ nHash = 0;
+ for( i = 0; i < nBorderLen; i++ )
+ {
+ nHash = nHash*nMul + pTxtNd2->GetTxt().GetChar( i );
+ }
+
+ if( aHashes.find( nHash ) != aHashes.end() )
+ {
+ return true;
+ }
+
+ for( ; i < nPar2Len; i++ )
+ {
+ nHash = nHash - nPow*pTxtNd2->GetTxt().GetChar( i - nBorderLen );
+ nHash = nHash*nMul + pTxtNd2->GetTxt().GetChar( i );
+ if( aHashes.find( nHash ) != aHashes.end() )
+ {
+ return true;
+ }
+ }
+ return false;
+}
+
+bool CharArrayComparator::Compare( int nIdx1, int nIdx2 ) const
+{
+ if( nIdx1 < 0 || nIdx2 < 0 || nIdx1 >= GetLen1() || nIdx2 >= GetLen2() )
+ {
+ OSL_ENSURE( 0, "Index out of range!" );
+ return false;
+ }
+
+ return ( !CmpOptions.bUseRsid
+ || pTxtNd1->CompareRsid( *pTxtNd2, nIdx1 + 1, nIdx2 + 1 ) )
+ && pTxtNd1->GetTxt().GetChar( nIdx1 )
+ == pTxtNd2->GetTxt().GetChar( nIdx2 );
+}
+
+WordArrayComparator::WordArrayComparator( const SwTxtNode *pNode1,
+ const SwTxtNode *pNode2 )
+ : pTxtNd1( pNode1 ), pTxtNd2( pNode2 ), nMul( 251 )
+{
+ pPos1 = new int[ pTxtNd1->GetTxt().Len() + 1 ];
+ pPos2 = new int[ pTxtNd2->GetTxt().Len() + 1 ];
+
+ CalcPositions( pPos1, pTxtNd1, nCnt1 );
+ CalcPositions( pPos2, pTxtNd2, nCnt2 );
+}
+
+WordArrayComparator::~WordArrayComparator()
+{
+ delete[] pPos1;
+ delete[] pPos2;
+}
+
+bool WordArrayComparator::Compare( int nIdx1, int nIdx2 ) const
+{
+ int nLen = pPos1[ nIdx1 + 1 ] - pPos1[ nIdx1 ];
+ if( nLen != pPos2[ nIdx2 + 1 ] - pPos2[ nIdx2 ] )
+ {
+ return false;
+ }
+ for( int i = 0; i < nLen; i++)
+ {
+ if( pTxtNd1->GetTxt().GetChar( pPos1[ nIdx1 ] + i )
+ != pTxtNd2->GetTxt().GetChar( pPos2[ nIdx2 ] + i )
+ || ( CmpOptions.bUseRsid && !pTxtNd1->CompareRsid( *pTxtNd2,
+ pPos1[ nIdx1 ] + i, pPos2[ nIdx2 ] + i ) ) )
+ {
+ return false;
+ }
+ }
+ return true;
+}
+
+int WordArrayComparator::GetCharSequence( const int *pWordLcs1,
+ const int *pWordLcs2, int *pSubseq1, int *pSubseq2, int nLcsLen )
+{
+ int nLen = 0;
+ for( int i = 0; i < nLcsLen; i++ )
+ {
+ // Check for hash collisions
+ if( pPos1[ pWordLcs1[i] + 1 ] - pPos1[ pWordLcs1[i] ]
+ != pPos2[ pWordLcs2[i] + 1 ] - pPos2[ pWordLcs2[i] ] )
+ {
+ continue;
+ }
+ for( int j = 0; j < pPos1[pWordLcs1[i]+1] - pPos1[pWordLcs1[i]]; j++)
+ {
+ pSubseq1[ nLen ] = pPos1[ pWordLcs1[i] ] + j;
+ pSubseq2[ nLen ] = pPos2[ pWordLcs2[i] ] + j;
+
+ if( pTxtNd1->GetTxt().GetChar( pPos1[ pWordLcs1[i] ] + j )
+ != pTxtNd2->GetTxt().GetChar( pPos2[ pWordLcs2[i] ] + j ) )
+ {
+ nLen -= j;
+ break;
+ }
+
+ nLen++;
+ }
+ }
+ return nLen;
+}
+
+void WordArrayComparator::CalcPositions( int *pPos, const SwTxtNode *pTxtNd,
+ int &nCnt )
+{
+ nCnt = -1;
+ for( int i = 0; i <= pTxtNd->GetTxt().Len(); i++ )
+ {
+ if( i == 0 || i == pTxtNd->GetTxt().Len()
+ || !isalnum( pTxtNd->GetTxt().GetChar( i - 1 ) )
+ || !isalnum( pTxtNd->GetTxt().GetChar( i ) ) )
+ { // Begin new word
+ nCnt++;
+ pPos[ nCnt ] = i;
+ }
+ }
+}
+
+
+int CommonSubseq::FindLCS( int *pLcs1, int *pLcs2, int nStt1, int nEnd1,
+ int nStt2, int nEnd2 )
+{
+ int nLen1 = nEnd1 ? nEnd1 - nStt1 : rCmp.GetLen1();
+ int nLen2 = nEnd2 ? nEnd2 - nStt2 : rCmp.GetLen2();
+
+ OSL_ASSERT( nLen1 >= 0 );
+ OSL_ASSERT( nLen2 >= 0 );
+
+ int **pLcs = new int*[ nLen1 + 1 ];
+ pLcs[ 0 ] = pData;
+
+ for( int i = 1; i < nLen1 + 1; i++ )
+ pLcs[ i ] = pLcs[ i - 1 ] + nLen2 + 1;
+
+ for( int i = 0; i <= nLen1; i++ )
+ pLcs[i][0] = 0;
+
+ for( int j = 0; j <= nLen2; j++ )
+ pLcs[0][j] = 0;
+
+ // Find lcs
+ for( int i = 1; i <= nLen1; i++ )
+ {
+ for( int j = 1; j <= nLen2; j++ )
+ {
+ if( rCmp.Compare( nStt1 + i - 1, nStt2 + j - 1 ) )
+ pLcs[i][j] = pLcs[i - 1][j - 1] + 1;
+ else
+ pLcs[i][j] = std::max( pLcs[i][j - 1], pLcs[i - 1][j] );
+ }
+ }
+
+ int nLcsLen = pLcs[ nLen1 ][ nLen2 ];
+
+ // Recover the lcs in the two sequences
+ if( pLcs1 && pLcs2 )
+ {
+ int nIdx1 = nLen1;
+ int nIdx2 = nLen2;
+ int nIdx = nLcsLen - 1;
+
+ while( nIdx1 > 0 && nIdx2 > 0 )
+ {
+ if( pLcs[ nIdx1 ][ nIdx2 ] == pLcs[ nIdx1 - 1 ][ nIdx2 ] )
+ nIdx1--;
+ else if( pLcs[ nIdx1 ][ nIdx2 ] == pLcs[ nIdx1 ][ nIdx2 - 1 ] )
+ nIdx2--;
+ else
+ {
+ nIdx1--, nIdx2--;
+ pLcs1[ nIdx ] = nIdx1 + nStt1;
+ pLcs2[ nIdx ] = nIdx2 + nStt2;
+ nIdx--;
+ }
+ }
+ }
+
+ delete[] pLcs;
+
+ return nLcsLen;
+}
+
+int CommonSubseq::IgnoreIsolatedPieces( int *pLcs1, int *pLcs2, int nLen1,
+ int nLen2, int nLcsLen, int nPieceLen )
+{
+ if( !nLcsLen )
+ {
+ return 0;
+ }
+
+ int nNext = 0;
+
+ // Don't ignore text at the beginning of the paragraphs
+ if( pLcs1[ 0 ] == 0 && pLcs2[ 0 ] == 0 )
+ {
+ while( nNext < nLcsLen - 1 && pLcs1[ nNext ] + 1 == pLcs1[ nNext + 1 ]
+ && pLcs2[ nNext ] + 1 == pLcs2[ nNext + 1 ] )
+ {
+ nNext++;
+ }
+ nNext++;
+ }
+
+ int nCnt = 1;
+
+ for( int i = nNext; i < nLcsLen; i++ )
+ {
+ if( i != nLcsLen - 1 && pLcs1[ i ] + 1 == pLcs1[ i + 1 ]
+ && pLcs2[ i ] + 1 == pLcs2[ i + 1 ] )
+ {
+ nCnt++;
+ }
+ else
+ {
+ if( nCnt > nPieceLen
+ // Don't ignore text at the end of the paragraphs
+ || ( i == nLcsLen - 1
+ && pLcs1[i] == nLen1 - 1 && pLcs2[i] == nLen2 - 1 ))
+ {
+ for( int j = i + 1 - nCnt; j <= i; j++ )
+ {
+ pLcs2[ nNext ] = pLcs2[ j ];
+ pLcs1[ nNext ] = pLcs1[ j ];
+ nNext++;
+ }
+ }
+ nCnt = 1;
+ }
+ }
+
+ return nNext;
+}
+
+LgstCommonSubseq::LgstCommonSubseq( ArrayComparator &rComparator )
+ : CommonSubseq( rComparator, CUTOFF )
+{
+ pBuff1 = new int[ rComparator.GetLen2() + 1 ];
+ pBuff2 = new int[ rComparator.GetLen2() + 1 ];
+
+ pL1 = new int[ rComparator.GetLen2() + 1 ];
+ pL2 = new int[ rComparator.GetLen2() + 1 ];
+}
+
+LgstCommonSubseq::~LgstCommonSubseq()
+{
+ delete[] pBuff1;
+ delete[] pBuff2;
+
+ delete[] pL1;
+ delete[] pL2;
+}
+
+void LgstCommonSubseq::FindL( int *pL, int nStt1, int nEnd1,
+ int nStt2, int nEnd2 )
+{
+ int nLen1 = nEnd1 ? nEnd1 - nStt1 : rCmp.GetLen1();
+ int nLen2 = nEnd2 ? nEnd2 - nStt2 : rCmp.GetLen2();
+
+ int *currL = pBuff1;
+ int *prevL = pBuff2;
+
+ // Avoid memory corruption
+ if( nLen2 > rCmp.GetLen2() )
+ {
+ assert( false );
+ return;
+ }
+
+ memset( pBuff1, 0, sizeof( *pBuff1 ) * ( nLen2 + 1 ) );
+ memset( pBuff2, 0, sizeof( *pBuff2 ) * ( nLen2 + 1 ) );
+
+ // Find lcs
+ for( int i = 1; i <= nLen1; i++ )
+ {
+ for( int j = 1; j <= nLen2; j++ )
+ {
+ if( rCmp.Compare( nStt1 + i - 1, nStt2 + j - 1 ) )
+ currL[j] = prevL[j - 1] + 1;
+ else
+ currL[j] = std::max( currL[j - 1], prevL[j] );
+ }
+ int *tmp = currL;
+ currL = prevL;
+ prevL = tmp;
+ }
+ memcpy( pL, prevL, ( nLen2 + 1 ) * sizeof( *prevL ) );
+}
+
+int LgstCommonSubseq::HirschbergLCS( int *pLcs1, int *pLcs2, int nStt1,
+ int nEnd1, int nStt2, int nEnd2 )
+{
+ static int nLen1;
+ static int nLen2;
+ nLen1 = nEnd1 - nStt1;
+ nLen2 = nEnd2 - nStt2;
+
+ if( ( nLen1 + 1 ) * ( nLen2 + 1 ) <= CUTOFF )
+ {
+ if( !nLen1 || !nLen2 )
+ {
+ return 0;
+ }
+ return FindLCS(pLcs1, pLcs2, nStt1, nEnd1, nStt2, nEnd2);
+ }
+
+ int nMid = nLen1/2;
+
+ FindL( pL1, nStt1, nStt1 + nMid, nStt2, nEnd2 );
+ FindL( pL2, nStt1 + nMid, nEnd1, nStt2, nEnd2 );
+
+ int nMaxPos = 0;
+ static int nMaxVal;
+ nMaxVal = -1;
+
+ static int i;
+ for( i = 0; i <= nLen2; i++ )
+ {
+ if( pL1[i] + ( pL2[nLen2] - pL2[i] ) > nMaxVal )
+ {
+ nMaxPos = i;
+ nMaxVal = pL1[i]+( pL2[nLen2] - pL2[i] );
+ }
+ }
+
+ int nRet = HirschbergLCS( pLcs1, pLcs2, nStt1, nStt1 + nMid,
+ nStt2, nStt2 + nMaxPos );
+ nRet += HirschbergLCS( pLcs1 + nRet, pLcs2 + nRet, nStt1 + nMid, nEnd1,
+ nStt2 + nMaxPos, nEnd2 );
+
+ return nRet;
+}
+
+int LgstCommonSubseq::Find( int *pSubseq1, int *pSubseq2 )
+{
+ int nStt = 0;
+ int nCutEnd = 0;
+ int nEnd1 = rCmp.GetLen1();
+ int nEnd2 = rCmp.GetLen2();
+
+ // Check for corresponding lines in the beginning of the sequences
+ while( nStt < nEnd1 && nStt < nEnd2 && rCmp.Compare( nStt, nStt ) )
+ {
+ pSubseq1[ nStt ] = nStt;
+ pSubseq2[ nStt ] = nStt;
+ nStt++;
+ }
+
+ pSubseq1 += nStt;
+ pSubseq2 += nStt;
+
+ // Check for corresponding lines in the end of the sequences
+ while( nStt < nEnd1 && nStt < nEnd2
+ && rCmp.Compare( nEnd1 - 1, nEnd2 - 1 ) )
+ {
+ nCutEnd++;
+ nEnd1--;
+ nEnd2--;
+ }
+
+ int nLen = HirschbergLCS( pSubseq1, pSubseq2, nStt, nEnd1, nStt, nEnd2 );
+
+ for( int i = 0; i < nCutEnd; i++ )
+ {
+ pSubseq1[ nLen + i ] = nEnd1 + i;
+ pSubseq2[ nLen + i ] = nEnd2 + i;
+ }
+
+ return nStt + nLen + nCutEnd;
+}
+
+int FastCommonSubseq::FindFastCS( int *pSeq1, int *pSeq2, int nStt1,
+ int nEnd1, int nStt2, int nEnd2 )
+{
+ int nCutBeg = 0;
+ int nCutEnd = 0;
+
+ // Check for corresponding lines in the beginning of the sequences
+ while( nStt1 < nEnd1 && nStt2 < nEnd2 && rCmp.Compare( nStt1, nStt2 ) )
+ {
+ pSeq1[ nCutBeg ] = nStt1++;
+ pSeq2[ nCutBeg ] = nStt2++;
+ nCutBeg++;
+ }
+
+ pSeq1 += nCutBeg;
+ pSeq2 += nCutBeg;
+
+ // Check for corresponding lines in the end of the sequences
+ while( nStt1 < nEnd1 && nStt2 < nEnd2
+ && rCmp.Compare( nEnd1 - 1, nEnd2 - 1 ) )
+ {
+ nCutEnd++;
+ nEnd1--;
+ nEnd2--;
+ }
+
+ int nLen1 = nEnd1 - nStt1;
+ int nLen2 = nEnd2 - nStt2;
+
+ // Return if a sequence is empty
+ if( nLen1 <= 0 || nLen2 <= 0 )
+ {
+ for( int i = 0; i < nCutEnd; i++ )
+ {
+ pSeq1[ i ] = nEnd1 + i;
+ pSeq2[ i ] = nEnd2 + i;
+ }
+ return nCutBeg + nCutEnd;
+ }
+
+ // Cut to LCS for small values
+ if( nLen1 < 3 || nLen2 < 3 || ( nLen1 + 1 ) * ( nLen2 + 1 ) <= CUTOFF )
+ {
+ int nLcsLen = FindLCS( pSeq1, pSeq2, nStt1, nEnd1, nStt2, nEnd2);
+
+ for( int i = 0; i < nCutEnd; i++ )
+ {
+ pSeq1[ nLcsLen + i ] = nEnd1 + i;
+ pSeq2[ nLcsLen + i ] = nEnd2 + i;
+ }
+ return nCutBeg + nLcsLen + nCutEnd;
+ }
+
+ int nMid1 = nLen1/2;
+ int nMid2 = nLen2/2;
+
+ int nRad;
+ int nPos1 = -1, nPos2 = -1;
+
+ // Find a point of correspondence in the middle of the sequences
+ for( nRad = 0; nRad*nRad < std::min( nMid1, nMid2 ); nRad++ )
+ {
+ // Search to the left and to the right of the middle of the first sequence
+ for( int i = nMid1 - nRad; i <= nMid1 + nRad; i++ )
+ {
+ if( rCmp.Compare( nStt1 + i, nStt2 + nMid2 - nRad ) )
+ {
+ nPos1 = nStt1 + i;
+ nPos2 = nStt2 + nMid2 - nRad;
+ break;
+ }
+ if( rCmp.Compare( nStt1 + i, nStt2 + nMid2 + nRad ) )
+ {
+ nPos1 = nStt1 + i;
+ nPos2 = nStt2 + nMid2 - nRad;
+ break;
+ }
+ }
+ // Search to the left and to the right of the middle of the second sequence
+ for( int i = nMid2 - nRad; i <= nMid2 + nRad; i++ )
+ {
+ if( rCmp.Compare( nStt2 + nMid2 - nRad, nStt2 + i ) )
+ {
+ nPos2 = nStt2 + i;
+ nPos1 = nStt1 + nMid1 - nRad;
+ break;
+ }
+ if( rCmp.Compare( nStt2 + nMid2 - nRad, nStt2 + i ) )
+ {
+ nPos2 = nStt2 + i;
+ nPos1 = nStt1 + nMid1 - nRad;
+ break;
+ }
+ }
+ }
+
+ // return if no point of correspondence found
+ if( nPos1 == -1 )
+ {
+ for( int i = 0; i < nCutEnd; i++ )
+ {
+ pSeq1[ i ] = nEnd1 + i;
+ pSeq2[ i ] = nEnd2 + i;
+ }
+ return nCutBeg + nCutEnd;
+ }
+
+ // Run the same on the sequences to the left of the correspondence point
+ int nLen = FindFastCS( pSeq1, pSeq2, nStt1, nPos1, nStt2, nPos2 );
+
+ pSeq1[ nLen ] = nPos1;
+ pSeq2[ nLen ] = nPos2;
+
+ // Run the same on the sequences to the right of the correspondence point
+ nLen += FindFastCS( pSeq1 + nLen + 1, pSeq2 + nLen + 1,
+ nPos1 + 1, nEnd1, nPos2 + 1, nEnd2 ) + 1;
+
+ for( int i = 0; i < nCutEnd; i++ )
+ {
+ pSeq1[ nLen + i ] = nEnd1 + i;
+ pSeq2[ nLen + i ] = nEnd2 + i;
+ }
+
+ return nLen + nCutBeg + nCutEnd;
+}
+
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */