summary | refs | log | tree | commit | diff
path: root/linguistic
diff options
context:
space:
mode:
author Thomas Lange <tl@openoffice.org> 2000-12-22 11:45:48 +0000
committer Thomas Lange <tl@openoffice.org> 2000-12-22 11:45:48 +0000
commit 1f956e2ab61d1b1bea868cd0786102ebb8a135a7 (patch)
tree 95ce6268268c941415263179e91ad216197fa700 /linguistic
parent e22125337c50560ac30e24ea8489e6f3654e6dfe (diff)
linguistic functionality for handling words with hyphens and control chars
Diffstat (limited to 'linguistic')
-rw-r--r-- linguistic/inc/misc.hxx 18
-rw-r--r-- linguistic/source/hyphdta.cxx 12
-rw-r--r-- linguistic/source/misc.cxx 194
3 files changed, 156 insertions, 68 deletions
diff --git a/linguistic/inc/misc.hxx b/linguistic/inc/misc.hxx
index 4eeb18a80c87..e09dade4373a 100644
--- a/linguistic/inc/misc.hxx
+++ b/linguistic/inc/misc.hxx
@@ -2,9 +2,9 @@
*
* $RCSfile: misc.hxx,v $
*
- * $Revision: 1.2 $
+ * $Revision: 1.3 $
*
- * last change: $Author: tl $ $Date: 2000-12-21 09:56:55 $
+ * last change: $Author: tl $ $Date: 2000-12-22 12:44:34 $
*
* The Contents of this file are made available subject to the terms of
* either of the following licenses
@@ -87,6 +87,9 @@
#ifndef _COM_SUN_STAR_LINGUISTIC2_XSEARCHABLEDICTIONARYLIST_HPP_
#include <com/sun/star/linguistic2/XSearchableDictionaryList.hpp>
#endif
+#ifndef _COM_SUN_STAR_LINGUISTIC2_XHYPHENATEDWORD_HPP_
+#include <com/sun/star/linguistic2/XHyphenatedWord.hpp>
+#endif
#include <uno/lbnames.h> // CPPU_CURRENT_LANGUAGE_BINDING_NAME macro, which specify the environment type
#include <cppuhelper/implbase1.hxx> // helper for implementations
@@ -175,10 +178,13 @@ LanguageType
///////////////////////////////////////////////////////////////////////////
-BOOL HasHyphens( const rtl::OUString &rTxt );
-INT32 GetNumControlChars( const rtl::OUString &rTxt );
-BOOL RemoveHyphens( rtl::OUString &rTxt );
-BOOL RemoveControlChars( rtl::OUString &rTxt );
+INT32 GetPosInWordToCheck( const rtl::OUString &rTxt, INT32 nPos );
+
+::com::sun::star::uno::Reference<
+ ::com::sun::star::linguistic2::XHyphenatedWord >
+ RebuildHyphensAndControlChars( const rtl::OUString &rOrigWord,
+ ::com::sun::star::uno::Reference<
+ ::com::sun::star::linguistic2::XHyphenatedWord > &rxHyphWord );
///////////////////////////////////////////////////////////////////////////
diff --git a/linguistic/source/hyphdta.cxx b/linguistic/source/hyphdta.cxx
index e3809db68581..cce98a764d9d 100644
--- a/linguistic/source/hyphdta.cxx
+++ b/linguistic/source/hyphdta.cxx
@@ -2,9 +2,9 @@
*
* $RCSfile: hyphdta.cxx,v $
*
- * $Revision: 1.1.1.1 $
+ * $Revision: 1.2 $
*
- * last change: $Author: hr $ $Date: 2000-11-17 12:37:36 $
+ * last change: $Author: tl $ $Date: 2000-12-22 12:45:48 $
*
* The Contents of this file are made available subject to the terms of
* either of the following licenses
@@ -81,6 +81,9 @@
#ifndef _TOOLS_DEBUG_HXX //autogen wg. DBG_ASSERT
#include <tools/debug.hxx>
#endif
+#ifndef _SVTOOLS_LNGMISC_HXX_
+#include <svtools/lngmisc.hxx>
+#endif
//using namespace utl;
using namespace osl;
@@ -104,7 +107,10 @@ HyphenatedWord::HyphenatedWord(const OUString &rWord, INT16 nLang, INT16 nHPos,
nHyphenationPos (nHPos),
nHyphPos (nPos)
{
- bIsAltSpelling = rWord != rHyphWord;
+ OUString aTmp( rHyphWord );
+ RemoveHyphens( aTmp );
+ RemoveControlChars( aTmp );
+ bIsAltSpelling = rWord != aTmp;
}
diff --git a/linguistic/source/misc.cxx b/linguistic/source/misc.cxx
index d9f08643b2ed..090f45bcc6f4 100644
--- a/linguistic/source/misc.cxx
+++ b/linguistic/source/misc.cxx
@@ -2,9 +2,9 @@
*
* $RCSfile: misc.cxx,v $
*
- * $Revision: 1.4 $
+ * $Revision: 1.5 $
*
- * last change: $Author: tl $ $Date: 2000-12-21 09:57:08 $
+ * last change: $Author: tl $ $Date: 2000-12-22 12:44:50 $
*
* The Contents of this file are made available subject to the terms of
* either of the following licenses
@@ -68,16 +68,15 @@
#ifndef _TOOLS_DEBUG_HXX
#include <tools/debug.hxx>
#endif
-#ifndef _RTL_USTRBUF_HXX_
-#include <rtl/ustrbuf.hxx>
-#endif
+//#ifndef _RTL_USTRBUF_HXX_
+//#include <rtl/ustrbuf.hxx>
+//#endif
#ifndef INCLUDED_SVTOOLS_PATHOPTIONS_HXX
#include <svtools/pathoptions.hxx>
#endif
-
-#include "misc.hxx"
-#include "defs.hxx"
-#include "lngprops.hxx"
+#ifndef _SVTOOLS_LNGMISC_HXX_
+#include <svtools/lngmisc.hxx>
+#endif
#ifndef _COM_SUN_STAR_BEANS_XPROPERTYSET_HPP_
#include <com/sun/star/beans/XPropertySet.hpp>
@@ -105,6 +104,12 @@
#include <unotools/processfactory.hxx>
#endif
+#include "misc.hxx"
+#include "defs.hxx"
+#include "lngprops.hxx"
+#include <hyphdta.hxx>
+
+
using namespace utl;
using namespace osl;
using namespace rtl;
@@ -310,83 +315,154 @@ uno::Sequence< INT16 >
///////////////////////////////////////////////////////////////////////////
-#define SOFT_HYPHEN ((sal_Unicode) 0x00AD)
-#define HARD_HYPHEN ((sal_Unicode) 0x2011)
+#define SOFT_HYPHEN SVT_SOFT_HYPHEN
+#define HARD_HYPHEN SVT_HARD_HYPHEN
-inline BOOL IsHyphen( sal_Unicode cChar )
+static BOOL GetAltSpelling( INT16 &rnChgPos, INT16 &rnChgLen, OUString &rRplc,
+ Reference< XHyphenatedWord > &rxHyphWord )
{
- return cChar == SOFT_HYPHEN || cChar == HARD_HYPHEN;
-}
-
-
-inline BOOL IsControlChar( sal_Unicode cChar )
-{
- return cChar < (sal_Unicode) ' ';
-}
-
+ BOOL bRes = rxHyphWord->isAlternativeSpelling();
+ if (bRes)
+ {
+ OUString aWord( rxHyphWord->getWord() ),
+ aHyphenatedWord( rxHyphWord->getHyphenatedWord() );
+ INT16 nHyphenationPos = rxHyphWord->getHyphenationPos(),
+ nHyphenPos = rxHyphWord->getHyphenPos();
+ const sal_Unicode *pWord = aWord.getStr(),
+ *pAltWord = aHyphenatedWord.getStr();
+
+ // at least char changes directly left or right of the hyphen
+ // should(!) be handled properly...
+ //! nHyphenationPos and nHyphenPos differ at most by 1 (see above)
+ //! Beware: eg "Schiffahrt" in German (pre spelling reform)
+ //! proves to be a bit nasty (nChgPosLeft and nChgPosRight overlap
+ //! to an extent.)
+
+ // find first different char from left
+ sal_Int32 nPosL = 0,
+ nAltPosL = 0;
+ for (INT16 i = 0 ; pWord[ nPosL ] == pAltWord[ nAltPosL ]; nPosL++, nAltPosL++, i++)
+ {
+ // restrict changes area beginning to the right to
+ // the char immediately following the hyphen.
+ //! serves to insert the additional "f" in "Schiffahrt" at
+ //! position 5 rather than position 6.
+ if (i >= nHyphenationPos + 1)
+ break;
+ }
-inline BOOL HasHyphens( const OUString &rTxt )
-{
- return rTxt.indexOf( SOFT_HYPHEN ) != -1 ||
- rTxt.indexOf( HARD_HYPHEN ) != -1;
+ // find first different char from right
+ sal_Int32 nPosR = aWord.getLength() - 1,
+ nAltPosR = aHyphenatedWord.getLength() - 1;
+ for ( ; nPosR >= nPosL && nAltPosR >= nAltPosL
+ && pWord[ nPosR ] == pAltWord[ nAltPosR ];
+ nPosR--, nAltPosR--)
+ ;
+
+ rnChgPos = (INT16) nPosL;
+ rnChgLen = nPosR - nPosL + 1;
+ DBG_ASSERT( rnChgLen >= 0, "nChgLen < 0");
+
+ sal_Int32 nTxtStart = nPosL;
+ sal_Int32 nTxtLen = nAltPosL - nPosL + 1;
+ rRplc = aHyphenatedWord.copy( nTxtStart, nTxtLen );
+ }
+ return bRes;
}
-INT32 GetNumControlChars( const OUString &rTxt )
+static INT16 GetOrigWordPos( const OUString &rOrigWord, INT16 nHyphenPos )
{
- INT32 nCnt = 0;
- INT32 nLen = rTxt.getLength();
+ INT32 nLen = rOrigWord.getLength();
+ INT32 nNotSkippedChars = 0;
for (INT32 i = 0; i < nLen; ++i)
{
- if (IsControlChar( rTxt[i] ))
- ++nCnt;
+ sal_Unicode cChar = rOrigWord[i];
+ BOOL bSkip = IsHyphen( cChar ) || IsControlChar( cChar );
+ if (!bSkip)
+ ++nNotSkippedChars;
+ if (nNotSkippedChars > nHyphenPos)
+ break;
}
- return nCnt;
+ return i < nLen ? i : -1;
}
-BOOL RemoveHyphens( OUString &rTxt )
+INT32 GetPosInWordToCheck( const OUString &rTxt, INT32 nPos )
{
- BOOL bModified = FALSE;
- if (HasHyphens( rTxt ))
+ INT32 nRes = -1;
+ INT32 nLen = rTxt.getLength();
+ if (0 <= nPos && nPos < nLen)
{
- String aTmp( rTxt );
- aTmp.EraseAllChars( SOFT_HYPHEN );
- aTmp.EraseAllChars( HARD_HYPHEN );
- rTxt = aTmp;
- bModified = TRUE;
+ INT32 nSkipped = 0;
+ BOOL bSkip;
+ for (INT32 i = 0; i <= nPos; ++i)
+ {
+ sal_Unicode cChar = rTxt[i];
+ bSkip = IsHyphen( cChar ) || IsControlChar( cChar );
+ if (bSkip)
+ ++nSkipped;
+ }
+ nRes = nPos - nSkipped;
}
- return bModified;
+ return nRes;
}
-BOOL RemoveControlChars( OUString &rTxt )
+Reference< XHyphenatedWord > RebuildHyphensAndControlChars(
+ const OUString &rOrigWord,
+ Reference< XHyphenatedWord > &rxHyphWord )
{
- BOOL bModified = FALSE;
- INT32 nCtrlChars = GetNumControlChars( rTxt );
- if (nCtrlChars)
+ Reference< XHyphenatedWord > xRes;
+ if (rOrigWord.getLength() && rxHyphWord.is())
{
- INT32 nLen = rTxt.getLength();
- INT32 nSize = nLen - nCtrlChars;
- OUStringBuffer aBuf( nSize );
- INT32 nCnt = 0;
- for (INT32 i = 0; i < nLen; ++i)
+ INT16 nChgPos = 0,
+ nChgLen = 0;
+ OUString aRplc;
+ BOOL bAltSpelling = GetAltSpelling( nChgPos, nChgLen, aRplc, rxHyphWord );
+
+ OUString aOrigHyphenatedWord;
+ INT16 nOrigHyphenPos = -1;
+ if (!bAltSpelling)
{
- sal_Unicode cChar = rTxt[i];
- if (!IsControlChar( cChar ))
- {
- DBG_ASSERT( nCnt < nSize, "index out of range" );
- aBuf.setCharAt( nCnt++, cChar );
- }
+#ifdef DEBUG
+ OUString aWord( rxHyphWord->getWord() );
+#endif
+ aOrigHyphenatedWord = rOrigWord;
+ nOrigHyphenPos = GetOrigWordPos( rOrigWord, rxHyphWord->getHyphenPos() );
+ }
+ else
+ {
+ OUString aLeft, aRight;
+ INT16 nPos = GetOrigWordPos( rOrigWord, nChgPos );
+ aLeft = rOrigWord.copy( 0, nPos );
+ nPos = GetOrigWordPos( rOrigWord, nChgPos + nChgLen );
+ aRight = rOrigWord.copy( nPos );
+
+ aOrigHyphenatedWord = aLeft;
+ aOrigHyphenatedWord += aRplc;
+ aOrigHyphenatedWord += aRight;
+
+ nOrigHyphenPos = aLeft.getLength() +
+ rxHyphWord->getHyphenPos() - nChgPos;
}
- DBG_ASSERT( nCnt == nSize, "wrong size" );
- rTxt = aBuf.makeStringAndClear();
- bModified = TRUE;
+
+ if (nOrigHyphenPos == -1)
+ DBG_ERROR( "failed to get nOrigHyphenPos" );
+ else
+ {
+ INT16 nLang = LocaleToLanguage( rxHyphWord->getLocale() );
+ xRes = new HyphenatedWord(
+ rxHyphWord->getWord(), nLang, rxHyphWord->getHyphenationPos(),
+ aOrigHyphenatedWord, nOrigHyphenPos );
+ }
+
}
- return bModified;
+ return xRes;
}
+
///////////////////////////////////////////////////////////////////////////
// TL_TODO: