diff options
author | Thomas Lange <tl@openoffice.org> | 2000-12-22 11:45:48 +0000 |
---|---|---|
committer | Thomas Lange <tl@openoffice.org> | 2000-12-22 11:45:48 +0000 |
commit | 1f956e2ab61d1b1bea868cd0786102ebb8a135a7 (patch) | |
tree | 95ce6268268c941415263179e91ad216197fa700 /linguistic | |
parent | e22125337c50560ac30e24ea8489e6f3654e6dfe (diff) |
linguistic functionality for handling words with hyphens and control chars
Diffstat (limited to 'linguistic')
-rw-r--r-- | linguistic/inc/misc.hxx | 18 | ||||
-rw-r--r-- | linguistic/source/hyphdta.cxx | 12 | ||||
-rw-r--r-- | linguistic/source/misc.cxx | 194 |
3 files changed, 156 insertions, 68 deletions
diff --git a/linguistic/inc/misc.hxx b/linguistic/inc/misc.hxx index 4eeb18a80c87..e09dade4373a 100644 --- a/linguistic/inc/misc.hxx +++ b/linguistic/inc/misc.hxx @@ -2,9 +2,9 @@ * * $RCSfile: misc.hxx,v $ * - * $Revision: 1.2 $ + * $Revision: 1.3 $ * - * last change: $Author: tl $ $Date: 2000-12-21 09:56:55 $ + * last change: $Author: tl $ $Date: 2000-12-22 12:44:34 $ * * The Contents of this file are made available subject to the terms of * either of the following licenses @@ -87,6 +87,9 @@ #ifndef _COM_SUN_STAR_LINGUISTIC2_XSEARCHABLEDICTIONARYLIST_HPP_ #include <com/sun/star/linguistic2/XSearchableDictionaryList.hpp> #endif +#ifndef _COM_SUN_STAR_LINGUISTIC2_XHYPHENATEDWORD_HPP_ +#include <com/sun/star/linguistic2/XHyphenatedWord.hpp> +#endif #include <uno/lbnames.h> // CPPU_CURRENT_LANGUAGE_BINDING_NAME macro, which specify the environment type #include <cppuhelper/implbase1.hxx> // helper for implementations @@ -175,10 +178,13 @@ LanguageType /////////////////////////////////////////////////////////////////////////// -BOOL HasHyphens( const rtl::OUString &rTxt ); -INT32 GetNumControlChars( const rtl::OUString &rTxt ); -BOOL RemoveHyphens( rtl::OUString &rTxt ); -BOOL RemoveControlChars( rtl::OUString &rTxt ); +INT32 GetPosInWordToCheck( const rtl::OUString &rTxt, INT32 nPos ); + +::com::sun::star::uno::Reference< + ::com::sun::star::linguistic2::XHyphenatedWord > + RebuildHyphensAndControlChars( const rtl::OUString &rOrigWord, + ::com::sun::star::uno::Reference< + ::com::sun::star::linguistic2::XHyphenatedWord > &rxHyphWord ); /////////////////////////////////////////////////////////////////////////// diff --git a/linguistic/source/hyphdta.cxx b/linguistic/source/hyphdta.cxx index e3809db68581..cce98a764d9d 100644 --- a/linguistic/source/hyphdta.cxx +++ b/linguistic/source/hyphdta.cxx @@ -2,9 +2,9 @@ * * $RCSfile: hyphdta.cxx,v $ * - * $Revision: 1.1.1.1 $ + * $Revision: 1.2 $ * - * last change: $Author: hr $ $Date: 2000-11-17 12:37:36 $ + * last change: $Author: tl $ $Date: 2000-12-22 12:45:48 $ * * The Contents of this file are made available subject to the terms of * either of the following licenses @@ -81,6 +81,9 @@ #ifndef _TOOLS_DEBUG_HXX //autogen wg. DBG_ASSERT #include <tools/debug.hxx> #endif +#ifndef _SVTOOLS_LNGMISC_HXX_ +#include <svtools/lngmisc.hxx> +#endif //using namespace utl; using namespace osl; @@ -104,7 +107,10 @@ HyphenatedWord::HyphenatedWord(const OUString &rWord, INT16 nLang, INT16 nHPos, nHyphenationPos (nHPos), nHyphPos (nPos) { - bIsAltSpelling = rWord != rHyphWord; + OUString aTmp( rHyphWord ); + RemoveHyphens( aTmp ); + RemoveControlChars( aTmp ); + bIsAltSpelling = rWord != aTmp; } diff --git a/linguistic/source/misc.cxx b/linguistic/source/misc.cxx index d9f08643b2ed..090f45bcc6f4 100644 --- a/linguistic/source/misc.cxx +++ b/linguistic/source/misc.cxx @@ -2,9 +2,9 @@ * * $RCSfile: misc.cxx,v $ * - * $Revision: 1.4 $ + * $Revision: 1.5 $ * - * last change: $Author: tl $ $Date: 2000-12-21 09:57:08 $ + * last change: $Author: tl $ $Date: 2000-12-22 12:44:50 $ * * The Contents of this file are made available subject to the terms of * either of the following licenses @@ -68,16 +68,15 @@ #ifndef _TOOLS_DEBUG_HXX #include <tools/debug.hxx> #endif -#ifndef _RTL_USTRBUF_HXX_ -#include <rtl/ustrbuf.hxx> -#endif +//#ifndef _RTL_USTRBUF_HXX_ +//#include <rtl/ustrbuf.hxx> +//#endif #ifndef INCLUDED_SVTOOLS_PATHOPTIONS_HXX #include <svtools/pathoptions.hxx> #endif - -#include "misc.hxx" -#include "defs.hxx" -#include "lngprops.hxx" +#ifndef _SVTOOLS_LNGMISC_HXX_ +#include <svtools/lngmisc.hxx> +#endif #ifndef _COM_SUN_STAR_BEANS_XPROPERTYSET_HPP_ #include <com/sun/star/beans/XPropertySet.hpp> @@ -105,6 +104,12 @@ #include <unotools/processfactory.hxx> #endif +#include "misc.hxx" +#include "defs.hxx" +#include "lngprops.hxx" +#include <hyphdta.hxx> + + using namespace utl; using namespace osl; using namespace rtl; @@ -310,83 +315,154 @@ uno::Sequence< INT16 > /////////////////////////////////////////////////////////////////////////// -#define SOFT_HYPHEN ((sal_Unicode) 0x00AD) -#define HARD_HYPHEN ((sal_Unicode) 0x2011) +#define SOFT_HYPHEN SVT_SOFT_HYPHEN +#define HARD_HYPHEN SVT_HARD_HYPHEN -inline BOOL IsHyphen( sal_Unicode cChar ) +static BOOL GetAltSpelling( INT16 &rnChgPos, INT16 &rnChgLen, OUString &rRplc, + Reference< XHyphenatedWord > &rxHyphWord ) { - return cChar == SOFT_HYPHEN || cChar == HARD_HYPHEN; -} - - -inline BOOL IsControlChar( sal_Unicode cChar ) -{ - return cChar < (sal_Unicode) ' '; -} - + BOOL bRes = rxHyphWord->isAlternativeSpelling(); + if (bRes) + { + OUString aWord( rxHyphWord->getWord() ), + aHyphenatedWord( rxHyphWord->getHyphenatedWord() ); + INT16 nHyphenationPos = rxHyphWord->getHyphenationPos(), + nHyphenPos = rxHyphWord->getHyphenPos(); + const sal_Unicode *pWord = aWord.getStr(), + *pAltWord = aHyphenatedWord.getStr(); + + // at least char changes directly left or right to the hyphen + // should(!) be handled properly... + //! nHyphenationPos and nHyphenPos differ at most by 1 (see above) + //! Beware: eg "Schiffahrt" in German (pre spelling reform) + //! proves to be a bit nasty (nChgPosLeft and nChgPosRight overlap + //! to an extend.) + + // find first different char from left + sal_Int32 nPosL = 0, + nAltPosL = 0; + for (INT16 i = 0 ; pWord[ nPosL ] == pAltWord[ nAltPosL ]; nPosL++, nAltPosL++, i++) + { + // restrict changes area beginning to the right to + // the char immediately following the hyphen. + //! serves to insert the additional "f" in "Schiffahrt" at + //! position 5 rather than position 6. + if (i >= nHyphenationPos + 1) + break; + } -inline BOOL HasHyphens( const OUString &rTxt ) -{ - return rTxt.indexOf( SOFT_HYPHEN ) != -1 || - rTxt.indexOf( HARD_HYPHEN ) != -1; + // find first different char from right + sal_Int32 nPosR = aWord.getLength() - 1, + nAltPosR = aHyphenatedWord.getLength() - 1; + for ( ; nPosR >= nPosL && nAltPosR >= nAltPosL + && pWord[ nPosR ] == pAltWord[ nAltPosR ]; + nPosR--, nAltPosR--) + ; + + rnChgPos = (INT16) nPosL; + rnChgLen = nPosR - nPosL + 1; + DBG_ASSERT( rnChgLen >= 0, "nChgLen < 0"); + + sal_Int32 nTxtStart = nPosL; + sal_Int32 nTxtLen = nAltPosL - nPosL + 1; + rRplc = aHyphenatedWord.copy( nTxtStart, nTxtLen ); + } + return bRes; } -INT32 GetNumControlChars( const OUString &rTxt ) +static INT16 GetOrigWordPos( const OUString &rOrigWord, INT16 nHyphenPos ) { - INT32 nCnt = 0; - INT32 nLen = rTxt.getLength(); + INT32 nLen = rOrigWord.getLength(); + INT32 nNotSkippedChars = 0; for (INT32 i = 0; i < nLen; ++i) { - if (IsControlChar( rTxt[i] )) - ++nCnt; + sal_Unicode cChar = rOrigWord[i]; + BOOL bSkip = IsHyphen( cChar ) || IsControlChar( cChar ); + if (!bSkip) + ++nNotSkippedChars; + if (nNotSkippedChars > nHyphenPos) + break; } - return nCnt; + return i < nLen ? i : -1; } -BOOL RemoveHyphens( OUString &rTxt ) +INT32 GetPosInWordToCheck( const OUString &rTxt, INT32 nPos ) { - BOOL bModified = FALSE; - if (HasHyphens( rTxt )) + INT32 nRes = -1; + INT32 nLen = rTxt.getLength(); + if (0 <= nPos && nPos < nLen) { - String aTmp( rTxt ); - aTmp.EraseAllChars( SOFT_HYPHEN ); - aTmp.EraseAllChars( HARD_HYPHEN ); - rTxt = aTmp; - bModified = TRUE; + INT32 nSkipped = 0; + BOOL bSkip; + for (INT32 i = 0; i <= nPos; ++i) + { + sal_Unicode cChar = rTxt[i]; + bSkip = IsHyphen( cChar ) || IsControlChar( cChar ); + if (bSkip) + ++nSkipped; + } + nRes = nPos - nSkipped; } - return bModified; + return nRes; } -BOOL RemoveControlChars( OUString &rTxt ) +Reference< XHyphenatedWord > RebuildHyphensAndControlChars( + const OUString &rOrigWord, + Reference< XHyphenatedWord > &rxHyphWord ) { - BOOL bModified = FALSE; - INT32 nCtrlChars = GetNumControlChars( rTxt ); - if (nCtrlChars) + Reference< XHyphenatedWord > xRes; + if (rOrigWord.getLength() && rxHyphWord.is()) { - INT32 nLen = rTxt.getLength(); - INT32 nSize = nLen - nCtrlChars; - OUStringBuffer aBuf( nSize ); - INT32 nCnt = 0; - for (INT32 i = 0; i < nLen; ++i) + INT16 nChgPos = 0, + nChgLen = 0; + OUString aRplc; + BOOL bAltSpelling = GetAltSpelling( nChgPos, nChgLen, aRplc, rxHyphWord ); + + OUString aOrigHyphenatedWord; + INT16 nOrigHyphenPos = -1; + if (!bAltSpelling) { - sal_Unicode cChar = rTxt[i]; - if (!IsControlChar( cChar )) - { - DBG_ASSERT( nCnt < nSize, "index out of range" ); - aBuf.setCharAt( nCnt++, cChar ); - } +#ifdef DEBUG + OUString aWord( rxHyphWord->getWord() ); +#endif + aOrigHyphenatedWord = rOrigWord; + nOrigHyphenPos = GetOrigWordPos( rOrigWord, rxHyphWord->getHyphenPos() ); + } + else + { + OUString aLeft, aRight; + INT16 nPos = GetOrigWordPos( rOrigWord, nChgPos ); + aLeft = rOrigWord.copy( 0, nPos ); + nPos = GetOrigWordPos( rOrigWord, nChgPos + nChgLen ); + aRight = rOrigWord.copy( nPos ); + + aOrigHyphenatedWord = aLeft; + aOrigHyphenatedWord += aRplc; + aOrigHyphenatedWord += aRight; + + nOrigHyphenPos = aLeft.getLength() + + rxHyphWord->getHyphenPos() - nChgPos; } - DBG_ASSERT( nCnt == nSize, "wrong size" ); - rTxt = aBuf.makeStringAndClear(); - bModified = TRUE; + + if (nOrigHyphenPos == -1) + DBG_ERROR( "failed to get nOrigHyphenPos" ); + else + { + INT16 nLang = LocaleToLanguage( rxHyphWord->getLocale() ); + xRes = new HyphenatedWord( + rxHyphWord->getWord(), nLang, rxHyphWord->getHyphenationPos(), + aOrigHyphenatedWord, nOrigHyphenPos ); + } + } - return bModified; + return xRes; } + /////////////////////////////////////////////////////////////////////////// // TL_TODO: |