summaryrefslogtreecommitdiff
path: root/lingucomponent/source/hyphenator/altlinuxhyph/hyphen/hyphenimp.cxx
diff options
context:
space:
mode:
Diffstat (limited to 'lingucomponent/source/hyphenator/altlinuxhyph/hyphen/hyphenimp.cxx')
-rwxr-xr-xlingucomponent/source/hyphenator/altlinuxhyph/hyphen/hyphenimp.cxx866
1 files changed, 432 insertions, 434 deletions
diff --git a/lingucomponent/source/hyphenator/altlinuxhyph/hyphen/hyphenimp.cxx b/lingucomponent/source/hyphenator/altlinuxhyph/hyphen/hyphenimp.cxx
index 3b0871cc36b6..5d040be72cb9 100755
--- a/lingucomponent/source/hyphenator/altlinuxhyph/hyphen/hyphenimp.cxx
+++ b/lingucomponent/source/hyphenator/altlinuxhyph/hyphen/hyphenimp.cxx
@@ -32,7 +32,6 @@
#include <com/sun/star/uno/Reference.h>
#include <com/sun/star/linguistic2/XSearchableDictionaryList.hpp>
-//#include <com/sun/star/linguistic2/SpellFailure.hpp>
#include <cppuhelper/factory.hxx> // helper for factories
#include <com/sun/star/registry/XRegistryKey.hpp>
#include <i18npool/mslangid.hxx>
@@ -43,15 +42,12 @@
#include <osl/mutex.hxx>
#include <hyphen.h>
-
-#ifndef _HYPHENIMP_HXX
#include <hyphenimp.hxx>
-#endif
#include <linguistic/hyphdta.hxx>
#include <rtl/ustring.hxx>
-
#include <rtl/ustrbuf.hxx>
+#include <rtl/textenc.h>
#include <linguistic/lngprops.hxx>
#include <unotools/pathoptions.hxx>
@@ -97,9 +93,8 @@ Hyphenator::Hyphenator() :
{
bDisposing = FALSE;
pPropHelper = NULL;
- aDicts = NULL;
- numdict = 0;
-
+ aDicts = NULL;
+ numdict = 0;
}
@@ -108,21 +103,22 @@ Hyphenator::~Hyphenator()
if (pPropHelper)
pPropHelper->RemoveAsPropListener();
- if ((numdict) && (aDicts)) {
- for (int i=0; i < numdict; i++) {
+ if ((numdict) && (aDicts))
+ {
+ for (int i=0; i < numdict; i++)
+ {
if (aDicts[i].apCC) delete aDicts[i].apCC;
aDicts[i].apCC = NULL;
- }
+ }
}
- if (aDicts) delete[] aDicts;
+ if (aDicts) delete[] aDicts;
aDicts = NULL;
- numdict = 0;
+ numdict = 0;
}
PropertyHelper_Hyphen & Hyphenator::GetPropHelper_Impl()
{
-
if (!pPropHelper)
{
Reference< XPropertySet > xPropSet( GetLinguProperties(), UNO_QUERY );
@@ -212,12 +208,7 @@ Sequence< Locale > SAL_CALL Hyphenator::getLocales()
// add dictionary information
aDicts = new HDInfo[numdict];
-/*
- aTEncs = new rtl_TextEncoding [numdict];
- aTLocs = new Locale [numdict];
- aTNames = new OUString [numdict];
- aCharSetInfo = new CharClass* [numdict];
-*/
+
k = 0;
for (aDictIt = aDics.begin(); aDictIt != aDics.end(); ++aDictIt)
{
@@ -233,7 +224,7 @@ Sequence< Locale > SAL_CALL Hyphenator::getLocales()
for (sal_Int32 i = 0; i < nLocales; ++i)
{
aDicts[k].aPtr = NULL;
- aDicts[k].aEnc = 0;
+ aDicts[k].eEnc = RTL_TEXTENCODING_DONTKNOW;
aDicts[k].aLoc = MsLangId::convertLanguageToLocale(
MsLangId::convertIsoStringToLanguage( aDictIt->aLocaleNames[i] ));
aDicts[k].apCC = new CharClass( aDicts[k].aLoc );
@@ -288,240 +279,268 @@ sal_Bool SAL_CALL Hyphenator::hasLocale(const Locale& rLocale)
}
-Reference< XHyphenatedWord > SAL_CALL
-Hyphenator::hyphenate( const ::rtl::OUString& aWord,
- const ::com::sun::star::lang::Locale& aLocale,
- sal_Int16 nMaxLeading,
- const ::com::sun::star::beans::PropertyValues& aProperties )
- throw (com::sun::star::uno::RuntimeException,
- com::sun::star::lang::IllegalArgumentException)
+Reference< XHyphenatedWord > SAL_CALL Hyphenator::hyphenate( const ::rtl::OUString& aWord,
+ const ::com::sun::star::lang::Locale& aLocale,
+ sal_Int16 nMaxLeading,
+ const ::com::sun::star::beans::PropertyValues& aProperties )
+ throw (com::sun::star::uno::RuntimeException, com::sun::star::lang::IllegalArgumentException)
{
int nHyphenationPos = -1;
- int nHyphenationPosAlt = -1;
- int nHyphenationPosAltHyph = -1;
+ int nHyphenationPosAlt = -1;
+ int nHyphenationPosAltHyph = -1;
int wordlen;
char *hyphens;
- char *lcword;
- int k = 0;
+ char *lcword;
+ int k = 0;
- PropertyHelper_Hyphen & rHelper = GetPropHelper();
- rHelper.SetTmpPropVals(aProperties);
+ PropertyHelper_Hyphen & rHelper = GetPropHelper();
+ rHelper.SetTmpPropVals(aProperties);
sal_Int16 minTrail = rHelper.GetMinTrailing();
sal_Int16 minLead = rHelper.GetMinLeading();
sal_Int16 minLen = rHelper.GetMinWordLength();
HyphenDict *dict = NULL;
- rtl_TextEncoding aEnc = 0;
- CharClass * pCC = NULL;
+ rtl_TextEncoding eEnc = RTL_TEXTENCODING_DONTKNOW;
+ CharClass * pCC = NULL;
Reference< XHyphenatedWord > xRes;
- k = -1;
- for (int j = 0; j < numdict; j++)
- if (aLocale == aDicts[j].aLoc) k = j;
-
-
- // if we have a hyphenation dictionary matching this locale
- if (k != -1) {
+ k = -1;
+ for (int j = 0; j < numdict; j++)
+ {
+ if (aLocale == aDicts[j].aLoc)
+ k = j;
+ }
+ // if we have a hyphenation dictionary matching this locale
+ if (k != -1)
+ {
// if this dictinary has not been loaded yet do that
- if (!aDicts[k].aPtr) {
-
- OUString DictFN = aDicts[k].aName + A2OU(".dic");
- OUString dictpath;
+ if (!aDicts[k].aPtr)
+ {
+ OUString DictFN = aDicts[k].aName + A2OU(".dic");
+ OUString dictpath;
- osl::FileBase::getSystemPathFromFileURL( DictFN, dictpath );
- OString sTmp( OU2ENC( dictpath, osl_getThreadTextEncoding() ) );
+ osl::FileBase::getSystemPathFromFileURL( DictFN, dictpath );
+ OString sTmp( OU2ENC( dictpath, osl_getThreadTextEncoding() ) );
#if defined(WNT)
- // workaround for Windows specifc problem that the
- // path length in calls to 'fopen' is limted to somewhat
- // about 120+ characters which will usually be exceed when
- // using dictionaries as extensions.
- sTmp = Win_GetShortPathName( dictpath );
+ // workaround for Windows specifc problem that the
+ // path length in calls to 'fopen' is limted to somewhat
+ // about 120+ characters which will usually be exceed when
+ // using dictionaries as extensions.
+ sTmp = Win_GetShortPathName( dictpath );
#endif
- if ( ( dict = hnj_hyphen_load ( sTmp.getStr()) ) == NULL )
- {
- fprintf(stderr, "Couldn't find file %s\n", OU2ENC(dictpath, osl_getThreadTextEncoding()) );
- return NULL;
- }
- aDicts[k].aPtr = dict;
- aDicts[k].aEnc = rtl_getTextEncodingFromUnixCharset(dict->cset);
- if (aDicts[k].aEnc == RTL_TEXTENCODING_DONTKNOW) {
- if (strcmp("ISCII-DEVANAGARI", dict->cset) == 0) {
- aDicts[k].aEnc = RTL_TEXTENCODING_ISCII_DEVANAGARI;
- } else if (strcmp("UTF-8", dict->cset) == 0) {
- aDicts[k].aEnc = RTL_TEXTENCODING_UTF8;
- }
- }
+ if ( ( dict = hnj_hyphen_load ( sTmp.getStr()) ) == NULL )
+ {
+ fprintf(stderr, "Couldn't find file %s\n", OU2ENC(dictpath, osl_getThreadTextEncoding()) );
+ return NULL;
+ }
+ aDicts[k].aPtr = dict;
+ aDicts[k].eEnc = getTextEncodingFromCharset(dict->cset);
}
- // other wise hyphenate the word with that dictionary
- dict = aDicts[k].aPtr;
- aEnc = aDicts[k].aEnc;
- pCC = aDicts[k].apCC;
+ // other wise hyphenate the word with that dictionary
+ dict = aDicts[k].aPtr;
+ eEnc = aDicts[k].eEnc;
+ pCC = aDicts[k].apCC;
- sal_uInt16 ct = CAPTYPE_UNKNOWN;
- ct = capitalType(aWord, pCC);
+ // we don't want to work with a default text encoding since following incorrect
+ // results may occur only for specific text and thus may be hard to notice.
+ // Thus better always make a clean exit here if the text encoding is in question.
+ // Hopefully something not working at all will raise proper attention quickly. ;-)
+ DBG_ASSERT( eEnc != RTL_TEXTENCODING_DONTKNOW, "failed to get text encoding! (maybe incorrect encoding string in file)" );
+ if (eEnc == RTL_TEXTENCODING_DONTKNOW)
+ return NULL;
- // first convert any smart quotes or apostrophes to normal ones
+ sal_uInt16 ct = CAPTYPE_UNKNOWN;
+ ct = capitalType(aWord, pCC);
+
+ // first convert any smart quotes or apostrophes to normal ones
OUStringBuffer rBuf(aWord);
- sal_Int32 nc = rBuf.getLength();
- sal_Unicode ch;
- for (sal_Int32 ix=0; ix < nc; ix++) {
+ sal_Int32 nc = rBuf.getLength();
+ sal_Unicode ch;
+ for (sal_Int32 ix=0; ix < nc; ix++)
+ {
ch = rBuf.charAt(ix);
- if ((ch == 0x201C) || (ch == 0x201D)) rBuf.setCharAt(ix,(sal_Unicode)0x0022);
- if ((ch == 0x2018) || (ch == 0x2019)) rBuf.setCharAt(ix,(sal_Unicode)0x0027);
- }
- OUString nWord(rBuf.makeStringAndClear());
+ if ((ch == 0x201C) || (ch == 0x201D))
+ rBuf.setCharAt(ix,(sal_Unicode)0x0022);
+ if ((ch == 0x2018) || (ch == 0x2019))
+ rBuf.setCharAt(ix,(sal_Unicode)0x0027);
+ }
+ OUString nWord(rBuf.makeStringAndClear());
- // now convert word to all lowercase for pattern recognition
- OUString nTerm(makeLowerCase(nWord, pCC));
+ // now convert word to all lowercase for pattern recognition
+ OUString nTerm(makeLowerCase(nWord, pCC));
- // now convert word to needed encoding
- OString encWord(OU2ENC(nTerm,aEnc));
+ // now convert word to needed encoding
+ OString encWord(OU2ENC(nTerm,eEnc));
wordlen = encWord.getLength();
- lcword = new char[wordlen + 1];
+ lcword = new char[wordlen + 1];
hyphens = new char[wordlen + 5];
- char ** rep = NULL; // replacements of discretionary hyphenation
- int * pos = NULL; // array of [hyphenation point] minus [deletion position]
- int * cut = NULL; // length of deletions in original word
-
- // copy converted word into simple char buffer
- strcpy(lcword,encWord.getStr());
-
- // now strip off any ending periods
- int n = wordlen-1;
- while((n >=0) && (lcword[n] == '.')) n--;
- n++;
- if (n > 0) {
- if (hnj_hyphen_hyphenate3(dict, lcword, n, hyphens, NULL, &rep, &pos, &cut,
- minLead, minTrail, Max(dict->clhmin, Max(dict->clhmin, 2) + Max(0, minLead - Max(dict->lhmin, 2))),
- Max(dict->crhmin, Max(dict->crhmin, 2) + Max(0, minTrail - Max(dict->rhmin, 2)))))
- {
- //whoops something did not work
- delete[] hyphens;
- delete[] lcword;
- if (rep) {
- for(int j = 0; j < n; j++) {
+ char ** rep = NULL; // replacements of discretionary hyphenation
+ int * pos = NULL; // array of [hyphenation point] minus [deletion position]
+ int * cut = NULL; // length of deletions in original word
+
+ // copy converted word into simple char buffer
+ strcpy(lcword,encWord.getStr());
+
+ // now strip off any ending periods
+ int n = wordlen-1;
+ while((n >=0) && (lcword[n] == '.'))
+ n--;
+ n++;
+ if (n > 0)
+ {
+ const bool bFailed = 0 != hnj_hyphen_hyphenate3( dict, lcword, n, hyphens, NULL,
+ &rep, &pos, &cut, minLead, minTrail,
+ Max(dict->clhmin, Max(dict->clhmin, 2) + Max(0, minLead - Max(dict->lhmin, 2))),
+ Max(dict->crhmin, Max(dict->crhmin, 2) + Max(0, minTrail - Max(dict->rhmin, 2))) );
+ if (bFailed)
+ {
+ //whoops something did not work
+ delete[] hyphens;
+ delete[] lcword;
+ if (rep)
+ {
+ for(int j = 0; j < n; j++)
+ {
if (rep[j]) free(rep[j]);
}
free(rep);
- }
- if (pos) free(pos);
- if (cut) free(cut);
- return NULL;
- }
+ }
+ if (pos) free(pos);
+ if (cut) free(cut);
+ return NULL;
}
+ }
- // now backfill hyphens[] for any removed trailing periods
- for (int c = n; c < wordlen; c++) hyphens[c] = '0';
- hyphens[wordlen] = '\0';
+ // now backfill hyphens[] for any removed trailing periods
+ for (int c = n; c < wordlen; c++) hyphens[c] = '0';
+ hyphens[wordlen] = '\0';
INT32 Leading = GetPosInWordToCheck( aWord, nMaxLeading );
for (INT32 i = 0; i < n; i++)
{
- int leftrep = 0;
- BOOL hit = (n >= minLen);
- if (!rep || !rep[i] || (i >= n)) {
- hit = hit && (hyphens[i]&1) && (i < Leading);
- hit = hit && (i >= (minLead-1) );
- hit = hit && ((n - i - 1) >= minTrail);
- } else {
- // calculate change character length before hyphenation point signed with '='
- for (char * c = rep[i]; *c && (*c != '='); c++) {
- if (aEnc == RTL_TEXTENCODING_UTF8) {
- if (((unsigned char) *c) >> 6 != 2) leftrep++;
- } else leftrep++;
+ int leftrep = 0;
+ BOOL hit = (n >= minLen);
+ if (!rep || !rep[i] || (i >= n))
+ {
+ hit = hit && (hyphens[i]&1) && (i < Leading);
+ hit = hit && (i >= (minLead-1) );
+ hit = hit && ((n - i - 1) >= minTrail);
+ }
+ else
+ {
+ // calculate change character length before hyphenation point signed with '='
+ for (char * c = rep[i]; *c && (*c != '='); c++)
+ {
+ if (eEnc == RTL_TEXTENCODING_UTF8)
+ {
+ if (((unsigned char) *c) >> 6 != 2)
+ leftrep++;
}
- hit = hit && (hyphens[i]&1) && ((i + leftrep - pos[i]) < Leading);
- hit = hit && ((i + leftrep - pos[i]) >= (minLead-1) );
- hit = hit && ((n - i - 1 + sal::static_int_cast< sal_sSize >(strlen(rep[i])) - leftrep - 1) >= minTrail);
+ else
+ leftrep++;
}
- if (hit) {
- nHyphenationPos = i;
- if (rep && (i < n) && rep[i]) {
- nHyphenationPosAlt = i - pos[i];
- nHyphenationPosAltHyph = i + leftrep - pos[i];
- }
+ hit = hit && (hyphens[i]&1) && ((i + leftrep - pos[i]) < Leading);
+ hit = hit && ((i + leftrep - pos[i]) >= (minLead-1) );
+ hit = hit && ((n - i - 1 + sal::static_int_cast< sal_sSize >(strlen(rep[i])) - leftrep - 1) >= minTrail);
+ }
+ if (hit)
+ {
+ nHyphenationPos = i;
+ if (rep && (i < n) && rep[i])
+ {
+ nHyphenationPosAlt = i - pos[i];
+ nHyphenationPosAltHyph = i + leftrep - pos[i];
}
- }
-
- if (nHyphenationPos == -1) {
- xRes = NULL;
- } else {
- if (rep && rep[nHyphenationPos]) {
- // remove equal sign
- char * s = rep[nHyphenationPos];
- int eq = 0;
- for (; *s; s++) {
- if (*s == '=') eq = 1;
- if (eq) *s = *(s + 1);
- }
- OUString repHyphlow(rep[nHyphenationPos], strlen(rep[nHyphenationPos]), aEnc);
- OUString repHyph;
- switch (ct) {
- case CAPTYPE_ALLCAP:
- {
- repHyph = makeUpperCase(repHyphlow, pCC);
- break;
- }
- case CAPTYPE_INITCAP:
- {
- if (nHyphenationPosAlt == 0) {
- repHyph = makeInitCap(repHyphlow, pCC);
- } else {
- repHyph = repHyphlow;
- }
- break;
- }
- default:
- {
- repHyph = repHyphlow;
- break;
- }
}
+ }
+
+ if (nHyphenationPos == -1)
+ {
+ xRes = NULL;
+ }
+ else
+ {
+ if (rep && rep[nHyphenationPos])
+ {
+ // remove equal sign
+ char * s = rep[nHyphenationPos];
+ int eq = 0;
+ for (; *s; s++)
+ {
+ if (*s == '=') eq = 1;
+ if (eq) *s = *(s + 1);
+ }
+ OUString repHyphlow(rep[nHyphenationPos], strlen(rep[nHyphenationPos]), eEnc);
+ OUString repHyph;
+ switch (ct)
+ {
+ case CAPTYPE_ALLCAP:
+ {
+ repHyph = makeUpperCase(repHyphlow, pCC);
+ break;
+ }
+ case CAPTYPE_INITCAP:
+ {
+ if (nHyphenationPosAlt == 0)
+ repHyph = makeInitCap(repHyphlow, pCC);
+ else
+ repHyph = repHyphlow;
+ break;
+ }
+ default:
+ {
+ repHyph = repHyphlow;
+ break;
+ }
+ }
- // handle shortening
- INT16 nPos = (INT16) ((nHyphenationPosAltHyph < nHyphenationPos) ?
- nHyphenationPosAltHyph : nHyphenationPos);
- // dicretionary hyphenation
+ // handle shortening
+ INT16 nPos = (INT16) ((nHyphenationPosAltHyph < nHyphenationPos) ?
+ nHyphenationPosAltHyph : nHyphenationPos);
+ // dicretionary hyphenation
xRes = new HyphenatedWord( aWord, LocaleToLanguage( aLocale ), nPos,
- aWord.replaceAt(nHyphenationPosAlt + 1, cut[nHyphenationPos], repHyph),
- (INT16) nHyphenationPosAltHyph);
- } else {
+ aWord.replaceAt(nHyphenationPosAlt + 1, cut[nHyphenationPos], repHyph),
+ (INT16) nHyphenationPosAltHyph);
+ }
+ else
+ {
xRes = new HyphenatedWord( aWord, LocaleToLanguage( aLocale ),
- (INT16)nHyphenationPos, aWord, (INT16) nHyphenationPos);
- }
+ (INT16)nHyphenationPos, aWord, (INT16) nHyphenationPos);
+ }
}
- delete[] lcword;
+ delete[] lcword;
delete[] hyphens;
- if (rep) {
- for(int j = 0; j < n; j++) {
- if (rep[j]) free(rep[j]);
- }
- free(rep);
+ if (rep)
+ {
+ for(int j = 0; j < n; j++)
+ {
+ if (rep[j]) free(rep[j]);
}
- if (pos) free(pos);
- if (cut) free(cut);
+ free(rep);
+ }
+ if (pos) free(pos);
+ if (cut) free(cut);
return xRes;
}
- return NULL;
+ return NULL;
}
-Reference < XHyphenatedWord > SAL_CALL
- Hyphenator::queryAlternativeSpelling( const ::rtl::OUString& /*aWord*/,
- const ::com::sun::star::lang::Locale& /*aLocale*/,
- sal_Int16 /*nIndex*/,
- const ::com::sun::star::beans::PropertyValues& /*aProperties*/ )
- throw(::com::sun::star::lang::IllegalArgumentException,
- ::com::sun::star::uno::RuntimeException)
+Reference < XHyphenatedWord > SAL_CALL Hyphenator::queryAlternativeSpelling(
+ const ::rtl::OUString& /*aWord*/,
+ const ::com::sun::star::lang::Locale& /*aLocale*/,
+ sal_Int16 /*nIndex*/,
+ const ::com::sun::star::beans::PropertyValues& /*aProperties*/ )
+ throw(::com::sun::star::lang::IllegalArgumentException, ::com::sun::star::uno::RuntimeException)
{
/* alternative spelling isn't supported by tex dictionaries */
/* XXX: OOo's extended libhjn algorithm can support alternative spellings with extended TeX dic. */
@@ -529,251 +548,262 @@ Reference < XHyphenatedWord > SAL_CALL
return NULL;
}
-Reference< XPossibleHyphens > SAL_CALL
- Hyphenator::createPossibleHyphens( const ::rtl::OUString& aWord,
- const ::com::sun::star::lang::Locale& aLocale,
- const ::com::sun::star::beans::PropertyValues& aProperties )
- throw(::com::sun::star::lang::IllegalArgumentException,
- ::com::sun::star::uno::RuntimeException)
-
+Reference< XPossibleHyphens > SAL_CALL Hyphenator::createPossibleHyphens( const ::rtl::OUString& aWord,
+ const ::com::sun::star::lang::Locale& aLocale,
+ const ::com::sun::star::beans::PropertyValues& aProperties )
+ throw(::com::sun::star::lang::IllegalArgumentException, ::com::sun::star::uno::RuntimeException)
{
- int wordlen;
- char *hyphens;
- char *lcword;
- int k;
+ int wordlen;
+ char *hyphens;
+ char *lcword;
+ int k;
- PropertyHelper_Hyphen & rHelper = GetPropHelper();
- rHelper.SetTmpPropVals(aProperties);
- sal_Int16 minTrail = rHelper.GetMinTrailing();
- sal_Int16 minLead = rHelper.GetMinLeading();
+ PropertyHelper_Hyphen & rHelper = GetPropHelper();
+ rHelper.SetTmpPropVals(aProperties);
+ sal_Int16 minTrail = rHelper.GetMinTrailing();
+ sal_Int16 minLead = rHelper.GetMinLeading();
- HyphenDict *dict = NULL;
- rtl_TextEncoding aEnc = 0;
- CharClass* pCC = NULL;
+ HyphenDict *dict = NULL;
+ rtl_TextEncoding eEnc = RTL_TEXTENCODING_DONTKNOW;
+ CharClass* pCC = NULL;
- Reference< XPossibleHyphens > xRes;
+ Reference< XPossibleHyphens > xRes;
- k = -1;
- for (int j = 0; j < numdict; j++)
- if (aLocale == aDicts[j].aLoc) k = j;
+ k = -1;
+ for (int j = 0; j < numdict; j++)
+ {
+ if (aLocale == aDicts[j].aLoc) k = j;
+ }
+ // if we have a hyphenation dictionary matching this locale
+ if (k != -1)
+ {
+ // if this dictioanry has not been loaded yet do that
+ if (!aDicts[k].aPtr)
+ {
+ OUString DictFN = aDicts[k].aName + A2OU(".dic");
+ OUString dictpath;
- // if we have a hyphenation dictionary matching this locale
- if (k != -1) {
+ osl::FileBase::getSystemPathFromFileURL( DictFN, dictpath );
+ OString sTmp( OU2ENC( dictpath, osl_getThreadTextEncoding() ) );
- // if this dictioanry has not been loaded yet do that
- if (!aDicts[k].aPtr) {
+#if defined(WNT)
+ // workaround for Windows specifc problem that the
+ // path length in calls to 'fopen' is limted to somewhat
+ // about 120+ characters which will usually be exceed when
+ // using dictionaries as extensions.
+ sTmp = Win_GetShortPathName( dictpath );
+#endif
- OUString DictFN = aDicts[k].aName + A2OU(".dic");
- OUString dictpath;
+ if ( ( dict = hnj_hyphen_load ( sTmp.getStr()) ) == NULL )
+ {
+ fprintf(stderr, "Couldn't find file %s and %s\n", sTmp.getStr(), OU2ENC(dictpath, osl_getThreadTextEncoding()) );
+ return NULL;
+ }
+ aDicts[k].aPtr = dict;
+ aDicts[k].eEnc = getTextEncodingFromCharset(dict->cset);
+ }
- osl::FileBase::getSystemPathFromFileURL( DictFN, dictpath );
- OString sTmp( OU2ENC( dictpath, osl_getThreadTextEncoding() ) );
+ // other wise hyphenate the word with that dictionary
+ dict = aDicts[k].aPtr;
+ eEnc = aDicts[k].eEnc;
+ pCC = aDicts[k].apCC;
-#if defined(WNT)
- // workaround for Windows specifc problem that the
- // path length in calls to 'fopen' is limted to somewhat
- // about 120+ characters which will usually be exceed when
- // using dictionaries as extensions.
- sTmp = Win_GetShortPathName( dictpath );
-#endif
+ // we don't want to work with a default text encoding since following incorrect
+ // results may occur only for specific text and thus may be hard to notice.
+ // Thus better always make a clean exit here if the text encoding is in question.
+ // Hopefully something not working at all will raise proper attention quickly. ;-)
+ DBG_ASSERT( eEnc != RTL_TEXTENCODING_DONTKNOW, "failed to get text encoding! (maybe incorrect encoding string in file)" );
+ if (eEnc == RTL_TEXTENCODING_DONTKNOW)
+ return NULL;
- if ( ( dict = hnj_hyphen_load ( sTmp.getStr()) ) == NULL )
+ // first handle smart quotes both single and double
+ OUStringBuffer rBuf(aWord);
+ sal_Int32 nc = rBuf.getLength();
+ sal_Unicode ch;
+ for (sal_Int32 ix=0; ix < nc; ix++)
{
- fprintf(stderr, "Couldn't find file %s and %s\n", sTmp.getStr(), OU2ENC(dictpath, osl_getThreadTextEncoding()) );
- return NULL;
+ ch = rBuf.charAt(ix);
+ if ((ch == 0x201C) || (ch == 0x201D))
+ rBuf.setCharAt(ix,(sal_Unicode)0x0022);
+ if ((ch == 0x2018) || (ch == 0x2019))
+ rBuf.setCharAt(ix,(sal_Unicode)0x0027);
}
- aDicts[k].aPtr = dict;
- aDicts[k].aEnc = rtl_getTextEncodingFromUnixCharset(dict->cset);
- if (aDicts[k].aEnc == RTL_TEXTENCODING_DONTKNOW) {
- if (strcmp("ISCII-DEVANAGARI", dict->cset) == 0) {
- aDicts[k].aEnc = RTL_TEXTENCODING_ISCII_DEVANAGARI;
- } else if (strcmp("UTF-8", dict->cset) == 0) {
- aDicts[k].aEnc = RTL_TEXTENCODING_UTF8;
+ OUString nWord(rBuf.makeStringAndClear());
+
+ // now convert word to all lowercase for pattern recognition
+ OUString nTerm(makeLowerCase(nWord, pCC));
+
+ // now convert word to needed encoding
+ OString encWord(OU2ENC(nTerm,eEnc));
+
+ wordlen = encWord.getLength();
+ lcword = new char[wordlen+1];
+ hyphens = new char[wordlen+5];
+ char ** rep = NULL; // replacements of discretionary hyphenation
+ int * pos = NULL; // array of [hyphenation point] minus [deletion position]
+ int * cut = NULL; // length of deletions in original word
+
+ // copy converted word into simple char buffer
+ strcpy(lcword,encWord.getStr());
+
+ // first remove any trailing periods
+ int n = wordlen-1;
+ while((n >=0) && (lcword[n] == '.'))
+ n--;
+ n++;
+ // fprintf(stderr,"hyphenate... %s\n",lcword); fflush(stderr);
+ if (n > 0)
+ {
+ const bool bFailed = 0 != hnj_hyphen_hyphenate3(dict, lcword, n, hyphens, NULL,
+ &rep, &pos, &cut, minLead, minTrail,
+ Max(dict->clhmin, Max(dict->clhmin, 2) + Max(0, minLead - Max(dict->lhmin, 2))),
+ Max(dict->crhmin, Max(dict->crhmin, 2) + Max(0, minTrail - Max(dict->rhmin, 2))) );
+ if (bFailed)
+ {
+ delete[] hyphens;
+ delete[] lcword;
+
+ if (rep)
+ {
+ for(int j = 0; j < n; j++)
+ {
+ if (rep[j]) free(rep[j]);
+ }
+ free(rep);
+ }
+ if (pos) free(pos);
+ if (cut) free(cut);
+
+ return NULL;
}
- }
- }
-
- // other wise hyphenate the word with that dictionary
- dict = aDicts[k].aPtr;
- aEnc = aDicts[k].aEnc;
- pCC = aDicts[k].apCC;
-
- // first handle smart quotes both single and double
- OUStringBuffer rBuf(aWord);
- sal_Int32 nc = rBuf.getLength();
- sal_Unicode ch;
- for (sal_Int32 ix=0; ix < nc; ix++) {
- ch = rBuf.charAt(ix);
- if ((ch == 0x201C) || (ch == 0x201D)) rBuf.setCharAt(ix,(sal_Unicode)0x0022);
- if ((ch == 0x2018) || (ch == 0x2019)) rBuf.setCharAt(ix,(sal_Unicode)0x0027);
- }
- OUString nWord(rBuf.makeStringAndClear());
-
- // now convert word to all lowercase for pattern recognition
- OUString nTerm(makeLowerCase(nWord, pCC));
-
- // now convert word to needed encoding
- OString encWord(OU2ENC(nTerm,aEnc));
-
- wordlen = encWord.getLength();
- lcword = new char[wordlen+1];
- hyphens = new char[wordlen+5];
- char ** rep = NULL; // replacements of discretionary hyphenation
- int * pos = NULL; // array of [hyphenation point] minus [deletion position]
- int * cut = NULL; // length of deletions in original word
-
- // copy converted word into simple char buffer
- strcpy(lcword,encWord.getStr());
-
- // first remove any trailing periods
- int n = wordlen-1;
- while((n >=0) && (lcword[n] == '.')) n--;
- n++;
- // fprintf(stderr,"hyphenate... %s\n",lcword); fflush(stderr);
- if (n > 0) {
- if (hnj_hyphen_hyphenate3(dict, lcword, n, hyphens, NULL, &rep, &pos, &cut,
- minLead, minTrail, Max(dict->clhmin, Max(dict->clhmin, 2) + Max(0, minLead - Max(dict->lhmin, 2))),
- Max(dict->crhmin, Max(dict->crhmin, 2) + Max(0, minTrail - Max(dict->rhmin, 2)))))
- {
- delete[] hyphens;
- delete[] lcword;
-
- if (rep) {
- for(int j = 0; j < n; j++) {
- if (rep[j]) free(rep[j]);
- }
- free(rep);
- }
- if (pos) free(pos);
- if (cut) free(cut);
-
- return NULL;
- }
- }
- // now backfill hyphens[] for any removed periods
- for (int c = n; c < wordlen; c++) hyphens[c] = '0';
- hyphens[wordlen] = '\0';
- // fprintf(stderr,"... %s\n",hyphens); fflush(stderr);
-
- INT16 nHyphCount = 0;
- INT16 i;
-
- for ( i = 0; i < encWord.getLength(); i++)
- if (hyphens[i]&1 && (!rep || !rep[i]))
- nHyphCount++;
-
- Sequence< INT16 > aHyphPos(nHyphCount);
- INT16 *pPos = aHyphPos.getArray();
- OUStringBuffer hyphenatedWordBuffer;
- OUString hyphenatedWord;
- nHyphCount = 0;
-
- for (i = 0; i < nWord.getLength(); i++) {
- hyphenatedWordBuffer.append(aWord[i]);
- // hyphenation position (not alternative)
- if (hyphens[i]&1 && (!rep || !rep[i])) {
- pPos[nHyphCount] = i;
- hyphenatedWordBuffer.append(sal_Unicode('='));
- nHyphCount++;
- }
- }
-
- hyphenatedWord = hyphenatedWordBuffer.makeStringAndClear();
- //fprintf(stderr,"result is %s\n",OU2A(hyphenatedWord));
- //fflush(stderr);
-
- xRes = new PossibleHyphens( aWord, LocaleToLanguage( aLocale ),
- hyphenatedWord, aHyphPos );
-
- delete[] hyphens;
- delete[] lcword;
-
- if (rep) {
- for(int j = 0; j < n; j++) {
- if (rep[j]) free(rep[j]);
- }
- free(rep);
- }
- if (pos) free(pos);
- if (cut) free(cut);
-
- return xRes;
- }
+ }
+ // now backfill hyphens[] for any removed periods
+ for (int c = n; c < wordlen; c++)
+ hyphens[c] = '0';
+ hyphens[wordlen] = '\0';
+ // fprintf(stderr,"... %s\n",hyphens); fflush(stderr);
- return NULL;
+ INT16 nHyphCount = 0;
+ INT16 i;
+
+ for ( i = 0; i < encWord.getLength(); i++)
+ {
+ if (hyphens[i]&1 && (!rep || !rep[i]))
+ nHyphCount++;
+ }
+
+ Sequence< INT16 > aHyphPos(nHyphCount);
+ INT16 *pPos = aHyphPos.getArray();
+ OUStringBuffer hyphenatedWordBuffer;
+ OUString hyphenatedWord;
+ nHyphCount = 0;
+
+ for (i = 0; i < nWord.getLength(); i++)
+ {
+ hyphenatedWordBuffer.append(aWord[i]);
+ // hyphenation position (not alternative)
+ if (hyphens[i]&1 && (!rep || !rep[i]))
+ {
+ pPos[nHyphCount] = i;
+ hyphenatedWordBuffer.append(sal_Unicode('='));
+ nHyphCount++;
+ }
+ }
+
+ hyphenatedWord = hyphenatedWordBuffer.makeStringAndClear();
+ //fprintf(stderr,"result is %s\n",OU2A(hyphenatedWord));
+ //fflush(stderr);
+
+ xRes = new PossibleHyphens( aWord, LocaleToLanguage( aLocale ),
+ hyphenatedWord, aHyphPos );
+
+ delete[] hyphens;
+ delete[] lcword;
+
+ if (rep)
+ {
+ for(int j = 0; j < n; j++)
+ {
+ if (rep[j]) free(rep[j]);
+ }
+ free(rep);
+ }
+ if (pos) free(pos);
+ if (cut) free(cut);
+ return xRes;
+ }
+
+ return NULL;
}
sal_uInt16 SAL_CALL Hyphenator::capitalType(const OUString& aTerm, CharClass * pCC)
{
- sal_Int32 tlen = aTerm.getLength();
- if ((pCC) && (tlen)) {
- String aStr(aTerm);
- sal_Int32 nc = 0;
- for (xub_StrLen tindex = 0; tindex < tlen; tindex++) {
- if (pCC->getCharacterType(aStr,tindex) &
- ::com::sun::star::i18n::KCharacterType::UPPER) nc++;
- }
-
- if (nc == 0) return (sal_uInt16) CAPTYPE_NOCAP;
-
- if (nc == tlen) return (sal_uInt16) CAPTYPE_ALLCAP;
+ sal_Int32 tlen = aTerm.getLength();
+ if ((pCC) && (tlen))
+ {
+ String aStr(aTerm);
+ sal_Int32 nc = 0;
+ for (xub_StrLen tindex = 0; tindex < tlen; tindex++)
+ {
+ if (pCC->getCharacterType(aStr,tindex) & ::com::sun::star::i18n::KCharacterType::UPPER)
+ nc++;
+ }
- if ((nc == 1) && (pCC->getCharacterType(aStr,0) &
- ::com::sun::star::i18n::KCharacterType::UPPER))
- return (sal_uInt16) CAPTYPE_INITCAP;
+ if (nc == 0)
+ return (sal_uInt16) CAPTYPE_NOCAP;
+ if (nc == tlen)
+ return (sal_uInt16) CAPTYPE_ALLCAP;
+ if ((nc == 1) && (pCC->getCharacterType(aStr,0) & ::com::sun::star::i18n::KCharacterType::UPPER))
+ return (sal_uInt16) CAPTYPE_INITCAP;
- return (sal_uInt16) CAPTYPE_MIXED;
+ return (sal_uInt16) CAPTYPE_MIXED;
}
- return (sal_uInt16) CAPTYPE_UNKNOWN;
+ return (sal_uInt16) CAPTYPE_UNKNOWN;
}
OUString SAL_CALL Hyphenator::makeLowerCase(const OUString& aTerm, CharClass * pCC)
{
- if (pCC)
- return pCC->toLower_rtl(aTerm, 0, aTerm.getLength());
- return aTerm;
+ if (pCC)
+ return pCC->toLower_rtl(aTerm, 0, aTerm.getLength());
+ return aTerm;
}
OUString SAL_CALL Hyphenator::makeUpperCase(const OUString& aTerm, CharClass * pCC)
{
- if (pCC)
- return pCC->toUpper_rtl(aTerm, 0, aTerm.getLength());
- return aTerm;
+ if (pCC)
+ return pCC->toUpper_rtl(aTerm, 0, aTerm.getLength());
+ return aTerm;
}
OUString SAL_CALL Hyphenator::makeInitCap(const OUString& aTerm, CharClass * pCC)
{
- sal_Int32 tlen = aTerm.getLength();
- if ((pCC) && (tlen)) {
- OUString bTemp = aTerm.copy(0,1);
- if (tlen > 1)
- return ( pCC->toUpper_rtl(bTemp, 0, 1)
- + pCC->toLower_rtl(aTerm,1,(tlen-1)) );
-
- return pCC->toUpper_rtl(bTemp, 0, 1);
+ sal_Int32 tlen = aTerm.getLength();
+ if ((pCC) && (tlen))
+ {
+ OUString bTemp = aTerm.copy(0,1);
+ if (tlen > 1)
+ return ( pCC->toUpper_rtl(bTemp, 0, 1) + pCC->toLower_rtl(aTerm,1,(tlen-1)) );
+
+ return pCC->toUpper_rtl(bTemp, 0, 1);
}
- return aTerm;
+ return aTerm;
}
-
-
Reference< XInterface > SAL_CALL Hyphenator_CreateInstance(
- const Reference< XMultiServiceFactory > & /*rSMgr*/ )
+ const Reference< XMultiServiceFactory > & /*rSMgr*/ )
throw(Exception)
{
-
Reference< XInterface > xService = (cppu::OWeakObject*) new Hyphenator;
return xService;
}
-sal_Bool SAL_CALL
- Hyphenator::addLinguServiceEventListener(
- const Reference< XLinguServiceEventListener >& rxLstnr )
+sal_Bool SAL_CALL Hyphenator::addLinguServiceEventListener(
+ const Reference< XLinguServiceEventListener >& rxLstnr )
throw(RuntimeException)
{
MutexGuard aGuard( GetLinguMutex() );
@@ -787,9 +817,8 @@ sal_Bool SAL_CALL
}
-sal_Bool SAL_CALL
- Hyphenator::removeLinguServiceEventListener(
- const Reference< XLinguServiceEventListener >& rxLstnr )
+sal_Bool SAL_CALL Hyphenator::removeLinguServiceEventListener(
+ const Reference< XLinguServiceEventListener >& rxLstnr )
throw(RuntimeException)
{
MutexGuard aGuard( GetLinguMutex() );
@@ -804,8 +833,7 @@ sal_Bool SAL_CALL
}
-OUString SAL_CALL
- Hyphenator::getServiceDisplayName( const Locale& /*rLocale*/ )
+OUString SAL_CALL Hyphenator::getServiceDisplayName( const Locale& /*rLocale*/ )
throw(RuntimeException)
{
MutexGuard aGuard( GetLinguMutex() );
@@ -813,8 +841,7 @@ OUString SAL_CALL
}
-void SAL_CALL
- Hyphenator::initialize( const Sequence< Any >& rArguments )
+void SAL_CALL Hyphenator::initialize( const Sequence< Any >& rArguments )
throw(Exception, RuntimeException)
{
MutexGuard aGuard( GetLinguMutex() );
@@ -836,16 +863,15 @@ void SAL_CALL
xPropHelper = pPropHelper;
pPropHelper->AddAsPropListener(); //! after a reference is established
}
- else {
+ else
+ {
DBG_ERROR( "wrong number of arguments in sequence" );
}
-
}
}
-void SAL_CALL
- Hyphenator::dispose()
+void SAL_CALL Hyphenator::dispose()
throw(RuntimeException)
{
MutexGuard aGuard( GetLinguMutex() );
@@ -859,8 +885,7 @@ void SAL_CALL
}
-void SAL_CALL
- Hyphenator::addEventListener( const Reference< XEventListener >& rxListener )
+void SAL_CALL Hyphenator::addEventListener( const Reference< XEventListener >& rxListener )
throw(RuntimeException)
{
MutexGuard aGuard( GetLinguMutex() );
@@ -870,8 +895,7 @@ void SAL_CALL
}
-void SAL_CALL
- Hyphenator::removeEventListener( const Reference< XEventListener >& rxListener )
+void SAL_CALL Hyphenator::removeEventListener( const Reference< XEventListener >& rxListener )
throw(RuntimeException)
{
MutexGuard aGuard( GetLinguMutex() );
@@ -927,32 +951,6 @@ Sequence< OUString > Hyphenator::getSupportedServiceNames_Static()
return aSNS;
}
-
-sal_Bool SAL_CALL Hyphenator_writeInfo(
- void * /*pServiceManager*/, registry::XRegistryKey * pRegistryKey )
-{
-
- try
- {
- String aImpl( '/' );
- aImpl += Hyphenator::getImplementationName_Static().getStr();
- aImpl.AppendAscii( "/UNO/SERVICES" );
- Reference< registry::XRegistryKey > xNewKey =
- pRegistryKey->createKey( aImpl );
- Sequence< OUString > aServices =
- Hyphenator::getSupportedServiceNames_Static();
- for( INT32 i = 0; i < aServices.getLength(); i++ )
- xNewKey->createKey( aServices.getConstArray()[i] );
-
- return sal_True;
- }
- catch(Exception &)
- {
- return sal_False;
- }
-}
-
-
void * SAL_CALL Hyphenator_getFactory( const sal_Char * pImplName,
XMultiServiceFactory * pServiceManager, void * )
{