CWS-TOOLING: integrate CWS hunspell4thesaurus

2009-02-02 16:45:01 +0100 hjs r267278 : #i98415# - kick touch here - go for the root cause elsewhere
2009-02-02 12:09:15 +0100 hjs r267257 : #i98415# - fix parameters of touch
2009-01-23 23:13:00 +0100 mba r266855 : fixed warning
2009-01-23 18:59:55 +0100 mba r266848 : #i98415#: touch copied files
2009-01-21 09:58:05 +0100 nemeth r266633 : CWS-TOOLING: rebase CWS hunspell4thesaurus to trunk@266428 (milestone: DEV300:m39)
2009-01-21 09:04:48 +0100 nemeth r266629 : Issue: #19563 Submitted by: nemeth Reviewed by: nemeth Patch: handle bad dictionary items for back compatibility (affix separator without affix flags)
2009-01-20 20:04:10 +0100 nemeth r266622 : Issue: #19563 Submitted by: nemeth Reviewed by:nemeth Add two small fixes for SF.net Hunspell Bug ID 2487684 2519814
2008-12-10 00:21:41 +0100 nemeth r265141 : CWS-TOOLING: rebase CWS hunspell4thesaurus to trunk@264807 (milestone: DEV300:m37)
2008-12-09 16:12:56 +0100 nemeth r265113 : #i19563#: fixed stemming, and #i90028#: fixed and improved hyphenation
2008-11-26 23:09:05 +0100 nemeth r264438 : #i90028#: CWS hunspell4thesaurus: Windows fixes of the Hunspell patch.
2008-11-26 22:51:03 +0100 nemeth r264436 : CWS-TOOLING: rebase CWS hunspell4thesaurus to trunk@264325 (milestone: DEV300:m36)
2008-11-22 09:02:20 +0100 nemeth r264182 : CWS-TOOLING: rebase CWS hunspell4thesaurus to trunk@263288 (milestone: DEV300:m35)
2008-11-22 08:23:10 +0100 nemeth r264181 : CWS-TOOLING: rebase CWS hunspell4thesaurus to trunk@263288 (milestone: DEV300:m35)
2008-11-21 22:15:21 +0100 nemeth r264176 : #i90028#: migrate CWS hunspell4thesaurus to SVN.
author: Oliver Bolte <obo@openoffice.org> 2009-03-04 09:51:42 +0000
committer: Oliver Bolte <obo@openoffice.org> 2009-03-04 09:51:42 +0000
commit: 76af6630bf438a418981dd4843749b8b9458dd66 (patch)
tree: 62ffd7f02f35ffb956a95afc1bc96384a7d13734 /lingucomponent
parent: 03ff7f6dadd9e0dea3c9647cef41601f6975b257 (diff)
7 files changed, 155 insertions, 364 deletions
diff --git a/lingucomponent/source/hyphenator/altlinuxhyph/hyphen/hyphenimp.cxx b/lingucomponent/source/hyphenator/altlinuxhyph/hyphen/hyphenimp.cxx
index f7ad506cd34f..b24bc6ee80f5 100644
--- a/lingucomponent/source/hyphenator/altlinuxhyph/hyphen/hyphenimp.cxx
+++ b/lingucomponent/source/hyphenator/altlinuxhyph/hyphen/hyphenimp.cxx
@@ -86,6 +86,11 @@ using namespace linguistic;
 #define CAPTYPE_ALLCAP  3
 #define CAPTYPE_MIXED   4
 
+// Max(a,b): yields the larger of a and b. Each parameter is parenthesized so
+// that compound arguments expand safely; an argument may be evaluated twice.
+//#define Min(a,b) ((a) < (b) ? (a) : (b))
+#define Max(a,b) ((a) > (b) ? (a) : (b))
+
 ///////////////////////////////////////////////////////////////////////////
 
 
@@ -395,7 +400,9 @@ Hyphenator::hyphenate( const ::rtl::OUString& aWord,
         while((n >=0) && (lcword[n] == '.')) n--;
             n++;
             if (n > 0) {
-           if (hnj_hyphen_hyphenate2(dict, lcword, n, hyphens, NULL, &rep, &pos, &cut))
+           if (hnj_hyphen_hyphenate3(dict, lcword, n, hyphens, NULL, &rep, &pos, &cut,
+            minLead, minTrail, Max(dict->clhmin, Max(dict->clhmin, 2) + Max(0, minLead - Max(dict->lhmin, 2))),
+            Max(dict->crhmin, Max(dict->crhmin, 2) + Max(0, minTrail - Max(dict->rhmin, 2)))))
            {
               //whoops something did not work
               delete[] hyphens;
diff --git a/lingucomponent/source/spellcheck/hunspell/phonet.cxx b/lingucomponent/source/spellcheck/hunspell/phonet.cxx
index ee14606d7541..e69de29bb2d1 100644
--- a/lingucomponent/source/spellcheck/hunspell/phonet.cxx
+++ b/lingucomponent/source/spellcheck/hunspell/phonet.cxx
@@ -1,297 +0,0 @@
-/*  phonetic.c - generic replacement aglogithms for phonetic transformation
-    Copyright (C) 2000 Bjoern Jacke
-
-    This library is free software; you can redistribute it and/or
-    modify it under the terms of the GNU Lesser General Public
-    License version 2.1 as published by the Free Software Foundation;
-
-    This library is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-    Lesser General Public License for more details.
-
-    You should have received a copy of the GNU Lesser General Public
-    License along with this library; If not, see
-    <http://www.gnu.org/licenses/>.
-
-    Changelog:
-
-    2000-01-05  Bjoern Jacke <bjoern at j3e.de>
-                Initial Release insprired by the article about phonetic
-                transformations out of c't 25/1999
-
-    2007-07-26  Bjoern Jacke <bjoern at j3e.de>
-        Released under MPL/GPL/LGPL tri-license for Hunspell
-
-    2007-08-23  Laszlo Nemeth <nemeth at OOo>
-                Porting from Aspell to Hunspell using C-like structs
-*/
-
-// MARKER(update_precomp.py): autogen include statement, do not remove
-#include "precompiled_lingucomponent.hxx"
-#ifndef MOZILLA_CLIENT
-#include <cstdlib>
-#include <cstring>
-#include <cstdio>
-#include <cctype>
-using namespace std;
-#else
-#include <stdlib.h>
-#include <string.h>
-#include <stdio.h>
-#include <ctype.h>
-#endif
-
-#include "csutil.hxx"
-#include "phonet.hxx"
-
-void init_phonet_hash(phonetable & parms)
-  {
-    int i, k;
-
-    for (i = 0; i < parms.hash_size; i++) {
-      parms.hash[i] = -1;
-    }
-
-    for (i = 0; parms.rules[i][0] != '\0'; i += 2) {
-      /**  set hash value  **/
-      k = (unsigned char) parms.rules[i][0];
-
-      if (parms.hash[k] < 0) {
-    parms.hash[k] = i;
-      }
-    }
-  }
-
-  // like strcpy but safe if the strings overlap
-  //   but only if dest < src
-  static inline void strmove(char * dest, char * src) {
-    while (*src)
-      *dest++ = *src++;
-    *dest = '\0';
-  }
-
-/*  phonetic transcription algorithm                   */
-/*  see: http://aspell.net/man-html/Phonetic-Code.html */
-/*  convert string to uppercase before this call       */
-int phonet (const char * inword, char * target,
-              int len,
-          phonetable & parms)
-  {
-    /**       Do phonetic transformation.       **/
-    /**  "len" = length of "inword" incl. '\0'. **/
-
-    /**  result:  >= 0:  length of "target"    **/
-    /**            otherwise:  error            **/
-
-    int  i,j,k=0,n,p,z;
-    int  k0,n0,p0=-333,z0;
-    char c, c0;
-    const char * s;
-    typedef unsigned char uchar;
-    char word[MAXPHONETUTF8LEN + 1];
-    if (len == -1) len = strlen(inword);
-    if (len > MAXPHONETUTF8LEN) return 0;
-    strcpy(word, inword);
-
-    /**  check word  **/
-    i = j = z = 0;
-    while ((c = word[i]) != '\0') {
-      n = parms.hash[(uchar) c];
-      z0 = 0;
-
-      if (n >= 0) {
-        /**  check all rules for the same letter  **/
-        while (parms.rules[n][0] == c) {
-
-          /**  check whole string  **/
-          k = 1;   /** number of found letters  **/
-          p = 5;   /** default priority  **/
-          s = parms.rules[n];
-          s++;     /**  important for (see below)  "*(s-1)"  **/
-
-          while (*s != '\0'  &&  word[i+k] == *s
-                 &&  !isdigit (*s)  &&  strchr ("(-<^$", *s) == NULL) {
-            k++;
-            s++;
-          }
-          if (*s == '(') {
-            /**  check letters in "(..)"  **/
-            if (isalpha(word[i+k])  // ...could be implied?
-                && strchr(s+1, word[i+k]) != NULL) {
-              k++;
-              while (*s != ')')
-                s++;
-              s++;
-            }
-          }
-          p0 = (int) *s;
-          k0 = k;
-          while (*s == '-'  &&  k > 1) {
-            k--;
-            s++;
-          }
-          if (*s == '<')
-            s++;
-          if (isdigit (*s)) {
-            /**  determine priority  **/
-            p = *s - '0';
-            s++;
-          }
-          if (*s == '^'  &&  *(s+1) == '^')
-            s++;
-
-          if (*s == '\0'
-              || (*s == '^'
-                  && (i == 0  ||  ! isalpha(word[i-1]))
-                  && (*(s+1) != '$'
-                      || (! isalpha(word[i+k0]) )))
-              || (*s == '$'  &&  i > 0
-                  &&  isalpha(word[i-1])
-                  && (! isalpha(word[i+k0]) )))
-          {
-            /**  search for followup rules, if:     **/
-            /**  parms.followup and k > 1  and  NO '-' in searchstring **/
-            c0 = word[i+k-1];
-            n0 = parms.hash[(uchar) c0];
-
-//            if (parms.followup  &&  k > 1  &&  n0 >= 0
-            if (k > 1  &&  n0 >= 0
-                &&  p0 != (int) '-'  &&  word[i+k] != '\0') {
-              /**  test follow-up rule for "word[i+k]"  **/
-              while (parms.rules[n0][0] == c0) {
-
-                /**  check whole string  **/
-                k0 = k;
-                p0 = 5;
-                s = parms.rules[n0];
-                s++;
-                while (*s != '\0'  &&  word[i+k0] == *s
-                       && ! isdigit(*s)  &&  strchr("(-<^$",*s) == NULL) {
-                  k0++;
-                  s++;
-                }
-                if (*s == '(') {
-                  /**  check letters  **/
-                  if (isalpha(word[i+k0])
-                      &&  strchr (s+1, word[i+k0]) != NULL) {
-                    k0++;
-                    while (*s != ')'  &&  *s != '\0')
-                      s++;
-                    if (*s == ')')
-                      s++;
-                  }
-                }
-                while (*s == '-') {
-                  /**  "k0" gets NOT reduced   **/
-                  /**  because "if (k0 == k)"  **/
-                  s++;
-                }
-                if (*s == '<')
-                  s++;
-                if (isdigit (*s)) {
-                  p0 = *s - '0';
-                  s++;
-                }
-
-                if (*s == '\0'
-                    /**  *s == '^' cuts  **/
-                    || (*s == '$'  &&  ! isalpha(word[i+k0])))
-                {
-                  if (k0 == k) {
-                    /**  this is just a piece of the string  **/
-                    n0 += 2;
-                    continue;
-                  }
-
-                  if (p0 < p) {
-                    /**  priority too low  **/
-                    n0 += 2;
-                    continue;
-                  }
-                  /**  rule fits; stop search  **/
-                  break;
-                }
-                n0 += 2;
-              } /**  End of "while (parms.rules[n0][0] == c0)"  **/
-
-              if (p0 >= p  && parms.rules[n0][0] == c0) {
-                n += 2;
-                continue;
-              }
-            } /** end of follow-up stuff **/
-
-            /**  replace string  **/
-            s = parms.rules[n+1];
-            p0 = (parms.rules[n][0] != '\0'
-                 &&  strchr (parms.rules[n]+1,'<') != NULL) ? 1:0;
-            if (p0 == 1 &&  z == 0) {
-              /**  rule with '<' is used  **/
-              if (j > 0  &&  *s != '\0'
-                 && (target[j-1] == c  ||  target[j-1] == *s)) {
-                j--;
-              }
-              z0 = 1;
-              z = 1;
-              k0 = 0;
-              while (*s != '\0'  &&  word[i+k0] != '\0') {
-                word[i+k0] = *s;
-                k0++;
-                s++;
-              }
-              if (k > k0)
-                strmove (&word[0]+i+k0, &word[0]+i+k);
-
-              /**  new "actual letter"  **/
-              c = word[i];
-            }
-            else { /** no '<' rule used **/
-              i += k - 1;
-              z = 0;
-              while (*s != '\0'
-                     &&  *(s+1) != '\0'  &&  j < len) {
-                if (j == 0  ||  target[j-1] != *s) {
-                  target[j] = *s;
-                  j++;
-                }
-                s++;
-              }
-              /**  new "actual letter"  **/
-              c = *s;
-              if (parms.rules[n][0] != '\0'
-                 &&  strstr (parms.rules[n]+1, "^^") != NULL) {
-                if (c != '\0') {
-                  target[j] = c;
-                  j++;
-                }
-                strmove (&word[0], &word[0]+i+1);
-                i = 0;
-                z0 = 1;
-              }
-            }
-            break;
-          }  /** end of follow-up stuff **/
-          n += 2;
-        } /**  end of while (parms.rules[n][0] == c)  **/
-      } /**  end of if (n >= 0)  **/
-      if (z0 == 0) {
-//        if (k && (assert(p0!=-333),!p0) &&  j < len &&  c != '\0'
-//           && (!parms.collapse_result  ||  j == 0  ||  target[j-1] != c)){
-        if (k && !p0 && j < len &&  c != '\0'
-           && (1 || j == 0  ||  target[j-1] != c)){
-           /**  condense only double letters  **/
-          target[j] = c;
-      ///printf("\n setting \n");
-          j++;
-        }
-
-        i++;
-        z = 0;
-    k=0;
-      }
-    }  /**  end of   while ((c = word[i]) != '\0')  **/
-
-    target[j] = '\0';
-    return (j);
-
-  }  /**  end of function "phonet"  **/
diff --git a/lingucomponent/source/spellcheck/hunspell/phonet.hxx b/lingucomponent/source/spellcheck/hunspell/phonet.hxx
index 4e9e0d647080..e69de29bb2d1 100644
--- a/lingucomponent/source/spellcheck/hunspell/phonet.hxx
+++ b/lingucomponent/source/spellcheck/hunspell/phonet.hxx
@@ -1,50 +0,0 @@
-/*  phonetic.c - generic replacement aglogithms for phonetic transformation
-    Copyright (C) 2000 Bjoern Jacke
-
-    This library is free software; you can redistribute it and/or
-    modify it under the terms of the GNU Lesser General Public
-    License version 2.1 as published by the Free Software Foundation;
-
-    This library is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-    Lesser General Public License for more details.
-
-    You should have received a copy of the GNU Lesser General Public
-    License along with this library; If not, see
-    <http://www.gnu.org/licenses/>.
-
-    Changelog:
-
-    2000-01-05  Bjoern Jacke <bjoern at j3e.de>
-                Initial Release insprired by the article about phonetic
-                transformations out of c't 25/1999
-
-    2007-07-26  Bjoern Jacke <bjoern at j3e.de>
-        Released under MPL/GPL/LGPL tri-license for Hunspell
-
-    2007-08-23  Laszlo Nemeth <nemeth at OOo>
-                Porting from Aspell to Hunspell using C-like structs
-*/
-
-#ifndef __PHONETHXX__
-#define __PHONETHXX__
-
-#define MAXPHONETLEN      256
-#define MAXPHONETUTF8LEN  (MAXPHONETLEN * 4)
-
-struct phonetable {
-  char utf8;
-  cs_info * lang;
-  int num;
-  char * * rules;
-  static const int hash_size = 256;
-  int hash[hash_size];
-};
-
-void init_phonet_hash(phonetable & parms);
-
-int phonet (const char * inword, char * target,
-              int len, phonetable & phone);
-
-#endif
diff --git a/lingucomponent/source/spellcheck/spell/sspellimp.cxx b/lingucomponent/source/spellcheck/spell/sspellimp.cxx
index 02de8841b3ef..ed1ddf4ca77b 100644
--- a/lingucomponent/source/spellcheck/spell/sspellimp.cxx
+++ b/lingucomponent/source/spellcheck/spell/sspellimp.cxx
@@ -72,6 +72,8 @@ using namespace com::sun::star::uno;
 using namespace com::sun::star::linguistic2;
 using namespace linguistic;
 
+// XML-header of SPELLML queries
+#define SPELLML_HEADER "<?xml?>"
 
 ///////////////////////////////////////////////////////////////////////////
 
@@ -387,7 +389,7 @@ sal_Bool SAL_CALL
     rHelper.SetTmpPropVals( rProperties );
 
     INT16 nFailure = GetSpellFailure( rWord, rLocale );
-    if (nFailure != -1)
+    if (nFailure != -1 && !rWord.match(A2OU(SPELLML_HEADER), 0))
     {
         INT16 nLang = LocaleToLanguage( rLocale );
         // postprocess result for errors that should be ignored
diff --git a/lingucomponent/source/thesaurus/libnth/nthesimp.cxx b/lingucomponent/source/thesaurus/libnth/nthesimp.cxx
index c712a29c9fdf..1ecba30bcb8c 100644
--- a/lingucomponent/source/thesaurus/libnth/nthesimp.cxx
+++ b/lingucomponent/source/thesaurus/libnth/nthesimp.cxx
@@ -7,7 +7,7 @@
  * OpenOffice.org - a multi-platform office productivity suite
  *
  * $RCSfile: nthesimp.cxx,v $
- * $Revision: 1.18 $
+ * $Revision: 1.15.6.4 $
  *
  * This file is part of OpenOffice.org.
  *
@@ -62,7 +62,8 @@
 #define CAPTYPE_ALLCAP  3
 #define CAPTYPE_MIXED   4
 
-
+// XML-header to query SPELLML support
+#define SPELLML_SUPPORT "<?xml?>"
 
 using namespace utl;
 using namespace osl;
@@ -78,6 +79,19 @@ using namespace linguistic;
 
 ///////////////////////////////////////////////////////////////////////////
 
+static uno::Reference< XLinguServiceManager > GetLngSvcMgr_Impl() // Looks up the LinguServiceManager service; the result is empty when no process service factory is available.
+{
+    uno::Reference< XLinguServiceManager > xRes; // stays empty unless the lookup below assigns it
+    uno::Reference< XMultiServiceFactory >  xMgr = getProcessServiceFactory();
+    if (xMgr.is()) // only instantiate when a service factory was returned
+    {
+        xRes = uno::Reference< XLinguServiceManager > ( xMgr->createInstance( // create by service name, then UNO_QUERY for the interface
+                OUString( RTL_CONSTASCII_USTRINGPARAM(
+                    "com.sun.star.linguistic2.LinguServiceManager" ) ) ), UNO_QUERY ) ;
+    }
+    return xRes;
+}
+
 Thesaurus::Thesaurus() :
     aEvtListeners   ( GetLinguMutex() )
 {
@@ -291,7 +305,7 @@ sal_Bool SAL_CALL Thesaurus::hasLocale(const Locale& rLocale)
 
 
 Sequence < Reference < ::com::sun::star::linguistic2::XMeaning > > SAL_CALL
-        Thesaurus::queryMeanings( const OUString& rTerm, const Locale& rLocale,
+        Thesaurus::queryMeanings( const OUString& qTerm, const Locale& rLocale,
                                   const PropertyValues& rProperties)
                                   throw(IllegalArgumentException, RuntimeException)
 {
@@ -299,6 +313,14 @@ Sequence < Reference < ::com::sun::star::linguistic2::XMeaning > > SAL_CALL
 
         uno::Sequence< Reference< XMeaning > > aMeanings( 1 );
         uno::Sequence< Reference< XMeaning > > noMeanings( 0 );
+        uno::Reference< XLinguServiceManager > xLngSvcMgr( GetLngSvcMgr_Impl() );
+        uno::Reference< XSpellChecker1 > xSpell;
+
+        OUString rTerm(qTerm);
+        OUString pTerm(qTerm);
+        sal_uInt16 ct = CAPTYPE_UNKNOWN;
+        sal_Int32 stem = 0;
+        sal_Int32 stem2 = 0;
 
         INT16 nLanguage = LocaleToLanguage( rLocale );
 
@@ -312,6 +334,8 @@ Sequence < Reference < ::com::sun::star::linguistic2::XMeaning > > SAL_CALL
         return noMeanings;
 #endif
 
+        if (prevTerm == qTerm && prevLocale == nLanguage) return prevMeanings;
+
         mentry * pmean = NULL;
     sal_Int32 nmean = 0;
 
@@ -322,7 +346,6 @@ Sequence < Reference < ::com::sun::star::linguistic2::XMeaning > > SAL_CALL
         rtl_TextEncoding aEnc = 0;
         CharClass * pCC = NULL;
 
-
         // find the first thesaurus that matches the locale
         for (int i =0; i < numthes; i++) {
             if (rLocale == aTLocs[i])
@@ -374,10 +397,9 @@ Sequence < Reference < ::com::sun::star::linguistic2::XMeaning > > SAL_CALL
         }
     }
 
-        if (pTH) {
+        while (pTH) {
          // convert word to all lower case for searching
-             sal_uInt16 ct = CAPTYPE_UNKNOWN;
-             ct = capitalType(rTerm, pCC);
+             if (!stem) ct = capitalType(rTerm, pCC);
              OUString nTerm(makeLowerCase(rTerm, pCC));
              OString aTmp( OU2ENC(nTerm, aEnc) );
              nmean = pTH->Lookup(aTmp.getStr(),aTmp.getLength(),&pmean);
@@ -385,13 +407,58 @@ Sequence < Reference < ::com::sun::star::linguistic2::XMeaning > > SAL_CALL
              if (nmean) aMeanings.realloc( nmean );
 
              mentry * pe = pmean;
+             OUString codeTerm = qTerm;
+         Reference< XSpellAlternatives > xTmpRes2;
+
+         if (stem) {
+               xTmpRes2 = xSpell->spell( A2OU("<?xml?><query type='analyze'><word>") +
+            pTerm + A2OU("</word></query>"), nLanguage, rProperties );
+               if (xTmpRes2.is()) {
+                 Sequence<OUString>seq = xTmpRes2->getAlternatives();
+                 if (seq.getLength() > 0) {
+                    codeTerm = seq[0];
+                    stem2 = 1;
+                 }
+#if 0
+                 OString o = OUStringToOString(codeTerm, rtl_getTextEncodingFromUnixCharset("UTF-8"));
+                 fprintf(stderr, "CODETERM: %s\n", o.pData->buffer);
+#endif
+               }
+             }
+
          for (int j = 0; j < nmean; j++) {
              int count = pe->count;
                  if (count) {
                      Sequence< OUString > aStr( count );
                      OUString *pStr = aStr.getArray();
+
                      for (int i=0; i < count; i++) {
                        OUString sTerm(pe->psyns[i],strlen(pe->psyns[i]),aEnc );
+                       sal_Int32 catpos = sTerm.indexOf('(');
+                       sal_Int32 catpos2 = 0;
+                       OUString catst;
+                       OUString catst2;
+                       if (catpos > 2) {
+                         // remove category name for affixation and casing
+                         catst = A2OU(" ") + sTerm.copy(catpos);
+                         sTerm = sTerm.copy(0, catpos);
+                         sTerm = sTerm.trim();
+                       }
+                       // generate synonyms with affixes
+                       if (stem && stem2) {
+                 Reference< XSpellAlternatives > xTmpRes;
+                 xTmpRes = xSpell->spell( A2OU("<?xml?><query type='generate'><word>") +
+                 sTerm + A2OU("</word>") + codeTerm + A2OU("</query>"), nLanguage, rProperties );
+                 if (xTmpRes.is()) {
+                   Sequence<OUString>seq = xTmpRes->getAlternatives();
+                   for (int k = 0; k < seq.getLength(); k++) {
+                     OString o = OUStringToOString(seq[k], rtl_getTextEncodingFromUnixCharset("UTF-8"));
+                   }
+                   if (seq.getLength() > 0) sTerm = seq[0];
+                 }
+               }
+               if (catpos2) sTerm = catst2 + sTerm;
+
                        sal_uInt16 ct1 = capitalType(sTerm, pCC);
                        if (CAPTYPE_MIXED == ct1)
                             ct = ct1;
@@ -413,7 +480,7 @@ Sequence < Reference < ::com::sun::star::linguistic2::XMeaning > > SAL_CALL
                                break;
                              }
                }
-                       OUString aAlt( cTerm );
+                       OUString aAlt( cTerm + catst);
                        pStr[i] = aAlt;
              }
 #if 0
@@ -429,10 +496,60 @@ Sequence < Reference < ::com::sun::star::linguistic2::XMeaning > > SAL_CALL
                  pe++;
          }
              pTH->CleanUpAfterLookup(&pmean,nmean);
-    }
+
         if (nmean) {
+            prevTerm = qTerm;
+            prevMeanings = aMeanings;
+            prevLocale = nLanguage;
             return aMeanings;
     }
+
+        if (stem || !xLngSvcMgr.is()) return noMeanings;
+        stem = 1;
+
+        xSpell = uno::Reference< XSpellChecker1 >( xLngSvcMgr->getSpellChecker(), UNO_QUERY );
+        if (!xSpell.is() || !xSpell->isValid( A2OU(SPELLML_SUPPORT), nLanguage, rProperties )) {
+            return noMeanings;
+        }
+        Reference< XSpellAlternatives > xTmpRes;
+        xTmpRes = xSpell->spell( A2OU("<?xml?><query type='stem'><word>") +
+            rTerm + A2OU("</word></query>"), nLanguage, rProperties );
+        if (xTmpRes.is()) {
+            Sequence<OUString>seq = xTmpRes->getAlternatives();
+#if 0
+            for (int i = 0; i < seq.getLength(); i++) {
+                OString o = OUStringToOString(seq[i], rtl_getTextEncodingFromUnixCharset("UTF-8"));
+                fprintf(stderr, "%d: %s\n", i + 1, o.pData->buffer);
+            }
+#endif
+            if (seq.getLength() > 0) {
+                rTerm = seq[0];  // XXX Use only the first stem
+                continue;
+            }
+        }
+
+        // stem the last word of the synonym (for categories after affixation)
+        rTerm = rTerm.trim();
+        sal_Int32 pos = rTerm.lastIndexOf(' ');
+        if (!pos) return noMeanings;
+        xTmpRes = xSpell->spell( A2OU("<?xml?><query type='stem'><word>") +
+            rTerm.copy(pos + 1) + A2OU("</word></query>"), nLanguage, rProperties );
+        if (xTmpRes.is()) {
+            Sequence<OUString>seq = xTmpRes->getAlternatives();
+            if (seq.getLength() > 0) {
+                pTerm = rTerm.copy(pos + 1);
+                rTerm = rTerm.copy(0, pos + 1) + seq[0];
+#if 0
+                for (int i = 0; i < seq.getLength(); i++) {
+                    OString o = OUStringToOString(seq[i], rtl_getTextEncodingFromUnixCharset("UTF-8"));
+                    fprintf(stderr, "%d: %s\n", i + 1, o.pData->buffer);
+                }
+#endif
+                continue;
+            }
+        }
+        break;
+        }
     return noMeanings;
 }
 
@@ -478,9 +595,8 @@ void SAL_CALL
             xPropHelper = pPropHelper;
             pPropHelper->AddAsPropListener();   //! after a reference is established
         }
-        else {
+        else
             DBG_ERROR( "wrong number of arguments in sequence" );
-        }
     }
 }
 
@@ -492,7 +608,7 @@ sal_uInt16 SAL_CALL Thesaurus::capitalType(const OUString& aTerm, CharClass * pC
         if ((pCC) && (tlen)) {
               String aStr(aTerm);
               sal_Int32 nc = 0;
-              for (xub_StrLen tindex = 0; tindex < tlen;  tindex++) {
+              for (USHORT tindex = 0; tindex < tlen;  tindex++) {
                if (pCC->getCharacterType(aStr,tindex) &
                        ::com::sun::star::i18n::KCharacterType::UPPER) nc++;
           }
diff --git a/lingucomponent/source/thesaurus/libnth/nthesimp.hxx b/lingucomponent/source/thesaurus/libnth/nthesimp.hxx
index bf168ec161dc..f75cc29b9755 100644
--- a/lingucomponent/source/thesaurus/libnth/nthesimp.hxx
+++ b/lingucomponent/source/thesaurus/libnth/nthesimp.hxx
@@ -7,7 +7,7 @@
  * OpenOffice.org - a multi-platform office productivity suite
  *
  * $RCSfile: nthesimp.hxx,v $
- * $Revision: 1.6 $
+ * $Revision: 1.6.16.1 $
  *
  * This file is part of OpenOffice.org.
  *
@@ -46,6 +46,8 @@
 #include <com/sun/star/linguistic2/XMeaning.hpp>
 #include <com/sun/star/linguistic2/XThesaurus.hpp>
 
+#include <com/sun/star/linguistic2/XLinguServiceManager.hpp>
+#include <com/sun/star/linguistic2/XSpellChecker1.hpp>
 
 #include <tools/table.hxx>
 
@@ -96,8 +98,10 @@ class Thesaurus :
     OUString *                              aTNames;
     sal_Int32                               numthes;
 
-
-
+    // cache for the Thesaurus dialog
+    Sequence < Reference < ::com::sun::star::linguistic2::XMeaning > > prevMeanings;
+    OUString  prevTerm;
+    INT16 prevLocale;
 
     // disallow copy-constructor and assignment-operator for now
     Thesaurus(const Thesaurus &);
@@ -177,6 +181,14 @@ private:
         OUString SAL_CALL makeUpperCase(const OUString&, CharClass *);
         OUString SAL_CALL makeInitCap(const OUString&, CharClass *);
 
+/*  static ::com::sun::star::uno::Reference<
+        ::com::sun::star::linguistic2::XLinguServiceManager > xLngSvcMgr;
+    static ::com::sun::star::uno::Reference<
+        ::com::sun::star::linguistic2::XSpellChecker1 > xSpell;
+*/
+    static ::com::sun::star::uno::Reference<
+        ::com::sun::star::linguistic2::XLinguServiceManager > GetLngSvcMgr();
+
 };
 
 inline OUString Thesaurus::getImplementationName_Static() throw()
diff --git a/lingucomponent/source/thesaurus/mythes/data_layout.txt b/lingucomponent/source/thesaurus/mythes/data_layout.txt
index 12d77a74f208..ef4bc255d96a 100644
--- a/lingucomponent/source/thesaurus/mythes/data_layout.txt
+++ b/lingucomponent/source/thesaurus/mythes/data_layout.txt
@@ -18,6 +18,7 @@ to and from this encoding if necessary.
 
      Strings currently recognized by OpenOffice.org are:
 
+     UTF-8
      ISO8859-1
      ISO8859-2
      ISO8859-3
author	Oliver Bolte <obo@openoffice.org>	2009-03-04 09:51:42 +0000
committer	Oliver Bolte <obo@openoffice.org>	2009-03-04 09:51:42 +0000
commit	76af6630bf438a418981dd4843749b8b9458dd66 (patch)
tree	62ffd7f02f35ffb956a95afc1bc96384a7d13734 /lingucomponent
parent	03ff7f6dadd9e0dea3c9647cef41601f6975b257 (diff)