1 files changed, 371 insertions, 0 deletions
diff --git a/lingucomponent/source/thesaurus/mythes/mythes.cxx b/lingucomponent/source/thesaurus/mythes/mythes.cxx
new file mode 100644
index 000000000000..67336f48f60d
--- /dev/null
+++ b/lingucomponent/source/thesaurus/mythes/mythes.cxx
@@ -0,0 +1,371 @@
+#include "license.readme"
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <errno.h>
+
+#include "mythes.hxx"
+
+// some basic utility routines
+
+#ifdef MYTHES_STANDALONE
+
+// Duplicate a NUL-terminated string into freshly malloc'ed storage.
+//
+// p        string to copy; NULL is tolerated and yields NULL
+// returns  a copy that the caller releases with free(), or NULL when p is
+//          NULL or the allocation fails
+char * mystrdup(const char * p)
+{
+  if (!p) return NULL;
+
+  // size_t rather than int: strlen() returns size_t, and narrowing it
+  // could under-allocate for a very long string
+  size_t sl = strlen(p) + 1;
+  char * d = (char *)malloc(sl);
+  if (d) memcpy(d,p,sl);
+  return d;
+}
+
+// Remove one trailing line terminator from s, in place.
+//
+// A final '\n' or '\r' is cut off; if that exposes a '\r' (the CR of a
+// CR/LF pair) it is cut off as well.  A '\r' in front of an ordinary last
+// character is part of the text and is left alone.
+//
+// s  NUL-terminated, writable string; NULL is ignored
+void mychomp(char * s)
+{
+  if (!s) return;
+
+  size_t k = strlen(s);
+  if ((k > 0) && ((s[k-1] == '\r') || (s[k-1] == '\n'))) {
+    s[--k] = '\0';
+    // only look for a preceding CR once a terminator really was removed;
+    // testing it unconditionally would truncate text such as "a\rb"
+    if ((k > 0) && (s[k-1] == '\r')) s[k-1] = '\0';
+  }
+}
+
+#else
+
+extern char * mystrdup(const char * p);
+extern void mychomp(char * s);
+
+#endif
+
+
+// Find the first occurrence of the character c in the string d.
+// Returns its zero-based position, or -1 when c does not occur.
+int mystr_indexOfChar(const char * d, int c)
+{
+  const char * hit = strchr(d, c);
+  return hit ? (int)(hit - d) : -1;
+}
+
+
+// Build a thesaurus from an index file and a data file.
+//
+// idxpath  path of the index file
+// datpath  path of the data file
+//
+// If initialization fails, a message is written to stderr and the object
+// is left empty: no data file, no index entries, no encoding.  Lookup()
+// then returns 0 because pdfile is NULL.
+MyThes::MyThes(const char* idxpath, const char * datpath)
+{
+    nw = 0;
+    encoding = NULL;
+    list = NULL;
+    offst = NULL;
+    // thInitialize() may return before it ever assigns pdfile, and both
+    // Lookup() and thCleanup() test it
+    pdfile = NULL;
+
+    if (thInitialize(idxpath, datpath) != 1) {
+        fprintf(stderr,"Error - can't open %s or %s\n",idxpath, datpath);
+        fflush(stderr);
+
+        // Release whatever was built so far and reset every member, so
+        // that the destructor does not free or read any of it again.
+        if (pdfile) {
+            fclose(pdfile);
+            pdfile = NULL;
+        }
+        if (list) {
+            for (int i = 0; i < nw; i++) free(list[i]);
+            free((void*)list);
+            list = NULL;
+        }
+        if (offst) free((void*)offst);
+        offst = NULL;
+        if (encoding) free((void*)encoding);
+        encoding = NULL;
+        nw = 0;
+        // did not initialize properly - throw exception?
+    }
+}
+
+
+// Release the data file, the index and the encoding name.
+MyThes::~MyThes()
+{
+    // thCleanup() reports a status, but there is nothing to be done about
+    // a failure at this point (throw exception?)
+    (void) thCleanup();
+
+    // free(NULL) is a no-op, so no separate test is needed
+    free((void*)encoding);
+    encoding = NULL;
+    offst = NULL;
+    list = NULL;
+}
+
+
+// Load the index file into memory and open the data file.
+//
+// The index is read as: first line = encoding name, second line = number
+// of entries, then "word|offset" lines until an empty line or end of file.
+// Lines without a '|' and entries beyond the announced count are skipped.
+//
+// idxpath  path of the index file
+// datpath  path of the data file
+// returns  1 on success; 0 on any failure, in which case everything this
+//          routine allocated has been released again, the members it set
+//          are back to NULL / 0 and pdfile is NULL
+int MyThes::thInitialize(const char* idxpath, const char* datpath)
+{
+    // give pdfile a defined value on every exit path
+    pdfile = NULL;
+
+    // open the index file
+    FILE * pifile = fopen(idxpath,"r");
+    if (!pifile) return 0;
+
+    int ok = 1;       // cleared by the first failure
+    int nomem = 0;    // set when that failure was an allocation
+    int idxsz = 0;
+
+    char * wrd = (char *)calloc(1, MAX_WD_LEN);
+    if (!wrd) { ok = 0; nomem = 1; }
+
+    // parse in encoding and index size
+    if (ok && (readLine(pifile,wrd,MAX_WD_LEN) < 0)) ok = 0;
+    if (ok) {
+        encoding = mystrdup(wrd);
+        if (!encoding) { ok = 0; nomem = 1; }
+    }
+    if (ok) {
+        if (readLine(pifile,wrd,MAX_WD_LEN) > 0) idxsz = atoi(wrd);
+        // a missing or non-positive count leaves nothing to look up
+        if (idxsz <= 0) ok = 0;
+    }
+
+    // now allocate list, offst for the given size
+    if (ok) {
+        list = (char**) calloc(idxsz,sizeof(char*));
+        offst = (unsigned int*) calloc(idxsz,sizeof(unsigned int));
+        if ( (!(list)) || (!(offst)) ) { ok = 0; nomem = 1; }
+    }
+
+    // now parse the remaining lines of the index
+    if (ok) {
+        int len = readLine(pifile,wrd,MAX_WD_LEN);
+        while (ok && (len > 0))
+        {
+            int np = mystr_indexOfChar(wrd,'|');
+            if ((nw < idxsz) && (np >= 0)) {
+                // calloc() zero-fills, so copying np bytes leaves the
+                // word NUL-terminated
+                list[nw] = (char *)calloc(1,(np+1));
+                if (list[nw]) {
+                    memcpy((list[nw]),wrd,np);
+                    offst[nw] = atoi(wrd+np+1);
+                    nw++;
+                } else {
+                    ok = 0;
+                    nomem = 1;
+                }
+            }
+            if (ok) len = readLine(pifile,wrd,MAX_WD_LEN);
+        }
+    }
+
+    // the index file and the line buffer are done with, success or not
+    free((void *)wrd);
+    fclose(pifile);
+
+    /* next open the data file */
+    if (ok) {
+        pdfile = fopen(datpath,"r");
+        if (!pdfile) ok = 0;
+    }
+
+    if (!ok) {
+        if (nomem) {
+            fprintf(stderr,"Error - bad memory allocation\n");
+            fflush(stderr);
+        }
+        // undo the partial work so that the caller sees an empty object
+        if (list) {
+            for (int i = 0; i < nw; i++) free(list[i]);
+            free((void*)list);
+            list = NULL;
+        }
+        free((void*)offst);
+        offst = NULL;
+        free((void*)encoding);
+        encoding = NULL;
+        nw = 0;
+        return 0;
+    }
+
+    return 1;
+}
+
+
+// Close the data file and release the in-memory index.
+//
+// pdfile, list and offst are reset to NULL and nw to 0, so calling this
+// routine again is harmless.  The encoding string is not touched here.
+//
+// returns  1 (always)
+int MyThes::thCleanup()
+{
+    /* first close the data file */
+    if (pdfile) {
+        fclose(pdfile);
+        pdfile=NULL;
+    }
+
+    /* now free up all the allocated strings on the list */
+    if (list) {
+        for (int i=0; i < nw; i++)
+        {
+            free(list[i]);
+            list[i] = NULL;
+        }
+        free((void*)list);
+        list = NULL;
+    }
+
+    free((void*)offst);
+    offst = NULL;
+
+    nw = 0;
+    return 1;
+}
+
+
+
+// Look up a word in the index and read its meanings from the data file.
+//
+// pText    the word to look up; need not be NUL-terminated
+// len      number of characters of pText to use
+// pme      receives a malloc'ed array of meaning entries, or NULL when
+//          nothing is returned.  Each entry holds a definition string, a
+//          synonym count and an array of synonym strings.
+// returns  the number of entries stored in *pme; 0 when the word is not
+//          found or on any file or memory error
+//
+// note: the calling routine should call CleanUpAfterLookup with the same
+// pointer and the returned count to properly deallocate the memory
+
+int MyThes::Lookup(const char * pText, int len, mentry** pme)
+{
+
+    *pme = NULL;
+
+    // handle the case of missing file or file related errors
+    if (! pdfile) return 0;
+
+    // nothing to search for, or nothing to search in
+    if (!pText || (len < 0) || !list || (nw <= 0)) return 0;
+
+    /* copy search word and make sure null terminated */
+    char * wrd = (char *) calloc(1,(len+1));
+    if (!wrd) return 0;
+    memcpy(wrd,pText,len);
+
+    /* find it in the list */
+    int idx = binsearch(wrd,list,nw);
+    free(wrd);
+    if (idx < 0) return 0;
+
+    // now seek to the offset
+    if (fseek(pdfile,(long) offst[idx],SEEK_SET)) return 0;
+
+    char * buf = (char *) malloc( MAX_LN_LEN );
+    if (!buf) return 0;
+
+    // grab the count of the number of meanings: it follows the first '|'
+    // of the line found at the offset
+    int np = -1;
+    if (readLine(pdfile, buf, (MAX_LN_LEN-1)) >= 0) {
+        np = mystr_indexOfChar(buf,'|');
+    }
+    int nmeanings = (np >= 0) ? atoi(buf+np+1) : 0;
+    if (nmeanings <= 0) {
+        free(buf);
+        return 0;
+    }
+
+    // and allocate a list of meaning entries
+    *pme = (mentry*) malloc( nmeanings * sizeof(mentry) );
+    if (!(*pme)) {
+        free(buf);
+        return 0;
+    }
+
+    // now read in each meaning and parse it to get defn, count and synonym lists
+    mentry* pm = *(pme);
+    char dfn[MAX_WD_LEN];
+    int nread = 0;
+
+    for (; nread < nmeanings; nread++) {
+        // a short data file ends the list here instead of re-parsing the
+        // previous line, which is still sitting in buf
+        if (readLine(pdfile, buf, (MAX_LN_LEN-1)) < 0) break;
+
+        pm->count = 0;
+        pm->psyns = NULL;
+        pm->defn = NULL;
+
+        // store away the part of speech for later use
+        char * p = buf;
+        char * pos = NULL;
+        np = mystr_indexOfChar(p,'|');
+        if (np >= 0) {
+            *(buf+np) = '\0';
+            pos = mystrdup(p);
+            p = p + np + 1;
+        } else {
+            pos = mystrdup("");
+        }
+
+        // count the number of fields in the remaining line
+        int nf = 1;
+        char * d = p;
+        np = mystr_indexOfChar(d,'|');
+        while ( np >= 0 ) {
+            nf++;
+            d = d + np + 1;
+            np = mystr_indexOfChar(d,'|');
+        }
+
+        // fill in the synonym list; count stays 0 if the array cannot be
+        // allocated, so the cleanup routine never walks a NULL array
+        pm->psyns = (char **) malloc(nf*sizeof(char*));
+        if (pm->psyns) {
+            pm->count = nf;
+            d = p;
+            for (int k = 0; k < nf; k++) {
+                np = mystr_indexOfChar(d,'|');
+                // >= 0, not > 0: an empty field ("||") has its separator
+                // at position 0 and must still be split off
+                if (np >= 0) {
+                    *(d+np) = '\0';
+                    pm->psyns[k] = mystrdup(d);
+                    d = d + np + 1;
+                } else {
+                    pm->psyns[k] = mystrdup(d);
+                }
+            }
+        }
+
+        // add pos to first synonym to create the definition
+        const char * first = (pm->count > 0) ? pm->psyns[0] : NULL;
+        if (first) {
+            int k = pos ? (int) strlen(pos) : 0;
+            int m = (int) strlen(first);
+            if (pos && ((k+m) < (MAX_WD_LEN - 1))) {
+                // k + 1 + m + 1 bytes fit into dfn by the test above
+                memcpy(dfn,pos,k);
+                *(dfn+k) = ' ';
+                memcpy((dfn+k+1),first,m+1);
+                pm->defn = mystrdup(dfn);
+            } else {
+                pm->defn = mystrdup(first);
+            }
+        }
+        free(pos);
+        pm++;
+
+    }
+    free(buf);
+
+    // nothing usable was read: hand back no array at all
+    if (nread == 0) {
+        free(*pme);
+        *pme = NULL;
+    }
+
+    return nread;
+}
+
+
+
+// Release the meaning entries handed out by Lookup().
+//
+// pme        address of the entry array pointer; *pme is set to NULL once
+//            the array has been freed
+// nmeanings  number of entries in the array; with 0 nothing is done
+void MyThes::CleanUpAfterLookup(mentry ** pme, int nmeanings)
+{
+
+    if (nmeanings == 0) return;
+    if (!pme || ((*pme) == NULL)) return;
+
+    mentry * pm = *pme;
+
+    for (int i = 0; i < nmeanings; i++, pm++) {
+        // test the array itself before walking it
+        if (pm->psyns) {
+            for (int j = 0; j < pm->count; j++) {
+                free(pm->psyns[j]);
+                pm->psyns[j] = NULL;
+            }
+            free(pm->psyns);
+            pm->psyns = NULL;
+        }
+        free(pm->defn);
+        pm->defn = NULL;
+        pm->count = 0;
+    }
+
+    free(*pme);
+    *pme = NULL;
+}
+
+
+// Read one line of text from pf into buf and strip its line terminator.
+//
+// pf       file to read from
+// buf      receives the NUL-terminated line
+// nc       size handed to fgets(): at most nc-1 characters are stored, and
+//          the rest of a longer line is returned by the following call
+// returns  the length of the string left in buf, or -1 when fgets() fails
+//          (read error or end of file)
+
+int MyThes::readLine(FILE * pf, char * buf, int nc)
+{
+
+  if (!fgets(buf,nc,pf)) return -1;
+
+  mychomp(buf);
+  return strlen(buf);
+}
+
+
+
+//  Binary search for a word in an ascending (strcmp order) array of
+//  NUL-terminated strings.
+//
+//  sw       the word to search for
+//  list     the array to search
+//  nlst     number of entries in list
+//  returns: -1 on not found (also for an empty or missing list)
+//           index of sw in list[] otherwise
+
+int MyThes::binsearch(char * sw, char* list[], int nlst)
+{
+    // with no entries there is no list[0] or list[nlst-1] to compare with
+    if (!sw || !list || (nlst <= 0)) return -1;
+
+    int lp = 0;
+    int up = nlst-1;
+
+    // quick rejection of words outside the range covered by the list
+    if (strcmp(sw,list[lp]) < 0) return -1;
+    if (strcmp(sw,list[up]) > 0) return -1;
+
+    while (lp <= up) {
+        int mp = lp + ((up - lp) >> 1);
+        int j = strcmp(sw,list[mp]);
+        if ( j > 0) {
+            lp = mp + 1;
+        } else if (j < 0 ) {
+            up = mp - 1;
+        } else {
+            return mp;
+        }
+    }
+    return -1;
+}
+
+// Return the stored encoding name, or NULL when none is set.
+char * MyThes::get_th_encoding()
+{
+  return encoding;
+}
+