summaryrefslogtreecommitdiff
path: root/patches/dev300/hunspell-hashify.diff
diff options
context:
space:
mode:
Diffstat (limited to 'patches/dev300/hunspell-hashify.diff')
-rw-r--r--patches/dev300/hunspell-hashify.diff963
1 files changed, 0 insertions, 963 deletions
diff --git a/patches/dev300/hunspell-hashify.diff b/patches/dev300/hunspell-hashify.diff
deleted file mode 100644
index bfb3ea37a..000000000
--- a/patches/dev300/hunspell-hashify.diff
+++ /dev/null
@@ -1,963 +0,0 @@
---- lingucomponent/source/spellcheck/hunspell/affentry.cxx 2006-09-16 18:07:56.000000000 +0200
-+++ lingucomponent/source/spellcheck/hunspell/affentry.cxx 2007-08-31 17:28:49.000000000 +0200
-@@ -196,7 +196,7 @@ struct hentry * PfxEntry::check(const ch
- ((!needflag) || TESTAFF(he->astr, needflag, he->alen) ||
- (contclass && TESTAFF(contclass, needflag, contclasslen))))
- return he;
-- } while ((he = he->next_homonym)); // check homonyms
-+ } while (he = pmyMgr->lookup(tmpword, he->hash_id)); // next homonym
- }
-
- // prefix matched but no root word was found
-@@ -359,7 +359,7 @@ char * PfxEntry::check_morph(const char
- }
- strcat(result, "\n");
- }
-- } while ((he = he->next_homonym));
-+ } while (he = pmyMgr->lookup(tmpword, he->hash_id)); // next homonym
- }
-
- // prefix matched but no root word was found
-@@ -561,7 +561,7 @@ struct hentry * SfxEntry::check(const ch
- ((contclass) && TESTAFF(contclass, needflag, contclasslen)))
- )
- ) return he;
-- } while ((he = he->next_homonym)); // check homonyms
-+ } while (he = pmyMgr->lookup(tmpword, he->hash_id)); // next homonym
-
- // obsolote stemming code (used only by the
- // experimental SuffixMgr:suggest_pos_stems)
-@@ -736,8 +736,7 @@ struct hentry * SfxEntry::get_next_homon
- {
- PfxEntry* ep = (PfxEntry *) ppfx;
-
-- while (he->next_homonym) {
-- he = he->next_homonym;
-+ while (he = pmyMgr->lookup(he->word, he->hash_id)) { // next homonym
- if ((TESTAFF(he->astr, aflag, he->alen) || (ep && ep->getCont() && TESTAFF(ep->getCont(), aflag, ep->getContLen()))) &&
- ((optflags & aeXPRODUCT) == 0 ||
- TESTAFF(he->astr, ep->getFlag(), he->alen) ||
---- lingucomponent/source/spellcheck/hunspell/affixmgr.cxx 2007-08-31 11:42:06.000000000 +0200
-+++ lingucomponent/source/spellcheck/hunspell/affixmgr.cxx 2007-08-31 17:38:04.000000000 +0200
-@@ -1514,7 +1514,7 @@ struct hentry * AffixMgr::compound_check
- ((!words && !wordnum && defcpd_check(&words, wnum, rv, (hentry **) &rwords, 0)) ||
- (words && defcpd_check(&words, wnum, rv, (hentry **) &rwords, 0))))
- ))) {
-- rv = rv->next_homonym;
-+ rv = lookup(st, rv->hash_id); // next homonym
- }
-
- if (!rv) {
-@@ -1657,7 +1657,7 @@ struct hentry * AffixMgr::compound_check
- !((compoundflag && !words && TESTAFF(rv->astr, compoundflag, rv->alen)) ||
- (compoundend && !words && TESTAFF(rv->astr, compoundend, rv->alen)) ||
- (numdefcpd && words && defcpd_check(&words, wnum + 1, rv, NULL,1))))) {
-- rv = rv->next_homonym;
-+ rv = lookup(word+i, rv->hash_id); // next homonym
- }
-
- if (rv && words && words[wnum + 1]) return rv;
-@@ -1896,7 +1896,7 @@ int AffixMgr::compound_check_morph(const
- ((!words && !wordnum && defcpd_check(&words, wnum, rv, (hentry **) &rwords, 0)) ||
- (words && defcpd_check(&words, wnum, rv, (hentry **) &rwords, 0))))
- ))) {
-- rv = rv->next_homonym;
-+ rv = lookup(st, rv->hash_id); // next homonym
- }
-
- if (rv) {
-@@ -2064,7 +2064,7 @@ int AffixMgr::compound_check_morph(const
- !((compoundflag && !words && TESTAFF(rv->astr, compoundflag, rv->alen)) ||
- (compoundend && !words && TESTAFF(rv->astr, compoundend, rv->alen)) ||
- (numdefcpd && defcpd_check(&words, wnum + 1, rv, NULL,1))))) {
-- rv = rv->next_homonym;
-+ rv = lookup(word+i, rv->hash_id); // next homonym
- }
-
- if (rv && words && words[wnum + 1]) {
-@@ -2475,7 +2475,7 @@ char * AffixMgr::suffix_check_twosfx_mor
- free(st);
-
- result3[0] = '\0';
--#ifdef DEBUG
-+#if 0
- unsigned short flag = sptr->getFlag();
- char flagch[2] = &flag;
- if (flag_mode == FLAG_NUM) {
-@@ -2592,7 +2592,7 @@ char * AffixMgr::suffix_check_morph(cons
- if (rv->description && ((!rv->astr) ||
- !TESTAFF(rv->astr, lemma_present, rv->alen))) strcat(result, rv->word);
- if (!complexprefixes && rv->description) strcat(result, rv->description);
--#ifdef DEBUG
-+#if 0
- unsigned short flag = sptr->getKey();
- char flagch[2] = &flag;
- if (flag_mode == FLAG_NUM) {
-@@ -2976,10 +2976,10 @@ FLAG AffixMgr::get_lemma_present()
- }
-
- // utility method to look up root words in hash table
--struct hentry * AffixMgr::lookup(const char * word)
-+struct hentry * AffixMgr::lookup(const char * word, int from_id)
- {
- if (! pHMgr) return NULL;
-- return pHMgr->lookup(word);
-+ return pHMgr->lookup(word, from_id);
- }
-
- // return the value of suffix
---- lingucomponent/source/spellcheck/hunspell/affixmgr.hxx 2006-06-20 01:56:13.000000000 +0200
-+++ lingucomponent/source/spellcheck/hunspell/affixmgr.hxx 2007-08-31 17:30:47.000000000 +0200
-@@ -130,7 +130,7 @@ public:
- short wordnum, short numsyllable, short maxwordnum, short wnum, hentry ** words,
- char hu_mov_rule, char ** result, char * partresult);
-
-- struct hentry * lookup(const char * word);
-+ struct hentry * lookup(const char * word, int from_id = -1);
- int get_numrep();
- struct replentry * get_reptable();
- int get_nummap();
---- lingucomponent/source/spellcheck/hunspell/hashmgr.cxx 2007-08-31 11:42:06.000000000 +0200
-+++ lingucomponent/source/spellcheck/hunspell/hashmgr.cxx 2007-08-31 17:32:37.000000000 +0200
-@@ -50,6 +50,28 @@
- #include "hashmgr.hxx"
- #include "csutil.hxx"
-
-+#ifdef UNX
-+#include <sys/mman.h>
-+#include <unistd.h>
-+#endif
-+#ifdef WNT
-+#include <io.h>
-+typedef int ssize_t;
-+#endif
-+#include <sys/types.h>
-+#include <sys/stat.h>
-+#include <errno.h>
-+#include <fcntl.h>
-+#ifndef _O_BINARY
-+#define _O_BINARY 0
-+#endif
-+
-+#define IS_END(c) ((c) == '\r' || (c) == '\n')
-+#define WORD_END_CONDITN(c) ((c) == 0 || (c) == '/')
-+
-+//Uncomment this to print debug msgs
-+//#define HASH_DEBUG
-+
- #ifndef W32
- #include <unistd.h>
- using namespace std;
-@@ -68,6 +90,15 @@ HashMgr::HashMgr(const char * tpath, con
- aliasf = NULL;
- numaliasm = 0;
- aliasm = NULL;
-+ compat.wlen = 0;
-+ compat.alen = 0;
-+ compat.word = NULL;
-+ compat_word_length = 0;
-+ compat.astr = NULL;
-+ fd = -1;
-+ length = 0;
-+ buffer = NULL;
-+
- load_config(apath);
- int ec = load_tables(tpath);
- if (ec) {
-@@ -84,30 +115,23 @@ HashMgr::HashMgr(const char * tpath, con
-
- HashMgr::~HashMgr()
- {
-- if (tableptr) {
-- // now pass through hash table freeing up everything
-- // go through column by column of the table
-- for (int i=0; i < tablesize; i++) {
-- struct hentry * pt = &tableptr[i];
-- struct hentry * nt = NULL;
-- if (pt) {
-- if (pt->astr && !aliasf) free(pt->astr);
-- if (pt->word) free(pt->word);
-- if (pt->description && !aliasm) free(pt->description);
--
-- pt = pt->next;
-- }
-- while(pt) {
-- nt = pt->next;
-- if (pt->astr && !aliasf) free(pt->astr);
-- if (pt->word) free(pt->word);
-- if (pt->description && !aliasm) free(pt->description);
-- free(pt);
-- pt = nt;
-- }
-- }
-- free(tableptr);
-+#ifdef UNX
-+ if (fd != -1)
-+ {
-+ if (buffer)
-+ munmap (buffer, length);
-+ close (fd);
-+ }
-+ else
-+#endif
-+ {
-+ if (buffer)
-+ free (buffer);
- }
-+ free (compat.word);
-+
-+ buffer = NULL;
-+ fd = -1;
- tablesize = 0;
-
- if (aliasf) {
-@@ -127,231 +151,219 @@ HashMgr::~HashMgr()
- }
-
- // lookup a root word in the hashtable
-+// from_id - for searching of homonyms
-
--struct hentry * HashMgr::lookup(const char *word) const
-+struct hentry * HashMgr::lookup(const char *word, int from_id) const
- {
-- struct hentry * dp;
-- if (tableptr) {
-- dp = &tableptr[hash(word)];
-- if (dp->word == NULL) return NULL;
-- for ( ; dp != NULL; dp = dp->next) {
-- if (strcmp(word,dp->word) == 0) return dp;
-- }
-+ const char *hash_str;
-+ int hash_id = (from_id < 0)? hash(word): from_id + 1;
-+ while ((hash_str = tableptr[hash_id%tablesize]) != NULL)
-+ {
-+ int i;
-+ for (i = 0; (word[i] != '\0' && !IS_END(hash_str[i]) &&
-+ hash_str[i] != '/' && word[i] == hash_str[i]); i++)
-+ ;
-+ if (word[i] == '\0' && (IS_END(hash_str[i]) || hash_str[i] == '/'))
-+ return ((HashMgr*)this)->setup_hentry (hash_str, hash_id);
-+ hash_id++;
- }
-+#ifdef HASH_DEBUG
-+ fprintf( stderr, "No instance of '%s' hashid 0x%x\n", word, hash(word) );
-+#endif
- return NULL;
- }
-
- // add a word to the hash table (private)
-
--int HashMgr::add_word(const char * word, int wl, unsigned short * aff, int al, const char * desc)
-+struct hentry *HashMgr::setup_hentry(const char *ptr, int hash_id)
- {
-- char * st = mystrdup(word);
-- if (wl && !st) return 1;
-- if (complexprefixes) {
-- if (utf8) reverseword_utf(st); else reverseword(st);
-- }
-- int i = hash(st);
-- struct hentry * dp = &tableptr[i];
-- if (dp->word == NULL) {
-- dp->wlen = wl;
-- dp->alen = al;
-- dp->word = st;
-- dp->astr = aff;
-- dp->next = NULL;
-- dp->next_homonym = NULL;
-- if (aliasm) {
-- dp->description = (desc) ? get_aliasm(atoi(desc)) : mystrdup(desc);
-- } else {
-- dp->description = mystrdup(desc);
-- if (desc && !dp->description) return 1;
-- if (dp->description && complexprefixes) {
-- if (utf8) reverseword_utf(dp->description); else reverseword(dp->description);
-- }
-- }
-- } else {
-- struct hentry* hp = (struct hentry *) malloc (sizeof(struct hentry));
-- if (!hp) return 1;
-- hp->wlen = wl;
-- hp->alen = al;
-- hp->word = st;
-- hp->astr = aff;
-- hp->next = NULL;
-- hp->next_homonym = NULL;
-- if (aliasm) {
-- hp->description = (desc) ? get_aliasm(atoi(desc)) : mystrdup(desc);
-- } else {
-- hp->description = mystrdup(desc);
-- if (desc && !hp->description) return 1;
-- if (dp->description && complexprefixes) {
-- if (utf8) reverseword_utf(hp->description); else reverseword(hp->description);
-- }
-- }
-- while (dp->next != NULL) {
-- if ((!dp->next_homonym) && (strcmp(hp->word, dp->word) == 0)) dp->next_homonym = hp;
-- dp=dp->next;
-- }
-- if ((!dp->next_homonym) && (strcmp(hp->word, dp->word) == 0)) dp->next_homonym = hp;
-- dp->next = hp;
-- }
-- return 0;
--}
--
--// add a custom dic. word to the hash table (public)
-+ int len;
-+ int slash = -1;
-
--int HashMgr::put_word(const char * word, int wl, char * aff)
--{
-- unsigned short * flags;
-- int al = 0;
-- if (aff) {
-- al = decode_flags(&flags, aff);
-- flag_qsort(flags, 0, al);
-- } else {
-- flags = NULL;
-+ for ( len = 0; !IS_END(ptr[len]); len++ )
-+ {
-+ if ( ptr[len] == '/' )
-+ slash = len;
-+ }
-+
-+ if ( len > compat_word_length )
-+ compat.word = (char *)realloc (compat.word,
-+ (compat_word_length = len + 1) + 1);
-+ if ( compat.astr )
-+ free( compat.astr );
-+ compat.astr = NULL;
-+ compat.wlen = len;
-+ compat.alen = 0;
-+ compat.hash_id = hash_id;
-+
-+ memcpy(compat.word, ptr, len);
-+ compat.word[len] = '\0';
-+
-+ if ( slash >= 0 )
-+ {
-+ compat.word[slash] = '\0';
-+ compat.wlen = slash;
-+
-+ if ( slash < len )
-+ {
-+ compat.alen = decode_flags(&compat.astr, compat.word + slash + 1);
-+ flag_qsort(compat.astr, 0, compat.alen);
-+ }
- }
-- add_word(word, wl, flags, al, NULL);
-- return 0;
--}
-
--int HashMgr::put_word_pattern(const char * word, int wl, const char * pattern)
--{
-- unsigned short * flags;
-- struct hentry * dp = lookup(pattern);
-- if (!dp || !dp->astr) return 1;
-- flags = (unsigned short *) malloc (dp->alen * sizeof(short));
-- memcpy((void *) flags, (void *) dp->astr, dp->alen * sizeof(short));
-- add_word(word, wl, flags, dp->alen, NULL);
-- return 0;
-+#ifdef HASH_DEBUG
-+ fprintf( stderr, "Hit '%s' ('%s', %d)\n", compat.word, compat.astr, compat.alen );
-+#endif
-+
-+ return &compat;
- }
-
- // walk the hash table entry by entry - null at end
- struct hentry * HashMgr::walk_hashtable(int &col, struct hentry * hp) const
- {
-- //reset to start
-- if ((col < 0) || (hp == NULL)) {
-- col = -1;
-- hp = NULL;
-- }
--
-- if (hp && hp->next != NULL) {
-- hp = hp->next;
-- } else {
-- col++;
-- hp = (col < tablesize) ? &tableptr[col] : NULL;
-- // search for next non-blank column entry
-- while (hp && (hp->word == NULL)) {
-- col ++;
-- hp = (col < tablesize) ? &tableptr[col] : NULL;
-- }
-- if (col < tablesize) return hp;
-- hp = NULL;
-- col = -1;
-- }
-- return hp;
-+ if (col < 0)
-+ col = seek_newline (0);
-+ size_t next_line = (size_t)seek_newline(col);
-+ if (next_line >= length)
-+ return NULL;
-+ struct hentry *value = ((HashMgr *)this)->setup_hentry(buffer + col, col);
-+ col = (int)next_line;
-+ return value;
- }
-
- // load a munched word list and build a hash table on the fly
--int HashMgr::load_tables(const char * tpath)
-+int HashMgr::slurp( int fd, size_t bytes )
- {
-- int wl, al;
-- char * ap;
-- char * dp;
-- unsigned short * flags;
--
-- // raw dictionary - munched file
-- FILE * rawdict = fopen(tpath, "r");
-- if (rawdict == NULL) return 1;
--
-- // first read the first line of file to get hash table size */
-- char ts[MAXDELEN];
-- if (! fgets(ts, MAXDELEN-1,rawdict)) return 2;
-- mychomp(ts);
-- if ((*ts < '1') || (*ts > '9')) fprintf(stderr, "error - missing word count in dictionary file\n");
-- tablesize = atoi(ts);
-- if (!tablesize) return 4;
-- tablesize = tablesize + 5 + USERWORD;
-- if ((tablesize %2) == 0) tablesize++;
--
-- // allocate the hash table
-- tableptr = (struct hentry *) calloc(tablesize, sizeof(struct hentry));
-- if (! tableptr) return 3;
-- for (int i=0; i<tablesize; i++) tableptr[i].word = NULL;
--
-- // loop through all words on much list and add to hash
-- // table and create word and affix strings
--
-- while (fgets(ts,MAXDELEN-1,rawdict)) {
-- mychomp(ts);
-- // split each line into word and morphological description
-- dp = strchr(ts,'\t');
--
-- if (dp) {
-- *dp = '\0';
-- dp++;
-- } else {
-- dp = NULL;
-+ size_t offset = 0;
-+ ssize_t count;
-+ do
-+ {
-+ count = read (fd, buffer + offset, bytes);
-+ if (count < 0)
-+ {
-+ if (errno == EINTR)
-+ continue;
-+ else
-+ return 1;
-+ }
-+ bytes -= count;
-+ offset += count;
- }
-+ while (bytes > 0);
-+ return 0;
-+}
-
-- // split each line into word and affix char strings
-- // "\/" signs slash in words (not affix separator)
-- // "/" at beginning of the line is word character (not affix separator)
-- ap = ts;
-- while ((ap = strchr(ap,'/'))) {
-- if (ap == ts) {
-- ap++;
-- continue;
-- } else if (*(ap - 1) != '\\') break;
-- // replace "\/" with "/"
-- for (char * sp = ap - 1; *sp; *sp = *(sp + 1), sp++);
--
-- }
--
-- if (ap) {
-- *ap = '\0';
-- if (aliasf) {
-- int index = atoi(ap + 1);
-- al = get_aliasf(index, &flags);
-- if (!al) {
-- fprintf(stderr, "error - bad flag vector alias: %s\n", ts);
-- *ap = '\0';
-- }
-- } else {
-- al = decode_flags(&flags, ap + 1);
-- flag_qsort(flags, 0, al);
-- }
-- } else {
-- al = 0;
-- ap = NULL;
-- flags = NULL;
-+size_t HashMgr::seek_newline( size_t i ) const
-+{
-+ while (i < length && !IS_END(buffer[i])) i++;
-+ while (i < length && IS_END(buffer[i])) i++;
-+ return i;
-+}
-+
-+int HashMgr::load_tables(const char * tpath)
-+{
-+#ifdef HASH_DEBUG
-+ fprintf( stderr, "Load tables from '%s'\n", tpath );
-+#endif
-+ fd = open (tpath, O_RDONLY|_O_BINARY);
-+ if (fd < 0)
-+ return 1;
-+ struct stat info;
-+ if (fstat(fd, &info) < 0)
-+ return 1;
-+ length = info.st_size;
-+#ifdef UNX
-+ if (MAP_FAILED == (buffer = (char *)mmap (NULL, info.st_size, PROT_READ, MAP_SHARED, fd, 0)))
-+ ;
-+ else
-+#endif
-+ {
-+ buffer = new char[ length ];
-+ int failed = slurp (fd, length);
-+ close (fd);
-+ fd = -1;
-+ if (failed)
-+ {
-+ delete buffer;
-+ return 1;
-+ }
- }
-+ if (!buffer)
-+ return 1;
-+ size_t second_line = seek_newline (0);
-+ char *str = new char [ second_line + 1 ];
-+ memcpy (str, buffer, second_line);
-+ str[second_line] = '\0';
-+ tablesize = strtol(str, NULL, 0);
-+ delete[] str;
-+ if (tablesize <= 0)
-+ return 4;
-+ long sparse = tablesize * 2;
-+
-+ // Bin any small prime factors
-+ while (!(sparse % 2) || !(sparse % 3) || !(sparse % 5 ) ||
-+ !(sparse % 7) || !(sparse % 11) || !(sparse % 13 ))
-+ sparse++;
-+
-+ /* fprintf( stderr, "Switch table size to %d from %d\n",
-+ tablesize, sparse); */
-+ tablesize = sparse;
-+
-+ // Hash - normal fixed size sparse hash, compare until we hit a NULL
-+ tableptr = (char **)calloc (sizeof(char *), tablesize );
-+
-+#ifdef HASH_DEBUG
-+ static int total_cols = 0;
-+ static int total_lookups = 0;
-+#endif
-+ size_t i = 0;
-+ // CHECKME: '\n' on Win32 ? - cf. Win32 binary downloads I guess.
-+ for (i = second_line; i < length;)
-+ {
-+ int hash_id;
-+ size_t next_line = seek_newline (i);
-+ if (next_line >= length)
-+ break;
-+
-+ hash_id = hash(buffer + i);
-+ int collisions = 0;
-+ while (tableptr[hash_id%tablesize]) {
-+ collisions++;
-+ hash_id++;
-+ }
-+#ifdef HASH_DEBUG
-+ total_lookups++;
-+ total_cols += collisions;
-+#endif
-+ tableptr[hash_id%tablesize] = buffer + i;
-+ i = next_line;
-+ }
-+#ifdef HASH_DEBUG
-+ fprintf (stderr, "%d collisions on %d lookups\n", total_cols, total_lookups);
-+#endif
-
-- wl = strlen(ts);
--
-- // add the word and its index
-- if (add_word(ts,wl,flags,al,dp)) return 5;
--
-- }
--
-- fclose(rawdict);
-- return 0;
-+ return 0;
- }
-
--
--// the hash function is a simple load and rotate
--// algorithm borrowed
--
- int HashMgr::hash(const char * word) const
- {
-- long hv = 0;
-- for (int i=0; i < 4 && *word != 0; i++)
-- hv = (hv << 8) | (*word++);
-- while (*word != 0) {
-- ROTATE(hv,ROTATE_LEN);
-- hv ^= (*word++);
-- }
-- return (unsigned long) hv % tablesize;
-+ unsigned long hv = word[0];
-+ for (int i = 1; !WORD_END_CONDITN(word[i]) && !IS_END(word[i]); i++)
-+ hv = (hv << 5) - hv + word[i];
-+#ifdef HASH_DEBUG
-+ fprintf(stderr, "string '");
-+ for (int i = 0; !WORD_END_CONDITN(word[ i ] && !IS_END(word[i])); i++)
-+ fprintf( stderr, "%c", word[i] );
-+ fprintf(stderr, "' 0x%x \n", (int)hv % tablesize);
-+#endif
-+
-+ return hv % tablesize;
- }
-
--int HashMgr::decode_flags(unsigned short ** result, char * flags) {
-+int HashMgr::decode_flags(unsigned short ** result, const char * flags) {
-+#ifdef HASH_DEBUG
-+ fprintf(stderr, "Decoding flags '%s'\n", flags);
-+#endif
- int len;
- switch (flag_mode) {
- case FLAG_LONG: { // two-character flags (1x2yZz -> 1x 2y Zz)
-@@ -366,9 +378,9 @@ int HashMgr::decode_flags(unsigned short
- }
- case FLAG_NUM: { // decimal numbers separated by comma (4521,23,233 -> 4521 23 233)
- len = 1;
-- char * src = flags;
-+ const char * src = flags;
- unsigned short * dest;
-- char * p;
-+ const char * p;
- for (p = flags; *p; p++) {
- if (*p == ',') len++;
- }
---- lingucomponent/source/spellcheck/hunspell/hashmgr.hxx 2007-08-31 11:42:06.000000000 +0200
-+++ lingucomponent/source/spellcheck/hunspell/hashmgr.hxx 2007-08-31 17:15:51.000000000 +0200
-@@ -9,7 +9,7 @@ enum flag { FLAG_CHAR, FLAG_LONG, FLAG_N
- class HashMgr
- {
- int tablesize;
-- struct hentry * tableptr;
-+ char ** tableptr;
- int userword;
- flag flag_mode;
- int complexprefixes;
-@@ -20,18 +20,25 @@ class HashMgr
- int numaliasm; // morphological desciption `compression' with aliases
- char ** aliasm;
-
-+ mutable struct hentry compat;
-+ int compat_word_length;
-+ int fd;
-+ char * buffer;
-+ size_t length;
-+
-+ size_t seek_newline(size_t from_offset) const;
-+ int slurp( int fd, size_t bytes );
-
- public:
- HashMgr(const char * tpath, const char * apath);
- ~HashMgr();
-
-- struct hentry * lookup(const char *) const;
-- int hash(const char *) const;
-- struct hentry * walk_hashtable(int & col, struct hentry * hp) const;
-+ struct hentry * setup_hentry(const char *ptr, int hash_id);
-
-- int put_word(const char * word, int wl, char * ap);
-- int put_word_pattern(const char * word, int wl, const char * pattern);
-- int decode_flags(unsigned short ** result, char * flags);
-+ struct hentry * lookup(const char *, int from_id = -1) const;
-+ struct hentry * walk_hashtable(int & col, struct hentry * p) const;
-+
-+ int decode_flags(unsigned short ** result, const char * flags);
- unsigned short decode_flag(const char * flag);
- char * encode_flag(unsigned short flag);
- int is_aliasf();
-@@ -40,8 +47,8 @@ public:
- char * get_aliasm(int index);
-
- private:
-+ int hash(const char *) const;
- int load_tables(const char * tpath);
-- int add_word(const char * word, int wl, unsigned short * ap, int al, const char * desc);
- int load_config(const char * affpath);
- int parse_aliasf(char * line, FILE * af);
- int parse_aliasm(char * line, FILE * af);
---- lingucomponent/source/spellcheck/hunspell/htypes.hxx 2006-01-06 14:10:55.000000000 +0100
-+++ lingucomponent/source/spellcheck/hunspell/htypes.hxx 2007-08-31 17:14:39.000000000 +0200
-@@ -17,9 +17,8 @@ struct hentry
- short alen;
- char * word;
- unsigned short * astr;
-- struct hentry * next;
-- struct hentry * next_homonym;
- char * description;
-+ int hash_id;
- };
-
- #endif
---- lingucomponent/source/spellcheck/hunspell/hunspell.cxx 2007-08-31 11:42:06.000000000 +0200
-+++ lingucomponent/source/spellcheck/hunspell/hunspell.cxx 2007-08-31 17:35:25.000000000 +0200
-@@ -599,7 +599,7 @@ struct hentry * Hunspell::check(const ch
- while (he && (he->astr) &&
- ((pAMgr->get_pseudoroot() && TESTAFF(he->astr, pAMgr->get_pseudoroot(), he->alen)) ||
- (pAMgr->get_onlyincompound() && TESTAFF(he->astr, pAMgr->get_onlyincompound(), he->alen))
-- )) he = he->next_homonym;
-+ )) he = pHMgr->lookup(word, he->hash_id); // next homonym
-
- // check with affixes
- if (!he && pAMgr) {
-@@ -1190,30 +1190,6 @@ struct unicode_info2 * Hunspell::get_utf
- return utfconv;
- }
-
--int Hunspell::put_word(const char * word)
--{
-- if (pHMgr) {
-- return pHMgr->put_word(word, strlen(word), NULL);
-- }
-- return 0;
--}
--
--int Hunspell::put_word_suffix(const char * word, const char * suffix)
--{
-- if (pHMgr) {
-- return pHMgr->put_word(word, strlen(word), (char *) suffix);
-- }
-- return 0;
--}
--
--int Hunspell::put_word_pattern(const char * word, const char * pattern)
--{
-- if (pHMgr) {
-- return pHMgr->put_word_pattern(word, strlen(word), pattern);
-- }
-- return 0;
--}
--
- const char * Hunspell::get_version()
- {
- return pAMgr->get_version();
---- lingucomponent/source/spellcheck/hunspell/hunspell.hxx 2007-08-31 11:42:06.000000000 +0200
-+++ lingucomponent/source/spellcheck/hunspell/hunspell.hxx 2007-08-31 14:14:26.000000000 +0200
-@@ -75,20 +75,6 @@ public:
-
- int suggest(char*** slst, const char * word);
-
-- /* handling custom dictionary */
--
-- int put_word(const char * word);
--
-- /* suffix is an affix flag string, similarly in dictionary files */
--
-- int put_word_suffix(const char * word, const char * suffix);
--
-- /* pattern is a sample dictionary word
-- * put word into custom dictionary with affix flags of pattern word
-- */
--
-- int put_word_pattern(const char * word, const char * pattern);
--
- /* other */
-
- char * get_dic_encoding();
---- lingucomponent/source/spellcheck/hunspell/suggestmgr.cxx 2007-08-31 11:42:06.000000000 +0200
-+++ lingucomponent/source/spellcheck/hunspell/suggestmgr.cxx 2007-08-31 17:39:14.000000000 +0200
-@@ -803,19 +803,10 @@ int SuggestMgr::ngsuggest(char** wlst, c
- int _lval;
- int sc;
- int lp;
-+ int lpMG;
-
- if (! pHMgr) return 0;
-
-- // exhaustively search through all root words
-- // keeping track of the MAX_ROOTS most similar root words
-- struct hentry * roots[MAX_ROOTS];
-- int scores[MAX_ROOTS];
-- for (i = 0; i < MAX_ROOTS; i++) {
-- roots[i] = NULL;
-- scores[i] = -100 * i;
-- }
-- lp = MAX_ROOTS - 1;
--
- char w2[MAXWORDUTF8LEN];
- char * word = w;
-
-@@ -831,27 +822,6 @@ int SuggestMgr::ngsuggest(char** wlst, c
- int nc = strlen(word);
- int n = (utf8) ? u8_u16(u8, MAXSWL, word) : nc;
-
-- struct hentry* hp = NULL;
-- int col = -1;
-- while ((hp = pHMgr->walk_hashtable(col, hp))) {
-- // check forbidden words
-- if ((hp->astr) && (pAMgr) &&
-- (TESTAFF(hp->astr, pAMgr->get_forbiddenword(), hp->alen) ||
-- TESTAFF(hp->astr, pAMgr->get_nosuggest(), hp->alen) ||
-- TESTAFF(hp->astr, pAMgr->get_onlyincompound(), hp->alen))) continue;
-- sc = ngram(3, word, hp->word, NGRAM_LONGER_WORSE);
-- if (sc > scores[lp]) {
-- scores[lp] = sc;
-- roots[lp] = hp;
-- int lval = sc;
-- for (j=0; j < MAX_ROOTS; j++)
-- if (scores[j] < lval) {
-- lp = j;
-- lval = scores[j];
-- }
-- }
-- }
--
- // find minimum threshhold for a passable suggestion
- // mangle original word three differnt ways
- // and score them to generate a minimum acceptable score
-@@ -870,9 +840,13 @@ int SuggestMgr::ngsuggest(char** wlst, c
- thresh = thresh / 3;
- thresh--;
-
-- // now expand affixes on each of these root words and
-- // and use length adjusted ngram scores to select
-- // possible suggestions
-+ struct hentry * roots[MAX_ROOTS];
-+ int scores[MAX_ROOTS];
-+ for (i = 0; i < MAX_ROOTS; i++) {
-+ roots[i] = NULL;
-+ scores[i] = -100 * i;
-+ }
-+
- char * guess[MAX_GUESS];
- int gscore[MAX_GUESS];
- for(i=0;i<MAX_GUESS;i++) {
-@@ -882,36 +856,28 @@ int SuggestMgr::ngsuggest(char** wlst, c
-
- lp = MAX_GUESS - 1;
-
-- struct guessword * glst;
-- glst = (struct guessword *) calloc(MAX_WORDS,sizeof(struct guessword));
-- if (! glst) return 0;
-+ // exhaustively search through all root words
-+ // keeping track of the MAX_ROOTS most similar root words
-+ lp = MAX_ROOTS - 1;
-
-- for (i = 0; i < MAX_ROOTS; i++) {
-+ lpMG = MAX_GUESS - 1;
-
-- if (roots[i]) {
-- struct hentry * rp = roots[i];
-- int nw = pAMgr->expand_rootword(glst, MAX_WORDS, rp->word, rp->wlen,
-- rp->astr, rp->alen, word, nc);
--
-- for (int k = 0; k < nw ; k++) {
-- sc = ngram(n, word, glst[k].word, NGRAM_ANY_MISMATCH);
-- if ((sc > thresh)) {
-- if (sc > gscore[lp]) {
-- if (guess[lp]) free (guess[lp]);
-- gscore[lp] = sc;
-- guess[lp] = glst[k].word;
-- _lval = sc;
-- for (j=0; j < MAX_GUESS; j++)
-- if (gscore[j] < _lval) {
-- lp = j;
-- _lval = gscore[j];
-- }
-- } else free (glst[k].word);
-- } else free(glst[k].word);
-- }
-- }
-+ struct hentry* hp = NULL;
-+ int col = -1;
-+ while ((hp = pHMgr->walk_hashtable(col, hp))) {
-+ sc = ngram(3, word, hp->word, NGRAM_LONGER_WORSE);
-+ if (sc > scores[lp]) {
-+ scores[lp] = sc;
-+ // Get the list of guesswords for this rootword
-+ if (-1 == expand_rootword_affix(hp, word, n, thresh, gscore, guess, &lpMG)) return 0;
-+ int lval = sc;
-+ for (j=0; j < MAX_ROOTS; j++)
-+ if (scores[j] < lval) {
-+ lp = j;
-+ lval = scores[j];
-+ }
-+ }
- }
-- free(glst);
-
- // now we are done generating guesses
- // sort in order of decreasing score
-@@ -984,6 +950,49 @@ int SuggestMgr::ngsuggest(char** wlst, c
- return ns;
- }
-
-+int SuggestMgr::expand_rootword_affix(struct hentry *hp, char *word, int wrdlen, int thresh, int glistscore[], char *guesslist[], int *lp)
-+{
-+ int sc;
-+ int lval;
-+
-+ struct guessword * glst;
-+ glst = (struct guessword *) calloc(MAX_WORDS,sizeof(struct guessword));
-+ if (! glst) return -1;
-+
-+ // now expand affixes on each of these root words and
-+ // and use length adjusted ngram scores to select
-+ // possible suggestions
-+ if (hp) {
-+ int nw = pAMgr->expand_rootword(glst, MAX_WORDS, hp->word, hp->wlen,
-+ hp->astr, hp->alen, word, wrdlen);
-+ for (int k = 0; k < nw; k++)
-+ {
-+ sc = ngram(wrdlen, word, glst[k].word, NGRAM_ANY_MISMATCH);
-+ if (sc > thresh && sc > glistscore[*lp])
-+ {
-+ if (guesslist[*lp]) free(guesslist[*lp]);
-+ glistscore[*lp] = sc;
-+ guesslist[*lp] = glst[k].word;
-+ glst[k].word = NULL;
-+ lval = sc;
-+ for (int j=0; j < MAX_GUESS; j++)
-+ {
-+ if (glistscore[j] < lval)
-+ {
-+ *lp = j;
-+ lval = glistscore[j];
-+ }
-+ }
-+ }
-+ free (glst[k].word);
-+ glst[k].word = NULL;
-+ glst[k].allow = 0;
-+ }
-+ }
-+ if (glst) free(glst);
-+
-+ return 0;
-+}
-
- // see if a candidate suggestion is spelled correctly
- // needs to check both root words and words with affixes
-@@ -1438,7 +1447,7 @@ char * SuggestMgr::suggest_morph(const c
- if (rv->description) strcat(result, rv->description);
- strcat(result, "\n");
- }
-- rv = rv->next_homonym;
-+ rv = pAMgr->lookup(word, rv->hash_id); // next homonym
- }
-
- st = pAMgr->affix_check_morph(word,strlen(word));
---- lingucomponent/source/spellcheck/hunspell/suggestmgr.hxx 2007-08-31 11:42:06.000000000 +0200
-+++ lingucomponent/source/spellcheck/hunspell/suggestmgr.hxx 2007-08-31 14:14:26.000000000 +0200
-@@ -78,6 +78,7 @@ private:
- int equalfirstletter(char * s1, const char * s2);
- int commoncharacterpositions(char * s1, const char * s2, int * is_swap);
- void bubblesort( char ** rwd, int * rsc, int n);
-+ int expand_rootword_affix(struct hentry *hp, char *word, int wrdlen, int thresh, int glistscore[], char *guesslist[], int *lp);
- void lcs(const char * s, const char * s2, int * l1, int * l2, char ** result);
- int lcslen(const char * s, const char* s2);
-