diff options
Diffstat (limited to 'patches/dev300/hunspell-hashify.diff')
-rw-r--r-- | patches/dev300/hunspell-hashify.diff | 963 |
1 files changed, 0 insertions, 963 deletions
diff --git a/patches/dev300/hunspell-hashify.diff b/patches/dev300/hunspell-hashify.diff deleted file mode 100644 index bfb3ea37a..000000000 --- a/patches/dev300/hunspell-hashify.diff +++ /dev/null @@ -1,963 +0,0 @@ ---- lingucomponent/source/spellcheck/hunspell/affentry.cxx 2006-09-16 18:07:56.000000000 +0200 -+++ lingucomponent/source/spellcheck/hunspell/affentry.cxx 2007-08-31 17:28:49.000000000 +0200 -@@ -196,7 +196,7 @@ struct hentry * PfxEntry::check(const ch - ((!needflag) || TESTAFF(he->astr, needflag, he->alen) || - (contclass && TESTAFF(contclass, needflag, contclasslen)))) - return he; -- } while ((he = he->next_homonym)); // check homonyms -+ } while (he = pmyMgr->lookup(tmpword, he->hash_id)); // next homonym - } - - // prefix matched but no root word was found -@@ -359,7 +359,7 @@ char * PfxEntry::check_morph(const char - } - strcat(result, "\n"); - } -- } while ((he = he->next_homonym)); -+ } while (he = pmyMgr->lookup(tmpword, he->hash_id)); // next homonym - } - - // prefix matched but no root word was found -@@ -561,7 +561,7 @@ struct hentry * SfxEntry::check(const ch - ((contclass) && TESTAFF(contclass, needflag, contclasslen))) - ) - ) return he; -- } while ((he = he->next_homonym)); // check homonyms -+ } while (he = pmyMgr->lookup(tmpword, he->hash_id)); // next homonym - - // obsolote stemming code (used only by the - // experimental SuffixMgr:suggest_pos_stems) -@@ -736,8 +736,7 @@ struct hentry * SfxEntry::get_next_homon - { - PfxEntry* ep = (PfxEntry *) ppfx; - -- while (he->next_homonym) { -- he = he->next_homonym; -+ while (he = pmyMgr->lookup(he->word, he->hash_id)) { // next homonym - if ((TESTAFF(he->astr, aflag, he->alen) || (ep && ep->getCont() && TESTAFF(ep->getCont(), aflag, ep->getContLen()))) && - ((optflags & aeXPRODUCT) == 0 || - TESTAFF(he->astr, ep->getFlag(), he->alen) || ---- lingucomponent/source/spellcheck/hunspell/affixmgr.cxx 2007-08-31 11:42:06.000000000 +0200 -+++ lingucomponent/source/spellcheck/hunspell/affixmgr.cxx 2007-08-31 17:38:04.000000000 +0200 -@@ -1514,7 +1514,7 @@ struct hentry * AffixMgr::compound_check - ((!words && !wordnum && defcpd_check(&words, wnum, rv, (hentry **) &rwords, 0)) || - (words && defcpd_check(&words, wnum, rv, (hentry **) &rwords, 0)))) - ))) { -- rv = rv->next_homonym; -+ rv = lookup(st, rv->hash_id); // next homonym - } - - if (!rv) { -@@ -1657,7 +1657,7 @@ struct hentry * AffixMgr::compound_check - !((compoundflag && !words && TESTAFF(rv->astr, compoundflag, rv->alen)) || - (compoundend && !words && TESTAFF(rv->astr, compoundend, rv->alen)) || - (numdefcpd && words && defcpd_check(&words, wnum + 1, rv, NULL,1))))) { -- rv = rv->next_homonym; -+ rv = lookup(word+i, rv->hash_id); // next homonym - } - - if (rv && words && words[wnum + 1]) return rv; -@@ -1896,7 +1896,7 @@ int AffixMgr::compound_check_morph(const - ((!words && !wordnum && defcpd_check(&words, wnum, rv, (hentry **) &rwords, 0)) || - (words && defcpd_check(&words, wnum, rv, (hentry **) &rwords, 0)))) - ))) { -- rv = rv->next_homonym; -+ rv = lookup(st, rv->hash_id); // next homonym - } - - if (rv) { -@@ -2064,7 +2064,7 @@ int AffixMgr::compound_check_morph(const - !((compoundflag && !words && TESTAFF(rv->astr, compoundflag, rv->alen)) || - (compoundend && !words && TESTAFF(rv->astr, compoundend, rv->alen)) || - (numdefcpd && defcpd_check(&words, wnum + 1, rv, NULL,1))))) { -- rv = rv->next_homonym; -+ rv = lookup(word+i, rv->hash_id); // next homonym - } - - if (rv && words && words[wnum + 1]) { -@@ -2475,7 +2475,7 @@ char * AffixMgr::suffix_check_twosfx_mor - free(st); - - result3[0] = '\0'; --#ifdef DEBUG -+#if 0 - unsigned short flag = sptr->getFlag(); - char flagch[2] = &flag; - if (flag_mode == FLAG_NUM) { -@@ -2592,7 +2592,7 @@ char * AffixMgr::suffix_check_morph(cons - if (rv->description && ((!rv->astr) || - !TESTAFF(rv->astr, lemma_present, rv->alen))) strcat(result, rv->word); - if (!complexprefixes && rv->description) strcat(result, rv->description); --#ifdef DEBUG -+#if 0 - unsigned short flag = sptr->getKey(); - char flagch[2] = &flag; - if (flag_mode == FLAG_NUM) { -@@ -2976,10 +2976,10 @@ FLAG AffixMgr::get_lemma_present() - } - - // utility method to look up root words in hash table --struct hentry * AffixMgr::lookup(const char * word) -+struct hentry * AffixMgr::lookup(const char * word, int from_id) - { - if (! pHMgr) return NULL; -- return pHMgr->lookup(word); -+ return pHMgr->lookup(word, from_id); - } - - // return the value of suffix ---- lingucomponent/source/spellcheck/hunspell/affixmgr.hxx 2006-06-20 01:56:13.000000000 +0200 -+++ lingucomponent/source/spellcheck/hunspell/affixmgr.hxx 2007-08-31 17:30:47.000000000 +0200 -@@ -130,7 +130,7 @@ public: - short wordnum, short numsyllable, short maxwordnum, short wnum, hentry ** words, - char hu_mov_rule, char ** result, char * partresult); - -- struct hentry * lookup(const char * word); -+ struct hentry * lookup(const char * word, int from_id = -1); - int get_numrep(); - struct replentry * get_reptable(); - int get_nummap(); ---- lingucomponent/source/spellcheck/hunspell/hashmgr.cxx 2007-08-31 11:42:06.000000000 +0200 -+++ lingucomponent/source/spellcheck/hunspell/hashmgr.cxx 2007-08-31 17:32:37.000000000 +0200 -@@ -50,6 +50,28 @@ - #include "hashmgr.hxx" - #include "csutil.hxx" - -+#ifdef UNX -+#include <sys/mman.h> -+#include <unistd.h> -+#endif -+#ifdef WNT -+#include <io.h> -+typedef int ssize_t; -+#endif -+#include <sys/types.h> -+#include <sys/stat.h> -+#include <errno.h> -+#include <fcntl.h> -+#ifndef _O_BINARY -+#define _O_BINARY 0 -+#endif -+ -+#define IS_END(c) ((c) == '\r' || (c) == '\n') -+#define WORD_END_CONDITN(c) ((c) == 0 || (c) == '/') -+ -+//Uncomment this to print debug msgs -+//#define HASH_DEBUG -+ - #ifndef W32 - #include <unistd.h> - using namespace std; -@@ -68,6 +90,15 @@ HashMgr::HashMgr(const char * tpath, con - aliasf = NULL; - numaliasm = 0; - aliasm = NULL; -+ compat.wlen = 0; -+ compat.alen = 0; -+ compat.word = NULL; -+ compat_word_length = 0; -+ compat.astr = NULL; -+ fd = -1; -+ length = 0; -+ buffer = NULL; -+ - load_config(apath); - int ec = load_tables(tpath); - if (ec) { -@@ -84,30 +115,23 @@ HashMgr::HashMgr(const char * tpath, con - - HashMgr::~HashMgr() - { -- if (tableptr) { -- // now pass through hash table freeing up everything -- // go through column by column of the table -- for (int i=0; i < tablesize; i++) { -- struct hentry * pt = &tableptr[i]; -- struct hentry * nt = NULL; -- if (pt) { -- if (pt->astr && !aliasf) free(pt->astr); -- if (pt->word) free(pt->word); -- if (pt->description && !aliasm) free(pt->description); -- -- pt = pt->next; -- } -- while(pt) { -- nt = pt->next; -- if (pt->astr && !aliasf) free(pt->astr); -- if (pt->word) free(pt->word); -- if (pt->description && !aliasm) free(pt->description); -- free(pt); -- pt = nt; -- } -- } -- free(tableptr); -+#ifdef UNX -+ if (fd != -1) -+ { -+ if (buffer) -+ munmap (buffer, length); -+ close (fd); -+ } -+ else -+#endif -+ { -+ if (buffer) -+ free (buffer); - } -+ free (compat.word); -+ -+ buffer = NULL; -+ fd = -1; - tablesize = 0; - - if (aliasf) { -@@ -127,231 +151,219 @@ HashMgr::~HashMgr() - } - - // lookup a root word in the hashtable -+// from_id - for searching of homonyms - --struct hentry * HashMgr::lookup(const char *word) const -+struct hentry * HashMgr::lookup(const char *word, int from_id) const - { -- struct hentry * dp; -- if (tableptr) { -- dp = &tableptr[hash(word)]; -- if (dp->word == NULL) return NULL; -- for ( ; dp != NULL; dp = dp->next) { -- if (strcmp(word,dp->word) == 0) return dp; -- } -+ const char *hash_str; -+ int hash_id = (from_id < 0)? hash(word): from_id + 1; -+ while ((hash_str = tableptr[hash_id%tablesize]) != NULL) -+ { -+ int i; -+ for (i = 0; (word[i] != '\0' && !IS_END(hash_str[i]) && -+ hash_str[i] != '/' && word[i] == hash_str[i]); i++) -+ ; -+ if (word[i] == '\0' && (IS_END(hash_str[i]) || hash_str[i] == '/')) -+ return ((HashMgr*)this)->setup_hentry (hash_str, hash_id); -+ hash_id++; - } -+#ifdef HASH_DEBUG -+ fprintf( stderr, "No instance of '%s' hashid 0x%x\n", word, hash(word) ); -+#endif - return NULL; - } - - // add a word to the hash table (private) - --int HashMgr::add_word(const char * word, int wl, unsigned short * aff, int al, const char * desc) -+struct hentry *HashMgr::setup_hentry(const char *ptr, int hash_id) - { -- char * st = mystrdup(word); -- if (wl && !st) return 1; -- if (complexprefixes) { -- if (utf8) reverseword_utf(st); else reverseword(st); -- } -- int i = hash(st); -- struct hentry * dp = &tableptr[i]; -- if (dp->word == NULL) { -- dp->wlen = wl; -- dp->alen = al; -- dp->word = st; -- dp->astr = aff; -- dp->next = NULL; -- dp->next_homonym = NULL; -- if (aliasm) { -- dp->description = (desc) ? get_aliasm(atoi(desc)) : mystrdup(desc); -- } else { -- dp->description = mystrdup(desc); -- if (desc && !dp->description) return 1; -- if (dp->description && complexprefixes) { -- if (utf8) reverseword_utf(dp->description); else reverseword(dp->description); -- } -- } -- } else { -- struct hentry* hp = (struct hentry *) malloc (sizeof(struct hentry)); -- if (!hp) return 1; -- hp->wlen = wl; -- hp->alen = al; -- hp->word = st; -- hp->astr = aff; -- hp->next = NULL; -- hp->next_homonym = NULL; -- if (aliasm) { -- hp->description = (desc) ? get_aliasm(atoi(desc)) : mystrdup(desc); -- } else { -- hp->description = mystrdup(desc); -- if (desc && !hp->description) return 1; -- if (dp->description && complexprefixes) { -- if (utf8) reverseword_utf(hp->description); else reverseword(hp->description); -- } -- } -- while (dp->next != NULL) { -- if ((!dp->next_homonym) && (strcmp(hp->word, dp->word) == 0)) dp->next_homonym = hp; -- dp=dp->next; -- } -- if ((!dp->next_homonym) && (strcmp(hp->word, dp->word) == 0)) dp->next_homonym = hp; -- dp->next = hp; -- } -- return 0; --} -- --// add a custom dic. word to the hash table (public) -+ int len; -+ int slash = -1; - --int HashMgr::put_word(const char * word, int wl, char * aff) --{ -- unsigned short * flags; -- int al = 0; -- if (aff) { -- al = decode_flags(&flags, aff); -- flag_qsort(flags, 0, al); -- } else { -- flags = NULL; -+ for ( len = 0; !IS_END(ptr[len]); len++ ) -+ { -+ if ( ptr[len] == '/' ) -+ slash = len; -+ } -+ -+ if ( len > compat_word_length ) -+ compat.word = (char *)realloc (compat.word, -+ (compat_word_length = len + 1) + 1); -+ if ( compat.astr ) -+ free( compat.astr ); -+ compat.astr = NULL; -+ compat.wlen = len; -+ compat.alen = 0; -+ compat.hash_id = hash_id; -+ -+ memcpy(compat.word, ptr, len); -+ compat.word[len] = '\0'; -+ -+ if ( slash >= 0 ) -+ { -+ compat.word[slash] = '\0'; -+ compat.wlen = slash; -+ -+ if ( slash < len ) -+ { -+ compat.alen = decode_flags(&compat.astr, compat.word + slash + 1); -+ flag_qsort(compat.astr, 0, compat.alen); -+ } - } -- add_word(word, wl, flags, al, NULL); -- return 0; --} - --int HashMgr::put_word_pattern(const char * word, int wl, const char * pattern) --{ -- unsigned short * flags; -- struct hentry * dp = lookup(pattern); -- if (!dp || !dp->astr) return 1; -- flags = (unsigned short *) malloc (dp->alen * sizeof(short)); -- memcpy((void *) flags, (void *) dp->astr, dp->alen * sizeof(short)); -- add_word(word, wl, flags, dp->alen, NULL); -- return 0; -+#ifdef HASH_DEBUG -+ fprintf( stderr, "Hit '%s' ('%s', %d)\n", compat.word, compat.astr, compat.alen ); -+#endif -+ -+ return &compat; - } - - // walk the hash table entry by entry - null at end - struct hentry * HashMgr::walk_hashtable(int &col, struct hentry * hp) const - { -- //reset to start -- if ((col < 0) || (hp == NULL)) { -- col = -1; -- hp = NULL; -- } -- -- if (hp && hp->next != NULL) { -- hp = hp->next; -- } else { -- col++; -- hp = (col < tablesize) ? &tableptr[col] : NULL; -- // search for next non-blank column entry -- while (hp && (hp->word == NULL)) { -- col ++; -- hp = (col < tablesize) ? &tableptr[col] : NULL; -- } -- if (col < tablesize) return hp; -- hp = NULL; -- col = -1; -- } -- return hp; -+ if (col < 0) -+ col = seek_newline (0); -+ size_t next_line = (size_t)seek_newline(col); -+ if (next_line >= length) -+ return NULL; -+ struct hentry *value = ((HashMgr *)this)->setup_hentry(buffer + col, col); -+ col = (int)next_line; -+ return value; - } - - // load a munched word list and build a hash table on the fly --int HashMgr::load_tables(const char * tpath) -+int HashMgr::slurp( int fd, size_t bytes ) - { -- int wl, al; -- char * ap; -- char * dp; -- unsigned short * flags; -- -- // raw dictionary - munched file -- FILE * rawdict = fopen(tpath, "r"); -- if (rawdict == NULL) return 1; -- -- // first read the first line of file to get hash table size */ -- char ts[MAXDELEN]; -- if (! fgets(ts, MAXDELEN-1,rawdict)) return 2; -- mychomp(ts); -- if ((*ts < '1') || (*ts > '9')) fprintf(stderr, "error - missing word count in dictionary file\n"); -- tablesize = atoi(ts); -- if (!tablesize) return 4; -- tablesize = tablesize + 5 + USERWORD; -- if ((tablesize %2) == 0) tablesize++; -- -- // allocate the hash table -- tableptr = (struct hentry *) calloc(tablesize, sizeof(struct hentry)); -- if (! tableptr) return 3; -- for (int i=0; i<tablesize; i++) tableptr[i].word = NULL; -- -- // loop through all words on much list and add to hash -- // table and create word and affix strings -- -- while (fgets(ts,MAXDELEN-1,rawdict)) { -- mychomp(ts); -- // split each line into word and morphological description -- dp = strchr(ts,'\t'); -- -- if (dp) { -- *dp = '\0'; -- dp++; -- } else { -- dp = NULL; -+ size_t offset = 0; -+ ssize_t count; -+ do -+ { -+ count = read (fd, buffer + offset, bytes); -+ if (count < 0) -+ { -+ if (errno == EINTR) -+ continue; -+ else -+ return 1; -+ } -+ bytes -= count; -+ offset += count; - } -+ while (bytes > 0); -+ return 0; -+} - -- // split each line into word and affix char strings -- // "\/" signs slash in words (not affix separator) -- // "/" at beginning of the line is word character (not affix separator) -- ap = ts; -- while ((ap = strchr(ap,'/'))) { -- if (ap == ts) { -- ap++; -- continue; -- } else if (*(ap - 1) != '\\') break; -- // replace "\/" with "/" -- for (char * sp = ap - 1; *sp; *sp = *(sp + 1), sp++); -- -- } -- -- if (ap) { -- *ap = '\0'; -- if (aliasf) { -- int index = atoi(ap + 1); -- al = get_aliasf(index, &flags); -- if (!al) { -- fprintf(stderr, "error - bad flag vector alias: %s\n", ts); -- *ap = '\0'; -- } -- } else { -- al = decode_flags(&flags, ap + 1); -- flag_qsort(flags, 0, al); -- } -- } else { -- al = 0; -- ap = NULL; -- flags = NULL; -+size_t HashMgr::seek_newline( size_t i ) const -+{ -+ while (i < length && !IS_END(buffer[i])) i++; -+ while (i < length && IS_END(buffer[i])) i++; -+ return i; -+} -+ -+int HashMgr::load_tables(const char * tpath) -+{ -+#ifdef HASH_DEBUG -+ fprintf( stderr, "Load tables from '%s'\n", tpath ); -+#endif -+ fd = open (tpath, O_RDONLY|_O_BINARY); -+ if (fd < 0) -+ return 1; -+ struct stat info; -+ if (fstat(fd, &info) < 0) -+ return 1; -+ length = info.st_size; -+#ifdef UNX -+ if (MAP_FAILED == (buffer = (char *)mmap (NULL, info.st_size, PROT_READ, MAP_SHARED, fd, 0))) -+ ; -+ else -+#endif -+ { -+ buffer = new char[ length ]; -+ int failed = slurp (fd, length); -+ close (fd); -+ fd = -1; -+ if (failed) -+ { -+ delete buffer; -+ return 1; -+ } - } -+ if (!buffer) -+ return 1; -+ size_t second_line = seek_newline (0); -+ char *str = new char [ second_line + 1 ]; -+ memcpy (str, buffer, second_line); -+ str[second_line] = '\0'; -+ tablesize = strtol(str, NULL, 0); -+ delete[] str; -+ if (tablesize <= 0) -+ return 4; -+ long sparse = tablesize * 2; -+ -+ // Bin any small prime factors -+ while (!(sparse % 2) || !(sparse % 3) || !(sparse % 5 ) || -+ !(sparse % 7) || !(sparse % 11) || !(sparse % 13 )) -+ sparse++; -+ -+ /* fprintf( stderr, "Switch table size to %d from %d\n", -+ tablesize, sparse); */ -+ tablesize = sparse; -+ -+ // Hash - normal fixed size sparse hash, compare until we hit a NULL -+ tableptr = (char **)calloc (sizeof(char *), tablesize ); -+ -+#ifdef HASH_DEBUG -+ static int total_cols = 0; -+ static int total_lookups = 0; -+#endif -+ size_t i = 0; -+ // CHECKME: '\n' on Win32 ? - cf. Win32 binary downloads I guess. -+ for (i = second_line; i < length;) -+ { -+ int hash_id; -+ size_t next_line = seek_newline (i); -+ if (next_line >= length) -+ break; -+ -+ hash_id = hash(buffer + i); -+ int collisions = 0; -+ while (tableptr[hash_id%tablesize]) { -+ collisions++; -+ hash_id++; -+ } -+#ifdef HASH_DEBUG -+ total_lookups++; -+ total_cols += collisions; -+#endif -+ tableptr[hash_id%tablesize] = buffer + i; -+ i = next_line; -+ } -+#ifdef HASH_DEBUG -+ fprintf (stderr, "%d collisions on %d lookups\n", total_cols, total_lookups); -+#endif - -- wl = strlen(ts); -- -- // add the word and its index -- if (add_word(ts,wl,flags,al,dp)) return 5; -- -- } -- -- fclose(rawdict); -- return 0; -+ return 0; - } - -- --// the hash function is a simple load and rotate --// algorithm borrowed -- - int HashMgr::hash(const char * word) const - { -- long hv = 0; -- for (int i=0; i < 4 && *word != 0; i++) -- hv = (hv << 8) | (*word++); -- while (*word != 0) { -- ROTATE(hv,ROTATE_LEN); -- hv ^= (*word++); -- } -- return (unsigned long) hv % tablesize; -+ unsigned long hv = word[0]; -+ for (int i = 1; !WORD_END_CONDITN(word[i]) && !IS_END(word[i]); i++) -+ hv = (hv << 5) - hv + word[i]; -+#ifdef HASH_DEBUG -+ fprintf(stderr, "string '"); -+ for (int i = 0; !WORD_END_CONDITN(word[ i ] && !IS_END(word[i])); i++) -+ fprintf( stderr, "%c", word[i] ); -+ fprintf(stderr, "' 0x%x \n", (int)hv % tablesize); -+#endif -+ -+ return hv % tablesize; - } - --int HashMgr::decode_flags(unsigned short ** result, char * flags) { -+int HashMgr::decode_flags(unsigned short ** result, const char * flags) { -+#ifdef HASH_DEBUG -+ fprintf(stderr, "Decoding flags '%s'\n", flags); -+#endif - int len; - switch (flag_mode) { - case FLAG_LONG: { // two-character flags (1x2yZz -> 1x 2y Zz) -@@ -366,9 +378,9 @@ int HashMgr::decode_flags(unsigned short - } - case FLAG_NUM: { // decimal numbers separated by comma (4521,23,233 -> 4521 23 233) - len = 1; -- char * src = flags; -+ const char * src = flags; - unsigned short * dest; -- char * p; -+ const char * p; - for (p = flags; *p; p++) { - if (*p == ',') len++; - } ---- lingucomponent/source/spellcheck/hunspell/hashmgr.hxx 2007-08-31 11:42:06.000000000 +0200 -+++ lingucomponent/source/spellcheck/hunspell/hashmgr.hxx 2007-08-31 17:15:51.000000000 +0200 -@@ -9,7 +9,7 @@ enum flag { FLAG_CHAR, FLAG_LONG, FLAG_N - class HashMgr - { - int tablesize; -- struct hentry * tableptr; -+ char ** tableptr; - int userword; - flag flag_mode; - int complexprefixes; -@@ -20,18 +20,25 @@ class HashMgr - int numaliasm; // morphological desciption `compression' with aliases - char ** aliasm; - -+ mutable struct hentry compat; -+ int compat_word_length; -+ int fd; -+ char * buffer; -+ size_t length; -+ -+ size_t seek_newline(size_t from_offset) const; -+ int slurp( int fd, size_t bytes ); - - public: - HashMgr(const char * tpath, const char * apath); - ~HashMgr(); - -- struct hentry * lookup(const char *) const; -- int hash(const char *) const; -- struct hentry * walk_hashtable(int & col, struct hentry * hp) const; -+ struct hentry * setup_hentry(const char *ptr, int hash_id); - -- int put_word(const char * word, int wl, char * ap); -- int put_word_pattern(const char * word, int wl, const char * pattern); -- int decode_flags(unsigned short ** result, char * flags); -+ struct hentry * lookup(const char *, int from_id = -1) const; -+ struct hentry * walk_hashtable(int & col, struct hentry * p) const; -+ -+ int decode_flags(unsigned short ** result, const char * flags); - unsigned short decode_flag(const char * flag); - char * encode_flag(unsigned short flag); - int is_aliasf(); -@@ -40,8 +47,8 @@ public: - char * get_aliasm(int index); - - private: -+ int hash(const char *) const; - int load_tables(const char * tpath); -- int add_word(const char * word, int wl, unsigned short * ap, int al, const char * desc); - int load_config(const char * affpath); - int parse_aliasf(char * line, FILE * af); - int parse_aliasm(char * line, FILE * af); ---- lingucomponent/source/spellcheck/hunspell/htypes.hxx 2006-01-06 14:10:55.000000000 +0100 -+++ lingucomponent/source/spellcheck/hunspell/htypes.hxx 2007-08-31 17:14:39.000000000 +0200 -@@ -17,9 +17,8 @@ struct hentry - short alen; - char * word; - unsigned short * astr; -- struct hentry * next; -- struct hentry * next_homonym; - char * description; -+ int hash_id; - }; - - #endif ---- lingucomponent/source/spellcheck/hunspell/hunspell.cxx 2007-08-31 11:42:06.000000000 +0200 -+++ lingucomponent/source/spellcheck/hunspell/hunspell.cxx 2007-08-31 17:35:25.000000000 +0200 -@@ -599,7 +599,7 @@ struct hentry * Hunspell::check(const ch - while (he && (he->astr) && - ((pAMgr->get_pseudoroot() && TESTAFF(he->astr, pAMgr->get_pseudoroot(), he->alen)) || - (pAMgr->get_onlyincompound() && TESTAFF(he->astr, pAMgr->get_onlyincompound(), he->alen)) -- )) he = he->next_homonym; -+ )) he = pHMgr->lookup(word, he->hash_id); // next homonym - - // check with affixes - if (!he && pAMgr) { -@@ -1190,30 +1190,6 @@ struct unicode_info2 * Hunspell::get_utf - return utfconv; - } - --int Hunspell::put_word(const char * word) --{ -- if (pHMgr) { -- return pHMgr->put_word(word, strlen(word), NULL); -- } -- return 0; --} -- --int Hunspell::put_word_suffix(const char * word, const char * suffix) --{ -- if (pHMgr) { -- return pHMgr->put_word(word, strlen(word), (char *) suffix); -- } -- return 0; --} -- --int Hunspell::put_word_pattern(const char * word, const char * pattern) --{ -- if (pHMgr) { -- return pHMgr->put_word_pattern(word, strlen(word), pattern); -- } -- return 0; --} -- - const char * Hunspell::get_version() - { - return pAMgr->get_version(); ---- lingucomponent/source/spellcheck/hunspell/hunspell.hxx 2007-08-31 11:42:06.000000000 +0200 -+++ lingucomponent/source/spellcheck/hunspell/hunspell.hxx 2007-08-31 14:14:26.000000000 +0200 -@@ -75,20 +75,6 @@ public: - - int suggest(char*** slst, const char * word); - -- /* handling custom dictionary */ -- -- int put_word(const char * word); -- -- /* suffix is an affix flag string, similarly in dictionary files */ -- -- int put_word_suffix(const char * word, const char * suffix); -- -- /* pattern is a sample dictionary word -- * put word into custom dictionary with affix flags of pattern word -- */ -- -- int put_word_pattern(const char * word, const char * pattern); -- - /* other */ - - char * get_dic_encoding(); ---- lingucomponent/source/spellcheck/hunspell/suggestmgr.cxx 2007-08-31 11:42:06.000000000 +0200 -+++ lingucomponent/source/spellcheck/hunspell/suggestmgr.cxx 2007-08-31 17:39:14.000000000 +0200 -@@ -803,19 +803,10 @@ int SuggestMgr::ngsuggest(char** wlst, c - int _lval; - int sc; - int lp; -+ int lpMG; - - if (! pHMgr) return 0; - -- // exhaustively search through all root words -- // keeping track of the MAX_ROOTS most similar root words -- struct hentry * roots[MAX_ROOTS]; -- int scores[MAX_ROOTS]; -- for (i = 0; i < MAX_ROOTS; i++) { -- roots[i] = NULL; -- scores[i] = -100 * i; -- } -- lp = MAX_ROOTS - 1; -- - char w2[MAXWORDUTF8LEN]; - char * word = w; - -@@ -831,27 +822,6 @@ int SuggestMgr::ngsuggest(char** wlst, c - int nc = strlen(word); - int n = (utf8) ? u8_u16(u8, MAXSWL, word) : nc; - -- struct hentry* hp = NULL; -- int col = -1; -- while ((hp = pHMgr->walk_hashtable(col, hp))) { -- // check forbidden words -- if ((hp->astr) && (pAMgr) && -- (TESTAFF(hp->astr, pAMgr->get_forbiddenword(), hp->alen) || -- TESTAFF(hp->astr, pAMgr->get_nosuggest(), hp->alen) || -- TESTAFF(hp->astr, pAMgr->get_onlyincompound(), hp->alen))) continue; -- sc = ngram(3, word, hp->word, NGRAM_LONGER_WORSE); -- if (sc > scores[lp]) { -- scores[lp] = sc; -- roots[lp] = hp; -- int lval = sc; -- for (j=0; j < MAX_ROOTS; j++) -- if (scores[j] < lval) { -- lp = j; -- lval = scores[j]; -- } -- } -- } -- - // find minimum threshhold for a passable suggestion - // mangle original word three differnt ways - // and score them to generate a minimum acceptable score -@@ -870,9 +840,13 @@ int SuggestMgr::ngsuggest(char** wlst, c - thresh = thresh / 3; - thresh--; - -- // now expand affixes on each of these root words and -- // and use length adjusted ngram scores to select -- // possible suggestions -+ struct hentry * roots[MAX_ROOTS]; -+ int scores[MAX_ROOTS]; -+ for (i = 0; i < MAX_ROOTS; i++) { -+ roots[i] = NULL; -+ scores[i] = -100 * i; -+ } -+ - char * guess[MAX_GUESS]; - int gscore[MAX_GUESS]; - for(i=0;i<MAX_GUESS;i++) { -@@ -882,36 +856,28 @@ int SuggestMgr::ngsuggest(char** wlst, c - - lp = MAX_GUESS - 1; - -- struct guessword * glst; -- glst = (struct guessword *) calloc(MAX_WORDS,sizeof(struct guessword)); -- if (! glst) return 0; -+ // exhaustively search through all root words -+ // keeping track of the MAX_ROOTS most similar root words -+ lp = MAX_ROOTS - 1; - -- for (i = 0; i < MAX_ROOTS; i++) { -+ lpMG = MAX_GUESS - 1; - -- if (roots[i]) { -- struct hentry * rp = roots[i]; -- int nw = pAMgr->expand_rootword(glst, MAX_WORDS, rp->word, rp->wlen, -- rp->astr, rp->alen, word, nc); -- -- for (int k = 0; k < nw ; k++) { -- sc = ngram(n, word, glst[k].word, NGRAM_ANY_MISMATCH); -- if ((sc > thresh)) { -- if (sc > gscore[lp]) { -- if (guess[lp]) free (guess[lp]); -- gscore[lp] = sc; -- guess[lp] = glst[k].word; -- _lval = sc; -- for (j=0; j < MAX_GUESS; j++) -- if (gscore[j] < _lval) { -- lp = j; -- _lval = gscore[j]; -- } -- } else free (glst[k].word); -- } else free(glst[k].word); -- } -- } -+ struct hentry* hp = NULL; -+ int col = -1; -+ while ((hp = pHMgr->walk_hashtable(col, hp))) { -+ sc = ngram(3, word, hp->word, NGRAM_LONGER_WORSE); -+ if (sc > scores[lp]) { -+ scores[lp] = sc; -+ // Get the list of guesswords for this rootword -+ if (-1 == expand_rootword_affix(hp, word, n, thresh, gscore, guess, &lpMG)) return 0; -+ int lval = sc; -+ for (j=0; j < MAX_ROOTS; j++) -+ if (scores[j] < lval) { -+ lp = j; -+ lval = scores[j]; -+ } -+ } - } -- free(glst); - - // now we are done generating guesses - // sort in order of decreasing score -@@ -984,6 +950,49 @@ int SuggestMgr::ngsuggest(char** wlst, c - return ns; - } - -+int SuggestMgr::expand_rootword_affix(struct hentry *hp, char *word, int wrdlen, int thresh, int glistscore[], char *guesslist[], int *lp) -+{ -+ int sc; -+ int lval; -+ -+ struct guessword * glst; -+ glst = (struct guessword *) calloc(MAX_WORDS,sizeof(struct guessword)); -+ if (! glst) return -1; -+ -+ // now expand affixes on each of these root words and -+ // and use length adjusted ngram scores to select -+ // possible suggestions -+ if (hp) { -+ int nw = pAMgr->expand_rootword(glst, MAX_WORDS, hp->word, hp->wlen, -+ hp->astr, hp->alen, word, wrdlen); -+ for (int k = 0; k < nw; k++) -+ { -+ sc = ngram(wrdlen, word, glst[k].word, NGRAM_ANY_MISMATCH); -+ if (sc > thresh && sc > glistscore[*lp]) -+ { -+ if (guesslist[*lp]) free(guesslist[*lp]); -+ glistscore[*lp] = sc; -+ guesslist[*lp] = glst[k].word; -+ glst[k].word = NULL; -+ lval = sc; -+ for (int j=0; j < MAX_GUESS; j++) -+ { -+ if (glistscore[j] < lval) -+ { -+ *lp = j; -+ lval = glistscore[j]; -+ } -+ } -+ } -+ free (glst[k].word); -+ glst[k].word = NULL; -+ glst[k].allow = 0; -+ } -+ } -+ if (glst) free(glst); -+ -+ return 0; -+} - - // see if a candidate suggestion is spelled correctly - // needs to check both root words and words with affixes -@@ -1438,7 +1447,7 @@ char * SuggestMgr::suggest_morph(const c - if (rv->description) strcat(result, rv->description); - strcat(result, "\n"); - } -- rv = rv->next_homonym; -+ rv = pAMgr->lookup(word, rv->hash_id); // next homonym - } - - st = pAMgr->affix_check_morph(word,strlen(word)); ---- lingucomponent/source/spellcheck/hunspell/suggestmgr.hxx 2007-08-31 11:42:06.000000000 +0200 -+++ lingucomponent/source/spellcheck/hunspell/suggestmgr.hxx 2007-08-31 14:14:26.000000000 +0200 -@@ -78,6 +78,7 @@ private: - int equalfirstletter(char * s1, const char * s2); - int commoncharacterpositions(char * s1, const char * s2, int * is_swap); - void bubblesort( char ** rwd, int * rsc, int n); -+ int expand_rootword_affix(struct hentry *hp, char *word, int wrdlen, int thresh, int glistscore[], char *guesslist[], int *lp); - void lcs(const char * s, const char * s2, int * l1, int * l2, char ** result); - int lcslen(const char * s, const char* s2); - |