From a20a2d7e0d28658f2d9089da076961a599833a28 Mon Sep 17 00:00:00 2001 From: László Németh Date: Wed, 7 Nov 2018 13:18:36 +0100 Subject: bump hunspell to 1.7 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Change-Id: Ia8d1f4831e651b3a8d5115f78e5a5239b56c71c4 Reviewed-on: https://gerrit.libreoffice.org/63015 Tested-by: Jenkins Reviewed-by: László Németh --- ...pell-fixes-for-suggestion-spelling-and-an.patch | 1166 -------------------- 1 file changed, 1166 deletions(-) delete mode 100644 external/hunspell/0001-recent-Hunspell-fixes-for-suggestion-spelling-and-an.patch (limited to 'external/hunspell/0001-recent-Hunspell-fixes-for-suggestion-spelling-and-an.patch') diff --git a/external/hunspell/0001-recent-Hunspell-fixes-for-suggestion-spelling-and-an.patch b/external/hunspell/0001-recent-Hunspell-fixes-for-suggestion-spelling-and-an.patch deleted file mode 100644 index d4d822f92185..000000000000 --- a/external/hunspell/0001-recent-Hunspell-fixes-for-suggestion-spelling-and-an.patch +++ /dev/null @@ -1,1166 +0,0 @@ -in addition to that: configure.ac portion was fixed to not have unbalanced [] - -From d9f392dc35f75b1246862b2db8090e8d5b6ec068 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?L=C3=A1szl=C3=B3=20N=C3=A9meth?= -Date: Sun, 17 Jun 2018 17:21:01 +0200 -Subject: [PATCH] recent Hunspell fixes for suggestion, spelling and analysis - -6f976bf fix compiling on WIN32, use time.h and thread_local -24f0963 [morph] better time limitation for morphological analysis -8e6ceaa [spelling] tdf#118162 better time limitation for compounding -3f00ff3 [suggestion] tdf#118162 time limit for a HunspellImpl::suggest() call -a1f9dfa [suggestion] tdf#118162 time limit for a SuggestMgr::suggest() call -d70bf2d [spelling] optimize IGNORE to speed up dictionary loading -16b4900 [spelling] add time limit for compound word handling -b0ded55 [suggestion] lower limit for doubletwochars -b3a44fa [suggestion] limit longswapchar, lower limit for movechar -a295af9 [morph] clean up for separators of morphological analysis -ca5f629 [morph] add missing field separator for members with prefixes ---- - Makefile.in | 1 + - configure.ac | 8 ++ - src/hunspell/affentry.cxx | 12 +-- - src/hunspell/affixmgr.cxx | 89 +++++++++++++------ - src/hunspell/atypes.hxx | 10 +++ - src/hunspell/csutil.hxx | 12 +++ - src/hunspell/hashmgr.cxx | 2 +- - src/hunspell/hunspell.cxx | 210 ++++++++++++++++++++++++++------------------ - src/hunspell/hunvisapi.h | 12 ++- - src/hunspell/hunvisapi.h.in | 12 ++- - src/hunspell/suggestmgr.cxx | 72 +++++++++++---- - src/hunspell/suggestmgr.hxx | 5 -- - 12 files changed, 300 insertions(+), 145 deletions(-) - -diff --git a/Makefile.in b/Makefile.in -index 06d933e..241f797 100644 ---- a/Makefile.in -+++ b/Makefile.in -@@ -296,6 +296,7 @@ GMSGFMT = @GMSGFMT@ - GMSGFMT_015 = @GMSGFMT_015@ - GREP = @GREP@ - HAVE_ASPRINTF = @HAVE_ASPRINTF@ -+HAVE_CXX11 = @HAVE_CXX11@ - HAVE_NEWLOCALE = @HAVE_NEWLOCALE@ - HAVE_POSIX_PRINTF = @HAVE_POSIX_PRINTF@ - HAVE_SNPRINTF = @HAVE_SNPRINTF@ -diff --git a/configure.ac b/configure.ac -index fb79d0d..2936107 100644 ---- a/configure.ac -+++ b/configure.ac -@@ -16,6 +16,14 @@ HUNSPELL_VERSION_MINOR=`echo $VERSION | cut -d"." -f2` - AC_SUBST(HUNSPELL_VERSION_MAJOR) - AC_SUBST(HUNSPELL_VERSION_MINOR) - -+# check C++11 compiling environment for thread_local -+# to handle time limits better also with threads -+AS_CASE([$CXXFLAGS], -+ [*-std=c++11*], [HAVE_CXX11=1], -+ [HAVE_CXX11=0] -+ ) -+AC_SUBST(HAVE_CXX11) -+ - # Checks for programs. - AC_PROG_CXX - AC_PROG_CC -diff --git a/src/hunspell/affentry.cxx b/src/hunspell/affentry.cxx -index 4ef0c00..ffcdb21 100644 ---- a/src/hunspell/affentry.cxx -+++ b/src/hunspell/affentry.cxx -@@ -399,28 +399,28 @@ std::string PfxEntry::check_morph(const char* word, - ((!needflag) || TESTAFF(he->astr, needflag, he->alen) || - (contclass && TESTAFF(contclass, needflag, contclasslen)))) { - if (morphcode) { -- result.append(" "); -+ result.push_back(MSEP_FLD); - result.append(morphcode); - } else - result.append(getKey()); - if (!HENTRY_FIND(he, MORPH_STEM)) { -- result.append(" "); -+ result.push_back(MSEP_FLD); - result.append(MORPH_STEM); - result.append(HENTRY_WORD(he)); - } - // store the pointer of the hash entry - if (HENTRY_DATA(he)) { -- result.append(" "); -+ result.push_back(MSEP_FLD); - result.append(HENTRY_DATA2(he)); - } else { - // return with debug information - char* flag = pmyMgr->encode_flag(getFlag()); -- result.append(" "); -+ result.push_back(MSEP_FLD); - result.append(MORPH_FLAG); - result.append(flag); - free(flag); - } -- result.append("\n"); -+ result.push_back(MSEP_REC); - } - he = he->next_homonym; - } while (he); -@@ -804,7 +804,7 @@ std::string SfxEntry::check_twosfx_morph(const char* word, - if (!st.empty()) { - if (ppfx->getMorph()) { - result.append(ppfx->getMorph()); -- result.append(" "); -+ result.push_back(MSEP_FLD); - } - result.append(st); - mychomp(result); -diff --git a/src/hunspell/affixmgr.cxx b/src/hunspell/affixmgr.cxx -index 2c540f2..1610ef0 100644 ---- a/src/hunspell/affixmgr.cxx -+++ b/src/hunspell/affixmgr.cxx -@@ -72,6 +72,7 @@ - #include - #include - #include -+#include - - #include - #include -@@ -1014,7 +1015,7 @@ int AffixMgr::process_sfx_order() { - // add flags to the result for dictionary debugging - std::string& AffixMgr::debugflag(std::string& result, unsigned short flag) { - char* st = encode_flag(flag); -- result.append(" "); -+ result.push_back(MSEP_FLD); - result.append(MORPH_FLAG); - if (st) { - result.append(st); -@@ -1594,6 +1595,17 @@ struct hentry* AffixMgr::compound_check(const std::string& word, - - int checked_prefix; - -+ // add a time limit to handle possible -+ // combinatorical explosion of the overlapping words -+ -+ HUNSPELL_THREAD_LOCAL clock_t timelimit; -+ -+ if (wordnum == 0) -+ timelimit = clock(); -+ else if (timelimit != 0 && (clock() > timelimit + TIMELIMIT)) { -+ timelimit = 0; -+ } -+ - setcminmax(&cmin, &cmax, word.c_str(), len); - - st.assign(word); -@@ -1618,6 +1630,9 @@ struct hentry* AffixMgr::compound_check(const std::string& word, - - do { // simplified checkcompoundpattern loop - -+ if (timelimit == 0) -+ return 0; -+ - if (scpd > 0) { - for (; scpd <= checkcpdtable.size() && - (checkcpdtable[scpd - 1].pattern3.empty() || -@@ -2186,6 +2201,17 @@ int AffixMgr::compound_check_morph(const char* word, - char affixed = 0; - hentry** oldwords = words; - -+ // add a time limit to handle possible -+ // combinatorical explosion of the overlapping words -+ -+ HUNSPELL_THREAD_LOCAL clock_t timelimit; -+ -+ if (wordnum == 0) -+ timelimit = clock(); -+ else if (timelimit != 0 && (clock() > timelimit + TIMELIMIT)) { -+ timelimit = 0; -+ } -+ - setcminmax(&cmin, &cmax, word, len); - - st.assign(word); -@@ -2204,6 +2230,9 @@ int AffixMgr::compound_check_morph(const char* word, - - do { // onlycpdrule loop - -+ if (timelimit == 0) -+ return 0; -+ - oldnumsyllable = numsyllable; - oldwordnum = wordnum; - checked_prefix = 0; -@@ -2245,6 +2274,9 @@ int AffixMgr::compound_check_morph(const char* word, - rv = rv->next_homonym; - } - -+ if (timelimit == 0) -+ return 0; -+ - if (rv) - affixed = 0; - -@@ -2435,22 +2467,22 @@ int AffixMgr::compound_check_morph(const char* word, - - if (rv && words && words[wnum + 1]) { - result.append(presult); -- result.append(" "); -+ result.push_back(MSEP_FLD); - result.append(MORPH_PART); - result.append(word + i); - if (complexprefixes && HENTRY_DATA(rv)) - result.append(HENTRY_DATA2(rv)); - if (!HENTRY_FIND(rv, MORPH_STEM)) { -- result.append(" "); -+ result.push_back(MSEP_FLD); - result.append(MORPH_STEM); - result.append(HENTRY_WORD(rv)); - } - // store the pointer of the hash entry - if (!complexprefixes && HENTRY_DATA(rv)) { -- result.append(" "); -+ result.push_back(MSEP_FLD); - result.append(HENTRY_DATA2(rv)); - } -- result.append("\n"); -+ result.push_back(MSEP_REC); - return 0; - } - -@@ -2492,7 +2524,7 @@ int AffixMgr::compound_check_morph(const char* word, - ((!checkcompounddup || (rv != rv_first)))) { - // bad compound word - result.append(presult); -- result.append(" "); -+ result.push_back(MSEP_FLD); - result.append(MORPH_PART); - result.append(word + i); - -@@ -2500,17 +2532,17 @@ int AffixMgr::compound_check_morph(const char* word, - if (complexprefixes) - result.append(HENTRY_DATA2(rv)); - if (!HENTRY_FIND(rv, MORPH_STEM)) { -- result.append(" "); -+ result.push_back(MSEP_FLD); - result.append(MORPH_STEM); - result.append(HENTRY_WORD(rv)); - } - // store the pointer of the hash entry - if (!complexprefixes) { -- result.append(" "); -+ result.push_back(MSEP_FLD); - result.append(HENTRY_DATA2(rv)); - } - } -- result.append("\n"); -+ result.push_back(MSEP_REC); - ok = 1; - } - -@@ -2549,7 +2581,7 @@ int AffixMgr::compound_check_morph(const char* word, - line_uniq_app(m, MSEP_REC); - result.append(m); - } -- result.append("\n"); -+ result.push_back(MSEP_REC); - ok = 1; - } - } -@@ -2639,6 +2671,7 @@ int AffixMgr::compound_check_morph(const char* word, - result.append(MORPH_PART); - result.append(word + i); - line_uniq_app(m, MSEP_REC); -+ result.push_back(MSEP_FLD); - result.append(m); - } - result.push_back(MSEP_REC); -@@ -2864,17 +2897,17 @@ std::string AffixMgr::suffix_check_twosfx_morph(const char* word, - if (ppfx) { - if (ppfx->getMorph()) { - result.append(ppfx->getMorph()); -- result.append(" "); -+ result.push_back(MSEP_FLD); - } else - debugflag(result, ppfx->getFlag()); - } - result.append(st); - if (se->getMorph()) { -- result.append(" "); -+ result.push_back(MSEP_FLD); - result.append(se->getMorph()); - } else - debugflag(result, se->getFlag()); -- result.append("\n"); -+ result.push_back(MSEP_REC); - } - } - se = se->getNext(); -@@ -2899,12 +2932,12 @@ std::string AffixMgr::suffix_check_twosfx_morph(const char* word, - result3.clear(); - - if (sptr->getMorph()) { -- result3.append(" "); -+ result3.push_back(MSEP_FLD); - result3.append(sptr->getMorph()); - } else - debugflag(result3, sptr->getFlag()); - strlinecat(result2, result3); -- result2.append("\n"); -+ result2.push_back(MSEP_REC); - result.append(result2); - } - } -@@ -2967,28 +3000,28 @@ std::string AffixMgr::suffix_check_morph(const char* word, - if (ppfx) { - if (ppfx->getMorph()) { - result.append(ppfx->getMorph()); -- result.append(" "); -+ result.push_back(MSEP_FLD); - } else - debugflag(result, ppfx->getFlag()); - } - if (complexprefixes && HENTRY_DATA(rv)) - result.append(HENTRY_DATA2(rv)); - if (!HENTRY_FIND(rv, MORPH_STEM)) { -- result.append(" "); -+ result.push_back(MSEP_FLD); - result.append(MORPH_STEM); - result.append(HENTRY_WORD(rv)); - } - - if (!complexprefixes && HENTRY_DATA(rv)) { -- result.append(" "); -+ result.push_back(MSEP_FLD); - result.append(HENTRY_DATA2(rv)); - } - if (se->getMorph()) { -- result.append(" "); -+ result.push_back(MSEP_FLD); - result.append(se->getMorph()); - } else - debugflag(result, se->getFlag()); -- result.append("\n"); -+ result.push_back(MSEP_REC); - rv = se->get_next_homonym(rv, sfxopts, ppfx, cclass, needflag); - } - } -@@ -3034,29 +3067,29 @@ std::string AffixMgr::suffix_check_morph(const char* word, - if (ppfx) { - if (ppfx->getMorph()) { - result.append(ppfx->getMorph()); -- result.append(" "); -+ result.push_back(MSEP_FLD); - } else - debugflag(result, ppfx->getFlag()); - } - if (complexprefixes && HENTRY_DATA(rv)) - result.append(HENTRY_DATA2(rv)); - if (!HENTRY_FIND(rv, MORPH_STEM)) { -- result.append(" "); -+ result.push_back(MSEP_FLD); - result.append(MORPH_STEM); - result.append(HENTRY_WORD(rv)); - } - - if (!complexprefixes && HENTRY_DATA(rv)) { -- result.append(" "); -+ result.push_back(MSEP_FLD); - result.append(HENTRY_DATA2(rv)); - } - - if (sptr->getMorph()) { -- result.append(" "); -+ result.push_back(MSEP_FLD); - result.append(sptr->getMorph()); - } else - debugflag(result, sptr->getFlag()); -- result.append("\n"); -+ result.push_back(MSEP_REC); - rv = sptr->get_next_homonym(rv, sfxopts, ppfx, cclass, needflag); - } - sptr = sptr->getNextEQ(); -@@ -3245,7 +3278,7 @@ std::string AffixMgr::morphgen(const char* ts, - // use input suffix fields, if exist - if (strstr(morph, MORPH_INFL_SFX) || strstr(morph, MORPH_DERI_SFX)) { - mymorph.assign(morph); -- mymorph.append(" "); -+ mymorph.push_back(MSEP_FLD); - stemmorphcatpos = mymorph.size(); - } else { - stemmorphcatpos = std::string::npos; -@@ -4557,7 +4590,7 @@ bool AffixMgr::parse_affix(const std::string& line, - entry->appnd = std::string(start_piece, dash); - std::string dash_str(dash + 1, iter); - -- if (!ignorechars.empty()) { -+ if (!ignorechars.empty() && !has_no_ignored_chars(entry->appnd, ignorechars)) { - if (utf8) { - remove_ignored_chars_utf(entry->appnd, ignorechars_utf16); - } else { -@@ -4593,7 +4626,7 @@ bool AffixMgr::parse_affix(const std::string& line, - } else { - entry->appnd = std::string(start_piece, iter); - -- if (!ignorechars.empty()) { -+ if (!ignorechars.empty() && !has_no_ignored_chars(entry->appnd, ignorechars)) { - if (utf8) { - remove_ignored_chars_utf(entry->appnd, ignorechars_utf16); - } else { -diff --git a/src/hunspell/atypes.hxx b/src/hunspell/atypes.hxx -index f841523..38396db 100644 ---- a/src/hunspell/atypes.hxx -+++ b/src/hunspell/atypes.hxx -@@ -95,6 +95,16 @@ static inline void HUNSPELL_WARNING(FILE*, const char*, ...) {} - - #define TESTAFF(a, b, c) (std::binary_search(a, a + c, b)) - -+// timelimit: max. ~1/4 sec (process time on Linux) for -+// for a suggestion, including max. ~/10 sec for a case -+// sensitive plain or compound word suggestion, within -+// ~1/20 sec long time consuming suggestion functions -+#define TIMELIMIT_GLOBAL (CLOCKS_PER_SEC / 4) -+#define TIMELIMIT_SUGGESTION (CLOCKS_PER_SEC / 10) -+#define TIMELIMIT (CLOCKS_PER_SEC / 20) -+#define MINTIMER 100 -+#define MAXPLUSTIMER 100 -+ - struct guessword { - char* word; - bool allow; -diff --git a/src/hunspell/csutil.hxx b/src/hunspell/csutil.hxx -index 01c0a24..3397257 100644 ---- a/src/hunspell/csutil.hxx -+++ b/src/hunspell/csutil.hxx -@@ -311,4 +311,16 @@ LIBHUNSPELL_DLL_EXPORTED inline char* HENTRY_FIND(struct hentry* h, - return (HENTRY_DATA(h) ? strstr(HENTRY_DATA(h), p) : NULL); - } - -+// to avoid unnecessary string copies and Unicode conversions -+// we simply check the ignored_chars characters in the word -+// (in the case of UTF-8 encoded strings, "false" means -+// "likely false", if ignored_chars characters are not ASCII) -+inline bool has_no_ignored_chars(const std::string& word, -+ const std::string& ignored_chars) { -+ for (std::string::const_iterator it = ignored_chars.begin(), end = ignored_chars.end(); it != end; ++it) -+ if (word.find(*it) != std::string::npos) -+ return false; -+ return true; -+} -+ - #endif -diff --git a/src/hunspell/hashmgr.cxx b/src/hunspell/hashmgr.cxx -index 5183f02..7e843c3 100644 ---- a/src/hunspell/hashmgr.cxx -+++ b/src/hunspell/hashmgr.cxx -@@ -190,7 +190,7 @@ int HashMgr::add_word(const std::string& in_word, - - std::string *word_copy = NULL; - std::string *desc_copy = NULL; -- if (!ignorechars.empty() || complexprefixes) { -+ if ((!ignorechars.empty() && !has_no_ignored_chars(in_word, ignorechars)) || complexprefixes) { - word_copy = new std::string(in_word); - - if (!ignorechars.empty()) { -diff --git a/src/hunspell/hunspell.cxx b/src/hunspell/hunspell.cxx -index d6e871f..0dcd748 100644 ---- a/src/hunspell/hunspell.cxx -+++ b/src/hunspell/hunspell.cxx -@@ -71,6 +71,7 @@ - #include - #include - #include -+#include - - #include "affixmgr.hxx" - #include "hunspell.hxx" -@@ -101,7 +102,8 @@ public: - bool spell(const std::string& word, int* info = NULL, std::string* root = NULL); - bool spell_internal(const std::string& word, int* info = NULL, std::string* root = NULL); - std::vector suggest(const std::string& word); -- std::vector suggest_internal(const std::string& word); -+ std::vector suggest_internal(const std::string& word, -+ bool& capitalized, size_t& abbreviated, int& captype); - const std::string& get_wordchars() const; - const std::vector& get_wordchars_utf16() const; - const std::string& get_dict_encoding() const; -@@ -755,7 +757,7 @@ struct hentry* HunspellImpl::checkword(const std::string& w, int* info, std::str - int len; - - const char* ignoredchars = pAMgr ? pAMgr->get_ignore() : NULL; -- if (ignoredchars != NULL) { -+ if (ignoredchars != NULL && !has_no_ignored_chars(w, ignoredchars)) { - w2.assign(w); - if (utf8) { - const std::vector& ignoredchars_utf16 = -@@ -887,8 +889,83 @@ std::vector Hunspell::suggest(const std::string& word) { - } - - std::vector HunspellImpl::suggest(const std::string& word) { -- std::vector slst; -- slst = suggest_internal(word); -+ bool capwords; -+ size_t abbv; -+ int captype; -+ std::vector slst = suggest_internal(word, capwords, abbv, captype); -+ // word reversing wrapper for complex prefixes -+ if (complexprefixes) { -+ for (size_t j = 0; j < slst.size(); ++j) { -+ if (utf8) -+ reverseword_utf(slst[j]); -+ else -+ reverseword(slst[j]); -+ } -+ } -+ -+ // capitalize -+ if (capwords) -+ for (size_t j = 0; j < slst.size(); ++j) { -+ mkinitcap(slst[j]); -+ } -+ -+ // expand suggestions with dot(s) -+ if (abbv && pAMgr && pAMgr->get_sugswithdots()) { -+ for (size_t j = 0; j < slst.size(); ++j) { -+ slst[j].append(word.substr(word.size() - abbv)); -+ } -+ } -+ -+ // remove bad capitalized and forbidden forms -+ if (pAMgr && (pAMgr->get_keepcase() || pAMgr->get_forbiddenword())) { -+ switch (captype) { -+ case INITCAP: -+ case ALLCAP: { -+ size_t l = 0; -+ for (size_t j = 0; j < slst.size(); ++j) { -+ if (slst[j].find(' ') == std::string::npos && !spell(slst[j])) { -+ std::string s; -+ std::vector w; -+ if (utf8) { -+ u8_u16(w, slst[j]); -+ } else { -+ s = slst[j]; -+ } -+ mkallsmall2(s, w); -+ if (spell(s)) { -+ slst[l] = s; -+ ++l; -+ } else { -+ mkinitcap2(s, w); -+ if (spell(s)) { -+ slst[l] = s; -+ ++l; -+ } -+ } -+ } else { -+ slst[l] = slst[j]; -+ ++l; -+ } -+ } -+ slst.resize(l); -+ } -+ } -+ } -+ -+ // remove duplications -+ size_t l = 0; -+ for (size_t j = 0; j < slst.size(); ++j) { -+ slst[l] = slst[j]; -+ for (size_t k = 0; k < l; ++k) { -+ if (slst[k] == slst[j]) { -+ --l; -+ break; -+ } -+ } -+ ++l; -+ } -+ slst.resize(l); -+ - // output conversion - RepList* rl = (pAMgr) ? pAMgr->get_oconvtable() : NULL; - if (rl) { -@@ -902,7 +979,12 @@ std::vector HunspellImpl::suggest(const std::string& word) { - return slst; - } - --std::vector HunspellImpl::suggest_internal(const std::string& word) { -+std::vector HunspellImpl::suggest_internal(const std::string& word, -+ bool& capwords, size_t& abbv, int& captype) { -+ captype = NOCAP; -+ abbv = 0; -+ capwords = false; -+ - std::vector slst; - - int onlycmpdsug = 0; -@@ -920,8 +998,6 @@ std::vector HunspellImpl::suggest_internal(const std::string& word) - if (word.size() >= MAXWORDLEN) - return slst; - } -- int captype = NOCAP; -- size_t abbv = 0; - size_t wl = 0; - - std::string scw; -@@ -942,9 +1020,12 @@ std::vector HunspellImpl::suggest_internal(const std::string& word) - return slst; - } - -- int capwords = 0; - bool good = false; - -+ HUNSPELL_THREAD_LOCAL clock_t timelimit; -+ // initialize in every suggestion call -+ timelimit = clock(); -+ - // check capitalized form for FORCEUCASE - if (pAMgr && captype == NOCAP && pAMgr->get_forceucase()) { - int info = SPELL_ORIGCAP; -@@ -959,26 +1041,36 @@ std::vector HunspellImpl::suggest_internal(const std::string& word) - switch (captype) { - case NOCAP: { - good |= pSMgr->suggest(slst, scw.c_str(), &onlycmpdsug); -+ if (clock() > timelimit + TIMELIMIT_GLOBAL) -+ return slst; - if (abbv) { - std::string wspace(scw); - wspace.push_back('.'); - good |= pSMgr->suggest(slst, wspace.c_str(), &onlycmpdsug); -+ if (clock() > timelimit + TIMELIMIT_GLOBAL) -+ return slst; - } - break; - } - - case INITCAP: { -- capwords = 1; -+ capwords = true; - good |= pSMgr->suggest(slst, scw.c_str(), &onlycmpdsug); -+ if (clock() > timelimit + TIMELIMIT_GLOBAL) -+ return slst; - std::string wspace(scw); - mkallsmall2(wspace, sunicw); - good |= pSMgr->suggest(slst, wspace.c_str(), &onlycmpdsug); -+ if (clock() > timelimit + TIMELIMIT_GLOBAL) -+ return slst; - break; - } - case HUHINITCAP: -- capwords = 1; -+ capwords = true; - case HUHCAP: { - good |= pSMgr->suggest(slst, scw.c_str(), &onlycmpdsug); -+ if (clock() > timelimit + TIMELIMIT_GLOBAL) -+ return slst; - // something.The -> something. The - size_t dot_pos = scw.find('.'); - if (dot_pos != std::string::npos) { -@@ -1005,6 +1097,8 @@ std::vector HunspellImpl::suggest_internal(const std::string& word) - wspace = scw; - mkinitsmall2(wspace, sunicw); - good |= pSMgr->suggest(slst, wspace.c_str(), &onlycmpdsug); -+ if (clock() > timelimit + TIMELIMIT_GLOBAL) -+ return slst; - } - wspace = scw; - mkallsmall2(wspace, sunicw); -@@ -1012,11 +1106,15 @@ std::vector HunspellImpl::suggest_internal(const std::string& word) - insert_sug(slst, wspace); - size_t prevns = slst.size(); - good |= pSMgr->suggest(slst, wspace.c_str(), &onlycmpdsug); -+ if (clock() > timelimit + TIMELIMIT_GLOBAL) -+ return slst; - if (captype == HUHINITCAP) { - mkinitcap2(wspace, sunicw); - if (spell(wspace.c_str())) - insert_sug(slst, wspace); - good |= pSMgr->suggest(slst, wspace.c_str(), &onlycmpdsug); -+ if (clock() > timelimit + TIMELIMIT_GLOBAL) -+ return slst; - } - // aNew -> "a New" (instead of "a new") - for (size_t j = prevns; j < slst.size(); ++j) { -@@ -1044,10 +1142,14 @@ std::vector HunspellImpl::suggest_internal(const std::string& word) - std::string wspace(scw); - mkallsmall2(wspace, sunicw); - good |= pSMgr->suggest(slst, wspace.c_str(), &onlycmpdsug); -+ if (clock() > timelimit + TIMELIMIT_GLOBAL) -+ return slst; - if (pAMgr && pAMgr->get_keepcase() && spell(wspace.c_str())) - insert_sug(slst, wspace); - mkinitcap2(wspace, sunicw); - good |= pSMgr->suggest(slst, wspace.c_str(), &onlycmpdsug); -+ if (clock() > timelimit + TIMELIMIT_GLOBAL) -+ return slst; - for (size_t j = 0; j < slst.size(); ++j) { - mkallcap(slst[j]); - if (pAMgr && pAMgr->get_checksharps()) { -@@ -1084,21 +1186,27 @@ std::vector HunspellImpl::suggest_internal(const std::string& word) - switch (captype) { - case NOCAP: { - pSMgr->ngsuggest(slst, scw.c_str(), m_HMgrs, NOCAP); -+ if (clock() > timelimit + TIMELIMIT_GLOBAL) -+ return slst; - break; - } - case HUHINITCAP: -- capwords = 1; -+ capwords = true; - case HUHCAP: { - std::string wspace(scw); - mkallsmall2(wspace, sunicw); - pSMgr->ngsuggest(slst, wspace.c_str(), m_HMgrs, HUHCAP); -+ if (clock() > timelimit + TIMELIMIT_GLOBAL) -+ return slst; - break; - } - case INITCAP: { -- capwords = 1; -+ capwords = true; - std::string wspace(scw); - mkallsmall2(wspace, sunicw); - pSMgr->ngsuggest(slst, wspace.c_str(), m_HMgrs, INITCAP); -+ if (clock() > timelimit + TIMELIMIT_GLOBAL) -+ return slst; - break; - } - case ALLCAP: { -@@ -1106,6 +1214,8 @@ std::vector HunspellImpl::suggest_internal(const std::string& word) - mkallsmall2(wspace, sunicw); - size_t oldns = slst.size(); - pSMgr->ngsuggest(slst, wspace.c_str(), m_HMgrs, ALLCAP); -+ if (clock() > timelimit + TIMELIMIT_GLOBAL) -+ return slst; - for (size_t j = oldns; j < slst.size(); ++j) { - mkallcap(slst[j]); - } -@@ -1137,6 +1247,8 @@ std::vector HunspellImpl::suggest_internal(const std::string& word) - std::string chunk = scw.substr(prev_pos, dash_pos - prev_pos); - if (!spell(chunk.c_str())) { - std::vector nlst = suggest(chunk.c_str()); -+ if (clock() > timelimit + TIMELIMIT_GLOBAL) -+ return slst; - for (std::vector::reverse_iterator j = nlst.rbegin(); j != nlst.rend(); ++j) { - std::string wspace = scw.substr(0, prev_pos); - wspace.append(*j); -@@ -1160,80 +1272,6 @@ std::vector HunspellImpl::suggest_internal(const std::string& word) - dash_pos = scw.size(); - } - } -- -- // word reversing wrapper for complex prefixes -- if (complexprefixes) { -- for (size_t j = 0; j < slst.size(); ++j) { -- if (utf8) -- reverseword_utf(slst[j]); -- else -- reverseword(slst[j]); -- } -- } -- -- // capitalize -- if (capwords) -- for (size_t j = 0; j < slst.size(); ++j) { -- mkinitcap(slst[j]); -- } -- -- // expand suggestions with dot(s) -- if (abbv && pAMgr && pAMgr->get_sugswithdots()) { -- for (size_t j = 0; j < slst.size(); ++j) { -- slst[j].append(word.substr(word.size() - abbv)); -- } -- } -- -- // remove bad capitalized and forbidden forms -- if (pAMgr && (pAMgr->get_keepcase() || pAMgr->get_forbiddenword())) { -- switch (captype) { -- case INITCAP: -- case ALLCAP: { -- size_t l = 0; -- for (size_t j = 0; j < slst.size(); ++j) { -- if (slst[j].find(' ') == std::string::npos && !spell(slst[j])) { -- std::string s; -- std::vector w; -- if (utf8) { -- u8_u16(w, slst[j]); -- } else { -- s = slst[j]; -- } -- mkallsmall2(s, w); -- if (spell(s)) { -- slst[l] = s; -- ++l; -- } else { -- mkinitcap2(s, w); -- if (spell(s)) { -- slst[l] = s; -- ++l; -- } -- } -- } else { -- slst[l] = slst[j]; -- ++l; -- } -- } -- slst.resize(l); -- } -- } -- } -- -- // remove duplications -- size_t l = 0; -- for (size_t j = 0; j < slst.size(); ++j) { -- slst[l] = slst[j]; -- for (size_t k = 0; k < l; ++k) { -- if (slst[k] == slst[j]) { -- --l; -- break; -- } -- } -- ++l; -- } -- slst.resize(l); -- - return slst; - } - -diff --git a/src/hunspell/hunvisapi.h b/src/hunspell/hunvisapi.h -index eb2b348..8283017 100644 ---- a/src/hunspell/hunvisapi.h -+++ b/src/hunspell/hunvisapi.h -@@ -3,7 +3,7 @@ - - #if defined(HUNSPELL_STATIC) - # define LIBHUNSPELL_DLL_EXPORTED --#elif defined(_MSC_VER) -+#elif defined(_WIN32) - # if defined(BUILDING_LIBHUNSPELL) - # define LIBHUNSPELL_DLL_EXPORTED __declspec(dllexport) - # else -@@ -15,4 +15,14 @@ - # define LIBHUNSPELL_DLL_EXPORTED - #endif - -+/* use thread_local, if it's possible, otherwise static */ -+ -+#if defined(_WIN32) -+# define HUNSPELL_THREAD_LOCAL thread_local -+#elif 0 -+# define HUNSPELL_THREAD_LOCAL thread_local -+#else -+# define HUNSPELL_THREAD_LOCAL static -+#endif -+ - #endif -diff --git a/src/hunspell/hunvisapi.h.in b/src/hunspell/hunvisapi.h.in -index a1020c8..85972dd 100644 ---- a/src/hunspell/hunvisapi.h.in -+++ b/src/hunspell/hunvisapi.h.in -@@ -3,7 +3,7 @@ - - #if defined(HUNSPELL_STATIC) - # define LIBHUNSPELL_DLL_EXPORTED --#elif defined(_MSC_VER) -+#elif defined(_WIN32) - # if defined(BUILDING_LIBHUNSPELL) - # define LIBHUNSPELL_DLL_EXPORTED __declspec(dllexport) - # else -@@ -15,4 +15,14 @@ - # define LIBHUNSPELL_DLL_EXPORTED - #endif - -+/* use thread_local, if it's possible, otherwise static */ -+ -+#if defined(_WIN32) -+# define HUNSPELL_THREAD_LOCAL thread_local -+#elif @HAVE_CXX11@ -+# define HUNSPELL_THREAD_LOCAL thread_local -+#else -+# define HUNSPELL_THREAD_LOCAL static -+#endif -+ - #endif -diff --git a/src/hunspell/suggestmgr.cxx b/src/hunspell/suggestmgr.cxx -index ade85af..d9fabca 100644 ---- a/src/hunspell/suggestmgr.cxx -+++ b/src/hunspell/suggestmgr.cxx -@@ -72,6 +72,7 @@ - #include - #include - #include -+#include - - #include "suggestmgr.hxx" - #include "htypes.hxx" -@@ -79,6 +80,8 @@ - - const w_char W_VLINE = {'\0', '|'}; - -+#define MAX_CHAR_DISTANCE 4 -+ - SuggestMgr::SuggestMgr(const char* tryme, unsigned int maxn, AffixMgr* aptr) { - // register affix manager and check in string of chars to - // try when building candidate suggestions -@@ -211,6 +214,11 @@ bool SuggestMgr::suggest(std::vector& slst, - - for (int cpdsuggest = 0; (cpdsuggest < 2) && (nocompoundtwowords == 0) && !good_suggestion; - cpdsuggest++) { -+ -+ HUNSPELL_THREAD_LOCAL clock_t timelimit; -+ // initialize both in non-compound and compound cycles -+ timelimit = clock(); -+ - // limit compound suggestion - if (cpdsuggest > 0) - oldSug = slst.size(); -@@ -233,12 +241,16 @@ bool SuggestMgr::suggest(std::vector& slst, - if (slst.size() > i) - good_suggestion = true; - } -+ if (clock() > timelimit + TIMELIMIT_SUGGESTION) -+ return good_suggestion; - - // perhaps we made chose the wrong char from a related set - if ((slst.size() < maxSug) && - (!cpdsuggest || (slst.size() < oldSug + maxcpdsugs))) { - mapchars(slst, word, cpdsuggest); - } -+ if (clock() > timelimit + TIMELIMIT_SUGGESTION) -+ return good_suggestion; - - // only suggest compound words when no other suggestion - if ((cpdsuggest == 0) && (slst.size() > nsugorig)) -@@ -251,6 +263,8 @@ bool SuggestMgr::suggest(std::vector& slst, - else - swapchar(slst, word, cpdsuggest); - } -+ if (clock() > timelimit + TIMELIMIT_SUGGESTION) -+ return good_suggestion; - - // did we swap the order of non adjacent chars by mistake - if ((slst.size() < maxSug) && (!cpdsuggest || (slst.size() < oldSug + maxcpdsugs))) { -@@ -259,6 +273,8 @@ bool SuggestMgr::suggest(std::vector& slst, - else - longswapchar(slst, word, cpdsuggest); - } -+ if (clock() > timelimit + TIMELIMIT_SUGGESTION) -+ return good_suggestion; - - // did we just hit the wrong key in place of a good char (case and keyboard) - if ((slst.size() < maxSug) && (!cpdsuggest || (slst.size() < oldSug + maxcpdsugs))) { -@@ -267,6 +283,8 @@ bool SuggestMgr::suggest(std::vector& slst, - else - badcharkey(slst, word, cpdsuggest); - } -+ if (clock() > timelimit + TIMELIMIT_SUGGESTION) -+ return good_suggestion; - - // did we add a char that should not be there - if ((slst.size() < maxSug) && (!cpdsuggest || (slst.size() < oldSug + maxcpdsugs))) { -@@ -275,6 +293,8 @@ bool SuggestMgr::suggest(std::vector& slst, - else - extrachar(slst, word, cpdsuggest); - } -+ if (clock() > timelimit + TIMELIMIT_SUGGESTION) -+ return good_suggestion; - - // did we forgot a char - if ((slst.size() < maxSug) && (!cpdsuggest || (slst.size() < oldSug + maxcpdsugs))) { -@@ -283,6 +303,8 @@ bool SuggestMgr::suggest(std::vector& slst, - else - forgotchar(slst, word, cpdsuggest); - } -+ if (clock() > timelimit + TIMELIMIT_SUGGESTION) -+ return good_suggestion; - - // did we move a char - if ((slst.size() < maxSug) && (!cpdsuggest || (slst.size() < oldSug + maxcpdsugs))) { -@@ -291,6 +313,8 @@ bool SuggestMgr::suggest(std::vector& slst, - else - movechar(slst, word, cpdsuggest); - } -+ if (clock() > timelimit + TIMELIMIT_SUGGESTION) -+ return good_suggestion; - - // did we just hit the wrong key in place of a good char - if ((slst.size() < maxSug) && (!cpdsuggest || (slst.size() < oldSug + maxcpdsugs))) { -@@ -299,6 +323,8 @@ bool SuggestMgr::suggest(std::vector& slst, - else - badchar(slst, word, cpdsuggest); - } -+ if (clock() > timelimit + TIMELIMIT_SUGGESTION) -+ return good_suggestion; - - // did we double two characters - if ((slst.size() < maxSug) && (!cpdsuggest || (slst.size() < oldSug + maxcpdsugs))) { -@@ -307,6 +333,8 @@ bool SuggestMgr::suggest(std::vector& slst, - else - doubletwochars(slst, word, cpdsuggest); - } -+ if (clock() > timelimit + TIMELIMIT_SUGGESTION) -+ return good_suggestion; - - // perhaps we forgot to hit space and two words ran together - // (dictionary word pairs have top priority here, so -@@ -315,6 +343,8 @@ bool SuggestMgr::suggest(std::vector& slst, - if (!cpdsuggest || (!nosplitsugs && slst.size() < oldSug + maxcpdsugs)) { - good_suggestion = twowords(slst, word, cpdsuggest, good_suggestion); - } -+ if (clock() > timelimit + TIMELIMIT_SUGGESTION) -+ return good_suggestion; - - } // repeating ``for'' statement compounding support - -@@ -469,8 +499,11 @@ int SuggestMgr::replchars(std::vector& wlst, - return wlst.size(); - } - --// perhaps we doubled two characters (pattern aba -> ababa, for example vacation --// -> vacacation) -+// perhaps we doubled two characters -+// (for example vacation -> vacacation) -+// The recognized pattern with regex back-references: -+// "(.)(.)\1\2\1" or "..(.)(.)\1\2" -+ - int SuggestMgr::doubletwochars(std::vector& wlst, - const char* word, - int cpdsuggest) { -@@ -481,7 +514,7 @@ int SuggestMgr::doubletwochars(std::vector& wlst, - for (int i = 2; i < wl; i++) { - if (word[i] == word[i - 2]) { - state++; -- if (state == 3) { -+ if (state == 3 || (state == 2 && i >= 4)) { - std::string candidate(word, word + i - 1); - candidate.insert(candidate.end(), word + i + 1, word + wl); - testsug(wlst, candidate, cpdsuggest, NULL, NULL); -@@ -494,8 +527,11 @@ int SuggestMgr::doubletwochars(std::vector& wlst, - return wlst.size(); - } - --// perhaps we doubled two characters (pattern aba -> ababa, for example vacation --// -> vacacation) -+// perhaps we doubled two characters -+// (for example vacation -> vacacation) -+// The recognized pattern with regex back-references: -+// "(.)(.)\1\2\1" or "..(.)(.)\1\2" -+ - int SuggestMgr::doubletwochars_utf(std::vector& wlst, - const w_char* word, - int wl, -@@ -506,7 +542,7 @@ int SuggestMgr::doubletwochars_utf(std::vector& wlst, - for (int i = 2; i < wl; i++) { - if (word[i] == word[i - 2]) { - state++; -- if (state == 3) { -+ if (state == 3 || (state == 2 && i >= 4)) { - std::vector candidate_utf(word, word + i - 1); - candidate_utf.insert(candidate_utf.end(), word + i + 1, word + wl); - std::string candidate; -@@ -939,7 +975,8 @@ int SuggestMgr::longswapchar(std::vector& wlst, - // try swapping not adjacent chars one by one - for (std::string::iterator p = candidate.begin(); p < candidate.end(); ++p) { - for (std::string::iterator q = candidate.begin(); q < candidate.end(); ++q) { -- if (std::abs(std::distance(q, p)) > 1) { -+ size_t distance = std::abs(std::distance(q, p)); -+ if (distance > 1 && distance <= MAX_CHAR_DISTANCE) { - std::swap(*p, *q); - testsug(wlst, candidate, cpdsuggest, NULL, NULL); - std::swap(*p, *q); -@@ -958,7 +995,8 @@ int SuggestMgr::longswapchar_utf(std::vector& wlst, - // try swapping not adjacent chars - for (std::vector::iterator p = candidate_utf.begin(); p < candidate_utf.end(); ++p) { - for (std::vector::iterator q = candidate_utf.begin(); q < candidate_utf.end(); ++q) { -- if (std::abs(std::distance(q, p)) > 1) { -+ size_t distance = std::abs(std::distance(q, p)); -+ if (distance > 1 && distance <= MAX_CHAR_DISTANCE) { - std::swap(*p, *q); - std::string candidate; - u16_u8(candidate, candidate_utf); -@@ -980,7 +1018,7 @@ int SuggestMgr::movechar(std::vector& wlst, - - // try moving a char - for (std::string::iterator p = candidate.begin(); p < candidate.end(); ++p) { -- for (std::string::iterator q = p + 1; q < candidate.end() && std::distance(p, q) < 10; ++q) { -+ for (std::string::iterator q = p + 1; q < candidate.end() && std::distance(p, q) <= MAX_CHAR_DISTANCE; ++q) { - std::swap(*q, *(q - 1)); - if (std::distance(p, q) < 2) - continue; // omit swap char -@@ -990,7 +1028,7 @@ int SuggestMgr::movechar(std::vector& wlst, - } - - for (std::string::reverse_iterator p = candidate.rbegin(), pEnd = candidate.rend() - 1; p != pEnd; ++p) { -- for (std::string::reverse_iterator q = p + 1, qEnd = candidate.rend(); q != qEnd && std::distance(p, q) < 10; ++q) { -+ for (std::string::reverse_iterator q = p + 1, qEnd = candidate.rend(); q != qEnd && std::distance(p, q) <= MAX_CHAR_DISTANCE; ++q) { - std::swap(*q, *(q - 1)); - if (std::distance(p, q) < 2) - continue; // omit swap char -@@ -1013,7 +1051,7 @@ int SuggestMgr::movechar_utf(std::vector& wlst, - - // try moving a char - for (std::vector::iterator p = candidate_utf.begin(); p < candidate_utf.end(); ++p) { -- for (std::vector::iterator q = p + 1; q < candidate_utf.end() && std::distance(p, q) < 10; ++q) { -+ for (std::vector::iterator q = p + 1; q < candidate_utf.end() && std::distance(p, q) <= MAX_CHAR_DISTANCE; ++q) { - std::swap(*q, *(q - 1)); - if (std::distance(p, q) < 2) - continue; // omit swap char -@@ -1025,7 +1063,7 @@ int SuggestMgr::movechar_utf(std::vector& wlst, - } - - for (std::vector::reverse_iterator p = candidate_utf.rbegin(); p < candidate_utf.rend(); ++p) { -- for (std::vector::reverse_iterator q = p + 1; q < candidate_utf.rend() && std::distance(p, q) < 10; ++q) { -+ for (std::vector::reverse_iterator q = p + 1; q < candidate_utf.rend() && std::distance(p, q) <= MAX_CHAR_DISTANCE; ++q) { - std::swap(*q, *(q - 1)); - if (std::distance(p, q) < 2) - continue; // omit swap char -@@ -1715,15 +1753,15 @@ std::string SuggestMgr::suggest_morph(const std::string& in_w) { - TESTAFF(rv->astr, pAMgr->get_needaffix(), rv->alen) || - TESTAFF(rv->astr, pAMgr->get_onlyincompound(), rv->alen))) { - if (!HENTRY_FIND(rv, MORPH_STEM)) { -- result.append(" "); -+ result.push_back(MSEP_FLD); - result.append(MORPH_STEM); - result.append(w); - } - if (HENTRY_DATA(rv)) { -- result.append(" "); -+ result.push_back(MSEP_FLD); - result.append(HENTRY_DATA2(rv)); - } -- result.append("\n"); -+ result.push_back(MSEP_REC); - } - rv = rv->next_homonym; - } -@@ -1779,7 +1817,7 @@ std::string SuggestMgr::suggest_hentry_gen(hentry* rv, const char* pattern) { - HENTRY_DATA(rv), pattern, 0); - if (!aff.empty()) { - result.append(aff); -- result.append("\n"); -+ result.push_back(MSEP_REC); - } - } - -@@ -1803,7 +1841,7 @@ std::string SuggestMgr::suggest_hentry_gen(hentry* rv, const char* pattern) { - rv2->alen, HENTRY_DATA(rv2), pattern, 0); - if (!aff.empty()) { - result.append(aff); -- result.append("\n"); -+ result.push_back(MSEP_REC); - } - } - } -diff --git a/src/hunspell/suggestmgr.hxx b/src/hunspell/suggestmgr.hxx -index f0daf23..a435aac 100644 ---- a/src/hunspell/suggestmgr.hxx -+++ b/src/hunspell/suggestmgr.hxx -@@ -78,11 +78,6 @@ - #define MAXPHONSUGS 2 - #define MAXCOMPOUNDSUGS 3 - --// timelimit: max ~1/4 sec (process time on Linux) for a time consuming function --#define TIMELIMIT (CLOCKS_PER_SEC >> 2) --#define MINTIMER 100 --#define MAXPLUSTIMER 100 -- - #define NGRAM_LONGER_WORSE (1 << 0) - #define NGRAM_ANY_MISMATCH (1 << 1) - #define NGRAM_LOWERING (1 << 2) --- -2.7.4 - -- cgit v1.2.3