diff options
author | László Németh <nemeth@numbertext.org> | 2018-06-17 13:44:24 +0200 |
---|---|---|
committer | Miklos Vajna <vmiklos@collabora.co.uk> | 2018-06-28 09:09:19 +0200 |
commit | ec0715eaaf2655e2eaa9e228e26583b63038ba02 (patch) | |
tree | e7e0013ba7a5e10133c3f4412e64acbb47173574 | |
parent | dea74f31d2a0062547e279bb934c60731f5e560d (diff) |
tdf#118162 spell checking: fix freezing and add missing OCONV...
also commit "fix hunspell build break for non-cxx11 case"
by Christian Lohmaier.
(cherry-picked from commit b691e5824a6346d2fe7f702b5280b56532a2f89e
and commit 79c0327bf8f472faed0dd950d42c060e8766d1c4)
... conversion, also other smaller fixes of spelling, suggestion
and morphological analysis using recent Hunspell commits.
Freezes of several seconds or more occurred with Hunspell
dictionaries that use compound word handling, because of (1)
combinatorial explosion of overlapping word parts,
(2) unlimited suggestion algorithms (for example MAP), or
(3) multiple suggestion searches for a capitalized,
mixed-case or abbreviated long word.
Reviewed-on: https://gerrit.libreoffice.org/55965
Tested-by: Jenkins
Reviewed-by: László Németh <nemeth@numbertext.org>
Change-Id: I73e196f907e9b73dcd981d275cedb33878a554f6
Reviewed-on: https://gerrit.libreoffice.org/56393
Tested-by: Jenkins
Reviewed-by: László Németh <nemeth@numbertext.org>
Reviewed-on: https://gerrit.libreoffice.org/56349
Reviewed-by: Miklos Vajna <vmiklos@collabora.co.uk>
3 files changed, 1343 insertions, 1 deletions
diff --git a/external/hunspell/0001-Hunspell-patches-for-missing-OCONV-conversion.patch b/external/hunspell/0001-Hunspell-patches-for-missing-OCONV-conversion.patch new file mode 100644 index 000000000000..83d429f50979 --- /dev/null +++ b/external/hunspell/0001-Hunspell-patches-for-missing-OCONV-conversion.patch @@ -0,0 +1,175 @@ +From e13ff056fd65990b88d29fb9eae304b411e58234 Mon Sep 17 00:00:00 2001 +From: Changwoo Ryu <cwryu@debian.org> +Date: Wed, 8 Mar 2017 14:04:26 +0900 +Subject: [PATCH] Hunspell patches for missing OCONV conversion + +4e2abfd Clean up PR #479 +cc2d71e Add oconv2 test to Makefile +ca14fdb Avoid gotos across variable initialization +7e5cb62 Use goto to reduce repetitive code +f528192 Add missing OCONV conversion of root and morphemes output +--- + src/hunspell/hunspell.cxx | 59 +++++++++++++++++++++++++++++++++++++++-------- + tests/test.sh | 23 +++++++++++++++--- + 2 files changed, 70 insertions(+), 12 deletions(-) + +diff --git a/src/hunspell/hunspell.cxx b/src/hunspell/hunspell.cxx +index 1100a6f..87d1b4a 100644 +--- a/src/hunspell/hunspell.cxx ++++ b/src/hunspell/hunspell.cxx +@@ -98,10 +98,13 @@ public: + std::vector<std::string> stem(const std::string& word); + std::vector<std::string> stem(const std::vector<std::string>& morph); + std::vector<std::string> analyze(const std::string& word); ++ std::vector<std::string> analyze_internal(const std::string& word); + int get_langnum() const; + bool input_conv(const std::string& word, std::string& dest); + bool spell(const std::string& word, int* info = NULL, std::string* root = NULL); ++ bool spell_internal(const std::string& word, int* info = NULL, std::string* root = NULL); + std::vector<std::string> suggest(const std::string& word); ++ std::vector<std::string> suggest_internal(const std::string& word); + const std::string& get_wordchars() const; + const std::vector<w_char>& get_wordchars_utf16() const; + const std::string& get_dict_encoding() const; +@@ -415,6 +418,21 @@ bool 
Hunspell::spell(const std::string& word, int* info, std::string* root) { + } + + bool HunspellImpl::spell(const std::string& word, int* info, std::string* root) { ++ bool r = spell_internal(word, info, root); ++ if (r && root) { ++ // output conversion ++ RepList* rl = (pAMgr) ? pAMgr->get_oconvtable() : NULL; ++ if (rl) { ++ std::string wspace; ++ if (rl->conv(*root, wspace)) { ++ *root = wspace; ++ } ++ } ++ } ++ return r; ++} ++ ++bool HunspellImpl::spell_internal(const std::string& word, int* info, std::string* root) { + struct hentry* rv = NULL; + + int info2 = 0; +@@ -834,6 +852,22 @@ std::vector<std::string> Hunspell::suggest(const std::string& word) { + + std::vector<std::string> HunspellImpl::suggest(const std::string& word) { + std::vector<std::string> slst; ++ slst = suggest_internal(word); ++ // output conversion ++ RepList* rl = (pAMgr) ? pAMgr->get_oconvtable() : NULL; ++ if (rl) { ++ for (size_t i = 0; rl && i < slst.size(); ++i) { ++ std::string wspace; ++ if (rl->conv(slst[i], wspace)) { ++ slst[i] = wspace; ++ } ++ } ++ } ++ return slst; ++} ++ ++std::vector<std::string> HunspellImpl::suggest_internal(const std::string& word) { ++ std::vector<std::string> slst; + + int onlycmpdsug = 0; + if (!pSMgr || m_HMgrs.empty()) +@@ -1150,15 +1184,6 @@ std::vector<std::string> HunspellImpl::suggest(const std::string& word) { + } + slst.resize(l); + +- // output conversion +- rl = (pAMgr) ? pAMgr->get_oconvtable() : NULL; +- for (size_t j = 0; rl && j < slst.size(); ++j) { +- std::string wspace; +- if (rl->conv(slst[j], wspace)) { +- slst[j] = wspace; +- } +- } +- + return slst; + } + +@@ -1365,6 +1390,22 @@ std::vector<std::string> Hunspell::analyze(const std::string& word) { + + std::vector<std::string> HunspellImpl::analyze(const std::string& word) { + std::vector<std::string> slst; ++ slst = analyze_internal(word); ++ // output conversion ++ RepList* rl = (pAMgr) ? 
pAMgr->get_oconvtable() : NULL; ++ if (rl) { ++ for (size_t i = 0; rl && i < slst.size(); ++i) { ++ std::string wspace; ++ if (rl->conv(slst[i], wspace)) { ++ slst[i] = wspace; ++ } ++ } ++ } ++ return slst; ++} ++ ++std::vector<std::string> HunspellImpl::analyze_internal(const std::string& word) { ++ std::vector<std::string> slst; + if (!pSMgr || m_HMgrs.empty()) + return slst; + if (utf8) { +diff --git a/tests/test.sh b/tests/test.sh +index 22e5087..9344f82 100755 +--- a/tests/test.sh ++++ b/tests/test.sh +@@ -112,7 +112,7 @@ if test -f $TESTDIR/$NAME.wrong; then + echo "=============================================" + echo "Fail in $NAME.wrong. Bad words recognised as good:" + tr -d ' ' <$TESTDIR/$NAME.wrong >$TEMPDIR/$NAME.wrong.detab +- diff $TEMPDIR/$NAME.wrong.detab $TEMPDIR/$NAME.wrong | grep '^<' | sed 's/^..//' ++ diff -u $TEMPDIR/$NAME.wrong.detab $TEMPDIR/$NAME.wrong | grep '^<' | sed 's/^..//' + rm -f $TEMPDIR/$NAME.wrong $TEMPDIR/$NAME.wrong.detab + exit 1 + fi +@@ -121,6 +121,23 @@ fi + + check_valgrind_log "bad words" + ++# Tests good words' root ++if test -f $TESTDIR/$NAME.root; then ++ # Extract the root words of the affixed words, after '+' ++ hunspell $* -d $TESTDIR/$NAME <$TESTDIR/$NAME.good | grep -a '^+ ' | \ ++ sed 's/^+ //' >$TEMPDIR/$NAME.root ++ if ! cmp $TEMPDIR/$NAME.root $TESTDIR/$NAME.root >/dev/null; then ++ echo "=============================================" ++ echo "Fail in $NAME.root. Bad prefix or suffix?" ++ diff -u $TESTDIR/$NAME.root $TEMPDIR/$NAME.root ++ rm -f $TEMPDIR/$NAME.root ++ exit 1 ++ fi ++ rm -f $TEMPDIR/$NAME.root ++fi ++ ++check_valgrind_log "root" ++ + # Tests morphological analysis + if test -f $TESTDIR/$NAME.morph; then + sed 's/ $//' $TESTDIR/$NAME.good >$TEMPDIR/$NAME.good +@@ -129,7 +146,7 @@ if test -f $TESTDIR/$NAME.morph; then + if ! cmp $TEMPDIR/$NAME.morph $TESTDIR/$NAME.morph >/dev/null; then + echo "=============================================" + echo "Fail in $NAME.morph. Bad analysis?" 
+- diff $TESTDIR/$NAME.morph $TEMPDIR/$NAME.morph | grep '^<' | sed 's/^..//' ++ diff -u $TESTDIR/$NAME.morph $TEMPDIR/$NAME.morph | grep '^<' | sed 's/^..//' + rm -f $TEMPDIR/$NAME.morph + exit 1 + fi +@@ -145,7 +162,7 @@ if test -f $TESTDIR/$NAME.sug; then + if ! cmp $TEMPDIR/$NAME.sug $TESTDIR/$NAME.sug >/dev/null; then + echo "=============================================" + echo "Fail in $NAME.sug. Bad suggestion?" +- diff $TESTDIR/$NAME.sug $TEMPDIR/$NAME.sug ++ diff -u $TESTDIR/$NAME.sug $TEMPDIR/$NAME.sug + rm -f $TEMPDIR/$NAME.sug + exit 1 + fi +-- +2.7.4 + diff --git a/external/hunspell/0001-recent-Hunspell-fixes-for-suggestion-spelling-and-an.patch b/external/hunspell/0001-recent-Hunspell-fixes-for-suggestion-spelling-and-an.patch new file mode 100644 index 000000000000..7c9b255abe74 --- /dev/null +++ b/external/hunspell/0001-recent-Hunspell-fixes-for-suggestion-spelling-and-an.patch @@ -0,0 +1,1165 @@ +in addition to that: configure.ac portion was fixed to not have unbalanced [] + +From d9f392dc35f75b1246862b2db8090e8d5b6ec068 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?L=C3=A1szl=C3=B3=20N=C3=A9meth?= <nemeth@numbertext.org> +Date: Sun, 17 Jun 2018 17:21:01 +0200 +Subject: [PATCH] recent Hunspell fixes for suggestion, spelling and analysis + +6f976bf fix compiling on WIN32, use time.h and thread_local +24f0963 [morph] better time limitation for morphological analysis +8e6ceaa [spelling] tdf#118162 better time limitation for compounding +3f00ff3 [suggestion] tdf#118162 time limit for a HunspellImpl::suggest() call +a1f9dfa [suggestion] tdf#118162 time limit for a SuggestMgr::suggest() call +d70bf2d [spelling] optimize IGNORE to speed up dictionary loading +16b4900 [spelling] add time limit for compound word handling +b0ded55 [suggestion] lower limit for doubletwochars +b3a44fa [suggestion] limit longswapchar, lower limit for movechar +a295af9 [morph] clean up for separators of morphological analysis +ca5f629 [morph] add missing field separator for members 
with prefixes +--- + Makefile.in | 1 + + configure.ac | 8 ++ + src/hunspell/affentry.cxx | 12 +-- + src/hunspell/affixmgr.cxx | 89 +++++++++++++------ + src/hunspell/atypes.hxx | 10 +++ + src/hunspell/csutil.hxx | 12 +++ + src/hunspell/hashmgr.cxx | 2 +- + src/hunspell/hunspell.cxx | 210 ++++++++++++++++++++++++++------------------ + src/hunspell/hunvisapi.h | 12 ++- + src/hunspell/hunvisapi.h.in | 12 ++- + src/hunspell/suggestmgr.cxx | 72 +++++++++++---- + src/hunspell/suggestmgr.hxx | 5 -- + 12 files changed, 300 insertions(+), 145 deletions(-) + +diff --git a/Makefile.in b/Makefile.in +index 06d933e..241f797 100644 +--- a/Makefile.in ++++ b/Makefile.in +@@ -296,6 +296,7 @@ GMSGFMT = @GMSGFMT@ + GMSGFMT_015 = @GMSGFMT_015@ + GREP = @GREP@ + HAVE_ASPRINTF = @HAVE_ASPRINTF@ ++HAVE_CXX11 = @HAVE_CXX11@ + HAVE_NEWLOCALE = @HAVE_NEWLOCALE@ + HAVE_POSIX_PRINTF = @HAVE_POSIX_PRINTF@ + HAVE_SNPRINTF = @HAVE_SNPRINTF@ +diff --git a/configure.ac b/configure.ac +index fb79d0d..2936107 100644 +--- a/configure.ac ++++ b/configure.ac +@@ -16,6 +16,14 @@ HUNSPELL_VERSION_MINOR=`echo $VERSION | cut -d"." -f2` + AC_SUBST(HUNSPELL_VERSION_MAJOR) + AC_SUBST(HUNSPELL_VERSION_MINOR) + ++# check C++11 compiling environment for thread_local ++# to handle time limits better also with threads ++AS_CASE([$CXXFLAGS], ++ [*-std=c++11*], [HAVE_CXX11=1], ++ [HAVE_CXX11=0] ++ ) ++AC_SUBST(HAVE_CXX11) ++ + # Checks for programs. 
+ AC_PROG_CXX + AC_PROG_CC +diff --git a/src/hunspell/affentry.cxx b/src/hunspell/affentry.cxx +index 4ef0c00..ffcdb21 100644 +--- a/src/hunspell/affentry.cxx ++++ b/src/hunspell/affentry.cxx +@@ -399,28 +399,28 @@ std::string PfxEntry::check_morph(const char* word, + ((!needflag) || TESTAFF(he->astr, needflag, he->alen) || + (contclass && TESTAFF(contclass, needflag, contclasslen)))) { + if (morphcode) { +- result.append(" "); ++ result.push_back(MSEP_FLD); + result.append(morphcode); + } else + result.append(getKey()); + if (!HENTRY_FIND(he, MORPH_STEM)) { +- result.append(" "); ++ result.push_back(MSEP_FLD); + result.append(MORPH_STEM); + result.append(HENTRY_WORD(he)); + } + // store the pointer of the hash entry + if (HENTRY_DATA(he)) { +- result.append(" "); ++ result.push_back(MSEP_FLD); + result.append(HENTRY_DATA2(he)); + } else { + // return with debug information + char* flag = pmyMgr->encode_flag(getFlag()); +- result.append(" "); ++ result.push_back(MSEP_FLD); + result.append(MORPH_FLAG); + result.append(flag); + free(flag); + } +- result.append("\n"); ++ result.push_back(MSEP_REC); + } + he = he->next_homonym; + } while (he); +@@ -804,7 +804,7 @@ std::string SfxEntry::check_twosfx_morph(const char* word, + if (!st.empty()) { + if (ppfx->getMorph()) { + result.append(ppfx->getMorph()); +- result.append(" "); ++ result.push_back(MSEP_FLD); + } + result.append(st); + mychomp(result); +diff --git a/src/hunspell/affixmgr.cxx b/src/hunspell/affixmgr.cxx +index 2c540f2..1610ef0 100644 +--- a/src/hunspell/affixmgr.cxx ++++ b/src/hunspell/affixmgr.cxx +@@ -72,6 +72,7 @@ + #include <string.h> + #include <stdio.h> + #include <ctype.h> ++#include <time.h> + + #include <algorithm> + #include <limits> +@@ -1014,7 +1015,7 @@ int AffixMgr::process_sfx_order() { + // add flags to the result for dictionary debugging + std::string& AffixMgr::debugflag(std::string& result, unsigned short flag) { + char* st = encode_flag(flag); +- result.append(" "); ++ 
result.push_back(MSEP_FLD); + result.append(MORPH_FLAG); + if (st) { + result.append(st); +@@ -1594,6 +1595,17 @@ struct hentry* AffixMgr::compound_check(const std::string& word, + + int checked_prefix; + ++ // add a time limit to handle possible ++ // combinatorical explosion of the overlapping words ++ ++ HUNSPELL_THREAD_LOCAL clock_t timelimit; ++ ++ if (wordnum == 0) ++ timelimit = clock(); ++ else if (timelimit != 0 && (clock() > timelimit + TIMELIMIT)) { ++ timelimit = 0; ++ } ++ + setcminmax(&cmin, &cmax, word.c_str(), len); + + st.assign(word); +@@ -1618,6 +1630,9 @@ struct hentry* AffixMgr::compound_check(const std::string& word, + + do { // simplified checkcompoundpattern loop + ++ if (timelimit == 0) ++ return 0; ++ + if (scpd > 0) { + for (; scpd <= checkcpdtable.size() && + (checkcpdtable[scpd - 1].pattern3.empty() || +@@ -2186,6 +2201,17 @@ int AffixMgr::compound_check_morph(const char* word, + char affixed = 0; + hentry** oldwords = words; + ++ // add a time limit to handle possible ++ // combinatorical explosion of the overlapping words ++ ++ HUNSPELL_THREAD_LOCAL clock_t timelimit; ++ ++ if (wordnum == 0) ++ timelimit = clock(); ++ else if (timelimit != 0 && (clock() > timelimit + TIMELIMIT)) { ++ timelimit = 0; ++ } ++ + setcminmax(&cmin, &cmax, word, len); + + st.assign(word); +@@ -2204,6 +2230,9 @@ int AffixMgr::compound_check_morph(const char* word, + + do { // onlycpdrule loop + ++ if (timelimit == 0) ++ return 0; ++ + oldnumsyllable = numsyllable; + oldwordnum = wordnum; + checked_prefix = 0; +@@ -2245,6 +2274,9 @@ int AffixMgr::compound_check_morph(const char* word, + rv = rv->next_homonym; + } + ++ if (timelimit == 0) ++ return 0; ++ + if (rv) + affixed = 0; + +@@ -2435,22 +2467,22 @@ int AffixMgr::compound_check_morph(const char* word, + + if (rv && words && words[wnum + 1]) { + result.append(presult); +- result.append(" "); ++ result.push_back(MSEP_FLD); + result.append(MORPH_PART); + result.append(word + i); + if (complexprefixes && 
HENTRY_DATA(rv)) + result.append(HENTRY_DATA2(rv)); + if (!HENTRY_FIND(rv, MORPH_STEM)) { +- result.append(" "); ++ result.push_back(MSEP_FLD); + result.append(MORPH_STEM); + result.append(HENTRY_WORD(rv)); + } + // store the pointer of the hash entry + if (!complexprefixes && HENTRY_DATA(rv)) { +- result.append(" "); ++ result.push_back(MSEP_FLD); + result.append(HENTRY_DATA2(rv)); + } +- result.append("\n"); ++ result.push_back(MSEP_REC); + return 0; + } + +@@ -2492,7 +2524,7 @@ int AffixMgr::compound_check_morph(const char* word, + ((!checkcompounddup || (rv != rv_first)))) { + // bad compound word + result.append(presult); +- result.append(" "); ++ result.push_back(MSEP_FLD); + result.append(MORPH_PART); + result.append(word + i); + +@@ -2500,17 +2532,17 @@ int AffixMgr::compound_check_morph(const char* word, + if (complexprefixes) + result.append(HENTRY_DATA2(rv)); + if (!HENTRY_FIND(rv, MORPH_STEM)) { +- result.append(" "); ++ result.push_back(MSEP_FLD); + result.append(MORPH_STEM); + result.append(HENTRY_WORD(rv)); + } + // store the pointer of the hash entry + if (!complexprefixes) { +- result.append(" "); ++ result.push_back(MSEP_FLD); + result.append(HENTRY_DATA2(rv)); + } + } +- result.append("\n"); ++ result.push_back(MSEP_REC); + ok = 1; + } + +@@ -2549,7 +2581,7 @@ int AffixMgr::compound_check_morph(const char* word, + line_uniq_app(m, MSEP_REC); + result.append(m); + } +- result.append("\n"); ++ result.push_back(MSEP_REC); + ok = 1; + } + } +@@ -2639,6 +2671,7 @@ int AffixMgr::compound_check_morph(const char* word, + result.append(MORPH_PART); + result.append(word + i); + line_uniq_app(m, MSEP_REC); ++ result.push_back(MSEP_FLD); + result.append(m); + } + result.push_back(MSEP_REC); +@@ -2864,17 +2897,17 @@ std::string AffixMgr::suffix_check_twosfx_morph(const char* word, + if (ppfx) { + if (ppfx->getMorph()) { + result.append(ppfx->getMorph()); +- result.append(" "); ++ result.push_back(MSEP_FLD); + } else + debugflag(result, ppfx->getFlag()); + } + 
result.append(st); + if (se->getMorph()) { +- result.append(" "); ++ result.push_back(MSEP_FLD); + result.append(se->getMorph()); + } else + debugflag(result, se->getFlag()); +- result.append("\n"); ++ result.push_back(MSEP_REC); + } + } + se = se->getNext(); +@@ -2899,12 +2932,12 @@ std::string AffixMgr::suffix_check_twosfx_morph(const char* word, + result3.clear(); + + if (sptr->getMorph()) { +- result3.append(" "); ++ result3.push_back(MSEP_FLD); + result3.append(sptr->getMorph()); + } else + debugflag(result3, sptr->getFlag()); + strlinecat(result2, result3); +- result2.append("\n"); ++ result2.push_back(MSEP_REC); + result.append(result2); + } + } +@@ -2967,28 +3000,28 @@ std::string AffixMgr::suffix_check_morph(const char* word, + if (ppfx) { + if (ppfx->getMorph()) { + result.append(ppfx->getMorph()); +- result.append(" "); ++ result.push_back(MSEP_FLD); + } else + debugflag(result, ppfx->getFlag()); + } + if (complexprefixes && HENTRY_DATA(rv)) + result.append(HENTRY_DATA2(rv)); + if (!HENTRY_FIND(rv, MORPH_STEM)) { +- result.append(" "); ++ result.push_back(MSEP_FLD); + result.append(MORPH_STEM); + result.append(HENTRY_WORD(rv)); + } + + if (!complexprefixes && HENTRY_DATA(rv)) { +- result.append(" "); ++ result.push_back(MSEP_FLD); + result.append(HENTRY_DATA2(rv)); + } + if (se->getMorph()) { +- result.append(" "); ++ result.push_back(MSEP_FLD); + result.append(se->getMorph()); + } else + debugflag(result, se->getFlag()); +- result.append("\n"); ++ result.push_back(MSEP_REC); + rv = se->get_next_homonym(rv, sfxopts, ppfx, cclass, needflag); + } + } +@@ -3034,29 +3067,29 @@ std::string AffixMgr::suffix_check_morph(const char* word, + if (ppfx) { + if (ppfx->getMorph()) { + result.append(ppfx->getMorph()); +- result.append(" "); ++ result.push_back(MSEP_FLD); + } else + debugflag(result, ppfx->getFlag()); + } + if (complexprefixes && HENTRY_DATA(rv)) + result.append(HENTRY_DATA2(rv)); + if (!HENTRY_FIND(rv, MORPH_STEM)) { +- result.append(" "); ++ 
result.push_back(MSEP_FLD); + result.append(MORPH_STEM); + result.append(HENTRY_WORD(rv)); + } + + if (!complexprefixes && HENTRY_DATA(rv)) { +- result.append(" "); ++ result.push_back(MSEP_FLD); + result.append(HENTRY_DATA2(rv)); + } + + if (sptr->getMorph()) { +- result.append(" "); ++ result.push_back(MSEP_FLD); + result.append(sptr->getMorph()); + } else + debugflag(result, sptr->getFlag()); +- result.append("\n"); ++ result.push_back(MSEP_REC); + rv = sptr->get_next_homonym(rv, sfxopts, ppfx, cclass, needflag); + } + sptr = sptr->getNextEQ(); +@@ -3245,7 +3278,7 @@ std::string AffixMgr::morphgen(const char* ts, + // use input suffix fields, if exist + if (strstr(morph, MORPH_INFL_SFX) || strstr(morph, MORPH_DERI_SFX)) { + mymorph.assign(morph); +- mymorph.append(" "); ++ mymorph.push_back(MSEP_FLD); + stemmorphcatpos = mymorph.size(); + } else { + stemmorphcatpos = std::string::npos; +@@ -4557,7 +4590,7 @@ bool AffixMgr::parse_affix(const std::string& line, + entry->appnd = std::string(start_piece, dash); + std::string dash_str(dash + 1, iter); + +- if (!ignorechars.empty()) { ++ if (!ignorechars.empty() && !has_no_ignored_chars(entry->appnd, ignorechars)) { + if (utf8) { + remove_ignored_chars_utf(entry->appnd, ignorechars_utf16); + } else { +@@ -4593,7 +4626,7 @@ bool AffixMgr::parse_affix(const std::string& line, + } else { + entry->appnd = std::string(start_piece, iter); + +- if (!ignorechars.empty()) { ++ if (!ignorechars.empty() && !has_no_ignored_chars(entry->appnd, ignorechars)) { + if (utf8) { + remove_ignored_chars_utf(entry->appnd, ignorechars_utf16); + } else { +diff --git a/src/hunspell/atypes.hxx b/src/hunspell/atypes.hxx +index f841523..38396db 100644 +--- a/src/hunspell/atypes.hxx ++++ b/src/hunspell/atypes.hxx +@@ -95,6 +95,16 @@ static inline void HUNSPELL_WARNING(FILE*, const char*, ...) {} + + #define TESTAFF(a, b, c) (std::binary_search(a, a + c, b)) + ++// timelimit: max. 
~1/4 sec (process time on Linux) for ++// for a suggestion, including max. ~/10 sec for a case ++// sensitive plain or compound word suggestion, within ++// ~1/20 sec long time consuming suggestion functions ++#define TIMELIMIT_GLOBAL (CLOCKS_PER_SEC / 4) ++#define TIMELIMIT_SUGGESTION (CLOCKS_PER_SEC / 10) ++#define TIMELIMIT (CLOCKS_PER_SEC / 20) ++#define MINTIMER 100 ++#define MAXPLUSTIMER 100 ++ + struct guessword { + char* word; + bool allow; +diff --git a/src/hunspell/csutil.hxx b/src/hunspell/csutil.hxx +index 01c0a24..3397257 100644 +--- a/src/hunspell/csutil.hxx ++++ b/src/hunspell/csutil.hxx +@@ -311,4 +311,16 @@ LIBHUNSPELL_DLL_EXPORTED inline char* HENTRY_FIND(struct hentry* h, + return (HENTRY_DATA(h) ? strstr(HENTRY_DATA(h), p) : NULL); + } + ++// to avoid unnecessary string copies and Unicode conversions ++// we simply check the ignored_chars characters in the word ++// (in the case of UTF-8 encoded strings, "false" means ++// "likely false", if ignored_chars characters are not ASCII) ++inline bool has_no_ignored_chars(const std::string& word, ++ const std::string& ignored_chars) { ++ for (std::string::const_iterator it = ignored_chars.begin(), end = ignored_chars.end(); it != end; ++it) ++ if (word.find(*it) != std::string::npos) ++ return false; ++ return true; ++} ++ + #endif +diff --git a/src/hunspell/hashmgr.cxx b/src/hunspell/hashmgr.cxx +index 5183f02..7e843c3 100644 +--- a/src/hunspell/hashmgr.cxx ++++ b/src/hunspell/hashmgr.cxx +@@ -190,7 +190,7 @@ int HashMgr::add_word(const std::string& in_word, + + std::string *word_copy = NULL; + std::string *desc_copy = NULL; +- if (!ignorechars.empty() || complexprefixes) { ++ if ((!ignorechars.empty() && !has_no_ignored_chars(in_word, ignorechars)) || complexprefixes) { + word_copy = new std::string(in_word); + + if (!ignorechars.empty()) { +diff --git a/src/hunspell/hunspell.cxx b/src/hunspell/hunspell.cxx +index d6e871f..0dcd748 100644 +--- a/src/hunspell/hunspell.cxx ++++ 
b/src/hunspell/hunspell.cxx +@@ -71,6 +71,7 @@ + #include <stdlib.h> + #include <string.h> + #include <stdio.h> ++#include <time.h> + + #include "affixmgr.hxx" + #include "hunspell.hxx" +@@ -101,7 +102,8 @@ public: + bool spell(const std::string& word, int* info = NULL, std::string* root = NULL); + bool spell_internal(const std::string& word, int* info = NULL, std::string* root = NULL); + std::vector<std::string> suggest(const std::string& word); +- std::vector<std::string> suggest_internal(const std::string& word); ++ std::vector<std::string> suggest_internal(const std::string& word, ++ bool& capitalized, size_t& abbreviated, int& captype); + const std::string& get_wordchars() const; + const std::vector<w_char>& get_wordchars_utf16() const; + const std::string& get_dict_encoding() const; +@@ -755,7 +757,7 @@ struct hentry* HunspellImpl::checkword(const std::string& w, int* info, std::str + int len; + + const char* ignoredchars = pAMgr ? pAMgr->get_ignore() : NULL; +- if (ignoredchars != NULL) { ++ if (ignoredchars != NULL && !has_no_ignored_chars(w, ignoredchars)) { + w2.assign(w); + if (utf8) { + const std::vector<w_char>& ignoredchars_utf16 = +@@ -887,8 +889,83 @@ std::vector<std::string> Hunspell::suggest(const std::string& word) { + } + + std::vector<std::string> HunspellImpl::suggest(const std::string& word) { +- std::vector<std::string> slst; +- slst = suggest_internal(word); ++ bool capwords; ++ size_t abbv; ++ int captype; ++ std::vector<std::string> slst = suggest_internal(word, capwords, abbv, captype); ++ // word reversing wrapper for complex prefixes ++ if (complexprefixes) { ++ for (size_t j = 0; j < slst.size(); ++j) { ++ if (utf8) ++ reverseword_utf(slst[j]); ++ else ++ reverseword(slst[j]); ++ } ++ } ++ ++ // capitalize ++ if (capwords) ++ for (size_t j = 0; j < slst.size(); ++j) { ++ mkinitcap(slst[j]); ++ } ++ ++ // expand suggestions with dot(s) ++ if (abbv && pAMgr && pAMgr->get_sugswithdots()) { ++ for (size_t j = 0; j < slst.size(); ++j) { ++ 
slst[j].append(word.substr(word.size() - abbv)); ++ } ++ } ++ ++ // remove bad capitalized and forbidden forms ++ if (pAMgr && (pAMgr->get_keepcase() || pAMgr->get_forbiddenword())) { ++ switch (captype) { ++ case INITCAP: ++ case ALLCAP: { ++ size_t l = 0; ++ for (size_t j = 0; j < slst.size(); ++j) { ++ if (slst[j].find(' ') == std::string::npos && !spell(slst[j])) { ++ std::string s; ++ std::vector<w_char> w; ++ if (utf8) { ++ u8_u16(w, slst[j]); ++ } else { ++ s = slst[j]; ++ } ++ mkallsmall2(s, w); ++ if (spell(s)) { ++ slst[l] = s; ++ ++l; ++ } else { ++ mkinitcap2(s, w); ++ if (spell(s)) { ++ slst[l] = s; ++ ++l; ++ } ++ } ++ } else { ++ slst[l] = slst[j]; ++ ++l; ++ } ++ } ++ slst.resize(l); ++ } ++ } ++ } ++ ++ // remove duplications ++ size_t l = 0; ++ for (size_t j = 0; j < slst.size(); ++j) { ++ slst[l] = slst[j]; ++ for (size_t k = 0; k < l; ++k) { ++ if (slst[k] == slst[j]) { ++ --l; ++ break; ++ } ++ } ++ ++l; ++ } ++ slst.resize(l); ++ + // output conversion + RepList* rl = (pAMgr) ? 
pAMgr->get_oconvtable() : NULL; + if (rl) { +@@ -902,7 +979,8 @@ std::vector<std::string> HunspellImpl::suggest(const std::string& word) { + return slst; + } + +-std::vector<std::string> HunspellImpl::suggest_internal(const std::string& word) { ++std::vector<std::string> HunspellImpl::suggest_internal(const std::string& word, ++ bool& capwords, size_t& abbv, int& captype) { + std::vector<std::string> slst; + + int onlycmpdsug = 0; +@@ -920,8 +998,8 @@ std::vector<std::string> HunspellImpl::suggest_internal(const std::string& word) + if (word.size() >= MAXWORDLEN) + return slst; + } +- int captype = NOCAP; +- size_t abbv = 0; ++ captype = NOCAP; ++ abbv = 0; + size_t wl = 0; + + std::string scw; +@@ -942,9 +1020,13 @@ std::vector<std::string> HunspellImpl::suggest_internal(const std::string& word) + return slst; + } + +- int capwords = 0; ++ capwords = false; + bool good = false; + ++ HUNSPELL_THREAD_LOCAL clock_t timelimit; ++ // initialize in every suggestion call ++ timelimit = clock(); ++ + // check capitalized form for FORCEUCASE + if (pAMgr && captype == NOCAP && pAMgr->get_forceucase()) { + int info = SPELL_ORIGCAP; +@@ -959,26 +1041,36 @@ std::vector<std::string> HunspellImpl::suggest_internal(const std::string& word) + switch (captype) { + case NOCAP: { + good |= pSMgr->suggest(slst, scw.c_str(), &onlycmpdsug); ++ if (clock() > timelimit + TIMELIMIT_GLOBAL) ++ return slst; + if (abbv) { + std::string wspace(scw); + wspace.push_back('.'); + good |= pSMgr->suggest(slst, wspace.c_str(), &onlycmpdsug); ++ if (clock() > timelimit + TIMELIMIT_GLOBAL) ++ return slst; + } + break; + } + + case INITCAP: { +- capwords = 1; ++ capwords = true; + good |= pSMgr->suggest(slst, scw.c_str(), &onlycmpdsug); ++ if (clock() > timelimit + TIMELIMIT_GLOBAL) ++ return slst; + std::string wspace(scw); + mkallsmall2(wspace, sunicw); + good |= pSMgr->suggest(slst, wspace.c_str(), &onlycmpdsug); ++ if (clock() > timelimit + TIMELIMIT_GLOBAL) ++ return slst; + break; + } + case 
HUHINITCAP: +- capwords = 1; ++ capwords = true; + case HUHCAP: { + good |= pSMgr->suggest(slst, scw.c_str(), &onlycmpdsug); ++ if (clock() > timelimit + TIMELIMIT_GLOBAL) ++ return slst; + // something.The -> something. The + size_t dot_pos = scw.find('.'); + if (dot_pos != std::string::npos) { +@@ -1005,6 +1097,8 @@ std::vector<std::string> HunspellImpl::suggest_internal(const std::string& word) + wspace = scw; + mkinitsmall2(wspace, sunicw); + good |= pSMgr->suggest(slst, wspace.c_str(), &onlycmpdsug); ++ if (clock() > timelimit + TIMELIMIT_GLOBAL) ++ return slst; + } + wspace = scw; + mkallsmall2(wspace, sunicw); +@@ -1012,11 +1106,15 @@ std::vector<std::string> HunspellImpl::suggest_internal(const std::string& word) + insert_sug(slst, wspace); + size_t prevns = slst.size(); + good |= pSMgr->suggest(slst, wspace.c_str(), &onlycmpdsug); ++ if (clock() > timelimit + TIMELIMIT_GLOBAL) ++ return slst; + if (captype == HUHINITCAP) { + mkinitcap2(wspace, sunicw); + if (spell(wspace.c_str())) + insert_sug(slst, wspace); + good |= pSMgr->suggest(slst, wspace.c_str(), &onlycmpdsug); ++ if (clock() > timelimit + TIMELIMIT_GLOBAL) ++ return slst; + } + // aNew -> "a New" (instead of "a new") + for (size_t j = prevns; j < slst.size(); ++j) { +@@ -1044,10 +1142,14 @@ std::vector<std::string> HunspellImpl::suggest_internal(const std::string& word) + std::string wspace(scw); + mkallsmall2(wspace, sunicw); + good |= pSMgr->suggest(slst, wspace.c_str(), &onlycmpdsug); ++ if (clock() > timelimit + TIMELIMIT_GLOBAL) ++ return slst; + if (pAMgr && pAMgr->get_keepcase() && spell(wspace.c_str())) + insert_sug(slst, wspace); + mkinitcap2(wspace, sunicw); + good |= pSMgr->suggest(slst, wspace.c_str(), &onlycmpdsug); ++ if (clock() > timelimit + TIMELIMIT_GLOBAL) ++ return slst; + for (size_t j = 0; j < slst.size(); ++j) { + mkallcap(slst[j]); + if (pAMgr && pAMgr->get_checksharps()) { +@@ -1084,21 +1186,27 @@ std::vector<std::string> HunspellImpl::suggest_internal(const std::string& 
word) + switch (captype) { + case NOCAP: { + pSMgr->ngsuggest(slst, scw.c_str(), m_HMgrs, NOCAP); ++ if (clock() > timelimit + TIMELIMIT_GLOBAL) ++ return slst; + break; + } + case HUHINITCAP: +- capwords = 1; ++ capwords = true; + case HUHCAP: { + std::string wspace(scw); + mkallsmall2(wspace, sunicw); + pSMgr->ngsuggest(slst, wspace.c_str(), m_HMgrs, HUHCAP); ++ if (clock() > timelimit + TIMELIMIT_GLOBAL) ++ return slst; + break; + } + case INITCAP: { +- capwords = 1; ++ capwords = true; + std::string wspace(scw); + mkallsmall2(wspace, sunicw); + pSMgr->ngsuggest(slst, wspace.c_str(), m_HMgrs, INITCAP); ++ if (clock() > timelimit + TIMELIMIT_GLOBAL) ++ return slst; + break; + } + case ALLCAP: { +@@ -1106,6 +1214,8 @@ std::vector<std::string> HunspellImpl::suggest_internal(const std::string& word) + mkallsmall2(wspace, sunicw); + size_t oldns = slst.size(); + pSMgr->ngsuggest(slst, wspace.c_str(), m_HMgrs, ALLCAP); ++ if (clock() > timelimit + TIMELIMIT_GLOBAL) ++ return slst; + for (size_t j = oldns; j < slst.size(); ++j) { + mkallcap(slst[j]); + } +@@ -1137,6 +1247,8 @@ std::vector<std::string> HunspellImpl::suggest_internal(const std::string& word) + std::string chunk = scw.substr(prev_pos, dash_pos - prev_pos); + if (!spell(chunk.c_str())) { + std::vector<std::string> nlst = suggest(chunk.c_str()); ++ if (clock() > timelimit + TIMELIMIT_GLOBAL) ++ return slst; + for (std::vector<std::string>::reverse_iterator j = nlst.rbegin(); j != nlst.rend(); ++j) { + std::string wspace = scw.substr(0, prev_pos); + wspace.append(*j); +@@ -1160,80 +1272,6 @@ std::vector<std::string> HunspellImpl::suggest_internal(const std::string& word) + dash_pos = scw.size(); + } + } +- +- // word reversing wrapper for complex prefixes +- if (complexprefixes) { +- for (size_t j = 0; j < slst.size(); ++j) { +- if (utf8) +- reverseword_utf(slst[j]); +- else +- reverseword(slst[j]); +- } +- } +- +- // capitalize +- if (capwords) +- for (size_t j = 0; j < slst.size(); ++j) { +- 
mkinitcap(slst[j]); +- } +- +- // expand suggestions with dot(s) +- if (abbv && pAMgr && pAMgr->get_sugswithdots()) { +- for (size_t j = 0; j < slst.size(); ++j) { +- slst[j].append(word.substr(word.size() - abbv)); +- } +- } +- +- // remove bad capitalized and forbidden forms +- if (pAMgr && (pAMgr->get_keepcase() || pAMgr->get_forbiddenword())) { +- switch (captype) { +- case INITCAP: +- case ALLCAP: { +- size_t l = 0; +- for (size_t j = 0; j < slst.size(); ++j) { +- if (slst[j].find(' ') == std::string::npos && !spell(slst[j])) { +- std::string s; +- std::vector<w_char> w; +- if (utf8) { +- u8_u16(w, slst[j]); +- } else { +- s = slst[j]; +- } +- mkallsmall2(s, w); +- if (spell(s)) { +- slst[l] = s; +- ++l; +- } else { +- mkinitcap2(s, w); +- if (spell(s)) { +- slst[l] = s; +- ++l; +- } +- } +- } else { +- slst[l] = slst[j]; +- ++l; +- } +- } +- slst.resize(l); +- } +- } +- } +- +- // remove duplications +- size_t l = 0; +- for (size_t j = 0; j < slst.size(); ++j) { +- slst[l] = slst[j]; +- for (size_t k = 0; k < l; ++k) { +- if (slst[k] == slst[j]) { +- --l; +- break; +- } +- } +- ++l; +- } +- slst.resize(l); +- + return slst; + } + +diff --git a/src/hunspell/hunvisapi.h b/src/hunspell/hunvisapi.h +index eb2b348..8283017 100644 +--- a/src/hunspell/hunvisapi.h ++++ b/src/hunspell/hunvisapi.h +@@ -3,7 +3,7 @@ + + #if defined(HUNSPELL_STATIC) + # define LIBHUNSPELL_DLL_EXPORTED +-#elif defined(_MSC_VER) ++#elif defined(_WIN32) + # if defined(BUILDING_LIBHUNSPELL) + # define LIBHUNSPELL_DLL_EXPORTED __declspec(dllexport) + # else +@@ -15,4 +15,14 @@ + # define LIBHUNSPELL_DLL_EXPORTED + #endif + ++/* use thread_local, if it's possible, otherwise static */ ++ ++#if defined(_WIN32) ++# define HUNSPELL_THREAD_LOCAL thread_local ++#elif 0 ++# define HUNSPELL_THREAD_LOCAL thread_local ++#else ++# define HUNSPELL_THREAD_LOCAL static ++#endif ++ + #endif +diff --git a/src/hunspell/hunvisapi.h.in b/src/hunspell/hunvisapi.h.in +index a1020c8..85972dd 100644 +--- 
a/src/hunspell/hunvisapi.h.in ++++ b/src/hunspell/hunvisapi.h.in +@@ -3,7 +3,7 @@ + + #if defined(HUNSPELL_STATIC) + # define LIBHUNSPELL_DLL_EXPORTED +-#elif defined(_MSC_VER) ++#elif defined(_WIN32) + # if defined(BUILDING_LIBHUNSPELL) + # define LIBHUNSPELL_DLL_EXPORTED __declspec(dllexport) + # else +@@ -15,4 +15,14 @@ + # define LIBHUNSPELL_DLL_EXPORTED + #endif + ++/* use thread_local, if it's possible, otherwise static */ ++ ++#if defined(_WIN32) ++# define HUNSPELL_THREAD_LOCAL thread_local ++#elif @HAVE_CXX11@ ++# define HUNSPELL_THREAD_LOCAL thread_local ++#else ++# define HUNSPELL_THREAD_LOCAL static ++#endif ++ + #endif +diff --git a/src/hunspell/suggestmgr.cxx b/src/hunspell/suggestmgr.cxx +index ade85af..d9fabca 100644 +--- a/src/hunspell/suggestmgr.cxx ++++ b/src/hunspell/suggestmgr.cxx +@@ -72,6 +72,7 @@ + #include <string.h> + #include <stdio.h> + #include <ctype.h> ++#include <time.h> + + #include "suggestmgr.hxx" + #include "htypes.hxx" +@@ -79,6 +80,8 @@ + + const w_char W_VLINE = {'\0', '|'}; + ++#define MAX_CHAR_DISTANCE 4 ++ + SuggestMgr::SuggestMgr(const char* tryme, unsigned int maxn, AffixMgr* aptr) { + // register affix manager and check in string of chars to + // try when building candidate suggestions +@@ -211,6 +214,11 @@ bool SuggestMgr::suggest(std::vector<std::string>& slst, + + for (int cpdsuggest = 0; (cpdsuggest < 2) && (nocompoundtwowords == 0) && !good_suggestion; + cpdsuggest++) { ++ ++ HUNSPELL_THREAD_LOCAL clock_t timelimit; ++ // initialize both in non-compound and compound cycles ++ timelimit = clock(); ++ + // limit compound suggestion + if (cpdsuggest > 0) + oldSug = slst.size(); +@@ -233,12 +241,16 @@ bool SuggestMgr::suggest(std::vector<std::string>& slst, + if (slst.size() > i) + good_suggestion = true; + } ++ if (clock() > timelimit + TIMELIMIT_SUGGESTION) ++ return good_suggestion; + + // perhaps we made chose the wrong char from a related set + if ((slst.size() < maxSug) && + (!cpdsuggest || (slst.size() < oldSug + 
maxcpdsugs))) { + mapchars(slst, word, cpdsuggest); + } ++ if (clock() > timelimit + TIMELIMIT_SUGGESTION) ++ return good_suggestion; + + // only suggest compound words when no other suggestion + if ((cpdsuggest == 0) && (slst.size() > nsugorig)) +@@ -251,6 +263,8 @@ bool SuggestMgr::suggest(std::vector<std::string>& slst, + else + swapchar(slst, word, cpdsuggest); + } ++ if (clock() > timelimit + TIMELIMIT_SUGGESTION) ++ return good_suggestion; + + // did we swap the order of non adjacent chars by mistake + if ((slst.size() < maxSug) && (!cpdsuggest || (slst.size() < oldSug + maxcpdsugs))) { +@@ -259,6 +273,8 @@ bool SuggestMgr::suggest(std::vector<std::string>& slst, + else + longswapchar(slst, word, cpdsuggest); + } ++ if (clock() > timelimit + TIMELIMIT_SUGGESTION) ++ return good_suggestion; + + // did we just hit the wrong key in place of a good char (case and keyboard) + if ((slst.size() < maxSug) && (!cpdsuggest || (slst.size() < oldSug + maxcpdsugs))) { +@@ -267,6 +283,8 @@ bool SuggestMgr::suggest(std::vector<std::string>& slst, + else + badcharkey(slst, word, cpdsuggest); + } ++ if (clock() > timelimit + TIMELIMIT_SUGGESTION) ++ return good_suggestion; + + // did we add a char that should not be there + if ((slst.size() < maxSug) && (!cpdsuggest || (slst.size() < oldSug + maxcpdsugs))) { +@@ -275,6 +293,8 @@ bool SuggestMgr::suggest(std::vector<std::string>& slst, + else + extrachar(slst, word, cpdsuggest); + } ++ if (clock() > timelimit + TIMELIMIT_SUGGESTION) ++ return good_suggestion; + + // did we forgot a char + if ((slst.size() < maxSug) && (!cpdsuggest || (slst.size() < oldSug + maxcpdsugs))) { +@@ -283,6 +303,8 @@ bool SuggestMgr::suggest(std::vector<std::string>& slst, + else + forgotchar(slst, word, cpdsuggest); + } ++ if (clock() > timelimit + TIMELIMIT_SUGGESTION) ++ return good_suggestion; + + // did we move a char + if ((slst.size() < maxSug) && (!cpdsuggest || (slst.size() < oldSug + maxcpdsugs))) { +@@ -291,6 +313,8 @@ bool 
SuggestMgr::suggest(std::vector<std::string>& slst, + else + movechar(slst, word, cpdsuggest); + } ++ if (clock() > timelimit + TIMELIMIT_SUGGESTION) ++ return good_suggestion; + + // did we just hit the wrong key in place of a good char + if ((slst.size() < maxSug) && (!cpdsuggest || (slst.size() < oldSug + maxcpdsugs))) { +@@ -299,6 +323,8 @@ bool SuggestMgr::suggest(std::vector<std::string>& slst, + else + badchar(slst, word, cpdsuggest); + } ++ if (clock() > timelimit + TIMELIMIT_SUGGESTION) ++ return good_suggestion; + + // did we double two characters + if ((slst.size() < maxSug) && (!cpdsuggest || (slst.size() < oldSug + maxcpdsugs))) { +@@ -307,6 +333,8 @@ bool SuggestMgr::suggest(std::vector<std::string>& slst, + else + doubletwochars(slst, word, cpdsuggest); + } ++ if (clock() > timelimit + TIMELIMIT_SUGGESTION) ++ return good_suggestion; + + // perhaps we forgot to hit space and two words ran together + // (dictionary word pairs have top priority here, so +@@ -315,6 +343,8 @@ bool SuggestMgr::suggest(std::vector<std::string>& slst, + if (!cpdsuggest || (!nosplitsugs && slst.size() < oldSug + maxcpdsugs)) { + good_suggestion = twowords(slst, word, cpdsuggest, good_suggestion); + } ++ if (clock() > timelimit + TIMELIMIT_SUGGESTION) ++ return good_suggestion; + + } // repeating ``for'' statement compounding support + +@@ -469,8 +499,11 @@ int SuggestMgr::replchars(std::vector<std::string>& wlst, + return wlst.size(); + } + +-// perhaps we doubled two characters (pattern aba -> ababa, for example vacation +-// -> vacacation) ++// perhaps we doubled two characters ++// (for example vacation -> vacacation) ++// The recognized pattern with regex back-references: ++// "(.)(.)\1\2\1" or "..(.)(.)\1\2" ++ + int SuggestMgr::doubletwochars(std::vector<std::string>& wlst, + const char* word, + int cpdsuggest) { +@@ -481,7 +514,7 @@ int SuggestMgr::doubletwochars(std::vector<std::string>& wlst, + for (int i = 2; i < wl; i++) { + if (word[i] == word[i - 2]) { + 
state++; +- if (state == 3) { ++ if (state == 3 || (state == 2 && i >= 4)) { + std::string candidate(word, word + i - 1); + candidate.insert(candidate.end(), word + i + 1, word + wl); + testsug(wlst, candidate, cpdsuggest, NULL, NULL); +@@ -494,8 +527,11 @@ int SuggestMgr::doubletwochars(std::vector<std::string>& wlst, + return wlst.size(); + } + +-// perhaps we doubled two characters (pattern aba -> ababa, for example vacation +-// -> vacacation) ++// perhaps we doubled two characters ++// (for example vacation -> vacacation) ++// The recognized pattern with regex back-references: ++// "(.)(.)\1\2\1" or "..(.)(.)\1\2" ++ + int SuggestMgr::doubletwochars_utf(std::vector<std::string>& wlst, + const w_char* word, + int wl, +@@ -506,7 +542,7 @@ int SuggestMgr::doubletwochars_utf(std::vector<std::string>& wlst, + for (int i = 2; i < wl; i++) { + if (word[i] == word[i - 2]) { + state++; +- if (state == 3) { ++ if (state == 3 || (state == 2 && i >= 4)) { + std::vector<w_char> candidate_utf(word, word + i - 1); + candidate_utf.insert(candidate_utf.end(), word + i + 1, word + wl); + std::string candidate; +@@ -939,7 +975,8 @@ int SuggestMgr::longswapchar(std::vector<std::string>& wlst, + // try swapping not adjacent chars one by one + for (std::string::iterator p = candidate.begin(); p < candidate.end(); ++p) { + for (std::string::iterator q = candidate.begin(); q < candidate.end(); ++q) { +- if (std::abs(std::distance(q, p)) > 1) { ++ size_t distance = std::abs(std::distance(q, p)); ++ if (distance > 1 && distance <= MAX_CHAR_DISTANCE) { + std::swap(*p, *q); + testsug(wlst, candidate, cpdsuggest, NULL, NULL); + std::swap(*p, *q); +@@ -958,7 +995,8 @@ int SuggestMgr::longswapchar_utf(std::vector<std::string>& wlst, + // try swapping not adjacent chars + for (std::vector<w_char>::iterator p = candidate_utf.begin(); p < candidate_utf.end(); ++p) { + for (std::vector<w_char>::iterator q = candidate_utf.begin(); q < candidate_utf.end(); ++q) { +- if (std::abs(std::distance(q, 
p)) > 1) { ++ size_t distance = std::abs(std::distance(q, p)); ++ if (distance > 1 && distance <= MAX_CHAR_DISTANCE) { + std::swap(*p, *q); + std::string candidate; + u16_u8(candidate, candidate_utf); +@@ -980,7 +1018,7 @@ int SuggestMgr::movechar(std::vector<std::string>& wlst, + + // try moving a char + for (std::string::iterator p = candidate.begin(); p < candidate.end(); ++p) { +- for (std::string::iterator q = p + 1; q < candidate.end() && std::distance(p, q) < 10; ++q) { ++ for (std::string::iterator q = p + 1; q < candidate.end() && std::distance(p, q) <= MAX_CHAR_DISTANCE; ++q) { + std::swap(*q, *(q - 1)); + if (std::distance(p, q) < 2) + continue; // omit swap char +@@ -990,7 +1028,7 @@ int SuggestMgr::movechar(std::vector<std::string>& wlst, + } + + for (std::string::reverse_iterator p = candidate.rbegin(), pEnd = candidate.rend() - 1; p != pEnd; ++p) { +- for (std::string::reverse_iterator q = p + 1, qEnd = candidate.rend(); q != qEnd && std::distance(p, q) < 10; ++q) { ++ for (std::string::reverse_iterator q = p + 1, qEnd = candidate.rend(); q != qEnd && std::distance(p, q) <= MAX_CHAR_DISTANCE; ++q) { + std::swap(*q, *(q - 1)); + if (std::distance(p, q) < 2) + continue; // omit swap char +@@ -1013,7 +1051,7 @@ int SuggestMgr::movechar_utf(std::vector<std::string>& wlst, + + // try moving a char + for (std::vector<w_char>::iterator p = candidate_utf.begin(); p < candidate_utf.end(); ++p) { +- for (std::vector<w_char>::iterator q = p + 1; q < candidate_utf.end() && std::distance(p, q) < 10; ++q) { ++ for (std::vector<w_char>::iterator q = p + 1; q < candidate_utf.end() && std::distance(p, q) <= MAX_CHAR_DISTANCE; ++q) { + std::swap(*q, *(q - 1)); + if (std::distance(p, q) < 2) + continue; // omit swap char +@@ -1025,7 +1063,7 @@ int SuggestMgr::movechar_utf(std::vector<std::string>& wlst, + } + + for (std::vector<w_char>::reverse_iterator p = candidate_utf.rbegin(); p < candidate_utf.rend(); ++p) { +- for (std::vector<w_char>::reverse_iterator q = p + 1; 
q < candidate_utf.rend() && std::distance(p, q) < 10; ++q) { ++ for (std::vector<w_char>::reverse_iterator q = p + 1; q < candidate_utf.rend() && std::distance(p, q) <= MAX_CHAR_DISTANCE; ++q) { + std::swap(*q, *(q - 1)); + if (std::distance(p, q) < 2) + continue; // omit swap char +@@ -1715,15 +1753,15 @@ std::string SuggestMgr::suggest_morph(const std::string& in_w) { + TESTAFF(rv->astr, pAMgr->get_needaffix(), rv->alen) || + TESTAFF(rv->astr, pAMgr->get_onlyincompound(), rv->alen))) { + if (!HENTRY_FIND(rv, MORPH_STEM)) { +- result.append(" "); ++ result.push_back(MSEP_FLD); + result.append(MORPH_STEM); + result.append(w); + } + if (HENTRY_DATA(rv)) { +- result.append(" "); ++ result.push_back(MSEP_FLD); + result.append(HENTRY_DATA2(rv)); + } +- result.append("\n"); ++ result.push_back(MSEP_REC); + } + rv = rv->next_homonym; + } +@@ -1779,7 +1817,7 @@ std::string SuggestMgr::suggest_hentry_gen(hentry* rv, const char* pattern) { + HENTRY_DATA(rv), pattern, 0); + if (!aff.empty()) { + result.append(aff); +- result.append("\n"); ++ result.push_back(MSEP_REC); + } + } + +@@ -1803,7 +1841,7 @@ std::string SuggestMgr::suggest_hentry_gen(hentry* rv, const char* pattern) { + rv2->alen, HENTRY_DATA(rv2), pattern, 0); + if (!aff.empty()) { + result.append(aff); +- result.append("\n"); ++ result.push_back(MSEP_REC); + } + } + } +diff --git a/src/hunspell/suggestmgr.hxx b/src/hunspell/suggestmgr.hxx +index f0daf23..a435aac 100644 +--- a/src/hunspell/suggestmgr.hxx ++++ b/src/hunspell/suggestmgr.hxx +@@ -78,11 +78,6 @@ + #define MAXPHONSUGS 2 + #define MAXCOMPOUNDSUGS 3 + +-// timelimit: max ~1/4 sec (process time on Linux) for a time consuming function +-#define TIMELIMIT (CLOCKS_PER_SEC >> 2) +-#define MINTIMER 100 +-#define MAXPLUSTIMER 100 +- + #define NGRAM_LONGER_WORSE (1 << 0) + #define NGRAM_ANY_MISMATCH (1 << 1) + #define NGRAM_LOWERING (1 << 2) +-- +2.7.4 + diff --git a/external/hunspell/UnpackedTarball_hunspell.mk b/external/hunspell/UnpackedTarball_hunspell.mk 
index c76233c546d7..fd439342b2e9 100644 --- a/external/hunspell/UnpackedTarball_hunspell.mk +++ b/external/hunspell/UnpackedTarball_hunspell.mk @@ -29,7 +29,9 @@ $(eval $(call gb_UnpackedTarball_add_patches,hunspell, \ external/hunspell/0001-fix-compound-word-part-pa.patch \ external/hunspell/0001-add-SPELLML-support-for-run-time-dictionary-extensio.patch \ external/hunspell/0001-Recent-Hunspell-fixes-and-improvements.patch \ - external/hunspell/0001-tdf-116586-fix-LibreOffice-crash-by-Hungarian-person.patch \ + external/hunspell/0001-tdf-116586-fix-LibreOffice-crash-by-Hungarian-person.patch \ + external/hunspell/0001-Hunspell-patches-for-missing-OCONV-conversion.patch \ + external/hunspell/0001-recent-Hunspell-fixes-for-suggestion-spelling-and-an.patch \ )) # vim: set noet sw=4 ts=4: |