1 files changed, 264 insertions, 0 deletions
diff --git a/external/hunspell/0003-hoist-string-lowering-from-ngram-to-ngsuggest.patch b/external/hunspell/0003-hoist-string-lowering-from-ngram-to-ngsuggest.patch
new file mode 100644
index 000000000000..ff2530cfe23d
--- /dev/null
+++ b/external/hunspell/0003-hoist-string-lowering-from-ngram-to-ngsuggest.patch
@@ -0,0 +1,264 @@
+From cf0967951a25a2daa10a636092193af5c5497aa2 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Caol=C3=A1n=20McNamara?= <caolanm@redhat.com>
+Date: Fri, 10 Feb 2017 16:36:27 +0000
+Subject: [PATCH 3/4] hoist string lowering from ngram to ngsuggest
+
+only lower when we have to and reuse scratch buffers as
+tolower destination
+
+kcachegrind reports 830,529,143 -> 779,887,690 on
+
+echo Hollo | valgrind --tool=callgrind ./src/tools/.libs/hunspell -d nl_NL
+---
+ src/hunspell/suggestmgr.cxx | 143 +++++++++++++++++++++++++++++---------------
+ 1 file changed, 95 insertions(+), 48 deletions(-)
+
+diff --git a/src/hunspell/suggestmgr.cxx b/src/hunspell/suggestmgr.cxx
+index 54a474f..ea52707 100644
+--- a/src/hunspell/suggestmgr.cxx
++++ b/src/hunspell/suggestmgr.cxx
+@@ -1075,10 +1075,8 @@ void SuggestMgr::ngsuggest(std::vector<std::string>& wlst,
+     u8_u16(w_target, target);
+   }
+   
+-  std::vector<w_char> w_entry;
+   std::string f;
+   std::vector<w_char> w_f;
+-  std::vector<w_char> w_target2;
+   
+   for (size_t i = 0; i < rHMgr.size(); ++i) {
+     while (0 != (hp = rHMgr[i]->walk_hashtable(col, hp))) {
+@@ -1091,13 +1089,24 @@ void SuggestMgr::ngsuggest(std::vector<std::string>& wlst,
+         continue;
+ 
+       if (utf8) {
+-        w_entry.clear();
+-        u8_u16(w_entry, HENTRY_WORD(hp));
+-        sc = ngram(3, w_word, w_entry, NGRAM_LONGER_WORSE + low) +
+-             leftcommonsubstring(w_word, w_entry);
++        w_f.clear();
++        u8_u16(w_f, HENTRY_WORD(hp));
++
++        int leftcommon = leftcommonsubstring(w_word, w_f);
++        if (low) {
++          // lowering dictionary word
++          mkallsmall_utf(w_f, langnum);
++        }
++        sc = ngram(3, w_word, w_f, NGRAM_LONGER_WORSE) + leftcommon;
+       } else {
+-        sc = ngram(3, word, HENTRY_WORD(hp), NGRAM_LONGER_WORSE + low) +
+-             leftcommonsubstring(word, HENTRY_WORD(hp));
++        f.assign(HENTRY_WORD(hp));
++
++        int leftcommon = leftcommonsubstring(word, f.c_str());
++        if (low) {
++          // lowering dictionary word
++          mkallsmall(f, csconv);
++        }
++        sc = ngram(3, word, f, NGRAM_LONGER_WORSE) + leftcommon;
+       }
+ 
+       // check special pronounciation
+@@ -1108,11 +1117,20 @@ void SuggestMgr::ngsuggest(std::vector<std::string>& wlst,
+         if (utf8) {
+           w_f.clear();
+           u8_u16(w_f, f);
+-          sc2 = ngram(3, w_word, w_f, NGRAM_LONGER_WORSE + low) +
+-                leftcommonsubstring(w_word, w_f);
++
++          int leftcommon = leftcommonsubstring(w_word, w_f);
++          if (low) {
++            // lowering dictionary word
++            mkallsmall_utf(w_f, langnum);
++          }
++          sc2 = ngram(3, w_word, w_f, NGRAM_LONGER_WORSE) + leftcommon;
+         } else {
+-          sc2 = ngram(3, word, f, NGRAM_LONGER_WORSE + low) +
+-                leftcommonsubstring(word, f.c_str());
++          int leftcommon = leftcommonsubstring(word, f.c_str());
++          if (low) {
++            // lowering dictionary word
++            mkallsmall(f, csconv);
++          }
++          sc2 = ngram(3, word, f, NGRAM_LONGER_WORSE) + leftcommon;
+         }
+         if (sc2 > sc)
+           sc = sc2;
+@@ -1129,14 +1147,14 @@ void SuggestMgr::ngsuggest(std::vector<std::string>& wlst,
+           candidate = HENTRY_WORD(hp);
+           mkallcap(candidate, csconv);
+         }
+-        std::string target2 = phonet(candidate, *ph);
+-        w_target2.clear();
++        f = phonet(candidate, *ph);
++        w_f.clear();
+         if (utf8) {
+-          u8_u16(w_target2, target2);
+-          scphon = 2 * ngram(3, w_target, w_target2,
++          u8_u16(w_f, f);
++          scphon = 2 * ngram(3, w_target, w_f,
+                              NGRAM_LONGER_WORSE);
+         } else {
+-          scphon = 2 * ngram(3, target, target2,
++          scphon = 2 * ngram(3, target, f,
+                              NGRAM_LONGER_WORSE);
+         }
+       }
+@@ -1177,12 +1195,24 @@ void SuggestMgr::ngsuggest(std::vector<std::string>& wlst,
+         w_mw[k].l = '*';
+         w_mw[k].h = 0;
+       }
+-      thresh += ngram(n, w_word, w_mw, NGRAM_ANY_MISMATCH + low);
++
++      if (low) {
++        // lowering dictionary word
++        mkallsmall_utf(w_mw, langnum);
++      }
++
++      thresh += ngram(n, w_word, w_mw, NGRAM_ANY_MISMATCH);
+     } else {
+       std::string mw = word;
+       for (int k = sp; k < n; k += 4)
+         mw[k] = '*';
+-      thresh += ngram(n, word, mw, NGRAM_ANY_MISMATCH + low);
++
++      if (low) {
++        // lowering dictionary word
++        mkallsmall(mw, csconv);
++      }
++
++      thresh += ngram(n, word, mw, NGRAM_ANY_MISMATCH);
+     }
+   }
+   thresh = thresh / 3;
+@@ -1210,7 +1240,6 @@ void SuggestMgr::ngsuggest(std::vector<std::string>& wlst,
+     return;
+   }
+ 
+-  std::vector<w_char> w_glst_word;
+   for (int i = 0; i < MAX_ROOTS; i++) {
+     if (roots[i]) {
+       struct hentry* rp = roots[i];
+@@ -1225,15 +1254,26 @@ void SuggestMgr::ngsuggest(std::vector<std::string>& wlst,
+ 
+       for (int k = 0; k < nw; k++) {
+         if (utf8) {
+-          w_glst_word.clear();
+-          u8_u16(w_glst_word, glst[k].word);
+-          sc = ngram(n, w_word, w_glst_word,
+-                     NGRAM_ANY_MISMATCH + low) +
+-               leftcommonsubstring(w_word, w_glst_word);
++          w_f.clear();
++          u8_u16(w_f, glst[k].word);
++
++          int leftcommon = leftcommonsubstring(w_word, w_f);
++          if (low) {
++            // lowering dictionary word
++            mkallsmall_utf(w_f, langnum);
++          }
++
++          sc = ngram(n, w_word, w_f, NGRAM_ANY_MISMATCH) + leftcommon;
+         } else {
+-          sc = ngram(n, word, glst[k].word,
+-                     NGRAM_ANY_MISMATCH + low) +
+-               leftcommonsubstring(word, glst[k].word);
++          f = glst[k].word;
++
++          int leftcommon = leftcommonsubstring(word, f.c_str());
++          if (low) {
++            // lowering dictionary word
++            mkallsmall(f, csconv);
++          }
++
++          sc = ngram(n, word, f, NGRAM_ANY_MISMATCH) + leftcommon;
+         }
+ 
+         if (sc > thresh) {
+@@ -1318,19 +1358,37 @@ void SuggestMgr::ngsuggest(std::vector<std::string>& wlst,
+       w_gl.clear();
+       if (utf8) {
+         u8_u16(w_gl, gl);
+-        re = ngram(2, w_word, w_gl, NGRAM_ANY_MISMATCH + low + NGRAM_WEIGHTED) +
+-             ngram(2, w_gl, w_word, NGRAM_ANY_MISMATCH + low + NGRAM_WEIGHTED);
++        //w_gl is lowercase already at this point
++        re = ngram(2, w_word, w_gl, NGRAM_ANY_MISMATCH + NGRAM_WEIGHTED);
++        if (low) {
++          w_f = w_word;
++          // lowering dictionary word
++          mkallsmall_utf(w_f, langnum);
++          re += ngram(2, w_gl, w_f, NGRAM_ANY_MISMATCH + NGRAM_WEIGHTED);
++        } else {
++          re += ngram(2, w_gl, w_word, NGRAM_ANY_MISMATCH + NGRAM_WEIGHTED);
++        }
+       } else {
+-        re = ngram(2, word, gl, NGRAM_ANY_MISMATCH + low + NGRAM_WEIGHTED) +
+-             ngram(2, gl, word, NGRAM_ANY_MISMATCH + low + NGRAM_WEIGHTED);
++        //gl is lowercase already at this point
++        re = ngram(2, word, gl, NGRAM_ANY_MISMATCH + NGRAM_WEIGHTED);
++        if (low) {
++          f = word;
++          // lowering dictionary word
++          mkallsmall(f, csconv);
++          re += ngram(2, gl, f, NGRAM_ANY_MISMATCH + NGRAM_WEIGHTED);
++        } else {
++          re += ngram(2, gl, word, NGRAM_ANY_MISMATCH + NGRAM_WEIGHTED);
++        }
+       }
+ 
+       int ngram_score, leftcommon_score;
+       if (utf8) {
+-        ngram_score = ngram(4, w_word, w_gl, NGRAM_ANY_MISMATCH + low);
++        //w_gl is lowercase already at this point
++        ngram_score = ngram(4, w_word, w_gl, NGRAM_ANY_MISMATCH);
+         leftcommon_score = leftcommonsubstring(w_word, w_gl);
+       } else {
+-        ngram_score = ngram(4, word, gl, NGRAM_ANY_MISMATCH + low);
++        //gl is lowercase already at this point
++        ngram_score = ngram(4, word, gl, NGRAM_ANY_MISMATCH);
+         leftcommon_score = leftcommonsubstring(word, gl.c_str());
+       }
+       gscore[i] =
+@@ -1802,14 +1860,6 @@ int SuggestMgr::ngram(int n,
+   l2 = su2.size();
+   if (l2 == 0)
+     return 0;
+-  // lowering dictionary word
+-  const std::vector<w_char>* p_su2 = &su2;
+-  std::vector<w_char> su2_copy;
+-  if (opt & NGRAM_LOWERING) {
+-    su2_copy = su2;
+-    mkallsmall_utf(su2_copy, langnum);
+-    p_su2 = &su2_copy;
+-  }
+   for (int j = 1; j <= n; j++) {
+     ns = 0;
+     for (int i = 0; i <= (l1 - j); i++) {
+@@ -1817,7 +1867,7 @@ int SuggestMgr::ngram(int n,
+       for (int l = 0; l <= (l2 - j); l++) {
+         for (k = 0; k < j; k++) {
+           const w_char& c1 = su1[i + k];
+-          const w_char& c2 = (*p_su2)[l + k];
++          const w_char& c2 = su2[l + k];
+           if ((c1.l != c2.l) || (c1.h != c2.h))
+             break;
+         }
+@@ -1862,14 +1912,11 @@ int SuggestMgr::ngram(int n,
+   if (l2 == 0)
+     return 0;
+   l1 = s1.size();
+-  std::string t(s2);
+-  if (opt & NGRAM_LOWERING)
+-    mkallsmall(t, csconv);
+   for (int j = 1; j <= n; j++) {
+     ns = 0;
+     for (int i = 0; i <= (l1 - j); i++) {
+-      //t is haystack, s1[i..i+j) is needle
+-      if (t.find(s1.c_str()+i, 0, j) != std::string::npos) {
++      //s2 is haystack, s1[i..i+j) is needle
++      if (s2.find(s1.c_str()+i, 0, j) != std::string::npos) {
+         ns++;
+       } else if (opt & NGRAM_WEIGHTED) {
+         ns--;
+-- 
+2.9.3
+