1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
|
From 8e957585671c76fa21e6265ec7b68aa19507f4fe Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Caol=C3=A1n=20McNamara?= <caolanm@redhat.com>
Date: Fri, 10 Feb 2017 15:49:17 +0000
Subject: [PATCH 2/4] add a get_clen_and_captype variant that takes a buffer
kcachegrind reports 1,057,506,901 -> 830,529,143 on
echo Hollo | valgrind --tool=callgrind ./src/tools/.libs/hunspell -d nl_NL
---
src/hunspell/hashmgr.cxx | 16 +++++++++++-----
src/hunspell/hashmgr.hxx | 1 +
2 files changed, 12 insertions(+), 5 deletions(-)
diff --git a/src/hunspell/hashmgr.cxx b/src/hunspell/hashmgr.cxx
index 1de1690..4844b49 100644
--- a/src/hunspell/hashmgr.cxx
+++ b/src/hunspell/hashmgr.cxx
@@ -363,12 +363,11 @@ int HashMgr::add_hidden_capitalized_word(const std::string& word,
}
// detect captype and modify word length for UTF-8 encoding
-int HashMgr::get_clen_and_captype(const std::string& word, int* captype) {
+int HashMgr::get_clen_and_captype(const std::string& word, int* captype, std::vector<w_char> &workbuf) {
int len;
if (utf8) {
- std::vector<w_char> dest_utf;
- len = u8_u16(dest_utf, word);
- *captype = get_captype_utf8(dest_utf, langnum);
+ len = u8_u16(workbuf, word);
+ *captype = get_captype_utf8(workbuf, langnum);
} else {
len = word.size();
*captype = get_captype(word, csconv);
@@ -376,6 +375,11 @@ int HashMgr::get_clen_and_captype(const std::string& word, int* captype) {
return len;
}
+int HashMgr::get_clen_and_captype(const std::string& word, int* captype) {
+ std::vector<w_char> workbuf;
+ return get_clen_and_captype(word, captype, workbuf);
+}
+
// remove word (personal dictionary function for standalone applications)
int HashMgr::remove(const std::string& word) {
struct hentry* dp = lookup(word.c_str());
@@ -527,6 +531,8 @@ int HashMgr::load_tables(const char* tpath, const char* key) {
// loop through all words on much list and add to hash
// table and create word and affix strings
+ std::vector<w_char> workbuf;
+
while (dict->getline(ts)) {
mychomp(ts);
// split each line into word and morphological description
@@ -599,7 +605,7 @@ int HashMgr::load_tables(const char* tpath, const char* key) {
}
int captype;
- int wcl = get_clen_and_captype(ts, &captype);
+ int wcl = get_clen_and_captype(ts, &captype, workbuf);
const std::string *dp_str = dp.empty() ? NULL : &dp;
// add the word and its index plus its capitalized form optionally
if (add_word(ts, wcl, flags, al, dp_str, false) ||
diff --git a/src/hunspell/hashmgr.hxx b/src/hunspell/hashmgr.hxx
index 812171a..5a09c45 100644
--- a/src/hunspell/hashmgr.hxx
+++ b/src/hunspell/hashmgr.hxx
@@ -125,6 +125,7 @@ class HashMgr {
private:
int get_clen_and_captype(const std::string& word, int* captype);
+ int get_clen_and_captype(const std::string& word, int* captype, std::vector<w_char> &workbuf);
int load_tables(const char* tpath, const char* key);
int add_word(const std::string& word,
int wcl,
--
2.9.3
|