From e13ff056fd65990b88d29fb9eae304b411e58234 Mon Sep 17 00:00:00 2001 From: Changwoo Ryu Date: Wed, 8 Mar 2017 14:04:26 +0900 Subject: [PATCH] Hunspell patches for missing OCONV conversion 4e2abfd Clean up PR #479 cc2d71e Add oconv2 test to Makefile ca14fdb Avoid gotos across variable initialization 7e5cb62 Use goto to reduce repetitive code f528192 Add missing OCONV conversion of root and morphemes output --- src/hunspell/hunspell.cxx | 59 +++++++++++++++++++++++++++++++++++++++-------- tests/test.sh | 23 +++++++++++++++--- 2 files changed, 70 insertions(+), 12 deletions(-) diff --git a/src/hunspell/hunspell.cxx b/src/hunspell/hunspell.cxx index 1100a6f..87d1b4a 100644 --- a/src/hunspell/hunspell.cxx +++ b/src/hunspell/hunspell.cxx @@ -98,10 +98,13 @@ public: std::vector stem(const std::string& word); std::vector stem(const std::vector& morph); std::vector analyze(const std::string& word); + std::vector analyze_internal(const std::string& word); int get_langnum() const; bool input_conv(const std::string& word, std::string& dest); bool spell(const std::string& word, int* info = NULL, std::string* root = NULL); + bool spell_internal(const std::string& word, int* info = NULL, std::string* root = NULL); std::vector suggest(const std::string& word); + std::vector suggest_internal(const std::string& word); const std::string& get_wordchars() const; const std::vector& get_wordchars_utf16() const; const std::string& get_dict_encoding() const; @@ -415,6 +418,21 @@ bool Hunspell::spell(const std::string& word, int* info, std::string* root) { } bool HunspellImpl::spell(const std::string& word, int* info, std::string* root) { + bool r = spell_internal(word, info, root); + if (r && root) { + // output conversion + RepList* rl = (pAMgr) ? pAMgr->get_oconvtable() : NULL; + if (rl) { + std::string wspace; + if (rl->conv(*root, wspace)) { + *root = wspace; + } + } + } + return r; +} + +bool HunspellImpl::spell_internal(const std::string& word, int* info, std::string* root) { struct hentry* rv = NULL; int info2 = 0; @@ -834,6 +852,22 @@ std::vector Hunspell::suggest(const std::string& word) { std::vector HunspellImpl::suggest(const std::string& word) { std::vector slst; + slst = suggest_internal(word); + // output conversion + RepList* rl = (pAMgr) ? pAMgr->get_oconvtable() : NULL; + if (rl) { + for (size_t i = 0; rl && i < slst.size(); ++i) { + std::string wspace; + if (rl->conv(slst[i], wspace)) { + slst[i] = wspace; + } + } + } + return slst; +} + +std::vector HunspellImpl::suggest_internal(const std::string& word) { + std::vector slst; int onlycmpdsug = 0; if (!pSMgr || m_HMgrs.empty()) @@ -1150,15 +1184,6 @@ std::vector HunspellImpl::suggest(const std::string& word) { } slst.resize(l); - // output conversion - rl = (pAMgr) ? pAMgr->get_oconvtable() : NULL; - for (size_t j = 0; rl && j < slst.size(); ++j) { - std::string wspace; - if (rl->conv(slst[j], wspace)) { - slst[j] = wspace; - } - } - return slst; } @@ -1365,6 +1390,22 @@ std::vector Hunspell::analyze(const std::string& word) { std::vector HunspellImpl::analyze(const std::string& word) { std::vector slst; + slst = analyze_internal(word); + // output conversion + RepList* rl = (pAMgr) ? pAMgr->get_oconvtable() : NULL; + if (rl) { + for (size_t i = 0; rl && i < slst.size(); ++i) { + std::string wspace; + if (rl->conv(slst[i], wspace)) { + slst[i] = wspace; + } + } + } + return slst; +} + +std::vector HunspellImpl::analyze_internal(const std::string& word) { + std::vector slst; if (!pSMgr || m_HMgrs.empty()) return slst; if (utf8) { diff --git a/tests/test.sh b/tests/test.sh index 22e5087..9344f82 100755 --- a/tests/test.sh +++ b/tests/test.sh @@ -112,7 +112,7 @@ if test -f $TESTDIR/$NAME.wrong; then echo "=============================================" echo "Fail in $NAME.wrong. Bad words recognised as good:" tr -d ' ' <$TESTDIR/$NAME.wrong >$TEMPDIR/$NAME.wrong.detab - diff $TEMPDIR/$NAME.wrong.detab $TEMPDIR/$NAME.wrong | grep '^<' | sed 's/^..//' + diff -u $TEMPDIR/$NAME.wrong.detab $TEMPDIR/$NAME.wrong | grep '^<' | sed 's/^..//' rm -f $TEMPDIR/$NAME.wrong $TEMPDIR/$NAME.wrong.detab exit 1 fi @@ -121,6 +121,23 @@ fi check_valgrind_log "bad words" +# Tests good words' root +if test -f $TESTDIR/$NAME.root; then + # Extract the root words of the affixed words, after '+' + hunspell $* -d $TESTDIR/$NAME <$TESTDIR/$NAME.good | grep -a '^+ ' | \ + sed 's/^+ //' >$TEMPDIR/$NAME.root + if ! cmp $TEMPDIR/$NAME.root $TESTDIR/$NAME.root >/dev/null; then + echo "=============================================" + echo "Fail in $NAME.root. Bad prefix or suffix?" + diff -u $TESTDIR/$NAME.root $TEMPDIR/$NAME.root + rm -f $TEMPDIR/$NAME.root + exit 1 + fi + rm -f $TEMPDIR/$NAME.root +fi + +check_valgrind_log "root" + # Tests morphological analysis if test -f $TESTDIR/$NAME.morph; then sed 's/ $//' $TESTDIR/$NAME.good >$TEMPDIR/$NAME.good @@ -129,7 +146,7 @@ if test -f $TESTDIR/$NAME.morph; then if ! cmp $TEMPDIR/$NAME.morph $TESTDIR/$NAME.morph >/dev/null; then echo "=============================================" echo "Fail in $NAME.morph. Bad analysis?" - diff $TESTDIR/$NAME.morph $TEMPDIR/$NAME.morph | grep '^<' | sed 's/^..//' + diff -u $TESTDIR/$NAME.morph $TEMPDIR/$NAME.morph | grep '^<' | sed 's/^..//' rm -f $TEMPDIR/$NAME.morph exit 1 fi @@ -145,7 +162,7 @@ if test -f $TESTDIR/$NAME.sug; then if ! cmp $TEMPDIR/$NAME.sug $TESTDIR/$NAME.sug >/dev/null; then echo "=============================================" echo "Fail in $NAME.sug. Bad suggestion?" - diff $TESTDIR/$NAME.sug $TEMPDIR/$NAME.sug + diff -u $TESTDIR/$NAME.sug $TEMPDIR/$NAME.sug rm -f $TEMPDIR/$NAME.sug exit 1 fi -- 2.7.4