summaryrefslogtreecommitdiff
path: root/editeng
diff options
context:
space:
mode:
authorTomaž Vajngerl <quikee@gmail.com>2013-06-11 23:13:14 +0200
committerMichael Stahl <mstahl@redhat.com>2013-06-17 21:26:27 +0200
commit033d888b8afae92eb85c7d95bf677d4b7b6e1da0 (patch)
tree41bc617cd02ca5d920d59ca2cb8ff6ee63442dee /editeng
parentf6c8a7f90154ea7251bf7aa8eb6f2db14252060a (diff)
fdo#55315 Added simple Trie lookup tree for autocomplete words storage
Added simple Trie lookup tree which is more tailored to what is needed in autocomplete implementation, but still has the speed of the LatinLookupTree that has been used till now. As the implementation is much simpler it should be more managable and easier fixable. For now two actions: insert (word) and findSuggestions are supported. Acttion findSuggestion returns all words in a list for a searched sub-word, it also fixes fdo#62945. Change-Id: I63b69c30d28b4e1c465c2122ebc537f7f75a033a Reviewed-on: https://gerrit.libreoffice.org/4237 Reviewed-by: Michael Stahl <mstahl@redhat.com> Tested-by: Michael Stahl <mstahl@redhat.com> (cherry picked from commit fd33a9b60d50e34a1b72a52f22d07da89f5bd3fc) Signed-off-by: Michael Stahl <mstahl@redhat.com>
Diffstat (limited to 'editeng')
-rw-r--r--editeng/Library_editeng.mk1
-rw-r--r--editeng/Package_inc.mk1
-rw-r--r--editeng/inc/editeng/Trie.hxx59
-rw-r--r--editeng/qa/lookuptree/lookuptree_test.cxx95
-rw-r--r--editeng/source/lookuptree/Trie.cxx183
5 files changed, 335 insertions, 4 deletions
diff --git a/editeng/Library_editeng.mk b/editeng/Library_editeng.mk
index 8fee71e73666..3fc296935a78 100644
--- a/editeng/Library_editeng.mk
+++ b/editeng/Library_editeng.mk
@@ -119,6 +119,7 @@ $(eval $(call gb_Library_add_exception_objects,editeng,\
editeng/source/lookuptree/LatinLookupTree \
editeng/source/lookuptree/LatinTreeNode \
editeng/source/lookuptree/Node \
+ editeng/source/lookuptree/Trie \
))
# add libraries to be linked to editeng; again these names need to be given as
diff --git a/editeng/Package_inc.mk b/editeng/Package_inc.mk
index 5b107caec225..afac17aba319 100644
--- a/editeng/Package_inc.mk
+++ b/editeng/Package_inc.mk
@@ -149,5 +149,6 @@ $(eval $(call gb_Package_add_file,editeng_inc,inc/editeng/Node.hxx,editeng/Node.
$(eval $(call gb_Package_add_file,editeng_inc,inc/editeng/TreeHead.hxx,editeng/TreeHead.hxx))
$(eval $(call gb_Package_add_file,editeng_inc,inc/editeng/LatinLookupTree.hxx,editeng/LatinLookupTree.hxx))
$(eval $(call gb_Package_add_file,editeng_inc,inc/editeng/LatinTreeNode.hxx,editeng/LatinTreeNode.hxx))
+$(eval $(call gb_Package_add_file,editeng_inc,inc/editeng/Trie.hxx,editeng/Trie.hxx))
# vim: set noet sw=4 ts=4:
diff --git a/editeng/inc/editeng/Trie.hxx b/editeng/inc/editeng/Trie.hxx
new file mode 100644
index 000000000000..2ac76aee380c
--- /dev/null
+++ b/editeng/inc/editeng/Trie.hxx
@@ -0,0 +1,59 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#ifndef TRIE_HXX
+#define TRIE_HXX
+
+#include <sal/types.h>
+#include <rtl/ustring.hxx>
+#include <vector>
+#include <editeng/editengdllapi.h>
+
+namespace editeng
+{
+
+struct TrieNode
+{
+ static const int LATIN_ARRAY_SIZE = 26;
+
+ sal_Unicode mCharacter;
+ bool mMarker;
+ std::vector<TrieNode*> mChildren;
+ TrieNode* mLatinArray[LATIN_ARRAY_SIZE];
+
+
+ TrieNode(sal_Unicode aCharacter = '\0');
+ virtual ~TrieNode();
+
+ void markWord();
+ TrieNode* findChild(sal_Unicode aCharacter);
+ TrieNode* traversePath(OUString sPath);
+ void addNewChild(TrieNode* pChild);
+ void collectSuggestions(OUString sPath, std::vector<OUString>& rSuggestionList);
+};
+
+class EDITENG_DLLPUBLIC Trie
+{
+private:
+ TrieNode* mRoot;
+
+public:
+ Trie();
+ virtual ~Trie();
+
+ void insert(OUString sInputString) const;
+ void findSuggestions(OUString sWordPart, std::vector<OUString>& rSuggesstionList) const;
+
+};
+
+}
+
+#endif // TRIE_HXX
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/editeng/qa/lookuptree/lookuptree_test.cxx b/editeng/qa/lookuptree/lookuptree_test.cxx
index 5cfd57804851..b51c88a9fb10 100644
--- a/editeng/qa/lookuptree/lookuptree_test.cxx
+++ b/editeng/qa/lookuptree/lookuptree_test.cxx
@@ -25,21 +25,25 @@
#include <editeng/LookupTree.hxx>
#include <editeng/LatinLookupTree.hxx>
+#include <editeng/Trie.hxx>
+
namespace {
class LookupTreeTest : public CppUnit::TestFixture
{
- public:
- void test();
+public:
+ void testLookupTree();
+ void testTrie();
CPPUNIT_TEST_SUITE(LookupTreeTest);
- CPPUNIT_TEST(test);
+ CPPUNIT_TEST(testLookupTree);
+ CPPUNIT_TEST(testTrie);
CPPUNIT_TEST_SUITE_END();
};
CPPUNIT_TEST_SUITE_REGISTRATION(LookupTreeTest);
-void LookupTreeTest::test()
+void LookupTreeTest::testLookupTree()
{
LookupTree* a = new LatinLookupTree( "a" );
@@ -218,6 +222,89 @@ void LookupTreeTest::test()
delete a;
}
+void LookupTreeTest::testTrie()
+{
+ editeng::Trie trie;
+ std::vector<OUString> suggestions;
+
+ trie.findSuggestions( OUString(""), suggestions);
+ CPPUNIT_ASSERT_EQUAL( (size_t) 0, suggestions.size() );
+
+ trie.insert( OUString("") );
+ trie.findSuggestions( OUString(""), suggestions);
+ CPPUNIT_ASSERT_EQUAL( (size_t) 0, suggestions.size() );
+
+ trie.findSuggestions( OUString("a"), suggestions);
+ CPPUNIT_ASSERT_EQUAL( (size_t) 0, suggestions.size() );
+
+ trie.insert( OUString("abc") );
+ trie.insert( OUString("abcdefghijklmnopqrstuvwxyz") );
+ trie.findSuggestions( OUString("a"), suggestions);
+ CPPUNIT_ASSERT_EQUAL( (size_t) 2, suggestions.size() );
+ CPPUNIT_ASSERT_EQUAL( OUString("abc"), suggestions[0] );
+ CPPUNIT_ASSERT_EQUAL( OUString("abcdefghijklmnopqrstuvwxyz"), suggestions[1] );
+ suggestions.clear();
+
+ trie.findSuggestions( OUString("abc"), suggestions);
+ CPPUNIT_ASSERT_EQUAL( (size_t) 1, suggestions.size() );
+ CPPUNIT_ASSERT_EQUAL( OUString("abcdefghijklmnopqrstuvwxyz"), suggestions[0] );
+ suggestions.clear();
+
+ trie.findSuggestions( OUString("abe"), suggestions);
+ CPPUNIT_ASSERT_EQUAL( (size_t) 0, suggestions.size() );
+ suggestions.clear();
+
+ trie.insert( OUString("abe") );
+ trie.findSuggestions( OUString(""), suggestions);
+ CPPUNIT_ASSERT_EQUAL( (size_t) 3, suggestions.size() );
+ CPPUNIT_ASSERT_EQUAL( OUString("abc"), suggestions[0] );
+ CPPUNIT_ASSERT_EQUAL( OUString("abcdefghijklmnopqrstuvwxyz"), suggestions[1] );
+ CPPUNIT_ASSERT_EQUAL( OUString("abe"), suggestions[2] );
+ suggestions.clear();
+
+ trie.insert( OUString("H31l0") );
+ trie.findSuggestions( OUString("H"), suggestions);
+
+ CPPUNIT_ASSERT_EQUAL( (size_t) 1, suggestions.size() );
+ CPPUNIT_ASSERT_EQUAL( OUString("H31l0"), suggestions[0] );
+ suggestions.clear();
+
+ trie.insert( OUString("H1") );
+ trie.findSuggestions( OUString("H"), suggestions);
+ CPPUNIT_ASSERT_EQUAL( (size_t) 2, suggestions.size() );
+ CPPUNIT_ASSERT_EQUAL( OUString("H31l0"), suggestions[0] );
+ CPPUNIT_ASSERT_EQUAL( OUString("H1"), suggestions[1] );
+ suggestions.clear();
+
+ trie.findSuggestions( OUString("H3"), suggestions);
+ CPPUNIT_ASSERT_EQUAL( (size_t) 1, suggestions.size() );
+ CPPUNIT_ASSERT_EQUAL( OUString("H31l0"), suggestions[0] );
+ suggestions.clear();
+
+ trie.insert( OStringToOUString( "H\xC3\xA4llo", RTL_TEXTENCODING_UTF8 ) );
+ trie.findSuggestions( OUString("H"), suggestions );
+ CPPUNIT_ASSERT_EQUAL( (size_t) 3, suggestions.size() );
+ CPPUNIT_ASSERT_EQUAL( OUString("H31l0"), suggestions[0] );
+ CPPUNIT_ASSERT_EQUAL( OUString("H1"), suggestions[1] );
+ CPPUNIT_ASSERT_EQUAL( OStringToOUString( "H\xC3\xA4llo", RTL_TEXTENCODING_UTF8 ), suggestions[2] );
+ suggestions.clear();
+
+ trie.findSuggestions( OUString("H3"), suggestions );
+ CPPUNIT_ASSERT_EQUAL( (size_t) 1, suggestions.size() );
+ CPPUNIT_ASSERT_EQUAL( OUString("H31l0"), suggestions[0] );
+ suggestions.clear();
+
+ trie.findSuggestions( OStringToOUString("H\xC3\xA4", RTL_TEXTENCODING_UTF8), suggestions );
+ CPPUNIT_ASSERT_EQUAL( (size_t) 1, suggestions.size() );
+ CPPUNIT_ASSERT_EQUAL( OStringToOUString("H\xC3\xA4llo", RTL_TEXTENCODING_UTF8), suggestions[0] );
+ suggestions.clear();
+
+ trie.findSuggestions( OUString(""), suggestions);
+ CPPUNIT_ASSERT_EQUAL( (size_t) 6, suggestions.size() );
+ suggestions.clear();
+
+}
+
} // namespace end
CPPUNIT_PLUGIN_IMPLEMENT();
diff --git a/editeng/source/lookuptree/Trie.cxx b/editeng/source/lookuptree/Trie.cxx
new file mode 100644
index 000000000000..30ee0be0769f
--- /dev/null
+++ b/editeng/source/lookuptree/Trie.cxx
@@ -0,0 +1,183 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#include <editeng/Trie.hxx>
+
+namespace editeng
+{
+
+using namespace std;
+
+/* TrieNode */
+
+TrieNode::TrieNode(sal_Unicode aCharacter) :
+ mCharacter(aCharacter),
+ mMarker(false)
+{
+ for (int i=0; i<LATIN_ARRAY_SIZE; i++)
+ {
+ mLatinArray[i] = NULL;
+ }
+}
+
+TrieNode::~TrieNode()
+{
+ vector<TrieNode*>::iterator iNode;
+ for(iNode = mChildren.begin(); iNode != mChildren.end(); iNode++)
+ {
+ delete *iNode;
+ }
+
+ for (int i=0; i<LATIN_ARRAY_SIZE; i++)
+ {
+ delete mLatinArray[i];
+ }
+}
+
+void TrieNode::markWord()
+{
+ mMarker = true;
+}
+
+void TrieNode::addNewChild(TrieNode* pChild)
+{
+ if (pChild->mCharacter >= sal_Unicode('a') &&
+ pChild->mCharacter <= sal_Unicode('z'))
+ {
+ mLatinArray[pChild->mCharacter - sal_Unicode('a')] = pChild;
+ }
+ else
+ {
+ mChildren.push_back(pChild);
+ }
+}
+
+TrieNode* TrieNode::findChild(sal_Unicode aInputCharacter)
+{
+ if (aInputCharacter >= sal_Unicode('a') &&
+ aInputCharacter <= sal_Unicode('z'))
+ {
+ return mLatinArray[aInputCharacter - sal_Unicode('a')];
+ }
+
+ vector<TrieNode*>::iterator iNode;
+
+ for(iNode = mChildren.begin(); iNode != mChildren.end(); iNode++)
+ {
+ TrieNode* pCurrent = *iNode;
+ if ( pCurrent->mCharacter == aInputCharacter )
+ return pCurrent;
+ }
+
+ return NULL;
+}
+
+void TrieNode::collectSuggestions(OUString sPath, vector<OUString>& rSuggestionList)
+{
+ // first traverse nodes for alphabet characters
+ for (int i=0; i<LATIN_ARRAY_SIZE; i++)
+ {
+ TrieNode* pCurrent = mLatinArray[i];
+ if (pCurrent != NULL)
+ {
+ OUString aStringPath = sPath + OUString(pCurrent->mCharacter);
+ if(pCurrent->mMarker)
+ rSuggestionList.push_back(aStringPath);
+ // recursivly traverse tree
+ pCurrent->collectSuggestions(aStringPath, rSuggestionList);
+ }
+ }
+
+ // traverse nodes for other characters
+ vector<TrieNode*>::iterator iNode;
+ for(iNode = mChildren.begin(); iNode != mChildren.end(); iNode++)
+ {
+ TrieNode* pCurrent = *iNode;
+ if (pCurrent != NULL)
+ {
+ OUString aStringPath = sPath + OUString(pCurrent->mCharacter);
+ if(pCurrent->mMarker)
+ rSuggestionList.push_back(aStringPath);
+ // recursivly traverse tree
+ pCurrent->collectSuggestions(aStringPath, rSuggestionList);
+ }
+ }
+}
+
+TrieNode* TrieNode::traversePath(OUString sPath)
+{
+ TrieNode* pCurrent = this;
+
+ for ( sal_Int32 i = 0; i < sPath.getLength(); i++ )
+ {
+ sal_Unicode aCurrentChar = sPath[i];
+ pCurrent = pCurrent->findChild(aCurrentChar);
+ if ( pCurrent == NULL )
+ return NULL;
+ }
+
+ return pCurrent;
+}
+
+/* TRIE */
+
+Trie::Trie()
+{
+ mRoot = new TrieNode();
+}
+
+Trie::~Trie()
+{
+ delete mRoot;
+}
+
+void Trie::insert(OUString sInputString) const
+{
+ // adding an empty word is not allowed
+ if ( sInputString.isEmpty() )
+ {
+ return;
+ }
+
+ // traverse the input string and modify the tree with new nodes / characters
+
+ TrieNode* pCurrent = mRoot;
+ sal_Unicode aCurrentChar;
+
+ for ( sal_Int32 i = 0; i < sInputString.getLength(); i++ )
+ {
+ aCurrentChar = sInputString[i];
+ TrieNode* pChild = pCurrent->findChild(aCurrentChar);
+ if ( pChild == NULL )
+ {
+ TrieNode* pNewNode = new TrieNode(aCurrentChar);
+ pCurrent->addNewChild(pNewNode);
+ pCurrent = pNewNode;
+ }
+ else
+ {
+ pCurrent = pChild;
+ }
+ }
+
+ pCurrent->markWord();
+}
+
+void Trie::findSuggestions(OUString sWordPart, vector<OUString>& rSuggesstionList) const
+{
+ TrieNode* pNode = mRoot->traversePath(sWordPart);
+
+ if (pNode != NULL)
+ {
+ pNode->collectSuggestions(sWordPart, rSuggesstionList);
+ }
+}
+
+}
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */