summaryrefslogtreecommitdiff
path: root/l10ntools
diff options
context:
space:
mode:
author Gert van Valkenhoef <g.h.m.van.valkenhoef@rug.nl> 2012-02-19 13:49:08 +0100
committer Caolán McNamara <caolanm@redhat.com> 2012-02-23 10:31:17 +0000
commit 70a7cd0923795ee5c8210b476e2897d12988ad95 (patch)
tree e385d49216d229ce6428148e7ec6bdfdda2216df /l10ntools
parent 796818d7b5a63c30d1ec837172deb73e300bfc38 (diff)
Add C++ HelpSearch and call from XMLHelp. Fix string conversion bug.
Diffstat (limited to 'l10ntools')
-rw-r--r-- l10ntools/inc/l10ntools/HelpSearch.hxx 36
-rw-r--r-- l10ntools/prj/d.lst 1
-rw-r--r-- l10ntools/source/help/HelpIndexer.cxx 17
-rw-r--r-- l10ntools/source/help/HelpSearch.cxx 40
-rw-r--r-- l10ntools/source/help/LuceneHelper.cxx 33
-rw-r--r-- l10ntools/source/help/LuceneHelper.hxx 13
-rw-r--r-- l10ntools/source/help/makefile.mk 9
7 files changed, 131 insertions, 18 deletions
diff --git a/l10ntools/inc/l10ntools/HelpSearch.hxx b/l10ntools/inc/l10ntools/HelpSearch.hxx
new file mode 100644
index 000000000000..4885b5698222
--- /dev/null
+++ b/l10ntools/inc/l10ntools/HelpSearch.hxx
@@ -0,0 +1,36 @@
+#ifndef HELPSEARCH_HXX
+#define HELPSEARCH_HXX
+
+#include <l10ntools/dllapi.h>
+
+#include <CLucene/StdHeader.h>
+#include <CLucene.h>
+
+#include <rtl/ustring.hxx>
+#include <vector>
+
+/**
+ * Looks up help documents in a Lucene index stored in a directory.
+ */
+class L10N_DLLPUBLIC HelpSearch {
+    public:
+
+        /**
+         * Remember where the index lives; the index is not touched here.
+         * @param lang Help files language.
+         * @param indexDir The directory where the index files are stored.
+         */
+        HelpSearch(const rtl::OUString &lang, const rtl::OUString &indexDir);
+
+        /**
+         * Run one query string against the index.
+         * @param queryStr The query.
+         * @param captionOnly When true the caption is searched, otherwise the content.
+         * @param rDocuments Receives the paths of the documents that were found.
+         * @param rScores Receives the score of each document that was found.
+         * @return true (the implementation in HelpSearch.cxx has no other return value).
+         */
+        bool query(const rtl::OUString &queryStr, bool captionOnly,
+            std::vector<rtl::OUString> &rDocuments, std::vector<float> &rScores);
+
+    private:
+        // Language of the help files, as handed to the constructor.
+        rtl::OUString d_lang;
+        // Directory that holds the index files.
+        rtl::OUString d_indexDir;
+};
+
+#endif
diff --git a/l10ntools/prj/d.lst b/l10ntools/prj/d.lst
index 44cf5f001e14..e9329dc93855 100644
--- a/l10ntools/prj/d.lst
+++ b/l10ntools/prj/d.lst
@@ -48,6 +48,7 @@ mkdir: %_DEST%\bin\help\com\sun\star\help
..\inc\l10ntools\directory.hxx %_DEST%\inc\l10ntools\directory.hxx
..\inc\l10ntools\file.hxx %_DEST%\inc\l10ntools\file.hxx
..\inc\l10ntools\HelpIndexer.hxx %_DEST%\inc\l10ntools\HelpIndexer.hxx
+..\inc\l10ntools\HelpSearch.hxx %_DEST%\inc\l10ntools\HelpSearch.hxx
..\source\filter\merge\FCFGMerge.cfg %_DEST%\inc\l10ntools\FCFGMerge.cfg
..\%__SRC%\lib\transex.lib %_DEST%\lib\transex.lib
diff --git a/l10ntools/source/help/HelpIndexer.cxx b/l10ntools/source/help/HelpIndexer.cxx
index b54814a41895..793348b2b2fa 100644
--- a/l10ntools/source/help/HelpIndexer.cxx
+++ b/l10ntools/source/help/HelpIndexer.cxx
@@ -1,4 +1,5 @@
#include <l10ntools/HelpIndexer.hxx>
+#include "LuceneHelper.hxx"
#define TODO
@@ -100,22 +101,6 @@ bool HelpIndexer::scanForFiles(rtl::OUString const & path) {
return true;
}
-std::vector<TCHAR> OUStringToTCHARVec(rtl::OUString const &rStr)
-{
- //UTF-16
- if (sizeof(wchar_t) == sizeof(sal_Unicode))
- return std::vector<TCHAR>(rStr.getStr(), rStr.getStr() + rStr.getLength());
-
- //UTF-32
- std::vector<TCHAR> aRet;
- for (sal_Int32 nStrIndex = 0; nStrIndex < rStr.getLength();)
- {
- const sal_uInt32 nCode = rStr.iterateCodePoints(&nStrIndex);
- aRet.push_back(nCode);
- }
- return aRet;
-}
-
bool HelpIndexer::helpDocument(rtl::OUString const & fileName, Document *doc) {
// Add the help path as an indexed, untokenized field.
rtl::OUString path = rtl::OUString(RTL_CONSTASCII_USTRINGPARAM("#HLP#")) + d_module + rtl::OUString(RTL_CONSTASCII_USTRINGPARAM("/")) + fileName;
diff --git a/l10ntools/source/help/HelpSearch.cxx b/l10ntools/source/help/HelpSearch.cxx
new file mode 100644
index 000000000000..f50c44eb7cbd
--- /dev/null
+++ b/l10ntools/source/help/HelpSearch.cxx
@@ -0,0 +1,40 @@
+#include <l10ntools/HelpSearch.hxx>
+#include "LuceneHelper.hxx"
+
+#include <iostream>
+
+// Store the help language and the index directory for later queries.
+HelpSearch::HelpSearch(rtl::OUString const &lang, rtl::OUString const &indexDir)
+    : d_lang(lang)
+    , d_indexDir(indexDir)
+{
+}
+
+// Look up queryStr as a single term in the Lucene index stored in d_indexDir.
+//
+// The term is searched in the "caption" field when captionOnly is true and in
+// the "content" field otherwise. A query ending in '*' is run as a wildcard
+// query, anything else as an exact term query.
+//
+// For every hit the value of its "path" field (an empty string when the field
+// is missing) is appended to rDocuments and its score to rScores. The vectors
+// are appended to, not cleared.
+//
+// Always returns true. An empty queryStr produces no hits and does not open
+// the index.
+bool HelpSearch::query(rtl::OUString const &queryStr, bool captionOnly,
+        std::vector<rtl::OUString> &rDocuments, std::vector<float> &rScores) {
+    // The wildcard test below reads the last character, which an empty query
+    // does not have.
+    if (queryStr.getLength() == 0)
+        return true;
+
+    rtl::OString pathStr;
+    // NOTE(review): the conversion result is ignored and the target encoding
+    // is plain ASCII - confirm that index paths never contain other characters.
+    d_indexDir.convertToString(&pathStr, RTL_TEXTENCODING_ASCII_US, 0);
+    lucene::index::IndexReader *reader = lucene::index::IndexReader::open(pathStr.getStr());
+
+    {
+        // Inner scope: the searcher only borrows the reader, so it has to be
+        // destroyed before the reader is deleted below.
+        lucene::search::IndexSearcher searcher(reader);
+
+        TCHAR captionField[] = L"caption";
+        TCHAR contentField[] = L"content";
+        TCHAR *field = captionOnly ? captionField : contentField;
+
+        bool isWildcard = queryStr[queryStr.getLength() - 1] == L'*';
+        std::vector<TCHAR> aQueryStr(OUStringToTCHARVec(queryStr));
+
+        lucene::index::Term *term = new lucene::index::Term(field, &aQueryStr[0]);
+        lucene::search::Query *aQuery = isWildcard ?
+            static_cast<lucene::search::Query*>(new lucene::search::WildcardQuery(term)) :
+            static_cast<lucene::search::Query*>(new lucene::search::TermQuery(term));
+        // CLucene terms are reference counted and the query takes its own
+        // reference, so the reference held by the creator is released here.
+        _CLDECDELETE(term);
+
+        lucene::search::Hits *hits = searcher.search(aQuery);
+        for (unsigned i = 0; i < hits->length(); ++i) {
+            lucene::document::Document &doc = hits->doc(i); // Document* belongs to Hits.
+            wchar_t const *path = doc.get(L"path");
+            rDocuments.push_back(TCHARArrayToOUString(path != 0 ? path : L""));
+            rScores.push_back(hits->score(i));
+        }
+
+        delete hits;
+        delete aQuery;
+    }
+
+    // IndexReader::open handed us ownership: close the index and free the reader.
+    reader->close();
+    delete reader;
+    return true;
+}
diff --git a/l10ntools/source/help/LuceneHelper.cxx b/l10ntools/source/help/LuceneHelper.cxx
new file mode 100644
index 000000000000..a88542f93009
--- /dev/null
+++ b/l10ntools/source/help/LuceneHelper.cxx
@@ -0,0 +1,33 @@
+#include "LuceneHelper.hxx"
+
+// Copy rStr into a zero-terminated TCHAR buffer.
+//
+// When TCHAR has the width of sal_Unicode the UTF-16 code units are copied
+// as they are; otherwise every code point becomes one TCHAR. In both cases
+// the last element of the returned vector is the terminating 0, so
+// &result[0] can be used as a C string.
+std::vector<TCHAR> OUStringToTCHARVec(rtl::OUString const &rStr)
+{
+    //UTF-16
+    if (sizeof(TCHAR) == sizeof(sal_Unicode))
+        return std::vector<TCHAR>(rStr.getStr(), rStr.getStr() + rStr.getLength() + 1);
+
+    //UTF-32
+    std::vector<TCHAR> aRet;
+    aRet.reserve(rStr.getLength() + 1);
+    // Only iterate over real code points; asking iterateCodePoints for the
+    // position one past the end just to pick up the terminator is not a
+    // valid request.
+    for (sal_Int32 nStrIndex = 0; nStrIndex < rStr.getLength(); )
+    {
+        const sal_uInt32 nCode = rStr.iterateCodePoints(&nStrIndex);
+        aRet.push_back(nCode);
+    }
+    aRet.push_back(0);
+    return aRet;
+}
+
+// Number of TCHARs in str that come before the terminating 0.
+inline unsigned tstrlen(TCHAR const *str) {
+    unsigned count = 0;
+    while (str[count] != 0)
+        ++count;
+    return count;
+}
+
+// Build an OUString from a zero-terminated TCHAR string.
+//
+// When TCHAR has the width of sal_Unicode the buffer is taken as UTF-16 code
+// units; otherwise each TCHAR is taken as one code point. str must not be
+// null.
+rtl::OUString TCHARArrayToOUString(TCHAR const *str)
+{
+    // UTF-16
+    if (sizeof(TCHAR) == sizeof(sal_Unicode))
+        return rtl::OUString(reinterpret_cast<sal_Unicode const *>(str));
+
+    // UTF-32: use the code point constructor. tstrlen counts TCHARs, which is
+    // the number of code points and not a byte length, so the buffer must not
+    // go through the byte string constructor.
+    return rtl::OUString(reinterpret_cast<sal_uInt32 const *>(str),
+        static_cast<sal_Int32>(tstrlen(str)));
+}
diff --git a/l10ntools/source/help/LuceneHelper.hxx b/l10ntools/source/help/LuceneHelper.hxx
new file mode 100644
index 000000000000..7591b8ca0760
--- /dev/null
+++ b/l10ntools/source/help/LuceneHelper.hxx
@@ -0,0 +1,13 @@
+#ifndef LUCENEHELPER_HXX
+#define LUCENEHELPER_HXX
+
+#include <CLucene/StdHeader.h>
+#include <CLucene.h>
+
+#include <rtl/ustring.hxx>
+#include <vector>
+
+std::vector<TCHAR> OUStringToTCHARVec(rtl::OUString const &rStr);
+rtl::OUString TCHARArrayToOUString(TCHAR const *str);
+
+#endif
diff --git a/l10ntools/source/help/makefile.mk b/l10ntools/source/help/makefile.mk
index 2ae32329d546..a466e2c9fc24 100644
--- a/l10ntools/source/help/makefile.mk
+++ b/l10ntools/source/help/makefile.mk
@@ -56,12 +56,16 @@ OBJFILES=\
$(OBJ)$/HelpLinker.obj \
$(OBJ)$/HelpCompiler.obj \
$(OBJ)$/HelpIndexer.obj \
- $(OBJ)$/HelpIndexer_main.obj
+ $(OBJ)$/HelpIndexer_main.obj \
+ $(OBJ)$/HelpSearch.obj \
+ $(OBJ)$/LuceneHelper.obj
SLOFILES=\
$(SLO)$/HelpLinker.obj \
$(SLO)$/HelpCompiler.obj \
- $(SLO)$/HelpIndexer.obj
+ $(SLO)$/LuceneHelper.obj \
+ $(SLO)$/HelpIndexer.obj \
+ $(SLO)$/HelpSearch.obj
.IF "$(OS)" == "MACOSX" && "$(CPU)" == "P" && "$(COM)" == "GCC"
# There appears to be a GCC 4.0.1 optimization error causing _file:good() to
@@ -85,6 +89,7 @@ APP1STDLIBS+=$(SALLIB) $(BERKELEYLIB) $(XSLTLIB) $(EXPATASCII3RDLIB)
APP2TARGET=HelpIndexer
APP2OBJS=\
+ $(OBJ)$/LuceneHelper.obj \
$(OBJ)$/HelpIndexer.obj \
$(OBJ)$/HelpIndexer_main.obj
APP2RPATH = NONE