summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSiegfried-Angel Gevatter Pujals <siegfried@gevatter.com>2012-02-10 12:28:05 +0100
committerSiegfried-Angel Gevatter Pujals <siegfried@gevatter.com>2012-02-10 12:28:05 +0100
commit9dabfaeab3c0190a74c071e1d2922d9b1ce2c687 (patch)
tree9dee2ffea0fcf227994f1fbd46d8f8454cf02170
parentdc35affc0590dcec370b7edcb7960bdb57d45d8c (diff)
parentb43ee3c586dd8fe1da25fc8aeb759030b39032a9 (diff)
Merge FTS++ by Michal Hruby.
-rw-r--r--.bzrignore13
-rw-r--r--configure.ac4
-rw-r--r--extensions/Makefile.am2
-rw-r--r--extensions/fts++/Makefile.am113
-rw-r--r--extensions/fts++/controller.cpp136
-rw-r--r--extensions/fts++/controller.h72
l---------extensions/fts++/datamodel.vala1
l---------extensions/fts++/db-reader.vala1
l---------extensions/fts++/engine.vala1
l---------extensions/fts++/errors.vala1
-rw-r--r--extensions/fts++/ext-dummies.vala71
-rw-r--r--extensions/fts++/fts.cpp136
-rw-r--r--extensions/fts++/fts.h59
-rw-r--r--extensions/fts++/fts.vapi25
-rw-r--r--extensions/fts++/indexer.cpp897
-rw-r--r--extensions/fts++/indexer.h115
l---------extensions/fts++/mimetype.vala1
l---------extensions/fts++/ontology-uris.vala1
l---------extensions/fts++/ontology.vala1
-rw-r--r--extensions/fts++/org.gnome.zeitgeist.fts.service.in (renamed from extensions/fts-python/org.gnome.zeitgeist.fts.service.in)2
l---------extensions/fts++/remote.vala1
l---------extensions/fts++/sql-schema.vala1
l---------extensions/fts++/sql.vala1
-rw-r--r--extensions/fts++/stringutils.cpp128
-rw-r--r--extensions/fts++/stringutils.h42
l---------extensions/fts++/table-lookup.vala1
-rw-r--r--extensions/fts++/task.cpp47
-rw-r--r--extensions/fts++/task.h100
-rw-r--r--extensions/fts++/test/Makefile.am27
-rw-r--r--extensions/fts++/test/test-fts.c37
-rw-r--r--extensions/fts++/test/test-indexer.cpp531
-rw-r--r--extensions/fts++/test/test-stringutils.cpp178
l---------extensions/fts++/utils.vala1
l---------extensions/fts++/where-clause.vala1
-rw-r--r--extensions/fts++/zeitgeist-fts.vala301
-rw-r--r--extensions/fts-python/Makefile.am23
-rw-r--r--extensions/fts-python/constants.py71
-rw-r--r--extensions/fts-python/datamodel.py83
-rw-r--r--extensions/fts-python/fts.py1273
-rw-r--r--extensions/fts-python/lrucache.py125
-rw-r--r--extensions/fts-python/sql.py301
-rw-r--r--extensions/fts.vala14
-rw-r--r--src/datamodel.vala3
-rw-r--r--src/engine.vala1
-rw-r--r--src/notify.vala77
-rw-r--r--src/sql.vala4
-rw-r--r--src/table-lookup.vala20
-rw-r--r--src/zeitgeist-daemon.vala2
48 files changed, 3147 insertions, 1899 deletions
diff --git a/.bzrignore b/.bzrignore
index df052f35..c70ad2bf 100644
--- a/.bzrignore
+++ b/.bzrignore
@@ -44,12 +44,23 @@ extensions/*.c
extensions/*.stamp
extensions/*.la
extensions/*.lo
+extensions/fts++/.deps
+extensions/fts++/.libs
+extensions/fts++/*.c
+extensions/fts++/*.stamp
+extensions/fts++/*.la
+extensions/fts++/*.lo
+extensions/fts++/zeitgeist-internal.*
+extensions/fts++/test/.deps
+extensions/fts++/test/.libs
+extensions/fts++/test/test-fts
+extensions/fts++/org.gnome.zeitgeist.fts.service
+extensions/fts++/zeitgeist-fts
test/direct/marshalling
test/dbus/__pycache__
test/direct/table-lookup-test
src/zeitgeist-engine.vapi
src/zeitgeist-engine.h
-extensions/fts-python/org.gnome.zeitgeist.fts.service
py-compile
python/_ontology.py
test/direct/*.c
diff --git a/configure.ac b/configure.ac
index 6b5ca97e..83b06deb 100644
--- a/configure.ac
+++ b/configure.ac
@@ -8,6 +8,7 @@ AM_PATH_PYTHON
AC_PROG_CC
AM_PROG_CC_C_O
+AC_PROG_CXX
AC_DISABLE_STATIC
AC_PROG_LIBTOOL
@@ -59,7 +60,8 @@ AC_CONFIG_FILES([
Makefile
src/Makefile
extensions/Makefile
- extensions/fts-python/Makefile
+ extensions/fts++/Makefile
+ extensions/fts++/test/Makefile
data/Makefile
data/ontology/Makefile
python/Makefile
diff --git a/extensions/Makefile.am b/extensions/Makefile.am
index e6642522..5ddc9238 100644
--- a/extensions/Makefile.am
+++ b/extensions/Makefile.am
@@ -1,4 +1,4 @@
-SUBDIRS = fts-python
+SUBDIRS = fts++
NULL =
diff --git a/extensions/fts++/Makefile.am b/extensions/fts++/Makefile.am
new file mode 100644
index 00000000..931695f7
--- /dev/null
+++ b/extensions/fts++/Makefile.am
@@ -0,0 +1,113 @@
+SUBDIRS = test
+NULL =
+
+noinst_LTLIBRARIES = libzeitgeist-internal.la
+libexec_PROGRAMS = zeitgeist-fts
+
+servicedir = $(DBUS_SERVICES_DIR)
+service_DATA = org.gnome.zeitgeist.fts.service
+
+org.gnome.zeitgeist.fts.service: org.gnome.zeitgeist.fts.service.in
+ $(AM_V_GEN)sed -e s!\@libexecdir\@!$(libexecdir)! < $< > $@
+org.gnome.zeitgeist.fts.service: Makefile
+
+AM_CPPFLAGS = \
+ $(ZEITGEIST_CFLAGS) \
+ -include $(CONFIG_HEADER) \
+ -w \
+ $(NULL)
+
+AM_VALAFLAGS = \
+ --target-glib=2.26 \
+ --pkg gio-2.0 \
+ --pkg sqlite3 \
+ --pkg posix \
+ --pkg gmodule-2.0 \
+ $(top_srcdir)/config.vapi \
+ $(NULL)
+
+libzeitgeist_internal_la_VALASOURCES = \
+ datamodel.vala \
+ db-reader.vala \
+ engine.vala \
+ sql.vala \
+ remote.vala \
+ utils.vala \
+ errors.vala \
+ table-lookup.vala \
+ sql-schema.vala \
+ where-clause.vala \
+ ontology.vala \
+ ontology-uris.vala \
+ mimetype.vala \
+ ext-dummies.vala \
+ $(NULL)
+
+libzeitgeist_internal_la_SOURCES = \
+ zeitgeist-internal.stamp \
+ $(libzeitgeist_internal_la_VALASOURCES:.vala=.c) \
+ $(NULL)
+
+libzeitgeist_internal_la_LIBADD = \
+ $(ZEITGEIST_LIBS) \
+ $(NULL)
+
+zeitgeist_fts_VALASOURCES = \
+ zeitgeist-fts.vala \
+ $(NULL)
+
+zeitgeist_fts_SOURCES = \
+ zeitgeist-fts_vala.stamp \
+ $(zeitgeist_fts_VALASOURCES:.vala=.c) \
+ controller.cpp \
+ controller.h \
+ fts.cpp \
+ fts.h \
+ indexer.cpp \
+ indexer.h \
+ task.cpp \
+ task.h \
+ stringutils.cpp \
+ stringutils.h \
+ $(NULL)
+
+zeitgeist_fts_LDADD = \
+ $(builddir)/libzeitgeist-internal.la \
+ -lxapian \
+ $(NULL)
+
+BUILT_SOURCES = \
+ zeitgeist-internal.stamp \
+ zeitgeist-fts_vala.stamp \
+ $(NULL)
+
+zeitgeist-internal.stamp: $(libzeitgeist_internal_la_VALASOURCES)
+ $(VALA_V)$(VALAC) $(AM_VALAFLAGS) $(VALAFLAGS) -C -H zeitgeist-internal.h --library zeitgeist-internal $^
+ @touch "$@"
+
+zeitgeist-fts_vala.stamp: $(zeitgeist_fts_VALASOURCES)
+ $(VALA_V)$(VALAC) $(AM_VALAFLAGS) $(VALAFLAGS) \
+ $(srcdir)/zeitgeist-internal.vapi $(srcdir)/fts.vapi -C $^
+ @touch "$@"
+
+EXTRA_DIST = \
+ $(libzeitgeist_internal_la_VALASOURCES) \
+ $(zeitgeist_fts_VALASOURCES) \
+ zeitgeist-fts_vala.stamp \
+ zeitgeist-internal.h \
+ zeitgeist-internal.vapi \
+ org.gnome.zeitgeist.fts.service.in \
+ $(NULL)
+
+CLEANFILES = org.gnome.zeitgeist.fts.service
+
+DISTCLEANFILES = \
+ $(NULL)
+
+distclean-local:
+ rm -f *.c *.o *.stamp *.~[0-9]~
+
+VALA_V = $(VALA_V_$(V))
+VALA_V_ = $(VALA_V_$(AM_DEFAULT_VERBOSITY))
+VALA_V_0 = @echo " VALAC " $^;
+
diff --git a/extensions/fts++/controller.cpp b/extensions/fts++/controller.cpp
new file mode 100644
index 00000000..51584cd5
--- /dev/null
+++ b/extensions/fts++/controller.cpp
@@ -0,0 +1,136 @@
+/*
+ * Copyright (C) 2012 Mikkel Kamstrup Erlandsen
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Authored by Mikkel Kamstrup Erlandsen <mikkel.kamstrup@gmail.com>
+ *
+ */
+
+#include "controller.h"
+
+namespace ZeitgeistFTS {
+
+void Controller::Initialize (GError **error)
+{
+ indexer->Initialize (error);
+}
+
+void Controller::Run ()
+{
+ if (!indexer->CheckIndex ())
+ {
+ indexer->DropIndex ();
+ RebuildIndex ();
+ }
+}
+
+void Controller::RebuildIndex ()
+{
+ GError *error = NULL;
+ GPtrArray *events;
+ GPtrArray *templates = g_ptr_array_new ();
+ ZeitgeistTimeRange *time_range = zeitgeist_time_range_new_anytime ();
+
+ g_debug ("asking reader for all events");
+ events = zeitgeist_db_reader_find_events (zg_reader,
+ time_range,
+ templates,
+ ZEITGEIST_STORAGE_STATE_ANY,
+ 0,
+ ZEITGEIST_RESULT_TYPE_MOST_RECENT_EVENTS,
+ NULL,
+ &error);
+
+ if (error)
+ {
+ g_warning ("%s", error->message);
+ g_error_free (error);
+ }
+ else
+ {
+ g_debug ("reader returned %u events", events->len);
+
+ IndexEvents (events);
+ g_ptr_array_unref (events);
+
+ // Set the db metadata key only once we're done
+ PushTask (new MetadataTask ("fts_index_version", INDEX_VERSION));
+ }
+
+ g_object_unref (time_range);
+ g_ptr_array_unref (templates);
+}
+
+void Controller::IndexEvents (GPtrArray *events)
+{
+ const int CHUNK_SIZE = 32;
+ // Break down index tasks into suitable chunks
+ for (unsigned i = 0; i < events->len; i += CHUNK_SIZE)
+ {
+ PushTask (new IndexEventsTask (g_ptr_array_ref (events), i, CHUNK_SIZE));
+ }
+}
+
+void Controller::DeleteEvents (guint *event_ids, int event_ids_size)
+{
+ // FIXME: Should we break the task here as well?
+ PushTask (new DeleteEventsTask (event_ids, event_ids_size));
+}
+
+void Controller::PushTask (Task* task)
+{
+ queued_tasks.push (task);
+
+ if (processing_source_id == 0)
+ {
+ processing_source_id =
+ g_idle_add ((GSourceFunc) &Controller::ProcessTask, this);
+ }
+}
+
+gboolean Controller::ProcessTask ()
+{
+ if (!queued_tasks.empty ())
+ {
+ Task *task;
+
+ task = queued_tasks.front ();
+ queued_tasks.pop ();
+
+ task->Process (indexer);
+ delete task;
+ }
+
+ bool all_done = queued_tasks.empty ();
+ if (all_done)
+ {
+ indexer->Commit ();
+ if (processing_source_id != 0)
+ {
+ g_source_remove (processing_source_id);
+ processing_source_id = 0;
+ }
+ return FALSE;
+ }
+
+ return TRUE;
+}
+
+bool Controller::HasPendingTasks ()
+{
+ return !queued_tasks.empty ();
+}
+
+}
diff --git a/extensions/fts++/controller.h b/extensions/fts++/controller.h
new file mode 100644
index 00000000..abcd8fda
--- /dev/null
+++ b/extensions/fts++/controller.h
@@ -0,0 +1,72 @@
+/*
+ * Copyright (C) 2012 Mikkel Kamstrup Erlandsen
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Authored by Mikkel Kamstrup Erlandsen <mikkel.kamstrup@gmail.com>
+ *
+ */
+
+#ifndef _ZGFTS_CONTROLLER_H_
+#define _ZGFTS_CONTROLLER_H_
+
+#include <glib-object.h>
+#include <queue>
+#include <vector>
+
+#include "indexer.h"
+#include "task.h"
+#include "zeitgeist-internal.h"
+
+namespace ZeitgeistFTS {
+
+class Controller {
+public:
+ Controller (ZeitgeistDbReader *reader)
+ : zg_reader (reader)
+ , processing_source_id (0)
+ , indexer (new Indexer (reader)) {};
+
+ ~Controller ()
+ {
+ if (processing_source_id != 0)
+ {
+ g_source_remove (processing_source_id);
+ }
+ }
+
+ void Initialize (GError **error);
+ void Run ();
+ void RebuildIndex ();
+
+ void IndexEvents (GPtrArray *events);
+ void DeleteEvents (guint *event_ids, int event_ids_size);
+
+ void PushTask (Task* task);
+ bool HasPendingTasks ();
+ gboolean ProcessTask ();
+
+ Indexer *indexer;
+
+private:
+ ZeitgeistDbReader *zg_reader;
+
+ typedef std::queue<Task*> TaskQueue;
+ TaskQueue queued_tasks;
+ guint processing_source_id;
+};
+
+}
+
+#endif /* _ZGFTS_CONTROLLER_H_ */
diff --git a/extensions/fts++/datamodel.vala b/extensions/fts++/datamodel.vala
new file mode 120000
index 00000000..02172aac
--- /dev/null
+++ b/extensions/fts++/datamodel.vala
@@ -0,0 +1 @@
+../../src/datamodel.vala \ No newline at end of file
diff --git a/extensions/fts++/db-reader.vala b/extensions/fts++/db-reader.vala
new file mode 120000
index 00000000..fecbc782
--- /dev/null
+++ b/extensions/fts++/db-reader.vala
@@ -0,0 +1 @@
+../../src/db-reader.vala \ No newline at end of file
diff --git a/extensions/fts++/engine.vala b/extensions/fts++/engine.vala
new file mode 120000
index 00000000..e2314a5e
--- /dev/null
+++ b/extensions/fts++/engine.vala
@@ -0,0 +1 @@
+../../src/engine.vala \ No newline at end of file
diff --git a/extensions/fts++/errors.vala b/extensions/fts++/errors.vala
new file mode 120000
index 00000000..c630d3ed
--- /dev/null
+++ b/extensions/fts++/errors.vala
@@ -0,0 +1 @@
+../../src/errors.vala \ No newline at end of file
diff --git a/extensions/fts++/ext-dummies.vala b/extensions/fts++/ext-dummies.vala
new file mode 100644
index 00000000..f77a2b2d
--- /dev/null
+++ b/extensions/fts++/ext-dummies.vala
@@ -0,0 +1,71 @@
+/* ext-dummies.vala
+ *
+ * Copyright © 2011-2012 Michal Hruby <michal.mhr@gmail.com>
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation, either version 2.1 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+namespace Zeitgeist
+{
+ public class ExtensionCollection : Object
+ {
+ public unowned Engine engine { get; construct; }
+
+ public ExtensionCollection (Engine engine)
+ {
+ Object (engine: engine);
+ }
+
+ public string[] get_extension_names ()
+ {
+ string[] result = {};
+ return result;
+ }
+
+ public void call_pre_insert_events (GenericArray<Event?> events,
+ BusName? sender)
+ {
+ }
+
+ public void call_post_insert_events (GenericArray<Event?> events,
+ BusName? sender)
+ {
+ }
+
+ public unowned uint32[] call_pre_delete_events (uint32[] event_ids,
+ BusName? sender)
+ {
+ return event_ids;
+ }
+
+ public void call_post_delete_events (uint32[] event_ids,
+ BusName? sender)
+ {
+ }
+ }
+
+ public class ExtensionStore : Object
+ {
+ public unowned Engine engine { get; construct; }
+
+ public ExtensionStore (Engine engine)
+ {
+ Object (engine: engine);
+ }
+ }
+
+}
+
+// vim:expandtab:ts=4:sw=4
diff --git a/extensions/fts++/fts.cpp b/extensions/fts++/fts.cpp
new file mode 100644
index 00000000..5d66e2ea
--- /dev/null
+++ b/extensions/fts++/fts.cpp
@@ -0,0 +1,136 @@
+/*
+ * Copyright (C) 2012 Canonical Ltd
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Authored by Michal Hruby <michal.hruby@canonical.com>
+ *
+ */
+
+#include "fts.h"
+#include "indexer.h"
+#include "controller.h"
+
+ZeitgeistIndexer*
+zeitgeist_indexer_new (ZeitgeistDbReader *reader, GError **error)
+{
+ ZeitgeistFTS::Controller *ctrl;
+ GError *local_error;
+
+ g_return_val_if_fail (ZEITGEIST_IS_DB_READER (reader), NULL);
+ g_return_val_if_fail (error == NULL || *error == NULL, NULL);
+
+ g_setenv ("XAPIAN_CJK_NGRAM", "1", TRUE);
+ ctrl = new ZeitgeistFTS::Controller (reader);
+
+ local_error = NULL;
+ ctrl->Initialize (&local_error);
+ if (local_error)
+ {
+ delete ctrl;
+ g_propagate_error (error, local_error);
+ return NULL;
+ }
+
+
+ ctrl->Run ();
+
+ return (ZeitgeistIndexer*) ctrl;
+}
+
+void
+zeitgeist_indexer_free (ZeitgeistIndexer* indexer)
+{
+ g_return_if_fail (indexer != NULL);
+
+ delete (ZeitgeistFTS::Controller*) indexer;
+}
+
+GPtrArray* zeitgeist_indexer_search (ZeitgeistIndexer *indexer,
+ const gchar *search_string,
+ ZeitgeistTimeRange *time_range,
+ GPtrArray *templates,
+ guint offset,
+ guint count,
+ ZeitgeistResultType result_type,
+ guint *matches,
+ GError **error)
+{
+ GPtrArray *results;
+ ZeitgeistFTS::Controller *_indexer;
+
+ g_return_val_if_fail (indexer != NULL, NULL);
+ g_return_val_if_fail (search_string != NULL, NULL);
+ g_return_val_if_fail (ZEITGEIST_IS_TIME_RANGE (time_range), NULL);
+ g_return_val_if_fail (error == NULL || *error == NULL, NULL);
+
+ _indexer = (ZeitgeistFTS::Controller*) indexer;
+
+ results = _indexer->indexer->Search (search_string, time_range,
+ templates, offset, count, result_type,
+ matches, error);
+
+ return results;
+}
+
+void zeitgeist_indexer_index_events (ZeitgeistIndexer *indexer,
+ GPtrArray *events)
+{
+ ZeitgeistFTS::Controller *_indexer;
+
+ g_return_if_fail (indexer != NULL);
+ g_return_if_fail (events != NULL);
+
+ _indexer = (ZeitgeistFTS::Controller*) indexer;
+
+ _indexer->IndexEvents (events);
+}
+
+void zeitgeist_indexer_delete_events (ZeitgeistIndexer *indexer,
+ guint *event_ids,
+ int event_ids_size)
+{
+ ZeitgeistFTS::Controller *_indexer;
+
+ g_return_if_fail (indexer != NULL);
+
+ if (event_ids_size <= 0) return;
+
+ _indexer = (ZeitgeistFTS::Controller*) indexer;
+
+ _indexer->DeleteEvents (event_ids, event_ids_size);
+}
+
+gboolean zeitgeist_indexer_has_pending_tasks (ZeitgeistIndexer *indexer)
+{
+ ZeitgeistFTS::Controller *_indexer;
+
+ g_return_val_if_fail (indexer != NULL, FALSE);
+
+ _indexer = (ZeitgeistFTS::Controller*) indexer;
+
+ return _indexer->HasPendingTasks () ? TRUE : FALSE;
+}
+
+void zeitgeist_indexer_process_task (ZeitgeistIndexer *indexer)
+{
+ ZeitgeistFTS::Controller *_indexer;
+
+ g_return_if_fail (indexer != NULL);
+
+ _indexer = (ZeitgeistFTS::Controller*) indexer;
+
+ _indexer->ProcessTask ();
+}
+
diff --git a/extensions/fts++/fts.h b/extensions/fts++/fts.h
new file mode 100644
index 00000000..2226ec90
--- /dev/null
+++ b/extensions/fts++/fts.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright (C) 2012 Canonical Ltd
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Authored by Michal Hruby <michal.hruby@canonical.com>
+ *
+ */
+
+#ifndef _ZGFTS_H_
+#define _ZGFTS_H_
+
+#include <glib.h>
+#include "zeitgeist-internal.h"
+
+typedef struct _ZeitgeistIndexer ZeitgeistIndexer;
+
+G_BEGIN_DECLS
+
+ZeitgeistIndexer* zeitgeist_indexer_new (ZeitgeistDbReader* reader,
+ GError **error);
+
+void zeitgeist_indexer_free (ZeitgeistIndexer* indexer);
+
+GPtrArray* zeitgeist_indexer_search (ZeitgeistIndexer *indexer,
+ const gchar *search_string,
+ ZeitgeistTimeRange *time_range,
+ GPtrArray *templates,
+ guint offset,
+ guint count,
+ ZeitgeistResultType result_type,
+ guint *matches,
+ GError **error);
+
+void zeitgeist_indexer_index_events (ZeitgeistIndexer *indexer,
+ GPtrArray *events);
+
+void zeitgeist_indexer_delete_events (ZeitgeistIndexer *indexer,
+ guint *event_ids,
+ int event_ids_size);
+
+gboolean zeitgeist_indexer_has_pending_tasks (ZeitgeistIndexer *indexer);
+
+void zeitgeist_indexer_process_task (ZeitgeistIndexer *indexer);
+
+G_END_DECLS
+
+#endif /* _ZGFTS_H_ */
diff --git a/extensions/fts++/fts.vapi b/extensions/fts++/fts.vapi
new file mode 100644
index 00000000..1aae3602
--- /dev/null
+++ b/extensions/fts++/fts.vapi
@@ -0,0 +1,25 @@
+/* indexer.vapi is hand-written - not a big deal for these ~10 lines */
+
+namespace Zeitgeist {
+ [Compact]
+ [CCode (free_function = "zeitgeist_indexer_free", cheader_filename = "fts.h")]
+ public class Indexer {
+ public Indexer (DbReader reader) throws EngineError;
+
+ public GLib.GenericArray<Event> search (string search_string,
+ TimeRange time_range,
+ GLib.GenericArray<Event> templates,
+ uint offset,
+ uint count,
+ ResultType result_type,
+ out uint matches) throws GLib.Error;
+
+ public void index_events (GLib.GenericArray<Event> events);
+
+ public void delete_events (uint[] event_ids);
+
+ public bool has_pending_tasks ();
+
+ public void process_task ();
+ }
+}
diff --git a/extensions/fts++/indexer.cpp b/extensions/fts++/indexer.cpp
new file mode 100644
index 00000000..d97f7ebd
--- /dev/null
+++ b/extensions/fts++/indexer.cpp
@@ -0,0 +1,897 @@
+/*
+ * Copyright (C) 2012 Canonical Ltd
+ * 2012 Mikkel Kamstrup Erlandsen
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Authored by Michal Hruby <michal.hruby@canonical.com>
+ * Mikkel Kamstrup Erlandsen <mikkel.kamstrup@gmail.com>
+ *
+ */
+
+#include "indexer.h"
+#include "stringutils.h"
+#include <xapian.h>
+#include <queue>
+#include <vector>
+
+#include <gio/gio.h>
+#include <gio/gdesktopappinfo.h>
+
+namespace ZeitgeistFTS {
+
+const std::string FILTER_PREFIX_EVENT_INTERPRETATION = "ZGEI";
+const std::string FILTER_PREFIX_EVENT_MANIFESTATION = "ZGEM";
+const std::string FILTER_PREFIX_ACTOR = "ZGA";
+const std::string FILTER_PREFIX_SUBJECT_URI = "ZGSU";
+const std::string FILTER_PREFIX_SUBJECT_INTERPRETATION = "ZGSI";
+const std::string FILTER_PREFIX_SUBJECT_MANIFESTATION = "ZGSM";
+const std::string FILTER_PREFIX_SUBJECT_ORIGIN = "ZGSO";
+const std::string FILTER_PREFIX_SUBJECT_MIMETYPE = "ZGST";
+const std::string FILTER_PREFIX_SUBJECT_STORAGE = "ZGSS";
+const std::string FILTER_PREFIX_XDG_CATEGORY = "AC";
+
+const Xapian::valueno VALUE_EVENT_ID = 0;
+const Xapian::valueno VALUE_TIMESTAMP = 1;
+
+#define QUERY_PARSER_FLAGS \
+ Xapian::QueryParser::FLAG_PHRASE | Xapian::QueryParser::FLAG_BOOLEAN | \
+ Xapian::QueryParser::FLAG_PURE_NOT | Xapian::QueryParser::FLAG_LOVEHATE | \
+ Xapian::QueryParser::FLAG_WILDCARD
+
+const std::string FTS_MAIN_DIR = "ftspp.index";
+
+void Indexer::Initialize (GError **error)
+{
+ try
+ {
+ if (zeitgeist_utils_using_in_memory_database ())
+ {
+ this->db = new Xapian::WritableDatabase;
+ this->db->add_database (Xapian::InMemory::open ());
+ }
+ else
+ {
+ gchar *path = g_build_filename (zeitgeist_utils_get_data_path (),
+ FTS_MAIN_DIR.c_str (), NULL);
+ this->db = new Xapian::WritableDatabase (path,
+ Xapian::DB_CREATE_OR_OPEN);
+ g_free (path);
+ }
+
+ this->tokenizer = new Xapian::TermGenerator ();
+ this->query_parser = new Xapian::QueryParser ();
+ this->query_parser->add_prefix ("name", "N");
+ this->query_parser->add_prefix ("title", "N");
+ this->query_parser->add_prefix ("site", "S");
+ this->query_parser->add_prefix ("app", "A");
+ this->query_parser->add_boolean_prefix ("zgei",
+ FILTER_PREFIX_EVENT_INTERPRETATION);
+ this->query_parser->add_boolean_prefix ("zgem",
+ FILTER_PREFIX_EVENT_MANIFESTATION);
+ this->query_parser->add_boolean_prefix ("zga", FILTER_PREFIX_ACTOR);
+ this->query_parser->add_prefix ("zgsu", FILTER_PREFIX_SUBJECT_URI);
+ this->query_parser->add_boolean_prefix ("zgsi",
+ FILTER_PREFIX_SUBJECT_INTERPRETATION);
+ this->query_parser->add_boolean_prefix ("zgsm",
+ FILTER_PREFIX_SUBJECT_MANIFESTATION);
+ this->query_parser->add_prefix ("zgso", FILTER_PREFIX_SUBJECT_ORIGIN);
+ this->query_parser->add_boolean_prefix ("zgst",
+ FILTER_PREFIX_SUBJECT_MIMETYPE);
+ this->query_parser->add_boolean_prefix ("zgss",
+ FILTER_PREFIX_SUBJECT_STORAGE);
+ this->query_parser->add_prefix ("category", FILTER_PREFIX_XDG_CATEGORY);
+
+ this->query_parser->add_valuerangeprocessor (
+ new Xapian::NumberValueRangeProcessor (VALUE_EVENT_ID, "id"));
+ this->query_parser->add_valuerangeprocessor (
+ new Xapian::NumberValueRangeProcessor (VALUE_TIMESTAMP, "ms", false));
+
+ this->query_parser->set_default_op (Xapian::Query::OP_AND);
+ this->query_parser->set_database (*this->db);
+
+ this->enquire = new Xapian::Enquire (*this->db);
+
+ }
+ catch (const Xapian::Error &xp_error)
+ {
+ g_set_error_literal (error,
+ ZEITGEIST_ENGINE_ERROR,
+ ZEITGEIST_ENGINE_ERROR_DATABASE_ERROR,
+ xp_error.get_msg ().c_str ());
+ this->db = NULL;
+ }
+}
+
+/**
+ * Returns true if and only if the index is good.
+ * Otherwise the index should be rebuild.
+ */
+bool Indexer::CheckIndex ()
+{
+ std::string db_version (db->get_metadata ("fts_index_version"));
+ if (db_version != INDEX_VERSION)
+ {
+ g_message ("Index must be upgraded. Doing full rebuild");
+ return false;
+ }
+ else if (db->get_doccount () == 0)
+ {
+ g_message ("Empty index detected. Doing full rebuild");
+ return false;
+ }
+
+ return true;
+}
+
+/**
+ * Clear the index and create a new empty one
+ */
+void Indexer::DropIndex ()
+{
+ try
+ {
+ if (this->db != NULL)
+ {
+ this->db->close ();
+ delete this->db;
+ this->db = NULL;
+ }
+
+ if (this->enquire != NULL)
+ {
+ delete this->enquire;
+ this->enquire = NULL;
+ }
+
+ if (zeitgeist_utils_using_in_memory_database ())
+ {
+ this->db = new Xapian::WritableDatabase;
+ this->db->add_database (Xapian::InMemory::open ());
+ }
+ else
+ {
+ gchar *path = g_build_filename (zeitgeist_utils_get_data_path (),
+ FTS_MAIN_DIR.c_str (), NULL);
+ this->db = new Xapian::WritableDatabase (path,
+ Xapian::DB_CREATE_OR_OVERWRITE);
+ // FIXME: leaks on error
+ g_free (path);
+ }
+
+ this->query_parser->set_database (*this->db);
+ this->enquire = new Xapian::Enquire (*this->db);
+ }
+ catch (const Xapian::Error &xp_error)
+ {
+ g_error ("Error ocurred during database reindex: %s",
+ xp_error.get_msg ().c_str ());
+ }
+}
+
+void Indexer::Commit ()
+{
+ try
+ {
+ db->commit ();
+ }
+ catch (Xapian::Error const& e)
+ {
+ g_warning ("Failed to commit changes: %s", e.get_msg ().c_str ());
+ }
+}
+
+std::string Indexer::ExpandType (std::string const& prefix,
+ const gchar* unparsed_uri)
+{
+ gchar* uri = g_strdup (unparsed_uri);
+ gboolean is_negation = zeitgeist_utils_parse_negation (&uri);
+ gboolean noexpand = zeitgeist_utils_parse_noexpand (&uri);
+
+ std::string result;
+ GList *symbols = NULL;
+ symbols = g_list_append (symbols, uri);
+ if (!noexpand)
+ {
+ GList *children = zeitgeist_symbol_get_all_children (uri);
+ symbols = g_list_concat (symbols, children);
+ }
+
+ for (GList *iter = symbols; iter != NULL; iter = iter->next)
+ {
+ result += prefix + std::string((gchar*) iter->data);
+ if (iter->next != NULL) result += " OR ";
+ }
+
+ g_list_free (symbols);
+ g_free (uri);
+
+ if (is_negation) result = "NOT (" + result + ")";
+
+ return result;
+}
+
+std::string Indexer::CompileEventFilterQuery (GPtrArray *templates)
+{
+ std::vector<std::string> query;
+
+ for (unsigned i = 0; i < templates->len; i++)
+ {
+ const gchar* val;
+ std::vector<std::string> tmpl;
+ ZeitgeistEvent *event = (ZeitgeistEvent*) g_ptr_array_index (templates, i);
+
+ val = zeitgeist_event_get_interpretation (event);
+ if (val && val[0] != '\0')
+ tmpl.push_back (ExpandType ("zgei:", val));
+
+ val = zeitgeist_event_get_manifestation (event);
+ if (val && val[0] != '\0')
+ tmpl.push_back (ExpandType ("zgem:", val));
+
+ val = zeitgeist_event_get_actor (event);
+ if (val && val[0] != '\0')
+ tmpl.push_back ("zga:" + StringUtils::MangleUri (val));
+
+ GPtrArray *subjects = zeitgeist_event_get_subjects (event);
+ for (unsigned j = 0; j < subjects->len; j++)
+ {
+ ZeitgeistSubject *subject = (ZeitgeistSubject*) g_ptr_array_index (subjects, j);
+ val = zeitgeist_subject_get_uri (subject);
+ if (val && val[0] != '\0')
+ tmpl.push_back ("zgsu:" + StringUtils::MangleUri (val));
+
+ val = zeitgeist_subject_get_interpretation (subject);
+ if (val && val[0] != '\0')
+ tmpl.push_back (ExpandType ("zgsi:", val));
+
+ val = zeitgeist_subject_get_manifestation (subject);
+ if (val && val[0] != '\0')
+ tmpl.push_back (ExpandType ("zgsm:", val));
+
+ val = zeitgeist_subject_get_origin (subject);
+ if (val && val[0] != '\0')
+ tmpl.push_back ("zgso:" + StringUtils::MangleUri (val));
+
+ val = zeitgeist_subject_get_mimetype (subject);
+ if (val && val[0] != '\0')
+ tmpl.push_back (std::string ("zgst:") + val);
+
+ val = zeitgeist_subject_get_storage (subject);
+ if (val && val[0] != '\0')
+ tmpl.push_back (std::string ("zgss:") + val);
+ }
+
+ if (tmpl.size () == 0) continue;
+
+ std::string event_query ("(");
+ for (int i = 0; i < tmpl.size (); i++)
+ {
+ event_query += tmpl[i];
+ if (i < tmpl.size () - 1) event_query += ") AND (";
+ }
+ query.push_back (event_query + ")");
+ }
+
+ if (query.size () == 0) return std::string ("");
+
+ std::string result;
+ for (int i = 0; i < query.size (); i++)
+ {
+ result += query[i];
+ if (i < query.size () - 1) result += " OR ";
+ }
+ return result;
+}
+
+std::string Indexer::CompileTimeRangeFilterQuery (gint64 start, gint64 end)
+{
+ // let's use gprinting to be safe
+ gchar *q = g_strdup_printf ("%" G_GINT64_FORMAT "..%" G_GINT64_FORMAT "ms",
+ start, end);
+ std::string query (q);
+ g_free (q);
+
+ return query;
+}
+
+/**
+ * Adds the filtering rules to the doc. Filtering rules will
+ * not affect the relevancy ranking of the event/doc
+ */
+void Indexer::AddDocFilters (ZeitgeistEvent *event, Xapian::Document &doc)
+{
+ const gchar* val;
+
+ val = zeitgeist_event_get_interpretation (event);
+ if (val && val[0] != '\0')
+ doc.add_boolean_term (StringUtils::Truncate (FILTER_PREFIX_EVENT_INTERPRETATION + val));
+
+ val = zeitgeist_event_get_manifestation (event);
+ if (val && val[0] != '\0')
+ doc.add_boolean_term (StringUtils::Truncate (FILTER_PREFIX_EVENT_MANIFESTATION + val));
+
+ val = zeitgeist_event_get_actor (event);
+ if (val && val[0] != '\0')
+ doc.add_boolean_term (StringUtils::Truncate (FILTER_PREFIX_ACTOR + StringUtils::MangleUri (val)));
+
+ GPtrArray *subjects = zeitgeist_event_get_subjects (event);
+ for (unsigned j = 0; j < subjects->len; j++)
+ {
+ ZeitgeistSubject *subject = (ZeitgeistSubject*) g_ptr_array_index (subjects, j);
+ val = zeitgeist_subject_get_uri (subject);
+ if (val && val[0] != '\0')
+ doc.add_boolean_term (StringUtils::Truncate (FILTER_PREFIX_SUBJECT_URI + StringUtils::MangleUri (val)));
+
+ val = zeitgeist_subject_get_interpretation (subject);
+ if (val && val[0] != '\0')
+ doc.add_boolean_term (StringUtils::Truncate (FILTER_PREFIX_SUBJECT_INTERPRETATION + val));
+
+ val = zeitgeist_subject_get_manifestation (subject);
+ if (val && val[0] != '\0')
+ doc.add_boolean_term (StringUtils::Truncate (FILTER_PREFIX_SUBJECT_MANIFESTATION + val));
+
+ val = zeitgeist_subject_get_origin (subject);
+ if (val && val[0] != '\0')
+ doc.add_boolean_term (StringUtils::Truncate (FILTER_PREFIX_SUBJECT_ORIGIN + StringUtils::MangleUri (val)));
+
+ val = zeitgeist_subject_get_mimetype (subject);
+ if (val && val[0] != '\0')
+ doc.add_boolean_term (StringUtils::Truncate (FILTER_PREFIX_SUBJECT_MIMETYPE + val));
+
+ val = zeitgeist_subject_get_storage (subject);
+ if (val && val[0] != '\0')
+ doc.add_boolean_term (StringUtils::Truncate (FILTER_PREFIX_SUBJECT_STORAGE + val));
+ }
+}
+
+void Indexer::IndexText (std::string const& text)
+{
+ // FIXME: ascii folding!
+ tokenizer->index_text (text, 5);
+}
+
+void Indexer::IndexUri (std::string const& uri, std::string const& origin)
+{
+ GFile *f = g_file_new_for_uri (uri.c_str ());
+
+ gchar *scheme = g_file_get_uri_scheme (f);
+ if (scheme == NULL)
+ {
+ g_warning ("Invalid URI: %s", uri.c_str ());
+ return;
+ }
+
+ std::string scheme_str(scheme);
+ g_free (scheme);
+
+ if (scheme_str == "file")
+ {
+ // FIXME: special case some typical filenames (like photos)
+ // examples of typical filenames from cameras:
+ // P07-08-08_16.25.JPG
+ // P070608_18.54.JPG
+ // P180308_22.27[1].jpg
+ // P6220111.JPG
+ // PC220006.JPG
+ // DSCN0149.JPG
+ // DSC01166.JPG
+ // SDC12583.JPG
+ // IMGP3199.JPG
+ // IMGP1251-4.jpg
+ // IMG_101_8987.JPG
+ // 10052010152.jpg
+ // 4867_93080512835_623012835_1949065_8351752_n.jpg
+ // 2011-05-29 10.49.37.jpg
+ // V100908_11.24.AVI
+ // video-2011-05-29-15-14-58.mp4
+
+ // get_parse_name will convert escaped characters to UTF-8, but only for
+ // the "file" scheme, so using it elsewhere won't be of much help
+
+ gchar *pn = g_file_get_parse_name (f);
+ gchar *basename = g_path_get_basename (pn);
+
+ // FIXME: remove unscores, CamelCase and process digits
+ tokenizer->index_text (basename, 5);
+ tokenizer->index_text (basename, 5, "N");
+
+ g_free (basename);
+ // limit the directory indexing to just a few levels
+ // (the original formula was weight = 5.0 / (1.5^n)
+ unsigned path_weights[] = { 3, 2, 1, 0 };
+ unsigned weight_index = 0;
+
+ // this should be equal to origin, but we already got a nice utf-8 display
+ // name, so we'll use that
+ gchar *dir = g_path_get_dirname (pn);
+ std::string path_component (dir);
+ g_free (dir);
+ g_free (pn);
+
+ while (path_component.length () > 2 &&
+ weight_index < G_N_ELEMENTS (path_weights))
+ {
+ // if this is already home directory we don't want it
+ if (path_component.length () == home_dir_path.length () &&
+ path_component == home_dir_path) return;
+
+ gchar *name = g_path_get_basename (path_component.c_str ());
+
+ // FIXME: un-underscore, uncamelcase, ascii fold
+ tokenizer->index_text (name, path_weights[weight_index++]);
+
+ dir = g_path_get_dirname (path_component.c_str ());
+ path_component = dir;
+ g_free (dir);
+ g_free (name);
+ }
+ }
+ else if (scheme_str == "mailto")
+ {
+ // mailto:username@server.com
+ size_t scheme_len = scheme_str.length () + 1;
+ size_t at_pos = uri.find ('@', scheme_len);
+ if (at_pos == std::string::npos) return;
+
+ tokenizer->index_text (uri.substr (scheme_len, at_pos - scheme_len), 5);
+ tokenizer->index_text (uri.substr (at_pos + 1), 1);
+ }
+ else if (scheme_str.compare (0, 4, "http") == 0)
+ {
+ // http / https - we'll index just the basename of the uri (minus query
+ // part) and the hostname/domain
+
+ // step 1) strip query part
+ gchar *basename;
+ size_t question_mark = uri.find ('?');
+ if (question_mark != std::string::npos)
+ {
+ std::string stripped (uri, 0, question_mark - 1);
+ basename = g_path_get_basename (stripped.c_str ());
+ }
+ else
+ {
+ basename = g_file_get_basename (f);
+ }
+
+ // step 2) unescape and check that it's valid utf8
+ gchar *unescaped_basename = g_uri_unescape_string (basename, "");
+
+ if (g_utf8_validate (unescaped_basename, -1, NULL))
+ {
+ // FIXME: remove unscores, CamelCase and process digits
+ tokenizer->index_text (unescaped_basename, 5);
+ tokenizer->index_text (unescaped_basename, 5, "N");
+ }
+
+ // and also index hostname (taken from origin field if possible)
+ std::string host_str (origin.empty () ? uri : origin);
+ size_t hostname_start = host_str.find ("://");
+ if (hostname_start != std::string::npos)
+ {
+ std::string hostname (host_str, hostname_start + 3);
+ size_t slash_pos = hostname.find ("/");
+ if (slash_pos != std::string::npos) hostname.resize (slash_pos);
+
+ // support IDN
+ if (g_hostname_is_ascii_encoded (hostname.c_str ()))
+ {
+ gchar *printable_hostname = g_hostname_to_unicode (hostname.c_str ());
+ if (printable_hostname != NULL) hostname = printable_hostname;
+ g_free (printable_hostname);
+ }
+
+ tokenizer->index_text (hostname, 2);
+ tokenizer->index_text (hostname, 2, "N");
+ tokenizer->index_text (hostname, 2, "S");
+ }
+
+ g_free (unescaped_basename);
+ g_free (basename);
+ }
+ else if (scheme_str == "data")
+ {
+ // we *really* don't want to index anything with this scheme
+ }
+ else
+ {
+ std::string authority, path, query;
+ StringUtils::SplitUri (uri, authority, path, query);
+
+ if (!path.empty ())
+ {
+ gchar *basename = g_path_get_basename (path.c_str ());
+ gchar *unescaped_basename = g_uri_unescape_string (basename, "");
+
+ if (g_utf8_validate (unescaped_basename, -1, NULL))
+ {
+ std::string capped (StringUtils::Truncate (unescaped_basename, 30));
+ tokenizer->index_text (capped, 5);
+ tokenizer->index_text (capped, 5, "N");
+ }
+
+ // FIXME: rest of the path?
+ g_free (unescaped_basename);
+ g_free (basename);
+ }
+
+ if (!authority.empty ())
+ {
+ std::string capped (StringUtils::Truncate (authority, 30));
+
+ tokenizer->index_text (capped, 2);
+ tokenizer->index_text (capped, 2, "N");
+ tokenizer->index_text (capped, 2, "S");
+ }
+ }
+
+ g_object_unref (f);
+}
+
+bool Indexer::IndexActor (std::string const& actor, bool is_subject)
+{
+ GDesktopAppInfo *dai = NULL;
+ // check the cache first
+ GAppInfo *ai = app_info_cache[actor];
+
+ if (ai == NULL)
+ {
+ // check also the failed cache
+ if (failed_lookups.count (actor) != 0) return false;
+
+ // and now try to load from the disk
+ if (g_path_is_absolute (actor.c_str ()))
+ {
+ dai = g_desktop_app_info_new_from_filename (actor.c_str ());
+ }
+ else if (g_str_has_prefix (actor.c_str (), "application://"))
+ {
+ dai = g_desktop_app_info_new (actor.substr (14).c_str ());
+ }
+
+ if (dai != NULL)
+ {
+ ai = G_APP_INFO (dai);
+ app_info_cache[actor] = ai;
+ }
+ else
+ {
+ // cache failed lookup
+ failed_lookups.insert (actor);
+ if (clear_failed_id == 0)
+ {
+ // but clear the failed cache in 30 seconds
+ clear_failed_id = g_timeout_add_seconds (30,
+ (GSourceFunc) &Indexer::ClearFailedLookupsCb, this);
+ }
+ }
+ }
+ else
+ {
+ dai = G_DESKTOP_APP_INFO (ai);
+ }
+
+ if (dai == NULL)
+ {
+ g_warning ("Unable to get info on %s", actor.c_str ());
+ return false;
+ }
+
+ const gchar *val;
+ unsigned name_weight = is_subject ? 5 : 2;
+ unsigned comment_weight = 2;
+
+ // FIXME: ascii folding somewhere
+
+ val = g_app_info_get_display_name (ai);
+ if (val && val[0] != '\0')
+ {
+ std::string display_name (val);
+ tokenizer->index_text (display_name, name_weight);
+ tokenizer->index_text (display_name, name_weight, "A");
+ }
+
+ val = g_desktop_app_info_get_generic_name (dai);
+ if (val && val[0] != '\0')
+ {
+ std::string generic_name (val);
+ tokenizer->index_text (generic_name, name_weight);
+ tokenizer->index_text (generic_name, name_weight, "A");
+ }
+
+ if (!is_subject) return true;
+ // the rest of the code only applies to events with application subject uris:
+ // index the comment field, add category terms, index keywords
+
+ val = g_app_info_get_description (ai);
+ if (val && val[0] != '\0')
+ {
+ std::string comment (val);
+ tokenizer->index_text (comment, comment_weight);
+ tokenizer->index_text (comment, comment_weight, "A");
+ }
+
+ val = g_desktop_app_info_get_categories (dai);
+ if (val && val[0] != '\0')
+ {
+ gchar **categories = g_strsplit (val, ";", 0);
+ Xapian::Document doc(tokenizer->get_document ());
+ for (gchar **iter = categories; *iter != NULL; ++iter)
+ {
+ // FIXME: what if this isn't ascii? but it should, that's what
+ // the fdo menu spec says
+ gchar *category = g_ascii_strdown (*iter, -1);
+ doc.add_boolean_term (FILTER_PREFIX_XDG_CATEGORY + category);
+ g_free (category);
+ }
+ g_strfreev (categories);
+ }
+
+ return true;
+}
+
+GPtrArray* Indexer::Search (const gchar *search_string,
+ ZeitgeistTimeRange *time_range,
+ GPtrArray *templates,
+ guint offset,
+ guint count,
+ ZeitgeistResultType result_type,
+ guint *matches,
+ GError **error)
+{
+ GPtrArray *results = NULL;
+ try
+ {
+ std::string query_string(search_string);
+
+ if (templates && templates->len > 0)
+ {
+ std::string filters (CompileEventFilterQuery (templates));
+ query_string = "(" + query_string + ") AND (" + filters + ")";
+ }
+
+ if (time_range)
+ {
+ gint64 start_time = zeitgeist_time_range_get_start (time_range);
+ gint64 end_time = zeitgeist_time_range_get_end (time_range);
+
+ if (start_time > 0 || end_time < G_MAXINT64)
+ {
+ std::string time_filter (CompileTimeRangeFilterQuery (start_time, end_time));
+ query_string = "(" + query_string + ") AND (" + time_filter + ")";
+ }
+ }
+
+ // FIXME: which result types coalesce?
+ guint maxhits = count * 3;
+
+ if (result_type == 100)
+ {
+ enquire->set_sort_by_relevance ();
+ }
+ else
+ {
+ enquire->set_sort_by_value (VALUE_TIMESTAMP, true);
+ }
+
+ g_debug ("query: %s", query_string.c_str ());
+ Xapian::Query q(query_parser->parse_query (query_string, QUERY_PARSER_FLAGS));
+ enquire->set_query (q);
+ Xapian::MSet hits (enquire->get_mset (offset, maxhits));
+ Xapian::doccount hitcount = hits.get_matches_estimated ();
+
+ if (result_type == 100)
+ {
+ std::vector<unsigned> event_ids;
+ for (Xapian::MSetIterator iter = hits.begin (); iter != hits.end (); ++iter)
+ {
+ Xapian::Document doc(iter.get_document ());
+ double unserialized =
+ Xapian::sortable_unserialise(doc.get_value (VALUE_EVENT_ID));
+ event_ids.push_back (static_cast<unsigned>(unserialized));
+ }
+
+ results = zeitgeist_db_reader_get_events (zg_reader,
+ &event_ids[0],
+ event_ids.size (),
+ NULL,
+ error);
+ }
+ else
+ {
+ GPtrArray *event_templates;
+ event_templates = g_ptr_array_new_with_free_func (g_object_unref);
+ for (Xapian::MSetIterator iter = hits.begin (); iter != hits.end (); ++iter)
+ {
+ Xapian::Document doc(iter.get_document ());
+ double unserialized =
+ Xapian::sortable_unserialise(doc.get_value (VALUE_EVENT_ID));
+ // this doesn't need ref sinking, does it?
+ ZeitgeistEvent *event = zeitgeist_event_new ();
+ zeitgeist_event_set_id (event, static_cast<unsigned>(unserialized));
+ g_ptr_array_add (event_templates, event);
+ }
+
+ if (event_templates->len > 0)
+ {
+ ZeitgeistTimeRange *time_range = zeitgeist_time_range_new_anytime ();
+ results = zeitgeist_db_reader_find_events (zg_reader,
+ time_range,
+ event_templates,
+ ZEITGEIST_STORAGE_STATE_ANY,
+ 0,
+ result_type,
+ NULL,
+ error);
+
+ g_object_unref (time_range);
+ }
+ else
+ {
+ results = g_ptr_array_new ();
+ }
+
+ g_ptr_array_unref (event_templates);
+ }
+
+ if (matches)
+ {
+ *matches = hitcount;
+ }
+ }
+ catch (Xapian::Error const& e)
+ {
+ g_warning ("Failed to index event: %s", e.get_msg ().c_str ());
+ g_set_error_literal (error,
+ ZEITGEIST_ENGINE_ERROR,
+ ZEITGEIST_ENGINE_ERROR_DATABASE_ERROR,
+ e.get_msg ().c_str ());
+ }
+
+ return results;
+}
+
+void Indexer::IndexEvent (ZeitgeistEvent *event)
+{
+ try
+ {
+ // FIXME: we need to special case MOVE_EVENTs
+ const gchar *val;
+ guint event_id = zeitgeist_event_get_id (event);
+ g_return_if_fail (event_id > 0);
+
+ g_debug ("Indexing event with ID: %u", event_id);
+
+ Xapian::Document doc;
+ doc.add_value (VALUE_EVENT_ID,
+ Xapian::sortable_serialise (static_cast<double>(event_id)));
+ doc.add_value (VALUE_TIMESTAMP,
+ Xapian::sortable_serialise (static_cast<double>(zeitgeist_event_get_timestamp (event))));
+
+ tokenizer->set_document (doc);
+
+ val = zeitgeist_event_get_actor (event);
+ if (val && val[0] != '\0')
+ {
+ // it's nice that searching for "gedit" will find all files you worked
+ // with in gedit, but the relevancy has to be low
+ IndexActor (val, false);
+ }
+
+ GPtrArray *subjects = zeitgeist_event_get_subjects (event);
+ for (unsigned i = 0; i < subjects->len; i++)
+ {
+ ZeitgeistSubject *subject;
+ subject = (ZeitgeistSubject*) g_ptr_array_index (subjects, i);
+
+ val = zeitgeist_subject_get_uri (subject);
+ if (val == NULL || val[0] == '\0') continue;
+
+ std::string uri(val);
+
+ if (uri.length () > 512)
+ {
+ g_warning ("URI too long (%lu). Discarding:\n%s",
+ uri.length (), uri.substr (0, 32).c_str ());
+ return; // ignore this event completely...
+ }
+
+ val = zeitgeist_subject_get_text (subject);
+ if (val && val[0] != '\0')
+ {
+ IndexText (val);
+ }
+
+ val = zeitgeist_subject_get_origin (subject);
+ std::string origin (val != NULL ? val : "");
+
+ if (uri.compare (0, 14, "application://") == 0)
+ {
+ if (!IndexActor (uri, true))
+ IndexUri (uri, origin);
+ }
+ else
+ {
+ IndexUri (uri, origin);
+ }
+ }
+
+ AddDocFilters (event, doc);
+
+ this->db->add_document (doc);
+ }
+ catch (Xapian::Error const& e)
+ {
+ g_warning ("Failed to index event: %s", e.get_msg ().c_str ());
+ }
+}
+
+void Indexer::DeleteEvent (guint32 event_id)
+{
+ g_debug ("Deleting event with ID: %u", event_id);
+
+ try
+ {
+ std::string id(Xapian::sortable_serialise (static_cast<double>(event_id)));
+ Xapian::Query query (Xapian::Query::OP_VALUE_RANGE, VALUE_EVENT_ID, id, id);
+
+ enquire->set_query(query);
+ Xapian::MSet mset = enquire->get_mset(0, 10);
+
+ Xapian::doccount total = mset.get_matches_estimated();
+ if (total > 1)
+ {
+ g_warning ("More than one event found with id '%s", id.c_str ());
+ }
+ else if (total == 0)
+ {
+ g_warning ("No event for id '%s'", id.c_str ());
+ return;
+ }
+
+ Xapian::MSetIterator i, end;
+ for (i= mset.begin(), end = mset.end(); i != end; i++)
+ {
+ db->delete_document (*i);
+ }
+ }
+ catch (Xapian::Error const& e)
+ {
+ g_warning ("Failed to delete event '%u': %s",
+ event_id, e.get_msg().c_str ());
+ }
+}
+
+void Indexer::SetDbMetadata (std::string const& key, std::string const& value)
+{
+ try
+ {
+ db->set_metadata (key, value);
+ }
+ catch (Xapian::Error const& e)
+ {
+ g_warning ("Failed to set metadata: %s", e.get_msg ().c_str ());
+ }
+}
+
+gboolean Indexer::ClearFailedLookupsCb ()
+{
+ failed_lookups.clear ();
+
+ clear_failed_id = 0;
+ return FALSE;
+}
+
+} /* namespace */
diff --git a/extensions/fts++/indexer.h b/extensions/fts++/indexer.h
new file mode 100644
index 00000000..9a0135e7
--- /dev/null
+++ b/extensions/fts++/indexer.h
@@ -0,0 +1,115 @@
+/*
+ * Copyright (C) 2012 Canonical Ltd
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Authored by Michal Hruby <michal.hruby@canonical.com>
+ *
+ */
+
+#ifndef _ZGFTS_INDEXER_H_
+#define _ZGFTS_INDEXER_H_
+
+#include <glib-object.h>
+#include <gio/gio.h>
+#include <xapian.h>
+
+#include "zeitgeist-internal.h"
+
+namespace ZeitgeistFTS {
+
+const std::string INDEX_VERSION = "1";
+
+class Indexer
+{
+public:
+ typedef std::map<std::string, GAppInfo*> AppInfoMap;
+ typedef std::set<std::string> ApplicationSet;
+
+ Indexer (ZeitgeistDbReader *reader)
+ : zg_reader (reader)
+ , db (NULL)
+ , query_parser (NULL)
+ , enquire (NULL)
+ , tokenizer (NULL)
+ , clear_failed_id (0)
+ {
+ const gchar *home_dir = g_get_home_dir ();
+ home_dir_path = home_dir != NULL ? home_dir : "/home";
+ }
+
+ ~Indexer ()
+ {
+ if (tokenizer) delete tokenizer;
+ if (enquire) delete enquire;
+ if (query_parser) delete query_parser;
+ if (db) delete db;
+
+ for (AppInfoMap::iterator it = app_info_cache.begin ();
+ it != app_info_cache.end (); ++it)
+ {
+ g_object_unref (it->second);
+ }
+
+ if (clear_failed_id != 0)
+ {
+ g_source_remove (clear_failed_id);
+ }
+ }
+
+ void Initialize (GError **error);
+ bool CheckIndex ();
+ void DropIndex ();
+ void Commit ();
+
+ void IndexEvent (ZeitgeistEvent *event);
+ void DeleteEvent (guint32 event_id);
+ void SetDbMetadata (std::string const& key, std::string const& value);
+
+ GPtrArray* Search (const gchar *search_string,
+ ZeitgeistTimeRange *time_range,
+ GPtrArray *templates,
+ guint offset,
+ guint count,
+ ZeitgeistResultType result_type,
+ guint *matches,
+ GError **error);
+
+private:
+ std::string ExpandType (std::string const& prefix, const gchar* unparsed_uri);
+ std::string CompileEventFilterQuery (GPtrArray *templates);
+ std::string CompileTimeRangeFilterQuery (gint64 start, gint64 end);
+
+ void AddDocFilters (ZeitgeistEvent *event, Xapian::Document &doc);
+ void IndexText (std::string const& text);
+ void IndexUri (std::string const& uri, std::string const& origin);
+ bool IndexActor (std::string const& actor, bool is_subject);
+
+ gboolean ClearFailedLookupsCb ();
+
+ ZeitgeistDbReader *zg_reader;
+ Xapian::WritableDatabase *db;
+ Xapian::QueryParser *query_parser;
+ Xapian::Enquire *enquire;
+ Xapian::TermGenerator *tokenizer;
+ AppInfoMap app_info_cache;
+ ApplicationSet failed_lookups;
+
+ guint clear_failed_id;
+ std::string home_dir_path;
+};
+
+}
+
+#endif /* _ZGFTS_INDEXER_H_ */
diff --git a/extensions/fts++/mimetype.vala b/extensions/fts++/mimetype.vala
new file mode 120000
index 00000000..fc0a6ce1
--- /dev/null
+++ b/extensions/fts++/mimetype.vala
@@ -0,0 +1 @@
+../../src/mimetype.vala \ No newline at end of file
diff --git a/extensions/fts++/ontology-uris.vala b/extensions/fts++/ontology-uris.vala
new file mode 120000
index 00000000..c0b93ab7
--- /dev/null
+++ b/extensions/fts++/ontology-uris.vala
@@ -0,0 +1 @@
+../../src/ontology-uris.vala \ No newline at end of file
diff --git a/extensions/fts++/ontology.vala b/extensions/fts++/ontology.vala
new file mode 120000
index 00000000..5daa0215
--- /dev/null
+++ b/extensions/fts++/ontology.vala
@@ -0,0 +1 @@
+../../src/ontology.vala \ No newline at end of file
diff --git a/extensions/fts-python/org.gnome.zeitgeist.fts.service.in b/extensions/fts++/org.gnome.zeitgeist.fts.service.in
index 7551d79d..dff8199f 100644
--- a/extensions/fts-python/org.gnome.zeitgeist.fts.service.in
+++ b/extensions/fts++/org.gnome.zeitgeist.fts.service.in
@@ -1,3 +1,3 @@
[D-BUS Service]
Name=org.gnome.zeitgeist.SimpleIndexer
-Exec=@pkgdatadir@/fts-python/fts.py
+Exec=@libexecdir@/zeitgeist-fts
diff --git a/extensions/fts++/remote.vala b/extensions/fts++/remote.vala
new file mode 120000
index 00000000..32661b1f
--- /dev/null
+++ b/extensions/fts++/remote.vala
@@ -0,0 +1 @@
+../../src/remote.vala \ No newline at end of file
diff --git a/extensions/fts++/sql-schema.vala b/extensions/fts++/sql-schema.vala
new file mode 120000
index 00000000..a2756d4a
--- /dev/null
+++ b/extensions/fts++/sql-schema.vala
@@ -0,0 +1 @@
+../../src/sql-schema.vala \ No newline at end of file
diff --git a/extensions/fts++/sql.vala b/extensions/fts++/sql.vala
new file mode 120000
index 00000000..48950aed
--- /dev/null
+++ b/extensions/fts++/sql.vala
@@ -0,0 +1 @@
+../../src/sql.vala \ No newline at end of file
diff --git a/extensions/fts++/stringutils.cpp b/extensions/fts++/stringutils.cpp
new file mode 100644
index 00000000..12b0baf8
--- /dev/null
+++ b/extensions/fts++/stringutils.cpp
@@ -0,0 +1,128 @@
+/*
+ * Copyright (C) 2012 Mikkel Kamstrup Erlandsen
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Authored by Mikkel Kamstrup Erlandsen <mikkel.kamstrup@gmail.com>
+ *
+ */
+#include <string>
+
+#include "stringutils.h"
+
+using namespace std;
+
+namespace ZeitgeistFTS {
+
+namespace StringUtils {
+
+/**
+ * Make sure s has equal or less than 'nbytes' bytes making sure the returned
+ * string is still valid UTF-8.
+ *
+ * NOTE: It is assumed the input string is valid UTF-8. Untrusted text
+ * should be validated with g_utf8_validate().
+ *
+ * This function useful for working with Xapian terms because Xapian has
+ * a max term length of 245 (which is not very well documented, but see
+ * http://xapian.org/docs/omega/termprefixes.html).
+ */
+string Truncate (string const& s, unsigned int nbytes)
+{
+ const gchar *str = s.c_str();
+ const gchar *iter = str;
+
+ nbytes = MIN(nbytes, s.length());
+
+ while (iter - str < nbytes)
+ {
+ const gchar *tmp = g_utf8_next_char (iter);
+ if (tmp - str > nbytes) break;
+ iter = tmp;
+ }
+
+
+ return s.substr(0, iter - str);
+}
+
+/**
+ * Converts a URI into an index- and query friendly string. The problem
+ * is that Xapian doesn't handle CAPITAL letters or most non-alphanumeric
+ * symbols in a boolean term when it does prefix matching. The mangled
+ * URIs returned from this function are suitable for boolean prefix searches.
+ *
+ * IMPORTANT: This is a 1-way function! You can not convert back.
+ */
+string MangleUri (string const& orig)
+{
+ string s(orig);
+ size_t pos = 0;
+ while ((pos = s.find_first_of (": /", pos)) != string::npos)
+ {
+ s.replace (pos, 1, 1, '_');
+ pos++;
+ }
+
+ return s;
+}
+
+/**
+ * This method expects a valid uri and tries to split it into authority,
+ * path and query.
+ *
+ * Note that any and all parts may be left untouched.
+ */
+void SplitUri (string const& uri, string &authority,
+ string &path, string &query)
+{
+ size_t colon_pos = uri.find (':');
+ if (colon_pos == string::npos) return; // not an uri?
+ bool has_double_slash = uri.length () > colon_pos + 2 &&
+ uri.compare (colon_pos + 1, 2, "//") == 0;
+
+ size_t start_pos = has_double_slash ? colon_pos + 3 : colon_pos + 1;
+
+ size_t first_slash = uri.find ('/', start_pos);
+ size_t question_mark_pos = uri.find ('?', first_slash == string::npos ?
+ start_pos : first_slash + 1);
+
+ authority = uri.substr (start_pos);
+ if (first_slash != string::npos)
+ {
+ authority.resize (first_slash - start_pos);
+ }
+ else if (question_mark_pos != string::npos)
+ {
+ authority.resize (question_mark_pos - start_pos);
+ }
+
+ if (first_slash == string::npos)
+ {
+ first_slash = start_pos + authority.length ();
+ }
+
+ if (question_mark_pos != string::npos)
+ {
+ path = uri.substr (first_slash, question_mark_pos - first_slash);
+ query = uri.substr (question_mark_pos + 1);
+ }
+ else
+ {
+ path = uri.substr (first_slash);
+ }
+}
+
+} /* namespace StringUtils */
+
+} /* namespace ZeitgeistFTS */
diff --git a/extensions/fts++/stringutils.h b/extensions/fts++/stringutils.h
new file mode 100644
index 00000000..ef011d00
--- /dev/null
+++ b/extensions/fts++/stringutils.h
@@ -0,0 +1,42 @@
+/*
+ * Copyright (C) 2012 Mikkel Kamstrup Erlandsen
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Authored by Mikkel Kamstrup Erlandsen <mikkel.kamstrup@gmail.com>
+ *
+ */
+
+#include <string>
+#include <glib.h>
+
+namespace ZeitgeistFTS {
+
+namespace StringUtils {
+
+const unsigned int MAX_TERM_LENGTH = 245;
+
+std::string Truncate (std::string const& s,
+ unsigned int nbytes = MAX_TERM_LENGTH);
+
+std::string MangleUri (std::string const& orig);
+
+void SplitUri (std::string const& uri,
+ std::string &host,
+ std::string &path,
+ std::string &basename);
+
+} /* namespace StringUtils */
+
+} /* namespace ZeitgeistFTS */
diff --git a/extensions/fts++/table-lookup.vala b/extensions/fts++/table-lookup.vala
new file mode 120000
index 00000000..9e242838
--- /dev/null
+++ b/extensions/fts++/table-lookup.vala
@@ -0,0 +1 @@
+../../src/table-lookup.vala \ No newline at end of file
diff --git a/extensions/fts++/task.cpp b/extensions/fts++/task.cpp
new file mode 100644
index 00000000..74c4092d
--- /dev/null
+++ b/extensions/fts++/task.cpp
@@ -0,0 +1,47 @@
+/*
+ * Copyright (C) 2012 Canonical Ltd
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Authored by Michal Hruby <michal.hruby@canonical.com>
+ *
+ */
+
+#include "task.h"
+
+namespace ZeitgeistFTS {
+
+void IndexEventsTask::Process (Indexer *indexer)
+{
+ unsigned end_index = MIN (start_index + event_count, events->len);
+ for (unsigned i = start_index; i < end_index; i++)
+ {
+ indexer->IndexEvent ((ZeitgeistEvent*) g_ptr_array_index (events, i));
+ }
+}
+
+void DeleteEventsTask::Process (Indexer *indexer)
+{
+ for (unsigned i = 0; i < event_ids.size (); i++)
+ {
+ indexer->DeleteEvent (event_ids[i]);
+ }
+}
+
+void MetadataTask::Process (Indexer *indexer)
+{
+ indexer->SetDbMetadata (key_name, value);
+}
+
+}
diff --git a/extensions/fts++/task.h b/extensions/fts++/task.h
new file mode 100644
index 00000000..1c124382
--- /dev/null
+++ b/extensions/fts++/task.h
@@ -0,0 +1,100 @@
+/*
+ * Copyright (C) 2012 Canonical Ltd
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Authored by Michal Hruby <michal.hruby@canonical.com>
+ *
+ */
+
+#ifndef _ZGFTS_TASK_H_
+#define _ZGFTS_TASK_H_
+
+#include <glib.h>
+
+#include "indexer.h"
+
+namespace ZeitgeistFTS {
+
+/**
+ * A task contains a chunk of work defined by the Controller.
+ * A task should not be clever in scheduling on its own, the
+ * Controller is responsible for breaking down tasks in suitable
+ * chunks.
+ */
+class Task
+{
+public:
+ virtual ~Task () {}
+ virtual void Process (Indexer *indexer) = 0;
+};
+
+class IndexEventsTask : public Task
+{
+public:
+ void Process (Indexer *indexer);
+
+ IndexEventsTask (GPtrArray *event_arr)
+ : events (event_arr), start_index (0), event_count (event_arr->len) {}
+
+ IndexEventsTask (GPtrArray *event_arr, unsigned index, unsigned count)
+ : events (event_arr), start_index (index), event_count (count) {}
+
+ virtual ~IndexEventsTask ()
+ {
+ g_ptr_array_unref (events);
+ }
+
+private:
+ GPtrArray *events;
+ unsigned start_index;
+ unsigned event_count;
+};
+
+class DeleteEventsTask : public Task
+{
+public:
+ void Process (Indexer *indexer);
+
+ DeleteEventsTask (unsigned *event_ids_arr, int event_ids_arr_size)
+ : event_ids (event_ids_arr, event_ids_arr + event_ids_arr_size) {}
+
+ virtual ~DeleteEventsTask ()
+ {
+ }
+
+private:
+ std::vector<unsigned> event_ids;
+};
+
+class MetadataTask : public Task
+{
+public:
+ void Process (Indexer *indexer);
+
+ MetadataTask (std::string const& name, std::string const& val)
+ : key_name (name), value (val) {}
+
+ virtual ~MetadataTask ()
+ {}
+
+private:
+ std::string key_name;
+ std::string value;
+};
+
+}
+
+#endif /* _ZGFTS_TASK_H_ */
+
diff --git a/extensions/fts++/test/Makefile.am b/extensions/fts++/test/Makefile.am
new file mode 100644
index 00000000..e36cf773
--- /dev/null
+++ b/extensions/fts++/test/Makefile.am
@@ -0,0 +1,27 @@
+NULL =
+check_PROGRAMS = test-fts
+TESTS = test-fts
+
+AM_CPPFLAGS = \
+ $(ZEITGEIST_CFLAGS) \
+ -include $(CONFIG_HEADER) \
+ -w \
+ -I$(srcdir)/.. \
+ $(NULL)
+
+test_fts_SOURCES = \
+ test-stringutils.cpp \
+ test-indexer.cpp \
+ test-fts.c \
+ $(srcdir)/../stringutils.cpp \
+ $(srcdir)/../controller.cpp \
+ $(srcdir)/../indexer.cpp \
+ $(srcdir)/../task.cpp \
+ $(srcdir)/../fts.cpp \
+ $(NULL)
+
+test_fts_LDADD = \
+ $(builddir)/../libzeitgeist-internal.la \
+ -lxapian \
+ $(NULL)
+
diff --git a/extensions/fts++/test/test-fts.c b/extensions/fts++/test/test-fts.c
new file mode 100644
index 00000000..6b9208fd
--- /dev/null
+++ b/extensions/fts++/test/test-fts.c
@@ -0,0 +1,37 @@
+/*
+ * Copyright (C) 2012 Mikkel Kamstrup Erlandsen
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Authored by Mikkel Kamstrup Erlandsen <mikkel.kamstrup@gmail.com>
+ *
+ */
+
+#include <glib-object.h>
+
+void test_stringutils_create_suite (void);
+void test_indexer_create_suite (void);
+
+gint
+main (gint argc, gchar *argv[])
+{
+ g_type_init ();
+
+ g_test_init (&argc, &argv, NULL);
+
+ test_stringutils_create_suite ();
+ test_indexer_create_suite ();
+
+ return g_test_run ();
+}
diff --git a/extensions/fts++/test/test-indexer.cpp b/extensions/fts++/test/test-indexer.cpp
new file mode 100644
index 00000000..188213e7
--- /dev/null
+++ b/extensions/fts++/test/test-indexer.cpp
@@ -0,0 +1,531 @@
+/*
+ * Copyright (C) 2012 Mikkel Kamstrup Erlandsen
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Authored by Mikkel Kamstrup Erlandsen <mikkel.kamstrup@gmail.com>
+ *
+ */
+
+#include <glib-object.h>
+
+#include "stringutils.h"
+#include "fts.h"
+#include <zeitgeist-internal.h>
+
+using namespace ZeitgeistFTS;
+
+typedef struct
+{
+ ZeitgeistDbReader *db;
+ ZeitgeistIndexer *indexer;
+} Fixture;
+
+static void setup (Fixture *fix, gconstpointer data);
+static void teardown (Fixture *fix, gconstpointer data);
+
+static void
+setup (Fixture *fix, gconstpointer data)
+{
+ // use in-memory databases for both zg db and fts db
+ GError *error = NULL;
+ g_setenv ("ZEITGEIST_DATABASE_PATH", ":memory:", TRUE);
+ fix->db = ZEITGEIST_DB_READER (zeitgeist_engine_new (&error));
+
+ if (error)
+ {
+ g_warning ("%s", error->message);
+ return;
+ }
+
+ fix->indexer = zeitgeist_indexer_new (fix->db, &error);
+ if (error)
+ {
+ g_warning ("%s", error->message);
+ return;
+ }
+}
+
+static void
+teardown (Fixture *fix, gconstpointer data)
+{
+ zeitgeist_indexer_free (fix->indexer);
+ g_object_unref (fix->db);
+}
+
+static ZeitgeistEvent* create_test_event1 (void)
+{
+ ZeitgeistEvent *event = zeitgeist_event_new ();
+ ZeitgeistSubject *subject = zeitgeist_subject_new ();
+
+ zeitgeist_subject_set_interpretation (subject, ZEITGEIST_NFO_RASTER_IMAGE);
+ zeitgeist_subject_set_manifestation (subject, ZEITGEIST_NFO_REMOTE_DATA_OBJECT);
+ zeitgeist_subject_set_uri (subject, "http://example.com/image.jpg");
+ zeitgeist_subject_set_text (subject, "text");
+ zeitgeist_subject_set_mimetype (subject, "image/png");
+
+ zeitgeist_event_set_interpretation (event, ZEITGEIST_ZG_ACCESS_EVENT);
+ zeitgeist_event_set_manifestation (event, ZEITGEIST_ZG_USER_ACTIVITY);
+ zeitgeist_event_set_actor (event, "application://firefox.desktop");
+ zeitgeist_event_add_subject (event, subject);
+
+ g_object_unref (subject);
+ return event;
+}
+
+static ZeitgeistEvent* create_test_event2 (void)
+{
+ ZeitgeistEvent *event = zeitgeist_event_new ();
+ ZeitgeistSubject *subject = zeitgeist_subject_new ();
+
+ zeitgeist_subject_set_interpretation (subject, ZEITGEIST_NFO_WEBSITE);
+ zeitgeist_subject_set_manifestation (subject, ZEITGEIST_NFO_REMOTE_DATA_OBJECT);
+ zeitgeist_subject_set_uri (subject, "http://example.com/I%20Love%20Wikis");
+ zeitgeist_subject_set_text (subject, "Example.com Wiki Page. Kanji is awesome 漢字");
+ zeitgeist_subject_set_mimetype (subject, "text/html");
+
+ zeitgeist_event_set_interpretation (event, ZEITGEIST_ZG_ACCESS_EVENT);
+ zeitgeist_event_set_manifestation (event, ZEITGEIST_ZG_USER_ACTIVITY);
+ zeitgeist_event_set_actor (event, "application://firefox.desktop");
+ zeitgeist_event_add_subject (event, subject);
+
+ g_object_unref (subject);
+ return event;
+}
+
+static ZeitgeistEvent* create_test_event3 (void)
+{
+ ZeitgeistEvent *event = zeitgeist_event_new ();
+ ZeitgeistSubject *subject = zeitgeist_subject_new ();
+
+ zeitgeist_subject_set_interpretation (subject, ZEITGEIST_NFO_WEBSITE);
+ zeitgeist_subject_set_manifestation (subject, ZEITGEIST_NFO_REMOTE_DATA_OBJECT);
+ // Greek IDN - stands for http://παράδειγμα.δοκιμή
+ zeitgeist_subject_set_uri (subject, "http://xn--hxajbheg2az3al.xn--jxalpdlp/");
+ zeitgeist_subject_set_text (subject, "IDNwiki");
+ zeitgeist_subject_set_mimetype (subject, "text/html");
+
+ zeitgeist_event_set_interpretation (event, ZEITGEIST_ZG_ACCESS_EVENT);
+ zeitgeist_event_set_manifestation (event, ZEITGEIST_ZG_USER_ACTIVITY);
+ zeitgeist_event_set_actor (event, "application://firefox.desktop");
+ zeitgeist_event_add_subject (event, subject);
+
+ g_object_unref (subject);
+ return event;
+}
+
+static ZeitgeistEvent* create_test_event4 (void)
+{
+ ZeitgeistEvent *event = zeitgeist_event_new ();
+ ZeitgeistSubject *subject = zeitgeist_subject_new ();
+
+ zeitgeist_subject_set_interpretation (subject, ZEITGEIST_NFO_PRESENTATION);
+ zeitgeist_subject_set_manifestation (subject, ZEITGEIST_NFO_FILE_DATA_OBJECT);
+ zeitgeist_subject_set_uri (subject, "file:///home/username/Documents/my_fabulous_presentation.pdf");
+ zeitgeist_subject_set_text (subject, NULL);
+ zeitgeist_subject_set_mimetype (subject, "application/pdf");
+
+ zeitgeist_event_set_interpretation (event, ZEITGEIST_ZG_MODIFY_EVENT);
+ zeitgeist_event_set_manifestation (event, ZEITGEIST_ZG_USER_ACTIVITY);
+ zeitgeist_event_set_actor (event, "application://libreoffice-impress.desktop");
+ zeitgeist_event_add_subject (event, subject);
+
+ g_object_unref (subject);
+ return event;
+}
+
+// Steals the event, ref it if you want to keep it
+static guint
+index_event (Fixture *fix, ZeitgeistEvent *event)
+{
+ guint event_id = 0;
+
+ // add event to DBs
+ event_id = zeitgeist_engine_insert_event (ZEITGEIST_ENGINE (fix->db),
+ event, NULL, NULL);
+
+ GPtrArray *events = g_ptr_array_new_with_free_func (g_object_unref);
+ g_ptr_array_add (events, event); // steal event ref
+ zeitgeist_indexer_index_events (fix->indexer, events);
+ g_ptr_array_unref (events);
+
+ while (zeitgeist_indexer_has_pending_tasks (fix->indexer))
+ {
+ zeitgeist_indexer_process_task (fix->indexer);
+ }
+
+ return event_id;
+}
+
+static void
+test_simple_query (Fixture *fix, gconstpointer data)
+{
+ guint matches;
+ guint event_id;
+ ZeitgeistEvent* event;
+
+ // add test events to DBs
+ event_id = index_event (fix, create_test_event1 ());
+ index_event (fix, create_test_event2 ());
+ index_event (fix, create_test_event3 ());
+ index_event (fix, create_test_event4 ());
+
+ GPtrArray *results =
+ zeitgeist_indexer_search (fix->indexer,
+ "text",
+ zeitgeist_time_range_new_anytime (),
+ g_ptr_array_new (),
+ 0,
+ 10,
+ ZEITGEIST_RESULT_TYPE_MOST_RECENT_EVENTS,
+ &matches,
+ NULL);
+
+ g_assert_cmpuint (matches, >, 0);
+ g_assert_cmpuint (results->len, ==, 1);
+
+ event = (ZeitgeistEvent*) results->pdata[0];
+ g_assert_cmpuint (zeitgeist_event_get_id (event), ==, event_id);
+
+ ZeitgeistSubject *subject = (ZeitgeistSubject*)
+ g_ptr_array_index (zeitgeist_event_get_subjects (event), 0);
+ g_assert_cmpstr (zeitgeist_subject_get_text (subject), ==, "text");
+}
+
+static void
+test_simple_with_filter (Fixture *fix, gconstpointer data)
+{
+ guint matches;
+ guint event_id;
+ ZeitgeistEvent* event;
+
+ // add test events to DBs
+ index_event (fix, create_test_event1 ());
+ index_event (fix, create_test_event2 ());
+
+ GPtrArray *filters = g_ptr_array_new_with_free_func (g_object_unref);
+ event = zeitgeist_event_new ();
+ zeitgeist_event_set_interpretation (event, ZEITGEIST_NFO_DOCUMENT);
+ g_ptr_array_add (filters, event); // steals ref
+
+ GPtrArray *results =
+ zeitgeist_indexer_search (fix->indexer,
+ "text",
+ zeitgeist_time_range_new_anytime (),
+ filters,
+ 0,
+ 10,
+ ZEITGEIST_RESULT_TYPE_MOST_RECENT_EVENTS,
+ &matches,
+ NULL);
+
+ g_assert_cmpuint (results->len, ==, 0);
+ g_assert_cmpuint (matches, ==, 0);
+}
+
+static void
+test_simple_with_valid_filter (Fixture *fix, gconstpointer data)
+{
+ guint matches;
+ guint event_id;
+ ZeitgeistEvent* event;
+ ZeitgeistSubject *subject;
+
+ // add test events to DBs
+ event_id = index_event (fix, create_test_event1 ());
+ index_event (fix, create_test_event2 ());
+
+ GPtrArray *filters = g_ptr_array_new_with_free_func (g_object_unref);
+ event = zeitgeist_event_new ();
+ subject = zeitgeist_subject_new ();
+ zeitgeist_subject_set_interpretation (subject, ZEITGEIST_NFO_IMAGE);
+ zeitgeist_event_add_subject (event, subject);
+ g_ptr_array_add (filters, event); // steals ref
+
+ GPtrArray *results =
+ zeitgeist_indexer_search (fix->indexer,
+ "text",
+ zeitgeist_time_range_new_anytime (),
+ filters,
+ 0,
+ 10,
+ ZEITGEIST_RESULT_TYPE_MOST_RECENT_EVENTS,
+ &matches,
+ NULL);
+
+ g_assert_cmpuint (matches, >, 0);
+ g_assert_cmpuint (results->len, ==, 1);
+
+ event = (ZeitgeistEvent*) results->pdata[0];
+ g_assert_cmpuint (zeitgeist_event_get_id (event), ==, event_id);
+
+ subject = (ZeitgeistSubject*)
+ g_ptr_array_index (zeitgeist_event_get_subjects (event), 0);
+ g_assert_cmpstr (zeitgeist_subject_get_text (subject), ==, "text");
+}
+
+static void
+test_simple_negation (Fixture *fix, gconstpointer data)
+{
+ guint matches;
+ guint event_id;
+ ZeitgeistEvent* event;
+ ZeitgeistSubject *subject;
+
+ // add test events to DBs
+ event_id = index_event (fix, create_test_event1 ());
+ index_event (fix, create_test_event2 ());
+
+ GPtrArray *filters = g_ptr_array_new_with_free_func (g_object_unref);
+ event = zeitgeist_event_new ();
+ subject = zeitgeist_subject_new ();
+ zeitgeist_subject_set_interpretation (subject, "!" ZEITGEIST_NFO_IMAGE);
+ zeitgeist_event_add_subject (event, subject);
+ g_ptr_array_add (filters, event); // steals ref
+
+ GPtrArray *results =
+ zeitgeist_indexer_search (fix->indexer,
+ "text",
+ zeitgeist_time_range_new_anytime (),
+ filters,
+ 0,
+ 10,
+ ZEITGEIST_RESULT_TYPE_MOST_RECENT_EVENTS,
+ &matches,
+ NULL);
+
+ g_assert_cmpuint (matches, ==, 0);
+ g_assert_cmpuint (results->len, ==, 0);
+}
+
+static void
+test_simple_noexpand (Fixture *fix, gconstpointer data)
+{
+ guint matches;
+ guint event_id;
+ ZeitgeistEvent* event;
+ ZeitgeistSubject *subject;
+
+ // add test events to DBs
+ event_id = index_event (fix, create_test_event1 ());
+ index_event (fix, create_test_event2 ());
+
+ GPtrArray *filters = g_ptr_array_new_with_free_func (g_object_unref);
+ event = zeitgeist_event_new ();
+ subject = zeitgeist_subject_new ();
+ zeitgeist_subject_set_interpretation (subject, "+" ZEITGEIST_NFO_IMAGE);
+ zeitgeist_event_add_subject (event, subject);
+ g_ptr_array_add (filters, event); // steals ref
+
+ GPtrArray *results =
+ zeitgeist_indexer_search (fix->indexer,
+ "text",
+ zeitgeist_time_range_new_anytime (),
+ filters,
+ 0,
+ 10,
+ ZEITGEIST_RESULT_TYPE_MOST_RECENT_EVENTS,
+ &matches,
+ NULL);
+
+ g_assert_cmpuint (matches, ==, 0);
+ g_assert_cmpuint (results->len, ==, 0);
+}
+
+static void
+test_simple_noexpand_valid (Fixture *fix, gconstpointer data)
+{
+ guint matches;
+ guint event_id;
+ ZeitgeistEvent* event;
+ ZeitgeistSubject *subject;
+
+ // add test events to DBs
+ event_id = index_event (fix, create_test_event1 ());
+ index_event (fix, create_test_event2 ());
+
+ GPtrArray *filters = g_ptr_array_new_with_free_func (g_object_unref);
+ event = zeitgeist_event_new ();
+ subject = zeitgeist_subject_new ();
+ zeitgeist_subject_set_interpretation (subject, "+"ZEITGEIST_NFO_RASTER_IMAGE);
+ zeitgeist_event_add_subject (event, subject);
+ g_ptr_array_add (filters, event); // steals ref
+
+ GPtrArray *results =
+ zeitgeist_indexer_search (fix->indexer,
+ "text",
+ zeitgeist_time_range_new_anytime (),
+ filters,
+ 0,
+ 10,
+ ZEITGEIST_RESULT_TYPE_MOST_RECENT_EVENTS,
+ &matches,
+ NULL);
+
+ g_assert_cmpuint (matches, >, 0);
+ g_assert_cmpuint (results->len, ==, 1);
+
+ event = (ZeitgeistEvent*) results->pdata[0];
+ g_assert_cmpuint (zeitgeist_event_get_id (event), ==, event_id);
+
+ subject = (ZeitgeistSubject*)
+ g_ptr_array_index (zeitgeist_event_get_subjects (event), 0);
+ g_assert_cmpstr (zeitgeist_subject_get_text (subject), ==, "text");
+}
+
+static void
+test_simple_url_unescape (Fixture *fix, gconstpointer data)
+{
+ guint matches;
+ guint event_id;
+ ZeitgeistEvent* event;
+ ZeitgeistSubject *subject;
+
+ // add test events to DBs
+ index_event (fix, create_test_event1 ());
+ event_id = index_event (fix, create_test_event2 ());
+
+ GPtrArray *filters = g_ptr_array_new_with_free_func (g_object_unref);
+ event = zeitgeist_event_new ();
+ subject = zeitgeist_subject_new ();
+ zeitgeist_subject_set_interpretation (subject, ZEITGEIST_NFO_WEBSITE);
+ zeitgeist_event_add_subject (event, subject);
+ g_ptr_array_add (filters, event); // steals ref
+
+ GPtrArray *results =
+ zeitgeist_indexer_search (fix->indexer,
+ "love",
+ zeitgeist_time_range_new_anytime (),
+ filters,
+ 0,
+ 10,
+ ZEITGEIST_RESULT_TYPE_MOST_RECENT_EVENTS,
+ &matches,
+ NULL);
+
+ g_assert_cmpuint (matches, >, 0);
+ g_assert_cmpuint (results->len, ==, 1);
+
+ event = (ZeitgeistEvent*) results->pdata[0];
+ g_assert_cmpuint (zeitgeist_event_get_id (event), ==, event_id);
+
+ subject = (ZeitgeistSubject*)
+ g_ptr_array_index (zeitgeist_event_get_subjects (event), 0);
+ g_assert_cmpstr (zeitgeist_subject_get_text (subject), ==, "Example.com Wiki Page. Kanji is awesome 漢字");
+}
+
+static void
+test_simple_cjk (Fixture *fix, gconstpointer data)
+{
+ guint matches;
+ guint event_id;
+ ZeitgeistEvent* event;
+ ZeitgeistSubject *subject;
+
+ // add test events to DBs
+ index_event (fix, create_test_event1 ());
+ event_id = index_event (fix, create_test_event2 ());
+
+ GPtrArray *results =
+ zeitgeist_indexer_search (fix->indexer,
+ "漢*",
+ zeitgeist_time_range_new_anytime (),
+ g_ptr_array_new (),
+ 0,
+ 10,
+ ZEITGEIST_RESULT_TYPE_MOST_RECENT_EVENTS,
+ &matches,
+ NULL);
+
+ g_assert_cmpuint (matches, >, 0);
+ g_assert_cmpuint (results->len, ==, 1);
+
+ event = (ZeitgeistEvent*) results->pdata[0];
+ g_assert_cmpuint (zeitgeist_event_get_id (event), ==, event_id);
+
+ subject = (ZeitgeistSubject*)
+ g_ptr_array_index (zeitgeist_event_get_subjects (event), 0);
+ g_assert_cmpstr (zeitgeist_subject_get_text (subject), ==, "Example.com Wiki Page. Kanji is awesome 漢字");
+}
+
+static void
+test_simple_idn_support (Fixture *fix, gconstpointer data)
+{
+ guint matches;
+ guint event_id;
+ ZeitgeistEvent* event;
+ ZeitgeistSubject *subject;
+
+ // add test events to DBs
+ index_event (fix, create_test_event1 ());
+ index_event (fix, create_test_event2 ());
+ event_id = index_event (fix, create_test_event3 ());
+
+ GPtrArray *results =
+ zeitgeist_indexer_search (fix->indexer,
+ "παράδειγμα",
+ zeitgeist_time_range_new_anytime (),
+ g_ptr_array_new (),
+ 0,
+ 10,
+ ZEITGEIST_RESULT_TYPE_MOST_RECENT_EVENTS,
+ &matches,
+ NULL);
+
+ g_assert_cmpuint (matches, >, 0);
+ g_assert_cmpuint (results->len, ==, 1);
+
+ event = (ZeitgeistEvent*) results->pdata[0];
+ g_assert_cmpuint (zeitgeist_event_get_id (event), ==, event_id);
+
+ subject = (ZeitgeistSubject*)
+ g_ptr_array_index (zeitgeist_event_get_subjects (event), 0);
+ g_assert_cmpstr (zeitgeist_subject_get_text (subject), ==, "IDNwiki");
+}
+
+G_BEGIN_DECLS
+
+static void discard_message (const gchar *domain,
+ GLogLevelFlags level,
+ const gchar *msg,
+ gpointer userdata)
+{
+}
+
+void test_indexer_create_suite (void)
+{
+ g_test_add ("/Zeitgeist/FTS/Indexer/SimpleQuery", Fixture, 0,
+ setup, test_simple_query, teardown);
+ g_test_add ("/Zeitgeist/FTS/Indexer/SimpleWithFilter", Fixture, 0,
+ setup, test_simple_with_filter, teardown);
+ g_test_add ("/Zeitgeist/FTS/Indexer/SimpleWithValidFilter", Fixture, 0,
+ setup, test_simple_with_valid_filter, teardown);
+ g_test_add ("/Zeitgeist/FTS/Indexer/SimpleNegation", Fixture, 0,
+ setup, test_simple_negation, teardown);
+ g_test_add ("/Zeitgeist/FTS/Indexer/SimpleNoexpand", Fixture, 0,
+ setup, test_simple_noexpand, teardown);
+ g_test_add ("/Zeitgeist/FTS/Indexer/SimpleNoexpandValid", Fixture, 0,
+ setup, test_simple_noexpand_valid, teardown);
+ g_test_add ("/Zeitgeist/FTS/Indexer/URLUnescape", Fixture, 0,
+ setup, test_simple_url_unescape, teardown);
+ g_test_add ("/Zeitgeist/FTS/Indexer/IDNSupport", Fixture, 0,
+ setup, test_simple_idn_support, teardown);
+ g_test_add ("/Zeitgeist/FTS/Indexer/CJK", Fixture, 0,
+ setup, test_simple_cjk, teardown);
+
+ // get rid of the "rebuilding index..." messages
+ g_log_set_handler (NULL, G_LOG_LEVEL_MESSAGE, discard_message, NULL);
+}
+
+G_END_DECLS
diff --git a/extensions/fts++/test/test-stringutils.cpp b/extensions/fts++/test/test-stringutils.cpp
new file mode 100644
index 00000000..3f9405fa
--- /dev/null
+++ b/extensions/fts++/test/test-stringutils.cpp
@@ -0,0 +1,178 @@
+/*
+ * Copyright (C) 2012 Mikkel Kamstrup Erlandsen
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Authored by Mikkel Kamstrup Erlandsen <mikkel.kamstrup@gmail.com>
+ *
+ */
+
+#include <glib-object.h>
+
+#include "stringutils.h"
+
+using namespace ZeitgeistFTS;
+
+typedef struct
+{
+ int i;
+} Fixture;
+
+static void setup (Fixture *fix, gconstpointer data);
+static void teardown (Fixture *fix, gconstpointer data);
+
+static void
+setup (Fixture *fix, gconstpointer data)
+{
+
+}
+
+static void
+teardown (Fixture *fix, gconstpointer data)
+{
+
+}
+
+static void
+test_truncate (Fixture *fix, gconstpointer data)
+{
+ g_assert_cmpstr ("", ==, StringUtils::Truncate("").c_str ());
+
+ g_assert_cmpstr ("", ==, StringUtils::Truncate("a", 0).c_str ());
+ g_assert_cmpstr ("a", ==, StringUtils::Truncate("a", 1).c_str ());
+ g_assert_cmpstr ("a", ==, StringUtils::Truncate("a").c_str ());
+
+ g_assert_cmpstr ("", ==, StringUtils::Truncate("aa", 0).c_str ());
+ g_assert_cmpstr ("a", ==, StringUtils::Truncate("aa", 1).c_str ());
+ g_assert_cmpstr ("aa", ==, StringUtils::Truncate("aa", 2).c_str ());
+ g_assert_cmpstr ("aa", ==, StringUtils::Truncate("aa").c_str ());
+
+
+ g_assert_cmpstr ("", ==, StringUtils::Truncate("å", 0).c_str ());
+ g_assert_cmpstr ("", ==, StringUtils::Truncate("å", 1).c_str ());
+ g_assert_cmpstr ("å", ==, StringUtils::Truncate("å").c_str ());
+
+ g_assert_cmpstr ("", ==, StringUtils::Truncate("åå", 0).c_str ());
+ g_assert_cmpstr ("", ==, StringUtils::Truncate("åå", 1).c_str ());
+ g_assert_cmpstr ("å", ==, StringUtils::Truncate("åå", 2).c_str ());
+ g_assert_cmpstr ("å", ==, StringUtils::Truncate("åå", 3).c_str ());
+ g_assert_cmpstr ("åå", ==, StringUtils::Truncate("åå", 4).c_str ());
+ g_assert_cmpstr ("åå", ==, StringUtils::Truncate("åå").c_str ());
+}
+
+static void
+test_mangle (Fixture *fix, gconstpointer data)
+{
+ g_assert_cmpstr ("", ==, StringUtils::MangleUri("").c_str ());
+
+ g_assert_cmpstr ("file", ==, StringUtils::MangleUri("file").c_str ());
+ g_assert_cmpstr ("file___", ==, StringUtils::MangleUri("file://").c_str ());
+ g_assert_cmpstr ("http___www.zeitgeist-project.com", ==,
+ StringUtils::MangleUri("http://www.zeitgeist-project.com").c_str ());
+
+ g_assert_cmpstr ("scheme_no_spaces_in_uris", ==,
+ StringUtils::MangleUri("scheme:no spaces in uris").c_str ());
+}
+
+static void
+test_split (Fixture *fix, gconstpointer data)
+{
+ std::string authority, path, query;
+
+ authority = path = query = "";
+ StringUtils::SplitUri ("", authority, path, query); // doesn't crash
+
+ g_assert_cmpstr ("", ==, authority.c_str ());
+ g_assert_cmpstr ("", ==, path.c_str ());
+ g_assert_cmpstr ("", ==, query.c_str ());
+
+ authority = path = query = "";
+ StringUtils::SplitUri ("scheme:", authority, path, query); // doesn't crash
+
+ g_assert_cmpstr ("", ==, authority.c_str ());
+ g_assert_cmpstr ("", ==, path.c_str ());
+ g_assert_cmpstr ("", ==, query.c_str ());
+
+ authority = path = query = "";
+ StringUtils::SplitUri ("ldap://ldap1.example.net:6666/o=University%20"
+ "of%20Michigan,c=US??sub?(cn=Babs%20Jensen)",
+ authority, path, query);
+
+ g_assert_cmpstr ("ldap1.example.net:6666", ==, authority.c_str ());
+ g_assert_cmpstr ("/o=University%20of%20Michigan,c=US", ==, path.c_str ());
+ g_assert_cmpstr ("?sub?(cn=Babs%20Jensen)", ==, query.c_str ());
+
+
+ authority = path = query = "";
+ StringUtils::SplitUri ("mailto:jsmith@example.com",
+ authority, path, query);
+
+ g_assert_cmpstr ("jsmith@example.com", ==, authority.c_str ());
+ g_assert_cmpstr ("", ==, path.c_str ());
+ g_assert_cmpstr ("", ==, query.c_str ());
+
+ authority = path = query = "";
+ StringUtils::SplitUri ("mailto:jsmith@example.com?subject=A%20Test&body="
+ "My%20idea%20is%3A%20%0A", authority, path, query);
+
+ g_assert_cmpstr ("jsmith@example.com", ==, authority.c_str ());
+ g_assert_cmpstr ("", ==, path.c_str ());
+ g_assert_cmpstr ("subject=A%20Test&body=My%20idea%20is%3A%20%0A", ==, query.c_str ());
+
+ authority = path = query = "";
+ StringUtils::SplitUri ("sip:alice@atlanta.com?subject=project%20x",
+ authority, path, query);
+
+ g_assert_cmpstr ("alice@atlanta.com", ==, authority.c_str ());
+ g_assert_cmpstr ("", ==, path.c_str ());
+ g_assert_cmpstr ("subject=project%20x", ==, query.c_str ());
+
+ authority = path = query = "";
+ StringUtils::SplitUri ("file:///",
+ authority, path, query);
+
+ g_assert_cmpstr ("", ==, authority.c_str ());
+ g_assert_cmpstr ("/", ==, path.c_str ());
+ g_assert_cmpstr ("", ==, query.c_str ());
+
+ authority = path = query = "";
+ StringUtils::SplitUri ("file:///home/username/file.ext",
+ authority, path, query);
+
+ g_assert_cmpstr ("", ==, authority.c_str ());
+ g_assert_cmpstr ("/home/username/file.ext", ==, path.c_str ());
+ g_assert_cmpstr ("", ==, query.c_str ());
+
+ authority = path = query = "";
+ StringUtils::SplitUri ("dns://192.168.1.1/ftp.example.org?type=A",
+ authority, path, query);
+
+ g_assert_cmpstr ("192.168.1.1", ==, authority.c_str ());
+ g_assert_cmpstr ("/ftp.example.org", ==, path.c_str ());
+ g_assert_cmpstr ("type=A", ==, query.c_str ());
+}
+
+G_BEGIN_DECLS
+
+void test_stringutils_create_suite (void)
+{
+ g_test_add ("/Zeitgeist/FTS/StringUtils/Truncate", Fixture, 0,
+ setup, test_truncate, teardown);
+ g_test_add ("/Zeitgeist/FTS/StringUtils/MangleUri", Fixture, 0,
+ setup, test_mangle, teardown);
+ g_test_add ("/Zeitgeist/FTS/StringUtils/SplitUri", Fixture, 0,
+ setup, test_split, teardown);
+}
+
+G_END_DECLS
diff --git a/extensions/fts++/utils.vala b/extensions/fts++/utils.vala
new file mode 120000
index 00000000..6da71ce8
--- /dev/null
+++ b/extensions/fts++/utils.vala
@@ -0,0 +1 @@
+../../src/utils.vala \ No newline at end of file
diff --git a/extensions/fts++/where-clause.vala b/extensions/fts++/where-clause.vala
new file mode 120000
index 00000000..efc7d8f9
--- /dev/null
+++ b/extensions/fts++/where-clause.vala
@@ -0,0 +1 @@
+../../src/where-clause.vala \ No newline at end of file
diff --git a/extensions/fts++/zeitgeist-fts.vala b/extensions/fts++/zeitgeist-fts.vala
new file mode 100644
index 00000000..f245b03e
--- /dev/null
+++ b/extensions/fts++/zeitgeist-fts.vala
@@ -0,0 +1,301 @@
+/* zeitgeist-fts.vala
+ *
+ * Copyright © 2012 Canonical Ltd.
+ * Copyright © 2012 Michal Hruby <michal.mhr@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+namespace Zeitgeist
+{
+
+ [DBus (name = "org.freedesktop.DBus")]
+ public interface RemoteDBus : Object
+ {
+ public abstract bool name_has_owner (string name) throws IOError;
+ }
+
+ public class FtsDaemon : Object, RemoteSimpleIndexer, RemoteMonitor
+ {
+ //const string DBUS_NAME = "org.gnome.zeitgeist.Fts";
+ const string DBUS_NAME = "org.gnome.zeitgeist.SimpleIndexer";
+ const string ZEITGEIST_DBUS_NAME = "org.gnome.zeitgeist.Engine";
+ private static bool show_version_info = false;
+ private static string log_level = "";
+
+ const OptionEntry[] options =
+ {
+ {
+ "version", 'v', 0, OptionArg.NONE, out show_version_info,
+ "Print program's version number and exit", null
+ },
+ {
+ "log-level", 0, 0, OptionArg.STRING, out log_level,
+ "How much information should be printed; possible values: " +
+ "DEBUG, INFO, WARNING, ERROR, CRITICAL", "LEVEL"
+ },
+ {
+ null
+ }
+ };
+
+ private static FtsDaemon? instance;
+ private static MainLoop mainloop;
+ private static bool name_acquired = false;
+
+ private DbReader engine;
+ private Indexer indexer;
+
+ private uint indexer_register_id;
+ private uint monitor_register_id;
+ private unowned DBusConnection connection;
+
+ public FtsDaemon () throws EngineError
+ {
+ engine = new DbReader ();
+ indexer = new Indexer (engine);
+ }
+
+ private void do_quit ()
+ {
+ engine.close ();
+ mainloop.quit ();
+ }
+
+ public void register_dbus_object (DBusConnection conn) throws IOError
+ {
+ connection = conn;
+ indexer_register_id = conn.register_object<RemoteSimpleIndexer> (
+ "/org/gnome/zeitgeist/index/activity", this);
+ monitor_register_id = conn.register_object<RemoteMonitor> (
+ "/org/gnome/zeitgeist/monitor/special", this);
+ }
+
+ public void unregister_dbus_object ()
+ {
+ if (indexer_register_id != 0)
+ {
+ connection.unregister_object (indexer_register_id);
+ indexer_register_id = 0;
+ }
+
+ if (monitor_register_id != 0)
+ {
+ connection.unregister_object (monitor_register_id);
+ monitor_register_id = 0;
+ }
+ }
+
+ public async void notify_insert (Variant time_range, Variant events)
+ throws IOError
+ {
+ debug ("got insertion notification");
+ var events_arr = Events.from_variant (events);
+ indexer.index_events (events_arr);
+ }
+
+ public async void notify_delete (Variant time_range, uint32[] event_ids)
+ throws IOError
+ {
+ debug ("got deletion notification");
+ indexer.delete_events (event_ids);
+ }
+
+ public async void search (string query_string, Variant time_range,
+ Variant filter_templates,
+ uint offset, uint count, uint result_type,
+ out Variant events, out uint matches)
+ throws Error
+ {
+ var tr = new TimeRange.from_variant (time_range);
+ var templates = Events.from_variant (filter_templates);
+ var results = instance.indexer.search (query_string,
+ tr,
+ templates,
+ offset,
+ count,
+ (ResultType) result_type,
+ out matches);
+
+ events = Events.to_variant (results);
+ }
+
+ private static void name_acquired_callback (DBusConnection conn)
+ {
+ name_acquired = true;
+ }
+
+ private static void name_lost_callback (DBusConnection? conn)
+ {
+ if (conn == null)
+ {
+ // something happened to our bus connection
+ mainloop.quit ();
+ }
+ else if (instance != null && name_acquired)
+ {
+ // we owned the name and we lost it... what to do?
+ mainloop.quit ();
+ }
+ }
+
+ static void run ()
+ throws Error
+ {
+ DBusConnection connection = Bus.get_sync (BusType.SESSION);
+ var proxy = connection.get_proxy_sync<RemoteDBus> (
+ "org.freedesktop.DBus", "/org/freedesktop/DBus",
+ DBusProxyFlags.DO_NOT_LOAD_PROPERTIES);
+ bool zeitgeist_up = proxy.name_has_owner (ZEITGEIST_DBUS_NAME);
+ // FIXME: throw an error that zeitgeist isn't up? or just start it?
+ bool name_owned = proxy.name_has_owner (DBUS_NAME);
+ if (name_owned)
+ {
+ throw new EngineError.EXISTING_INSTANCE (
+ "The FTS daemon is running already.");
+ }
+
+ /* setup Engine instance and register objects on dbus */
+ try
+ {
+ instance = new FtsDaemon ();
+ instance.register_dbus_object (connection);
+ }
+ catch (Error err)
+ {
+ if (err is EngineError.DATABASE_CANTOPEN)
+ {
+ warning ("Could not access the database file.\n" +
+ "Please check the permissions of file %s.",
+ Utils.get_database_file_path ());
+ }
+ else if (err is EngineError.DATABASE_BUSY)
+ {
+ warning ("It looks like another Zeitgeist instance " +
+ "is already running (the database is locked).");
+ }
+ throw err;
+ }
+
+ uint owner_id = Bus.own_name_on_connection (connection,
+ DBUS_NAME,
+ BusNameOwnerFlags.NONE,
+ name_acquired_callback,
+ name_lost_callback);
+
+ mainloop = new MainLoop ();
+ mainloop.run ();
+
+ if (instance != null)
+ {
+ Bus.unown_name (owner_id);
+ instance.unregister_dbus_object ();
+ instance = null;
+
+ // make sure we send quit reply
+ try
+ {
+ connection.flush_sync ();
+ }
+ catch (Error e)
+ {
+ warning ("%s", e.message);
+ }
+ }
+ }
+
+ static void safe_exit ()
+ {
+ instance.do_quit ();
+ }
+
+ static int main (string[] args)
+ {
+ // FIXME: the cat process xapian spawns won't like this and we
+ // can freeze if it dies
+ Posix.signal (Posix.SIGHUP, safe_exit);
+ Posix.signal (Posix.SIGINT, safe_exit);
+ Posix.signal (Posix.SIGTERM, safe_exit);
+
+ var opt_context = new OptionContext (" - Zeitgeist FTS daemon");
+ opt_context.add_main_entries (options, null);
+
+ try
+ {
+ opt_context.parse (ref args);
+
+ if (show_version_info)
+ {
+ stdout.printf (Config.VERSION + "\n");
+ return 0;
+ }
+
+ LogLevelFlags discarded = LogLevelFlags.LEVEL_DEBUG;
+ if (log_level != null)
+ {
+ var ld = LogLevelFlags.LEVEL_DEBUG;
+ var li = LogLevelFlags.LEVEL_INFO;
+ var lm = LogLevelFlags.LEVEL_MESSAGE;
+ var lw = LogLevelFlags.LEVEL_WARNING;
+ var lc = LogLevelFlags.LEVEL_CRITICAL;
+ switch (log_level.up ())
+ {
+ case "DEBUG":
+ discarded = 0;
+ break;
+ case "INFO":
+ discarded = ld;
+ break;
+ case "WARNING":
+ discarded = ld | li | lm;
+ break;
+ case "CRITICAL":
+ discarded = ld | li | lm | lw;
+ break;
+ case "ERROR":
+ discarded = ld | li | lm | lw | lc;
+ break;
+ }
+ }
+ if (discarded != 0)
+ {
+ Log.set_handler ("", discarded, () => {});
+ }
+ else
+ {
+ Environment.set_variable ("G_MESSAGES_DEBUG", "all", true);
+ }
+
+ run ();
+ }
+ catch (Error err)
+ {
+ if (err is EngineError.DATABASE_CANTOPEN)
+ return 21;
+ if (err is EngineError.DATABASE_BUSY)
+ return 22;
+
+ warning ("%s", err.message);
+ return 1;
+ }
+
+ return 0;
+ }
+
+ }
+
+}
+
+// vim:expandtab:ts=4:sw=4
diff --git a/extensions/fts-python/Makefile.am b/extensions/fts-python/Makefile.am
deleted file mode 100644
index 73cf55ee..00000000
--- a/extensions/fts-python/Makefile.am
+++ /dev/null
@@ -1,23 +0,0 @@
-NULL =
-
-ftsdir = $(pkgdatadir)/fts-python
-dist_fts_SCRIPTS = \
- fts.py \
- $(NULL)
-
-dist_fts_DATA = \
- datamodel.py \
- constants.py \
- lrucache.py \
- sql.py \
- $(NULL)
-
-servicedir = $(DBUS_SERVICES_DIR)
-service_DATA = org.gnome.zeitgeist.fts.service
-
-org.gnome.zeitgeist.fts.service: org.gnome.zeitgeist.fts.service.in
- $(AM_V_GEN)sed -e s!\@pkgdatadir\@!$(pkgdatadir)! < $< > $@
-org.gnome.zeitgeist.fts.service: Makefile
-
-EXTRA_DIST = org.gnome.zeitgeist.fts.service.in
-CLEANFILES = org.gnome.zeitgeist.fts.service
diff --git a/extensions/fts-python/constants.py b/extensions/fts-python/constants.py
deleted file mode 100644
index f52e5efb..00000000
--- a/extensions/fts-python/constants.py
+++ /dev/null
@@ -1,71 +0,0 @@
-# -.- coding: utf-8 -.-
-
-# Zeitgeist
-#
-# Copyright © 2009 Markus Korn <thekorn@gmx.de>
-# Copyright © 2009-2010 Siegfried-Angel Gevatter Pujals <rainct@ubuntu.com>
-# Copyright © 2009 Mikkel Kamstrup Erlandsen <mikkel.kamstrup@gmail.com>
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU Lesser General Public License as published by
-# the Free Software Foundation, either version 2.1 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU Lesser General Public License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public License
-# along with this program. If not, see <http://www.gnu.org/licenses/>.
-
-import os
-import logging
-from xdg import BaseDirectory
-
-from zeitgeist.client import ZeitgeistDBusInterface
-
-__all__ = [
- "log",
- "get_engine",
- "constants"
-]
-
-log = logging.getLogger("zeitgeist.engine")
-
-_engine = None
-def get_engine():
- """ Get the running engine instance or create a new one. """
- global _engine
- if _engine is None or _engine.is_closed():
- import main # _zeitgeist.engine.main
- _engine = main.ZeitgeistEngine()
- return _engine
-
-class _Constants:
- # Directories
- DATA_PATH = os.environ.get("ZEITGEIST_DATA_PATH",
- BaseDirectory.save_data_path("zeitgeist"))
- DATABASE_FILE = os.environ.get("ZEITGEIST_DATABASE_PATH",
- os.path.join(DATA_PATH, "activity.sqlite"))
- DATABASE_FILE_BACKUP = os.environ.get("ZEITGEIST_DATABASE_BACKUP_PATH",
- os.path.join(DATA_PATH, "activity.sqlite.bck"))
- DEFAULT_LOG_PATH = os.path.join(BaseDirectory.xdg_cache_home,
- "zeitgeist", "daemon.log")
-
- # D-Bus
- DBUS_INTERFACE = ZeitgeistDBusInterface.INTERFACE_NAME
- SIG_EVENT = "asaasay"
-
- # Required version of DB schema
- CORE_SCHEMA="core"
- CORE_SCHEMA_VERSION = 4
-
- USER_EXTENSION_PATH = os.path.join(DATA_PATH, "extensions")
-
- # configure runtime cache for events
- # default size is 2000
- CACHE_SIZE = int(os.environ.get("ZEITGEIST_CACHE_SIZE", 2000))
- log.debug("Cache size = %i" %CACHE_SIZE)
-
-constants = _Constants()
diff --git a/extensions/fts-python/datamodel.py b/extensions/fts-python/datamodel.py
deleted file mode 100644
index defbe711..00000000
--- a/extensions/fts-python/datamodel.py
+++ /dev/null
@@ -1,83 +0,0 @@
-# -.- coding: utf-8 -.-
-
-# Zeitgeist
-#
-# Copyright © 2009 Mikkel Kamstrup Erlandsen <mikkel.kamstrup@gmail.com>
-# Copyright © 2009 Markus Korn <thekorn@gmx.de>
-# Copyright © 2009 Seif Lotfy <seif@lotfy.com>
-# Copyright © 2009-2010 Siegfried-Angel Gevatter Pujals <rainct@ubuntu.com>
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU Lesser General Public License as published by
-# the Free Software Foundation, either version 2.1 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU Lesser General Public License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public License
-# along with this program. If not, see <http://www.gnu.org/licenses/>.
-
-from zeitgeist.datamodel import Event as OrigEvent, Subject as OrigSubject, \
- DataSource as OrigDataSource
-
-class Event(OrigEvent):
-
- @staticmethod
- def _to_unicode(obj):
- """
- Return an unicode representation of the given object.
- If obj is None, return an empty string.
- """
- return unicode(obj) if obj is not None else u""
-
- @staticmethod
- def _make_dbus_sendable(obj):
- """
- Ensure that all fields in the event struct are non-None
- """
- for n, value in enumerate(obj[0]):
- obj[0][n] = obj._to_unicode(value)
- for subject in obj[1]:
- for n, value in enumerate(subject):
- subject[n] = obj._to_unicode(value)
- # The payload require special handling, since it is binary data
- # If there is indeed data here, we must not unicode encode it!
- if obj[2] is None:
- obj[2] = u""
- elif isinstance(obj[2], unicode):
- obj[2] = str(obj[2])
- return obj
-
- @staticmethod
- def get_plain(ev):
- """
- Ensure that an Event instance is a Plain Old Python Object (popo),
- without DBus wrappings etc.
- """
- popo = []
- popo.append(map(unicode, ev[0]))
- popo.append([map(unicode, subj) for subj in ev[1]])
- # We need the check here so that if D-Bus gives us an empty
- # byte array we don't serialize the text "dbus.Array(...)".
- popo.append(str(ev[2]) if ev[2] else u'')
- return popo
-
-class Subject(OrigSubject):
- pass
-
-class DataSource(OrigDataSource):
-
- @staticmethod
- def get_plain(datasource):
- for plaintype, props in {
- unicode: (DataSource.Name, DataSource.Description),
- lambda x: map(Event.get_plain, x): (DataSource.EventTemplates,),
- bool: (DataSource.Running, DataSource.Enabled),
- int: (DataSource.LastSeen,),
- }.iteritems():
- for prop in props:
- datasource[prop] = plaintype(datasource[prop])
- return tuple(datasource)
diff --git a/extensions/fts-python/fts.py b/extensions/fts-python/fts.py
deleted file mode 100644
index 772eb699..00000000
--- a/extensions/fts-python/fts.py
+++ /dev/null
@@ -1,1273 +0,0 @@
-#!/usr/bin/env python
-# -.- coding: utf-8 -.-
-
-# Zeitgeist
-#
-# Copyright © 2009 Mikkel Kamstrup Erlandsen <mikkel.kamstrup@gmail.com>
-# Copyright © 2010 Canonical Ltd
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU Lesser General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU Lesser General Public License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public License
-# along with this program. If not, see <http://www.gnu.org/licenses/>.
-#
-
-#
-# TODO
-#
-# - Delete events hook
-# - ? Filter on StorageState
-# - Throttle IO and CPU where possible
-
-import os, sys
-import time
-import pickle
-import dbus
-import sqlite3
-import dbus.service
-from xdg import BaseDirectory
-from xdg.DesktopEntry import DesktopEntry, xdg_data_dirs
-import logging
-import subprocess
-from xml.dom import minidom
-import xapian
-import os
-from Queue import Queue, Empty
-import threading
-from urllib import quote as url_escape, unquote as url_unescape
-import gobject, gio
-from cStringIO import StringIO
-
-from collections import defaultdict
-from array import array
-from zeitgeist.datamodel import Event as OrigEvent, StorageState, TimeRange, \
- ResultType, get_timestamp_for_now, Interpretation, Symbol, NEGATION_OPERATOR, WILDCARD, NULL_EVENT
-from datamodel import Event, Subject
-from constants import constants
-from zeitgeist.client import ZeitgeistClient, ZeitgeistDBusInterface
-from sql import get_default_cursor, unset_cursor, TableLookup, WhereClause
-from lrucache import LRUCache
-
-ZG_CLIENT = ZeitgeistClient()
-
-logging.basicConfig(level=logging.DEBUG)
-log = logging.getLogger("zeitgeist.fts")
-
-INDEX_FILE = os.path.join(constants.DATA_PATH, "bb.fts.index")
-INDEX_VERSION = "1"
-INDEX_LOCK = threading.Lock()
-FTS_DBUS_BUS_NAME = "org.gnome.zeitgeist.SimpleIndexer"
-FTS_DBUS_OBJECT_PATH = "/org/gnome/zeitgeist/index/activity"
-FTS_DBUS_INTERFACE = "org.gnome.zeitgeist.Index"
-
-FILTER_PREFIX_EVENT_INTERPRETATION = "ZGEI"
-FILTER_PREFIX_EVENT_MANIFESTATION = "ZGEM"
-FILTER_PREFIX_ACTOR = "ZGA"
-FILTER_PREFIX_SUBJECT_URI = "ZGSU"
-FILTER_PREFIX_SUBJECT_INTERPRETATION = "ZGSI"
-FILTER_PREFIX_SUBJECT_MANIFESTATION = "ZGSM"
-FILTER_PREFIX_SUBJECT_ORIGIN = "ZGSO"
-FILTER_PREFIX_SUBJECT_MIMETYPE = "ZGST"
-FILTER_PREFIX_SUBJECT_STORAGE = "ZGSS"
-FILTER_PREFIX_XDG_CATEGORY = "AC"
-
-VALUE_EVENT_ID = 0
-VALUE_TIMESTAMP = 1
-
-MAX_CACHE_BATCH_SIZE = constants.CACHE_SIZE/2
-
-# When sorting by of the COALESCING_RESULT_TYPES result types,
-# we need to fetch some extra events from the Xapian index because
-# the final result set will be coalesced on some property of the event
-COALESCING_RESULT_TYPES = [ \
- ResultType.MostRecentSubjects,
- ResultType.LeastRecentSubjects,
- ResultType.MostPopularSubjects,
- ResultType.LeastPopularSubjects,
- ResultType.MostRecentActor,
- ResultType.LeastRecentActor,
- ResultType.MostPopularActor,
- ResultType.LeastPopularActor,
-]
-
-MAX_TERM_LENGTH = 245
-
-
-class NegationNotSupported(ValueError):
- pass
-
-class WildcardNotSupported(ValueError):
- pass
-
-def parse_negation(kind, field, value, parse_negation=True):
- """checks if value starts with the negation operator,
- if value starts with the negation operator but the field does
- not support negation a ValueError is raised.
- This function returns a (value_without_negation, negation)-tuple
- """
- negation = False
- if parse_negation and value.startswith(NEGATION_OPERATOR):
- negation = True
- value = value[len(NEGATION_OPERATOR):]
- if negation and field not in kind.SUPPORTS_NEGATION:
- raise NegationNotSupported("This field does not support negation")
- return value, negation
-
-def parse_wildcard(kind, field, value):
- """checks if value ends with the a wildcard,
- if value ends with a wildcard but the field does not support wildcards
- a ValueError is raised.
- This function returns a (value_without_wildcard, wildcard)-tuple
- """
- wildcard = False
- if value.endswith(WILDCARD):
- wildcard = True
- value = value[:-len(WILDCARD)]
- if wildcard and field not in kind.SUPPORTS_WILDCARDS:
- raise WildcardNotSupported("This field does not support wildcards")
- return value, wildcard
-
-def parse_operators(kind, field, value):
- """runs both (parse_negation and parse_wildcard) parser functions
- on query values, and handles the special case of Subject.Text correctly.
- returns a (value_without_negation_and_wildcard, negation, wildcard)-tuple
- """
- try:
- value, negation = parse_negation(kind, field, value)
- except ValueError:
- if kind is Subject and field == Subject.Text:
- # we do not support negation of the text field,
- # the text field starts with the NEGATION_OPERATOR
- # so we handle this string as the content instead
- # of an operator
- negation = False
- else:
- raise
- value, wildcard = parse_wildcard(kind, field, value)
- return value, negation, wildcard
-
-
-def synchronized(lock):
- """ Synchronization decorator. """
- def wrap(f):
- def newFunction(*args, **kw):
- lock.acquire()
- try:
- return f(*args, **kw)
- finally:
- lock.release()
- return newFunction
- return wrap
-
-class Deletion:
- """
- A marker class that marks an event id for deletion
- """
- def __init__ (self, event_id):
- self.event_id = event_id
-
-class Reindex:
- """
- Marker class that tells the worker thread to rebuild the entire index.
- On construction time all events are pulled out of the zg_engine
- argument and stored for later processing in the worker thread.
- This avoid concurrent access to the ZG sqlite db from the worker thread.
- """
- def __init__ (self, zg_engine):
- all_events = zg_engine._find_events(1, TimeRange.always(),
- [], StorageState.Any,
- sys.maxint,
- ResultType.MostRecentEvents)
- self.all_events = all_events
-
-class SearchEngineExtension (dbus.service.Object):
- """
- Full text indexing and searching extension for Zeitgeist
- """
- PUBLIC_METHODS = []
-
- def __init__ (self):
- bus_name = dbus.service.BusName(FTS_DBUS_BUS_NAME, bus=dbus.SessionBus())
- dbus.service.Object.__init__(self, bus_name, FTS_DBUS_OBJECT_PATH)
- self._indexer = Indexer()
-
- ZG_CLIENT.install_monitor((0, 2**63 - 1), [],
- self.pre_insert_event, self.post_delete_events)
-
- def pre_insert_event(self, timerange, events):
- for event in events:
- self._indexer.index_event (event)
-
- def post_delete_events (self, ids):
- for _id in ids:
- self._indexer.delete_event (_id)
-
- @dbus.service.method(FTS_DBUS_INTERFACE,
- in_signature="s(xx)a("+constants.SIG_EVENT+")uuu",
- out_signature="a("+constants.SIG_EVENT+")u")
- def Search(self, query_string, time_range, filter_templates, offset, count, result_type):
- """
- DBus method to perform a full text search against the contents of the
- Zeitgeist log. Returns an array of events.
- """
- time_range = TimeRange(time_range[0], time_range[1])
- filter_templates = map(Event, filter_templates)
- events, hit_count = self._indexer.search(query_string, time_range,
- filter_templates,
- offset, count, result_type)
- return self._make_events_sendable (events), hit_count
-
- @dbus.service.method(FTS_DBUS_INTERFACE,
- in_signature="",
- out_signature="")
- def ForceReindex(self):
- """
- DBus method to force a reindex of the entire Zeitgeist log.
- This method is only intended for debugging purposes and is not
- considered blessed public API.
- """
- log.debug ("Received ForceReindex request over DBus.")
- self._indexer._queue.put (Reindex (self._indexer))
-
- def _make_events_sendable(self, events):
- return [NULL_EVENT if event is None else Event._make_dbus_sendable(event) for event in events]
-
-def mangle_uri (uri):
- """
- Converts a URI into an index- and query friendly string. The problem
- is that Xapian doesn't handle CAPITAL letters or most non-alphanumeric
- symbols in a boolean term when it does prefix matching. The mangled
- URIs returned from this function are suitable for boolean prefix searches.
-
- IMPORTANT: This is a 1-way function! You can not convert back.
- """
- result = ""
- for c in uri.lower():
- if c in (": /"):
- result += "_"
- else:
- result += c
- return result
-
-def cap_string (s, nbytes=MAX_TERM_LENGTH):
- """
- If s has more than nbytes bytes (not characters) then cap it off
- after nbytes bytes in a way still producing a valid utf-8 string.
-
- Assumes that s is a utf-8 string.
-
- This function useful for working with Xapian terms because Xapian has
- a max term length of 245 (which is not very well documented, but see
- http://xapian.org/docs/omega/termprefixes.html).
- """
- # Check if we can fast-path this string
- if (len(s.encode("utf-8")) <= nbytes):
- return s
-
- # We use a StringIO here to avoid mem thrashing via naiive
- # string concatenation. See fx. http://www.skymind.com/~ocrow/python_string/
- buf = StringIO()
- for char in s :
- if buf.tell() >= nbytes - 1 :
- return buf.getvalue()
- buf.write(char.encode("utf-8"))
-
- return unicode(buf.getvalue().decode("utf-8"))
-
-
-def expand_type (type_prefix, uri):
- """
- Return a string with a Xapian query matching all child types of 'uri'
- inside the Xapian prefix 'type_prefix'.
- """
- is_negation = uri.startswith(NEGATION_OPERATOR)
- uri = uri[1:] if is_negation else uri
- children = Symbol.find_child_uris_extended(uri)
- children = [ "%s:%s" % (type_prefix, child) for child in children ]
-
- result = " OR ".join(children)
- return result if not is_negation else "NOT (%s)" % result
-
-class Indexer:
- """
- Abstraction of the FT indexer and search engine
- """
-
- QUERY_PARSER_FLAGS = xapian.QueryParser.FLAG_PHRASE | \
- xapian.QueryParser.FLAG_BOOLEAN | \
- xapian.QueryParser.FLAG_PURE_NOT | \
- xapian.QueryParser.FLAG_LOVEHATE | \
- xapian.QueryParser.FLAG_WILDCARD
-
- def __init__ (self):
-
- self._cursor = cursor = get_default_cursor()
- os.environ["XAPIAN_CJK_NGRAM"] = "1"
- self._interpretation = TableLookup(cursor, "interpretation")
- self._manifestation = TableLookup(cursor, "manifestation")
- self._mimetype = TableLookup(cursor, "mimetype")
- self._actor = TableLookup(cursor, "actor")
- self._event_cache = LRUCache(constants.CACHE_SIZE)
-
- log.debug("Opening full text index: %s" % INDEX_FILE)
- try:
- self._index = xapian.WritableDatabase(INDEX_FILE, xapian.DB_CREATE_OR_OPEN)
- except xapian.DatabaseError, e:
- log.warn("Full text index corrupted: '%s'. Rebuilding index." % e)
- self._index = xapian.WritableDatabase(INDEX_FILE, xapian.DB_CREATE_OR_OVERWRITE)
- self._tokenizer = indexer = xapian.TermGenerator()
- self._query_parser = xapian.QueryParser()
- self._query_parser.set_database (self._index)
- self._query_parser.add_prefix("name", "N")
- self._query_parser.add_prefix("title", "N")
- self._query_parser.add_prefix("site", "S")
- self._query_parser.add_prefix("app", "A")
- self._query_parser.add_boolean_prefix("zgei", FILTER_PREFIX_EVENT_INTERPRETATION)
- self._query_parser.add_boolean_prefix("zgem", FILTER_PREFIX_EVENT_MANIFESTATION)
- self._query_parser.add_boolean_prefix("zga", FILTER_PREFIX_ACTOR)
- self._query_parser.add_prefix("zgsu", FILTER_PREFIX_SUBJECT_URI)
- self._query_parser.add_boolean_prefix("zgsi", FILTER_PREFIX_SUBJECT_INTERPRETATION)
- self._query_parser.add_boolean_prefix("zgsm", FILTER_PREFIX_SUBJECT_MANIFESTATION)
- self._query_parser.add_prefix("zgso", FILTER_PREFIX_SUBJECT_ORIGIN)
- self._query_parser.add_boolean_prefix("zgst", FILTER_PREFIX_SUBJECT_MIMETYPE)
- self._query_parser.add_boolean_prefix("zgss", FILTER_PREFIX_SUBJECT_STORAGE)
- self._query_parser.add_prefix("category", FILTER_PREFIX_XDG_CATEGORY)
- self._query_parser.add_valuerangeprocessor(
- xapian.NumberValueRangeProcessor(VALUE_EVENT_ID, "id", True))
- self._query_parser.add_valuerangeprocessor(
- xapian.NumberValueRangeProcessor(VALUE_TIMESTAMP, "ms", False))
- self._query_parser.set_default_op(xapian.Query.OP_AND)
- self._enquire = xapian.Enquire(self._index)
-
- self._desktops = {}
-
- gobject.threads_init()
- self._may_run = True
- self._queue = Queue(0)
- self._worker = threading.Thread(target=self._worker_thread,
- name="IndexWorker")
- self._worker.daemon = True
-
- # We need to defer the index checking until after ZG has completed
- # full setup. Hence the idle handler.
- # We also don't start the worker until after we've checked the index
- gobject.idle_add (self._check_index_and_start_worker)
-
- @synchronized (INDEX_LOCK)
- def _check_index_and_start_worker (self):
- """
- Check whether we need a rebuild of the index.
- Returns True if the index is good. False if a reindexing has
- been commenced.
-
- This method should be called from the main thread and only once.
- It starts the worker thread as a side effect.
-
- We are clearing the queue, because there may be a race when an
- event insertion / deletion is already queued and our index
- is corrupted. Creating a new queue instance should be safe,
- because we're running in main thread as are the index_event
- and delete_event methods, and the worker thread wasn't yet
- started.
- """
- if self._index.get_metadata("fts_index_version") != INDEX_VERSION:
- log.info("Index must be upgraded. Doing full rebuild")
- self._queue = Queue(0)
- self._queue.put(Reindex(self))
- elif self._index.get_doccount() == 0:
- # If the index is empty we trigger a rebuild
- # We must delay reindexing until after the engine is done setting up
- log.info("Empty index detected. Doing full rebuild")
- self._queue = Queue(0)
- self._queue.put(Reindex(self))
-
- # Now that we've checked the index from the main thread we can start the worker
- self._worker.start()
-
- def index_event (self, event):
- """
- This method schedules and event for indexing. It returns immediate and
- defers the actual work to a bottom half thread. This means that it
- will not block the main loop of the Zeitgeist daemon while indexing
- (which may be a heavy operation)
- """
- self._queue.put (event)
- return event
-
- def delete_event (self, event_id):
- """
- Remove an event from the index given its event id
- """
- self._queue.put (Deletion(event_id))
- return
-
- @synchronized (INDEX_LOCK)
- def search (self, query_string, time_range=None, filters=None, offset=0, maxhits=10, result_type=100):
- """
- Do a full text search over the indexed corpus. The `result_type`
- parameter may be a zeitgeist.datamodel.ResultType or 100. In case it is
- 100 the textual relevancy of the search engine will be used to sort the
- results. Result type 100 is the fastest (and default) mode.
-
- The filters argument should be a list of event templates.
- """
- # Expand event template filters if necessary
- if filters:
- query_string = "(%s) AND (%s)" % (query_string, self._compile_event_filter_query (filters))
-
- # Expand time range value query
- if time_range and not time_range.is_always():
- query_string = "(%s) AND (%s)" % (query_string, self._compile_time_range_filter_query (time_range))
-
- # If the result type coalesces the events we need to fetch some extra
- # events from the index to have a chance of actually holding 'maxhits'
- # unique events
- if result_type in COALESCING_RESULT_TYPES:
- raw_maxhits = maxhits * 3
- else:
- raw_maxhits = maxhits
-
- # When not sorting by relevance, we fetch the results from Xapian sorted,
- # by timestamp. That minimizes the skew we get from otherwise doing a
- # relevancy ranked xapaian query and then resorting with Zeitgeist. The
- # "skew" is that low-relevancy results may still have the highest timestamp
- if result_type == 100:
- self._enquire.set_sort_by_relevance()
- else:
- self._enquire.set_sort_by_value(VALUE_TIMESTAMP, True)
-
- # Allow wildcards
- query_start = time.time()
- query = self._query_parser.parse_query (query_string,
- self.QUERY_PARSER_FLAGS)
- self._enquire.set_query (query)
- hits = self._enquire.get_mset (offset, raw_maxhits)
- hit_count = hits.get_matches_estimated()
- log.debug("Search '%s' gave %s hits in %sms" %
- (query_string, hits.get_matches_estimated(), (time.time() - query_start)*1000))
-
- if result_type == 100:
- event_ids = []
- for m in hits:
- event_id = int(xapian.sortable_unserialise(
- m.document.get_value(VALUE_EVENT_ID)))
- event_ids.append (event_id)
- if event_ids:
- return self.get_events(event_ids), hit_count
- else:
- return [], 0
- else:
- templates = []
- for m in hits:
- event_id = int(xapian.sortable_unserialise(
- m.document.get_value(VALUE_EVENT_ID)))
- ev = Event()
- ev[0][Event.Id] = str(event_id)
- templates.append(ev)
- if templates:
- x = self._find_events(1, TimeRange.always(),
- templates,
- StorageState.Any,
- maxhits,
- result_type), hit_count
- return x
- else:
- return [], 0
-
- def _worker_thread (self):
- is_dirty = False
- while self._may_run:
- # FIXME: Throttle IO and CPU
- try:
- # If we are dirty wait a while before we flush,
- # or if we are clean wait indefinitely to avoid
- # needless wakeups
- if is_dirty:
- event = self._queue.get(True, 0.5)
- else:
- event = self._queue.get(True)
-
- if isinstance (event, Deletion):
- self._delete_event_real (event.event_id)
- elif isinstance (event, Reindex):
- self._reindex (event.all_events)
- else:
- self._index_event_real (event)
-
- is_dirty = True
- except Empty:
- if is_dirty:
- # Write changes to disk
- log.debug("Committing FTS index")
- self._index.flush()
- is_dirty = False
- else:
- log.debug("No changes to index. Sleeping")
-
- @synchronized (INDEX_LOCK)
- def _reindex (self, event_list):
- """
- Index everything in the ZG log. The argument must be a list
- of events. Typically extracted by a Reindex instance.
- Only call from worker thread as it writes to the db and Xapian
- is *not* thread safe (only single-writer-multiple-reader).
- """
- self._index.close ()
- self._index = xapian.WritableDatabase(INDEX_FILE, xapian.DB_CREATE_OR_OVERWRITE)
- self._query_parser.set_database (self._index)
- self._enquire = xapian.Enquire(self._index)
- # Register that this index was built with CJK enabled
- self._index.set_metadata("fts_index_version", INDEX_VERSION)
- log.info("Preparing to rebuild index with %s events" % len(event_list))
- for e in event_list : self._queue.put(e)
-
- @synchronized (INDEX_LOCK)
- def _delete_event_real (self, event_id):
- """
- Look up the doc id given an event id and remove the xapian.Document
- for that doc id.
- Note: This is slow, but there's not much we can do about it
- """
- try:
- _id = xapian.sortable_serialise(float(event_id))
- query = xapian.Query(xapian.Query.OP_VALUE_RANGE,
- VALUE_EVENT_ID, _id, _id)
-
- self._enquire.set_query (query)
- hits = self._enquire.get_mset (0, 10)
-
- total = hits.get_matches_estimated()
- if total > 1:
- log.warning ("More than one event found with id '%s'" % event_id)
- elif total <= 0:
- log.debug ("No event for id '%s'" % event_id)
- return
-
- for m in hits:
- log.debug("Deleting event '%s' with docid '%s'" %
- (event_id, m.docid))
- self._index.delete_document(m.docid)
- except Exception, e:
- log.error("Failed to delete event '%s': %s" % (event_id, e))
-
- def _split_uri (self, uri):
- """
- Returns a triple of (scheme, host, and path) extracted from `uri`
- """
- i = uri.find(":")
- if i == -1 :
- scheme = ""
- host = ""
- path = uri
- else:
- scheme = uri[:i]
- host = ""
- path = ""
-
- if uri[i+1] == "/" and uri[i+2] == "/":
- j = uri.find("/", i+3)
- if j == -1 :
- host = uri[i+3:]
- else:
- host = uri[i+3:j]
- path = uri[j:]
- else:
- host = uri[i+1:]
-
- # Strip out URI query part
- i = path.find("?")
- if i != -1:
- path = path[:i]
-
- return scheme, host, path
-
- def _get_desktop_entry (self, app_id):
- """
- Return a xdg.DesktopEntry.DesktopEntry `app_id` or None in case
- no file is found for the given desktop id
- """
- if app_id in self._desktops:
- return self._desktops[app_id]
-
- for datadir in xdg_data_dirs:
- path = os.path.join(datadir, "applications", app_id)
- if os.path.exists(path):
- try:
- desktop = DesktopEntry(path)
- self._desktops[app_id] = desktop
- return desktop
- except Exception, e:
- log.warning("Unable to load %s: %s" % (path, e))
- return None
-
- return None
-
- def _index_actor (self, actor):
- """
- Takes an actor as a path to a .desktop file or app:// uri
- and index the contents of the corresponding .desktop file
- into the document currently set for self._tokenizer.
- """
- if not actor : return
-
- # Get the path of the .desktop file and convert it to
- # an app id (eg. 'gedit.desktop')
- scheme, host, path = self._split_uri(url_unescape (actor))
- if not path:
- path = host
-
- if not path :
- log.debug("Unable to determine application id for %s" % actor)
- return
-
- if path.startswith("/") :
- path = os.path.basename(path)
-
- desktop = self._get_desktop_entry(path)
- if desktop:
- if not desktop.getNoDisplay():
- self._tokenizer.index_text(desktop.getName(), 5)
- self._tokenizer.index_text(desktop.getName(), 5, "A")
- self._tokenizer.index_text(desktop.getGenericName(), 5)
- self._tokenizer.index_text(desktop.getGenericName(), 5, "A")
- self._tokenizer.index_text(desktop.getComment(), 2)
- self._tokenizer.index_text(desktop.getComment(), 2, "A")
-
- doc = self._tokenizer.get_document()
- for cat in desktop.getCategories():
- doc.add_boolean_term(FILTER_PREFIX_XDG_CATEGORY+cat.lower())
- else:
- log.debug("Unable to look up app info for %s" % actor)
-
-
- def _index_uri (self, uri):
- """
- Index `uri` into the document currectly set on self._tokenizer
- """
- # File URIs and paths are indexed in one way, and all other,
- # usually web URIs, are indexed in another way because there may
- # be domain name etc. in there we want to rank differently
- scheme, host, path = self._split_uri (url_unescape (uri))
- if scheme == "file" or not scheme:
- path, name = os.path.split(path)
- self._tokenizer.index_text(name, 5)
- self._tokenizer.index_text(name, 5, "N")
-
- # Index parent names with descending weight
- weight = 5
- while path and name:
- weight = weight / 1.5
- path, name = os.path.split(path)
- self._tokenizer.index_text(name, int(weight))
-
- elif scheme == "mailto":
- tokens = host.split("@")
- name = tokens[0]
- self._tokenizer.index_text(name, 6)
- if len(tokens) > 1:
- self._tokenizer.index_text(" ".join[1:], 1)
- else:
- # We're cautious about indexing the path components of
- # non-file URIs as some websites practice *extremely* long
- # and useless URLs
- path, name = os.path.split(path)
- if len(name) > 30 : name = name[:30]
- if len(path) > 30 : path = path[30]
- if name:
- self._tokenizer.index_text(name, 5)
- self._tokenizer.index_text(name, 5, "N")
- if path:
- self._tokenizer.index_text(path, 1)
- self._tokenizer.index_text(path, 1, "N")
- if host:
- self._tokenizer.index_text(host, 2)
- self._tokenizer.index_text(host, 2, "N")
- self._tokenizer.index_text(host, 2, "S")
-
- def _index_text (self, text):
- """
- Index `text` as raw text data for the document currently
- set on self._tokenizer. The text is assumed to be a primary
- description of the subject, such as the basename of a file.
-
- Primary use is for subject.text
- """
- self._tokenizer.index_text(text, 5)
-
- def _index_contents (self, uri):
- # xmlindexer doesn't extract words for URIs only for file paths
-
- # FIXME: IONICE and NICE on xmlindexer
-
- path = uri.replace("file://", "")
- xmlindexer = subprocess.Popen(['xmlindexer', path],
- stdout=subprocess.PIPE)
- xml = xmlindexer.communicate()[0].strip()
- xmlindexer.wait()
-
- dom = minidom.parseString(xml)
- text_nodes = dom.getElementsByTagName("text")
- lines = []
- if text_nodes:
- for line in text_nodes[0].childNodes:
- lines.append(line.data)
-
- if lines:
- self._tokenizer.index_text (" ".join(lines))
-
-
- def _add_doc_filters (self, event, doc):
- """Adds the filtering rules to the doc. Filtering rules will
- not affect the relevancy ranking of the event/doc"""
- if event.interpretation:
- doc.add_boolean_term (cap_string(FILTER_PREFIX_EVENT_INTERPRETATION+event.interpretation))
- if event.manifestation:
- doc.add_boolean_term (cap_string(FILTER_PREFIX_EVENT_MANIFESTATION+event.manifestation))
- if event.actor:
- doc.add_boolean_term (cap_string(FILTER_PREFIX_ACTOR+mangle_uri(event.actor)))
-
- for su in event.subjects:
- if su.uri:
- doc.add_boolean_term (cap_string(FILTER_PREFIX_SUBJECT_URI+mangle_uri(su.uri)))
- if su.interpretation:
- doc.add_boolean_term (cap_string(FILTER_PREFIX_SUBJECT_INTERPRETATION+su.interpretation))
- if su.manifestation:
- doc.add_boolean_term (cap_string(FILTER_PREFIX_SUBJECT_MANIFESTATION+su.manifestation))
- if su.origin:
- doc.add_boolean_term (cap_string(FILTER_PREFIX_SUBJECT_ORIGIN+mangle_uri(su.origin)))
- if su.mimetype:
- doc.add_boolean_term (cap_string(FILTER_PREFIX_SUBJECT_MIMETYPE+su.mimetype))
- if su.storage:
- doc.add_boolean_term (cap_string(FILTER_PREFIX_SUBJECT_STORAGE+su.storage))
-
- @synchronized (INDEX_LOCK)
- def _index_event_real (self, event):
- if not isinstance (event, OrigEvent):
- log.error("Not an Event, found: %s" % type(event))
- if not event.id:
- log.warning("Not indexing event. Event has no id")
- return
-
- try:
- doc = xapian.Document()
- doc.add_value (VALUE_EVENT_ID,
- xapian.sortable_serialise(float(event.id)))
- doc.add_value (VALUE_TIMESTAMP,
- xapian.sortable_serialise(float(event.timestamp)))
- self._tokenizer.set_document (doc)
-
- self._index_actor (event.actor)
-
- for subject in event.subjects:
- if not subject.uri : continue
-
- # By spec URIs can have arbitrary length. In reality that's just silly.
- # The general online "rule" is to keep URLs less than 2k so we just
- # choose to enforce that
- if len(subject.uri) > 2000:
- log.info ("URI too long (%s). Discarding: %s..."% (len(subject.uri), subject.uri[:30]))
- return
- log.debug("Indexing '%s'" % subject.uri)
-
- self._index_uri (subject.uri)
- self._index_text (subject.text)
-
- # If the subject URI is an actor, we index the .desktop also
- if subject.uri.startswith ("application://"):
- self._index_actor (subject.uri)
-
- # File contents indexing disabled for now...
- #self._index_contents (subject.uri)
-
- # FIXME: Possibly index payloads when we have apriori knowledge
-
- self._add_doc_filters (event, doc)
- self._index.add_document (doc)
-
- except Exception, e:
- log.error("Error indexing event: %s" % e)
-
- def _compile_event_filter_query (self, events):
- """Takes a list of event templates and compiles a filter query
- based on their, interpretations, manifestations, and actor,
- for event and subjects.
-
- All fields within the same event will be ANDed and each template
- will be ORed with the others. Like elsewhere in Zeitgeist the
- type tree of the interpretations and manifestations will be expanded
- to match all child symbols as well
- """
- query = []
- for event in events:
- if not isinstance(event, Event):
- raise TypeError("Expected Event. Found %s" % type(event))
-
- tmpl = []
- if event.interpretation :
- tmpl.append(expand_type("zgei", event.interpretation))
- if event.manifestation :
- tmpl.append(expand_type("zgem", event.manifestation))
- if event.actor : tmpl.append("zga:%s" % mangle_uri(event.actor))
- for su in event.subjects:
- if su.uri :
- tmpl.append("zgsu:%s" % mangle_uri(su.uri))
- if su.interpretation :
- tmpl.append(expand_type("zgsi", su.interpretation))
- if su.manifestation :
- tmpl.append(expand_type("zgsm", su.manifestation))
- if su.origin :
- tmpl.append("zgso:%s" % mangle_uri(su.origin))
- if su.mimetype :
- tmpl.append("zgst:%s" % su.mimetype)
- if su.storage :
- tmpl.append("zgss:%s" % su.storage)
-
- tmpl = "(" + ") AND (".join(tmpl) + ")"
- query.append(tmpl)
-
- return " OR ".join(query)
-
- def _compile_time_range_filter_query (self, time_range):
- """Takes a TimeRange and compiles a range query for it"""
-
- if not isinstance(time_range, TimeRange):
- raise TypeError("Expected TimeRange, but found %s" % type(time_range))
-
- return "%s..%sms" % (time_range.begin, time_range.end)
-
- def _get_event_from_row(self, row):
- event = Event()
- event[0][Event.Id] = row["id"] # Id property is read-only in the public API
- event.timestamp = row["timestamp"]
- for field in ("interpretation", "manifestation", "actor"):
- # Try to get event attributes from row using the attributed field id
- # If attribute does not exist we break the attribute fetching and return
- # None instead of of crashing
- try:
- setattr(event, field, getattr(self, "_" + field).value(row[field]))
- except KeyError, e:
- log.error("Event %i broken: Table %s has no id %i" \
- %(row["id"], field, row[field]))
- return None
- event.origin = row["event_origin_uri"] or ""
- event.payload = row["payload"] or "" # default payload: empty string
- return event
-
- def _get_subject_from_row(self, row):
- subject = Subject()
- for field in ("uri", "text", "storage"):
- setattr(subject, field, row["subj_" + field])
- subject.origin = row["subj_origin_uri"]
- if row["subj_current_uri"]:
- subject.current_uri = row["subj_current_uri"]
- for field in ("interpretation", "manifestation", "mimetype"):
- # Try to get subject attributes from row using the attributed field id
- # If attribute does not exist we break the attribute fetching and return
- # None instead of crashing
- try:
- setattr(subject, field,
- getattr(self, "_" + field).value(row["subj_" + field]))
- except KeyError, e:
- log.error("Event %i broken: Table %s has no id %i" \
- %(row["id"], field, row["subj_" + field]))
- return None
- return subject
-
- def get_events(self, ids, sender=None):
- """
- Look up a list of events.
- """
-
- t = time.time()
-
- if not ids:
- return []
-
- # Split ids into cached and uncached
- uncached_ids = array("i")
- cached_ids = array("i")
-
- # If ids batch greater than MAX_CACHE_BATCH_SIZE ids ignore cache
- use_cache = True
- if len(ids) > MAX_CACHE_BATCH_SIZE:
- use_cache = False
- if not use_cache:
- uncached_ids = ids
- else:
- for id in ids:
- if id in self._event_cache:
- cached_ids.append(id)
- else:
- uncached_ids.append(id)
-
- id_hash = defaultdict(lambda: array("i"))
- for n, id in enumerate(ids):
- # the same id can be at multible places (LP: #673916)
- # cache all of them
- id_hash[id].append(n)
-
- # If we are not able to get an event by the given id
- # append None instead of raising an Error. The client
- # might simply have requested an event that has been
- # deleted
- events = {}
- sorted_events = [None]*len(ids)
-
- for id in cached_ids:
- event = self._event_cache[id]
- if event:
- if event is not None:
- for n in id_hash[event.id]:
- # insert the event into all necessary spots (LP: #673916)
- sorted_events[n] = event
-
- # Get uncached events
- rows = self._cursor.execute("""
- SELECT * FROM event_view
- WHERE id IN (%s)
- """ % ",".join("%d" % _id for _id in uncached_ids))
-
- time_get_uncached = time.time() - t
- t = time.time()
-
- t_get_event = 0
- t_get_subject = 0
- t_apply_get_hooks = 0
-
- row_counter = 0
- for row in rows:
- row_counter += 1
- # Assumption: all rows of a same event for its different
- # subjects are in consecutive order.
- t_get_event -= time.time()
- event = self._get_event_from_row(row)
- t_get_event += time.time()
-
- if event:
- # Check for existing event.id in event to attach
- # other subjects to it
- if event.id not in events:
- events[event.id] = event
- else:
- event = events[event.id]
-
- t_get_subject -= time.time()
- subject = self._get_subject_from_row(row)
- t_get_subject += time.time()
- # Check if subject has a proper value. If none than something went
- # wrong while trying to fetch the subject from the row. So instead
- # of failing and raising an error. We silently skip the event.
- if subject:
- event.append_subject(subject)
- if use_cache and not event.payload:
- self._event_cache[event.id] = event
- if event is not None:
- for n in id_hash[event.id]:
- # insert the event into all necessary spots (LP: #673916)
- sorted_events[n] = event
- # Avoid caching events with payloads to have keep the cache MB size
- # at a decent level
-
-
- log.debug("Got %d raw events in %fs" % (row_counter, time_get_uncached))
- log.debug("Got %d events in %fs" % (len(sorted_events), time.time()-t))
- log.debug(" Where time spent in _get_event_from_row in %fs" % (t_get_event))
- log.debug(" Where time spent in _get_subject_from_row in %fs" % (t_get_subject))
- log.debug(" Where time spent in apply_get_hooks in %fs" % (t_apply_get_hooks))
- return sorted_events
-
- def _find_events(self, return_mode, time_range, event_templates,
- storage_state, max_events, order, sender=None):
- """
- Accepts 'event_templates' as either a real list of Events or as
- a list of tuples (event_data, subject_data) as we do in the
- DBus API.
-
- Return modes:
- - 0: IDs.
- - 1: Events.
- """
- t = time.time()
-
- where = self._build_sql_event_filter(time_range, event_templates,
- storage_state)
-
- if not where.may_have_results():
- return []
-
- if return_mode == 0:
- sql = "SELECT DISTINCT id FROM event_view"
- elif return_mode == 1:
- sql = "SELECT id FROM event_view"
- else:
- raise NotImplementedError, "Unsupported return_mode."
-
- wheresql = " WHERE %s" % where.sql if where else ""
-
- def group_and_sort(field, wheresql, time_asc=False, count_asc=None,
- aggregation_type='max'):
-
- args = {
- 'field': field,
- 'aggregation_type': aggregation_type,
- 'where_sql': wheresql,
- 'time_sorting': 'ASC' if time_asc else 'DESC',
- 'aggregation_sql': '',
- 'order_sql': '',
- }
-
- if count_asc is not None:
- args['aggregation_sql'] = ', COUNT(%s) AS num_events' % \
- field
- args['order_sql'] = 'num_events %s,' % \
- ('ASC' if count_asc else 'DESC')
-
- return """
- NATURAL JOIN (
- SELECT %(field)s,
- %(aggregation_type)s(timestamp) AS timestamp
- %(aggregation_sql)s
- FROM event_view %(where_sql)s
- GROUP BY %(field)s)
- GROUP BY %(field)s
- ORDER BY %(order_sql)s timestamp %(time_sorting)s
- """ % args
-
- if order == ResultType.MostRecentEvents:
- sql += wheresql + " ORDER BY timestamp DESC"
- elif order == ResultType.LeastRecentEvents:
- sql += wheresql + " ORDER BY timestamp ASC"
- elif order == ResultType.MostRecentEventOrigin:
- sql += group_and_sort("origin", wheresql, time_asc=False)
- elif order == ResultType.LeastRecentEventOrigin:
- sql += group_and_sort("origin", wheresql, time_asc=True)
- elif order == ResultType.MostPopularEventOrigin:
- sql += group_and_sort("origin", wheresql, time_asc=False,
- count_asc=False)
- elif order == ResultType.LeastPopularEventOrigin:
- sql += group_and_sort("origin", wheresql, time_asc=True,
- count_asc=True)
- elif order == ResultType.MostRecentSubjects:
- # Remember, event.subj_id identifies the subject URI
- sql += group_and_sort("subj_id", wheresql, time_asc=False)
- elif order == ResultType.LeastRecentSubjects:
- sql += group_and_sort("subj_id", wheresql, time_asc=True)
- elif order == ResultType.MostPopularSubjects:
- sql += group_and_sort("subj_id", wheresql, time_asc=False,
- count_asc=False)
- elif order == ResultType.LeastPopularSubjects:
- sql += group_and_sort("subj_id", wheresql, time_asc=True,
- count_asc=True)
- elif order == ResultType.MostRecentCurrentUri:
- sql += group_and_sort("subj_id_current", wheresql, time_asc=False)
- elif order == ResultType.LeastRecentCurrentUri:
- sql += group_and_sort("subj_id_current", wheresql, time_asc=True)
- elif order == ResultType.MostPopularCurrentUri:
- sql += group_and_sort("subj_id_current", wheresql, time_asc=False,
- count_asc=False)
- elif order == ResultType.LeastPopularCurrentUri:
- sql += group_and_sort("subj_id_current", wheresql, time_asc=True,
- count_asc=True)
- elif order == ResultType.MostRecentActor:
- sql += group_and_sort("actor", wheresql, time_asc=False)
- elif order == ResultType.LeastRecentActor:
- sql += group_and_sort("actor", wheresql, time_asc=True)
- elif order == ResultType.MostPopularActor:
- sql += group_and_sort("actor", wheresql, time_asc=False,
- count_asc=False)
- elif order == ResultType.LeastPopularActor:
- sql += group_and_sort("actor", wheresql, time_asc=True,
- count_asc=True)
- elif order == ResultType.OldestActor:
- sql += group_and_sort("actor", wheresql, time_asc=True,
- aggregation_type="min")
- elif order == ResultType.MostRecentOrigin:
- sql += group_and_sort("subj_origin", wheresql, time_asc=False)
- elif order == ResultType.LeastRecentOrigin:
- sql += group_and_sort("subj_origin", wheresql, time_asc=True)
- elif order == ResultType.MostPopularOrigin:
- sql += group_and_sort("subj_origin", wheresql, time_asc=False,
- count_asc=False)
- elif order == ResultType.LeastPopularOrigin:
- sql += group_and_sort("subj_origin", wheresql, time_asc=True,
- count_asc=True)
- elif order == ResultType.MostRecentSubjectInterpretation:
- sql += group_and_sort("subj_interpretation", wheresql,
- time_asc=False)
- elif order == ResultType.LeastRecentSubjectInterpretation:
- sql += group_and_sort("subj_interpretation", wheresql,
- time_asc=True)
- elif order == ResultType.MostPopularSubjectInterpretation:
- sql += group_and_sort("subj_interpretation", wheresql,
- time_asc=False, count_asc=False)
- elif order == ResultType.LeastPopularSubjectInterpretation:
- sql += group_and_sort("subj_interpretation", wheresql,
- time_asc=True, count_asc=True)
- elif order == ResultType.MostRecentMimeType:
- sql += group_and_sort("subj_mimetype", wheresql, time_asc=False)
- elif order == ResultType.LeastRecentMimeType:
- sql += group_and_sort("subj_mimetype", wheresql, time_asc=True)
- elif order == ResultType.MostPopularMimeType:
- sql += group_and_sort("subj_mimetype", wheresql, time_asc=False,
- count_asc=False)
- elif order == ResultType.LeastPopularMimeType:
- sql += group_and_sort("subj_mimetype", wheresql, time_asc=True,
- count_asc=True)
-
- if max_events > 0:
- sql += " LIMIT %d" % max_events
- result = array("i", self._cursor.execute(sql, where.arguments).fetch(0))
-
- if return_mode == 0:
- log.debug("Found %d event IDs in %fs" % (len(result), time.time()- t))
- elif return_mode == 1:
- log.debug("Found %d events in %fs" % (len(result), time.time()- t))
- result = self.get_events(ids=result, sender=sender)
- else:
- raise Exception("%d" % return_mode)
-
- return result
-
- @staticmethod
- def _build_templates(templates):
- for event_template in templates:
- event_data = event_template[0]
- for subject in (event_template[1] or (Subject(),)):
- yield Event((event_data, [], None)), Subject(subject)
-
- def _build_sql_from_event_templates(self, templates):
-
- where_or = WhereClause(WhereClause.OR)
-
- for template in templates:
- event_template = Event((template[0], [], None))
- if template[1]:
- subject_templates = [Subject(data) for data in template[1]]
- else:
- subject_templates = None
-
- subwhere = WhereClause(WhereClause.AND)
-
- if event_template.id:
- subwhere.add("id = ?", event_template.id)
-
- try:
- value, negation, wildcard = parse_operators(Event, Event.Interpretation, event_template.interpretation)
- # Expand event interpretation children
- event_interp_where = WhereClause(WhereClause.OR, negation)
- for child_interp in (Symbol.find_child_uris_extended(value)):
- if child_interp:
- event_interp_where.add_text_condition("interpretation",
- child_interp, like=wildcard, cache=self._interpretation)
- if event_interp_where:
- subwhere.extend(event_interp_where)
-
- value, negation, wildcard = parse_operators(Event, Event.Manifestation, event_template.manifestation)
- # Expand event manifestation children
- event_manif_where = WhereClause(WhereClause.OR, negation)
- for child_manif in (Symbol.find_child_uris_extended(value)):
- if child_manif:
- event_manif_where.add_text_condition("manifestation",
- child_manif, like=wildcard, cache=self._manifestation)
- if event_manif_where:
- subwhere.extend(event_manif_where)
-
- value, negation, wildcard = parse_operators(Event, Event.Actor, event_template.actor)
- if value:
- subwhere.add_text_condition("actor", value, wildcard, negation, cache=self._actor)
-
- value, negation, wildcard = parse_operators(Event, Event.Origin, event_template.origin)
- if value:
- subwhere.add_text_condition("origin", value, wildcard, negation)
-
- if subject_templates is not None:
- for subject_template in subject_templates:
- value, negation, wildcard = parse_operators(Subject, Subject.Interpretation, subject_template.interpretation)
- # Expand subject interpretation children
- su_interp_where = WhereClause(WhereClause.OR, negation)
- for child_interp in (Symbol.find_child_uris_extended(value)):
- if child_interp:
- su_interp_where.add_text_condition("subj_interpretation",
- child_interp, like=wildcard, cache=self._interpretation)
- if su_interp_where:
- subwhere.extend(su_interp_where)
-
- value, negation, wildcard = parse_operators(Subject, Subject.Manifestation, subject_template.manifestation)
- # Expand subject manifestation children
- su_manif_where = WhereClause(WhereClause.OR, negation)
- for child_manif in (Symbol.find_child_uris_extended(value)):
- if child_manif:
- su_manif_where.add_text_condition("subj_manifestation",
- child_manif, like=wildcard, cache=self._manifestation)
- if su_manif_where:
- subwhere.extend(su_manif_where)
-
- # FIXME: Expand mime children as well.
- # Right now we only do exact matching for mimetypes
- # thekorn: this will be fixed when wildcards are supported
- value, negation, wildcard = parse_operators(Subject, Subject.Mimetype, subject_template.mimetype)
- if value:
- subwhere.add_text_condition("subj_mimetype",
- value, wildcard, negation, cache=self._mimetype)
-
- for key in ("uri", "origin", "text"):
- value = getattr(subject_template, key)
- if value:
- value, negation, wildcard = parse_operators(Subject, getattr(Subject, key.title()), value)
- subwhere.add_text_condition("subj_%s" % key, value, wildcard, negation)
-
- if subject_template.current_uri:
- value, negation, wildcard = parse_operators(Subject,
- Subject.CurrentUri, subject_template.current_uri)
- subwhere.add_text_condition("subj_current_uri", value, wildcard, negation)
-
- if subject_template.storage:
- subwhere.add_text_condition("subj_storage", subject_template.storage)
-
- except KeyError, e:
- # Value not in DB
- log.debug("Unknown entity in query: %s" % e)
- where_or.register_no_result()
- continue
- where_or.extend(subwhere)
- return where_or
-
- def _build_sql_event_filter(self, time_range, templates, storage_state):
-
- where = WhereClause(WhereClause.AND)
-
- # thekorn: we are using the unary operator here to tell sql to not use
- # the index on the timestamp column at the first place. This `fix` for
- # (LP: #672965) is based on some benchmarks, which suggest a performance
- # win, but we might not oversee all implications.
- # (see http://www.sqlite.org/optoverview.html section 6.0)
- min_time, max_time = time_range
- if min_time != 0:
- where.add("+timestamp >= ?", min_time)
- if max_time != sys.maxint:
- where.add("+timestamp <= ?", max_time)
-
- if storage_state in (StorageState.Available, StorageState.NotAvailable):
- where.add("(subj_storage_state = ? OR subj_storage_state IS NULL)",
- storage_state)
- elif storage_state != StorageState.Any:
- raise ValueError, "Unknown storage state '%d'" % storage_state
-
- where.extend(self._build_sql_from_event_templates(templates))
-
- return where
-
-if __name__ == "__main__":
- mainloop = gobject.MainLoop(is_running=True)
- search_engine = SearchEngineExtension()
- ZG_CLIENT._iface.connect_exit(lambda: mainloop.quit ())
- mainloop.run()
-
diff --git a/extensions/fts-python/lrucache.py b/extensions/fts-python/lrucache.py
deleted file mode 100644
index 265ed401..00000000
--- a/extensions/fts-python/lrucache.py
+++ /dev/null
@@ -1,125 +0,0 @@
-# -.- coding: utf-8 -.-
-
-# lrucache.py
-#
-# Copyright © 2009 Mikkel Kamstrup Erlandsen <mikkel.kamstrup@gmail.com>
-# Copyright © 2009 Markus Korn <thekorn@gmx.de>
-# Copyright © 2011 Seif Lotfy <seif@lotfy.com>
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU Lesser General Public License as published by
-# the Free Software Foundation, either version 2.1 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU Lesser General Public License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public License
-# along with this program. If not, see <http://www.gnu.org/licenses/>.
-
-class LRUCache:
- """
- A simple LRUCache implementation backed by a linked list and a dict.
- It can be accessed and updated just like a dict. To check if an element
- exists in the cache the following type of statements can be used:
- if "foo" in cache
- """
-
- class _Item:
- """
- A container for each item in LRUCache which knows about the
- item's position and relations
- """
- def __init__(self, item_key, item_value):
- self.value = item_value
- self.key = item_key
- self.next = None
- self.prev = None
-
- def __init__(self, max_size):
- """
- The size of the cache (in number of cached items) is guaranteed to
- never exceed 'size'
- """
- self._max_size = max_size
- self.clear()
-
-
- def clear(self):
- self._list_end = None # The newest item
- self._list_start = None # Oldest item
- self._map = {}
-
- def __len__(self):
- return len(self._map)
-
- def __contains__(self, key):
- return key in self._map
-
- def __delitem__(self, key):
- item = self._map[key]
- if item.prev:
- item.prev.next = item.next
- else:
- # we are deleting the first item, so we need a new first one
- self._list_start = item.next
- if item.next:
- item.next.prev = item.prev
- else:
- # we are deleting the last item, get a new last one
- self._list_end = item.prev
- del self._map[key], item
-
- def __setitem__(self, key, value):
- if key in self._map:
- item = self._map[key]
- item.value = value
- self._move_item_to_end(item)
- else:
- new = LRUCache._Item(key, value)
- self._append_to_list(new)
-
- if len(self._map) > self._max_size :
- # Remove eldest entry from list
- self.remove_eldest_item()
-
- def __getitem__(self, key):
- item = self._map[key]
- self._move_item_to_end(item)
- return item.value
-
- def __iter__(self):
- """
- Iteration is in order from eldest to newest,
- and returns (key,value) tuples
- """
- iter = self._list_start
- while iter != None:
- yield (iter.key, iter.value)
- iter = iter.next
-
- def _move_item_to_end(self, item):
- del self[item.key]
- self._append_to_list(item)
-
- def _append_to_list(self, item):
- self._map[item.key] = item
- if not self._list_start:
- self._list_start = item
- if self._list_end:
- self._list_end.next = item
- item.prev = self._list_end
- item.next = None
- self._list_end = item
-
- def remove_eldest_item(self):
- if self._list_start == self._list_end:
- self._list_start = None
- self._list_end = None
- return
- old = self._list_start
- old.next.prev = None
- self._list_start = old.next
- del self[old.key], old
diff --git a/extensions/fts-python/sql.py b/extensions/fts-python/sql.py
deleted file mode 100644
index 20f1bc30..00000000
--- a/extensions/fts-python/sql.py
+++ /dev/null
@@ -1,301 +0,0 @@
-# -.- coding: utf-8 -.-
-
-# Zeitgeist
-#
-# Copyright © 2009-2010 Siegfried-Angel Gevatter Pujals <rainct@ubuntu.com>
-# Copyright © 2009 Mikkel Kamstrup Erlandsen <mikkel.kamstrup@gmail.com>
-# Copyright © 2009-2011 Markus Korn <thekorn@gmx.net>
-# Copyright © 2009 Seif Lotfy <seif@lotfy.com>
-# Copyright © 2011 J.P. Lacerda <jpaflacerda@gmail.com>
-# Copyright © 2011 Collabora Ltd.
-# By Siegfried-Angel Gevatter Pujals <rainct@ubuntu.com>
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU Lesser General Public License as published by
-# the Free Software Foundation, either version 2.1 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU Lesser General Public License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public License
-# along with this program. If not, see <http://www.gnu.org/licenses/>.
-
-import sqlite3
-import logging
-import time
-import os
-import shutil
-
-from constants import constants
-
-log = logging.getLogger("siis.zeitgeist.sql")
-
-TABLE_MAP = {
- "origin": "uri",
- "subj_mimetype": "mimetype",
- "subj_origin": "uri",
- "subj_uri": "uri",
- "subj_current_uri": "uri",
-}
-
-def explain_query(cursor, statement, arguments=()):
- plan = ""
- for r in cursor.execute("EXPLAIN QUERY PLAN "+statement, arguments).fetchall():
- plan += str(list(r)) + "\n"
- log.debug("Got query:\nQUERY:\n%s (%s)\nPLAN:\n%s" % (statement, arguments, plan))
-
-class UnicodeCursor(sqlite3.Cursor):
-
- debug_explain = os.getenv("ZEITGEIST_DEBUG_QUERY_PLANS")
-
- @staticmethod
- def fix_unicode(obj):
- if isinstance(obj, (int, long)):
- # thekorn: as long as we are using the unary operator for timestamp
- # related queries we have to make sure that integers are not
- # converted to strings, same applies for long numbers.
- return obj
- if isinstance(obj, str):
- obj = obj.decode("UTF-8")
- # seif: Python’s default encoding is ASCII, so whenever a character with
- # an ASCII value > 127 is in the input data, you’ll get a UnicodeDecodeError
- # because that character can’t be handled by the ASCII encoding.
- try:
- obj = unicode(obj)
- except UnicodeDecodeError, ex:
- pass
- return obj
-
- def execute(self, statement, parameters=()):
- parameters = [self.fix_unicode(p) for p in parameters]
- if UnicodeCursor.debug_explain:
- explain_query(super(UnicodeCursor, self), statement, parameters)
- return super(UnicodeCursor, self).execute(statement, parameters)
-
- def fetch(self, index=None):
- if index is not None:
- for row in self:
- yield row[index]
- else:
- for row in self:
- yield row
-
-def _get_schema_version (cursor, schema_name):
- """
- Returns the schema version for schema_name or returns 0 in case
- the schema doesn't exist.
- """
- try:
- schema_version_result = cursor.execute("""
- SELECT version FROM schema_version WHERE schema=?
- """, (schema_name,))
- result = schema_version_result.fetchone()
- return result[0] if result else 0
- except sqlite3.OperationalError, e:
- # The schema isn't there...
- log.debug ("Schema '%s' not found: %s" % (schema_name, e))
- return 0
-
-def _connect_to_db(file_path):
- conn = sqlite3.connect(file_path)
- conn.row_factory = sqlite3.Row
- cursor = conn.cursor(UnicodeCursor)
- return cursor
-
-_cursor = None
-def get_default_cursor():
- global _cursor
- if not _cursor:
- dbfile = constants.DATABASE_FILE
- start = time.time()
- log.info("Using database: %s" % dbfile)
- new_database = not os.path.exists(dbfile)
- _cursor = _connect_to_db(dbfile)
- core_schema_version = _get_schema_version(_cursor, constants.CORE_SCHEMA)
- if core_schema_version < constants.CORE_SCHEMA_VERSION:
- log.exception(
- "Database '%s' is on version %s, but %s is required" % \
- (constants.CORE_SCHEMA, core_schema_version,
- constants.CORE_SCHEMA_VERSION))
- raise SystemExit(27)
- return _cursor
-def unset_cursor():
- global _cursor
- _cursor = None
-
-class TableLookup(dict):
-
- # We are not using an LRUCache as pressumably there won't be thousands
- # of manifestations/interpretations/mimetypes/actors on most
- # installations, so we can save us the overhead of tracking their usage.
-
- def __init__(self, cursor, table):
-
- self._cursor = cursor
- self._table = table
-
- for row in cursor.execute("SELECT id, value FROM %s" % table):
- self[row["value"]] = row["id"]
-
- self._inv_dict = dict((value, key) for key, value in self.iteritems())
-
- def __getitem__(self, name):
- # Use this for inserting new properties into the database
- if name in self:
- return super(TableLookup, self).__getitem__(name)
- id = self._cursor.execute("SELECT id FROM %s WHERE value=?"
- % self._table, (name,)).fetchone()[0]
- # If we are here it's a newly inserted value, insert it into cache
- self[name] = id
- self._inv_dict[id] = name
- return id
-
- def value(self, id):
- # When we fetch an event, it either was already in the database
- # at the time Zeitgeist started or it was inserted later -using
- # Zeitgeist-, so here we always have the data in memory already.
- return self._inv_dict[id]
-
- def id(self, name):
- # Use this when fetching values which are supposed to be in the
- # database already. Eg., in find_eventids.
- return super(TableLookup, self).__getitem__(name)
-
- def remove_id(self, id):
- value = self.value(id)
- del self._inv_dict[id]
- del self[value]
-
-def get_right_boundary(text):
- """ returns the smallest string which is greater than `text` """
- if not text:
- # if the search prefix is empty we query for the whole range
- # of 'utf-8 'unicode chars
- return unichr(0x10ffff)
- if isinstance(text, str):
- # we need to make sure the text is decoded as 'utf-8' unicode
- text = unicode(text, "UTF-8")
- charpoint = ord(text[-1])
- if charpoint == 0x10ffff:
- # if the last character is the biggest possible char we need to
- # look at the second last
- return get_right_boundary(text[:-1])
- return text[:-1] + unichr(charpoint+1)
-
-class WhereClause:
- """
- This class provides a convenient representation a SQL `WHERE' clause,
- composed of a set of conditions joined together.
-
- The relation between conditions can be either of type *AND* or *OR*, but
- not both. To create more complex clauses, use several :class:`WhereClause`
- instances and joining them together using :meth:`extend`.
-
- Instances of this class can then be used to obtain a line of SQL code and
- a list of arguments, for use with the SQLite3 module, accessing the
- appropriate properties:
- >>> where.sql, where.arguments
- """
-
- AND = " AND "
- OR = " OR "
- NOT = "NOT "
-
- @staticmethod
- def optimize_glob(column, table, prefix):
- """returns an optimized version of the GLOB statement as described
- in http://www.sqlite.org/optoverview.html `4.0 The LIKE optimization`
- """
- if isinstance(prefix, str):
- # we need to make sure the text is decoded as 'utf-8' unicode
- prefix = unicode(prefix, "UTF-8")
- if not prefix:
- # empty prefix means 'select all', no way to optimize this
- sql = "SELECT %s FROM %s" %(column, table)
- return sql, ()
- elif all([i == unichr(0x10ffff) for i in prefix]):
- sql = "SELECT %s FROM %s WHERE value >= ?" %(column, table)
- return sql, (prefix,)
- else:
- sql = "SELECT %s FROM %s WHERE (value >= ? AND value < ?)" %(column, table)
- return sql, (prefix, get_right_boundary(prefix))
-
- def __init__(self, relation, negation=False):
- self._conditions = []
- self.arguments = []
- self._relation = relation
- self._no_result_member = False
- self._negation = negation
-
- def __len__(self):
- return len(self._conditions)
-
- def add(self, condition, arguments=None):
- if not condition:
- return
- self._conditions.append(condition)
- if arguments is not None:
- if not hasattr(arguments, "__iter__"):
- self.arguments.append(arguments)
- else:
- self.arguments.extend(arguments)
-
- def add_text_condition(self, column, value, like=False, negation=False, cache=None):
- if like:
- assert column in ("origin", "subj_uri", "subj_current_uri",
- "subj_origin", "actor", "subj_mimetype"), \
- "prefix search on the %r column is not supported by zeitgeist" % column
- if column == "subj_uri":
- # subj_id directly points to the id of an uri entry
- view_column = "subj_id"
- elif column == "subj_current_uri":
- view_column = "subj_id_current"
- else:
- view_column = column
- optimized_glob, value = self.optimize_glob("id", TABLE_MAP.get(column, column), value)
- sql = "%s %sIN (%s)" %(view_column, self.NOT if negation else "", optimized_glob)
- if negation:
- sql += " OR %s IS NULL" % view_column
- else:
- if column == "origin":
- column ="event_origin_uri"
- elif column == "subj_origin":
- column = "subj_origin_uri"
- sql = "%s %s= ?" %(column, "!" if negation else "")
- if cache is not None:
- value = cache[value]
- self.add(sql, value)
-
- def extend(self, where):
- self.add(where.sql, where.arguments)
- if not where.may_have_results():
- if self._relation == self.AND:
- self.clear()
- self.register_no_result()
-
- @property
- def sql(self):
- if self: # Do not return "()" if there are no conditions
- negation = self.NOT if self._negation else ""
- return "%s(%s)" %(negation, self._relation.join(self._conditions))
-
- def register_no_result(self):
- self._no_result_member = True
-
- def may_have_results(self):
- """
- Return False if we know from our cached data that the query
- will give no results.
- """
- return len(self._conditions) > 0 or not self._no_result_member
-
- def clear(self):
- """
- Reset this WhereClause to the state of a newly created one.
- """
- self._conditions = []
- self.arguments = []
- self._no_result_member = False
diff --git a/extensions/fts.vala b/extensions/fts.vala
index e6435927..0c614996 100644
--- a/extensions/fts.vala
+++ b/extensions/fts.vala
@@ -52,8 +52,11 @@ namespace Zeitgeist
class SearchEngine: Extension, RemoteSearchEngine
{
+ private const string INDEXER_NAME = "org.gnome.zeitgeist.SimpleIndexer";
+
private RemoteSimpleIndexer siin;
private uint registration_id;
+ private MonitorManager? notifier;
SearchEngine ()
{
@@ -64,6 +67,15 @@ namespace Zeitgeist
{
if (Utils.using_in_memory_database ()) return;
+ // installing a monitor from the daemon will ensure that we don't
+ // miss any notifications that would be emitted in between
+ // zeitgeist start and fts daemon start
+ notifier = MonitorManager.get_default ();
+ notifier.install_monitor (new BusName (INDEXER_NAME),
+ "/org/gnome/zeitgeist/monitor/special",
+ new TimeRange.anytime (),
+ new GenericArray<Event> ());
+
try
{
var connection = Bus.get_sync (BusType.SESSION, null);
@@ -73,7 +85,7 @@ namespace Zeitgeist
// FIXME: shouldn't we delay this to next idle callback?
// Get SimpleIndexer
Bus.watch_name_on_connection (connection,
- "org.gnome.zeitgeist.SimpleIndexer",
+ INDEXER_NAME,
BusNameWatcherFlags.AUTO_START,
(conn) =>
{
diff --git a/src/datamodel.vala b/src/datamodel.vala
index 1c10a07f..4dd4a6b3 100644
--- a/src/datamodel.vala
+++ b/src/datamodel.vala
@@ -288,7 +288,6 @@ namespace Zeitgeist
if (property.has_prefix (parsed)) matches = true;
}
- debug ("Checking matches for %s", parsed);
return (is_negated) ? !matches : matches;
}
@@ -481,8 +480,6 @@ namespace Zeitgeist
*/
//Check if interpretation is child of template_event or same
- debug("Checking if event %u matches template_event %u\n",
- this.id, template_event.id);
if (!check_field_match (this.interpretation, template_event.interpretation, true))
return false;
//Check if manifestation is child of template_event or same
diff --git a/src/engine.vala b/src/engine.vala
index 2de9849c..a6bac29b 100644
--- a/src/engine.vala
+++ b/src/engine.vala
@@ -44,6 +44,7 @@ public class Engine : DbReader
Object (database: new Zeitgeist.SQLite.Database ());
// TODO: take care of this if we decide to subclass Engine
+ // (we need to propagate the error, so it can't go to construct {})
last_id = database.get_last_id ();
extension_collection = new ExtensionCollection (this);
}
diff --git a/src/notify.vala b/src/notify.vala
index bc9ae669..c3526bad 100644
--- a/src/notify.vala
+++ b/src/notify.vala
@@ -26,11 +26,32 @@ namespace Zeitgeist
public class MonitorManager : Object
{
+ private static unowned MonitorManager? instance;
+
private HashTable<string, Monitor> monitors;
private HashTable<string, GenericArray<string>> connections;
+ // ref-counted singleton - it can get destroyed easily, but has
+ // singleton semantics as long as some top-level instance keeps
+ // a reference to it
+ public static MonitorManager get_default ()
+ {
+ return instance ?? new MonitorManager ();
+ }
+
+ private MonitorManager ()
+ {
+ }
+
+ ~MonitorManager ()
+ {
+ instance = null;
+ }
+
construct
{
+ instance = this;
+
monitors = new HashTable<string, Monitor> (str_hash, str_equal);
connections = new HashTable<string, GenericArray<string>>
(str_hash, str_equal);
@@ -53,7 +74,8 @@ namespace Zeitgeist
foreach (var owner in connections.get_keys())
{
- if (arg0 == owner)
+ // Don't disconnect monitors using service names
+ if (arg0 == owner && g_dbus_is_unique_name (arg0))
{
var paths = connections.lookup (arg0);
debug("Client disconnected %s", owner);
@@ -120,32 +142,49 @@ namespace Zeitgeist
{
queued_notifications = new SList<QueuedNotification> ();
Bus.get_proxy<RemoteMonitor> (BusType.SESSION, peer,
- object_path, DBusProxyFlags.DO_NOT_LOAD_PROPERTIES |
- DBusProxyFlags.DO_NOT_CONNECT_SIGNALS,
+ object_path,
+ DBusProxyFlags.DO_NOT_LOAD_PROPERTIES
+ | DBusProxyFlags.DO_NOT_CONNECT_SIGNALS
+ | DBusProxyFlags.DO_NOT_AUTO_START,
null, (obj, res) =>
{
try
{
proxy_object = Bus.get_proxy.end (res);
+ // Process queued notifications...
+ flush_notifications ();
+
+ proxy_object.notify["g-name-owner"].connect (name_owner_changed);
}
catch (IOError err)
{
warning ("%s", err.message);
}
-
- // Process queued notifications...
- queued_notifications.reverse ();
- foreach (unowned QueuedNotification notification
- in queued_notifications)
- {
- notification.send (proxy_object);
- }
- queued_notifications = null;
});
time_range = tr;
event_templates = templates;
}
+ private void name_owner_changed ()
+ requires (proxy_object != null)
+ {
+ // FIXME: can we use this to actually remove the monitor?
+ // (instead of using NameOwnerChanged signal)
+ DBusProxy p = proxy_object as DBusProxy;
+ if (p.g_name_owner != null) flush_notifications ();
+ }
+
+ private void flush_notifications ()
+ {
+ queued_notifications.reverse ();
+ foreach (unowned QueuedNotification notification
+ in queued_notifications)
+ {
+ notification.send (proxy_object);
+ }
+ queued_notifications = null;
+ }
+
private bool matches (Event event)
{
if (event_templates.length == 0)
@@ -182,8 +221,15 @@ namespace Zeitgeist
// between monitors?
Variant events_v = Events.to_variant (matching_events);
+ string? name_owner = null;
if (proxy_object != null)
{
+ DBusProxy p = proxy_object as DBusProxy;
+ if (p != null) name_owner = p.g_name_owner;
+ }
+
+ if (proxy_object != null && name_owner != null)
+ {
DBusProxy p = (DBusProxy) proxy_object;
debug ("Notifying %s about %d insertions",
p.get_name (), matching_events.length);
@@ -208,8 +254,15 @@ namespace Zeitgeist
{
Variant time_v = intersect_tr.to_variant ();
+ string? name_owner = null;
if (proxy_object != null)
{
+ DBusProxy p = proxy_object as DBusProxy;
+ if (p != null) name_owner = p.g_name_owner;
+ }
+
+ if (proxy_object != null && name_owner != null)
+ {
proxy_object.notify_delete (time_v, event_ids);
}
else
diff --git a/src/sql.vala b/src/sql.vala
index feea64bb..8f5bd2be 100644
--- a/src/sql.vala
+++ b/src/sql.vala
@@ -111,10 +111,10 @@ namespace Zeitgeist.SQLite
if (is_read_only)
{
int ver = DatabaseSchema.get_schema_version (database);
- if (ver != DatabaseSchema.CORE_SCHEMA_VERSION)
+ if (ver < DatabaseSchema.CORE_SCHEMA_VERSION)
{
throw new EngineError.DATABASE_CANTOPEN (
- "Unable to open database");
+ "Unable to open database: old schema version");
}
}
else
diff --git a/src/table-lookup.vala b/src/table-lookup.vala
index 0d59c92d..642bfba6 100644
--- a/src/table-lookup.vala
+++ b/src/table-lookup.vala
@@ -90,6 +90,26 @@ namespace Zeitgeist.SQLite
// When we fetch an event, it either was already in the database
// at the time Zeitgeist started or it was inserted later -using
// Zeitgeist-, so here we always have the data in memory already.
+ unowned string val = id_to_value.lookup (id);
+ if (val != null) return val;
+
+ // The above statement isn't exactly true. If this is a standalone
+ // reader in a separate process, the values won't be kept updated
+ // so we need to query the DB if we don't find it.
+ int rc;
+
+ rc = db.exec ("SELECT value FROM %s WHERE id=%d".printf (table, id),
+ (n_columns, values, column_names) =>
+ {
+ id_to_value.insert (id, values[0]);
+ value_to_id.insert (values[0], id);
+ return 0;
+ }, null);
+ if (rc != Sqlite.OK)
+ {
+ critical ("Can't get data from table %s: %d, %s\n", table,
+ rc, db.errmsg ());
+ }
return id_to_value.lookup (id);
}
diff --git a/src/zeitgeist-daemon.vala b/src/zeitgeist-daemon.vala
index 2f3d32f7..fd8c39f5 100644
--- a/src/zeitgeist-daemon.vala
+++ b/src/zeitgeist-daemon.vala
@@ -122,7 +122,7 @@ namespace Zeitgeist
public Daemon () throws EngineError
{
engine = new Engine ();
- notifications = new MonitorManager ();
+ notifications = MonitorManager.get_default ();
}
public Variant get_events (uint32[] event_ids, BusName sender)