48 files changed, 3147 insertions, 1899 deletions
diff --git a/.bzrignore b/.bzrignore
index df052f35..c70ad2bf 100644
--- a/.bzrignore
+++ b/.bzrignore
@@ -44,12 +44,23 @@ extensions/*.c
 extensions/*.stamp
 extensions/*.la
 extensions/*.lo
+extensions/fts++/.deps
+extensions/fts++/.libs
+extensions/fts++/*.c
+extensions/fts++/*.stamp
+extensions/fts++/*.la
+extensions/fts++/*.lo
+extensions/fts++/zeitgeist-internal.*
+extensions/fts++/test/.deps
+extensions/fts++/test/.libs
+extensions/fts++/test/test-fts
+extensions/fts++/org.gnome.zeitgeist.fts.service
+extensions/fts++/zeitgeist-fts
 test/direct/marshalling
 test/dbus/__pycache__
 test/direct/table-lookup-test
 src/zeitgeist-engine.vapi
 src/zeitgeist-engine.h
-extensions/fts-python/org.gnome.zeitgeist.fts.service
 py-compile
 python/_ontology.py
 test/direct/*.c
diff --git a/configure.ac b/configure.ac
index 6b5ca97e..83b06deb 100644
--- a/configure.ac
+++ b/configure.ac
@@ -8,6 +8,7 @@ AM_PATH_PYTHON
 
 AC_PROG_CC
 AM_PROG_CC_C_O
+AC_PROG_CXX
 AC_DISABLE_STATIC
 AC_PROG_LIBTOOL
 
@@ -59,7 +60,8 @@ AC_CONFIG_FILES([
 	Makefile
 	src/Makefile
 	extensions/Makefile
-	extensions/fts-python/Makefile
+	extensions/fts++/Makefile
+	extensions/fts++/test/Makefile
 	data/Makefile
 	data/ontology/Makefile
 	python/Makefile
diff --git a/extensions/Makefile.am b/extensions/Makefile.am
index e6642522..5ddc9238 100644
--- a/extensions/Makefile.am
+++ b/extensions/Makefile.am
@@ -1,4 +1,4 @@
-SUBDIRS = fts-python
+SUBDIRS = fts++
 
 NULL =
 
diff --git a/extensions/fts++/Makefile.am b/extensions/fts++/Makefile.am
new file mode 100644
index 00000000..931695f7
--- /dev/null
+++ b/extensions/fts++/Makefile.am
@@ -0,0 +1,113 @@
+SUBDIRS = test
+NULL = 
+
+noinst_LTLIBRARIES = libzeitgeist-internal.la
+libexec_PROGRAMS = zeitgeist-fts
+
+servicedir = $(DBUS_SERVICES_DIR)
+service_DATA = org.gnome.zeitgeist.fts.service
+
+org.gnome.zeitgeist.fts.service: org.gnome.zeitgeist.fts.service.in
+	$(AM_V_GEN)sed  -e s!\@libexecdir\@!$(libexecdir)! < $< > $@
+org.gnome.zeitgeist.fts.service: Makefile
+
+AM_CPPFLAGS = \
+	$(ZEITGEIST_CFLAGS) \
+	-include $(CONFIG_HEADER) \
+	-w \
+	$(NULL)
+
+AM_VALAFLAGS = \
+	--target-glib=2.26 \
+	--pkg gio-2.0 \
+	--pkg sqlite3 \
+	--pkg posix \
+	--pkg gmodule-2.0 \
+	$(top_srcdir)/config.vapi \
+	$(NULL)
+
+libzeitgeist_internal_la_VALASOURCES = \
+	datamodel.vala \
+	db-reader.vala \
+	engine.vala \
+	sql.vala \
+	remote.vala \
+	utils.vala \
+	errors.vala \
+	table-lookup.vala \
+	sql-schema.vala \
+	where-clause.vala \
+	ontology.vala \
+	ontology-uris.vala \
+	mimetype.vala \
+	ext-dummies.vala \
+	$(NULL)
+
+libzeitgeist_internal_la_SOURCES = \
+	zeitgeist-internal.stamp \
+	$(libzeitgeist_internal_la_VALASOURCES:.vala=.c) \
+	$(NULL)
+
+libzeitgeist_internal_la_LIBADD = \
+	$(ZEITGEIST_LIBS) \
+	$(NULL)
+
+zeitgeist_fts_VALASOURCES = \
+	zeitgeist-fts.vala \
+	$(NULL)
+
+zeitgeist_fts_SOURCES = \
+	zeitgeist-fts_vala.stamp \
+	$(zeitgeist_fts_VALASOURCES:.vala=.c) \
+	controller.cpp \
+	controller.h \
+	fts.cpp \
+	fts.h \
+	indexer.cpp \
+	indexer.h \
+	task.cpp \
+	task.h \
+	stringutils.cpp \
+	stringutils.h \
+	$(NULL)
+
+zeitgeist_fts_LDADD = \
+	$(builddir)/libzeitgeist-internal.la \
+	-lxapian \
+	$(NULL)
+
+BUILT_SOURCES = \
+	zeitgeist-internal.stamp \
+	zeitgeist-fts_vala.stamp \
+	$(NULL)
+
+zeitgeist-internal.stamp: $(libzeitgeist_internal_la_VALASOURCES)
+	$(VALA_V)$(VALAC) $(AM_VALAFLAGS) $(VALAFLAGS) -C -H zeitgeist-internal.h --library zeitgeist-internal $^
+	@touch "$@"
+
+zeitgeist-fts_vala.stamp: $(zeitgeist_fts_VALASOURCES)
+	$(VALA_V)$(VALAC) $(AM_VALAFLAGS) $(VALAFLAGS) \
+	$(srcdir)/zeitgeist-internal.vapi $(srcdir)/fts.vapi -C $^
+	@touch "$@"
+
+EXTRA_DIST = \
+	$(libzeitgeist_internal_la_VALASOURCES) \
+	$(zeitgeist_fts_VALASOURCES) \
+	zeitgeist-fts_vala.stamp \
+	zeitgeist-internal.h \
+	zeitgeist-internal.vapi fts.vapi \
+	org.gnome.zeitgeist.fts.service.in \
+	$(NULL)
+
+CLEANFILES = org.gnome.zeitgeist.fts.service
+
+DISTCLEANFILES = \
+	$(NULL)
+
+distclean-local:
+	rm -f *.c *.o *.stamp *.~[0-9]~
+
+VALA_V = $(VALA_V_$(V))
+VALA_V_ = $(VALA_V_$(AM_DEFAULT_VERBOSITY))
+VALA_V_0 = @echo "  VALAC " $^;
+
diff --git a/extensions/fts++/controller.cpp b/extensions/fts++/controller.cpp
new file mode 100644
index 00000000..51584cd5
--- /dev/null
+++ b/extensions/fts++/controller.cpp
@@ -0,0 +1,136 @@
+/*
+ * Copyright (C) 2012 Mikkel Kamstrup Erlandsen
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Authored by Mikkel Kamstrup Erlandsen <mikkel.kamstrup@gmail.com>
+ *
+ */
+
+#include "controller.h"
+
+namespace ZeitgeistFTS {
+
+void Controller::Initialize (GError **error)
+{
+  indexer->Initialize (error); // pure delegation; failures are reported through `error`
+}
+
+void Controller::Run ()
+{
+  // A good index needs no work; otherwise wipe it and reindex everything
+  if (indexer->CheckIndex ())
+    return;
+  indexer->DropIndex ();
+  RebuildIndex ();
+}
+
+void Controller::RebuildIndex ()
+{
+  // Empty template list + "anytime" range: ask the reader for every event
+  GPtrArray *no_templates = g_ptr_array_new ();
+  ZeitgeistTimeRange *anytime = zeitgeist_time_range_new_anytime ();
+  GError *find_error = NULL;
+
+  g_debug ("asking reader for all events");
+  GPtrArray *all_events =
+    zeitgeist_db_reader_find_events (zg_reader,
+                                     anytime,
+                                     no_templates,
+                                     ZEITGEIST_STORAGE_STATE_ANY,
+                                     0,
+                                     ZEITGEIST_RESULT_TYPE_MOST_RECENT_EVENTS,
+                                     NULL,
+                                     &find_error);
+  if (find_error == NULL)
+  {
+    g_debug ("reader returned %u events", all_events->len);
+    // The tasks queued by IndexEvents take their own references on the array
+    IndexEvents (all_events);
+    g_ptr_array_unref (all_events);
+
+    // Set the db metadata key only once we're done
+    PushTask (new MetadataTask ("fts_index_version", INDEX_VERSION));
+  }
+  else
+  {
+    g_warning ("%s", find_error->message);
+    g_error_free (find_error);
+  }
+
+  g_object_unref (anytime);
+  g_ptr_array_unref (no_templates);
+}
+
+void Controller::IndexEvents (GPtrArray *events)
+{
+  // Split the work into fixed-size slices; each task holds its own array ref
+  const int CHUNK_SIZE = 32;
+  unsigned offset = 0;
+  for (; offset < events->len; offset += CHUNK_SIZE)
+    PushTask (new IndexEventsTask (g_ptr_array_ref (events), offset,
+                                   CHUNK_SIZE));
+}
+
+void Controller::DeleteEvents (guint *event_ids, int event_ids_size)
+{
+  // FIXME: Should we break the task into chunks here as well, like IndexEvents does?
+  PushTask (new DeleteEventsTask (event_ids, event_ids_size)); // NOTE(review): caller's pointer is handed to a deferred task — confirm the task copies the ids
+}
+
+// Idle-source trampoline: GLib needs a plain function; `self` is the Controller
+static gboolean ProcessTaskIdle (gpointer self)
+{ return static_cast<Controller*> (self)->ProcessTask (); }
+
+void Controller::PushTask (Task* task)
+{
+  queued_tasks.push (task);
+  if (processing_source_id == 0) // schedule processing only when no idle source is pending
+    processing_source_id = g_idle_add (ProcessTaskIdle, this);
+}
+
+gboolean Controller::ProcessTask ()
+{
+  // Run (and destroy) at most one queued task per invocation
+  if (!queued_tasks.empty ())
+  {
+    Task *next = queued_tasks.front ();
+    queued_tasks.pop ();
+
+    next->Process (indexer);
+    delete next;
+  }
+
+  // More work pending: stay scheduled
+  if (!queued_tasks.empty ())
+    return TRUE;
+
+  // Queue drained: flush the index and drop the idle source. The explicit
+  // removal covers callers that invoke this directly instead of through
+  // the main loop.
+  indexer->Commit ();
+  if (processing_source_id != 0)
+  {
+    g_source_remove (processing_source_id);
+    processing_source_id = 0;
+  }
+  return FALSE;
+}
+
+bool Controller::HasPendingTasks ()
+{
+  return !queued_tasks.empty (); // true while at least one task is still queued
+}
+
+}
diff --git a/extensions/fts++/controller.h b/extensions/fts++/controller.h
new file mode 100644
index 00000000..abcd8fda
--- /dev/null
+++ b/extensions/fts++/controller.h
@@ -0,0 +1,72 @@
+/*
+ * Copyright (C) 2012 Mikkel Kamstrup Erlandsen
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Authored by Mikkel Kamstrup Erlandsen <mikkel.kamstrup@gmail.com>
+ *
+ */
+
+#ifndef _ZGFTS_CONTROLLER_H_
+#define _ZGFTS_CONTROLLER_H_
+
+#include <glib-object.h>
+#include <queue>
+#include <vector>
+
+#include "indexer.h"
+#include "task.h"
+#include "zeitgeist-internal.h"
+
+namespace ZeitgeistFTS {
+
+class Controller {
+public:
+  Controller (ZeitgeistDbReader *reader)
+    : indexer (new Indexer (reader)) // initializers listed in declaration order
+    , zg_reader (reader)
+    , processing_source_id (0) {};
+
+  ~Controller ()
+  {
+    if (processing_source_id != 0) g_source_remove (processing_source_id);
+    // Free tasks that never ran, then the Indexer allocated by the constructor
+    for (; !queued_tasks.empty (); queued_tasks.pop ()) delete queued_tasks.front ();
+    delete indexer;
+  }
+
+  void Initialize (GError **error);
+  void Run ();
+  void RebuildIndex ();
+
+  void IndexEvents (GPtrArray *events);
+  void DeleteEvents (guint *event_ids, int event_ids_size);
+
+  void PushTask (Task* task);
+  bool HasPendingTasks ();
+  gboolean ProcessTask ();
+
+  Indexer                 *indexer; // owned; public because fts.cpp calls Search on it
+
+private:
+  ZeitgeistDbReader       *zg_reader; // not unreffed by this class
+
+  typedef std::queue<Task*> TaskQueue;
+  TaskQueue                queued_tasks; // owned Task pointers, processed FIFO
+  guint                    processing_source_id; // GLib idle source id, 0 when none
+};
+
+}
+
+#endif /* _ZGFTS_CONTROLLER_H_ */
diff --git a/extensions/fts++/datamodel.vala b/extensions/fts++/datamodel.vala
new file mode 120000
index 00000000..02172aac
--- /dev/null
+++ b/extensions/fts++/datamodel.vala
@@ -0,0 +1 @@
+../../src/datamodel.vala
+\ No newline at end of file
diff --git a/extensions/fts++/db-reader.vala b/extensions/fts++/db-reader.vala
new file mode 120000
index 00000000..fecbc782
--- /dev/null
+++ b/extensions/fts++/db-reader.vala
@@ -0,0 +1 @@
+../../src/db-reader.vala
+\ No newline at end of file
diff --git a/extensions/fts++/engine.vala b/extensions/fts++/engine.vala
new file mode 120000
index 00000000..e2314a5e
--- /dev/null
+++ b/extensions/fts++/engine.vala
@@ -0,0 +1 @@
+../../src/engine.vala
+\ No newline at end of file
diff --git a/extensions/fts++/errors.vala b/extensions/fts++/errors.vala
new file mode 120000
index 00000000..c630d3ed
--- /dev/null
+++ b/extensions/fts++/errors.vala
@@ -0,0 +1 @@
+../../src/errors.vala
+\ No newline at end of file
diff --git a/extensions/fts++/ext-dummies.vala b/extensions/fts++/ext-dummies.vala
new file mode 100644
index 00000000..f77a2b2d
--- /dev/null
+++ b/extensions/fts++/ext-dummies.vala
@@ -0,0 +1,71 @@
+/* ext-dummies.vala
+ *
+ * Copyright © 2011-2012 Michal Hruby <michal.mhr@gmail.com>
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation, either version 2.1 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+namespace Zeitgeist
+{
+    public class ExtensionCollection : Object // stand-in whose hooks all do nothing
+    {
+        public unowned Engine engine { get; construct; }
+        // Only stores the engine through the construct property
+        public ExtensionCollection (Engine engine)
+        {
+            Object (engine: engine);
+        }
+        // Always returns an empty array of names
+        public string[] get_extension_names ()
+        {
+            string[] result = {};
+            return result;
+        }
+        // Pre-insert hook: no-op
+        public void call_pre_insert_events (GenericArray<Event?> events,
+            BusName? sender)
+        {
+        }
+        // Post-insert hook: no-op
+        public void call_post_insert_events (GenericArray<Event?> events,
+            BusName? sender)
+        {
+        }
+        // Pre-delete hook: hands back the caller's ids unchanged
+        public unowned uint32[] call_pre_delete_events (uint32[] event_ids,
+            BusName? sender)
+        {
+            return event_ids;
+        }
+        // Post-delete hook: no-op
+        public void call_post_delete_events (uint32[] event_ids,
+            BusName? sender)
+        {
+        }
+    }
+
+    public class ExtensionStore : Object // stand-in: holds the engine reference and nothing else
+    {
+        public unowned Engine engine { get; construct; }
+        // Only stores the engine through the construct property
+        public ExtensionStore (Engine engine)
+        {
+            Object (engine: engine);
+        }
+    }
+
+}
+
+// vim:expandtab:ts=4:sw=4
diff --git a/extensions/fts++/fts.cpp b/extensions/fts++/fts.cpp
new file mode 100644
index 00000000..5d66e2ea
--- /dev/null
+++ b/extensions/fts++/fts.cpp
@@ -0,0 +1,136 @@
+/*
+ * Copyright (C) 2012 Canonical Ltd
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Authored by Michal Hruby <michal.hruby@canonical.com>
+ *
+ */
+
+#include "fts.h"
+#include "indexer.h"
+#include "controller.h"
+
+ZeitgeistIndexer*
+zeitgeist_indexer_new (ZeitgeistDbReader *reader, GError **error)
+{
+  g_return_val_if_fail (ZEITGEIST_IS_DB_READER (reader), NULL);
+  g_return_val_if_fail (error == NULL || *error == NULL, NULL);
+
+  // Exported before the controller (and thus any Xapian object) is created
+  g_setenv ("XAPIAN_CJK_NGRAM", "1", TRUE);
+
+  ZeitgeistFTS::Controller *controller = new ZeitgeistFTS::Controller (reader);
+  GError *init_error = NULL;
+  controller->Initialize (&init_error);
+  if (init_error != NULL)
+  {
+    // Initialization failed: discard the controller, hand the error to the caller
+    delete controller;
+    g_propagate_error (error, init_error);
+    return NULL;
+  }
+
+  // Checks the index and queues a full rebuild when CheckIndex rejects it
+  controller->Run ();
+
+  // The opaque handle is simply the controller pointer
+  return (ZeitgeistIndexer*) controller;
+}
+
+void
+zeitgeist_indexer_free (ZeitgeistIndexer* indexer)
+{
+  g_return_if_fail (indexer != NULL);
+  // The handle is really a Controller (see zeitgeist_indexer_new)
+  delete (ZeitgeistFTS::Controller*) indexer;
+}
+
+GPtrArray* zeitgeist_indexer_search (ZeitgeistIndexer *indexer,
+                                     const gchar *search_string,
+                                     ZeitgeistTimeRange *time_range,
+                                     GPtrArray *templates,
+                                     guint offset,
+                                     guint count,
+                                     ZeitgeistResultType result_type,
+                                     guint *matches,
+                                     GError **error)
+{
+  g_return_val_if_fail (indexer != NULL, NULL);
+  g_return_val_if_fail (search_string != NULL, NULL);
+  g_return_val_if_fail (ZEITGEIST_IS_TIME_RANGE (time_range), NULL);
+  g_return_val_if_fail (error == NULL || *error == NULL, NULL);
+
+  // The opaque handle wraps a Controller; searching is done by the Indexer
+  // it holds. Note that `templates` and `matches` are passed through
+  // unchecked.
+  ZeitgeistFTS::Controller *controller =
+    (ZeitgeistFTS::Controller*) indexer;
+
+  // Results and any error come straight from Indexer::Search
+  return controller->indexer->Search (search_string, time_range,
+                                      templates, offset, count, result_type,
+                                      matches, error);
+}
+
+void zeitgeist_indexer_index_events (ZeitgeistIndexer *indexer,
+                                     GPtrArray *events)
+{
+  g_return_if_fail (indexer != NULL);
+  g_return_if_fail (events != NULL);
+
+  // Unwrap the opaque handle and queue the events on the controller
+  ZeitgeistFTS::Controller *controller =
+    (ZeitgeistFTS::Controller*) indexer;
+
+  controller->IndexEvents (events);
+}
+
+void zeitgeist_indexer_delete_events (ZeitgeistIndexer *indexer,
+                                      guint *event_ids,
+                                      int event_ids_size)
+{
+  g_return_if_fail (indexer != NULL);
+
+  // Nothing to delete: do not queue a task at all
+  if (event_ids_size <= 0)
+    return;
+
+  ZeitgeistFTS::Controller *controller =
+    (ZeitgeistFTS::Controller*) indexer;
+  controller->DeleteEvents (event_ids, event_ids_size);
+}
+
+gboolean zeitgeist_indexer_has_pending_tasks (ZeitgeistIndexer *indexer)
+{
+  g_return_val_if_fail (indexer != NULL, FALSE);
+
+  ZeitgeistFTS::Controller *controller =
+    (ZeitgeistFTS::Controller*) indexer;
+  // Spell out the bool -> gboolean mapping
+  if (controller->HasPendingTasks ()) return TRUE;
+  return FALSE;
+}
+
+void zeitgeist_indexer_process_task (ZeitgeistIndexer *indexer)
+{
+  g_return_if_fail (indexer != NULL);
+
+  // Runs at most one queued task synchronously; the gboolean result of
+  // ProcessTask is deliberately ignored here
+  ZeitgeistFTS::Controller *controller =
+    (ZeitgeistFTS::Controller*) indexer;
+  controller->ProcessTask ();
+}
+
diff --git a/extensions/fts++/fts.h b/extensions/fts++/fts.h
new file mode 100644
index 00000000..2226ec90
--- /dev/null
+++ b/extensions/fts++/fts.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright (C) 2012 Canonical Ltd
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Authored by Michal Hruby <michal.hruby@canonical.com>
+ *
+ */
+
+#ifndef _ZGFTS_H_
+#define _ZGFTS_H_
+
+#include <glib.h>
+#include "zeitgeist-internal.h"
+
+typedef struct _ZeitgeistIndexer ZeitgeistIndexer;
+
+G_BEGIN_DECLS
+
+ZeitgeistIndexer*  zeitgeist_indexer_new           (ZeitgeistDbReader* reader,
+                                                    GError **error);
+
+void               zeitgeist_indexer_free          (ZeitgeistIndexer* indexer);
+
+GPtrArray*         zeitgeist_indexer_search        (ZeitgeistIndexer *indexer,
+                                                    const gchar *search_string,
+                                                    ZeitgeistTimeRange *time_range,
+                                                    GPtrArray *templates,
+                                                    guint offset,
+                                                    guint count,
+                                                    ZeitgeistResultType result_type,
+                                                    guint *matches,
+                                                    GError **error);
+
+void               zeitgeist_indexer_index_events  (ZeitgeistIndexer *indexer,
+                                                    GPtrArray *events);
+
+void               zeitgeist_indexer_delete_events (ZeitgeistIndexer *indexer,
+                                                    guint *event_ids,
+                                                    int event_ids_size);
+
+gboolean           zeitgeist_indexer_has_pending_tasks (ZeitgeistIndexer *indexer);
+
+void               zeitgeist_indexer_process_task  (ZeitgeistIndexer *indexer);
+
+G_END_DECLS
+
+#endif /* _ZGFTS_H_ */
diff --git a/extensions/fts++/fts.vapi b/extensions/fts++/fts.vapi
new file mode 100644
index 00000000..1aae3602
--- /dev/null
+++ b/extensions/fts++/fts.vapi
@@ -0,0 +1,25 @@
+/* fts.vapi is hand-written - not a big deal for these ~10 lines */
+
+namespace Zeitgeist {
+  [Compact]
+  [CCode (free_function = "zeitgeist_indexer_free", cheader_filename = "fts.h")]
+  public class Indexer {
+    public Indexer (DbReader reader) throws EngineError;
+    // Binds zeitgeist_indexer_search from fts.h; `matches` is an out parameter
+    public GLib.GenericArray<Event> search (string search_string,
+                                            TimeRange time_range,
+                                            GLib.GenericArray<Event> templates,
+                                            uint offset,
+                                            uint count,
+                                            ResultType result_type,
+                                            out uint matches) throws GLib.Error;
+    // Binds zeitgeist_indexer_index_events
+    public void index_events (GLib.GenericArray<Event> events);
+    // Binds zeitgeist_indexer_delete_events, which takes a guint* plus an int length
+    public void delete_events (uint[] event_ids);
+    // Binds zeitgeist_indexer_has_pending_tasks
+    public bool has_pending_tasks ();
+    // Binds zeitgeist_indexer_process_task
+    public void process_task ();
+  }
+}
diff --git a/extensions/fts++/indexer.cpp b/extensions/fts++/indexer.cpp
new file mode 100644
index 00000000..d97f7ebd
--- /dev/null
+++ b/extensions/fts++/indexer.cpp
@@ -0,0 +1,897 @@
+/*
+ * Copyright (C) 2012 Canonical Ltd
+ *               2012 Mikkel Kamstrup Erlandsen
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Authored by Michal Hruby <michal.hruby@canonical.com>
+ *             Mikkel Kamstrup Erlandsen <mikkel.kamstrup@gmail.com>
+ *
+ */
+
+#include "indexer.h"
+#include "stringutils.h"
+#include <xapian.h>
+#include <queue>
+#include <vector>
+
+#include <gio/gio.h>
+#include <gio/gdesktopappinfo.h>
+
+namespace ZeitgeistFTS {
+
+const std::string FILTER_PREFIX_EVENT_INTERPRETATION = "ZGEI";
+const std::string FILTER_PREFIX_EVENT_MANIFESTATION = "ZGEM";
+const std::string FILTER_PREFIX_ACTOR = "ZGA";
+const std::string FILTER_PREFIX_SUBJECT_URI = "ZGSU";
+const std::string FILTER_PREFIX_SUBJECT_INTERPRETATION = "ZGSI";
+const std::string FILTER_PREFIX_SUBJECT_MANIFESTATION = "ZGSM";
+const std::string FILTER_PREFIX_SUBJECT_ORIGIN = "ZGSO";
+const std::string FILTER_PREFIX_SUBJECT_MIMETYPE = "ZGST";
+const std::string FILTER_PREFIX_SUBJECT_STORAGE = "ZGSS";
+const std::string FILTER_PREFIX_XDG_CATEGORY = "AC";
+
+const Xapian::valueno VALUE_EVENT_ID = 0;
+const Xapian::valueno VALUE_TIMESTAMP = 1;
+
+#define QUERY_PARSER_FLAGS \
+  Xapian::QueryParser::FLAG_PHRASE | Xapian::QueryParser::FLAG_BOOLEAN | \
+  Xapian::QueryParser::FLAG_PURE_NOT | Xapian::QueryParser::FLAG_LOVEHATE | \
+  Xapian::QueryParser::FLAG_WILDCARD
+
+const std::string FTS_MAIN_DIR = "ftspp.index";
+
+// Open or create the Xapian index and configure the query parser; failures set `error`
+void Indexer::Initialize (GError **error)
+{
+  try
+  {
+    if (zeitgeist_utils_using_in_memory_database ())
+    {
+      this->db = new Xapian::WritableDatabase;
+      this->db->add_database (Xapian::InMemory::open ());
+    }
+    else
+    {
+      gchar *path = g_build_filename (zeitgeist_utils_get_data_path (),
+                                      FTS_MAIN_DIR.c_str (), NULL);
+      std::string db_path (path); // copy, so the C string can be released
+      g_free (path);              // before the constructor below may throw
+      this->db = new Xapian::WritableDatabase (db_path, Xapian::DB_CREATE_OR_OPEN);
+    }
+
+    this->tokenizer = new Xapian::TermGenerator ();
+    this->query_parser = new Xapian::QueryParser ();
+    this->query_parser->add_prefix ("name", "N");
+    this->query_parser->add_prefix ("title", "N");
+    this->query_parser->add_prefix ("site", "S");
+    this->query_parser->add_prefix ("app", "A");
+    this->query_parser->add_boolean_prefix ("zgei",
+        FILTER_PREFIX_EVENT_INTERPRETATION);
+    this->query_parser->add_boolean_prefix ("zgem",
+        FILTER_PREFIX_EVENT_MANIFESTATION);
+    this->query_parser->add_boolean_prefix ("zga", FILTER_PREFIX_ACTOR);
+    this->query_parser->add_prefix ("zgsu", FILTER_PREFIX_SUBJECT_URI);
+    this->query_parser->add_boolean_prefix ("zgsi",
+        FILTER_PREFIX_SUBJECT_INTERPRETATION);
+    this->query_parser->add_boolean_prefix ("zgsm",
+        FILTER_PREFIX_SUBJECT_MANIFESTATION);
+    this->query_parser->add_prefix ("zgso", FILTER_PREFIX_SUBJECT_ORIGIN);
+    this->query_parser->add_boolean_prefix ("zgst",
+        FILTER_PREFIX_SUBJECT_MIMETYPE);
+    this->query_parser->add_boolean_prefix ("zgss",
+        FILTER_PREFIX_SUBJECT_STORAGE);
+    this->query_parser->add_prefix ("category", FILTER_PREFIX_XDG_CATEGORY);
+    // Range syntax over the two value slots: event id ("id") and timestamp ("ms")
+    this->query_parser->add_valuerangeprocessor (
+        new Xapian::NumberValueRangeProcessor (VALUE_EVENT_ID, "id"));
+    this->query_parser->add_valuerangeprocessor (
+        new Xapian::NumberValueRangeProcessor (VALUE_TIMESTAMP, "ms", false));
+
+    this->query_parser->set_default_op (Xapian::Query::OP_AND);
+    this->query_parser->set_database (*this->db);
+
+    this->enquire = new Xapian::Enquire (*this->db);
+  }
+  catch (const Xapian::Error &xp_error)
+  {
+    g_set_error_literal (error,
+                         ZEITGEIST_ENGINE_ERROR,
+                         ZEITGEIST_ENGINE_ERROR_DATABASE_ERROR,
+                         xp_error.get_msg ().c_str ());
+    this->db = NULL; // NOTE(review): a database object created above is not deleted here — confirm
+  }
+}
+
+/**
+ * Returns true if and only if the index is usable as-is: its stored
+ * "fts_index_version" matches INDEX_VERSION and it holds at least one document.
+ */
+bool Indexer::CheckIndex ()
+{
+  const std::string stored_version = db->get_metadata ("fts_index_version");
+  if (stored_version != INDEX_VERSION)
+  {
+    g_message ("Index must be upgraded. Doing full rebuild");
+    return false;
+  }
+
+  if (db->get_doccount () == 0)
+  {
+    g_message ("Empty index detected. Doing full rebuild");
+    return false;
+  }
+  return true;
+}
+
+/**
+ * Clear the index and create a new empty one, rebinding query_parser and enquire to it
+ */
+void Indexer::DropIndex ()
+{
+  try
+  {
+    if (this->db != NULL)
+    {
+      this->db->close ();
+      delete this->db;
+      this->db = NULL;
+    }
+
+    if (this->enquire != NULL)
+    {
+      delete this->enquire;
+      this->enquire = NULL;
+    }
+
+    if (zeitgeist_utils_using_in_memory_database ())
+    {
+      this->db = new Xapian::WritableDatabase;
+      this->db->add_database (Xapian::InMemory::open ());
+    }
+    else
+    {
+      gchar *path = g_build_filename (zeitgeist_utils_get_data_path (),
+                                      FTS_MAIN_DIR.c_str (), NULL);
+      // Copy and free the C string first so a throwing constructor cannot leak it
+      std::string db_path (path);
+      g_free (path);
+      this->db = new Xapian::WritableDatabase (db_path, Xapian::DB_CREATE_OR_OVERWRITE);
+    }
+
+    this->query_parser->set_database (*this->db);
+    this->enquire = new Xapian::Enquire (*this->db);
+  }
+  catch (const Xapian::Error &xp_error)
+  {
+    g_error ("Error ocurred during database reindex: %s",
+             xp_error.get_msg ().c_str ()); // g_error() is fatal
+  }
+}
+
+void Indexer::Commit ()
+{
+  try
+  {
+    db->commit (); // commit pending changes on the Xapian database
+  }
+  catch (Xapian::Error const& e)
+  {
+    g_warning ("Failed to commit changes: %s", e.get_msg ().c_str ()); // logged only; not reported to the caller
+  }
+}
+
+std::string Indexer::ExpandType (std::string const& prefix,
+                                 const gchar* unparsed_uri)
+{
+  // Work on a private copy: the parse helpers take the string by address
+  gchar* symbol_uri = g_strdup (unparsed_uri);
+  const gboolean negated = zeitgeist_utils_parse_negation (&symbol_uri);
+  const gboolean expand_children = !zeitgeist_utils_parse_noexpand (&symbol_uri);
+
+  // The symbol itself always comes first, optionally followed by its children
+  GList *terms = g_list_append (NULL, symbol_uri);
+  if (expand_children)
+    terms = g_list_concat (terms,
+                           zeitgeist_symbol_get_all_children (symbol_uri));
+
+  std::string expanded;
+  for (GList *node = terms; node != NULL; node = node->next)
+  {
+    if (node != terms) expanded += " OR ";
+    expanded += prefix + std::string((gchar*) node->data);
+  }
+
+  // Only the list cells are freed here, plus the copy made above
+  g_list_free (terms);
+  g_free (symbol_uri);
+
+  if (!negated)
+    return expanded;
+  return "NOT (" + expanded + ")";
+}
+
+// Builds a query string from event templates: the constraints of one template
+// are ANDed together, and the templates themselves are ORed
+std::string Indexer::CompileEventFilterQuery (GPtrArray *templates)
+{
+  std::vector<std::string> query;
+
+  for (unsigned i = 0; i < templates->len; i++)
+  {
+    const gchar* val;
+    std::vector<std::string> tmpl;
+    ZeitgeistEvent *event = (ZeitgeistEvent*) g_ptr_array_index (templates, i);
+
+    val = zeitgeist_event_get_interpretation (event);
+    if (val && val[0] != '\0')
+      tmpl.push_back (ExpandType ("zgei:", val));
+
+    val = zeitgeist_event_get_manifestation (event);
+    if (val && val[0] != '\0')
+      tmpl.push_back (ExpandType ("zgem:", val));
+
+    val = zeitgeist_event_get_actor (event);
+    if (val && val[0] != '\0')
+      tmpl.push_back ("zga:" + StringUtils::MangleUri (val));
+
+    GPtrArray *subjects = zeitgeist_event_get_subjects (event);
+    for (unsigned j = 0; j < subjects->len; j++)
+    {
+      ZeitgeistSubject *subject = (ZeitgeistSubject*) g_ptr_array_index (subjects, j);
+      val = zeitgeist_subject_get_uri (subject);
+      if (val && val[0] != '\0')
+        tmpl.push_back ("zgsu:" + StringUtils::MangleUri (val));
+
+      val = zeitgeist_subject_get_interpretation (subject);
+      if (val && val[0] != '\0')
+        tmpl.push_back (ExpandType ("zgsi:", val));
+
+      val = zeitgeist_subject_get_manifestation (subject);
+      if (val && val[0] != '\0')
+        tmpl.push_back (ExpandType ("zgsm:", val));
+
+      val = zeitgeist_subject_get_origin (subject);
+      if (val && val[0] != '\0')
+        tmpl.push_back ("zgso:" + StringUtils::MangleUri (val));
+
+      val = zeitgeist_subject_get_mimetype (subject);
+      if (val && val[0] != '\0')
+        tmpl.push_back (std::string ("zgst:") + val);
+
+      val = zeitgeist_subject_get_storage (subject);
+      if (val && val[0] != '\0')
+        tmpl.push_back (std::string ("zgss:") + val);
+    }
+    // A template without any constraint contributes nothing to the filter
+    if (tmpl.empty ()) continue;
+    std::string event_query ("(");
+    for (size_t k = 0; k < tmpl.size (); k++)
+    {
+      if (k > 0) event_query += ") AND (";
+      event_query += tmpl[k];
+    }
+    query.push_back (event_query + ")");
+  }
+
+  if (query.empty ()) return std::string ("");
+  std::string result;
+  for (size_t k = 0; k < query.size (); k++)
+  {
+    if (k > 0) result += " OR ";
+    result += query[k];
+  }
+  return result;
+}
+
+std::string Indexer::CompileTimeRangeFilterQuery (gint64 start, gint64 end)
+{
+  // Format through GLib so the 64-bit values print portably; the result
+  // has the shape "<start>..<end>ms"
+  gchar *formatted = g_strdup_printf ("%" G_GINT64_FORMAT ".."
+                                      "%" G_GINT64_FORMAT "ms", start, end);
+  std::string range_query (formatted);
+  g_free (formatted);
+  return range_query;
+}
+
+/**
+ * Adds boolean filter terms (prefix + value) for the event and each of its
+ * subjects to doc. Filtering rules will not affect the relevancy ranking.
+ */
+void Indexer::AddDocFilters (ZeitgeistEvent *event, Xapian::Document &doc)
+{
+  const gchar* val; // reused for every field; empty or NULL values are skipped
+
+  val = zeitgeist_event_get_interpretation (event);
+  if (val && val[0] != '\0')
+    doc.add_boolean_term (StringUtils::Truncate (FILTER_PREFIX_EVENT_INTERPRETATION + val));
+
+  val = zeitgeist_event_get_manifestation (event);
+  if (val && val[0] != '\0')
+    doc.add_boolean_term (StringUtils::Truncate (FILTER_PREFIX_EVENT_MANIFESTATION + val));
+
+  val = zeitgeist_event_get_actor (event);
+  if (val && val[0] != '\0') // URIs are mangled here just as CompileEventFilterQuery mangles them
+    doc.add_boolean_term (StringUtils::Truncate (FILTER_PREFIX_ACTOR + StringUtils::MangleUri (val)));
+
+  GPtrArray *subjects = zeitgeist_event_get_subjects (event);
+  for (unsigned j = 0; j < subjects->len; j++)
+  {
+    ZeitgeistSubject *subject = (ZeitgeistSubject*) g_ptr_array_index (subjects, j);
+    val = zeitgeist_subject_get_uri (subject);
+    if (val && val[0] != '\0')
+      doc.add_boolean_term (StringUtils::Truncate (FILTER_PREFIX_SUBJECT_URI + StringUtils::MangleUri (val)));
+
+    val = zeitgeist_subject_get_interpretation (subject);
+    if (val && val[0] != '\0')
+      doc.add_boolean_term (StringUtils::Truncate (FILTER_PREFIX_SUBJECT_INTERPRETATION + val));
+
+    val = zeitgeist_subject_get_manifestation (subject);
+    if (val && val[0] != '\0')
+      doc.add_boolean_term (StringUtils::Truncate (FILTER_PREFIX_SUBJECT_MANIFESTATION + val));
+
+    val = zeitgeist_subject_get_origin (subject);
+    if (val && val[0] != '\0')
+      doc.add_boolean_term (StringUtils::Truncate (FILTER_PREFIX_SUBJECT_ORIGIN + StringUtils::MangleUri (val)));
+
+    val = zeitgeist_subject_get_mimetype (subject);
+    if (val && val[0] != '\0')
+      doc.add_boolean_term (StringUtils::Truncate (FILTER_PREFIX_SUBJECT_MIMETYPE + val));
+
+    val = zeitgeist_subject_get_storage (subject);
+    if (val && val[0] != '\0')
+      doc.add_boolean_term (StringUtils::Truncate (FILTER_PREFIX_SUBJECT_STORAGE + val));
+  }
+}
+
+void Indexer::IndexText (std::string const& text)
+{
+  const unsigned text_weight = 5; // FIXME: ascii folding!
+  this->tokenizer->index_text (text, text_weight);
+}
+
+// Indexes the searchable parts of a subject URI (basename, parent directories
+// or host, depending on the scheme) through the tokenizer; `origin`, when
+// non-empty, is preferred over `uri` as the source of an http(s) hostname.
+void Indexer::IndexUri (std::string const& uri, std::string const& origin)
+{
+  GFile *f = g_file_new_for_uri (uri.c_str ());
+
+  gchar *scheme = g_file_get_uri_scheme (f);
+  if (scheme == NULL)
+  {
+    g_warning ("Invalid URI: %s", uri.c_str ());
+    g_object_unref (f); // early exit must not leak the GFile
+    return;
+  }
+
+  std::string scheme_str(scheme);
+  g_free (scheme);
+
+  if (scheme_str == "file")
+  {
+    // FIXME: special case some typical filenames (like photos)
+    // examples of typical filenames from cameras:
+    //    P07-08-08_16.25.JPG
+    //    P070608_18.54.JPG
+    //    P180308_22.27[1].jpg
+    //    P6220111.JPG
+    //    PC220006.JPG
+    //    DSCN0149.JPG
+    //    DSC01166.JPG
+    //    SDC12583.JPG
+    //    IMGP3199.JPG
+    //    IMGP1251-4.jpg
+    //    IMG_101_8987.JPG
+    //    10052010152.jpg
+    //    4867_93080512835_623012835_1949065_8351752_n.jpg
+    //    2011-05-29 10.49.37.jpg
+    //    V100908_11.24.AVI
+    //    video-2011-05-29-15-14-58.mp4
+
+    // get_parse_name will convert escaped characters to UTF-8, but only for
+    // the "file" scheme, so using it elsewhere won't be of much help
+    gchar *pn = g_file_get_parse_name (f);
+    gchar *basename = g_path_get_basename (pn);
+    // FIXME: remove underscores, CamelCase and process digits
+    tokenizer->index_text (basename, 5);
+    tokenizer->index_text (basename, 5, "N");
+
+    g_free (basename);
+    // limit the directory indexing to just a few levels
+    //  (the original formula was weight = 5.0 / (1.5^n)
+    unsigned path_weights[] = { 3, 2, 1, 0 };
+    unsigned weight_index = 0;
+
+    // this should be equal to origin, but we already got a nice utf-8 display
+    // name, so we'll use that
+    gchar *dir = g_path_get_dirname (pn);
+    std::string path_component (dir);
+    g_free (dir);
+    g_free (pn);
+
+    while (path_component.length () > 2 &&
+        weight_index < G_N_ELEMENTS (path_weights))
+    {
+      // stop at the home directory (break, not return: f is released below)
+      if (path_component.length () == home_dir_path.length () &&
+          path_component == home_dir_path) break;
+
+      gchar *name = g_path_get_basename (path_component.c_str ());
+      // FIXME: un-underscore, uncamelcase, ascii fold
+      tokenizer->index_text (name, path_weights[weight_index++]);
+
+      dir = g_path_get_dirname (path_component.c_str ());
+      path_component = dir;
+      g_free (dir);
+      g_free (name);
+    }
+  }
+  else if (scheme_str == "mailto")
+  {
+    // mailto:username@server.com
+    size_t scheme_len = scheme_str.length () + 1;
+    size_t at_pos = uri.find ('@', scheme_len);
+    if (at_pos == std::string::npos) { g_object_unref (f); return; }
+
+    tokenizer->index_text (uri.substr (scheme_len, at_pos - scheme_len), 5);
+    tokenizer->index_text (uri.substr (at_pos + 1), 1);
+  }
+  else if (scheme_str.compare (0, 4, "http") == 0)
+  {
+    // http / https - we'll index just the basename of the uri (minus query
+    // part) and the hostname/domain
+
+    // step 1) strip query part (keep everything before the '?')
+    gchar *basename;
+    size_t question_mark = uri.find ('?');
+    if (question_mark != std::string::npos)
+    {
+      std::string stripped (uri, 0, question_mark);
+      basename = g_path_get_basename (stripped.c_str ());
+    }
+    else
+    {
+      basename = g_file_get_basename (f);
+    }
+
+    // step 2) unescape (NULL on illegal escapes) and check it's valid utf8
+    gchar *unescaped_basename = g_uri_unescape_string (basename, "");
+    if (unescaped_basename != NULL && g_utf8_validate (unescaped_basename, -1, NULL))
+    {
+      // FIXME: remove underscores, CamelCase and process digits
+      tokenizer->index_text (unescaped_basename, 5);
+      tokenizer->index_text (unescaped_basename, 5, "N");
+    }
+
+    // and also index hostname (taken from origin field if possible)
+    std::string host_str (origin.empty () ? uri : origin);
+    size_t hostname_start = host_str.find ("://");
+    if (hostname_start != std::string::npos)
+    {
+      std::string hostname (host_str, hostname_start + 3);
+      size_t slash_pos = hostname.find ("/");
+      if (slash_pos != std::string::npos) hostname.resize (slash_pos);
+
+      // support IDN
+      if (g_hostname_is_ascii_encoded (hostname.c_str ()))
+      {
+        gchar *printable_hostname = g_hostname_to_unicode (hostname.c_str ());
+        if (printable_hostname != NULL) hostname = printable_hostname;
+        g_free (printable_hostname);
+      }
+
+      tokenizer->index_text (hostname, 2);
+      tokenizer->index_text (hostname, 2, "N");
+      tokenizer->index_text (hostname, 2, "S");
+    }
+
+    g_free (unescaped_basename);
+    g_free (basename);
+  }
+  else if (scheme_str == "data")
+  {
+    // we *really* don't want to index anything with this scheme
+  }
+  else
+  {
+    std::string authority, path, query;
+    StringUtils::SplitUri (uri, authority, path, query);
+
+    if (!path.empty ())
+    {
+      gchar *basename = g_path_get_basename (path.c_str ());
+      gchar *unescaped_basename = g_uri_unescape_string (basename, "");
+
+      if (unescaped_basename != NULL && g_utf8_validate (unescaped_basename, -1, NULL))
+      {
+        std::string capped (StringUtils::Truncate (unescaped_basename, 30));
+        tokenizer->index_text (capped, 5);
+        tokenizer->index_text (capped, 5, "N");
+      }
+
+      // FIXME: rest of the path?
+      g_free (unescaped_basename);
+      g_free (basename);
+    }
+
+    if (!authority.empty ())
+    {
+      std::string capped (StringUtils::Truncate (authority, 30));
+
+      tokenizer->index_text (capped, 2);
+      tokenizer->index_text (capped, 2, "N");
+      tokenizer->index_text (capped, 2, "S");
+    }
+  }
+
+  g_object_unref (f);
+}
+
+// Indexes the names (and, for application subjects, description and
+// categories) of the app identified by `actor`; false if it can't be loaded.
+bool Indexer::IndexActor (std::string const& actor, bool is_subject)
+{
+  GDesktopAppInfo *dai = NULL;
+  // check the cache first; count() keeps operator[] from inserting a NULL
+  GAppInfo *ai = app_info_cache.count (actor) ? app_info_cache[actor] : NULL;
+
+  if (ai == NULL)
+  {
+    // check also the failed cache
+    if (failed_lookups.count (actor) != 0) return false;
+    // and now try to load from the disk
+    if (g_path_is_absolute (actor.c_str ()))
+    {
+      dai = g_desktop_app_info_new_from_filename (actor.c_str ());
+    }
+    else if (g_str_has_prefix (actor.c_str (), "application://"))
+    {
+      dai = g_desktop_app_info_new (actor.substr (14).c_str ());
+    }
+
+    if (dai != NULL)
+    {
+      ai = G_APP_INFO (dai);
+      app_info_cache[actor] = ai;
+    }
+    else
+    {
+      // cache failed lookup
+      failed_lookups.insert (actor);
+      if (clear_failed_id == 0)
+      {
+        // but clear the failed cache in 30 seconds
+        clear_failed_id = g_timeout_add_seconds (30,
+            (GSourceFunc) &Indexer::ClearFailedLookupsCb, this);
+      }
+    }
+  }
+  else
+  {
+    dai = G_DESKTOP_APP_INFO (ai);
+  }
+
+  if (dai == NULL)
+  {
+    g_warning ("Unable to get info on %s", actor.c_str ());
+    return false;
+  }
+
+  const gchar *val;
+  unsigned name_weight = is_subject ? 5 : 2;
+  unsigned comment_weight = 2;
+
+  // FIXME: ascii folding somewhere
+  val = g_app_info_get_display_name (ai);
+  if (val && val[0] != '\0')
+  {
+    std::string display_name (val);
+    tokenizer->index_text (display_name, name_weight);
+    tokenizer->index_text (display_name, name_weight, "A");
+  }
+
+  val = g_desktop_app_info_get_generic_name (dai);
+  if (val && val[0] != '\0')
+  {
+    std::string generic_name (val);
+    tokenizer->index_text (generic_name, name_weight);
+    tokenizer->index_text (generic_name, name_weight, "A");
+  }
+
+  if (!is_subject) return true;
+  // the rest of the code only applies to events with application subject uris:
+  // index the comment field, add category terms, index keywords
+
+  val = g_app_info_get_description (ai);
+  if (val && val[0] != '\0')
+  {
+    std::string comment (val);
+    tokenizer->index_text (comment, comment_weight);
+    tokenizer->index_text (comment, comment_weight, "A");
+  }
+
+  val = g_desktop_app_info_get_categories (dai);
+  if (val && val[0] != '\0')
+  {
+    gchar **categories = g_strsplit (val, ";", 0);
+    Xapian::Document doc(tokenizer->get_document ());
+    for (gchar **iter = categories; *iter != NULL; ++iter)
+    {
+      // FIXME: what if this isn't ascii? but it should, that's what
+      // the fdo menu spec says
+      gchar *category = g_ascii_strdown (*iter, -1);
+      doc.add_boolean_term (FILTER_PREFIX_XDG_CATEGORY + category);
+      g_free (category);
+    }
+    g_strfreev (categories);
+  }
+
+  return true;
+}
+
+// Runs a full-text query, optionally narrowed by event templates and a time
+// range, and returns the matching events loaded through the DB reader (NULL
+// if Xapian throws, with `error` set); `matches` receives the hit estimate.
+GPtrArray* Indexer::Search (const gchar *search_string,
+                            ZeitgeistTimeRange *time_range,
+                            GPtrArray *templates,
+                            guint offset,
+                            guint count,
+                            ZeitgeistResultType result_type,
+                            guint *matches,
+                            GError **error)
+{
+  GPtrArray *results = NULL;
+  try
+  {
+    std::string query_string (search_string != NULL ? search_string : "");
+
+    if (templates && templates->len > 0)
+    {
+      std::string filters (CompileEventFilterQuery (templates));
+      if (!filters.empty ()) // templates may compile to no filter at all
+        query_string = "(" + query_string + ") AND (" + filters + ")";
+    }
+
+    if (time_range)
+    {
+      gint64 start_time = zeitgeist_time_range_get_start (time_range);
+      gint64 end_time = zeitgeist_time_range_get_end (time_range);
+
+      if (start_time > 0 || end_time < G_MAXINT64)
+      {
+        std::string time_filter (CompileTimeRangeFilterQuery (start_time, end_time));
+        query_string = "(" + query_string + ") AND (" + time_filter + ")";
+      }
+    }
+
+    // FIXME: which result types coalesce?
+    guint maxhits = count * 3;
+    if (result_type == 100) // 100: results are ordered by relevance
+    {
+      enquire->set_sort_by_relevance ();
+    }
+    else
+    {
+      enquire->set_sort_by_value (VALUE_TIMESTAMP, true);
+    }
+
+    g_debug ("query: %s", query_string.c_str ());
+    Xapian::Query q(query_parser->parse_query (query_string, QUERY_PARSER_FLAGS));
+    enquire->set_query (q);
+    Xapian::MSet hits (enquire->get_mset (offset, maxhits));
+    Xapian::doccount hitcount = hits.get_matches_estimated ();
+
+    if (result_type == 100)
+    {
+      std::vector<unsigned> event_ids;
+      for (Xapian::MSetIterator iter = hits.begin (); iter != hits.end (); ++iter)
+      {
+        Xapian::Document doc(iter.get_document ());
+        double unserialized =
+          Xapian::sortable_unserialise(doc.get_value (VALUE_EVENT_ID));
+        event_ids.push_back (static_cast<unsigned>(unserialized));
+      }
+      results = zeitgeist_db_reader_get_events (zg_reader,
+                                                event_ids.empty () ? NULL : &event_ids[0],
+                                                event_ids.size (),
+                                                NULL,
+                                                error);
+    }
+    else
+    {
+      GPtrArray *event_templates;
+      event_templates = g_ptr_array_new_with_free_func (g_object_unref);
+      for (Xapian::MSetIterator iter = hits.begin (); iter != hits.end (); ++iter)
+      {
+        Xapian::Document doc(iter.get_document ());
+        double unserialized =
+          Xapian::sortable_unserialise(doc.get_value (VALUE_EVENT_ID));
+        // this doesn't need ref sinking, does it?
+        ZeitgeistEvent *event = zeitgeist_event_new ();
+        zeitgeist_event_set_id (event, static_cast<unsigned>(unserialized));
+        g_ptr_array_add (event_templates, event);
+      }
+
+      if (event_templates->len > 0)
+      {
+        ZeitgeistTimeRange *anytime = zeitgeist_time_range_new_anytime ();
+        results = zeitgeist_db_reader_find_events (zg_reader,
+                                                   anytime,
+                                                   event_templates,
+                                                   ZEITGEIST_STORAGE_STATE_ANY,
+                                                   0,
+                                                   result_type,
+                                                   NULL,
+                                                   error);
+        g_object_unref (anytime);
+      }
+      else
+      {
+        results = g_ptr_array_new ();
+      }
+      g_ptr_array_unref (event_templates);
+    }
+
+    if (matches)
+    {
+      *matches = hitcount;
+    }
+  }
+  catch (Xapian::Error const& e)
+  {
+    g_warning ("Failed to perform search: %s", e.get_msg ().c_str ());
+    g_set_error_literal (error,
+                         ZEITGEIST_ENGINE_ERROR,
+                         ZEITGEIST_ENGINE_ERROR_DATABASE_ERROR,
+                         e.get_msg ().c_str ());
+  }
+
+  return results;
+}
+
+// Builds a Xapian document for `event` (id/timestamp values, actor, subject
+// text and URIs, boolean filter terms) and adds it to the index database.
+void Indexer::IndexEvent (ZeitgeistEvent *event)
+{
+  try
+  {
+    // FIXME: we need to special case MOVE_EVENTs
+    const gchar *val;
+    guint event_id = zeitgeist_event_get_id (event);
+    g_return_if_fail (event_id > 0);
+
+    g_debug ("Indexing event with ID: %u", event_id);
+
+    Xapian::Document doc;
+    doc.add_value (VALUE_EVENT_ID,
+                   Xapian::sortable_serialise (static_cast<double>(event_id)));
+    doc.add_value (VALUE_TIMESTAMP,
+                   Xapian::sortable_serialise (static_cast<double>(zeitgeist_event_get_timestamp (event))));
+
+    tokenizer->set_document (doc);
+
+    val = zeitgeist_event_get_actor (event);
+    if (val && val[0] != '\0')
+    {
+      // it's nice that searching for "gedit" will find all files you worked
+      // with in gedit, but the relevancy has to be low
+      IndexActor (val, false);
+    }
+
+    GPtrArray *subjects = zeitgeist_event_get_subjects (event);
+    for (unsigned i = 0; i < subjects->len; i++)
+    {
+      ZeitgeistSubject *subject;
+      subject = (ZeitgeistSubject*) g_ptr_array_index (subjects, i);
+
+      val = zeitgeist_subject_get_uri (subject);
+      if (val == NULL || val[0] == '\0') continue;
+
+      std::string uri(val);
+      if (uri.length () > 512)
+      {
+        g_warning ("URI too long (%" G_GSIZE_FORMAT "). Discarding:\n%s",
+                   uri.length (), uri.substr (0, 32).c_str ());
+        return; // ignore this event completely...
+      }
+
+      val = zeitgeist_subject_get_text (subject);
+      if (val && val[0] != '\0')
+      {
+        IndexText (val);
+      }
+
+      val = zeitgeist_subject_get_origin (subject);
+      std::string origin (val != NULL ? val : "");
+
+      if (uri.compare (0, 14, "application://") == 0)
+      {
+        if (!IndexActor (uri, true))
+          IndexUri (uri, origin);
+      }
+      else
+      {
+        IndexUri (uri, origin);
+      }
+    }
+
+    AddDocFilters (event, doc);
+    this->db->add_document (doc);
+  }
+  catch (Xapian::Error const& e)
+  {
+    g_warning ("Failed to index event: %s", e.get_msg ().c_str ());
+  }
+}
+
+// Removes the document(s) whose event-id value equals `event_id` from the
+// index; only the first 10 matches are fetched and deleted.
+void Indexer::DeleteEvent (guint32 event_id)
+{
+  g_debug ("Deleting event with ID: %u", event_id);
+
+  try
+  {
+    std::string id(Xapian::sortable_serialise (static_cast<double>(event_id)));
+    Xapian::Query query (Xapian::Query::OP_VALUE_RANGE, VALUE_EVENT_ID, id, id);
+    enquire->set_query(query);
+    Xapian::MSet mset = enquire->get_mset(0, 10);
+    Xapian::doccount total = mset.get_matches_estimated();
+    if (total > 1)
+    {
+      g_warning ("More than one event found with id '%u'", event_id);
+    }
+    else if (total == 0)
+    {
+      g_warning ("No event for id '%u'", event_id);
+      return;
+    }
+
+    Xapian::MSetIterator i, end;
+    for (i= mset.begin(), end = mset.end(); i != end; i++)
+    {
+      db->delete_document (*i);
+    }
+  }
+  catch (Xapian::Error const& e)
+  {
+    g_warning ("Failed to delete event '%u': %s",
+               event_id, e.get_msg().c_str ());
+  }
+}
+
+void Indexer::SetDbMetadata (std::string const& key, std::string const& value)
+{
+  try
+  { // store the key/value pair as metadata of the index database
+    this->db->set_metadata (key, value);
+  }
+  catch (Xapian::Error const& xapian_err)
+  { // a failed write is only logged, the exception is not propagated
+    g_warning ("Failed to set metadata: %s", xapian_err.get_msg ().c_str ());
+  }
+}
+
+gboolean Indexer::ClearFailedLookupsCb ()
+{
+  // forget every recorded failure so those actors get looked up again
+  this->failed_lookups.clear ();
+  this->clear_failed_id = 0; // lets IndexActor schedule a new timeout
+  return FALSE;
+}
+
+} /* namespace */
diff --git a/extensions/fts++/indexer.h b/extensions/fts++/indexer.h
new file mode 100644
index 00000000..9a0135e7
--- /dev/null
+++ b/extensions/fts++/indexer.h
@@ -0,0 +1,115 @@
+/*
+ * Copyright (C) 2012 Canonical Ltd
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Authored by Michal Hruby <michal.hruby@canonical.com>
+ *
+ */
+
+#ifndef _ZGFTS_INDEXER_H_
+#define _ZGFTS_INDEXER_H_
+
+#include <glib-object.h>
+#include <gio/gio.h>
+#include <xapian.h>
+
+#include "zeitgeist-internal.h"
+
+namespace ZeitgeistFTS {
+
+const std::string INDEX_VERSION = "1";
+
+// Owns the Xapian index objects (database, query parser, enquire, term
+// generator) and implements indexing, deletion and search of events.
+class Indexer
+{
+public:
+  typedef std::map<std::string, GAppInfo*> AppInfoMap;
+  typedef std::set<std::string> ApplicationSet;
+
+  Indexer (ZeitgeistDbReader *reader)
+    : zg_reader (reader)
+    , db (NULL)
+    , query_parser (NULL)
+    , enquire (NULL)
+    , tokenizer (NULL)
+    , clear_failed_id (0)
+  {
+    const gchar *home_dir = g_get_home_dir ();
+    home_dir_path = home_dir != NULL ? home_dir : "/home";
+  }
+
+  ~Indexer ()
+  {
+    if (tokenizer) delete tokenizer;
+    if (enquire) delete enquire;
+    if (query_parser) delete query_parser;
+    if (db) delete db;
+    // skip NULL cache values: g_object_unref must never be handed NULL
+    for (AppInfoMap::iterator it = app_info_cache.begin ();
+         it != app_info_cache.end (); ++it)
+    {
+      if (it->second != NULL) g_object_unref (it->second);
+    }
+    if (clear_failed_id != 0)
+    {
+      g_source_remove (clear_failed_id);
+    }
+  }
+
+  void Initialize (GError **error);
+  bool CheckIndex ();
+  void DropIndex ();
+  void Commit ();
+
+  void IndexEvent (ZeitgeistEvent *event);
+  void DeleteEvent (guint32 event_id);
+  void SetDbMetadata (std::string const& key, std::string const& value);
+
+  GPtrArray* Search (const gchar *search_string,
+                     ZeitgeistTimeRange *time_range,
+                     GPtrArray *templates,
+                     guint offset,
+                     guint count,
+                     ZeitgeistResultType result_type,
+                     guint *matches,
+                     GError **error);
+
+private:
+  std::string ExpandType (std::string const& prefix, const gchar* unparsed_uri);
+  std::string CompileEventFilterQuery (GPtrArray *templates);
+  std::string CompileTimeRangeFilterQuery (gint64 start, gint64 end);
+
+  void AddDocFilters (ZeitgeistEvent *event, Xapian::Document &doc);
+  void IndexText (std::string const& text);
+  void IndexUri (std::string const& uri, std::string const& origin);
+  bool IndexActor (std::string const& actor, bool is_subject);
+  gboolean ClearFailedLookupsCb ();
+
+  ZeitgeistDbReader        *zg_reader;
+  Xapian::WritableDatabase *db;
+  Xapian::QueryParser      *query_parser;
+  Xapian::Enquire          *enquire;
+  Xapian::TermGenerator    *tokenizer;
+  AppInfoMap                app_info_cache;
+  ApplicationSet            failed_lookups;
+
+  guint                     clear_failed_id;
+  std::string               home_dir_path;
+};
+
+}
+
+#endif /* _ZGFTS_INDEXER_H_ */
diff --git a/extensions/fts++/mimetype.vala b/extensions/fts++/mimetype.vala
new file mode 120000
index 00000000..fc0a6ce1
--- /dev/null
+++ b/extensions/fts++/mimetype.vala
@@ -0,0 +1 @@
+../../src/mimetype.vala
+\ No newline at end of file
diff --git a/extensions/fts++/ontology-uris.vala b/extensions/fts++/ontology-uris.vala
new file mode 120000
index 00000000..c0b93ab7
--- /dev/null
+++ b/extensions/fts++/ontology-uris.vala
@@ -0,0 +1 @@
+../../src/ontology-uris.vala
+\ No newline at end of file
diff --git a/extensions/fts++/ontology.vala b/extensions/fts++/ontology.vala
new file mode 120000
index 00000000..5daa0215
--- /dev/null
+++ b/extensions/fts++/ontology.vala
@@ -0,0 +1 @@
+../../src/ontology.vala
+\ No newline at end of file
diff --git a/extensions/fts-python/org.gnome.zeitgeist.fts.service.in b/extensions/fts++/org.gnome.zeitgeist.fts.service.in
index 7551d79d..dff8199f 100644
--- a/extensions/fts-python/org.gnome.zeitgeist.fts.service.in
+++ b/extensions/fts++/org.gnome.zeitgeist.fts.service.in
@@ -1,3 +1,3 @@
 [D-BUS Service]
 Name=org.gnome.zeitgeist.SimpleIndexer
-Exec=@pkgdatadir@/fts-python/fts.py
+Exec=@libexecdir@/zeitgeist-fts
diff --git a/extensions/fts++/remote.vala b/extensions/fts++/remote.vala
new file mode 120000
index 00000000..32661b1f
--- /dev/null
+++ b/extensions/fts++/remote.vala
@@ -0,0 +1 @@
+../../src/remote.vala
+\ No newline at end of file
diff --git a/extensions/fts++/sql-schema.vala b/extensions/fts++/sql-schema.vala
new file mode 120000
index 00000000..a2756d4a
--- /dev/null
+++ b/extensions/fts++/sql-schema.vala
@@ -0,0 +1 @@
+../../src/sql-schema.vala
+\ No newline at end of file
diff --git a/extensions/fts++/sql.vala b/extensions/fts++/sql.vala
new file mode 120000
index 00000000..48950aed
--- /dev/null
+++ b/extensions/fts++/sql.vala
@@ -0,0 +1 @@
+../../src/sql.vala
+\ No newline at end of file
diff --git a/extensions/fts++/stringutils.cpp b/extensions/fts++/stringutils.cpp
new file mode 100644
index 00000000..12b0baf8
--- /dev/null
+++ b/extensions/fts++/stringutils.cpp
@@ -0,0 +1,128 @@
+/*
+ * Copyright (C) 2012 Mikkel Kamstrup Erlandsen
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Authored by Mikkel Kamstrup Erlandsen <mikkel.kamstrup@gmail.com>
+ *
+ */
+#include <string>
+
+#include "stringutils.h"
+
+using namespace std;
+
+namespace ZeitgeistFTS {
+
+namespace StringUtils {
+
+/**
+ * Returns a prefix of s that is at most 'nbytes' bytes long, cutting only
+ * at character boundaries so the returned string is still valid UTF-8.
+ *
+ * NOTE: It is assumed the input string is valid UTF-8. Untrusted text
+ * should be validated with g_utf8_validate().
+ *
+ * This function is useful for working with Xapian terms because Xapian has
+ * a max term length of 245 (which is not very well documented, but see
+ * http://xapian.org/docs/omega/termprefixes.html).
+ */
+string Truncate (string const& s, unsigned int nbytes)
+{
+  const gchar *begin = s.c_str();
+  const gchar *cursor = begin;
+
+  nbytes = MIN(nbytes, s.length());
+
+  // advance one whole UTF-8 character at a time, stopping before the
+  // character that would cross the byte limit
+  while (cursor - begin < nbytes)
+  {
+    const gchar *next = g_utf8_next_char (cursor);
+    if (next - begin > nbytes) break;
+    cursor = next;
+  }
+  return s.substr(0, cursor - begin);
+}
+
+/**
+ * Converts a URI into an index- and query friendly string by replacing
+ * every ':', ' ' and '/' with '_'. The problem is that Xapian doesn't handle
+ * CAPITAL letters or most non-alphanumeric symbols in a boolean term when
+ * it does prefix matching; the mangled URIs are meant for such searches.
+ *
+ * IMPORTANT: This is a 1-way function! You can not convert back.
+ */
+string MangleUri (string const& orig)
+{
+  string mangled(orig);
+  for (string::size_type i = 0; i < mangled.length (); ++i)
+  {
+    // ':', ' ' and '/' are the characters that get flattened
+    const char c = mangled[i];
+    if (c == ':' || c == ' ' || c == '/') mangled[i] = '_';
+  }
+
+  return mangled;
+}
+
+/**
+ * Splits a URI into authority, path and query (a "//" after the scheme is
+ * skipped). This method expects a valid uri.
+ *
+ * No output is touched if uri lacks a ':'; query only if a '?' is found.
+ */
+void SplitUri (string const& uri, string &authority,
+               string &path, string &query)
+{
+  const size_t scheme_end = uri.find (':');
+  if (scheme_end == string::npos) return; // not an uri?
+
+  // skip the "//" that introduces an authority, when there is one
+  size_t body_start = scheme_end + 1;
+  if (uri.length () > scheme_end + 2 &&
+      uri.compare (scheme_end + 1, 2, "//") == 0)
+  {
+    body_start = scheme_end + 3;
+  }
+
+  const size_t slash = uri.find ('/', body_start);
+  // a '?' only counts as the query separator when it comes after the first
+  // slash (or, without any slash, after the start of the authority)
+  const size_t qmark = uri.find ('?', slash != string::npos ?
+      slash + 1 : body_start);
+
+  // the authority ends at the first slash, else at the '?', else at the end
+  size_t authority_end = uri.length ();
+  if (slash != string::npos) authority_end = slash;
+  else if (qmark != string::npos) authority_end = qmark;
+  authority = uri.substr (body_start, authority_end - body_start);
+
+  // with no slash, the (empty) path starts right where the authority ends
+  const size_t path_start = slash != string::npos ? slash : authority_end;
+
+  if (qmark == string::npos)
+  {
+    path = uri.substr (path_start);
+  }
+  else
+  {
+    path = uri.substr (path_start, qmark - path_start);
+    query = uri.substr (qmark + 1);
+  }
+}
+
+} /* namespace StringUtils */
+
+} /* namespace ZeitgeistFTS */
diff --git a/extensions/fts++/stringutils.h b/extensions/fts++/stringutils.h
new file mode 100644
index 00000000..ef011d00
--- /dev/null
+++ b/extensions/fts++/stringutils.h
@@ -0,0 +1,42 @@
+/*
+ * Copyright (C) 2012 Mikkel Kamstrup Erlandsen
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Authored by Mikkel Kamstrup Erlandsen <mikkel.kamstrup@gmail.com>
+ *
+ */
+
+#ifndef _ZGFTS_STRINGUTILS_H_
+#define _ZGFTS_STRINGUTILS_H_
+#include <string>
+#include <glib.h>
+namespace ZeitgeistFTS {
+namespace StringUtils {
+
+const unsigned int MAX_TERM_LENGTH = 245; // default byte cap for Truncate()
+
+std::string Truncate (std::string const& s,
+                      unsigned int nbytes = MAX_TERM_LENGTH);
+
+std::string MangleUri (std::string const& orig);
+
+void SplitUri (std::string const& uri,
+               std::string &authority,
+               std::string &path,
+               std::string &query);
+
+} /* namespace StringUtils */
+} /* namespace ZeitgeistFTS */
+#endif /* _ZGFTS_STRINGUTILS_H_ */
diff --git a/extensions/fts++/table-lookup.vala b/extensions/fts++/table-lookup.vala
new file mode 120000
index 00000000..9e242838
--- /dev/null
+++ b/extensions/fts++/table-lookup.vala
@@ -0,0 +1 @@
+../../src/table-lookup.vala
+\ No newline at end of file
diff --git a/extensions/fts++/task.cpp b/extensions/fts++/task.cpp
new file mode 100644
index 00000000..74c4092d
--- /dev/null
+++ b/extensions/fts++/task.cpp
@@ -0,0 +1,47 @@
+/*
+ * Copyright (C) 2012 Canonical Ltd
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Authored by Michal Hruby <michal.hruby@canonical.com>
+ *
+ */
+
+#include "task.h"
+
+namespace ZeitgeistFTS {
+
+void IndexEventsTask::Process (Indexer *indexer)
+{
+  const unsigned stop = MIN (start_index + event_count, events->len); // clamp
+  for (unsigned pos = start_index; pos < stop; ++pos)
+  {
+    indexer->IndexEvent (static_cast<ZeitgeistEvent*> (g_ptr_array_index (events, pos)));
+  }
+}
+
+void DeleteEventsTask::Process (Indexer *indexer)
+{
+  // hand the stored ids to the indexer one at a time, in stored order
+  typedef std::vector<unsigned>::const_iterator IdIter;
+  for (IdIter id = event_ids.begin (); id != event_ids.end (); ++id)
+    indexer->DeleteEvent (*id);
+}
+
+void MetadataTask::Process (Indexer *indexer)
+{
+  indexer->SetDbMetadata (this->key_name, this->value); // forward stored pair
+}
+
+}
diff --git a/extensions/fts++/task.h b/extensions/fts++/task.h
new file mode 100644
index 00000000..1c124382
--- /dev/null
+++ b/extensions/fts++/task.h
@@ -0,0 +1,100 @@
+/*
+ * Copyright (C) 2012 Canonical Ltd
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Authored by Michal Hruby <michal.hruby@canonical.com>
+ *
+ */
+
+#ifndef _ZGFTS_TASK_H_
+#define _ZGFTS_TASK_H_
+
+#include <glib.h>
+
+#include "indexer.h"
+
+namespace ZeitgeistFTS {
+
+/**
+ * A task is one chunk of work handed out by the Controller. Tasks should
+ * not try to be clever about scheduling on their own: breaking the work
+ * down into suitably sized chunks is the Controller's responsibility.
+ * Subclasses implement Process() to run their chunk against an Indexer.
+ */
+class Task
+{
+public:
+  virtual void Process (Indexer *indexer) = 0;
+  virtual ~Task () {}
+};
+
+class IndexEventsTask : public Task
+{
+public:
+  // no ref is taken on the array here, yet the destructor drops one
+  IndexEventsTask (GPtrArray *all_events)
+    : events (all_events), start_index (0), event_count (all_events->len) {}
+  IndexEventsTask (GPtrArray *all_events, unsigned first, unsigned how_many)
+    : events (all_events), start_index (first), event_count (how_many) {}
+
+  virtual ~IndexEventsTask ()
+  {
+    g_ptr_array_unref (events);
+  }
+
+  void Process (Indexer *indexer);
+
+private:
+  GPtrArray *events;
+  unsigned start_index;
+  unsigned event_count;
+};
+
+class DeleteEventsTask : public Task
+{
+public:
+  // copies `count` ids out of `ids` into the task's own vector, so the
+  // caller's array is not referenced after construction
+  DeleteEventsTask (unsigned *ids, int count)
+    : event_ids (ids, ids + count) {}
+
+  virtual ~DeleteEventsTask () {}
+
+  void Process (Indexer *indexer);
+
+private:
+  std::vector<unsigned> event_ids;
+};
+
+class MetadataTask : public Task
+{
+public:
+  // both strings are copied into the task
+  MetadataTask (std::string const& key, std::string const& data)
+    : key_name (key), value (data) {}
+
+  virtual ~MetadataTask () {}
+
+  void Process (Indexer *indexer);
+
+private:
+  std::string key_name;
+  std::string value;
+};
+
+}
+
+#endif /* _ZGFTS_TASK_H_ */
+
diff --git a/extensions/fts++/test/Makefile.am b/extensions/fts++/test/Makefile.am
new file mode 100644
index 00000000..e36cf773
--- /dev/null
+++ b/extensions/fts++/test/Makefile.am
@@ -0,0 +1,27 @@
+NULL = 
+check_PROGRAMS = test-fts
+TESTS = test-fts
+# Preprocessor flags: force-include the config header, pass -w, and search the parent directory for headers.
+AM_CPPFLAGS = \
+	$(ZEITGEIST_CFLAGS) \
+	-include $(CONFIG_HEADER) \
+	-w \
+	-I$(srcdir)/.. \
+	$(NULL)
+# The test binary compiles the parent directory's FTS sources itself, next to the test files.
+test_fts_SOURCES = \
+  test-stringutils.cpp \
+  test-indexer.cpp \
+  test-fts.c \
+  $(srcdir)/../stringutils.cpp \
+  $(srcdir)/../controller.cpp \
+  $(srcdir)/../indexer.cpp \
+  $(srcdir)/../task.cpp \
+  $(srcdir)/../fts.cpp \
+  $(NULL)
+# Link against the noinst libzeitgeist-internal library built one directory up, plus Xapian.
+test_fts_LDADD = \
+  $(builddir)/../libzeitgeist-internal.la \
+  -lxapian \
+  $(NULL)
+
diff --git a/extensions/fts++/test/test-fts.c b/extensions/fts++/test/test-fts.c
new file mode 100644
index 00000000..6b9208fd
--- /dev/null
+++ b/extensions/fts++/test/test-fts.c
@@ -0,0 +1,37 @@
+/*
+ * Copyright (C) 2012 Mikkel Kamstrup Erlandsen
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Authored by Mikkel Kamstrup Erlandsen <mikkel.kamstrup@gmail.com>
+ *
+ */
+
+#include <glib-object.h>
+
+void test_stringutils_create_suite (void);
+void test_indexer_create_suite (void);
+
+gint
+main (gint argc, gchar *argv[])
+{
+  g_type_init ();
+  /* Hand the command line to the GLib test framework before adding tests. */
+  g_test_init (&argc, &argv, NULL);
+  /* Each suite only registers its cases here; they execute in g_test_run (). */
+  test_stringutils_create_suite ();
+  test_indexer_create_suite ();
+  /* The value returned by g_test_run () becomes the process exit status. */
+  return g_test_run ();
+}
diff --git a/extensions/fts++/test/test-indexer.cpp b/extensions/fts++/test/test-indexer.cpp
new file mode 100644
index 00000000..188213e7
--- /dev/null
+++ b/extensions/fts++/test/test-indexer.cpp
@@ -0,0 +1,531 @@
+/*
+ * Copyright (C) 2012 Mikkel Kamstrup Erlandsen
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Authored by Mikkel Kamstrup Erlandsen <mikkel.kamstrup@gmail.com>
+ *
+ */
+
+#include <glib-object.h>
+
+#include "stringutils.h"
+#include "fts.h"
+#include <zeitgeist-internal.h>
+
+using namespace ZeitgeistFTS;
+
+typedef struct
+{
+  ZeitgeistDbReader *db;     // engine created in setup (), held as a DbReader
+  ZeitgeistIndexer *indexer; // indexer created on top of db in setup ()
+} Fixture;
+
+static void setup    (Fixture *fix, gconstpointer data);
+static void teardown (Fixture *fix, gconstpointer data);
+
+// Per-test fixture setup: creates an engine and an indexer on top of it. If
+// either step reports a GError, its message is logged and setup stops there.
+static void
+setup (Fixture *fix, gconstpointer data)
+{
+  GError *err = NULL;
+
+  // use in-memory databases for both zg db and fts db
+  g_setenv ("ZEITGEIST_DATABASE_PATH", ":memory:", TRUE);
+
+  fix->db = ZEITGEIST_DB_READER (zeitgeist_engine_new (&err));
+  if (err != NULL)
+  {
+    g_warning ("%s", err->message);
+    return;
+  }
+
+  fix->indexer = zeitgeist_indexer_new (fix->db, &err);
+  if (err != NULL)
+    g_warning ("%s", err->message);
+}
+
+static void
+teardown (Fixture *fix, gconstpointer data)
+{
+  zeitgeist_indexer_free (fix->indexer); // the indexer was created from fix->db in setup (), so it is released first
+  g_object_unref (fix->db);              // then drop the fixture's reference on the engine
+}
+
+// Sample event 1: a firefox access event on a remote raster image whose
+// subject text is "text". Returns a new event reference.
+static ZeitgeistEvent* create_test_event1 (void)
+{
+  ZeitgeistSubject *subj = zeitgeist_subject_new ();
+  zeitgeist_subject_set_interpretation (subj, ZEITGEIST_NFO_RASTER_IMAGE);
+  zeitgeist_subject_set_manifestation (subj, ZEITGEIST_NFO_REMOTE_DATA_OBJECT);
+  zeitgeist_subject_set_uri (subj, "http://example.com/image.jpg");
+  zeitgeist_subject_set_text (subj, "text");
+  zeitgeist_subject_set_mimetype (subj, "image/png");
+
+  ZeitgeistEvent *ev = zeitgeist_event_new ();
+  zeitgeist_event_set_interpretation (ev, ZEITGEIST_ZG_ACCESS_EVENT);
+  zeitgeist_event_set_manifestation (ev, ZEITGEIST_ZG_USER_ACTIVITY);
+  zeitgeist_event_set_actor (ev, "application://firefox.desktop");
+  zeitgeist_event_add_subject (ev, subj);
+  g_object_unref (subj); // release the local subject reference
+  return ev;
+}
+
+// Sample event 2: a firefox access event on a website whose URI contains
+// percent-escapes and whose text contains CJK. Returns a new event reference.
+static ZeitgeistEvent* create_test_event2 (void)
+{
+  ZeitgeistSubject *subj = zeitgeist_subject_new ();
+  zeitgeist_subject_set_interpretation (subj, ZEITGEIST_NFO_WEBSITE);
+  zeitgeist_subject_set_manifestation (subj, ZEITGEIST_NFO_REMOTE_DATA_OBJECT);
+  zeitgeist_subject_set_uri (subj, "http://example.com/I%20Love%20Wikis");
+  zeitgeist_subject_set_text (subj, "Example.com Wiki Page. Kanji is awesome 漢字");
+  zeitgeist_subject_set_mimetype (subj, "text/html");
+
+  ZeitgeistEvent *ev = zeitgeist_event_new ();
+  zeitgeist_event_set_interpretation (ev, ZEITGEIST_ZG_ACCESS_EVENT);
+  zeitgeist_event_set_manifestation (ev, ZEITGEIST_ZG_USER_ACTIVITY);
+  zeitgeist_event_set_actor (ev, "application://firefox.desktop");
+  zeitgeist_event_add_subject (ev, subj);
+  g_object_unref (subj); // release the local subject reference
+  return ev;
+}
+
+// Sample event 3: a firefox access event on a website addressed through a
+// punycode-encoded IDN host name. Returns a new event reference.
+static ZeitgeistEvent* create_test_event3 (void)
+{
+  ZeitgeistSubject *subj = zeitgeist_subject_new ();
+  zeitgeist_subject_set_interpretation (subj, ZEITGEIST_NFO_WEBSITE);
+  zeitgeist_subject_set_manifestation (subj, ZEITGEIST_NFO_REMOTE_DATA_OBJECT);
+  // Greek IDN - stands for http://παράδειγμα.δοκιμή
+  zeitgeist_subject_set_uri (subj, "http://xn--hxajbheg2az3al.xn--jxalpdlp/");
+  zeitgeist_subject_set_text (subj, "IDNwiki");
+  zeitgeist_subject_set_mimetype (subj, "text/html");
+
+  ZeitgeistEvent *ev = zeitgeist_event_new ();
+  zeitgeist_event_set_interpretation (ev, ZEITGEIST_ZG_ACCESS_EVENT);
+  zeitgeist_event_set_manifestation (ev, ZEITGEIST_ZG_USER_ACTIVITY);
+  zeitgeist_event_set_actor (ev, "application://firefox.desktop");
+  zeitgeist_event_add_subject (ev, subj);
+  g_object_unref (subj); // release the local subject reference
+  return ev;
+}
+
+// Sample event 4: a libreoffice-impress modify event on a local PDF file
+// whose subject text is set to NULL. Returns a new event reference.
+static ZeitgeistEvent* create_test_event4 (void)
+{
+  ZeitgeistSubject *subj = zeitgeist_subject_new ();
+  zeitgeist_subject_set_interpretation (subj, ZEITGEIST_NFO_PRESENTATION);
+  zeitgeist_subject_set_manifestation (subj, ZEITGEIST_NFO_FILE_DATA_OBJECT);
+  zeitgeist_subject_set_uri (subj, "file:///home/username/Documents/my_fabulous_presentation.pdf");
+  zeitgeist_subject_set_text (subj, NULL);
+  zeitgeist_subject_set_mimetype (subj, "application/pdf");
+
+  ZeitgeistEvent *ev = zeitgeist_event_new ();
+  zeitgeist_event_set_interpretation (ev, ZEITGEIST_ZG_MODIFY_EVENT);
+  zeitgeist_event_set_manifestation (ev, ZEITGEIST_ZG_USER_ACTIVITY);
+  zeitgeist_event_set_actor (ev, "application://libreoffice-impress.desktop");
+  zeitgeist_event_add_subject (ev, subj);
+  g_object_unref (subj); // release the local subject reference
+  return ev;
+}
+
+// Inserts the event into the engine, hands it to the indexer and runs the
+// indexer's pending tasks. Steals the event, ref it if you want to keep it.
+static guint
+index_event (Fixture *fix, ZeitgeistEvent *event)
+{
+  const guint id = zeitgeist_engine_insert_event (ZEITGEIST_ENGINE (fix->db),
+                                                  event, NULL, NULL);
+
+  // The array owns the event from here on: its free func is g_object_unref.
+  GPtrArray *batch = g_ptr_array_new_with_free_func (g_object_unref);
+  g_ptr_array_add (batch, event);
+  zeitgeist_indexer_index_events (fix->indexer, batch);
+  g_ptr_array_unref (batch);
+
+  // Keep processing indexer tasks until none are pending.
+  for (;;)
+  {
+    if (!zeitgeist_indexer_has_pending_tasks (fix->indexer)) break;
+    zeitgeist_indexer_process_task (fix->indexer);
+  }
+  return id;
+}
+
+// Indexes all four sample events, searches for "text" without filters and
+// expects exactly one result: the first event, whose subject text is "text".
+static void
+test_simple_query (Fixture *fix, gconstpointer data)
+{
+  guint matches;
+
+  // add test events to DBs; only the first event's id is checked below
+  const guint event_id = index_event (fix, create_test_event1 ());
+  index_event (fix, create_test_event2 ());
+  index_event (fix, create_test_event3 ());
+  index_event (fix, create_test_event4 ());
+
+  // 0 and 10: presumably offset and maximum result count - confirm in fts.h
+  GPtrArray *results = zeitgeist_indexer_search (
+      fix->indexer,
+      "text",
+      zeitgeist_time_range_new_anytime (),
+      g_ptr_array_new (),
+      0, 10,
+      ZEITGEIST_RESULT_TYPE_MOST_RECENT_EVENTS,
+      &matches,
+      NULL);
+
+  g_assert_cmpuint (matches, >, 0);
+  g_assert_cmpuint (results->len, ==, 1);
+
+  ZeitgeistEvent *event = (ZeitgeistEvent*) g_ptr_array_index (results, 0);
+  g_assert_cmpuint (zeitgeist_event_get_id (event), ==, event_id);
+
+  GPtrArray *subjects = zeitgeist_event_get_subjects (event);
+  ZeitgeistSubject *subject = (ZeitgeistSubject*) g_ptr_array_index (subjects, 0);
+  g_assert_cmpstr (zeitgeist_subject_get_text (subject), ==, "text");
+}
+
+// Searches for "text" with an event template whose interpretation is
+// NFO_DOCUMENT and expects no results for the two indexed sample events.
+static void
+test_simple_with_filter (Fixture *fix, gconstpointer data)
+{
+  guint matches;
+  ZeitgeistEvent* event;
+
+  // add test events to DBs
+  index_event (fix, create_test_event1 ());
+  index_event (fix, create_test_event2 ());
+
+  GPtrArray *filters = g_ptr_array_new_with_free_func (g_object_unref);
+  event = zeitgeist_event_new ();
+  zeitgeist_event_set_interpretation (event, ZEITGEIST_NFO_DOCUMENT);
+  g_ptr_array_add (filters, event); // steals ref
+
+  GPtrArray *results =
+    zeitgeist_indexer_search (fix->indexer,
+                              "text",
+                              zeitgeist_time_range_new_anytime (),
+                              filters,
+                              0, 10,
+                              ZEITGEIST_RESULT_TYPE_MOST_RECENT_EVENTS,
+                              &matches,
+                              NULL);
+
+  g_assert_cmpuint (results->len, ==, 0);
+  g_assert_cmpuint (matches, ==, 0);
+}
+
+// Searches for "text" restricted to subjects interpreted as NFO_IMAGE and
+// expects sample event 1 (a raster image) as the single result.
+static void
+test_simple_with_valid_filter (Fixture *fix, gconstpointer data)
+{
+  guint matches, event_id;
+  ZeitgeistEvent* event;
+  ZeitgeistSubject *subject;
+
+  // add test events to DBs
+  event_id = index_event (fix, create_test_event1 ());
+  index_event (fix, create_test_event2 ());
+
+  GPtrArray *filters = g_ptr_array_new_with_free_func (g_object_unref);
+  event = zeitgeist_event_new ();
+  subject = zeitgeist_subject_new ();
+  zeitgeist_subject_set_interpretation (subject, ZEITGEIST_NFO_IMAGE);
+  zeitgeist_event_add_subject (event, subject);
+  g_object_unref (subject); // drop our ref, as create_test_event*() do after add_subject
+  g_ptr_array_add (filters, event); // steals ref
+
+  GPtrArray *results =
+    zeitgeist_indexer_search (fix->indexer,
+                              "text",
+                              zeitgeist_time_range_new_anytime (),
+                              filters,
+                              0, 10,
+                              ZEITGEIST_RESULT_TYPE_MOST_RECENT_EVENTS,
+                              &matches, NULL);
+
+  g_assert_cmpuint (matches, >, 0);
+  g_assert_cmpuint (results->len, ==, 1);
+
+  event = (ZeitgeistEvent*) results->pdata[0];
+  g_assert_cmpuint (zeitgeist_event_get_id (event), ==, event_id);
+
+  subject = (ZeitgeistSubject*)
+    g_ptr_array_index (zeitgeist_event_get_subjects (event), 0);
+  g_assert_cmpstr (zeitgeist_subject_get_text (subject), ==, "text");
+}
+
+// Searches for "text" with a subject template of "!" + NFO_IMAGE (a negated
+// interpretation) and expects no results for the two indexed sample events.
+static void
+test_simple_negation (Fixture *fix, gconstpointer data)
+{
+  guint matches;
+  ZeitgeistEvent* event;
+  ZeitgeistSubject *subject;
+
+  // add test events to DBs
+  index_event (fix, create_test_event1 ());
+  index_event (fix, create_test_event2 ());
+
+  GPtrArray *filters = g_ptr_array_new_with_free_func (g_object_unref);
+  event = zeitgeist_event_new ();
+  subject = zeitgeist_subject_new ();
+  zeitgeist_subject_set_interpretation (subject, "!" ZEITGEIST_NFO_IMAGE);
+  zeitgeist_event_add_subject (event, subject);
+  g_object_unref (subject); // drop our ref, as create_test_event*() do after add_subject
+  g_ptr_array_add (filters, event); // steals ref
+
+  GPtrArray *results =
+    zeitgeist_indexer_search (fix->indexer,
+                              "text",
+                              zeitgeist_time_range_new_anytime (),
+                              filters,
+                              0, 10,
+                              ZEITGEIST_RESULT_TYPE_MOST_RECENT_EVENTS,
+                              &matches, NULL);
+
+  g_assert_cmpuint (matches, ==, 0);
+  g_assert_cmpuint (results->len, ==, 0);
+}
+
+// Searches for "text" with a subject template of "+" + NFO_IMAGE and expects
+// no results, i.e. the raster-image sample event must not match this form.
+static void
+test_simple_noexpand (Fixture *fix, gconstpointer data)
+{
+  guint matches;
+  ZeitgeistEvent* event;
+  ZeitgeistSubject *subject;
+
+  // add test events to DBs
+  index_event (fix, create_test_event1 ());
+  index_event (fix, create_test_event2 ());
+
+  GPtrArray *filters = g_ptr_array_new_with_free_func (g_object_unref);
+  event = zeitgeist_event_new ();
+  subject = zeitgeist_subject_new ();
+  zeitgeist_subject_set_interpretation (subject, "+" ZEITGEIST_NFO_IMAGE);
+  zeitgeist_event_add_subject (event, subject);
+  g_object_unref (subject); // drop our ref, as create_test_event*() do after add_subject
+  g_ptr_array_add (filters, event); // steals ref
+
+  GPtrArray *results =
+    zeitgeist_indexer_search (fix->indexer,
+                              "text",
+                              zeitgeist_time_range_new_anytime (),
+                              filters,
+                              0, 10,
+                              ZEITGEIST_RESULT_TYPE_MOST_RECENT_EVENTS,
+                              &matches, NULL);
+
+  g_assert_cmpuint (matches, ==, 0);
+  g_assert_cmpuint (results->len, ==, 0);
+}
+
+// Searches for "text" with a subject template of "+" + NFO_RASTER_IMAGE and
+// expects sample event 1, whose subject has exactly that interpretation.
+static void
+test_simple_noexpand_valid (Fixture *fix, gconstpointer data)
+{
+  guint matches, event_id;
+  ZeitgeistEvent* event;
+  ZeitgeistSubject *subject;
+
+  // add test events to DBs
+  event_id = index_event (fix, create_test_event1 ());
+  index_event (fix, create_test_event2 ());
+
+  GPtrArray *filters = g_ptr_array_new_with_free_func (g_object_unref);
+  event = zeitgeist_event_new ();
+  subject = zeitgeist_subject_new ();
+  zeitgeist_subject_set_interpretation (subject, "+" ZEITGEIST_NFO_RASTER_IMAGE); // space keeps the macro from lexing as a C++11 literal suffix
+  zeitgeist_event_add_subject (event, subject);
+  g_object_unref (subject); // drop our ref, as create_test_event*() do after add_subject
+  g_ptr_array_add (filters, event); // steals ref
+
+  GPtrArray *results =
+    zeitgeist_indexer_search (fix->indexer,
+                              "text",
+                              zeitgeist_time_range_new_anytime (),
+                              filters,
+                              0, 10,
+                              ZEITGEIST_RESULT_TYPE_MOST_RECENT_EVENTS,
+                              &matches, NULL);
+
+  g_assert_cmpuint (matches, >, 0);
+  g_assert_cmpuint (results->len, ==, 1);
+
+  event = (ZeitgeistEvent*) results->pdata[0];
+  g_assert_cmpuint (zeitgeist_event_get_id (event), ==, event_id);
+
+  subject = (ZeitgeistSubject*)
+    g_ptr_array_index (zeitgeist_event_get_subjects (event), 0);
+  g_assert_cmpstr (zeitgeist_subject_get_text (subject), ==, "text");
+}
+
+// Searches websites for "love", a word that sample event 2 carries only in
+// its percent-escaped URI (I%20Love%20Wikis), and expects that event back.
+static void
+test_simple_url_unescape (Fixture *fix, gconstpointer data)
+{
+  guint matches, event_id;
+  ZeitgeistEvent* event;
+  ZeitgeistSubject *subject;
+
+  // add test events to DBs
+  index_event (fix, create_test_event1 ());
+  event_id = index_event (fix, create_test_event2 ());
+
+  GPtrArray *filters = g_ptr_array_new_with_free_func (g_object_unref);
+  event = zeitgeist_event_new ();
+  subject = zeitgeist_subject_new ();
+  zeitgeist_subject_set_interpretation (subject, ZEITGEIST_NFO_WEBSITE);
+  zeitgeist_event_add_subject (event, subject);
+  g_object_unref (subject); // drop our ref, as create_test_event*() do after add_subject
+  g_ptr_array_add (filters, event); // steals ref
+
+  GPtrArray *results =
+    zeitgeist_indexer_search (fix->indexer,
+                              "love",
+                              zeitgeist_time_range_new_anytime (),
+                              filters,
+                              0, 10,
+                              ZEITGEIST_RESULT_TYPE_MOST_RECENT_EVENTS,
+                              &matches, NULL);
+
+  g_assert_cmpuint (matches, >, 0);
+  g_assert_cmpuint (results->len, ==, 1);
+
+  event = (ZeitgeistEvent*) results->pdata[0];
+  g_assert_cmpuint (zeitgeist_event_get_id (event), ==, event_id);
+
+  subject = (ZeitgeistSubject*)
+    g_ptr_array_index (zeitgeist_event_get_subjects (event), 0);
+  g_assert_cmpstr (zeitgeist_subject_get_text (subject), ==, "Example.com Wiki Page. Kanji is awesome 漢字");
+}
+
+// Indexes sample events 1 and 2, then runs the wildcard query "漢*" without
+// filters. The only expected result is event 2, whose subject text contains
+// the CJK word the query is a prefix of.
+static void
+test_simple_cjk (Fixture *fix, gconstpointer data)
+{
+  guint matches;
+
+  // add test events to DBs; the second event is the one expected back
+  index_event (fix, create_test_event1 ());
+  const guint event_id = index_event (fix, create_test_event2 ());
+
+  // 0 and 10: presumably offset and maximum result count - confirm in fts.h
+  GPtrArray *results = zeitgeist_indexer_search (
+      fix->indexer,
+      "漢*",
+      zeitgeist_time_range_new_anytime (),
+      g_ptr_array_new (),
+      0, 10,
+      ZEITGEIST_RESULT_TYPE_MOST_RECENT_EVENTS,
+      &matches,
+      NULL);
+
+  g_assert_cmpuint (matches, >, 0);
+  g_assert_cmpuint (results->len, ==, 1);
+
+  ZeitgeistEvent *event = (ZeitgeistEvent*) g_ptr_array_index (results, 0);
+  g_assert_cmpuint (zeitgeist_event_get_id (event), ==, event_id);
+
+  GPtrArray *subjects = zeitgeist_event_get_subjects (event);
+  ZeitgeistSubject *subject = (ZeitgeistSubject*) g_ptr_array_index (subjects, 0);
+  g_assert_cmpstr (zeitgeist_subject_get_text (subject), ==, "Example.com Wiki Page. Kanji is awesome 漢字");
+}
+
+// Indexes sample events 1-3, then searches for the Greek word "παράδειγμα"
+// without filters. The only expected result is event 3, whose URI holds that
+// host name in its punycode (xn--) form.
+static void
+test_simple_idn_support (Fixture *fix, gconstpointer data)
+{
+  guint matches;
+
+  // add test events to DBs; the third event is the one expected back
+  index_event (fix, create_test_event1 ());
+  index_event (fix, create_test_event2 ());
+  const guint event_id = index_event (fix, create_test_event3 ());
+
+  // 0 and 10: presumably offset and maximum result count - confirm in fts.h
+  GPtrArray *results = zeitgeist_indexer_search (
+      fix->indexer,
+      "παράδειγμα",
+      zeitgeist_time_range_new_anytime (),
+      g_ptr_array_new (),
+      0, 10,
+      ZEITGEIST_RESULT_TYPE_MOST_RECENT_EVENTS,
+      &matches,
+      NULL);
+
+  g_assert_cmpuint (matches, >, 0);
+  g_assert_cmpuint (results->len, ==, 1);
+
+  ZeitgeistEvent *event = (ZeitgeistEvent*) g_ptr_array_index (results, 0);
+  g_assert_cmpuint (zeitgeist_event_get_id (event), ==, event_id);
+
+  GPtrArray *subjects = zeitgeist_event_get_subjects (event);
+  ZeitgeistSubject *subject = (ZeitgeistSubject*) g_ptr_array_index (subjects, 0);
+  g_assert_cmpstr (zeitgeist_subject_get_text (subject), ==, "IDNwiki");
+}
+
+G_BEGIN_DECLS
+
+// Log handler with an empty body: every message passed to it is dropped.
+static void
+discard_message (const gchar *domain, GLogLevelFlags level,
+                 const gchar *msg, gpointer userdata)
+{
+}
+
+// Registers every indexer test case, in table order, with the setup/teardown
+// fixture pair, then installs a handler that drops MESSAGE-level log output.
+void test_indexer_create_suite (void)
+{
+  static const struct {
+    const char *path;
+    void (*func) (Fixture *, gconstpointer);
+  } cases[] = {
+    { "/Zeitgeist/FTS/Indexer/SimpleQuery", test_simple_query },
+    { "/Zeitgeist/FTS/Indexer/SimpleWithFilter", test_simple_with_filter },
+    { "/Zeitgeist/FTS/Indexer/SimpleWithValidFilter", test_simple_with_valid_filter },
+    { "/Zeitgeist/FTS/Indexer/SimpleNegation", test_simple_negation },
+    { "/Zeitgeist/FTS/Indexer/SimpleNoexpand", test_simple_noexpand },
+    { "/Zeitgeist/FTS/Indexer/SimpleNoexpandValid", test_simple_noexpand_valid },
+    { "/Zeitgeist/FTS/Indexer/URLUnescape", test_simple_url_unescape },
+    { "/Zeitgeist/FTS/Indexer/IDNSupport", test_simple_idn_support },
+    { "/Zeitgeist/FTS/Indexer/CJK", test_simple_cjk },
+  };
+  for (gsize i = 0; i < G_N_ELEMENTS (cases); i++)
+    g_test_add (cases[i].path, Fixture, 0, setup, cases[i].func, teardown);
+
+  // get rid of the "rebuilding index..." messages
+  g_log_set_handler (NULL, G_LOG_LEVEL_MESSAGE, discard_message, NULL);
+}
+
+G_END_DECLS
diff --git a/extensions/fts++/test/test-stringutils.cpp b/extensions/fts++/test/test-stringutils.cpp
new file mode 100644
index 00000000..3f9405fa
--- /dev/null
+++ b/extensions/fts++/test/test-stringutils.cpp
@@ -0,0 +1,178 @@
+/*
+ * Copyright (C) 2012 Mikkel Kamstrup Erlandsen
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Authored by Mikkel Kamstrup Erlandsen <mikkel.kamstrup@gmail.com>
+ *
+ */
+
+#include <glib-object.h>
+
+#include "stringutils.h"
+
+using namespace ZeitgeistFTS;
+
+typedef struct
+{
+  int i; // placeholder member; none of the tests in this file read it
+} Fixture;
+
+static void setup    (Fixture *fix, gconstpointer data);
+static void teardown (Fixture *fix, gconstpointer data);
+
+static void
+setup (Fixture *fix, gconstpointer data)
+{
+  // nothing to prepare: the tests in this file do not touch the fixture
+}
+
+static void
+teardown (Fixture *fix, gconstpointer data)
+{
+  // nothing to release: setup () above allocates nothing
+}
+
+static void
+test_truncate (Fixture *fix, gconstpointer data)
+{
+  g_assert_cmpstr ("", ==, StringUtils::Truncate("").c_str ());
+  // ASCII input: a limit of N keeps the first N characters
+  g_assert_cmpstr ("", ==, StringUtils::Truncate("a", 0).c_str ());
+  g_assert_cmpstr ("a", ==, StringUtils::Truncate("a", 1).c_str ());
+  g_assert_cmpstr ("a", ==, StringUtils::Truncate("a").c_str ());
+
+  g_assert_cmpstr ("", ==, StringUtils::Truncate("aa", 0).c_str ());
+  g_assert_cmpstr ("a", ==, StringUtils::Truncate("aa", 1).c_str ());
+  g_assert_cmpstr ("aa", ==, StringUtils::Truncate("aa", 2).c_str ());
+  g_assert_cmpstr ("aa", ==, StringUtils::Truncate("aa").c_str ());
+  // "å" takes more than one byte (assuming this file is UTF-8 encoded). The
+  // expectations below never contain a partial character: it is dropped instead.
+  g_assert_cmpstr ("", ==, StringUtils::Truncate("å", 0).c_str ());
+  g_assert_cmpstr ("", ==, StringUtils::Truncate("å", 1).c_str ());
+  g_assert_cmpstr ("å", ==, StringUtils::Truncate("å").c_str ());
+
+  g_assert_cmpstr ("", ==, StringUtils::Truncate("åå", 0).c_str ());
+  g_assert_cmpstr ("", ==, StringUtils::Truncate("åå", 1).c_str ());
+  g_assert_cmpstr ("å", ==, StringUtils::Truncate("åå", 2).c_str ());
+  g_assert_cmpstr ("å", ==, StringUtils::Truncate("åå", 3).c_str ());
+  g_assert_cmpstr ("åå", ==, StringUtils::Truncate("åå", 4).c_str ());
+  g_assert_cmpstr ("åå", ==, StringUtils::Truncate("åå").c_str ());
+}
+
+static void
+test_mangle (Fixture *fix, gconstpointer data)
+{
+  g_assert_cmpstr ("", ==, StringUtils::MangleUri("").c_str ());
+  // ':' and '/' come back as '_', while '.' and '-' are left untouched
+  g_assert_cmpstr ("file", ==, StringUtils::MangleUri("file").c_str ());
+  g_assert_cmpstr ("file___", ==, StringUtils::MangleUri("file://").c_str ());
+  g_assert_cmpstr ("http___www.zeitgeist-project.com", ==,
+      StringUtils::MangleUri("http://www.zeitgeist-project.com").c_str ());
+  // spaces are replaced by '_' as well
+  g_assert_cmpstr ("scheme_no_spaces_in_uris", ==,
+      StringUtils::MangleUri("scheme:no spaces in uris").c_str ());
+}
+
+static void
+test_split (Fixture *fix, gconstpointer data)
+{
+  std::string authority, path, query;
+  // The outputs are reset before every case so a stale value cannot satisfy an assert.
+  authority = path = query = "";
+  StringUtils::SplitUri ("", authority, path, query); // doesn't crash
+
+  g_assert_cmpstr ("", ==, authority.c_str ());
+  g_assert_cmpstr ("", ==, path.c_str ());
+  g_assert_cmpstr ("", ==, query.c_str ());
+
+  authority = path = query = "";
+  StringUtils::SplitUri ("scheme:", authority, path, query); // doesn't crash
+
+  g_assert_cmpstr ("", ==, authority.c_str ());
+  g_assert_cmpstr ("", ==, path.c_str ());
+  g_assert_cmpstr ("", ==, query.c_str ());
+  // Only the first '?' separates path and query; later ones stay in the query.
+  authority = path = query = "";
+  StringUtils::SplitUri ("ldap://ldap1.example.net:6666/o=University%20"
+                         "of%20Michigan,c=US??sub?(cn=Babs%20Jensen)",
+                         authority, path, query);
+
+  g_assert_cmpstr ("ldap1.example.net:6666", ==, authority.c_str ());
+  g_assert_cmpstr ("/o=University%20of%20Michigan,c=US", ==, path.c_str ());
+  g_assert_cmpstr ("?sub?(cn=Babs%20Jensen)", ==, query.c_str ());
+
+  // No "//" after the scheme: the part up to '?' is still reported as the authority.
+  authority = path = query = "";
+  StringUtils::SplitUri ("mailto:jsmith@example.com",
+                         authority, path, query);
+
+  g_assert_cmpstr ("jsmith@example.com", ==, authority.c_str ());
+  g_assert_cmpstr ("", ==, path.c_str ());
+  g_assert_cmpstr ("", ==, query.c_str ());
+
+  authority = path = query = "";
+  StringUtils::SplitUri ("mailto:jsmith@example.com?subject=A%20Test&body="
+                         "My%20idea%20is%3A%20%0A", authority, path, query);
+
+  g_assert_cmpstr ("jsmith@example.com", ==, authority.c_str ());
+  g_assert_cmpstr ("", ==, path.c_str ());
+  g_assert_cmpstr ("subject=A%20Test&body=My%20idea%20is%3A%20%0A", ==, query.c_str ());
+
+  authority = path = query = "";
+  StringUtils::SplitUri ("sip:alice@atlanta.com?subject=project%20x",
+                         authority, path, query);
+
+  g_assert_cmpstr ("alice@atlanta.com", ==, authority.c_str ());
+  g_assert_cmpstr ("", ==, path.c_str ());
+  g_assert_cmpstr ("subject=project%20x", ==, query.c_str ());
+  // Empty authority ("file:///"): the path keeps its leading slash.
+  authority = path = query = "";
+  StringUtils::SplitUri ("file:///",
+                         authority, path, query);
+
+  g_assert_cmpstr ("", ==, authority.c_str ());
+  g_assert_cmpstr ("/", ==, path.c_str ());
+  g_assert_cmpstr ("", ==, query.c_str ());
+
+  authority = path = query = "";
+  StringUtils::SplitUri ("file:///home/username/file.ext",
+                         authority, path, query);
+
+  g_assert_cmpstr ("", ==, authority.c_str ());
+  g_assert_cmpstr ("/home/username/file.ext", ==, path.c_str ());
+  g_assert_cmpstr ("", ==, query.c_str ());
+
+  authority = path = query = "";
+  StringUtils::SplitUri ("dns://192.168.1.1/ftp.example.org?type=A",
+                         authority, path, query);
+
+  g_assert_cmpstr ("192.168.1.1", ==, authority.c_str ());
+  g_assert_cmpstr ("/ftp.example.org", ==, path.c_str ());
+  g_assert_cmpstr ("type=A", ==, query.c_str ());
+}
+
+G_BEGIN_DECLS
+
+// Hooks the StringUtils cases into the GLib test tree. All of them share this
+// file's setup/teardown pair and are registered with no test data (0).
+void
+test_stringutils_create_suite (void)
+{
+  g_test_add ("/Zeitgeist/FTS/StringUtils/Truncate", Fixture, 0, setup, test_truncate, teardown);
+  g_test_add ("/Zeitgeist/FTS/StringUtils/MangleUri", Fixture, 0, setup, test_mangle, teardown);
+  g_test_add ("/Zeitgeist/FTS/StringUtils/SplitUri", Fixture, 0, setup, test_split, teardown);
+}
+
+G_END_DECLS
diff --git a/extensions/fts++/utils.vala b/extensions/fts++/utils.vala
new file mode 120000
index 00000000..6da71ce8
--- /dev/null
+++ b/extensions/fts++/utils.vala
@@ -0,0 +1 @@
+../../src/utils.vala
+\ No newline at end of file
diff --git a/extensions/fts++/where-clause.vala b/extensions/fts++/where-clause.vala
new file mode 120000
index 00000000..efc7d8f9
--- /dev/null
+++ b/extensions/fts++/where-clause.vala
@@ -0,0 +1 @@
+../../src/where-clause.vala
+\ No newline at end of file
diff --git a/extensions/fts++/zeitgeist-fts.vala b/extensions/fts++/zeitgeist-fts.vala
new file mode 100644
index 00000000..f245b03e
--- /dev/null
+++ b/extensions/fts++/zeitgeist-fts.vala
@@ -0,0 +1,301 @@
+/* zeitgeist-fts.vala
+ *
+ * Copyright © 2012 Canonical Ltd.
+ * Copyright © 2012 Michal Hruby <michal.mhr@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+namespace Zeitgeist
+{
+
+    [DBus (name = "org.freedesktop.DBus")]
+    public interface RemoteDBus : Object
+    {   // Minimal proxy interface; run () uses it to ask whether a bus name is owned.
+        public abstract bool name_has_owner (string name) throws IOError;
+    }
+
+    public class FtsDaemon : Object, RemoteSimpleIndexer, RemoteMonitor
+    {
+        //const string DBUS_NAME = "org.gnome.zeitgeist.Fts";
+        const string DBUS_NAME = "org.gnome.zeitgeist.SimpleIndexer";
+        const string ZEITGEIST_DBUS_NAME = "org.gnome.zeitgeist.Engine";
+        private static bool show_version_info = false;
+        private static string log_level = "";
+
+        const OptionEntry[] options =
+        {
+            {
+                "version", 'v', 0, OptionArg.NONE, out show_version_info,
+                "Print program's version number and exit", null
+            },
+            {
+                "log-level", 0, 0, OptionArg.STRING, out log_level,
+                "How much information should be printed; possible values: " +
+                "DEBUG, INFO, WARNING, ERROR, CRITICAL", "LEVEL"
+            },
+            {
+                null
+            }
+        };
+
+        private static FtsDaemon? instance;
+        private static MainLoop mainloop;
+        private static bool name_acquired = false;
+
+        private DbReader engine;
+        private Indexer indexer;
+
+        private uint indexer_register_id;
+        private uint monitor_register_id;
+        private unowned DBusConnection connection;
+
+        public FtsDaemon () throws EngineError
+        {
+            engine = new DbReader ();
+            indexer = new Indexer (engine); // the indexer is built around the reader above
+        }
+
+        private void do_quit ()
+        {
+            engine.close ();
+            mainloop.quit (); // makes mainloop.run () in run () return so it can clean up
+        }
+
+        public void register_dbus_object (DBusConnection conn) throws IOError
+        {   // Exports this object under both the indexer and the monitor object paths.
+            connection = conn; // unowned reference, needed by unregister_dbus_object ()
+            indexer_register_id = conn.register_object<RemoteSimpleIndexer> (
+                    "/org/gnome/zeitgeist/index/activity", this);
+            monitor_register_id = conn.register_object<RemoteMonitor> (
+                    "/org/gnome/zeitgeist/monitor/special", this);
+        }
+
+        public void unregister_dbus_object ()
+        {
+            // An id of 0 means "not registered"; ids are reset so a second call is a no-op.
+            if (indexer_register_id != 0)
+            {
+                connection.unregister_object (indexer_register_id);
+                indexer_register_id = 0;
+            }
+            if (monitor_register_id != 0)
+            {
+                connection.unregister_object (monitor_register_id);
+                monitor_register_id = 0;
+            }
+        }
+
+        public async void notify_insert (Variant time_range, Variant events)
+            throws IOError
+        {
+            // Decode the inserted events and hand them to the indexer (time_range unused).
+            debug ("got insertion notification");
+            indexer.index_events (Events.from_variant (events));
+        }
+
+        public async void notify_delete (Variant time_range, uint32[] event_ids)
+            throws IOError
+        {
+            debug ("got deletion notification");
+            indexer.delete_events (event_ids); // time_range is not consulted
+        }
+
+        /**
+         * Runs a full-text query against the index and returns the matching
+         * events as a Variant; `matches` is filled in by Indexer.search ().
+         */
+        public async void search (string query_string, Variant time_range,
+                                  Variant filter_templates,
+                                  uint offset, uint count, uint result_type,
+                                  out Variant events, out uint matches)
+            throws Error
+        {
+            var tr = new TimeRange.from_variant (time_range);
+            var templates = Events.from_variant (filter_templates);
+            // Use our own indexer, not the static `instance` (run () nulls it on exit).
+            var results = indexer.search (query_string, tr, templates,
+                offset, count, (ResultType) result_type, out matches);
+
+            events = Events.to_variant (results);
+        }
+
+        private static void name_acquired_callback (DBusConnection conn)
+        {
+            name_acquired = true; // lets name_lost_callback () recognise a real loss
+        }
+
+        private static void name_lost_callback (DBusConnection? conn)
+        {
+            // A null connection means something happened to our bus
+            // connection; otherwise we only react when we owned the name
+            // and then lost it.  In both cases the main loop is stopped.
+            bool connection_gone = (conn == null);
+            bool lost_owned_name = (instance != null && name_acquired);
+
+            if (connection_gone || lost_owned_name)
+            {
+                mainloop.quit ();
+            }
+        }
+
+        static void run ()
+            throws Error
+        {   // Start-up: refuse to run twice, export objects, own the name, run the loop.
+            DBusConnection connection = Bus.get_sync (BusType.SESSION);
+            var proxy = connection.get_proxy_sync<RemoteDBus> (
+                "org.freedesktop.DBus", "/org/freedesktop/DBus",
+                DBusProxyFlags.DO_NOT_LOAD_PROPERTIES);
+            bool zeitgeist_up = proxy.name_has_owner (ZEITGEIST_DBUS_NAME);
+            // FIXME: zeitgeist_up is unused; error out if it isn't up, or start it?
+            bool name_owned = proxy.name_has_owner (DBUS_NAME);
+            if (name_owned)
+            {
+                throw new EngineError.EXISTING_INSTANCE (
+                    "The FTS daemon is running already.");
+            }
+
+            /* create the FtsDaemon instance and register its objects on dbus */
+            try
+            {
+                instance = new FtsDaemon ();
+                instance.register_dbus_object (connection);
+            }
+            catch (Error err)
+            {
+                if (err is EngineError.DATABASE_CANTOPEN)
+                {
+                    warning ("Could not access the database file.\n" +
+                        "Please check the permissions of file %s.",
+                        Utils.get_database_file_path ());
+                }
+                else if (err is EngineError.DATABASE_BUSY)
+                {
+                    warning ("It looks like another Zeitgeist instance " +
+                        "is already running (the database is locked).");
+                }
+                throw err; // re-thrown so main () can turn it into an exit code
+            }
+
+            uint owner_id = Bus.own_name_on_connection (connection,
+                DBUS_NAME,
+                BusNameOwnerFlags.NONE,
+                name_acquired_callback,
+                name_lost_callback);
+            // Blocks until do_quit () or name_lost_callback () quits the loop.
+            mainloop = new MainLoop ();
+            mainloop.run ();
+
+            if (instance != null)
+            {
+                Bus.unown_name (owner_id);
+                instance.unregister_dbus_object ();
+                instance = null;
+
+                // flush queued outgoing messages before returning (failure is only logged)
+                try
+                {
+                    connection.flush_sync ();
+                }
+                catch (Error e)
+                {
+                    warning ("%s", e.message);
+                }
+            }
+        }
+
+        static void safe_exit ()
+        {   // POSIX signal handler installed by main (): asks the daemon to shut down.
+            if (instance != null) instance.do_quit (); // null until run () creates it
+        }
+
+        /**
+         * Entry point.  Exit status: 0 on success or after --version, 21 if the
+         * database cannot be opened, 22 if it is busy, 1 on any other error.
+         */
+        static int main (string[] args)
+        {
+            // FIXME: the cat process xapian spawns won't like this and we
+            // can freeze if it dies
+            Posix.signal (Posix.SIGHUP, safe_exit);
+            Posix.signal (Posix.SIGINT, safe_exit);
+            Posix.signal (Posix.SIGTERM, safe_exit);
+
+            var opt_context = new OptionContext (" - Zeitgeist FTS daemon");
+            opt_context.add_main_entries (options, null);
+
+            try
+            {
+                opt_context.parse (ref args);
+                if (show_version_info)
+                {
+                    stdout.printf ("%s\n", Config.VERSION);
+                    return 0;
+                }
+
+                // Unrecognised --log-level values keep the default (discard DEBUG only).
+                LogLevelFlags discarded = LogLevelFlags.LEVEL_DEBUG;
+                if (log_level != null)
+                {
+                    var ld = LogLevelFlags.LEVEL_DEBUG;
+                    var li = LogLevelFlags.LEVEL_INFO;
+                    var lm = LogLevelFlags.LEVEL_MESSAGE;
+                    var lw = LogLevelFlags.LEVEL_WARNING;
+                    var lc = LogLevelFlags.LEVEL_CRITICAL;
+                    switch (log_level.up ())
+                    {
+                        case "DEBUG":
+                            discarded = 0;
+                            break;
+                        case "INFO":
+                            discarded = ld;
+                            break;
+                        case "WARNING":
+                            discarded = ld | li | lm;
+                            break;
+                        case "CRITICAL":
+                            discarded = ld | li | lm | lw;
+                            break;
+                        case "ERROR":
+                            discarded = ld | li | lm | lw | lc;
+                            break;
+                    }
+                }
+                if (discarded != 0)
+                    Log.set_handler ("", discarded, () => {});
+                else
+                    Environment.set_variable ("G_MESSAGES_DEBUG", "all", true);
+
+                run ();
+            }
+            catch (Error err)
+            {
+                if (err is EngineError.DATABASE_CANTOPEN)
+                    return 21;
+                if (err is EngineError.DATABASE_BUSY)
+                    return 22;
+
+                warning ("%s", err.message);
+                return 1;
+            }
+
+            return 0;
+        }
+
+    }
+
+}
+
+// vim:expandtab:ts=4:sw=4
diff --git a/extensions/fts-python/Makefile.am b/extensions/fts-python/Makefile.am
deleted file mode 100644
index 73cf55ee..00000000
--- a/extensions/fts-python/Makefile.am
+++ /dev/null
@@ -1,23 +0,0 @@
-NULL =
-
-ftsdir = $(pkgdatadir)/fts-python
-dist_fts_SCRIPTS = \
-	fts.py \
-	$(NULL)
-
-dist_fts_DATA = \
-	datamodel.py \
-	constants.py \
-	lrucache.py \
-	sql.py \
-	$(NULL)
-
-servicedir = $(DBUS_SERVICES_DIR)
-service_DATA = org.gnome.zeitgeist.fts.service
-
-org.gnome.zeitgeist.fts.service: org.gnome.zeitgeist.fts.service.in
-	$(AM_V_GEN)sed  -e s!\@pkgdatadir\@!$(pkgdatadir)! < $< > $@
-org.gnome.zeitgeist.fts.service: Makefile
-
-EXTRA_DIST = org.gnome.zeitgeist.fts.service.in
-CLEANFILES = org.gnome.zeitgeist.fts.service
diff --git a/extensions/fts-python/constants.py b/extensions/fts-python/constants.py
deleted file mode 100644
index f52e5efb..00000000
--- a/extensions/fts-python/constants.py
+++ /dev/null
@@ -1,71 +0,0 @@
-# -.- coding: utf-8 -.-
-
-# Zeitgeist
-#
-# Copyright © 2009 Markus Korn <thekorn@gmx.de>
-# Copyright © 2009-2010 Siegfried-Angel Gevatter Pujals <rainct@ubuntu.com>
-# Copyright © 2009 Mikkel Kamstrup Erlandsen <mikkel.kamstrup@gmail.com>
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU Lesser General Public License as published by
-# the Free Software Foundation, either version 2.1 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU Lesser General Public License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public License
-# along with this program.  If not, see <http://www.gnu.org/licenses/>.
-
-import os
-import logging
-from xdg import BaseDirectory
-
-from zeitgeist.client import ZeitgeistDBusInterface
-
-__all__ = [
-	"log",
-	"get_engine",
-	"constants"
-]
-
-log = logging.getLogger("zeitgeist.engine")
-
-_engine = None
-def get_engine():
-	""" Get the running engine instance or create a new one. """
-	global _engine
-	if _engine is None or _engine.is_closed():
-		import main # _zeitgeist.engine.main
-		_engine = main.ZeitgeistEngine()
-	return _engine
-
-class _Constants:
-	# Directories
-	DATA_PATH = os.environ.get("ZEITGEIST_DATA_PATH",
-		BaseDirectory.save_data_path("zeitgeist"))
-	DATABASE_FILE = os.environ.get("ZEITGEIST_DATABASE_PATH",
-		os.path.join(DATA_PATH, "activity.sqlite"))
-	DATABASE_FILE_BACKUP = os.environ.get("ZEITGEIST_DATABASE_BACKUP_PATH",
-		os.path.join(DATA_PATH, "activity.sqlite.bck"))
-	DEFAULT_LOG_PATH = os.path.join(BaseDirectory.xdg_cache_home,
-		"zeitgeist", "daemon.log")
-	
-	# D-Bus
-	DBUS_INTERFACE = ZeitgeistDBusInterface.INTERFACE_NAME
-	SIG_EVENT = "asaasay"
-	
-	# Required version of DB schema
-	CORE_SCHEMA="core"
-	CORE_SCHEMA_VERSION = 4
-	
-	USER_EXTENSION_PATH = os.path.join(DATA_PATH, "extensions")
-	
-	# configure runtime cache for events
-	# default size is 2000
-	CACHE_SIZE = int(os.environ.get("ZEITGEIST_CACHE_SIZE", 2000))
-	log.debug("Cache size = %i" %CACHE_SIZE)
-
-constants = _Constants()
diff --git a/extensions/fts-python/datamodel.py b/extensions/fts-python/datamodel.py
deleted file mode 100644
index defbe711..00000000
--- a/extensions/fts-python/datamodel.py
+++ /dev/null
@@ -1,83 +0,0 @@
-# -.- coding: utf-8 -.-
-
-# Zeitgeist
-#
-# Copyright © 2009 Mikkel Kamstrup Erlandsen <mikkel.kamstrup@gmail.com>
-# Copyright © 2009 Markus Korn <thekorn@gmx.de>
-# Copyright © 2009 Seif Lotfy <seif@lotfy.com>
-# Copyright © 2009-2010 Siegfried-Angel Gevatter Pujals <rainct@ubuntu.com>
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU Lesser General Public License as published by
-# the Free Software Foundation, either version 2.1 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU Lesser General Public License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public License
-# along with this program.  If not, see <http://www.gnu.org/licenses/>.
-
-from zeitgeist.datamodel import Event as OrigEvent, Subject as OrigSubject, \
-	DataSource as OrigDataSource
-	
-class Event(OrigEvent):
-	
-	@staticmethod
-	def _to_unicode(obj):
-		"""
-		Return an unicode representation of the given object.
-		If obj is None, return an empty string.
-		"""
-		return unicode(obj) if obj is not None else u""
-	
-	@staticmethod
-	def _make_dbus_sendable(obj):
-		"""
-		Ensure that all fields in the event struct are non-None
-		"""
-		for n, value in enumerate(obj[0]):
-			obj[0][n] = obj._to_unicode(value)
-		for subject in obj[1]:
-			for n, value in enumerate(subject):
-				subject[n] = obj._to_unicode(value)
-		# The payload require special handling, since it is binary data
-		# If there is indeed data here, we must not unicode encode it!
-		if obj[2] is None:
-			obj[2] = u""
-		elif isinstance(obj[2], unicode):
-			obj[2] = str(obj[2])
-		return obj
-			
-	@staticmethod
-	def get_plain(ev):
-		"""
-		Ensure that an Event instance is a Plain Old Python Object (popo),
-		without DBus wrappings etc.
-		"""
-		popo = []
-		popo.append(map(unicode, ev[0]))
-		popo.append([map(unicode, subj) for subj in ev[1]])
-		# We need the check here so that if D-Bus gives us an empty
-		# byte array we don't serialize the text "dbus.Array(...)".
-		popo.append(str(ev[2]) if ev[2] else u'')
-		return popo
-
-class Subject(OrigSubject):
-    pass
-
-class DataSource(OrigDataSource):
-
-	@staticmethod
-	def get_plain(datasource):
-		for plaintype, props in {
-				unicode: (DataSource.Name, DataSource.Description),
-				lambda x: map(Event.get_plain, x): (DataSource.EventTemplates,),
-				bool: (DataSource.Running, DataSource.Enabled),
-				int: (DataSource.LastSeen,),
-			}.iteritems():
-			for prop in props:
-				datasource[prop] = plaintype(datasource[prop])
-		return tuple(datasource)
diff --git a/extensions/fts-python/fts.py b/extensions/fts-python/fts.py
deleted file mode 100644
index 772eb699..00000000
--- a/extensions/fts-python/fts.py
+++ /dev/null
@@ -1,1273 +0,0 @@
-#!/usr/bin/env python
-# -.- coding: utf-8 -.-
-
-# Zeitgeist
-#
-# Copyright © 2009 Mikkel Kamstrup Erlandsen <mikkel.kamstrup@gmail.com>
-# Copyright © 2010 Canonical Ltd
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU Lesser General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU Lesser General Public License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public License
-# along with this program.  If not, see <http://www.gnu.org/licenses/>.
-#
-
-#
-# TODO
-#
-# - Delete events hook
-# - ? Filter on StorageState
-# - Throttle IO and CPU where possible
-
-import os, sys
-import time
-import pickle
-import dbus
-import sqlite3
-import dbus.service
-from xdg import BaseDirectory
-from xdg.DesktopEntry import DesktopEntry, xdg_data_dirs
-import logging
-import subprocess
-from xml.dom import minidom
-import xapian
-import os
-from Queue import Queue, Empty
-import threading
-from urllib import quote as url_escape, unquote as url_unescape
-import gobject, gio
-from cStringIO import StringIO
-
-from collections import defaultdict
-from array import array
-from zeitgeist.datamodel import Event as OrigEvent, StorageState, TimeRange, \
-    ResultType, get_timestamp_for_now, Interpretation, Symbol, NEGATION_OPERATOR, WILDCARD, NULL_EVENT
-from datamodel import Event, Subject
-from constants import constants
-from zeitgeist.client import ZeitgeistClient, ZeitgeistDBusInterface
-from sql import get_default_cursor, unset_cursor, TableLookup, WhereClause
-from lrucache import LRUCache
-
-ZG_CLIENT = ZeitgeistClient()
-
-logging.basicConfig(level=logging.DEBUG)
-log = logging.getLogger("zeitgeist.fts")
-
-INDEX_FILE = os.path.join(constants.DATA_PATH, "bb.fts.index")
-INDEX_VERSION = "1"
-INDEX_LOCK = threading.Lock()
-FTS_DBUS_BUS_NAME = "org.gnome.zeitgeist.SimpleIndexer"
-FTS_DBUS_OBJECT_PATH = "/org/gnome/zeitgeist/index/activity"
-FTS_DBUS_INTERFACE = "org.gnome.zeitgeist.Index"
-
-FILTER_PREFIX_EVENT_INTERPRETATION = "ZGEI"
-FILTER_PREFIX_EVENT_MANIFESTATION = "ZGEM"
-FILTER_PREFIX_ACTOR = "ZGA"
-FILTER_PREFIX_SUBJECT_URI = "ZGSU"
-FILTER_PREFIX_SUBJECT_INTERPRETATION = "ZGSI"
-FILTER_PREFIX_SUBJECT_MANIFESTATION = "ZGSM"
-FILTER_PREFIX_SUBJECT_ORIGIN = "ZGSO"
-FILTER_PREFIX_SUBJECT_MIMETYPE = "ZGST"
-FILTER_PREFIX_SUBJECT_STORAGE = "ZGSS"
-FILTER_PREFIX_XDG_CATEGORY = "AC"
-
-VALUE_EVENT_ID = 0
-VALUE_TIMESTAMP = 1
-
-MAX_CACHE_BATCH_SIZE = constants.CACHE_SIZE/2
-
-# When sorting by of the COALESCING_RESULT_TYPES result types,
-# we need to fetch some extra events from the Xapian index because
-# the final result set will be coalesced on some property of the event
-COALESCING_RESULT_TYPES = [ \
-    ResultType.MostRecentSubjects,
-    ResultType.LeastRecentSubjects,
-    ResultType.MostPopularSubjects,
-    ResultType.LeastPopularSubjects,
-    ResultType.MostRecentActor,
-    ResultType.LeastRecentActor,
-    ResultType.MostPopularActor,
-    ResultType.LeastPopularActor,
-]
-
-MAX_TERM_LENGTH = 245
-
-
-class NegationNotSupported(ValueError):
-    pass
-
-class WildcardNotSupported(ValueError):
-    pass
-
-def parse_negation(kind, field, value, parse_negation=True):
-    """checks if value starts with the negation operator,
-    if value starts with the negation operator but the field does
-    not support negation a ValueError is raised.
-    This function returns a (value_without_negation, negation)-tuple
-    """
-    negation = False
-    if parse_negation and value.startswith(NEGATION_OPERATOR):
-        negation = True
-        value = value[len(NEGATION_OPERATOR):]
-    if negation and field not in kind.SUPPORTS_NEGATION:
-        raise NegationNotSupported("This field does not support negation")
-    return value, negation
-    
-def parse_wildcard(kind, field, value):
-    """checks if value ends with the a wildcard,
-    if value ends with a wildcard but the field does not support wildcards
-    a ValueError is raised.
-    This function returns a (value_without_wildcard, wildcard)-tuple
-    """
-    wildcard = False
-    if value.endswith(WILDCARD):
-        wildcard = True
-        value = value[:-len(WILDCARD)]
-    if wildcard and field not in kind.SUPPORTS_WILDCARDS:
-        raise WildcardNotSupported("This field does not support wildcards")
-    return value, wildcard
-    
-def parse_operators(kind, field, value):
-    """runs both (parse_negation and parse_wildcard) parser functions
-    on query values, and handles the special case of Subject.Text correctly.
-    returns a (value_without_negation_and_wildcard, negation, wildcard)-tuple
-    """
-    try:
-        value, negation = parse_negation(kind, field, value)
-    except ValueError:
-        if kind is Subject and field == Subject.Text:
-            # we do not support negation of the text field,
-            # the text field starts with the NEGATION_OPERATOR
-            # so we handle this string as the content instead
-            # of an operator
-            negation = False
-        else:
-            raise
-    value, wildcard = parse_wildcard(kind, field, value)
-    return value, negation, wildcard
-
-
-def synchronized(lock):
-    """ Synchronization decorator. """
-    def wrap(f):
-        def newFunction(*args, **kw):
-            lock.acquire()
-            try:
-                return f(*args, **kw)
-            finally:
-                lock.release()
-        return newFunction
-    return wrap
-
-class Deletion:
-    """
-    A marker class that marks an event id for deletion
-    """
-    def __init__ (self, event_id):
-        self.event_id = event_id
-
-class Reindex:
-    """
-    Marker class that tells the worker thread to rebuild the entire index.
-    On construction time all events are pulled out of the zg_engine
-    argument and stored for later processing in the worker thread.
-    This avoid concurrent access to the ZG sqlite db from the worker thread.
-    """
-    def __init__ (self, zg_engine):
-        all_events = zg_engine._find_events(1, TimeRange.always(),
-            [], StorageState.Any,
-            sys.maxint,
-            ResultType.MostRecentEvents)
-        self.all_events = all_events
-
-class SearchEngineExtension (dbus.service.Object):
-    """
-    Full text indexing and searching extension for Zeitgeist
-    """
-    PUBLIC_METHODS = []
-    
-    def __init__ (self):
-        bus_name = dbus.service.BusName(FTS_DBUS_BUS_NAME, bus=dbus.SessionBus())
-        dbus.service.Object.__init__(self, bus_name, FTS_DBUS_OBJECT_PATH)
-        self._indexer = Indexer()
-        
-        ZG_CLIENT.install_monitor((0, 2**63 - 1), [],
-            self.pre_insert_event, self.post_delete_events)
-    
-    def pre_insert_event(self, timerange, events):
-        for event in events:
-            self._indexer.index_event (event)
-
-    def post_delete_events (self, ids):
-        for _id in ids:
-            self._indexer.delete_event (_id)
-                
-    @dbus.service.method(FTS_DBUS_INTERFACE,
-                         in_signature="s(xx)a("+constants.SIG_EVENT+")uuu",
-                         out_signature="a("+constants.SIG_EVENT+")u")
-    def Search(self, query_string, time_range, filter_templates, offset, count, result_type):
-        """
-        DBus method to perform a full text search against the contents of the
-        Zeitgeist log. Returns an array of events.
-        """
-        time_range = TimeRange(time_range[0], time_range[1])
-        filter_templates = map(Event, filter_templates)
-        events, hit_count = self._indexer.search(query_string, time_range,
-                                                 filter_templates,
-                                                 offset, count, result_type)
-        return self._make_events_sendable (events), hit_count
-        
-    @dbus.service.method(FTS_DBUS_INTERFACE,
-                       in_signature="",
-                       out_signature="")
-    def ForceReindex(self):
-        """
-        DBus method to force a reindex of the entire Zeitgeist log.
-        This method is only intended for debugging purposes and is not
-        considered blessed public API.
-        """
-        log.debug ("Received ForceReindex request over DBus.")
-        self._indexer._queue.put (Reindex (self._indexer))
-    
-    def _make_events_sendable(self, events):
-        return [NULL_EVENT if event is None else Event._make_dbus_sendable(event) for event in events]
-
-def mangle_uri (uri):
-    """
-    Converts a URI into an index- and query friendly string. The problem
-    is that Xapian doesn't handle CAPITAL letters or most non-alphanumeric
-    symbols in a boolean term when it does prefix matching. The mangled
-    URIs returned from this function are suitable for boolean prefix searches.
-    
-    IMPORTANT: This is a 1-way function! You can not convert back.
-    """
-    result = ""
-    for c in uri.lower():
-        if c in (": /"):
-            result += "_"
-        else:
-            result += c
-    return result
-
-def cap_string (s, nbytes=MAX_TERM_LENGTH):
-    """
-    If s has more than nbytes bytes (not characters) then cap it off
-    after nbytes bytes in a way still producing a valid utf-8 string.
-    
-    Assumes that s is a utf-8 string.
-    
-    This function useful for working with Xapian terms because Xapian has
-    a max term length of 245 (which is not very well documented, but see
-    http://xapian.org/docs/omega/termprefixes.html).
-    """
-    # Check if we can fast-path this string
-    if (len(s.encode("utf-8")) <= nbytes):
-        return s
-    
-    # We use a StringIO here to avoid mem thrashing via naiive
-    # string concatenation. See fx. http://www.skymind.com/~ocrow/python_string/
-    buf = StringIO()
-    for char in s :
-        if buf.tell() >= nbytes - 1 :
-            return buf.getvalue()
-        buf.write(char.encode("utf-8"))
-    
-    return unicode(buf.getvalue().decode("utf-8"))
-
-
-def expand_type (type_prefix, uri):
-    """
-    Return a string with a Xapian query matching all child types of 'uri'
-    inside the Xapian prefix 'type_prefix'.
-    """
-    is_negation = uri.startswith(NEGATION_OPERATOR)
-    uri = uri[1:] if is_negation else uri
-    children = Symbol.find_child_uris_extended(uri)
-    children = [ "%s:%s" % (type_prefix, child) for child in children ]
-
-    result = " OR ".join(children)
-    return result if not is_negation else "NOT (%s)" % result
-
-class Indexer:
-    """
-    Abstraction of the FT indexer and search engine
-    """
-    
-    QUERY_PARSER_FLAGS = xapian.QueryParser.FLAG_PHRASE |   \
-                         xapian.QueryParser.FLAG_BOOLEAN |  \
-                         xapian.QueryParser.FLAG_PURE_NOT |  \
-                         xapian.QueryParser.FLAG_LOVEHATE | \
-                         xapian.QueryParser.FLAG_WILDCARD
-    
-    def __init__ (self):
-        
-        self._cursor = cursor = get_default_cursor()
-        os.environ["XAPIAN_CJK_NGRAM"] = "1"
-        self._interpretation = TableLookup(cursor, "interpretation")
-        self._manifestation = TableLookup(cursor, "manifestation")
-        self._mimetype = TableLookup(cursor, "mimetype")
-        self._actor = TableLookup(cursor, "actor")
-        self._event_cache = LRUCache(constants.CACHE_SIZE)
-        
-        log.debug("Opening full text index: %s" % INDEX_FILE)
-        try:
-            self._index = xapian.WritableDatabase(INDEX_FILE, xapian.DB_CREATE_OR_OPEN)
-        except xapian.DatabaseError, e:
-            log.warn("Full text index corrupted: '%s'. Rebuilding index." % e)
-            self._index = xapian.WritableDatabase(INDEX_FILE, xapian.DB_CREATE_OR_OVERWRITE)
-        self._tokenizer = indexer = xapian.TermGenerator()
-        self._query_parser = xapian.QueryParser()
-        self._query_parser.set_database (self._index)
-        self._query_parser.add_prefix("name", "N")
-        self._query_parser.add_prefix("title", "N")
-        self._query_parser.add_prefix("site", "S")
-        self._query_parser.add_prefix("app", "A")
-        self._query_parser.add_boolean_prefix("zgei", FILTER_PREFIX_EVENT_INTERPRETATION)
-        self._query_parser.add_boolean_prefix("zgem", FILTER_PREFIX_EVENT_MANIFESTATION)
-        self._query_parser.add_boolean_prefix("zga", FILTER_PREFIX_ACTOR)
-        self._query_parser.add_prefix("zgsu", FILTER_PREFIX_SUBJECT_URI)
-        self._query_parser.add_boolean_prefix("zgsi", FILTER_PREFIX_SUBJECT_INTERPRETATION)
-        self._query_parser.add_boolean_prefix("zgsm", FILTER_PREFIX_SUBJECT_MANIFESTATION)
-        self._query_parser.add_prefix("zgso", FILTER_PREFIX_SUBJECT_ORIGIN)
-        self._query_parser.add_boolean_prefix("zgst", FILTER_PREFIX_SUBJECT_MIMETYPE)
-        self._query_parser.add_boolean_prefix("zgss", FILTER_PREFIX_SUBJECT_STORAGE)
-        self._query_parser.add_prefix("category", FILTER_PREFIX_XDG_CATEGORY)
-        self._query_parser.add_valuerangeprocessor(
-              xapian.NumberValueRangeProcessor(VALUE_EVENT_ID, "id", True))
-        self._query_parser.add_valuerangeprocessor(
-              xapian.NumberValueRangeProcessor(VALUE_TIMESTAMP, "ms", False))
-        self._query_parser.set_default_op(xapian.Query.OP_AND)
-        self._enquire = xapian.Enquire(self._index)
-        
-        self._desktops = {}
-        
-        gobject.threads_init()
-        self._may_run = True
-        self._queue = Queue(0)
-        self._worker = threading.Thread(target=self._worker_thread,
-                                        name="IndexWorker")
-        self._worker.daemon = True
-        
-        # We need to defer the index checking until after ZG has completed
-        # full setup. Hence the idle handler.
-        # We also don't start the worker until after we've checked the index
-        gobject.idle_add (self._check_index_and_start_worker)
-
-    @synchronized (INDEX_LOCK)
-    def _check_index_and_start_worker (self):
-        """
-        Check whether we need a rebuild of the index.
-        Returns True if the index is good. False if a reindexing has
-        been commenced.
-        
-        This method should be called from the main thread and only once.
-        It starts the worker thread as a side effect.
-
-        We are clearing the queue, because there may be a race when an
-        event insertion / deletion is already queued and our index
-        is corrupted. Creating a new queue instance should be safe,
-        because we're running in main thread as are the index_event
-        and delete_event methods, and the worker thread wasn't yet
-        started.
-        """
-        if self._index.get_metadata("fts_index_version") != INDEX_VERSION:
-            log.info("Index must be upgraded. Doing full rebuild")
-            self._queue = Queue(0)
-            self._queue.put(Reindex(self))
-        elif self._index.get_doccount() == 0:
-            # If the index is empty we trigger a rebuild
-            # We must delay reindexing until after the engine is done setting up
-            log.info("Empty index detected. Doing full rebuild")
-            self._queue = Queue(0)
-            self._queue.put(Reindex(self))
-        
-        # Now that we've checked the index from the main thread we can start the worker
-        self._worker.start()
-    
-    def index_event (self, event):
-        """
-        This method schedules and event for indexing. It returns immediate and
-        defers the actual work to a bottom half thread. This means that it
-        will not block the main loop of the Zeitgeist daemon while indexing
-        (which may be a heavy operation)
-        """
-        self._queue.put (event)
-        return event
-    
-    def delete_event (self, event_id):
-        """
-        Remove an event from the index given its event id
-        """
-        self._queue.put (Deletion(event_id))
-        return        
-    
-    @synchronized (INDEX_LOCK)
-    def search (self, query_string, time_range=None, filters=None, offset=0, maxhits=10, result_type=100):
-        """
-        Do a full text search over the indexed corpus. The `result_type`
-        parameter may be a zeitgeist.datamodel.ResultType or 100. In case it is
-        100 the textual relevancy of the search engine will be used to sort the
-        results. Result type 100 is the fastest (and default) mode.
-        
-        The filters argument should be a list of event templates.
-        """
-        # Expand event template filters if necessary
-        if filters:
-            query_string = "(%s) AND (%s)" % (query_string, self._compile_event_filter_query (filters))
-        
-        # Expand time range value query
-        if time_range and not time_range.is_always():
-            query_string = "(%s) AND (%s)" % (query_string, self._compile_time_range_filter_query (time_range))
-        
-        # If the result type coalesces the events we need to fetch some extra
-        # events from the index to have a chance of actually holding 'maxhits'
-        # unique events
-        if result_type in COALESCING_RESULT_TYPES:
-            raw_maxhits = maxhits * 3
-        else:
-            raw_maxhits = maxhits
-        
-        # When not sorting by relevance, we fetch the results from Xapian sorted,
-        # by timestamp. That minimizes the skew we get from otherwise doing a
-        # relevancy ranked xapaian query and then resorting with Zeitgeist. The
-        # "skew" is that low-relevancy results may still have the highest timestamp
-        if result_type == 100:
-          self._enquire.set_sort_by_relevance()
-        else:
-          self._enquire.set_sort_by_value(VALUE_TIMESTAMP, True)
-        
-        # Allow wildcards
-        query_start = time.time()
-        query = self._query_parser.parse_query (query_string,
-                                                self.QUERY_PARSER_FLAGS)
-        self._enquire.set_query (query)
-        hits = self._enquire.get_mset (offset, raw_maxhits)
-        hit_count = hits.get_matches_estimated()
-        log.debug("Search '%s' gave %s hits in %sms" %
-                  (query_string, hits.get_matches_estimated(), (time.time() - query_start)*1000))
-        
-        if result_type == 100:
-            event_ids = []
-            for m in hits:
-                event_id = int(xapian.sortable_unserialise(
-                                          m.document.get_value(VALUE_EVENT_ID)))                
-                event_ids.append (event_id)
-            if event_ids:
-                return self.get_events(event_ids), hit_count
-            else:
-                return [], 0
-        else:
-            templates = []
-            for m in hits:
-                event_id = int(xapian.sortable_unserialise(
-                                          m.document.get_value(VALUE_EVENT_ID)))
-                ev = Event()
-                ev[0][Event.Id] = str(event_id)
-                templates.append(ev)
-            if templates:
-                x = self._find_events(1, TimeRange.always(),
-                                                 templates,
-                                                 StorageState.Any,
-                                                 maxhits,
-                                                 result_type), hit_count
-                return x
-            else:
-                return [], 0
-    
-    def _worker_thread (self):
-        is_dirty = False
-        while self._may_run:
-            # FIXME: Throttle IO and CPU
-            try:
-                # If we are dirty wait a while before we flush,
-                # or if we are clean wait indefinitely to avoid
-                # needless wakeups
-                if is_dirty:
-                    event = self._queue.get(True, 0.5)
-                else:
-                    event = self._queue.get(True)
-                
-                if isinstance (event, Deletion):
-                    self._delete_event_real (event.event_id)
-                elif isinstance (event, Reindex):
-                    self._reindex (event.all_events)
-                else:
-                    self._index_event_real (event)
-                
-                is_dirty = True
-            except Empty:
-                if is_dirty:
-                    # Write changes to disk
-                    log.debug("Committing FTS index")
-                    self._index.flush()
-                    is_dirty = False
-                else:
-                    log.debug("No changes to index. Sleeping")
-    
-    @synchronized (INDEX_LOCK)
-    def _reindex (self, event_list):
-        """
-        Index everything in the ZG log. The argument must be a list
-        of events. Typically extracted by a Reindex instance.
-        Only call from worker thread as it writes to the db and Xapian
-        is *not* thread safe (only single-writer-multiple-reader).
-        """
-        self._index.close ()
-        self._index = xapian.WritableDatabase(INDEX_FILE, xapian.DB_CREATE_OR_OVERWRITE)
-        self._query_parser.set_database (self._index)
-        self._enquire = xapian.Enquire(self._index)
-        # Register that this index was built with CJK enabled
-        self._index.set_metadata("fts_index_version", INDEX_VERSION)
-        log.info("Preparing to rebuild index with %s events" % len(event_list))
-        for e in event_list : self._queue.put(e)
-    
-    @synchronized (INDEX_LOCK)
-    def _delete_event_real (self, event_id):
-        """
-        Look up the doc id given an event id and remove the xapian.Document
-        for that doc id.
-        Note: This is slow, but there's not much we can do about it
-        """
-        try:
-            _id = xapian.sortable_serialise(float(event_id))
-            query = xapian.Query(xapian.Query.OP_VALUE_RANGE, 
-                                 VALUE_EVENT_ID, _id, _id)
-            
-            self._enquire.set_query (query)
-            hits = self._enquire.get_mset (0, 10)
-            
-            total = hits.get_matches_estimated()
-            if total > 1:
-                log.warning ("More than one event found with id '%s'" % event_id)
-            elif total <= 0:
-                log.debug ("No event for id '%s'" % event_id)
-                return
-        
-            for m in hits:
-                log.debug("Deleting event '%s' with docid '%s'" %
-                          (event_id, m.docid))
-                self._index.delete_document(m.docid)
-        except Exception, e:
-            log.error("Failed to delete event '%s': %s" % (event_id, e))
-        
-    def _split_uri (self, uri):
-        """
-        Returns a triple of (scheme, host, and path) extracted from `uri`
-        """        
-        i = uri.find(":")
-        if i == -1 :
-            scheme =  ""
-            host = ""
-            path = uri
-        else:
-            scheme = uri[:i]
-            host = ""
-            path = ""
-          
-        if uri[i+1] == "/" and uri[i+2] == "/":
-            j = uri.find("/", i+3)
-            if j == -1 :
-                host = uri[i+3:]
-            else:
-                host = uri[i+3:j]
-                path = uri[j:]
-        else:
-            host = uri[i+1:]
-        
-        # Strip out URI query part
-        i = path.find("?")
-        if i != -1:
-            path = path[:i]
-        
-        return scheme, host, path
-    
-    def _get_desktop_entry (self, app_id):
-        """
-        Return a xdg.DesktopEntry.DesktopEntry `app_id` or None in case
-        no file is found for the given desktop id
-        """
-        if app_id in self._desktops:
-            return self._desktops[app_id]
-        
-        for datadir in xdg_data_dirs:
-            path = os.path.join(datadir, "applications", app_id)
-            if os.path.exists(path):
-                try:
-                    desktop = DesktopEntry(path)
-                    self._desktops[app_id] = desktop
-                    return desktop
-                except Exception, e:
-                    log.warning("Unable to load %s: %s" % (path, e))
-                    return None
-        
-        return None
-    
-    def _index_actor (self, actor):
-        """
-        Takes an actor as a path to a .desktop file or app:// uri
-        and index the contents of the corresponding .desktop file
-        into the document currently set for self._tokenizer.
-        """
-        if not actor : return
-        
-        # Get the path of the .desktop file and convert it to
-        # an app id (eg. 'gedit.desktop')
-        scheme, host, path = self._split_uri(url_unescape (actor))
-        if not path:
-            path = host
-        
-        if not path :
-            log.debug("Unable to determine application id for %s" % actor)
-            return
-        
-        if path.startswith("/") :
-            path = os.path.basename(path)
-        
-        desktop = self._get_desktop_entry(path)
-        if desktop:
-            if not desktop.getNoDisplay():
-                self._tokenizer.index_text(desktop.getName(), 5)
-                self._tokenizer.index_text(desktop.getName(), 5, "A")
-                self._tokenizer.index_text(desktop.getGenericName(), 5)
-                self._tokenizer.index_text(desktop.getGenericName(), 5, "A")
-                self._tokenizer.index_text(desktop.getComment(), 2)
-                self._tokenizer.index_text(desktop.getComment(), 2, "A")
-            
-                doc = self._tokenizer.get_document()
-                for cat in desktop.getCategories():
-                    doc.add_boolean_term(FILTER_PREFIX_XDG_CATEGORY+cat.lower())
-        else:
-            log.debug("Unable to look up app info for %s" % actor)
-        
-    
-    def _index_uri (self, uri):
-        """
-        Index `uri` into the document currectly set on self._tokenizer
-        """
-        # File URIs and paths are indexed in one way, and all other,
-        # usually web URIs, are indexed in another way because there may
-        # be domain name etc. in there we want to rank differently
-        scheme, host, path = self._split_uri (url_unescape (uri))
-        if scheme == "file" or not scheme:
-            path, name = os.path.split(path)
-            self._tokenizer.index_text(name, 5)
-            self._tokenizer.index_text(name, 5, "N")
-            
-            # Index parent names with descending weight
-            weight = 5
-            while path and name:
-                weight = weight / 1.5
-                path, name = os.path.split(path)
-                self._tokenizer.index_text(name, int(weight))
-            
-        elif scheme == "mailto":
-            tokens = host.split("@")
-            name = tokens[0]
-            self._tokenizer.index_text(name, 6)
-            if len(tokens) > 1:
-                self._tokenizer.index_text(" ".join[1:], 1)
-        else:
-            # We're cautious about indexing the path components of
-            # non-file URIs as some websites practice *extremely* long
-            # and useless URLs
-            path, name = os.path.split(path)
-            if len(name) > 30 : name = name[:30]
-            if len(path) > 30 : path = path[30]
-            if name:
-                self._tokenizer.index_text(name, 5)
-                self._tokenizer.index_text(name, 5, "N")
-            if path:
-                self._tokenizer.index_text(path, 1)
-                self._tokenizer.index_text(path, 1, "N")
-            if host:
-                self._tokenizer.index_text(host, 2)
-                self._tokenizer.index_text(host, 2, "N")
-                self._tokenizer.index_text(host, 2, "S")
-    
-    def _index_text (self, text):
-        """
-        Index `text` as raw text data for the document currently
-        set on self._tokenizer. The text is assumed to be a primary
-        description of the subject, such as the basename of a file.
-        
-        Primary use is for subject.text
-        """
-        self._tokenizer.index_text(text, 5)
-    
-    def _index_contents (self, uri):
-        # xmlindexer doesn't extract words for URIs only for file paths
-        
-        # FIXME: IONICE and NICE on xmlindexer
-        
-        path = uri.replace("file://", "")
-        xmlindexer = subprocess.Popen(['xmlindexer', path],
-                                      stdout=subprocess.PIPE)
-        xml = xmlindexer.communicate()[0].strip()
-        xmlindexer.wait()        
-        
-        dom = minidom.parseString(xml)
-        text_nodes = dom.getElementsByTagName("text")
-        lines = []
-        if text_nodes:
-            for line in text_nodes[0].childNodes:
-                lines.append(line.data)
-        
-        if lines:
-                self._tokenizer.index_text (" ".join(lines))
-        
-    
-    def _add_doc_filters (self, event, doc):
-        """Adds the filtering rules to the doc. Filtering rules will
-           not affect the relevancy ranking of the event/doc"""
-        if event.interpretation:
-            doc.add_boolean_term (cap_string(FILTER_PREFIX_EVENT_INTERPRETATION+event.interpretation))
-        if event.manifestation:
-            doc.add_boolean_term (cap_string(FILTER_PREFIX_EVENT_MANIFESTATION+event.manifestation))
-        if event.actor:
-            doc.add_boolean_term (cap_string(FILTER_PREFIX_ACTOR+mangle_uri(event.actor)))
-        
-        for su in event.subjects:
-            if su.uri:
-                doc.add_boolean_term (cap_string(FILTER_PREFIX_SUBJECT_URI+mangle_uri(su.uri)))
-            if su.interpretation:
-                doc.add_boolean_term (cap_string(FILTER_PREFIX_SUBJECT_INTERPRETATION+su.interpretation))
-            if su.manifestation:
-                doc.add_boolean_term (cap_string(FILTER_PREFIX_SUBJECT_MANIFESTATION+su.manifestation))
-            if su.origin:
-                doc.add_boolean_term (cap_string(FILTER_PREFIX_SUBJECT_ORIGIN+mangle_uri(su.origin)))
-            if su.mimetype:
-                doc.add_boolean_term (cap_string(FILTER_PREFIX_SUBJECT_MIMETYPE+su.mimetype))
-            if su.storage:
-                doc.add_boolean_term (cap_string(FILTER_PREFIX_SUBJECT_STORAGE+su.storage))
-    
-    @synchronized (INDEX_LOCK)
-    def _index_event_real (self, event):
-        if not isinstance (event, OrigEvent):
-            log.error("Not an Event, found: %s" % type(event))
-        if not event.id:
-            log.warning("Not indexing event. Event has no id")
-            return
-        
-        try:
-            doc = xapian.Document()
-            doc.add_value (VALUE_EVENT_ID,
-                           xapian.sortable_serialise(float(event.id)))
-            doc.add_value (VALUE_TIMESTAMP,
-                           xapian.sortable_serialise(float(event.timestamp)))
-            self._tokenizer.set_document (doc)
-        
-            self._index_actor (event.actor)
-        
-            for subject in event.subjects:
-                if not subject.uri : continue
-                
-                # By spec URIs can have arbitrary length. In reality that's just silly.
-                # The general online "rule" is to keep URLs less than 2k so we just
-                # choose to enforce that
-                if len(subject.uri) > 2000:
-                    log.info ("URI too long (%s). Discarding: %s..."% (len(subject.uri), subject.uri[:30]))
-                    return
-                log.debug("Indexing '%s'" % subject.uri)
-                
-                self._index_uri (subject.uri)
-                self._index_text (subject.text)
-                
-                # If the subject URI is an actor, we index the .desktop also
-                if subject.uri.startswith ("application://"):
-                    self._index_actor (subject.uri)
-                
-                # File contents indexing disabled for now...
-                #self._index_contents (subject.uri)
-                
-                # FIXME: Possibly index payloads when we have apriori knowledge
-            
-            self._add_doc_filters (event, doc)    
-            self._index.add_document (doc)
-        
-        except Exception, e:
-            log.error("Error indexing event: %s" % e)
-
-    def _compile_event_filter_query (self, events):
-        """Takes a list of event templates and compiles a filter query
-           based on their, interpretations, manifestations, and actor,
-           for event and subjects.
-           
-           All fields within the same event will be ANDed and each template
-           will be ORed with the others. Like elsewhere in Zeitgeist the
-           type tree of the interpretations and manifestations will be expanded
-           to match all child symbols as well
-        """
-        query = []
-        for event in events:
-            if not isinstance(event, Event):
-                raise TypeError("Expected Event. Found %s" % type(event))
-            
-            tmpl = []
-            if event.interpretation :
-                tmpl.append(expand_type("zgei", event.interpretation))
-            if event.manifestation :
-                tmpl.append(expand_type("zgem", event.manifestation))
-            if event.actor : tmpl.append("zga:%s" % mangle_uri(event.actor))
-            for su in event.subjects:
-                if su.uri :
-                    tmpl.append("zgsu:%s" % mangle_uri(su.uri))
-                if su.interpretation :
-                    tmpl.append(expand_type("zgsi", su.interpretation))
-                if su.manifestation :
-                    tmpl.append(expand_type("zgsm", su.manifestation))
-                if su.origin :
-                    tmpl.append("zgso:%s" % mangle_uri(su.origin))
-                if su.mimetype :
-                    tmpl.append("zgst:%s" % su.mimetype)
-                if su.storage :
-                    tmpl.append("zgss:%s" % su.storage)
-            
-            tmpl = "(" + ") AND (".join(tmpl) + ")"
-            query.append(tmpl)
-        
-        return " OR ".join(query)
-    
-    def _compile_time_range_filter_query (self, time_range):
-        """Takes a TimeRange and compiles a range query for it"""
-        
-        if not isinstance(time_range, TimeRange):
-            raise TypeError("Expected TimeRange, but found %s" % type(time_range))
-        
-        return "%s..%sms" % (time_range.begin, time_range.end)
-    
-    def _get_event_from_row(self, row):
-        event = Event()
-        event[0][Event.Id] = row["id"] # Id property is read-only in the public API
-        event.timestamp = row["timestamp"]
-        for field in ("interpretation", "manifestation", "actor"):
-            # Try to get event attributes from row using the attributed field id
-            # If attribute does not exist we break the attribute fetching and return
-            # None instead of of crashing
-            try:
-                setattr(event, field, getattr(self, "_" + field).value(row[field]))
-            except KeyError, e:
-                log.error("Event %i broken: Table %s has no id %i" \
-                        %(row["id"], field, row[field]))
-                return None
-        event.origin = row["event_origin_uri"] or ""
-        event.payload = row["payload"] or "" # default payload: empty string
-        return event
-    
-    def _get_subject_from_row(self, row):
-        subject = Subject()
-        for field in ("uri", "text", "storage"):
-            setattr(subject, field, row["subj_" + field])
-        subject.origin = row["subj_origin_uri"]
-        if row["subj_current_uri"]:
-            subject.current_uri = row["subj_current_uri"]
-        for field in ("interpretation", "manifestation", "mimetype"):
-            # Try to get subject attributes from row using the attributed field id
-            # If attribute does not exist we break the attribute fetching and return
-            # None instead of crashing
-            try:
-                setattr(subject, field,
-                    getattr(self, "_" + field).value(row["subj_" + field]))
-            except KeyError, e:
-                log.error("Event %i broken: Table %s has no id %i" \
-                        %(row["id"], field, row["subj_" + field]))
-                return None
-        return subject
-    
-    def get_events(self, ids, sender=None):
-        """
-        Look up a list of events.
-        """
-        
-        t = time.time()
-        
-        if not ids:
-            return []
-        
-        # Split ids into cached and uncached
-        uncached_ids = array("i")
-        cached_ids = array("i")
-        
-        # If ids batch greater than MAX_CACHE_BATCH_SIZE ids ignore cache
-        use_cache = True
-        if len(ids) > MAX_CACHE_BATCH_SIZE:
-            use_cache = False
-        if not use_cache:
-            uncached_ids = ids
-        else:
-            for id in ids:
-                if id in self._event_cache:
-                    cached_ids.append(id)
-                else:
-                    uncached_ids.append(id)
-        
-        id_hash = defaultdict(lambda: array("i"))
-        for n, id in enumerate(ids):
-            # the same id can be at multible places (LP: #673916)
-            # cache all of them
-            id_hash[id].append(n)
-        
-        # If we are not able to get an event by the given id
-        # append None instead of raising an Error. The client
-        # might simply have requested an event that has been
-        # deleted
-        events = {}
-        sorted_events = [None]*len(ids)
-        
-        for id in cached_ids:
-            event = self._event_cache[id]
-            if event:
-                if event is not None:
-                    for n in id_hash[event.id]:
-                        # insert the event into all necessary spots (LP: #673916)
-                        sorted_events[n] = event
-        
-        # Get uncached events
-        rows = self._cursor.execute("""
-            SELECT * FROM event_view
-            WHERE id IN (%s)
-            """ % ",".join("%d" % _id for _id in uncached_ids))
-        
-        time_get_uncached = time.time() - t
-        t = time.time()
-        
-        t_get_event = 0
-        t_get_subject = 0
-        t_apply_get_hooks = 0
-        
-        row_counter = 0
-        for row in rows:
-            row_counter += 1
-            # Assumption: all rows of a same event for its different
-            # subjects are in consecutive order.
-            t_get_event -= time.time()
-            event = self._get_event_from_row(row)
-            t_get_event += time.time()
-            
-            if event:
-                # Check for existing event.id in event to attach 
-                # other subjects to it
-                if event.id not in events:
-                    events[event.id] = event
-                else:
-                    event = events[event.id]
-                    
-                t_get_subject -= time.time()
-                subject = self._get_subject_from_row(row)
-                t_get_subject += time.time()
-                # Check if subject has a proper value. If none than something went
-                # wrong while trying to fetch the subject from the row. So instead
-                # of failing and raising an error. We silently skip the event.
-                if subject:
-                    event.append_subject(subject)
-                    if use_cache and not event.payload:
-                        self._event_cache[event.id] = event
-                    if event is not None:
-                        for n in id_hash[event.id]:
-                            # insert the event into all necessary spots (LP: #673916)
-                            sorted_events[n] = event
-                    # Avoid caching events with payloads to have keep the cache MB size 
-                    # at a decent level
-                    
-
-        log.debug("Got %d raw events in %fs" % (row_counter, time_get_uncached))
-        log.debug("Got %d events in %fs" % (len(sorted_events), time.time()-t))
-        log.debug("    Where time spent in _get_event_from_row in %fs" % (t_get_event))
-        log.debug("    Where time spent in _get_subject_from_row in %fs" % (t_get_subject))
-        log.debug("    Where time spent in apply_get_hooks in %fs" % (t_apply_get_hooks))
-        return sorted_events
-    
-    def _find_events(self, return_mode, time_range, event_templates,
-        storage_state, max_events, order, sender=None):
-        """
-        Accepts 'event_templates' as either a real list of Events or as
-        a list of tuples (event_data, subject_data) as we do in the
-        DBus API.
-        
-        Return modes:
-         - 0: IDs.
-         - 1: Events.
-        """
-        t = time.time()
-        
-        where = self._build_sql_event_filter(time_range, event_templates,
-            storage_state)
-        
-        if not where.may_have_results():
-            return []
-        
-        if return_mode == 0:
-            sql = "SELECT DISTINCT id FROM event_view"
-        elif return_mode == 1:
-            sql = "SELECT id FROM event_view"
-        else:
-            raise NotImplementedError, "Unsupported return_mode."
-        
-        wheresql = " WHERE %s" % where.sql if where else ""
-        
-        def group_and_sort(field, wheresql, time_asc=False, count_asc=None,
-            aggregation_type='max'):
-            
-            args = {
-                'field': field,
-                'aggregation_type': aggregation_type,
-                'where_sql': wheresql,
-                'time_sorting': 'ASC' if time_asc else 'DESC',
-                'aggregation_sql': '',
-                'order_sql': '',
-            }
-            
-            if count_asc is not None:
-                args['aggregation_sql'] = ', COUNT(%s) AS num_events' % \
-                    field
-                args['order_sql'] = 'num_events %s,' % \
-                    ('ASC' if count_asc else 'DESC')
-            
-            return """
-                NATURAL JOIN (
-                    SELECT %(field)s,
-                        %(aggregation_type)s(timestamp) AS timestamp
-                        %(aggregation_sql)s
-                    FROM event_view %(where_sql)s
-                    GROUP BY %(field)s)
-                GROUP BY %(field)s
-                ORDER BY %(order_sql)s timestamp %(time_sorting)s
-                """ % args
-        
-        if order == ResultType.MostRecentEvents:
-            sql += wheresql + " ORDER BY timestamp DESC"
-        elif order == ResultType.LeastRecentEvents:
-            sql += wheresql + " ORDER BY timestamp ASC"
-        elif order == ResultType.MostRecentEventOrigin:
-            sql += group_and_sort("origin", wheresql, time_asc=False)
-        elif order == ResultType.LeastRecentEventOrigin:
-            sql += group_and_sort("origin", wheresql, time_asc=True)
-        elif order == ResultType.MostPopularEventOrigin:
-            sql += group_and_sort("origin", wheresql, time_asc=False,
-                count_asc=False)
-        elif order == ResultType.LeastPopularEventOrigin:
-            sql += group_and_sort("origin", wheresql, time_asc=True,
-                count_asc=True)
-        elif order == ResultType.MostRecentSubjects:
-            # Remember, event.subj_id identifies the subject URI
-            sql += group_and_sort("subj_id", wheresql, time_asc=False)
-        elif order == ResultType.LeastRecentSubjects:
-            sql += group_and_sort("subj_id", wheresql, time_asc=True)
-        elif order == ResultType.MostPopularSubjects:
-            sql += group_and_sort("subj_id", wheresql, time_asc=False,
-                count_asc=False)
-        elif order == ResultType.LeastPopularSubjects:
-            sql += group_and_sort("subj_id", wheresql, time_asc=True,
-                count_asc=True)
-        elif order == ResultType.MostRecentCurrentUri:
-            sql += group_and_sort("subj_id_current", wheresql, time_asc=False)
-        elif order == ResultType.LeastRecentCurrentUri:
-            sql += group_and_sort("subj_id_current", wheresql, time_asc=True)
-        elif order == ResultType.MostPopularCurrentUri:
-            sql += group_and_sort("subj_id_current", wheresql, time_asc=False,
-                count_asc=False)
-        elif order == ResultType.LeastPopularCurrentUri:
-            sql += group_and_sort("subj_id_current", wheresql, time_asc=True,
-                count_asc=True)
-        elif order == ResultType.MostRecentActor:
-            sql += group_and_sort("actor", wheresql, time_asc=False)
-        elif order == ResultType.LeastRecentActor:
-            sql += group_and_sort("actor", wheresql, time_asc=True)
-        elif order == ResultType.MostPopularActor:
-            sql += group_and_sort("actor", wheresql, time_asc=False,
-                count_asc=False)
-        elif order == ResultType.LeastPopularActor:
-            sql += group_and_sort("actor", wheresql, time_asc=True,
-                count_asc=True)
-        elif order == ResultType.OldestActor:
-            sql += group_and_sort("actor", wheresql, time_asc=True,
-                aggregation_type="min")
-        elif order == ResultType.MostRecentOrigin:
-            sql += group_and_sort("subj_origin", wheresql, time_asc=False)
-        elif order == ResultType.LeastRecentOrigin:
-            sql += group_and_sort("subj_origin", wheresql, time_asc=True)
-        elif order == ResultType.MostPopularOrigin:
-            sql += group_and_sort("subj_origin", wheresql, time_asc=False,
-                count_asc=False)
-        elif order == ResultType.LeastPopularOrigin:
-            sql += group_and_sort("subj_origin", wheresql, time_asc=True,
-                count_asc=True)
-        elif order == ResultType.MostRecentSubjectInterpretation:
-            sql += group_and_sort("subj_interpretation", wheresql,
-                time_asc=False)
-        elif order == ResultType.LeastRecentSubjectInterpretation:
-            sql += group_and_sort("subj_interpretation", wheresql,
-                time_asc=True)
-        elif order == ResultType.MostPopularSubjectInterpretation:
-            sql += group_and_sort("subj_interpretation", wheresql,
-                time_asc=False, count_asc=False)
-        elif order == ResultType.LeastPopularSubjectInterpretation:
-            sql += group_and_sort("subj_interpretation", wheresql,
-                time_asc=True, count_asc=True)
-        elif order == ResultType.MostRecentMimeType:
-            sql += group_and_sort("subj_mimetype", wheresql, time_asc=False)
-        elif order == ResultType.LeastRecentMimeType:
-            sql += group_and_sort("subj_mimetype", wheresql, time_asc=True)
-        elif order == ResultType.MostPopularMimeType:
-            sql += group_and_sort("subj_mimetype", wheresql, time_asc=False,
-                count_asc=False)
-        elif order == ResultType.LeastPopularMimeType:
-            sql += group_and_sort("subj_mimetype", wheresql, time_asc=True,
-                count_asc=True)
-        
-        if max_events > 0:
-            sql += " LIMIT %d" % max_events
-        result = array("i", self._cursor.execute(sql, where.arguments).fetch(0))
-        
-        if return_mode == 0:
-            log.debug("Found %d event IDs in %fs" % (len(result), time.time()- t))
-        elif return_mode == 1:
-            log.debug("Found %d events in %fs" % (len(result), time.time()- t))
-            result = self.get_events(ids=result, sender=sender)    
-        else:
-            raise Exception("%d" % return_mode)
-        
-        return result
-        
-    @staticmethod
-    def _build_templates(templates):
-        for event_template in templates:
-            event_data = event_template[0]
-            for subject in (event_template[1] or (Subject(),)):
-                yield Event((event_data, [], None)), Subject(subject)
-    
-    def _build_sql_from_event_templates(self, templates):
-    
-        where_or = WhereClause(WhereClause.OR)
-        
-        for template in templates:
-            event_template = Event((template[0], [], None))
-            if template[1]:
-                subject_templates = [Subject(data) for data in template[1]]
-            else:
-                subject_templates = None
-            
-            subwhere = WhereClause(WhereClause.AND)
-            
-            if event_template.id:
-                subwhere.add("id = ?", event_template.id)
-            
-            try:
-                value, negation, wildcard = parse_operators(Event, Event.Interpretation, event_template.interpretation)
-                # Expand event interpretation children
-                event_interp_where = WhereClause(WhereClause.OR, negation)
-                for child_interp in (Symbol.find_child_uris_extended(value)):
-                    if child_interp:
-                        event_interp_where.add_text_condition("interpretation",
-                                               child_interp, like=wildcard, cache=self._interpretation)
-                if event_interp_where:
-                    subwhere.extend(event_interp_where)
-                
-                value, negation, wildcard = parse_operators(Event, Event.Manifestation, event_template.manifestation)
-                # Expand event manifestation children
-                event_manif_where = WhereClause(WhereClause.OR, negation)
-                for child_manif in (Symbol.find_child_uris_extended(value)):
-                    if child_manif:
-                        event_manif_where.add_text_condition("manifestation",
-                                              child_manif, like=wildcard, cache=self._manifestation)
-                if event_manif_where:
-                    subwhere.extend(event_manif_where)
-                
-                value, negation, wildcard = parse_operators(Event, Event.Actor, event_template.actor)
-                if value:
-                    subwhere.add_text_condition("actor", value, wildcard, negation, cache=self._actor)
-                
-                value, negation, wildcard = parse_operators(Event, Event.Origin, event_template.origin)
-                if value:
-                    subwhere.add_text_condition("origin", value, wildcard, negation)
-                
-                if subject_templates is not None:
-                    for subject_template in subject_templates:
-                        value, negation, wildcard = parse_operators(Subject, Subject.Interpretation, subject_template.interpretation)
-                        # Expand subject interpretation children
-                        su_interp_where = WhereClause(WhereClause.OR, negation)
-                        for child_interp in (Symbol.find_child_uris_extended(value)):
-                            if child_interp:
-                                su_interp_where.add_text_condition("subj_interpretation",
-                                                    child_interp, like=wildcard, cache=self._interpretation)
-                        if su_interp_where:
-                            subwhere.extend(su_interp_where)
-                        
-                        value, negation, wildcard = parse_operators(Subject, Subject.Manifestation, subject_template.manifestation)
-                        # Expand subject manifestation children
-                        su_manif_where = WhereClause(WhereClause.OR, negation)
-                        for child_manif in (Symbol.find_child_uris_extended(value)):
-                            if child_manif:
-                                su_manif_where.add_text_condition("subj_manifestation",
-                                                   child_manif, like=wildcard, cache=self._manifestation)
-                        if su_manif_where:
-                            subwhere.extend(su_manif_where)
-                        
-                        # FIXME: Expand mime children as well.
-                        # Right now we only do exact matching for mimetypes
-                        # thekorn: this will be fixed when wildcards are supported
-                        value, negation, wildcard = parse_operators(Subject, Subject.Mimetype, subject_template.mimetype)
-                        if value:
-                            subwhere.add_text_condition("subj_mimetype",
-                                         value, wildcard, negation, cache=self._mimetype)
-                
-                        for key in ("uri", "origin", "text"):
-                            value = getattr(subject_template, key)
-                            if value:
-                                value, negation, wildcard = parse_operators(Subject, getattr(Subject, key.title()), value)
-                                subwhere.add_text_condition("subj_%s" % key, value, wildcard, negation)
-                        
-                        if subject_template.current_uri:
-                            value, negation, wildcard = parse_operators(Subject,
-                                Subject.CurrentUri, subject_template.current_uri)
-                            subwhere.add_text_condition("subj_current_uri", value, wildcard, negation)
-                        
-                        if subject_template.storage:
-                            subwhere.add_text_condition("subj_storage", subject_template.storage)
-                        
-            except KeyError, e:
-                # Value not in DB
-                log.debug("Unknown entity in query: %s" % e)
-                where_or.register_no_result()
-                continue
-            where_or.extend(subwhere) 
-        return where_or
-    
-    def _build_sql_event_filter(self, time_range, templates, storage_state):
-        
-        where = WhereClause(WhereClause.AND)
-        
-        # thekorn: we are using the unary operator here to tell sql to not use
-        # the index on the timestamp column at the first place. This `fix` for
-        # (LP: #672965) is based on some benchmarks, which suggest a performance
-        # win, but we might not oversee all implications.
-        # (see http://www.sqlite.org/optoverview.html section 6.0)
-        min_time, max_time = time_range
-        if min_time != 0:
-            where.add("+timestamp >= ?", min_time)
-        if max_time != sys.maxint:
-            where.add("+timestamp <= ?", max_time)
-        
-        if storage_state in (StorageState.Available, StorageState.NotAvailable):
-            where.add("(subj_storage_state = ? OR subj_storage_state IS NULL)",
-                storage_state)
-        elif storage_state != StorageState.Any:
-            raise ValueError, "Unknown storage state '%d'" % storage_state
-        
-        where.extend(self._build_sql_from_event_templates(templates))
-        
-        return where
-
-if __name__ == "__main__":
-    mainloop = gobject.MainLoop(is_running=True)
-    search_engine = SearchEngineExtension()
-    ZG_CLIENT._iface.connect_exit(lambda: mainloop.quit ())
-    mainloop.run()
-
diff --git a/extensions/fts-python/lrucache.py b/extensions/fts-python/lrucache.py
deleted file mode 100644
index 265ed401..00000000
--- a/extensions/fts-python/lrucache.py
+++ /dev/null
@@ -1,125 +0,0 @@
-# -.- coding: utf-8 -.-
-
-# lrucache.py
-#
-# Copyright © 2009 Mikkel Kamstrup Erlandsen <mikkel.kamstrup@gmail.com>
-# Copyright © 2009 Markus Korn <thekorn@gmx.de>
-# Copyright © 2011 Seif Lotfy <seif@lotfy.com>
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU Lesser General Public License as published by
-# the Free Software Foundation, either version 2.1 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU Lesser General Public License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public License
-# along with this program.  If not, see <http://www.gnu.org/licenses/>.
-
-class LRUCache:
-	"""
-	A simple LRUCache implementation backed by a linked list and a dict.
-	It can be accessed and updated just like a dict. To check if an element
-	exists in the cache the following type of statements can be used:
-		if "foo" in cache
-	"""
-	   		
-	class _Item:
-		"""
-		A container for each item in LRUCache which knows about the 
-		item's position and relations
-		"""
-		def __init__(self, item_key, item_value):
-			self.value = item_value
-			self.key = item_key
-			self.next = None
-			self.prev = None
-	
-	def __init__(self, max_size):
-		"""
-		The size of the cache (in number of cached items) is guaranteed to
-		never exceed 'size'
-		"""
-		self._max_size = max_size
-		self.clear()
-	
-	
-	def clear(self):
-		self._list_end = None # The newest item
-		self._list_start = None # Oldest item
-		self._map = {}	
-	
-	def __len__(self):
-		return len(self._map)
-	
-	def __contains__(self, key):
-		return key in self._map
-		
-	def __delitem__(self, key):
-		item = self._map[key]
-		if item.prev:
-			item.prev.next = item.next
-		else:
-			# we are deleting the first item, so we need a new first one
-			self._list_start = item.next
-		if item.next:
-			item.next.prev = item.prev
-		else:
-			# we are deleting the last item, get a new last one
-			self._list_end = item.prev
-		del self._map[key], item
-	
-	def __setitem__(self, key, value):
-		if key in self._map:
-			item = self._map[key]
-			item.value = value
-			self._move_item_to_end(item)
-		else:
-			new = LRUCache._Item(key, value)
-			self._append_to_list(new)
-
-			if len(self._map) > self._max_size :
-				# Remove eldest entry from list
-				self.remove_eldest_item()				
-
-	def __getitem__(self, key):
-		item = self._map[key]
-		self._move_item_to_end(item)
-		return item.value
-	
-	def __iter__(self):
-		"""
-		Iteration is in order from eldest to newest,
-		and returns (key,value) tuples
-		"""
-		iter = self._list_start
-		while iter != None:
-			yield (iter.key, iter.value)
-			iter = iter.next
-	
-	def _move_item_to_end(self, item):
-		del self[item.key]
-		self._append_to_list(item)
-	
-	def _append_to_list(self, item):
-		self._map[item.key] = item
-		if not self._list_start:
-			self._list_start = item
-		if self._list_end:
-			self._list_end.next = item
-			item.prev = self._list_end
-			item.next = None
-		self._list_end = item
-	
-	def remove_eldest_item(self):
-		if self._list_start == self._list_end:
-			self._list_start = None
-			self._list_end = None
-			return
-		old = self._list_start
-		old.next.prev = None
-		self._list_start = old.next
-		del self[old.key], old
diff --git a/extensions/fts-python/sql.py b/extensions/fts-python/sql.py
deleted file mode 100644
index 20f1bc30..00000000
--- a/extensions/fts-python/sql.py
+++ /dev/null
@@ -1,301 +0,0 @@
-# -.- coding: utf-8 -.-
-
-# Zeitgeist
-#
-# Copyright © 2009-2010 Siegfried-Angel Gevatter Pujals <rainct@ubuntu.com>
-# Copyright © 2009 Mikkel Kamstrup Erlandsen <mikkel.kamstrup@gmail.com>
-# Copyright © 2009-2011 Markus Korn <thekorn@gmx.net>
-# Copyright © 2009 Seif Lotfy <seif@lotfy.com>
-# Copyright © 2011 J.P. Lacerda <jpaflacerda@gmail.com>
-# Copyright © 2011 Collabora Ltd.
-#             By Siegfried-Angel Gevatter Pujals <rainct@ubuntu.com>
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU Lesser General Public License as published by
-# the Free Software Foundation, either version 2.1 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU Lesser General Public License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public License
-# along with this program.  If not, see <http://www.gnu.org/licenses/>.
-
-import sqlite3
-import logging
-import time
-import os
-import shutil
-
-from constants import constants
-
-log = logging.getLogger("siis.zeitgeist.sql")
-
-TABLE_MAP = {
-	"origin": "uri",
-	"subj_mimetype": "mimetype",
-	"subj_origin": "uri",
-	"subj_uri": "uri",
-	"subj_current_uri": "uri",
-}
-
-def explain_query(cursor, statement, arguments=()):
-	plan = ""
-	for r in cursor.execute("EXPLAIN QUERY PLAN "+statement, arguments).fetchall():
-		plan += str(list(r)) + "\n"
-	log.debug("Got query:\nQUERY:\n%s (%s)\nPLAN:\n%s" % (statement, arguments, plan))
-
-class UnicodeCursor(sqlite3.Cursor):
-	
-	debug_explain = os.getenv("ZEITGEIST_DEBUG_QUERY_PLANS")
-	
-	@staticmethod
-	def fix_unicode(obj):
-		if isinstance(obj, (int, long)):
-			# thekorn: as long as we are using the unary operator for timestamp
-			# related queries we have to make sure that integers are not
-			# converted to strings, same applies for long numbers.
-			return obj
-		if isinstance(obj, str):
-			obj = obj.decode("UTF-8")
-		# seif: Python’s default encoding is ASCII, so whenever a character with
-		# an ASCII value > 127 is in the input data, you’ll get a UnicodeDecodeError
-		# because that character can’t be handled by the ASCII encoding.
-		try:
-			obj = unicode(obj)
-		except UnicodeDecodeError, ex:
-			pass
-		return obj
-	
-	def execute(self, statement, parameters=()):
-		parameters = [self.fix_unicode(p) for p in parameters]
-		if UnicodeCursor.debug_explain:
-			explain_query(super(UnicodeCursor, self), statement, parameters)
-		return super(UnicodeCursor, self).execute(statement, parameters)
-
-	def fetch(self, index=None):
-		if index is not None:
-			for row in self:
-				yield row[index]
-		else:
-			for row in self:
-				yield row
-
-def _get_schema_version (cursor, schema_name):
-	"""
-	Returns the schema version for schema_name or returns 0 in case
-	the schema doesn't exist.
-	"""
-	try:
-		schema_version_result = cursor.execute("""
-			SELECT version FROM schema_version WHERE schema=?
-		""", (schema_name,))
-		result = schema_version_result.fetchone()
-		return result[0] if result else 0
-	except sqlite3.OperationalError, e:
-		# The schema isn't there...
-		log.debug ("Schema '%s' not found: %s" % (schema_name, e))
-		return 0
-
-def _connect_to_db(file_path):
-	conn = sqlite3.connect(file_path)
-	conn.row_factory = sqlite3.Row
-	cursor = conn.cursor(UnicodeCursor)
-	return cursor
-
-_cursor = None
-def get_default_cursor():
-	global _cursor
-	if not _cursor:
-		dbfile = constants.DATABASE_FILE
-		start = time.time()
-		log.info("Using database: %s" % dbfile)
-		new_database = not os.path.exists(dbfile)
-		_cursor = _connect_to_db(dbfile)
-		core_schema_version = _get_schema_version(_cursor, constants.CORE_SCHEMA)
-		if core_schema_version < constants.CORE_SCHEMA_VERSION:
-			log.exception(
-				"Database '%s' is on version %s, but %s is required" % \
-				(constants.CORE_SCHEMA, core_schema_version,
-				constants.CORE_SCHEMA_VERSION))
-			raise SystemExit(27)
-	return _cursor
-def unset_cursor():
-	global _cursor
-	_cursor = None
-
-class TableLookup(dict):
-	
-	# We are not using an LRUCache as pressumably there won't be thousands
-	# of manifestations/interpretations/mimetypes/actors on most
-	# installations, so we can save us the overhead of tracking their usage.
-	
-	def __init__(self, cursor, table):
-		
-		self._cursor = cursor
-		self._table = table
-		
-		for row in cursor.execute("SELECT id, value FROM %s" % table):
-			self[row["value"]] = row["id"]
-		
-		self._inv_dict = dict((value, key) for key, value in self.iteritems())
-	
-	def __getitem__(self, name):
-		# Use this for inserting new properties into the database
-		if name in self:
-			return super(TableLookup, self).__getitem__(name)
-		id = self._cursor.execute("SELECT id FROM %s WHERE value=?"
-			% self._table, (name,)).fetchone()[0]
-		# If we are here it's a newly inserted value, insert it into cache
-		self[name] = id
-		self._inv_dict[id] = name
-		return id
-	
-	def value(self, id):
-		# When we fetch an event, it either was already in the database
-		# at the time Zeitgeist started or it was inserted later -using
-		# Zeitgeist-, so here we always have the data in memory already.
-		return self._inv_dict[id]
-	
-	def id(self, name):
-		# Use this when fetching values which are supposed to be in the
-		# database already. Eg., in find_eventids.
-		return super(TableLookup, self).__getitem__(name)
-		
-	def remove_id(self, id):
-		value = self.value(id)
-		del self._inv_dict[id]
-		del self[value]
-		
-def get_right_boundary(text):
-	""" returns the smallest string which is greater than `text` """
-	if not text:
-		# if the search prefix is empty we query for the whole range
-		# of 'utf-8 'unicode chars
-		return unichr(0x10ffff)
-	if isinstance(text, str):
-		# we need to make sure the text is decoded as 'utf-8' unicode
-		text = unicode(text, "UTF-8")
-	charpoint = ord(text[-1])
-	if charpoint == 0x10ffff:
-		# if the last character is the biggest possible char we need to
-		# look at the second last
-		return get_right_boundary(text[:-1])
-	return text[:-1] + unichr(charpoint+1)
-
-class WhereClause:
-	"""
-	This class provides a convenient representation a SQL `WHERE' clause,
-	composed of a set of conditions joined together.
-	
-	The relation between conditions can be either of type *AND* or *OR*, but
-	not both. To create more complex clauses, use several :class:`WhereClause`
-	instances and joining them together using :meth:`extend`.
-	
-	Instances of this class can then be used to obtain a line of SQL code and
-	a list of arguments, for use with the SQLite3 module, accessing the
-	appropriate properties:
-		>>> where.sql, where.arguments
-	"""
-	
-	AND = " AND "
-	OR = " OR "
-	NOT = "NOT "
-	
-	@staticmethod
-	def optimize_glob(column, table, prefix):
-		"""returns an optimized version of the GLOB statement as described
-		in http://www.sqlite.org/optoverview.html `4.0 The LIKE optimization`
-		"""
-		if isinstance(prefix, str):
-			# we need to make sure the text is decoded as 'utf-8' unicode
-			prefix = unicode(prefix, "UTF-8")
-		if not prefix:
-			# empty prefix means 'select all', no way to optimize this
-			sql = "SELECT %s FROM %s" %(column, table)
-			return sql, ()
-		elif all([i == unichr(0x10ffff) for i in prefix]):
-			sql = "SELECT %s FROM %s WHERE value >= ?" %(column, table)
-			return sql, (prefix,)
-		else:
-			sql = "SELECT %s FROM %s WHERE (value >= ? AND value < ?)" %(column, table)
-			return sql, (prefix, get_right_boundary(prefix))
-	
-	def __init__(self, relation, negation=False):
-		self._conditions = []
-		self.arguments = []
-		self._relation = relation
-		self._no_result_member = False
-		self._negation = negation
-	
-	def __len__(self):
-		return len(self._conditions)
-	
-	def add(self, condition, arguments=None):
-		if not condition:
-			return
-		self._conditions.append(condition)
-		if arguments is not None:
-			if not hasattr(arguments, "__iter__"):
-				self.arguments.append(arguments)
-			else:
-				self.arguments.extend(arguments)
-			
-	def add_text_condition(self, column, value, like=False, negation=False, cache=None):
-		if like:
-			assert column in ("origin", "subj_uri", "subj_current_uri",
-			"subj_origin", "actor", "subj_mimetype"), \
-				"prefix search on the %r column is not supported by zeitgeist" % column
-			if column == "subj_uri":
-				# subj_id directly points to the id of an uri entry
-				view_column = "subj_id"
-			elif column == "subj_current_uri":
-				view_column = "subj_id_current"
-			else:
-				view_column = column
-			optimized_glob, value = self.optimize_glob("id", TABLE_MAP.get(column, column), value)
-			sql = "%s %sIN (%s)" %(view_column, self.NOT if negation else "", optimized_glob)
-			if negation:
-				sql += " OR %s IS NULL" % view_column
-		else:
-			if column == "origin":
-				column ="event_origin_uri"
-			elif column == "subj_origin":
-				column = "subj_origin_uri"
-			sql = "%s %s= ?" %(column, "!" if negation else "")
-			if cache is not None:
-				value = cache[value]
-		self.add(sql, value)
-	
-	def extend(self, where):
-		self.add(where.sql, where.arguments)
-		if not where.may_have_results():
-			if self._relation == self.AND:
-				self.clear()
-			self.register_no_result()
-	
-	@property
-	def sql(self):
-		if self: # Do not return "()" if there are no conditions
-			negation = self.NOT if self._negation else ""
-			return "%s(%s)" %(negation, self._relation.join(self._conditions))
-	
-	def register_no_result(self):
-		self._no_result_member = True
-	
-	def may_have_results(self):
-		"""
-		Return False if we know from our cached data that the query
-		will give no results.
-		"""
-		return len(self._conditions) > 0 or not self._no_result_member
-	
-	def clear(self):
-		"""
-		Reset this WhereClause to the state of a newly created one.
-		"""
-		self._conditions = []
-		self.arguments = []
-		self._no_result_member = False
diff --git a/extensions/fts.vala b/extensions/fts.vala
index e6435927..0c614996 100644
--- a/extensions/fts.vala
+++ b/extensions/fts.vala
@@ -52,8 +52,11 @@ namespace Zeitgeist
     class SearchEngine: Extension, RemoteSearchEngine
     {
 
+        private const string INDEXER_NAME = "org.gnome.zeitgeist.SimpleIndexer";
+
         private RemoteSimpleIndexer siin;
         private uint registration_id;
+        private MonitorManager? notifier;
 
         SearchEngine ()
         {
@@ -64,6 +67,15 @@ namespace Zeitgeist
         {
             if (Utils.using_in_memory_database ()) return;
 
+            // installing a monitor from the daemon will ensure that we don't
+            // miss any notifications that would be emitted in between
+            // zeitgeist start and fts daemon start
+            notifier = MonitorManager.get_default ();
+            notifier.install_monitor (new BusName (INDEXER_NAME),
+                                      "/org/gnome/zeitgeist/monitor/special",
+                                      new TimeRange.anytime (),
+                                      new GenericArray<Event> ());
+
             try
             {
                 var connection = Bus.get_sync (BusType.SESSION, null);
@@ -73,7 +85,7 @@ namespace Zeitgeist
                 // FIXME: shouldn't we delay this to next idle callback?
                 // Get SimpleIndexer
                 Bus.watch_name_on_connection (connection,
-                    "org.gnome.zeitgeist.SimpleIndexer",
+                    INDEXER_NAME,
                     BusNameWatcherFlags.AUTO_START,
                     (conn) =>
                     {
diff --git a/src/datamodel.vala b/src/datamodel.vala
index 1c10a07f..4dd4a6b3 100644
--- a/src/datamodel.vala
+++ b/src/datamodel.vala
@@ -288,7 +288,6 @@ namespace Zeitgeist
             if (property.has_prefix (parsed)) matches = true;
         }
 
-        debug ("Checking matches for %s", parsed);
         return (is_negated) ? !matches : matches;
     }
 
@@ -481,8 +480,6 @@ namespace Zeitgeist
             */
 
             //Check if interpretation is child of template_event or same
-            debug("Checking if event %u matches template_event %u\n",
-                this.id, template_event.id);
             if (!check_field_match (this.interpretation, template_event.interpretation, true))
                 return false;
             //Check if manifestation is child of template_event or same
diff --git a/src/engine.vala b/src/engine.vala
index 2de9849c..a6bac29b 100644
--- a/src/engine.vala
+++ b/src/engine.vala
@@ -44,6 +44,7 @@ public class Engine : DbReader
         Object (database: new Zeitgeist.SQLite.Database ());
 
         // TODO: take care of this if we decide to subclass Engine
+        // (we need to propagate the error, so it can't go to construct {})
         last_id = database.get_last_id ();
         extension_collection = new ExtensionCollection (this);
     }
diff --git a/src/notify.vala b/src/notify.vala
index bc9ae669..c3526bad 100644
--- a/src/notify.vala
+++ b/src/notify.vala
@@ -26,11 +26,32 @@ namespace Zeitgeist
     public class MonitorManager : Object
     {
 
+        private static unowned MonitorManager? instance;
+
         private HashTable<string, Monitor> monitors;
         private HashTable<string, GenericArray<string>> connections;
 
+        // ref-counted singleton - it can get destroyed easily, but has
+        // singleton semantics as long as some top-level instance keeps
+        // a reference to it
+        public static MonitorManager get_default ()
+        {
+            return instance ?? new MonitorManager ();
+        }
+
+        private MonitorManager ()
+        {
+        }
+
+        ~MonitorManager ()
+        {
+            instance = null;
+        }
+
         construct
         {
+            instance = this;
+
             monitors = new HashTable<string, Monitor> (str_hash, str_equal);
             connections = new HashTable<string, GenericArray<string>>
                 (str_hash, str_equal);
@@ -53,7 +74,8 @@ namespace Zeitgeist
 
                         foreach (var owner in connections.get_keys())
                         {
-                            if (arg0 == owner)
+                            // Don't disconnect monitors using service names
+                            if (arg0 == owner && g_dbus_is_unique_name (arg0))
                             {
                                 var paths = connections.lookup (arg0);
                                 debug("Client disconnected %s", owner);
@@ -120,32 +142,49 @@ namespace Zeitgeist
             {
                 queued_notifications = new SList<QueuedNotification> ();
                 Bus.get_proxy<RemoteMonitor> (BusType.SESSION, peer,
-                    object_path, DBusProxyFlags.DO_NOT_LOAD_PROPERTIES |
-                    DBusProxyFlags.DO_NOT_CONNECT_SIGNALS,
+                    object_path,
+                    DBusProxyFlags.DO_NOT_LOAD_PROPERTIES
+                    | DBusProxyFlags.DO_NOT_CONNECT_SIGNALS
+                    | DBusProxyFlags.DO_NOT_AUTO_START,
                     null, (obj, res) =>
                     {
                         try
                         {
                             proxy_object = Bus.get_proxy.end (res);
+                            // Process queued notifications...
+                            flush_notifications ();
+
+                            proxy_object.notify["g-name-owner"].connect (name_owner_changed);
                         }
                         catch (IOError err)
                         {
                             warning ("%s", err.message);
                         }
-
-                        // Process queued notifications...
-                        queued_notifications.reverse ();
-                        foreach (unowned QueuedNotification notification
-                            in queued_notifications)
-                        {
-                            notification.send (proxy_object);
-                        }
-                        queued_notifications = null;
                     });
                 time_range = tr;
                 event_templates = templates;
             }
 
+            private void name_owner_changed () // handler for notify::g-name-owner on the proxy
+                requires (proxy_object != null)
+            {
+                // FIXME: can we use this to actually remove the monitor?
+                //  (instead of using NameOwnerChanged signal)
+                DBusProxy? p = proxy_object as DBusProxy; // `as` yields null when the cast fails
+                if (p != null && p.g_name_owner != null) flush_notifications (); // name has an owner: deliver the queue
+            }
+
+            private void flush_notifications () // send every queued notification, then empty the queue
+            {
+                queued_notifications.reverse (); // NOTE(review): presumably entries are prepended, so this restores arrival order - confirm
+                foreach (unowned QueuedNotification notification
+                    in queued_notifications)
+                {
+                    notification.send (proxy_object); // each queued entry is handed the monitor's proxy
+                }
+                queued_notifications = null; // drop the entries that were just sent
+            }
+
             private bool matches (Event event)
             {
                 if (event_templates.length == 0)
@@ -182,8 +221,15 @@ namespace Zeitgeist
                         // between monitors?
                         Variant events_v = Events.to_variant (matching_events);
 
+                        string? name_owner = null;
                         if (proxy_object != null)
                         {
+                            DBusProxy p = proxy_object as DBusProxy;
+                            if (p != null) name_owner = p.g_name_owner;
+                        }
+
+                        if (proxy_object != null && name_owner != null)
+                        {
                             DBusProxy p = (DBusProxy) proxy_object;
                             debug ("Notifying %s about %d insertions",
                                 p.get_name (), matching_events.length);
@@ -208,8 +254,15 @@ namespace Zeitgeist
                 {
                     Variant time_v = intersect_tr.to_variant ();
 
+                    string? name_owner = null;
                     if (proxy_object != null)
                     {
+                        DBusProxy p = proxy_object as DBusProxy;
+                        if (p != null) name_owner = p.g_name_owner;
+                    }
+
+                    if (proxy_object != null && name_owner != null)
+                    {
                         proxy_object.notify_delete (time_v, event_ids);
                     }
                     else
diff --git a/src/sql.vala b/src/sql.vala
index feea64bb..8f5bd2be 100644
--- a/src/sql.vala
+++ b/src/sql.vala
@@ -111,10 +111,10 @@ namespace Zeitgeist.SQLite
                     if (is_read_only)
                     {
                         int ver = DatabaseSchema.get_schema_version (database);
-                        if (ver != DatabaseSchema.CORE_SCHEMA_VERSION)
+                        if (ver < DatabaseSchema.CORE_SCHEMA_VERSION)
                         {
                             throw new EngineError.DATABASE_CANTOPEN (
-                                "Unable to open database");
+                                "Unable to open database: old schema version");
                         }
                     }
                     else
diff --git a/src/table-lookup.vala b/src/table-lookup.vala
index 0d59c92d..642bfba6 100644
--- a/src/table-lookup.vala
+++ b/src/table-lookup.vala
@@ -90,6 +90,26 @@ namespace Zeitgeist.SQLite
             // When we fetch an event, it either was already in the database
             // at the time Zeitgeist started or it was inserted later -using
             // Zeitgeist-, so here we always have the data in memory already.
+            unowned string val = id_to_value.lookup (id);
+            if (val != null) return val;
+
+            // The above statement isn't exactly true. If this is a standalone
+            // reader in a separate process, the values won't be kept updated
+            // so we need to query the DB if we don't find it.
+            int rc;
+
+            rc = db.exec ("SELECT value FROM %s WHERE id=%d".printf (table, id),
+                (n_columns, values, column_names) =>
+                {
+                    id_to_value.insert (id, values[0]);
+                    value_to_id.insert (values[0], id);
+                    return 0;
+                }, null);
+            if (rc != Sqlite.OK)
+            {
+                critical ("Can't get data from table %s: %d, %s\n", table,
+                    rc, db.errmsg ());
+            }
             return id_to_value.lookup (id);
         }
 
diff --git a/src/zeitgeist-daemon.vala b/src/zeitgeist-daemon.vala
index 2f3d32f7..fd8c39f5 100644
--- a/src/zeitgeist-daemon.vala
+++ b/src/zeitgeist-daemon.vala
@@ -122,7 +122,7 @@ namespace Zeitgeist
         public Daemon () throws EngineError
         {
             engine = new Engine ();
-            notifications = new MonitorManager ();
+            notifications = MonitorManager.get_default ();
         }
 
         public Variant get_events (uint32[] event_ids, BusName sender)