diff options
48 files changed, 3147 insertions, 1899 deletions
@@ -44,12 +44,23 @@ extensions/*.c extensions/*.stamp extensions/*.la extensions/*.lo +extensions/fts++/.deps +extensions/fts++/.libs +extensions/fts++/*.c +extensions/fts++/*.stamp +extensions/fts++/*.la +extensions/fts++/*.lo +extensions/fts++/zeitgeist-internal.* +extensions/fts++/test/.deps +extensions/fts++/test/.libs +extensions/fts++/test/test-fts +extensions/fts++/org.gnome.zeitgeist.fts.service +extensions/fts++/zeitgeist-fts test/direct/marshalling test/dbus/__pycache__ test/direct/table-lookup-test src/zeitgeist-engine.vapi src/zeitgeist-engine.h -extensions/fts-python/org.gnome.zeitgeist.fts.service py-compile python/_ontology.py test/direct/*.c diff --git a/configure.ac b/configure.ac index 6b5ca97e..83b06deb 100644 --- a/configure.ac +++ b/configure.ac @@ -8,6 +8,7 @@ AM_PATH_PYTHON AC_PROG_CC AM_PROG_CC_C_O +AC_PROG_CXX AC_DISABLE_STATIC AC_PROG_LIBTOOL @@ -59,7 +60,8 @@ AC_CONFIG_FILES([ Makefile src/Makefile extensions/Makefile - extensions/fts-python/Makefile + extensions/fts++/Makefile + extensions/fts++/test/Makefile data/Makefile data/ontology/Makefile python/Makefile diff --git a/extensions/Makefile.am b/extensions/Makefile.am index e6642522..5ddc9238 100644 --- a/extensions/Makefile.am +++ b/extensions/Makefile.am @@ -1,4 +1,4 @@ -SUBDIRS = fts-python +SUBDIRS = fts++ NULL = diff --git a/extensions/fts++/Makefile.am b/extensions/fts++/Makefile.am new file mode 100644 index 00000000..931695f7 --- /dev/null +++ b/extensions/fts++/Makefile.am @@ -0,0 +1,113 @@ +SUBDIRS = test +NULL = + +noinst_LTLIBRARIES = libzeitgeist-internal.la +libexec_PROGRAMS = zeitgeist-fts + +servicedir = $(DBUS_SERVICES_DIR) +service_DATA = org.gnome.zeitgeist.fts.service + +org.gnome.zeitgeist.fts.service: org.gnome.zeitgeist.fts.service.in + $(AM_V_GEN)sed -e s!\@libexecdir\@!$(libexecdir)! 
< $< > $@ +org.gnome.zeitgeist.fts.service: Makefile + +AM_CPPFLAGS = \ + $(ZEITGEIST_CFLAGS) \ + -include $(CONFIG_HEADER) \ + -w \ + $(NULL) + +AM_VALAFLAGS = \ + --target-glib=2.26 \ + --pkg gio-2.0 \ + --pkg sqlite3 \ + --pkg posix \ + --pkg gmodule-2.0 \ + $(top_srcdir)/config.vapi \ + $(NULL) + +libzeitgeist_internal_la_VALASOURCES = \ + datamodel.vala \ + db-reader.vala \ + engine.vala \ + sql.vala \ + remote.vala \ + utils.vala \ + errors.vala \ + table-lookup.vala \ + sql-schema.vala \ + where-clause.vala \ + ontology.vala \ + ontology-uris.vala \ + mimetype.vala \ + ext-dummies.vala \ + $(NULL) + +libzeitgeist_internal_la_SOURCES = \ + zeitgeist-internal.stamp \ + $(libzeitgeist_internal_la_VALASOURCES:.vala=.c) \ + $(NULL) + +libzeitgeist_internal_la_LIBADD = \ + $(ZEITGEIST_LIBS) \ + $(NULL) + +zeitgeist_fts_VALASOURCES = \ + zeitgeist-fts.vala \ + $(NULL) + +zeitgeist_fts_SOURCES = \ + zeitgeist-fts_vala.stamp \ + $(zeitgeist_fts_VALASOURCES:.vala=.c) \ + controller.cpp \ + controller.h \ + fts.cpp \ + fts.h \ + indexer.cpp \ + indexer.h \ + task.cpp \ + task.h \ + stringutils.cpp \ + stringutils.h \ + $(NULL) + +zeitgeist_fts_LDADD = \ + $(builddir)/libzeitgeist-internal.la \ + -lxapian \ + $(NULL) + +BUILT_SOURCES = \ + zeitgeist-internal.stamp \ + zeitgeist-fts_vala.stamp \ + $(NULL) + +zeitgeist-internal.stamp: $(libzeitgeist_internal_la_VALASOURCES) + $(VALA_V)$(VALAC) $(AM_VALAFLAGS) $(VALAFLAGS) -C -H zeitgeist-internal.h --library zeitgeist-internal $^ + @touch "$@" + +zeitgeist-fts_vala.stamp: $(zeitgeist_fts_VALASOURCES) + $(VALA_V)$(VALAC) $(AM_VALAFLAGS) $(VALAFLAGS) \ + $(srcdir)/zeitgeist-internal.vapi $(srcdir)/fts.vapi -C $^ + @touch "$@" + +EXTRA_DIST = \ + $(libzeitgeist_internal_la_VALASOURCES) \ + $(zeitgeist_fts_VALASOURCES) \ + zeitgeist-fts_vala.stamp \ + zeitgeist-internal.h \ + zeitgeist-internal.vapi \ + org.gnome.zeitgeist.fts.service.in \ + $(NULL) + +CLEANFILES = org.gnome.zeitgeist.fts.service + +DISTCLEANFILES = \ + 
$(NULL) + +distclean-local: + rm -f *.c *.o *.stamp *.~[0-9]~ + +VALA_V = $(VALA_V_$(V)) +VALA_V_ = $(VALA_V_$(AM_DEFAULT_VERBOSITY)) +VALA_V_0 = @echo " VALAC " $^; + diff --git a/extensions/fts++/controller.cpp b/extensions/fts++/controller.cpp new file mode 100644 index 00000000..51584cd5 --- /dev/null +++ b/extensions/fts++/controller.cpp @@ -0,0 +1,136 @@ +/* + * Copyright (C) 2012 Mikkel Kamstrup Erlandsen + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ * + * Authored by Mikkel Kamstrup Erlandsen <mikkel.kamstrup@gmail.com> + * + */ + +#include "controller.h" + +namespace ZeitgeistFTS { + +void Controller::Initialize (GError **error) +{ + indexer->Initialize (error); +} + +void Controller::Run () +{ + if (!indexer->CheckIndex ()) + { + indexer->DropIndex (); + RebuildIndex (); + } +} + +void Controller::RebuildIndex () +{ + GError *error = NULL; + GPtrArray *events; + GPtrArray *templates = g_ptr_array_new (); + ZeitgeistTimeRange *time_range = zeitgeist_time_range_new_anytime (); + + g_debug ("asking reader for all events"); + events = zeitgeist_db_reader_find_events (zg_reader, + time_range, + templates, + ZEITGEIST_STORAGE_STATE_ANY, + 0, + ZEITGEIST_RESULT_TYPE_MOST_RECENT_EVENTS, + NULL, + &error); + + if (error) + { + g_warning ("%s", error->message); + g_error_free (error); + } + else + { + g_debug ("reader returned %u events", events->len); + + IndexEvents (events); + g_ptr_array_unref (events); + + // Set the db metadata key only once we're done + PushTask (new MetadataTask ("fts_index_version", INDEX_VERSION)); + } + + g_object_unref (time_range); + g_ptr_array_unref (templates); +} + +void Controller::IndexEvents (GPtrArray *events) +{ + const int CHUNK_SIZE = 32; + // Break down index tasks into suitable chunks + for (unsigned i = 0; i < events->len; i += CHUNK_SIZE) + { + PushTask (new IndexEventsTask (g_ptr_array_ref (events), i, CHUNK_SIZE)); + } +} + +void Controller::DeleteEvents (guint *event_ids, int event_ids_size) +{ + // FIXME: Should we break the task here as well? 
+ PushTask (new DeleteEventsTask (event_ids, event_ids_size)); +} + +void Controller::PushTask (Task* task) +{ + queued_tasks.push (task); + + if (processing_source_id == 0) + { + processing_source_id = + g_idle_add ((GSourceFunc) &Controller::ProcessTask, this); + } +} + +gboolean Controller::ProcessTask () +{ + if (!queued_tasks.empty ()) + { + Task *task; + + task = queued_tasks.front (); + queued_tasks.pop (); + + task->Process (indexer); + delete task; + } + + bool all_done = queued_tasks.empty (); + if (all_done) + { + indexer->Commit (); + if (processing_source_id != 0) + { + g_source_remove (processing_source_id); + processing_source_id = 0; + } + return FALSE; + } + + return TRUE; +} + +bool Controller::HasPendingTasks () +{ + return !queued_tasks.empty (); +} + +} diff --git a/extensions/fts++/controller.h b/extensions/fts++/controller.h new file mode 100644 index 00000000..abcd8fda --- /dev/null +++ b/extensions/fts++/controller.h @@ -0,0 +1,72 @@ +/* + * Copyright (C) 2012 Mikkel Kamstrup Erlandsen + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ * + * Authored by Mikkel Kamstrup Erlandsen <mikkel.kamstrup@gmail.com> + * + */ + +#ifndef _ZGFTS_CONTROLLER_H_ +#define _ZGFTS_CONTROLLER_H_ + +#include <glib-object.h> +#include <queue> +#include <vector> + +#include "indexer.h" +#include "task.h" +#include "zeitgeist-internal.h" + +namespace ZeitgeistFTS { + +class Controller { +public: + Controller (ZeitgeistDbReader *reader) + : zg_reader (reader) + , processing_source_id (0) + , indexer (new Indexer (reader)) {}; + + ~Controller () + { + if (processing_source_id != 0) + { + g_source_remove (processing_source_id); + } + } + + void Initialize (GError **error); + void Run (); + void RebuildIndex (); + + void IndexEvents (GPtrArray *events); + void DeleteEvents (guint *event_ids, int event_ids_size); + + void PushTask (Task* task); + bool HasPendingTasks (); + gboolean ProcessTask (); + + Indexer *indexer; + +private: + ZeitgeistDbReader *zg_reader; + + typedef std::queue<Task*> TaskQueue; + TaskQueue queued_tasks; + guint processing_source_id; +}; + +} + +#endif /* _ZGFTS_CONTROLLER_H_ */ diff --git a/extensions/fts++/datamodel.vala b/extensions/fts++/datamodel.vala new file mode 120000 index 00000000..02172aac --- /dev/null +++ b/extensions/fts++/datamodel.vala @@ -0,0 +1 @@ +../../src/datamodel.vala
\ No newline at end of file diff --git a/extensions/fts++/db-reader.vala b/extensions/fts++/db-reader.vala new file mode 120000 index 00000000..fecbc782 --- /dev/null +++ b/extensions/fts++/db-reader.vala @@ -0,0 +1 @@ +../../src/db-reader.vala
\ No newline at end of file diff --git a/extensions/fts++/engine.vala b/extensions/fts++/engine.vala new file mode 120000 index 00000000..e2314a5e --- /dev/null +++ b/extensions/fts++/engine.vala @@ -0,0 +1 @@ +../../src/engine.vala
\ No newline at end of file diff --git a/extensions/fts++/errors.vala b/extensions/fts++/errors.vala new file mode 120000 index 00000000..c630d3ed --- /dev/null +++ b/extensions/fts++/errors.vala @@ -0,0 +1 @@ +../../src/errors.vala
namespace Zeitgeist
{
    /**
     * No-op stand-in for the extension collection: every hook below does
     * nothing and no extension is ever reported.
     * NOTE(review): presumably exists so the engine sources compiled into
     * libzeitgeist-internal build without the real extension machinery -
     * confirm against the Makefile source list.
     */
    public class ExtensionCollection : Object
    {
        public unowned Engine engine { get; construct; }

        public ExtensionCollection (Engine engine)
        {
            Object (engine: engine);
        }

        /** Always returns an empty array. */
        public string[] get_extension_names ()
        {
            string[] result = {};
            return result;
        }

        /** Does nothing. */
        public void call_pre_insert_events (GenericArray<Event?> events,
            BusName? sender)
        {
        }

        /** Does nothing. */
        public void call_post_insert_events (GenericArray<Event?> events,
            BusName? sender)
        {
        }

        /** Returns the given ids unchanged. */
        public unowned uint32[] call_pre_delete_events (uint32[] event_ids,
            BusName? sender)
        {
            return event_ids;
        }

        /** Does nothing. */
        public void call_post_delete_events (uint32[] event_ids,
            BusName? sender)
        {
        }
    }

    /**
     * Empty stand-in for the extension store; it only keeps the (unowned)
     * engine reference it was constructed with.
     */
    public class ExtensionStore : Object
    {
        public unowned Engine engine { get; construct; }

        public ExtensionStore (Engine engine)
        {
            Object (engine: engine);
        }
    }

}

// vim:expandtab:ts=4:sw=4
+ * + * Authored by Michal Hruby <michal.hruby@canonical.com> + * + */ + +#include "fts.h" +#include "indexer.h" +#include "controller.h" + +ZeitgeistIndexer* +zeitgeist_indexer_new (ZeitgeistDbReader *reader, GError **error) +{ + ZeitgeistFTS::Controller *ctrl; + GError *local_error; + + g_return_val_if_fail (ZEITGEIST_IS_DB_READER (reader), NULL); + g_return_val_if_fail (error == NULL || *error == NULL, NULL); + + g_setenv ("XAPIAN_CJK_NGRAM", "1", TRUE); + ctrl = new ZeitgeistFTS::Controller (reader); + + local_error = NULL; + ctrl->Initialize (&local_error); + if (local_error) + { + delete ctrl; + g_propagate_error (error, local_error); + return NULL; + } + + + ctrl->Run (); + + return (ZeitgeistIndexer*) ctrl; +} + +void +zeitgeist_indexer_free (ZeitgeistIndexer* indexer) +{ + g_return_if_fail (indexer != NULL); + + delete (ZeitgeistFTS::Controller*) indexer; +} + +GPtrArray* zeitgeist_indexer_search (ZeitgeistIndexer *indexer, + const gchar *search_string, + ZeitgeistTimeRange *time_range, + GPtrArray *templates, + guint offset, + guint count, + ZeitgeistResultType result_type, + guint *matches, + GError **error) +{ + GPtrArray *results; + ZeitgeistFTS::Controller *_indexer; + + g_return_val_if_fail (indexer != NULL, NULL); + g_return_val_if_fail (search_string != NULL, NULL); + g_return_val_if_fail (ZEITGEIST_IS_TIME_RANGE (time_range), NULL); + g_return_val_if_fail (error == NULL || *error == NULL, NULL); + + _indexer = (ZeitgeistFTS::Controller*) indexer; + + results = _indexer->indexer->Search (search_string, time_range, + templates, offset, count, result_type, + matches, error); + + return results; +} + +void zeitgeist_indexer_index_events (ZeitgeistIndexer *indexer, + GPtrArray *events) +{ + ZeitgeistFTS::Controller *_indexer; + + g_return_if_fail (indexer != NULL); + g_return_if_fail (events != NULL); + + _indexer = (ZeitgeistFTS::Controller*) indexer; + + _indexer->IndexEvents (events); +} + +void zeitgeist_indexer_delete_events (ZeitgeistIndexer 
*indexer, + guint *event_ids, + int event_ids_size) +{ + ZeitgeistFTS::Controller *_indexer; + + g_return_if_fail (indexer != NULL); + + if (event_ids_size <= 0) return; + + _indexer = (ZeitgeistFTS::Controller*) indexer; + + _indexer->DeleteEvents (event_ids, event_ids_size); +} + +gboolean zeitgeist_indexer_has_pending_tasks (ZeitgeistIndexer *indexer) +{ + ZeitgeistFTS::Controller *_indexer; + + g_return_val_if_fail (indexer != NULL, FALSE); + + _indexer = (ZeitgeistFTS::Controller*) indexer; + + return _indexer->HasPendingTasks () ? TRUE : FALSE; +} + +void zeitgeist_indexer_process_task (ZeitgeistIndexer *indexer) +{ + ZeitgeistFTS::Controller *_indexer; + + g_return_if_fail (indexer != NULL); + + _indexer = (ZeitgeistFTS::Controller*) indexer; + + _indexer->ProcessTask (); +} + diff --git a/extensions/fts++/fts.h b/extensions/fts++/fts.h new file mode 100644 index 00000000..2226ec90 --- /dev/null +++ b/extensions/fts++/fts.h @@ -0,0 +1,59 @@ +/* + * Copyright (C) 2012 Canonical Ltd + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. 
#ifndef _ZGFTS_H_
#define _ZGFTS_H_

#include <glib.h>
#include "zeitgeist-internal.h"

/* Opaque handle for the C/Vala side; fts.cpp casts it to and from the C++
 * ZeitgeistFTS::Controller. */
typedef struct _ZeitgeistIndexer ZeitgeistIndexer;

G_BEGIN_DECLS

/* Creates an indexer for @reader. Returns NULL and sets @error if the
 * index could not be initialized. Free with zeitgeist_indexer_free (). */
ZeitgeistIndexer*  zeitgeist_indexer_new         (ZeitgeistDbReader* reader,
                                                  GError **error);

/* Destroys an indexer created by zeitgeist_indexer_new (). */
void               zeitgeist_indexer_free        (ZeitgeistIndexer* indexer);

/* Runs a full text search. @indexer and @search_string must not be NULL and
 * @time_range must be a ZeitgeistTimeRange.
 * NOTE(review): semantics of @offset, @count and @matches, and ownership of
 * the returned array, are defined by Indexer::Search - confirm there. */
GPtrArray*         zeitgeist_indexer_search      (ZeitgeistIndexer *indexer,
                                                  const gchar *search_string,
                                                  ZeitgeistTimeRange *time_range,
                                                  GPtrArray *templates,
                                                  guint offset,
                                                  guint count,
                                                  ZeitgeistResultType result_type,
                                                  guint *matches,
                                                  GError **error);

/* Queues @events for indexing; the work is done later in chunks. */
void               zeitgeist_indexer_index_events (ZeitgeistIndexer *indexer,
                                                   GPtrArray *events);

/* Queues removal of @event_ids; does nothing if @event_ids_size <= 0. */
void               zeitgeist_indexer_delete_events (ZeitgeistIndexer *indexer,
                                                    guint *event_ids,
                                                    int event_ids_size);

/* TRUE while queued index/delete tasks have not been processed yet. */
gboolean           zeitgeist_indexer_has_pending_tasks (ZeitgeistIndexer *indexer);

/* Processes one queued task immediately. */
void               zeitgeist_indexer_process_task (ZeitgeistIndexer *indexer);

G_END_DECLS

#endif /* _ZGFTS_H_ */
00000000..d97f7ebd --- /dev/null +++ b/extensions/fts++/indexer.cpp @@ -0,0 +1,897 @@ +/* + * Copyright (C) 2012 Canonical Ltd + * 2012 Mikkel Kamstrup Erlandsen + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * Authored by Michal Hruby <michal.hruby@canonical.com> + * Mikkel Kamstrup Erlandsen <mikkel.kamstrup@gmail.com> + * + */ + +#include "indexer.h" +#include "stringutils.h" +#include <xapian.h> +#include <queue> +#include <vector> + +#include <gio/gio.h> +#include <gio/gdesktopappinfo.h> + +namespace ZeitgeistFTS { + +const std::string FILTER_PREFIX_EVENT_INTERPRETATION = "ZGEI"; +const std::string FILTER_PREFIX_EVENT_MANIFESTATION = "ZGEM"; +const std::string FILTER_PREFIX_ACTOR = "ZGA"; +const std::string FILTER_PREFIX_SUBJECT_URI = "ZGSU"; +const std::string FILTER_PREFIX_SUBJECT_INTERPRETATION = "ZGSI"; +const std::string FILTER_PREFIX_SUBJECT_MANIFESTATION = "ZGSM"; +const std::string FILTER_PREFIX_SUBJECT_ORIGIN = "ZGSO"; +const std::string FILTER_PREFIX_SUBJECT_MIMETYPE = "ZGST"; +const std::string FILTER_PREFIX_SUBJECT_STORAGE = "ZGSS"; +const std::string FILTER_PREFIX_XDG_CATEGORY = "AC"; + +const Xapian::valueno VALUE_EVENT_ID = 0; +const Xapian::valueno VALUE_TIMESTAMP = 1; + +#define QUERY_PARSER_FLAGS \ + Xapian::QueryParser::FLAG_PHRASE | Xapian::QueryParser::FLAG_BOOLEAN | \ + Xapian::QueryParser::FLAG_PURE_NOT | 
Xapian::QueryParser::FLAG_LOVEHATE | \ + Xapian::QueryParser::FLAG_WILDCARD + +const std::string FTS_MAIN_DIR = "ftspp.index"; + +void Indexer::Initialize (GError **error) +{ + try + { + if (zeitgeist_utils_using_in_memory_database ()) + { + this->db = new Xapian::WritableDatabase; + this->db->add_database (Xapian::InMemory::open ()); + } + else + { + gchar *path = g_build_filename (zeitgeist_utils_get_data_path (), + FTS_MAIN_DIR.c_str (), NULL); + this->db = new Xapian::WritableDatabase (path, + Xapian::DB_CREATE_OR_OPEN); + g_free (path); + } + + this->tokenizer = new Xapian::TermGenerator (); + this->query_parser = new Xapian::QueryParser (); + this->query_parser->add_prefix ("name", "N"); + this->query_parser->add_prefix ("title", "N"); + this->query_parser->add_prefix ("site", "S"); + this->query_parser->add_prefix ("app", "A"); + this->query_parser->add_boolean_prefix ("zgei", + FILTER_PREFIX_EVENT_INTERPRETATION); + this->query_parser->add_boolean_prefix ("zgem", + FILTER_PREFIX_EVENT_MANIFESTATION); + this->query_parser->add_boolean_prefix ("zga", FILTER_PREFIX_ACTOR); + this->query_parser->add_prefix ("zgsu", FILTER_PREFIX_SUBJECT_URI); + this->query_parser->add_boolean_prefix ("zgsi", + FILTER_PREFIX_SUBJECT_INTERPRETATION); + this->query_parser->add_boolean_prefix ("zgsm", + FILTER_PREFIX_SUBJECT_MANIFESTATION); + this->query_parser->add_prefix ("zgso", FILTER_PREFIX_SUBJECT_ORIGIN); + this->query_parser->add_boolean_prefix ("zgst", + FILTER_PREFIX_SUBJECT_MIMETYPE); + this->query_parser->add_boolean_prefix ("zgss", + FILTER_PREFIX_SUBJECT_STORAGE); + this->query_parser->add_prefix ("category", FILTER_PREFIX_XDG_CATEGORY); + + this->query_parser->add_valuerangeprocessor ( + new Xapian::NumberValueRangeProcessor (VALUE_EVENT_ID, "id")); + this->query_parser->add_valuerangeprocessor ( + new Xapian::NumberValueRangeProcessor (VALUE_TIMESTAMP, "ms", false)); + + this->query_parser->set_default_op (Xapian::Query::OP_AND); + this->query_parser->set_database 
(*this->db); + + this->enquire = new Xapian::Enquire (*this->db); + + } + catch (const Xapian::Error &xp_error) + { + g_set_error_literal (error, + ZEITGEIST_ENGINE_ERROR, + ZEITGEIST_ENGINE_ERROR_DATABASE_ERROR, + xp_error.get_msg ().c_str ()); + this->db = NULL; + } +} + +/** + * Returns true if and only if the index is good. + * Otherwise the index should be rebuild. + */ +bool Indexer::CheckIndex () +{ + std::string db_version (db->get_metadata ("fts_index_version")); + if (db_version != INDEX_VERSION) + { + g_message ("Index must be upgraded. Doing full rebuild"); + return false; + } + else if (db->get_doccount () == 0) + { + g_message ("Empty index detected. Doing full rebuild"); + return false; + } + + return true; +} + +/** + * Clear the index and create a new empty one + */ +void Indexer::DropIndex () +{ + try + { + if (this->db != NULL) + { + this->db->close (); + delete this->db; + this->db = NULL; + } + + if (this->enquire != NULL) + { + delete this->enquire; + this->enquire = NULL; + } + + if (zeitgeist_utils_using_in_memory_database ()) + { + this->db = new Xapian::WritableDatabase; + this->db->add_database (Xapian::InMemory::open ()); + } + else + { + gchar *path = g_build_filename (zeitgeist_utils_get_data_path (), + FTS_MAIN_DIR.c_str (), NULL); + this->db = new Xapian::WritableDatabase (path, + Xapian::DB_CREATE_OR_OVERWRITE); + // FIXME: leaks on error + g_free (path); + } + + this->query_parser->set_database (*this->db); + this->enquire = new Xapian::Enquire (*this->db); + } + catch (const Xapian::Error &xp_error) + { + g_error ("Error ocurred during database reindex: %s", + xp_error.get_msg ().c_str ()); + } +} + +void Indexer::Commit () +{ + try + { + db->commit (); + } + catch (Xapian::Error const& e) + { + g_warning ("Failed to commit changes: %s", e.get_msg ().c_str ()); + } +} + +std::string Indexer::ExpandType (std::string const& prefix, + const gchar* unparsed_uri) +{ + gchar* uri = g_strdup (unparsed_uri); + gboolean is_negation = 
zeitgeist_utils_parse_negation (&uri); + gboolean noexpand = zeitgeist_utils_parse_noexpand (&uri); + + std::string result; + GList *symbols = NULL; + symbols = g_list_append (symbols, uri); + if (!noexpand) + { + GList *children = zeitgeist_symbol_get_all_children (uri); + symbols = g_list_concat (symbols, children); + } + + for (GList *iter = symbols; iter != NULL; iter = iter->next) + { + result += prefix + std::string((gchar*) iter->data); + if (iter->next != NULL) result += " OR "; + } + + g_list_free (symbols); + g_free (uri); + + if (is_negation) result = "NOT (" + result + ")"; + + return result; +} + +std::string Indexer::CompileEventFilterQuery (GPtrArray *templates) +{ + std::vector<std::string> query; + + for (unsigned i = 0; i < templates->len; i++) + { + const gchar* val; + std::vector<std::string> tmpl; + ZeitgeistEvent *event = (ZeitgeistEvent*) g_ptr_array_index (templates, i); + + val = zeitgeist_event_get_interpretation (event); + if (val && val[0] != '\0') + tmpl.push_back (ExpandType ("zgei:", val)); + + val = zeitgeist_event_get_manifestation (event); + if (val && val[0] != '\0') + tmpl.push_back (ExpandType ("zgem:", val)); + + val = zeitgeist_event_get_actor (event); + if (val && val[0] != '\0') + tmpl.push_back ("zga:" + StringUtils::MangleUri (val)); + + GPtrArray *subjects = zeitgeist_event_get_subjects (event); + for (unsigned j = 0; j < subjects->len; j++) + { + ZeitgeistSubject *subject = (ZeitgeistSubject*) g_ptr_array_index (subjects, j); + val = zeitgeist_subject_get_uri (subject); + if (val && val[0] != '\0') + tmpl.push_back ("zgsu:" + StringUtils::MangleUri (val)); + + val = zeitgeist_subject_get_interpretation (subject); + if (val && val[0] != '\0') + tmpl.push_back (ExpandType ("zgsi:", val)); + + val = zeitgeist_subject_get_manifestation (subject); + if (val && val[0] != '\0') + tmpl.push_back (ExpandType ("zgsm:", val)); + + val = zeitgeist_subject_get_origin (subject); + if (val && val[0] != '\0') + tmpl.push_back ("zgso:" + 
StringUtils::MangleUri (val)); + + val = zeitgeist_subject_get_mimetype (subject); + if (val && val[0] != '\0') + tmpl.push_back (std::string ("zgst:") + val); + + val = zeitgeist_subject_get_storage (subject); + if (val && val[0] != '\0') + tmpl.push_back (std::string ("zgss:") + val); + } + + if (tmpl.size () == 0) continue; + + std::string event_query ("("); + for (int i = 0; i < tmpl.size (); i++) + { + event_query += tmpl[i]; + if (i < tmpl.size () - 1) event_query += ") AND ("; + } + query.push_back (event_query + ")"); + } + + if (query.size () == 0) return std::string (""); + + std::string result; + for (int i = 0; i < query.size (); i++) + { + result += query[i]; + if (i < query.size () - 1) result += " OR "; + } + return result; +} + +std::string Indexer::CompileTimeRangeFilterQuery (gint64 start, gint64 end) +{ + // let's use gprinting to be safe + gchar *q = g_strdup_printf ("%" G_GINT64_FORMAT "..%" G_GINT64_FORMAT "ms", + start, end); + std::string query (q); + g_free (q); + + return query; +} + +/** + * Adds the filtering rules to the doc. 
Filtering rules will + * not affect the relevancy ranking of the event/doc + */ +void Indexer::AddDocFilters (ZeitgeistEvent *event, Xapian::Document &doc) +{ + const gchar* val; + + val = zeitgeist_event_get_interpretation (event); + if (val && val[0] != '\0') + doc.add_boolean_term (StringUtils::Truncate (FILTER_PREFIX_EVENT_INTERPRETATION + val)); + + val = zeitgeist_event_get_manifestation (event); + if (val && val[0] != '\0') + doc.add_boolean_term (StringUtils::Truncate (FILTER_PREFIX_EVENT_MANIFESTATION + val)); + + val = zeitgeist_event_get_actor (event); + if (val && val[0] != '\0') + doc.add_boolean_term (StringUtils::Truncate (FILTER_PREFIX_ACTOR + StringUtils::MangleUri (val))); + + GPtrArray *subjects = zeitgeist_event_get_subjects (event); + for (unsigned j = 0; j < subjects->len; j++) + { + ZeitgeistSubject *subject = (ZeitgeistSubject*) g_ptr_array_index (subjects, j); + val = zeitgeist_subject_get_uri (subject); + if (val && val[0] != '\0') + doc.add_boolean_term (StringUtils::Truncate (FILTER_PREFIX_SUBJECT_URI + StringUtils::MangleUri (val))); + + val = zeitgeist_subject_get_interpretation (subject); + if (val && val[0] != '\0') + doc.add_boolean_term (StringUtils::Truncate (FILTER_PREFIX_SUBJECT_INTERPRETATION + val)); + + val = zeitgeist_subject_get_manifestation (subject); + if (val && val[0] != '\0') + doc.add_boolean_term (StringUtils::Truncate (FILTER_PREFIX_SUBJECT_MANIFESTATION + val)); + + val = zeitgeist_subject_get_origin (subject); + if (val && val[0] != '\0') + doc.add_boolean_term (StringUtils::Truncate (FILTER_PREFIX_SUBJECT_ORIGIN + StringUtils::MangleUri (val))); + + val = zeitgeist_subject_get_mimetype (subject); + if (val && val[0] != '\0') + doc.add_boolean_term (StringUtils::Truncate (FILTER_PREFIX_SUBJECT_MIMETYPE + val)); + + val = zeitgeist_subject_get_storage (subject); + if (val && val[0] != '\0') + doc.add_boolean_term (StringUtils::Truncate (FILTER_PREFIX_SUBJECT_STORAGE + val)); + } +} + +void Indexer::IndexText 
(std::string const& text) +{ + // FIXME: ascii folding! + tokenizer->index_text (text, 5); +} + +void Indexer::IndexUri (std::string const& uri, std::string const& origin) +{ + GFile *f = g_file_new_for_uri (uri.c_str ()); + + gchar *scheme = g_file_get_uri_scheme (f); + if (scheme == NULL) + { + g_warning ("Invalid URI: %s", uri.c_str ()); + return; + } + + std::string scheme_str(scheme); + g_free (scheme); + + if (scheme_str == "file") + { + // FIXME: special case some typical filenames (like photos) + // examples of typical filenames from cameras: + // P07-08-08_16.25.JPG + // P070608_18.54.JPG + // P180308_22.27[1].jpg + // P6220111.JPG + // PC220006.JPG + // DSCN0149.JPG + // DSC01166.JPG + // SDC12583.JPG + // IMGP3199.JPG + // IMGP1251-4.jpg + // IMG_101_8987.JPG + // 10052010152.jpg + // 4867_93080512835_623012835_1949065_8351752_n.jpg + // 2011-05-29 10.49.37.jpg + // V100908_11.24.AVI + // video-2011-05-29-15-14-58.mp4 + + // get_parse_name will convert escaped characters to UTF-8, but only for + // the "file" scheme, so using it elsewhere won't be of much help + + gchar *pn = g_file_get_parse_name (f); + gchar *basename = g_path_get_basename (pn); + + // FIXME: remove unscores, CamelCase and process digits + tokenizer->index_text (basename, 5); + tokenizer->index_text (basename, 5, "N"); + + g_free (basename); + // limit the directory indexing to just a few levels + // (the original formula was weight = 5.0 / (1.5^n) + unsigned path_weights[] = { 3, 2, 1, 0 }; + unsigned weight_index = 0; + + // this should be equal to origin, but we already got a nice utf-8 display + // name, so we'll use that + gchar *dir = g_path_get_dirname (pn); + std::string path_component (dir); + g_free (dir); + g_free (pn); + + while (path_component.length () > 2 && + weight_index < G_N_ELEMENTS (path_weights)) + { + // if this is already home directory we don't want it + if (path_component.length () == home_dir_path.length () && + path_component == home_dir_path) return; + + 
gchar *name = g_path_get_basename (path_component.c_str ()); + + // FIXME: un-underscore, uncamelcase, ascii fold + tokenizer->index_text (name, path_weights[weight_index++]); + + dir = g_path_get_dirname (path_component.c_str ()); + path_component = dir; + g_free (dir); + g_free (name); + } + } + else if (scheme_str == "mailto") + { + // mailto:username@server.com + size_t scheme_len = scheme_str.length () + 1; + size_t at_pos = uri.find ('@', scheme_len); + if (at_pos == std::string::npos) return; + + tokenizer->index_text (uri.substr (scheme_len, at_pos - scheme_len), 5); + tokenizer->index_text (uri.substr (at_pos + 1), 1); + } + else if (scheme_str.compare (0, 4, "http") == 0) + { + // http / https - we'll index just the basename of the uri (minus query + // part) and the hostname/domain + + // step 1) strip query part + gchar *basename; + size_t question_mark = uri.find ('?'); + if (question_mark != std::string::npos) + { + std::string stripped (uri, 0, question_mark - 1); + basename = g_path_get_basename (stripped.c_str ()); + } + else + { + basename = g_file_get_basename (f); + } + + // step 2) unescape and check that it's valid utf8 + gchar *unescaped_basename = g_uri_unescape_string (basename, ""); + + if (g_utf8_validate (unescaped_basename, -1, NULL)) + { + // FIXME: remove unscores, CamelCase and process digits + tokenizer->index_text (unescaped_basename, 5); + tokenizer->index_text (unescaped_basename, 5, "N"); + } + + // and also index hostname (taken from origin field if possible) + std::string host_str (origin.empty () ? 
uri : origin); + size_t hostname_start = host_str.find ("://"); + if (hostname_start != std::string::npos) + { + std::string hostname (host_str, hostname_start + 3); + size_t slash_pos = hostname.find ("/"); + if (slash_pos != std::string::npos) hostname.resize (slash_pos); + + // support IDN + if (g_hostname_is_ascii_encoded (hostname.c_str ())) + { + gchar *printable_hostname = g_hostname_to_unicode (hostname.c_str ()); + if (printable_hostname != NULL) hostname = printable_hostname; + g_free (printable_hostname); + } + + tokenizer->index_text (hostname, 2); + tokenizer->index_text (hostname, 2, "N"); + tokenizer->index_text (hostname, 2, "S"); + } + + g_free (unescaped_basename); + g_free (basename); + } + else if (scheme_str == "data") + { + // we *really* don't want to index anything with this scheme + } + else + { + std::string authority, path, query; + StringUtils::SplitUri (uri, authority, path, query); + + if (!path.empty ()) + { + gchar *basename = g_path_get_basename (path.c_str ()); + gchar *unescaped_basename = g_uri_unescape_string (basename, ""); + + if (g_utf8_validate (unescaped_basename, -1, NULL)) + { + std::string capped (StringUtils::Truncate (unescaped_basename, 30)); + tokenizer->index_text (capped, 5); + tokenizer->index_text (capped, 5, "N"); + } + + // FIXME: rest of the path? 
+ g_free (unescaped_basename); + g_free (basename); + } + + if (!authority.empty ()) + { + std::string capped (StringUtils::Truncate (authority, 30)); + + tokenizer->index_text (capped, 2); + tokenizer->index_text (capped, 2, "N"); + tokenizer->index_text (capped, 2, "S"); + } + } + + g_object_unref (f); +} + +bool Indexer::IndexActor (std::string const& actor, bool is_subject) +{ + GDesktopAppInfo *dai = NULL; + // check the cache first + GAppInfo *ai = app_info_cache[actor]; + + if (ai == NULL) + { + // check also the failed cache + if (failed_lookups.count (actor) != 0) return false; + + // and now try to load from the disk + if (g_path_is_absolute (actor.c_str ())) + { + dai = g_desktop_app_info_new_from_filename (actor.c_str ()); + } + else if (g_str_has_prefix (actor.c_str (), "application://")) + { + dai = g_desktop_app_info_new (actor.substr (14).c_str ()); + } + + if (dai != NULL) + { + ai = G_APP_INFO (dai); + app_info_cache[actor] = ai; + } + else + { + // cache failed lookup + failed_lookups.insert (actor); + if (clear_failed_id == 0) + { + // but clear the failed cache in 30 seconds + clear_failed_id = g_timeout_add_seconds (30, + (GSourceFunc) &Indexer::ClearFailedLookupsCb, this); + } + } + } + else + { + dai = G_DESKTOP_APP_INFO (ai); + } + + if (dai == NULL) + { + g_warning ("Unable to get info on %s", actor.c_str ()); + return false; + } + + const gchar *val; + unsigned name_weight = is_subject ? 
5 : 2; + unsigned comment_weight = 2; + + // FIXME: ascii folding somewhere + + val = g_app_info_get_display_name (ai); + if (val && val[0] != '\0') + { + std::string display_name (val); + tokenizer->index_text (display_name, name_weight); + tokenizer->index_text (display_name, name_weight, "A"); + } + + val = g_desktop_app_info_get_generic_name (dai); + if (val && val[0] != '\0') + { + std::string generic_name (val); + tokenizer->index_text (generic_name, name_weight); + tokenizer->index_text (generic_name, name_weight, "A"); + } + + if (!is_subject) return true; + // the rest of the code only applies to events with application subject uris: + // index the comment field, add category terms, index keywords + + val = g_app_info_get_description (ai); + if (val && val[0] != '\0') + { + std::string comment (val); + tokenizer->index_text (comment, comment_weight); + tokenizer->index_text (comment, comment_weight, "A"); + } + + val = g_desktop_app_info_get_categories (dai); + if (val && val[0] != '\0') + { + gchar **categories = g_strsplit (val, ";", 0); + Xapian::Document doc(tokenizer->get_document ()); + for (gchar **iter = categories; *iter != NULL; ++iter) + { + // FIXME: what if this isn't ascii? 
but it should, that's what + // the fdo menu spec says + gchar *category = g_ascii_strdown (*iter, -1); + doc.add_boolean_term (FILTER_PREFIX_XDG_CATEGORY + category); + g_free (category); + } + g_strfreev (categories); + } + + return true; +} + +GPtrArray* Indexer::Search (const gchar *search_string, + ZeitgeistTimeRange *time_range, + GPtrArray *templates, + guint offset, + guint count, + ZeitgeistResultType result_type, + guint *matches, + GError **error) +{ + GPtrArray *results = NULL; + try + { + std::string query_string(search_string); + + if (templates && templates->len > 0) + { + std::string filters (CompileEventFilterQuery (templates)); + query_string = "(" + query_string + ") AND (" + filters + ")"; + } + + if (time_range) + { + gint64 start_time = zeitgeist_time_range_get_start (time_range); + gint64 end_time = zeitgeist_time_range_get_end (time_range); + + if (start_time > 0 || end_time < G_MAXINT64) + { + std::string time_filter (CompileTimeRangeFilterQuery (start_time, end_time)); + query_string = "(" + query_string + ") AND (" + time_filter + ")"; + } + } + + // FIXME: which result types coalesce? 
+ guint maxhits = count * 3; + + if (result_type == 100) + { + enquire->set_sort_by_relevance (); + } + else + { + enquire->set_sort_by_value (VALUE_TIMESTAMP, true); + } + + g_debug ("query: %s", query_string.c_str ()); + Xapian::Query q(query_parser->parse_query (query_string, QUERY_PARSER_FLAGS)); + enquire->set_query (q); + Xapian::MSet hits (enquire->get_mset (offset, maxhits)); + Xapian::doccount hitcount = hits.get_matches_estimated (); + + if (result_type == 100) + { + std::vector<unsigned> event_ids; + for (Xapian::MSetIterator iter = hits.begin (); iter != hits.end (); ++iter) + { + Xapian::Document doc(iter.get_document ()); + double unserialized = + Xapian::sortable_unserialise(doc.get_value (VALUE_EVENT_ID)); + event_ids.push_back (static_cast<unsigned>(unserialized)); + } + + results = zeitgeist_db_reader_get_events (zg_reader, + &event_ids[0], + event_ids.size (), + NULL, + error); + } + else + { + GPtrArray *event_templates; + event_templates = g_ptr_array_new_with_free_func (g_object_unref); + for (Xapian::MSetIterator iter = hits.begin (); iter != hits.end (); ++iter) + { + Xapian::Document doc(iter.get_document ()); + double unserialized = + Xapian::sortable_unserialise(doc.get_value (VALUE_EVENT_ID)); + // this doesn't need ref sinking, does it? 
+ ZeitgeistEvent *event = zeitgeist_event_new (); + zeitgeist_event_set_id (event, static_cast<unsigned>(unserialized)); + g_ptr_array_add (event_templates, event); + } + + if (event_templates->len > 0) + { + ZeitgeistTimeRange *time_range = zeitgeist_time_range_new_anytime (); + results = zeitgeist_db_reader_find_events (zg_reader, + time_range, + event_templates, + ZEITGEIST_STORAGE_STATE_ANY, + 0, + result_type, + NULL, + error); + + g_object_unref (time_range); + } + else + { + results = g_ptr_array_new (); + } + + g_ptr_array_unref (event_templates); + } + + if (matches) + { + *matches = hitcount; + } + } + catch (Xapian::Error const& e) + { + g_warning ("Failed to index event: %s", e.get_msg ().c_str ()); + g_set_error_literal (error, + ZEITGEIST_ENGINE_ERROR, + ZEITGEIST_ENGINE_ERROR_DATABASE_ERROR, + e.get_msg ().c_str ()); + } + + return results; +} + +void Indexer::IndexEvent (ZeitgeistEvent *event) +{ + try + { + // FIXME: we need to special case MOVE_EVENTs + const gchar *val; + guint event_id = zeitgeist_event_get_id (event); + g_return_if_fail (event_id > 0); + + g_debug ("Indexing event with ID: %u", event_id); + + Xapian::Document doc; + doc.add_value (VALUE_EVENT_ID, + Xapian::sortable_serialise (static_cast<double>(event_id))); + doc.add_value (VALUE_TIMESTAMP, + Xapian::sortable_serialise (static_cast<double>(zeitgeist_event_get_timestamp (event)))); + + tokenizer->set_document (doc); + + val = zeitgeist_event_get_actor (event); + if (val && val[0] != '\0') + { + // it's nice that searching for "gedit" will find all files you worked + // with in gedit, but the relevancy has to be low + IndexActor (val, false); + } + + GPtrArray *subjects = zeitgeist_event_get_subjects (event); + for (unsigned i = 0; i < subjects->len; i++) + { + ZeitgeistSubject *subject; + subject = (ZeitgeistSubject*) g_ptr_array_index (subjects, i); + + val = zeitgeist_subject_get_uri (subject); + if (val == NULL || val[0] == '\0') continue; + + std::string uri(val); + + if 
(uri.length () > 512) + { + g_warning ("URI too long (%lu). Discarding:\n%s", + uri.length (), uri.substr (0, 32).c_str ()); + return; // ignore this event completely... + } + + val = zeitgeist_subject_get_text (subject); + if (val && val[0] != '\0') + { + IndexText (val); + } + + val = zeitgeist_subject_get_origin (subject); + std::string origin (val != NULL ? val : ""); + + if (uri.compare (0, 14, "application://") == 0) + { + if (!IndexActor (uri, true)) + IndexUri (uri, origin); + } + else + { + IndexUri (uri, origin); + } + } + + AddDocFilters (event, doc); + + this->db->add_document (doc); + } + catch (Xapian::Error const& e) + { + g_warning ("Failed to index event: %s", e.get_msg ().c_str ()); + } +} + +void Indexer::DeleteEvent (guint32 event_id) +{ + g_debug ("Deleting event with ID: %u", event_id); + + try + { + std::string id(Xapian::sortable_serialise (static_cast<double>(event_id))); + Xapian::Query query (Xapian::Query::OP_VALUE_RANGE, VALUE_EVENT_ID, id, id); + + enquire->set_query(query); + Xapian::MSet mset = enquire->get_mset(0, 10); + + Xapian::doccount total = mset.get_matches_estimated(); + if (total > 1) + { + g_warning ("More than one event found with id '%s", id.c_str ()); + } + else if (total == 0) + { + g_warning ("No event for id '%s'", id.c_str ()); + return; + } + + Xapian::MSetIterator i, end; + for (i= mset.begin(), end = mset.end(); i != end; i++) + { + db->delete_document (*i); + } + } + catch (Xapian::Error const& e) + { + g_warning ("Failed to delete event '%u': %s", + event_id, e.get_msg().c_str ()); + } +} + +void Indexer::SetDbMetadata (std::string const& key, std::string const& value) +{ + try + { + db->set_metadata (key, value); + } + catch (Xapian::Error const& e) + { + g_warning ("Failed to set metadata: %s", e.get_msg ().c_str ()); + } +} + +gboolean Indexer::ClearFailedLookupsCb () +{ + failed_lookups.clear (); + + clear_failed_id = 0; + return FALSE; +} + +} /* namespace */ diff --git a/extensions/fts++/indexer.h 
b/extensions/fts++/indexer.h new file mode 100644 index 00000000..9a0135e7 --- /dev/null +++ b/extensions/fts++/indexer.h @@ -0,0 +1,115 @@ +/* + * Copyright (C) 2012 Canonical Ltd + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * Authored by Michal Hruby <michal.hruby@canonical.com> + * + */ + +#ifndef _ZGFTS_INDEXER_H_ +#define _ZGFTS_INDEXER_H_ + +#include <glib-object.h> +#include <gio/gio.h> +#include <xapian.h> + +#include "zeitgeist-internal.h" + +namespace ZeitgeistFTS { + +const std::string INDEX_VERSION = "1"; + +class Indexer +{ +public: + typedef std::map<std::string, GAppInfo*> AppInfoMap; + typedef std::set<std::string> ApplicationSet; + + Indexer (ZeitgeistDbReader *reader) + : zg_reader (reader) + , db (NULL) + , query_parser (NULL) + , enquire (NULL) + , tokenizer (NULL) + , clear_failed_id (0) + { + const gchar *home_dir = g_get_home_dir (); + home_dir_path = home_dir != NULL ? 
home_dir : "/home"; + } + + ~Indexer () + { + if (tokenizer) delete tokenizer; + if (enquire) delete enquire; + if (query_parser) delete query_parser; + if (db) delete db; + + for (AppInfoMap::iterator it = app_info_cache.begin (); + it != app_info_cache.end (); ++it) + { + g_object_unref (it->second); + } + + if (clear_failed_id != 0) + { + g_source_remove (clear_failed_id); + } + } + + void Initialize (GError **error); + bool CheckIndex (); + void DropIndex (); + void Commit (); + + void IndexEvent (ZeitgeistEvent *event); + void DeleteEvent (guint32 event_id); + void SetDbMetadata (std::string const& key, std::string const& value); + + GPtrArray* Search (const gchar *search_string, + ZeitgeistTimeRange *time_range, + GPtrArray *templates, + guint offset, + guint count, + ZeitgeistResultType result_type, + guint *matches, + GError **error); + +private: + std::string ExpandType (std::string const& prefix, const gchar* unparsed_uri); + std::string CompileEventFilterQuery (GPtrArray *templates); + std::string CompileTimeRangeFilterQuery (gint64 start, gint64 end); + + void AddDocFilters (ZeitgeistEvent *event, Xapian::Document &doc); + void IndexText (std::string const& text); + void IndexUri (std::string const& uri, std::string const& origin); + bool IndexActor (std::string const& actor, bool is_subject); + + gboolean ClearFailedLookupsCb (); + + ZeitgeistDbReader *zg_reader; + Xapian::WritableDatabase *db; + Xapian::QueryParser *query_parser; + Xapian::Enquire *enquire; + Xapian::TermGenerator *tokenizer; + AppInfoMap app_info_cache; + ApplicationSet failed_lookups; + + guint clear_failed_id; + std::string home_dir_path; +}; + +} + +#endif /* _ZGFTS_INDEXER_H_ */ diff --git a/extensions/fts++/mimetype.vala b/extensions/fts++/mimetype.vala new file mode 120000 index 00000000..fc0a6ce1 --- /dev/null +++ b/extensions/fts++/mimetype.vala @@ -0,0 +1 @@ +../../src/mimetype.vala
\ No newline at end of file diff --git a/extensions/fts++/ontology-uris.vala b/extensions/fts++/ontology-uris.vala new file mode 120000 index 00000000..c0b93ab7 --- /dev/null +++ b/extensions/fts++/ontology-uris.vala @@ -0,0 +1 @@ +../../src/ontology-uris.vala
\ No newline at end of file diff --git a/extensions/fts++/ontology.vala b/extensions/fts++/ontology.vala new file mode 120000 index 00000000..5daa0215 --- /dev/null +++ b/extensions/fts++/ontology.vala @@ -0,0 +1 @@ +../../src/ontology.vala
\ No newline at end of file diff --git a/extensions/fts-python/org.gnome.zeitgeist.fts.service.in b/extensions/fts++/org.gnome.zeitgeist.fts.service.in index 7551d79d..dff8199f 100644 --- a/extensions/fts-python/org.gnome.zeitgeist.fts.service.in +++ b/extensions/fts++/org.gnome.zeitgeist.fts.service.in @@ -1,3 +1,3 @@ [D-BUS Service] Name=org.gnome.zeitgeist.SimpleIndexer -Exec=@pkgdatadir@/fts-python/fts.py +Exec=@libexecdir@/zeitgeist-fts diff --git a/extensions/fts++/remote.vala b/extensions/fts++/remote.vala new file mode 120000 index 00000000..32661b1f --- /dev/null +++ b/extensions/fts++/remote.vala @@ -0,0 +1 @@ +../../src/remote.vala
\ No newline at end of file diff --git a/extensions/fts++/sql-schema.vala b/extensions/fts++/sql-schema.vala new file mode 120000 index 00000000..a2756d4a --- /dev/null +++ b/extensions/fts++/sql-schema.vala @@ -0,0 +1 @@ +../../src/sql-schema.vala
\ No newline at end of file diff --git a/extensions/fts++/sql.vala b/extensions/fts++/sql.vala new file mode 120000 index 00000000..48950aed --- /dev/null +++ b/extensions/fts++/sql.vala @@ -0,0 +1 @@ +../../src/sql.vala
\ No newline at end of file diff --git a/extensions/fts++/stringutils.cpp b/extensions/fts++/stringutils.cpp new file mode 100644 index 00000000..12b0baf8 --- /dev/null +++ b/extensions/fts++/stringutils.cpp @@ -0,0 +1,128 @@ +/* + * Copyright (C) 2012 Mikkel Kamstrup Erlandsen + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * Authored by Mikkel Kamstrup Erlandsen <mikkel.kamstrup@gmail.com> + * + */ +#include <string> + +#include "stringutils.h" + +using namespace std; + +namespace ZeitgeistFTS { + +namespace StringUtils { + +/** + * Make sure s has equal or less than 'nbytes' bytes making sure the returned + * string is still valid UTF-8. + * + * NOTE: It is assumed the input string is valid UTF-8. Untrusted text + * should be validated with g_utf8_validate(). + * + * This function useful for working with Xapian terms because Xapian has + * a max term length of 245 (which is not very well documented, but see + * http://xapian.org/docs/omega/termprefixes.html). + */ +string Truncate (string const& s, unsigned int nbytes) +{ + const gchar *str = s.c_str(); + const gchar *iter = str; + + nbytes = MIN(nbytes, s.length()); + + while (iter - str < nbytes) + { + const gchar *tmp = g_utf8_next_char (iter); + if (tmp - str > nbytes) break; + iter = tmp; + } + + + return s.substr(0, iter - str); +} + +/** + * Converts a URI into an index- and query friendly string. 
The problem + * is that Xapian doesn't handle CAPITAL letters or most non-alphanumeric + * symbols in a boolean term when it does prefix matching. The mangled + * URIs returned from this function are suitable for boolean prefix searches. + * + * IMPORTANT: This is a 1-way function! You can not convert back. + */ +string MangleUri (string const& orig) +{ + string s(orig); + size_t pos = 0; + while ((pos = s.find_first_of (": /", pos)) != string::npos) + { + s.replace (pos, 1, 1, '_'); + pos++; + } + + return s; +} + +/** + * This method expects a valid uri and tries to split it into authority, + * path and query. + * + * Note that any and all parts may be left untouched. + */ +void SplitUri (string const& uri, string &authority, + string &path, string &query) +{ + size_t colon_pos = uri.find (':'); + if (colon_pos == string::npos) return; // not an uri? + bool has_double_slash = uri.length () > colon_pos + 2 && + uri.compare (colon_pos + 1, 2, "//") == 0; + + size_t start_pos = has_double_slash ? colon_pos + 3 : colon_pos + 1; + + size_t first_slash = uri.find ('/', start_pos); + size_t question_mark_pos = uri.find ('?', first_slash == string::npos ? 
+ start_pos : first_slash + 1); + + authority = uri.substr (start_pos); + if (first_slash != string::npos) + { + authority.resize (first_slash - start_pos); + } + else if (question_mark_pos != string::npos) + { + authority.resize (question_mark_pos - start_pos); + } + + if (first_slash == string::npos) + { + first_slash = start_pos + authority.length (); + } + + if (question_mark_pos != string::npos) + { + path = uri.substr (first_slash, question_mark_pos - first_slash); + query = uri.substr (question_mark_pos + 1); + } + else + { + path = uri.substr (first_slash); + } +} + +} /* namespace StringUtils */ + +} /* namespace ZeitgeistFTS */ diff --git a/extensions/fts++/stringutils.h b/extensions/fts++/stringutils.h new file mode 100644 index 00000000..ef011d00 --- /dev/null +++ b/extensions/fts++/stringutils.h @@ -0,0 +1,42 @@ +/* + * Copyright (C) 2012 Mikkel Kamstrup Erlandsen + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ * + * Authored by Mikkel Kamstrup Erlandsen <mikkel.kamstrup@gmail.com> + * + */ + +#include <string> +#include <glib.h> + +namespace ZeitgeistFTS { + +namespace StringUtils { + +const unsigned int MAX_TERM_LENGTH = 245; + +std::string Truncate (std::string const& s, + unsigned int nbytes = MAX_TERM_LENGTH); + +std::string MangleUri (std::string const& orig); + +void SplitUri (std::string const& uri, + std::string &host, + std::string &path, + std::string &basename); + +} /* namespace StringUtils */ + +} /* namespace ZeitgeistFTS */ diff --git a/extensions/fts++/table-lookup.vala b/extensions/fts++/table-lookup.vala new file mode 120000 index 00000000..9e242838 --- /dev/null +++ b/extensions/fts++/table-lookup.vala @@ -0,0 +1 @@ +../../src/table-lookup.vala
\ No newline at end of file diff --git a/extensions/fts++/task.cpp b/extensions/fts++/task.cpp new file mode 100644 index 00000000..74c4092d --- /dev/null +++ b/extensions/fts++/task.cpp @@ -0,0 +1,47 @@ +/* + * Copyright (C) 2012 Canonical Ltd + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * Authored by Michal Hruby <michal.hruby@canonical.com> + * + */ + +#include "task.h" + +namespace ZeitgeistFTS { + +void IndexEventsTask::Process (Indexer *indexer) +{ + unsigned end_index = MIN (start_index + event_count, events->len); + for (unsigned i = start_index; i < end_index; i++) + { + indexer->IndexEvent ((ZeitgeistEvent*) g_ptr_array_index (events, i)); + } +} + +void DeleteEventsTask::Process (Indexer *indexer) +{ + for (unsigned i = 0; i < event_ids.size (); i++) + { + indexer->DeleteEvent (event_ids[i]); + } +} + +void MetadataTask::Process (Indexer *indexer) +{ + indexer->SetDbMetadata (key_name, value); +} + +} diff --git a/extensions/fts++/task.h b/extensions/fts++/task.h new file mode 100644 index 00000000..1c124382 --- /dev/null +++ b/extensions/fts++/task.h @@ -0,0 +1,100 @@ +/* + * Copyright (C) 2012 Canonical Ltd + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) 
any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * Authored by Michal Hruby <michal.hruby@canonical.com> + * + */ + +#ifndef _ZGFTS_TASK_H_ +#define _ZGFTS_TASK_H_ + +#include <glib.h> + +#include "indexer.h" + +namespace ZeitgeistFTS { + +/** + * A task contains a chunk of work defined by the Controller. + * A task should not be clever in scheduling on its own, the + * Controller is responsible for breaking down tasks in suitable + * chunks. + */ +class Task +{ +public: + virtual ~Task () {} + virtual void Process (Indexer *indexer) = 0; +}; + +class IndexEventsTask : public Task +{ +public: + void Process (Indexer *indexer); + + IndexEventsTask (GPtrArray *event_arr) + : events (event_arr), start_index (0), event_count (event_arr->len) {} + + IndexEventsTask (GPtrArray *event_arr, unsigned index, unsigned count) + : events (event_arr), start_index (index), event_count (count) {} + + virtual ~IndexEventsTask () + { + g_ptr_array_unref (events); + } + +private: + GPtrArray *events; + unsigned start_index; + unsigned event_count; +}; + +class DeleteEventsTask : public Task +{ +public: + void Process (Indexer *indexer); + + DeleteEventsTask (unsigned *event_ids_arr, int event_ids_arr_size) + : event_ids (event_ids_arr, event_ids_arr + event_ids_arr_size) {} + + virtual ~DeleteEventsTask () + { + } + +private: + std::vector<unsigned> event_ids; +}; + +class MetadataTask : public Task +{ +public: + void Process (Indexer *indexer); + + MetadataTask (std::string const& name, std::string const& val) + : key_name (name), value (val) {} + + virtual ~MetadataTask () + {} + +private: + std::string key_name; 
+ std::string value; +}; + +} + +#endif /* _ZGFTS_TASK_H_ */ + diff --git a/extensions/fts++/test/Makefile.am b/extensions/fts++/test/Makefile.am new file mode 100644 index 00000000..e36cf773 --- /dev/null +++ b/extensions/fts++/test/Makefile.am @@ -0,0 +1,27 @@ +NULL = +check_PROGRAMS = test-fts +TESTS = test-fts + +AM_CPPFLAGS = \ + $(ZEITGEIST_CFLAGS) \ + -include $(CONFIG_HEADER) \ + -w \ + -I$(srcdir)/.. \ + $(NULL) + +test_fts_SOURCES = \ + test-stringutils.cpp \ + test-indexer.cpp \ + test-fts.c \ + $(srcdir)/../stringutils.cpp \ + $(srcdir)/../controller.cpp \ + $(srcdir)/../indexer.cpp \ + $(srcdir)/../task.cpp \ + $(srcdir)/../fts.cpp \ + $(NULL) + +test_fts_LDADD = \ + $(builddir)/../libzeitgeist-internal.la \ + -lxapian \ + $(NULL) + diff --git a/extensions/fts++/test/test-fts.c b/extensions/fts++/test/test-fts.c new file mode 100644 index 00000000..6b9208fd --- /dev/null +++ b/extensions/fts++/test/test-fts.c @@ -0,0 +1,37 @@ +/* + * Copyright (C) 2012 Mikkel Kamstrup Erlandsen + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ * + * Authored by Mikkel Kamstrup Erlandsen <mikkel.kamstrup@gmail.com> + * + */ + +#include <glib-object.h> + +void test_stringutils_create_suite (void); +void test_indexer_create_suite (void); + +gint +main (gint argc, gchar *argv[]) +{ + g_type_init (); + + g_test_init (&argc, &argv, NULL); + + test_stringutils_create_suite (); + test_indexer_create_suite (); + + return g_test_run (); +} diff --git a/extensions/fts++/test/test-indexer.cpp b/extensions/fts++/test/test-indexer.cpp new file mode 100644 index 00000000..188213e7 --- /dev/null +++ b/extensions/fts++/test/test-indexer.cpp @@ -0,0 +1,531 @@ +/* + * Copyright (C) 2012 Mikkel Kamstrup Erlandsen + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ * + * Authored by Mikkel Kamstrup Erlandsen <mikkel.kamstrup@gmail.com> + * + */ + +#include <glib-object.h> + +#include "stringutils.h" +#include "fts.h" +#include <zeitgeist-internal.h> + +using namespace ZeitgeistFTS; + +typedef struct +{ + ZeitgeistDbReader *db; + ZeitgeistIndexer *indexer; +} Fixture; + +static void setup (Fixture *fix, gconstpointer data); +static void teardown (Fixture *fix, gconstpointer data); + +static void +setup (Fixture *fix, gconstpointer data) +{ + // use in-memory databases for both zg db and fts db + GError *error = NULL; + g_setenv ("ZEITGEIST_DATABASE_PATH", ":memory:", TRUE); + fix->db = ZEITGEIST_DB_READER (zeitgeist_engine_new (&error)); + + if (error) + { + g_warning ("%s", error->message); + return; + } + + fix->indexer = zeitgeist_indexer_new (fix->db, &error); + if (error) + { + g_warning ("%s", error->message); + return; + } +} + +static void +teardown (Fixture *fix, gconstpointer data) +{ + zeitgeist_indexer_free (fix->indexer); + g_object_unref (fix->db); +} + +static ZeitgeistEvent* create_test_event1 (void) +{ + ZeitgeistEvent *event = zeitgeist_event_new (); + ZeitgeistSubject *subject = zeitgeist_subject_new (); + + zeitgeist_subject_set_interpretation (subject, ZEITGEIST_NFO_RASTER_IMAGE); + zeitgeist_subject_set_manifestation (subject, ZEITGEIST_NFO_REMOTE_DATA_OBJECT); + zeitgeist_subject_set_uri (subject, "http://example.com/image.jpg"); + zeitgeist_subject_set_text (subject, "text"); + zeitgeist_subject_set_mimetype (subject, "image/png"); + + zeitgeist_event_set_interpretation (event, ZEITGEIST_ZG_ACCESS_EVENT); + zeitgeist_event_set_manifestation (event, ZEITGEIST_ZG_USER_ACTIVITY); + zeitgeist_event_set_actor (event, "application://firefox.desktop"); + zeitgeist_event_add_subject (event, subject); + + g_object_unref (subject); + return event; +} + +static ZeitgeistEvent* create_test_event2 (void) +{ + ZeitgeistEvent *event = zeitgeist_event_new (); + ZeitgeistSubject *subject = zeitgeist_subject_new (); + 
+ zeitgeist_subject_set_interpretation (subject, ZEITGEIST_NFO_WEBSITE); + zeitgeist_subject_set_manifestation (subject, ZEITGEIST_NFO_REMOTE_DATA_OBJECT); + zeitgeist_subject_set_uri (subject, "http://example.com/I%20Love%20Wikis"); + zeitgeist_subject_set_text (subject, "Example.com Wiki Page. Kanji is awesome 漢字"); + zeitgeist_subject_set_mimetype (subject, "text/html"); + + zeitgeist_event_set_interpretation (event, ZEITGEIST_ZG_ACCESS_EVENT); + zeitgeist_event_set_manifestation (event, ZEITGEIST_ZG_USER_ACTIVITY); + zeitgeist_event_set_actor (event, "application://firefox.desktop"); + zeitgeist_event_add_subject (event, subject); + + g_object_unref (subject); + return event; +} + +static ZeitgeistEvent* create_test_event3 (void) +{ + ZeitgeistEvent *event = zeitgeist_event_new (); + ZeitgeistSubject *subject = zeitgeist_subject_new (); + + zeitgeist_subject_set_interpretation (subject, ZEITGEIST_NFO_WEBSITE); + zeitgeist_subject_set_manifestation (subject, ZEITGEIST_NFO_REMOTE_DATA_OBJECT); + // Greek IDN - stands for http://παράδειγμα.δοκιμή + zeitgeist_subject_set_uri (subject, "http://xn--hxajbheg2az3al.xn--jxalpdlp/"); + zeitgeist_subject_set_text (subject, "IDNwiki"); + zeitgeist_subject_set_mimetype (subject, "text/html"); + + zeitgeist_event_set_interpretation (event, ZEITGEIST_ZG_ACCESS_EVENT); + zeitgeist_event_set_manifestation (event, ZEITGEIST_ZG_USER_ACTIVITY); + zeitgeist_event_set_actor (event, "application://firefox.desktop"); + zeitgeist_event_add_subject (event, subject); + + g_object_unref (subject); + return event; +} + +static ZeitgeistEvent* create_test_event4 (void) +{ + ZeitgeistEvent *event = zeitgeist_event_new (); + ZeitgeistSubject *subject = zeitgeist_subject_new (); + + zeitgeist_subject_set_interpretation (subject, ZEITGEIST_NFO_PRESENTATION); + zeitgeist_subject_set_manifestation (subject, ZEITGEIST_NFO_FILE_DATA_OBJECT); + zeitgeist_subject_set_uri (subject, "file:///home/username/Documents/my_fabulous_presentation.pdf"); + 
zeitgeist_subject_set_text (subject, NULL); + zeitgeist_subject_set_mimetype (subject, "application/pdf"); + + zeitgeist_event_set_interpretation (event, ZEITGEIST_ZG_MODIFY_EVENT); + zeitgeist_event_set_manifestation (event, ZEITGEIST_ZG_USER_ACTIVITY); + zeitgeist_event_set_actor (event, "application://libreoffice-impress.desktop"); + zeitgeist_event_add_subject (event, subject); + + g_object_unref (subject); + return event; +} + +// Steals the event, ref it if you want to keep it +static guint +index_event (Fixture *fix, ZeitgeistEvent *event) +{ + guint event_id = 0; + + // add event to DBs + event_id = zeitgeist_engine_insert_event (ZEITGEIST_ENGINE (fix->db), + event, NULL, NULL); + + GPtrArray *events = g_ptr_array_new_with_free_func (g_object_unref); + g_ptr_array_add (events, event); // steal event ref + zeitgeist_indexer_index_events (fix->indexer, events); + g_ptr_array_unref (events); + + while (zeitgeist_indexer_has_pending_tasks (fix->indexer)) + { + zeitgeist_indexer_process_task (fix->indexer); + } + + return event_id; +} + +static void +test_simple_query (Fixture *fix, gconstpointer data) +{ + guint matches; + guint event_id; + ZeitgeistEvent* event; + + // add test events to DBs + event_id = index_event (fix, create_test_event1 ()); + index_event (fix, create_test_event2 ()); + index_event (fix, create_test_event3 ()); + index_event (fix, create_test_event4 ()); + + GPtrArray *results = + zeitgeist_indexer_search (fix->indexer, + "text", + zeitgeist_time_range_new_anytime (), + g_ptr_array_new (), + 0, + 10, + ZEITGEIST_RESULT_TYPE_MOST_RECENT_EVENTS, + &matches, + NULL); + + g_assert_cmpuint (matches, >, 0); + g_assert_cmpuint (results->len, ==, 1); + + event = (ZeitgeistEvent*) results->pdata[0]; + g_assert_cmpuint (zeitgeist_event_get_id (event), ==, event_id); + + ZeitgeistSubject *subject = (ZeitgeistSubject*) + g_ptr_array_index (zeitgeist_event_get_subjects (event), 0); + g_assert_cmpstr (zeitgeist_subject_get_text (subject), ==, "text"); 
+} + +static void +test_simple_with_filter (Fixture *fix, gconstpointer data) +{ + guint matches; + guint event_id; + ZeitgeistEvent* event; + + // add test events to DBs + index_event (fix, create_test_event1 ()); + index_event (fix, create_test_event2 ()); + + GPtrArray *filters = g_ptr_array_new_with_free_func (g_object_unref); + event = zeitgeist_event_new (); + zeitgeist_event_set_interpretation (event, ZEITGEIST_NFO_DOCUMENT); + g_ptr_array_add (filters, event); // steals ref + + GPtrArray *results = + zeitgeist_indexer_search (fix->indexer, + "text", + zeitgeist_time_range_new_anytime (), + filters, + 0, + 10, + ZEITGEIST_RESULT_TYPE_MOST_RECENT_EVENTS, + &matches, + NULL); + + g_assert_cmpuint (results->len, ==, 0); + g_assert_cmpuint (matches, ==, 0); +} + +static void +test_simple_with_valid_filter (Fixture *fix, gconstpointer data) +{ + guint matches; + guint event_id; + ZeitgeistEvent* event; + ZeitgeistSubject *subject; + + // add test events to DBs + event_id = index_event (fix, create_test_event1 ()); + index_event (fix, create_test_event2 ()); + + GPtrArray *filters = g_ptr_array_new_with_free_func (g_object_unref); + event = zeitgeist_event_new (); + subject = zeitgeist_subject_new (); + zeitgeist_subject_set_interpretation (subject, ZEITGEIST_NFO_IMAGE); + zeitgeist_event_add_subject (event, subject); + g_ptr_array_add (filters, event); // steals ref + + GPtrArray *results = + zeitgeist_indexer_search (fix->indexer, + "text", + zeitgeist_time_range_new_anytime (), + filters, + 0, + 10, + ZEITGEIST_RESULT_TYPE_MOST_RECENT_EVENTS, + &matches, + NULL); + + g_assert_cmpuint (matches, >, 0); + g_assert_cmpuint (results->len, ==, 1); + + event = (ZeitgeistEvent*) results->pdata[0]; + g_assert_cmpuint (zeitgeist_event_get_id (event), ==, event_id); + + subject = (ZeitgeistSubject*) + g_ptr_array_index (zeitgeist_event_get_subjects (event), 0); + g_assert_cmpstr (zeitgeist_subject_get_text (subject), ==, "text"); +} + +static void +test_simple_negation 
(Fixture *fix, gconstpointer data) +{ + guint matches; + guint event_id; + ZeitgeistEvent* event; + ZeitgeistSubject *subject; + + // add test events to DBs + event_id = index_event (fix, create_test_event1 ()); + index_event (fix, create_test_event2 ()); + + GPtrArray *filters = g_ptr_array_new_with_free_func (g_object_unref); + event = zeitgeist_event_new (); + subject = zeitgeist_subject_new (); + zeitgeist_subject_set_interpretation (subject, "!" ZEITGEIST_NFO_IMAGE); + zeitgeist_event_add_subject (event, subject); + g_ptr_array_add (filters, event); // steals ref + + GPtrArray *results = + zeitgeist_indexer_search (fix->indexer, + "text", + zeitgeist_time_range_new_anytime (), + filters, + 0, + 10, + ZEITGEIST_RESULT_TYPE_MOST_RECENT_EVENTS, + &matches, + NULL); + + g_assert_cmpuint (matches, ==, 0); + g_assert_cmpuint (results->len, ==, 0); +} + +static void +test_simple_noexpand (Fixture *fix, gconstpointer data) +{ + guint matches; + guint event_id; + ZeitgeistEvent* event; + ZeitgeistSubject *subject; + + // add test events to DBs + event_id = index_event (fix, create_test_event1 ()); + index_event (fix, create_test_event2 ()); + + GPtrArray *filters = g_ptr_array_new_with_free_func (g_object_unref); + event = zeitgeist_event_new (); + subject = zeitgeist_subject_new (); + zeitgeist_subject_set_interpretation (subject, "+" ZEITGEIST_NFO_IMAGE); + zeitgeist_event_add_subject (event, subject); + g_ptr_array_add (filters, event); // steals ref + + GPtrArray *results = + zeitgeist_indexer_search (fix->indexer, + "text", + zeitgeist_time_range_new_anytime (), + filters, + 0, + 10, + ZEITGEIST_RESULT_TYPE_MOST_RECENT_EVENTS, + &matches, + NULL); + + g_assert_cmpuint (matches, ==, 0); + g_assert_cmpuint (results->len, ==, 0); +} + +static void +test_simple_noexpand_valid (Fixture *fix, gconstpointer data) +{ + guint matches; + guint event_id; + ZeitgeistEvent* event; + ZeitgeistSubject *subject; + + // add test events to DBs + event_id = index_event (fix, 
create_test_event1 ()); + index_event (fix, create_test_event2 ()); + + GPtrArray *filters = g_ptr_array_new_with_free_func (g_object_unref); + event = zeitgeist_event_new (); + subject = zeitgeist_subject_new (); + zeitgeist_subject_set_interpretation (subject, "+"ZEITGEIST_NFO_RASTER_IMAGE); + zeitgeist_event_add_subject (event, subject); + g_ptr_array_add (filters, event); // steals ref + + GPtrArray *results = + zeitgeist_indexer_search (fix->indexer, + "text", + zeitgeist_time_range_new_anytime (), + filters, + 0, + 10, + ZEITGEIST_RESULT_TYPE_MOST_RECENT_EVENTS, + &matches, + NULL); + + g_assert_cmpuint (matches, >, 0); + g_assert_cmpuint (results->len, ==, 1); + + event = (ZeitgeistEvent*) results->pdata[0]; + g_assert_cmpuint (zeitgeist_event_get_id (event), ==, event_id); + + subject = (ZeitgeistSubject*) + g_ptr_array_index (zeitgeist_event_get_subjects (event), 0); + g_assert_cmpstr (zeitgeist_subject_get_text (subject), ==, "text"); +} + +static void +test_simple_url_unescape (Fixture *fix, gconstpointer data) +{ + guint matches; + guint event_id; + ZeitgeistEvent* event; + ZeitgeistSubject *subject; + + // add test events to DBs + index_event (fix, create_test_event1 ()); + event_id = index_event (fix, create_test_event2 ()); + + GPtrArray *filters = g_ptr_array_new_with_free_func (g_object_unref); + event = zeitgeist_event_new (); + subject = zeitgeist_subject_new (); + zeitgeist_subject_set_interpretation (subject, ZEITGEIST_NFO_WEBSITE); + zeitgeist_event_add_subject (event, subject); + g_ptr_array_add (filters, event); // steals ref + + GPtrArray *results = + zeitgeist_indexer_search (fix->indexer, + "love", + zeitgeist_time_range_new_anytime (), + filters, + 0, + 10, + ZEITGEIST_RESULT_TYPE_MOST_RECENT_EVENTS, + &matches, + NULL); + + g_assert_cmpuint (matches, >, 0); + g_assert_cmpuint (results->len, ==, 1); + + event = (ZeitgeistEvent*) results->pdata[0]; + g_assert_cmpuint (zeitgeist_event_get_id (event), ==, event_id); + + subject = 
(ZeitgeistSubject*) + g_ptr_array_index (zeitgeist_event_get_subjects (event), 0); + g_assert_cmpstr (zeitgeist_subject_get_text (subject), ==, "Example.com Wiki Page. Kanji is awesome 漢字"); +} + +static void +test_simple_cjk (Fixture *fix, gconstpointer data) +{ + guint matches; + guint event_id; + ZeitgeistEvent* event; + ZeitgeistSubject *subject; + + // add test events to DBs + index_event (fix, create_test_event1 ()); + event_id = index_event (fix, create_test_event2 ()); + + GPtrArray *results = + zeitgeist_indexer_search (fix->indexer, + "漢*", + zeitgeist_time_range_new_anytime (), + g_ptr_array_new (), + 0, + 10, + ZEITGEIST_RESULT_TYPE_MOST_RECENT_EVENTS, + &matches, + NULL); + + g_assert_cmpuint (matches, >, 0); + g_assert_cmpuint (results->len, ==, 1); + + event = (ZeitgeistEvent*) results->pdata[0]; + g_assert_cmpuint (zeitgeist_event_get_id (event), ==, event_id); + + subject = (ZeitgeistSubject*) + g_ptr_array_index (zeitgeist_event_get_subjects (event), 0); + g_assert_cmpstr (zeitgeist_subject_get_text (subject), ==, "Example.com Wiki Page. 
Kanji is awesome 漢字"); +} + +static void +test_simple_idn_support (Fixture *fix, gconstpointer data) +{ + guint matches; + guint event_id; + ZeitgeistEvent* event; + ZeitgeistSubject *subject; + + // add test events to DBs + index_event (fix, create_test_event1 ()); + index_event (fix, create_test_event2 ()); + event_id = index_event (fix, create_test_event3 ()); + + GPtrArray *results = + zeitgeist_indexer_search (fix->indexer, + "παράδειγμα", + zeitgeist_time_range_new_anytime (), + g_ptr_array_new (), + 0, + 10, + ZEITGEIST_RESULT_TYPE_MOST_RECENT_EVENTS, + &matches, + NULL); + + g_assert_cmpuint (matches, >, 0); + g_assert_cmpuint (results->len, ==, 1); + + event = (ZeitgeistEvent*) results->pdata[0]; + g_assert_cmpuint (zeitgeist_event_get_id (event), ==, event_id); + + subject = (ZeitgeistSubject*) + g_ptr_array_index (zeitgeist_event_get_subjects (event), 0); + g_assert_cmpstr (zeitgeist_subject_get_text (subject), ==, "IDNwiki"); +} + +G_BEGIN_DECLS + +static void discard_message (const gchar *domain, + GLogLevelFlags level, + const gchar *msg, + gpointer userdata) +{ +} + +void test_indexer_create_suite (void) +{ + g_test_add ("/Zeitgeist/FTS/Indexer/SimpleQuery", Fixture, 0, + setup, test_simple_query, teardown); + g_test_add ("/Zeitgeist/FTS/Indexer/SimpleWithFilter", Fixture, 0, + setup, test_simple_with_filter, teardown); + g_test_add ("/Zeitgeist/FTS/Indexer/SimpleWithValidFilter", Fixture, 0, + setup, test_simple_with_valid_filter, teardown); + g_test_add ("/Zeitgeist/FTS/Indexer/SimpleNegation", Fixture, 0, + setup, test_simple_negation, teardown); + g_test_add ("/Zeitgeist/FTS/Indexer/SimpleNoexpand", Fixture, 0, + setup, test_simple_noexpand, teardown); + g_test_add ("/Zeitgeist/FTS/Indexer/SimpleNoexpandValid", Fixture, 0, + setup, test_simple_noexpand_valid, teardown); + g_test_add ("/Zeitgeist/FTS/Indexer/URLUnescape", Fixture, 0, + setup, test_simple_url_unescape, teardown); + g_test_add ("/Zeitgeist/FTS/Indexer/IDNSupport", Fixture, 0, + 
setup, test_simple_idn_support, teardown); + g_test_add ("/Zeitgeist/FTS/Indexer/CJK", Fixture, 0, + setup, test_simple_cjk, teardown); + + // get rid of the "rebuilding index..." messages + g_log_set_handler (NULL, G_LOG_LEVEL_MESSAGE, discard_message, NULL); +} + +G_END_DECLS diff --git a/extensions/fts++/test/test-stringutils.cpp b/extensions/fts++/test/test-stringutils.cpp new file mode 100644 index 00000000..3f9405fa --- /dev/null +++ b/extensions/fts++/test/test-stringutils.cpp @@ -0,0 +1,178 @@ +/* + * Copyright (C) 2012 Mikkel Kamstrup Erlandsen + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ * + * Authored by Mikkel Kamstrup Erlandsen <mikkel.kamstrup@gmail.com> + * + */ + +#include <glib-object.h> + +#include "stringutils.h" + +using namespace ZeitgeistFTS; + +typedef struct +{ + int i; +} Fixture; + +static void setup (Fixture *fix, gconstpointer data); +static void teardown (Fixture *fix, gconstpointer data); + +static void +setup (Fixture *fix, gconstpointer data) +{ + +} + +static void +teardown (Fixture *fix, gconstpointer data) +{ + +} + +static void +test_truncate (Fixture *fix, gconstpointer data) +{ + g_assert_cmpstr ("", ==, StringUtils::Truncate("").c_str ()); + + g_assert_cmpstr ("", ==, StringUtils::Truncate("a", 0).c_str ()); + g_assert_cmpstr ("a", ==, StringUtils::Truncate("a", 1).c_str ()); + g_assert_cmpstr ("a", ==, StringUtils::Truncate("a").c_str ()); + + g_assert_cmpstr ("", ==, StringUtils::Truncate("aa", 0).c_str ()); + g_assert_cmpstr ("a", ==, StringUtils::Truncate("aa", 1).c_str ()); + g_assert_cmpstr ("aa", ==, StringUtils::Truncate("aa", 2).c_str ()); + g_assert_cmpstr ("aa", ==, StringUtils::Truncate("aa").c_str ()); + + + g_assert_cmpstr ("", ==, StringUtils::Truncate("å", 0).c_str ()); + g_assert_cmpstr ("", ==, StringUtils::Truncate("å", 1).c_str ()); + g_assert_cmpstr ("å", ==, StringUtils::Truncate("å").c_str ()); + + g_assert_cmpstr ("", ==, StringUtils::Truncate("åå", 0).c_str ()); + g_assert_cmpstr ("", ==, StringUtils::Truncate("åå", 1).c_str ()); + g_assert_cmpstr ("å", ==, StringUtils::Truncate("åå", 2).c_str ()); + g_assert_cmpstr ("å", ==, StringUtils::Truncate("åå", 3).c_str ()); + g_assert_cmpstr ("åå", ==, StringUtils::Truncate("åå", 4).c_str ()); + g_assert_cmpstr ("åå", ==, StringUtils::Truncate("åå").c_str ()); +} + +static void +test_mangle (Fixture *fix, gconstpointer data) +{ + g_assert_cmpstr ("", ==, StringUtils::MangleUri("").c_str ()); + + g_assert_cmpstr ("file", ==, StringUtils::MangleUri("file").c_str ()); + g_assert_cmpstr ("file___", ==, StringUtils::MangleUri("file://").c_str ()); + 
g_assert_cmpstr ("http___www.zeitgeist-project.com", ==, + StringUtils::MangleUri("http://www.zeitgeist-project.com").c_str ()); + + g_assert_cmpstr ("scheme_no_spaces_in_uris", ==, + StringUtils::MangleUri("scheme:no spaces in uris").c_str ()); +} + +static void +test_split (Fixture *fix, gconstpointer data) +{ + std::string authority, path, query; + + authority = path = query = ""; + StringUtils::SplitUri ("", authority, path, query); // doesn't crash + + g_assert_cmpstr ("", ==, authority.c_str ()); + g_assert_cmpstr ("", ==, path.c_str ()); + g_assert_cmpstr ("", ==, query.c_str ()); + + authority = path = query = ""; + StringUtils::SplitUri ("scheme:", authority, path, query); // doesn't crash + + g_assert_cmpstr ("", ==, authority.c_str ()); + g_assert_cmpstr ("", ==, path.c_str ()); + g_assert_cmpstr ("", ==, query.c_str ()); + + authority = path = query = ""; + StringUtils::SplitUri ("ldap://ldap1.example.net:6666/o=University%20" + "of%20Michigan,c=US??sub?(cn=Babs%20Jensen)", + authority, path, query); + + g_assert_cmpstr ("ldap1.example.net:6666", ==, authority.c_str ()); + g_assert_cmpstr ("/o=University%20of%20Michigan,c=US", ==, path.c_str ()); + g_assert_cmpstr ("?sub?(cn=Babs%20Jensen)", ==, query.c_str ()); + + + authority = path = query = ""; + StringUtils::SplitUri ("mailto:jsmith@example.com", + authority, path, query); + + g_assert_cmpstr ("jsmith@example.com", ==, authority.c_str ()); + g_assert_cmpstr ("", ==, path.c_str ()); + g_assert_cmpstr ("", ==, query.c_str ()); + + authority = path = query = ""; + StringUtils::SplitUri ("mailto:jsmith@example.com?subject=A%20Test&body=" + "My%20idea%20is%3A%20%0A", authority, path, query); + + g_assert_cmpstr ("jsmith@example.com", ==, authority.c_str ()); + g_assert_cmpstr ("", ==, path.c_str ()); + g_assert_cmpstr ("subject=A%20Test&body=My%20idea%20is%3A%20%0A", ==, query.c_str ()); + + authority = path = query = ""; + StringUtils::SplitUri ("sip:alice@atlanta.com?subject=project%20x", + authority, 
path, query); + + g_assert_cmpstr ("alice@atlanta.com", ==, authority.c_str ()); + g_assert_cmpstr ("", ==, path.c_str ()); + g_assert_cmpstr ("subject=project%20x", ==, query.c_str ()); + + authority = path = query = ""; + StringUtils::SplitUri ("file:///", + authority, path, query); + + g_assert_cmpstr ("", ==, authority.c_str ()); + g_assert_cmpstr ("/", ==, path.c_str ()); + g_assert_cmpstr ("", ==, query.c_str ()); + + authority = path = query = ""; + StringUtils::SplitUri ("file:///home/username/file.ext", + authority, path, query); + + g_assert_cmpstr ("", ==, authority.c_str ()); + g_assert_cmpstr ("/home/username/file.ext", ==, path.c_str ()); + g_assert_cmpstr ("", ==, query.c_str ()); + + authority = path = query = ""; + StringUtils::SplitUri ("dns://192.168.1.1/ftp.example.org?type=A", + authority, path, query); + + g_assert_cmpstr ("192.168.1.1", ==, authority.c_str ()); + g_assert_cmpstr ("/ftp.example.org", ==, path.c_str ()); + g_assert_cmpstr ("type=A", ==, query.c_str ()); +} + +G_BEGIN_DECLS + +void test_stringutils_create_suite (void) +{ + g_test_add ("/Zeitgeist/FTS/StringUtils/Truncate", Fixture, 0, + setup, test_truncate, teardown); + g_test_add ("/Zeitgeist/FTS/StringUtils/MangleUri", Fixture, 0, + setup, test_mangle, teardown); + g_test_add ("/Zeitgeist/FTS/StringUtils/SplitUri", Fixture, 0, + setup, test_split, teardown); +} + +G_END_DECLS diff --git a/extensions/fts++/utils.vala b/extensions/fts++/utils.vala new file mode 120000 index 00000000..6da71ce8 --- /dev/null +++ b/extensions/fts++/utils.vala @@ -0,0 +1 @@ +../../src/utils.vala
\ No newline at end of file diff --git a/extensions/fts++/where-clause.vala b/extensions/fts++/where-clause.vala new file mode 120000 index 00000000..efc7d8f9 --- /dev/null +++ b/extensions/fts++/where-clause.vala @@ -0,0 +1 @@ +../../src/where-clause.vala
\ No newline at end of file diff --git a/extensions/fts++/zeitgeist-fts.vala b/extensions/fts++/zeitgeist-fts.vala new file mode 100644 index 00000000..f245b03e --- /dev/null +++ b/extensions/fts++/zeitgeist-fts.vala @@ -0,0 +1,301 @@ +/* zeitgeist-fts.vala + * + * Copyright © 2012 Canonical Ltd. + * Copyright © 2012 Michal Hruby <michal.mhr@gmail.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + */ + +namespace Zeitgeist +{ + + [DBus (name = "org.freedesktop.DBus")] + public interface RemoteDBus : Object + { + public abstract bool name_has_owner (string name) throws IOError; + } + + public class FtsDaemon : Object, RemoteSimpleIndexer, RemoteMonitor + { + //const string DBUS_NAME = "org.gnome.zeitgeist.Fts"; + const string DBUS_NAME = "org.gnome.zeitgeist.SimpleIndexer"; + const string ZEITGEIST_DBUS_NAME = "org.gnome.zeitgeist.Engine"; + private static bool show_version_info = false; + private static string log_level = ""; + + const OptionEntry[] options = + { + { + "version", 'v', 0, OptionArg.NONE, out show_version_info, + "Print program's version number and exit", null + }, + { + "log-level", 0, 0, OptionArg.STRING, out log_level, + "How much information should be printed; possible values: " + + "DEBUG, INFO, WARNING, ERROR, CRITICAL", "LEVEL" + }, + { + null + } + }; + + private static FtsDaemon? 
instance; + private static MainLoop mainloop; + private static bool name_acquired = false; + + private DbReader engine; + private Indexer indexer; + + private uint indexer_register_id; + private uint monitor_register_id; + private unowned DBusConnection connection; + + public FtsDaemon () throws EngineError + { + engine = new DbReader (); + indexer = new Indexer (engine); + } + + private void do_quit () + { + engine.close (); + mainloop.quit (); + } + + public void register_dbus_object (DBusConnection conn) throws IOError + { + connection = conn; + indexer_register_id = conn.register_object<RemoteSimpleIndexer> ( + "/org/gnome/zeitgeist/index/activity", this); + monitor_register_id = conn.register_object<RemoteMonitor> ( + "/org/gnome/zeitgeist/monitor/special", this); + } + + public void unregister_dbus_object () + { + if (indexer_register_id != 0) + { + connection.unregister_object (indexer_register_id); + indexer_register_id = 0; + } + + if (monitor_register_id != 0) + { + connection.unregister_object (monitor_register_id); + monitor_register_id = 0; + } + } + + public async void notify_insert (Variant time_range, Variant events) + throws IOError + { + debug ("got insertion notification"); + var events_arr = Events.from_variant (events); + indexer.index_events (events_arr); + } + + public async void notify_delete (Variant time_range, uint32[] event_ids) + throws IOError + { + debug ("got deletion notification"); + indexer.delete_events (event_ids); + } + + public async void search (string query_string, Variant time_range, + Variant filter_templates, + uint offset, uint count, uint result_type, + out Variant events, out uint matches) + throws Error + { + var tr = new TimeRange.from_variant (time_range); + var templates = Events.from_variant (filter_templates); + var results = instance.indexer.search (query_string, + tr, + templates, + offset, + count, + (ResultType) result_type, + out matches); + + events = Events.to_variant (results); + } + + private static void 
name_acquired_callback (DBusConnection conn) + { + name_acquired = true; + } + + private static void name_lost_callback (DBusConnection? conn) + { + if (conn == null) + { + // something happened to our bus connection + mainloop.quit (); + } + else if (instance != null && name_acquired) + { + // we owned the name and we lost it... what to do? + mainloop.quit (); + } + } + + static void run () + throws Error + { + DBusConnection connection = Bus.get_sync (BusType.SESSION); + var proxy = connection.get_proxy_sync<RemoteDBus> ( + "org.freedesktop.DBus", "/org/freedesktop/DBus", + DBusProxyFlags.DO_NOT_LOAD_PROPERTIES); + bool zeitgeist_up = proxy.name_has_owner (ZEITGEIST_DBUS_NAME); + // FIXME: throw an error that zeitgeist isn't up? or just start it? + bool name_owned = proxy.name_has_owner (DBUS_NAME); + if (name_owned) + { + throw new EngineError.EXISTING_INSTANCE ( + "The FTS daemon is running already."); + } + + /* setup Engine instance and register objects on dbus */ + try + { + instance = new FtsDaemon (); + instance.register_dbus_object (connection); + } + catch (Error err) + { + if (err is EngineError.DATABASE_CANTOPEN) + { + warning ("Could not access the database file.\n" + + "Please check the permissions of file %s.", + Utils.get_database_file_path ()); + } + else if (err is EngineError.DATABASE_BUSY) + { + warning ("It looks like another Zeitgeist instance " + + "is already running (the database is locked)."); + } + throw err; + } + + uint owner_id = Bus.own_name_on_connection (connection, + DBUS_NAME, + BusNameOwnerFlags.NONE, + name_acquired_callback, + name_lost_callback); + + mainloop = new MainLoop (); + mainloop.run (); + + if (instance != null) + { + Bus.unown_name (owner_id); + instance.unregister_dbus_object (); + instance = null; + + // make sure we send quit reply + try + { + connection.flush_sync (); + } + catch (Error e) + { + warning ("%s", e.message); + } + } + } + + static void safe_exit () + { + instance.do_quit (); + } + + static int 
main (string[] args) + { + // FIXME: the cat process xapian spawns won't like this and we + // can freeze if it dies + Posix.signal (Posix.SIGHUP, safe_exit); + Posix.signal (Posix.SIGINT, safe_exit); + Posix.signal (Posix.SIGTERM, safe_exit); + + var opt_context = new OptionContext (" - Zeitgeist FTS daemon"); + opt_context.add_main_entries (options, null); + + try + { + opt_context.parse (ref args); + + if (show_version_info) + { + stdout.printf (Config.VERSION + "\n"); + return 0; + } + + LogLevelFlags discarded = LogLevelFlags.LEVEL_DEBUG; + if (log_level != null) + { + var ld = LogLevelFlags.LEVEL_DEBUG; + var li = LogLevelFlags.LEVEL_INFO; + var lm = LogLevelFlags.LEVEL_MESSAGE; + var lw = LogLevelFlags.LEVEL_WARNING; + var lc = LogLevelFlags.LEVEL_CRITICAL; + switch (log_level.up ()) + { + case "DEBUG": + discarded = 0; + break; + case "INFO": + discarded = ld; + break; + case "WARNING": + discarded = ld | li | lm; + break; + case "CRITICAL": + discarded = ld | li | lm | lw; + break; + case "ERROR": + discarded = ld | li | lm | lw | lc; + break; + } + } + if (discarded != 0) + { + Log.set_handler ("", discarded, () => {}); + } + else + { + Environment.set_variable ("G_MESSAGES_DEBUG", "all", true); + } + + run (); + } + catch (Error err) + { + if (err is EngineError.DATABASE_CANTOPEN) + return 21; + if (err is EngineError.DATABASE_BUSY) + return 22; + + warning ("%s", err.message); + return 1; + } + + return 0; + } + + } + +} + +// vim:expandtab:ts=4:sw=4 diff --git a/extensions/fts-python/Makefile.am b/extensions/fts-python/Makefile.am deleted file mode 100644 index 73cf55ee..00000000 --- a/extensions/fts-python/Makefile.am +++ /dev/null @@ -1,23 +0,0 @@ -NULL = - -ftsdir = $(pkgdatadir)/fts-python -dist_fts_SCRIPTS = \ - fts.py \ - $(NULL) - -dist_fts_DATA = \ - datamodel.py \ - constants.py \ - lrucache.py \ - sql.py \ - $(NULL) - -servicedir = $(DBUS_SERVICES_DIR) -service_DATA = org.gnome.zeitgeist.fts.service - -org.gnome.zeitgeist.fts.service: 
org.gnome.zeitgeist.fts.service.in - $(AM_V_GEN)sed -e s!\@pkgdatadir\@!$(pkgdatadir)! < $< > $@ -org.gnome.zeitgeist.fts.service: Makefile - -EXTRA_DIST = org.gnome.zeitgeist.fts.service.in -CLEANFILES = org.gnome.zeitgeist.fts.service diff --git a/extensions/fts-python/constants.py b/extensions/fts-python/constants.py deleted file mode 100644 index f52e5efb..00000000 --- a/extensions/fts-python/constants.py +++ /dev/null @@ -1,71 +0,0 @@ -# -.- coding: utf-8 -.- - -# Zeitgeist -# -# Copyright © 2009 Markus Korn <thekorn@gmx.de> -# Copyright © 2009-2010 Siegfried-Angel Gevatter Pujals <rainct@ubuntu.com> -# Copyright © 2009 Mikkel Kamstrup Erlandsen <mikkel.kamstrup@gmail.com> -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU Lesser General Public License as published by -# the Free Software Foundation, either version 2.1 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public License -# along with this program. If not, see <http://www.gnu.org/licenses/>. - -import os -import logging -from xdg import BaseDirectory - -from zeitgeist.client import ZeitgeistDBusInterface - -__all__ = [ - "log", - "get_engine", - "constants" -] - -log = logging.getLogger("zeitgeist.engine") - -_engine = None -def get_engine(): - """ Get the running engine instance or create a new one. 
""" - global _engine - if _engine is None or _engine.is_closed(): - import main # _zeitgeist.engine.main - _engine = main.ZeitgeistEngine() - return _engine - -class _Constants: - # Directories - DATA_PATH = os.environ.get("ZEITGEIST_DATA_PATH", - BaseDirectory.save_data_path("zeitgeist")) - DATABASE_FILE = os.environ.get("ZEITGEIST_DATABASE_PATH", - os.path.join(DATA_PATH, "activity.sqlite")) - DATABASE_FILE_BACKUP = os.environ.get("ZEITGEIST_DATABASE_BACKUP_PATH", - os.path.join(DATA_PATH, "activity.sqlite.bck")) - DEFAULT_LOG_PATH = os.path.join(BaseDirectory.xdg_cache_home, - "zeitgeist", "daemon.log") - - # D-Bus - DBUS_INTERFACE = ZeitgeistDBusInterface.INTERFACE_NAME - SIG_EVENT = "asaasay" - - # Required version of DB schema - CORE_SCHEMA="core" - CORE_SCHEMA_VERSION = 4 - - USER_EXTENSION_PATH = os.path.join(DATA_PATH, "extensions") - - # configure runtime cache for events - # default size is 2000 - CACHE_SIZE = int(os.environ.get("ZEITGEIST_CACHE_SIZE", 2000)) - log.debug("Cache size = %i" %CACHE_SIZE) - -constants = _Constants() diff --git a/extensions/fts-python/datamodel.py b/extensions/fts-python/datamodel.py deleted file mode 100644 index defbe711..00000000 --- a/extensions/fts-python/datamodel.py +++ /dev/null @@ -1,83 +0,0 @@ -# -.- coding: utf-8 -.- - -# Zeitgeist -# -# Copyright © 2009 Mikkel Kamstrup Erlandsen <mikkel.kamstrup@gmail.com> -# Copyright © 2009 Markus Korn <thekorn@gmx.de> -# Copyright © 2009 Seif Lotfy <seif@lotfy.com> -# Copyright © 2009-2010 Siegfried-Angel Gevatter Pujals <rainct@ubuntu.com> -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU Lesser General Public License as published by -# the Free Software Foundation, either version 2.1 of the License, or -# (at your option) any later version. 
-# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public License -# along with this program. If not, see <http://www.gnu.org/licenses/>. - -from zeitgeist.datamodel import Event as OrigEvent, Subject as OrigSubject, \ - DataSource as OrigDataSource - -class Event(OrigEvent): - - @staticmethod - def _to_unicode(obj): - """ - Return an unicode representation of the given object. - If obj is None, return an empty string. - """ - return unicode(obj) if obj is not None else u"" - - @staticmethod - def _make_dbus_sendable(obj): - """ - Ensure that all fields in the event struct are non-None - """ - for n, value in enumerate(obj[0]): - obj[0][n] = obj._to_unicode(value) - for subject in obj[1]: - for n, value in enumerate(subject): - subject[n] = obj._to_unicode(value) - # The payload require special handling, since it is binary data - # If there is indeed data here, we must not unicode encode it! - if obj[2] is None: - obj[2] = u"" - elif isinstance(obj[2], unicode): - obj[2] = str(obj[2]) - return obj - - @staticmethod - def get_plain(ev): - """ - Ensure that an Event instance is a Plain Old Python Object (popo), - without DBus wrappings etc. - """ - popo = [] - popo.append(map(unicode, ev[0])) - popo.append([map(unicode, subj) for subj in ev[1]]) - # We need the check here so that if D-Bus gives us an empty - # byte array we don't serialize the text "dbus.Array(...)". 
- popo.append(str(ev[2]) if ev[2] else u'') - return popo - -class Subject(OrigSubject): - pass - -class DataSource(OrigDataSource): - - @staticmethod - def get_plain(datasource): - for plaintype, props in { - unicode: (DataSource.Name, DataSource.Description), - lambda x: map(Event.get_plain, x): (DataSource.EventTemplates,), - bool: (DataSource.Running, DataSource.Enabled), - int: (DataSource.LastSeen,), - }.iteritems(): - for prop in props: - datasource[prop] = plaintype(datasource[prop]) - return tuple(datasource) diff --git a/extensions/fts-python/fts.py b/extensions/fts-python/fts.py deleted file mode 100644 index 772eb699..00000000 --- a/extensions/fts-python/fts.py +++ /dev/null @@ -1,1273 +0,0 @@ -#!/usr/bin/env python -# -.- coding: utf-8 -.- - -# Zeitgeist -# -# Copyright © 2009 Mikkel Kamstrup Erlandsen <mikkel.kamstrup@gmail.com> -# Copyright © 2010 Canonical Ltd -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU Lesser General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public License -# along with this program. If not, see <http://www.gnu.org/licenses/>. -# - -# -# TODO -# -# - Delete events hook -# - ? 
Filter on StorageState -# - Throttle IO and CPU where possible - -import os, sys -import time -import pickle -import dbus -import sqlite3 -import dbus.service -from xdg import BaseDirectory -from xdg.DesktopEntry import DesktopEntry, xdg_data_dirs -import logging -import subprocess -from xml.dom import minidom -import xapian -import os -from Queue import Queue, Empty -import threading -from urllib import quote as url_escape, unquote as url_unescape -import gobject, gio -from cStringIO import StringIO - -from collections import defaultdict -from array import array -from zeitgeist.datamodel import Event as OrigEvent, StorageState, TimeRange, \ - ResultType, get_timestamp_for_now, Interpretation, Symbol, NEGATION_OPERATOR, WILDCARD, NULL_EVENT -from datamodel import Event, Subject -from constants import constants -from zeitgeist.client import ZeitgeistClient, ZeitgeistDBusInterface -from sql import get_default_cursor, unset_cursor, TableLookup, WhereClause -from lrucache import LRUCache - -ZG_CLIENT = ZeitgeistClient() - -logging.basicConfig(level=logging.DEBUG) -log = logging.getLogger("zeitgeist.fts") - -INDEX_FILE = os.path.join(constants.DATA_PATH, "bb.fts.index") -INDEX_VERSION = "1" -INDEX_LOCK = threading.Lock() -FTS_DBUS_BUS_NAME = "org.gnome.zeitgeist.SimpleIndexer" -FTS_DBUS_OBJECT_PATH = "/org/gnome/zeitgeist/index/activity" -FTS_DBUS_INTERFACE = "org.gnome.zeitgeist.Index" - -FILTER_PREFIX_EVENT_INTERPRETATION = "ZGEI" -FILTER_PREFIX_EVENT_MANIFESTATION = "ZGEM" -FILTER_PREFIX_ACTOR = "ZGA" -FILTER_PREFIX_SUBJECT_URI = "ZGSU" -FILTER_PREFIX_SUBJECT_INTERPRETATION = "ZGSI" -FILTER_PREFIX_SUBJECT_MANIFESTATION = "ZGSM" -FILTER_PREFIX_SUBJECT_ORIGIN = "ZGSO" -FILTER_PREFIX_SUBJECT_MIMETYPE = "ZGST" -FILTER_PREFIX_SUBJECT_STORAGE = "ZGSS" -FILTER_PREFIX_XDG_CATEGORY = "AC" - -VALUE_EVENT_ID = 0 -VALUE_TIMESTAMP = 1 - -MAX_CACHE_BATCH_SIZE = constants.CACHE_SIZE/2 - -# When sorting by of the COALESCING_RESULT_TYPES result types, -# we need to fetch some extra 
events from the Xapian index because -# the final result set will be coalesced on some property of the event -COALESCING_RESULT_TYPES = [ \ - ResultType.MostRecentSubjects, - ResultType.LeastRecentSubjects, - ResultType.MostPopularSubjects, - ResultType.LeastPopularSubjects, - ResultType.MostRecentActor, - ResultType.LeastRecentActor, - ResultType.MostPopularActor, - ResultType.LeastPopularActor, -] - -MAX_TERM_LENGTH = 245 - - -class NegationNotSupported(ValueError): - pass - -class WildcardNotSupported(ValueError): - pass - -def parse_negation(kind, field, value, parse_negation=True): - """checks if value starts with the negation operator, - if value starts with the negation operator but the field does - not support negation a ValueError is raised. - This function returns a (value_without_negation, negation)-tuple - """ - negation = False - if parse_negation and value.startswith(NEGATION_OPERATOR): - negation = True - value = value[len(NEGATION_OPERATOR):] - if negation and field not in kind.SUPPORTS_NEGATION: - raise NegationNotSupported("This field does not support negation") - return value, negation - -def parse_wildcard(kind, field, value): - """checks if value ends with the a wildcard, - if value ends with a wildcard but the field does not support wildcards - a ValueError is raised. - This function returns a (value_without_wildcard, wildcard)-tuple - """ - wildcard = False - if value.endswith(WILDCARD): - wildcard = True - value = value[:-len(WILDCARD)] - if wildcard and field not in kind.SUPPORTS_WILDCARDS: - raise WildcardNotSupported("This field does not support wildcards") - return value, wildcard - -def parse_operators(kind, field, value): - """runs both (parse_negation and parse_wildcard) parser functions - on query values, and handles the special case of Subject.Text correctly. 
- returns a (value_without_negation_and_wildcard, negation, wildcard)-tuple - """ - try: - value, negation = parse_negation(kind, field, value) - except ValueError: - if kind is Subject and field == Subject.Text: - # we do not support negation of the text field, - # the text field starts with the NEGATION_OPERATOR - # so we handle this string as the content instead - # of an operator - negation = False - else: - raise - value, wildcard = parse_wildcard(kind, field, value) - return value, negation, wildcard - - -def synchronized(lock): - """ Synchronization decorator. """ - def wrap(f): - def newFunction(*args, **kw): - lock.acquire() - try: - return f(*args, **kw) - finally: - lock.release() - return newFunction - return wrap - -class Deletion: - """ - A marker class that marks an event id for deletion - """ - def __init__ (self, event_id): - self.event_id = event_id - -class Reindex: - """ - Marker class that tells the worker thread to rebuild the entire index. - On construction time all events are pulled out of the zg_engine - argument and stored for later processing in the worker thread. - This avoid concurrent access to the ZG sqlite db from the worker thread. 
- """ - def __init__ (self, zg_engine): - all_events = zg_engine._find_events(1, TimeRange.always(), - [], StorageState.Any, - sys.maxint, - ResultType.MostRecentEvents) - self.all_events = all_events - -class SearchEngineExtension (dbus.service.Object): - """ - Full text indexing and searching extension for Zeitgeist - """ - PUBLIC_METHODS = [] - - def __init__ (self): - bus_name = dbus.service.BusName(FTS_DBUS_BUS_NAME, bus=dbus.SessionBus()) - dbus.service.Object.__init__(self, bus_name, FTS_DBUS_OBJECT_PATH) - self._indexer = Indexer() - - ZG_CLIENT.install_monitor((0, 2**63 - 1), [], - self.pre_insert_event, self.post_delete_events) - - def pre_insert_event(self, timerange, events): - for event in events: - self._indexer.index_event (event) - - def post_delete_events (self, ids): - for _id in ids: - self._indexer.delete_event (_id) - - @dbus.service.method(FTS_DBUS_INTERFACE, - in_signature="s(xx)a("+constants.SIG_EVENT+")uuu", - out_signature="a("+constants.SIG_EVENT+")u") - def Search(self, query_string, time_range, filter_templates, offset, count, result_type): - """ - DBus method to perform a full text search against the contents of the - Zeitgeist log. Returns an array of events. - """ - time_range = TimeRange(time_range[0], time_range[1]) - filter_templates = map(Event, filter_templates) - events, hit_count = self._indexer.search(query_string, time_range, - filter_templates, - offset, count, result_type) - return self._make_events_sendable (events), hit_count - - @dbus.service.method(FTS_DBUS_INTERFACE, - in_signature="", - out_signature="") - def ForceReindex(self): - """ - DBus method to force a reindex of the entire Zeitgeist log. - This method is only intended for debugging purposes and is not - considered blessed public API. 
- """ - log.debug ("Received ForceReindex request over DBus.") - self._indexer._queue.put (Reindex (self._indexer)) - - def _make_events_sendable(self, events): - return [NULL_EVENT if event is None else Event._make_dbus_sendable(event) for event in events] - -def mangle_uri (uri): - """ - Converts a URI into an index- and query friendly string. The problem - is that Xapian doesn't handle CAPITAL letters or most non-alphanumeric - symbols in a boolean term when it does prefix matching. The mangled - URIs returned from this function are suitable for boolean prefix searches. - - IMPORTANT: This is a 1-way function! You can not convert back. - """ - result = "" - for c in uri.lower(): - if c in (": /"): - result += "_" - else: - result += c - return result - -def cap_string (s, nbytes=MAX_TERM_LENGTH): - """ - If s has more than nbytes bytes (not characters) then cap it off - after nbytes bytes in a way still producing a valid utf-8 string. - - Assumes that s is a utf-8 string. - - This function useful for working with Xapian terms because Xapian has - a max term length of 245 (which is not very well documented, but see - http://xapian.org/docs/omega/termprefixes.html). - """ - # Check if we can fast-path this string - if (len(s.encode("utf-8")) <= nbytes): - return s - - # We use a StringIO here to avoid mem thrashing via naiive - # string concatenation. See fx. http://www.skymind.com/~ocrow/python_string/ - buf = StringIO() - for char in s : - if buf.tell() >= nbytes - 1 : - return buf.getvalue() - buf.write(char.encode("utf-8")) - - return unicode(buf.getvalue().decode("utf-8")) - - -def expand_type (type_prefix, uri): - """ - Return a string with a Xapian query matching all child types of 'uri' - inside the Xapian prefix 'type_prefix'. 
- """ - is_negation = uri.startswith(NEGATION_OPERATOR) - uri = uri[1:] if is_negation else uri - children = Symbol.find_child_uris_extended(uri) - children = [ "%s:%s" % (type_prefix, child) for child in children ] - - result = " OR ".join(children) - return result if not is_negation else "NOT (%s)" % result - -class Indexer: - """ - Abstraction of the FT indexer and search engine - """ - - QUERY_PARSER_FLAGS = xapian.QueryParser.FLAG_PHRASE | \ - xapian.QueryParser.FLAG_BOOLEAN | \ - xapian.QueryParser.FLAG_PURE_NOT | \ - xapian.QueryParser.FLAG_LOVEHATE | \ - xapian.QueryParser.FLAG_WILDCARD - - def __init__ (self): - - self._cursor = cursor = get_default_cursor() - os.environ["XAPIAN_CJK_NGRAM"] = "1" - self._interpretation = TableLookup(cursor, "interpretation") - self._manifestation = TableLookup(cursor, "manifestation") - self._mimetype = TableLookup(cursor, "mimetype") - self._actor = TableLookup(cursor, "actor") - self._event_cache = LRUCache(constants.CACHE_SIZE) - - log.debug("Opening full text index: %s" % INDEX_FILE) - try: - self._index = xapian.WritableDatabase(INDEX_FILE, xapian.DB_CREATE_OR_OPEN) - except xapian.DatabaseError, e: - log.warn("Full text index corrupted: '%s'. Rebuilding index." 
% e) - self._index = xapian.WritableDatabase(INDEX_FILE, xapian.DB_CREATE_OR_OVERWRITE) - self._tokenizer = indexer = xapian.TermGenerator() - self._query_parser = xapian.QueryParser() - self._query_parser.set_database (self._index) - self._query_parser.add_prefix("name", "N") - self._query_parser.add_prefix("title", "N") - self._query_parser.add_prefix("site", "S") - self._query_parser.add_prefix("app", "A") - self._query_parser.add_boolean_prefix("zgei", FILTER_PREFIX_EVENT_INTERPRETATION) - self._query_parser.add_boolean_prefix("zgem", FILTER_PREFIX_EVENT_MANIFESTATION) - self._query_parser.add_boolean_prefix("zga", FILTER_PREFIX_ACTOR) - self._query_parser.add_prefix("zgsu", FILTER_PREFIX_SUBJECT_URI) - self._query_parser.add_boolean_prefix("zgsi", FILTER_PREFIX_SUBJECT_INTERPRETATION) - self._query_parser.add_boolean_prefix("zgsm", FILTER_PREFIX_SUBJECT_MANIFESTATION) - self._query_parser.add_prefix("zgso", FILTER_PREFIX_SUBJECT_ORIGIN) - self._query_parser.add_boolean_prefix("zgst", FILTER_PREFIX_SUBJECT_MIMETYPE) - self._query_parser.add_boolean_prefix("zgss", FILTER_PREFIX_SUBJECT_STORAGE) - self._query_parser.add_prefix("category", FILTER_PREFIX_XDG_CATEGORY) - self._query_parser.add_valuerangeprocessor( - xapian.NumberValueRangeProcessor(VALUE_EVENT_ID, "id", True)) - self._query_parser.add_valuerangeprocessor( - xapian.NumberValueRangeProcessor(VALUE_TIMESTAMP, "ms", False)) - self._query_parser.set_default_op(xapian.Query.OP_AND) - self._enquire = xapian.Enquire(self._index) - - self._desktops = {} - - gobject.threads_init() - self._may_run = True - self._queue = Queue(0) - self._worker = threading.Thread(target=self._worker_thread, - name="IndexWorker") - self._worker.daemon = True - - # We need to defer the index checking until after ZG has completed - # full setup. Hence the idle handler. 
- # We also don't start the worker until after we've checked the index - gobject.idle_add (self._check_index_and_start_worker) - - @synchronized (INDEX_LOCK) - def _check_index_and_start_worker (self): - """ - Check whether we need a rebuild of the index. - Returns True if the index is good. False if a reindexing has - been commenced. - - This method should be called from the main thread and only once. - It starts the worker thread as a side effect. - - We are clearing the queue, because there may be a race when an - event insertion / deletion is already queued and our index - is corrupted. Creating a new queue instance should be safe, - because we're running in main thread as are the index_event - and delete_event methods, and the worker thread wasn't yet - started. - """ - if self._index.get_metadata("fts_index_version") != INDEX_VERSION: - log.info("Index must be upgraded. Doing full rebuild") - self._queue = Queue(0) - self._queue.put(Reindex(self)) - elif self._index.get_doccount() == 0: - # If the index is empty we trigger a rebuild - # We must delay reindexing until after the engine is done setting up - log.info("Empty index detected. Doing full rebuild") - self._queue = Queue(0) - self._queue.put(Reindex(self)) - - # Now that we've checked the index from the main thread we can start the worker - self._worker.start() - - def index_event (self, event): - """ - This method schedules and event for indexing. It returns immediate and - defers the actual work to a bottom half thread. 
This means that it - will not block the main loop of the Zeitgeist daemon while indexing - (which may be a heavy operation) - """ - self._queue.put (event) - return event - - def delete_event (self, event_id): - """ - Remove an event from the index given its event id - """ - self._queue.put (Deletion(event_id)) - return - - @synchronized (INDEX_LOCK) - def search (self, query_string, time_range=None, filters=None, offset=0, maxhits=10, result_type=100): - """ - Do a full text search over the indexed corpus. The `result_type` - parameter may be a zeitgeist.datamodel.ResultType or 100. In case it is - 100 the textual relevancy of the search engine will be used to sort the - results. Result type 100 is the fastest (and default) mode. - - The filters argument should be a list of event templates. - """ - # Expand event template filters if necessary - if filters: - query_string = "(%s) AND (%s)" % (query_string, self._compile_event_filter_query (filters)) - - # Expand time range value query - if time_range and not time_range.is_always(): - query_string = "(%s) AND (%s)" % (query_string, self._compile_time_range_filter_query (time_range)) - - # If the result type coalesces the events we need to fetch some extra - # events from the index to have a chance of actually holding 'maxhits' - # unique events - if result_type in COALESCING_RESULT_TYPES: - raw_maxhits = maxhits * 3 - else: - raw_maxhits = maxhits - - # When not sorting by relevance, we fetch the results from Xapian sorted, - # by timestamp. That minimizes the skew we get from otherwise doing a - # relevancy ranked xapaian query and then resorting with Zeitgeist. 
The - # "skew" is that low-relevancy results may still have the highest timestamp - if result_type == 100: - self._enquire.set_sort_by_relevance() - else: - self._enquire.set_sort_by_value(VALUE_TIMESTAMP, True) - - # Allow wildcards - query_start = time.time() - query = self._query_parser.parse_query (query_string, - self.QUERY_PARSER_FLAGS) - self._enquire.set_query (query) - hits = self._enquire.get_mset (offset, raw_maxhits) - hit_count = hits.get_matches_estimated() - log.debug("Search '%s' gave %s hits in %sms" % - (query_string, hits.get_matches_estimated(), (time.time() - query_start)*1000)) - - if result_type == 100: - event_ids = [] - for m in hits: - event_id = int(xapian.sortable_unserialise( - m.document.get_value(VALUE_EVENT_ID))) - event_ids.append (event_id) - if event_ids: - return self.get_events(event_ids), hit_count - else: - return [], 0 - else: - templates = [] - for m in hits: - event_id = int(xapian.sortable_unserialise( - m.document.get_value(VALUE_EVENT_ID))) - ev = Event() - ev[0][Event.Id] = str(event_id) - templates.append(ev) - if templates: - x = self._find_events(1, TimeRange.always(), - templates, - StorageState.Any, - maxhits, - result_type), hit_count - return x - else: - return [], 0 - - def _worker_thread (self): - is_dirty = False - while self._may_run: - # FIXME: Throttle IO and CPU - try: - # If we are dirty wait a while before we flush, - # or if we are clean wait indefinitely to avoid - # needless wakeups - if is_dirty: - event = self._queue.get(True, 0.5) - else: - event = self._queue.get(True) - - if isinstance (event, Deletion): - self._delete_event_real (event.event_id) - elif isinstance (event, Reindex): - self._reindex (event.all_events) - else: - self._index_event_real (event) - - is_dirty = True - except Empty: - if is_dirty: - # Write changes to disk - log.debug("Committing FTS index") - self._index.flush() - is_dirty = False - else: - log.debug("No changes to index. 
Sleeping") - - @synchronized (INDEX_LOCK) - def _reindex (self, event_list): - """ - Index everything in the ZG log. The argument must be a list - of events. Typically extracted by a Reindex instance. - Only call from worker thread as it writes to the db and Xapian - is *not* thread safe (only single-writer-multiple-reader). - """ - self._index.close () - self._index = xapian.WritableDatabase(INDEX_FILE, xapian.DB_CREATE_OR_OVERWRITE) - self._query_parser.set_database (self._index) - self._enquire = xapian.Enquire(self._index) - # Register that this index was built with CJK enabled - self._index.set_metadata("fts_index_version", INDEX_VERSION) - log.info("Preparing to rebuild index with %s events" % len(event_list)) - for e in event_list : self._queue.put(e) - - @synchronized (INDEX_LOCK) - def _delete_event_real (self, event_id): - """ - Look up the doc id given an event id and remove the xapian.Document - for that doc id. - Note: This is slow, but there's not much we can do about it - """ - try: - _id = xapian.sortable_serialise(float(event_id)) - query = xapian.Query(xapian.Query.OP_VALUE_RANGE, - VALUE_EVENT_ID, _id, _id) - - self._enquire.set_query (query) - hits = self._enquire.get_mset (0, 10) - - total = hits.get_matches_estimated() - if total > 1: - log.warning ("More than one event found with id '%s'" % event_id) - elif total <= 0: - log.debug ("No event for id '%s'" % event_id) - return - - for m in hits: - log.debug("Deleting event '%s' with docid '%s'" % - (event_id, m.docid)) - self._index.delete_document(m.docid) - except Exception, e: - log.error("Failed to delete event '%s': %s" % (event_id, e)) - - def _split_uri (self, uri): - """ - Returns a triple of (scheme, host, and path) extracted from `uri` - """ - i = uri.find(":") - if i == -1 : - scheme = "" - host = "" - path = uri - else: - scheme = uri[:i] - host = "" - path = "" - - if uri[i+1] == "/" and uri[i+2] == "/": - j = uri.find("/", i+3) - if j == -1 : - host = uri[i+3:] - else: - host = 
uri[i+3:j] - path = uri[j:] - else: - host = uri[i+1:] - - # Strip out URI query part - i = path.find("?") - if i != -1: - path = path[:i] - - return scheme, host, path - - def _get_desktop_entry (self, app_id): - """ - Return a xdg.DesktopEntry.DesktopEntry `app_id` or None in case - no file is found for the given desktop id - """ - if app_id in self._desktops: - return self._desktops[app_id] - - for datadir in xdg_data_dirs: - path = os.path.join(datadir, "applications", app_id) - if os.path.exists(path): - try: - desktop = DesktopEntry(path) - self._desktops[app_id] = desktop - return desktop - except Exception, e: - log.warning("Unable to load %s: %s" % (path, e)) - return None - - return None - - def _index_actor (self, actor): - """ - Takes an actor as a path to a .desktop file or app:// uri - and index the contents of the corresponding .desktop file - into the document currently set for self._tokenizer. - """ - if not actor : return - - # Get the path of the .desktop file and convert it to - # an app id (eg. 
'gedit.desktop') - scheme, host, path = self._split_uri(url_unescape (actor)) - if not path: - path = host - - if not path : - log.debug("Unable to determine application id for %s" % actor) - return - - if path.startswith("/") : - path = os.path.basename(path) - - desktop = self._get_desktop_entry(path) - if desktop: - if not desktop.getNoDisplay(): - self._tokenizer.index_text(desktop.getName(), 5) - self._tokenizer.index_text(desktop.getName(), 5, "A") - self._tokenizer.index_text(desktop.getGenericName(), 5) - self._tokenizer.index_text(desktop.getGenericName(), 5, "A") - self._tokenizer.index_text(desktop.getComment(), 2) - self._tokenizer.index_text(desktop.getComment(), 2, "A") - - doc = self._tokenizer.get_document() - for cat in desktop.getCategories(): - doc.add_boolean_term(FILTER_PREFIX_XDG_CATEGORY+cat.lower()) - else: - log.debug("Unable to look up app info for %s" % actor) - - - def _index_uri (self, uri): - """ - Index `uri` into the document currectly set on self._tokenizer - """ - # File URIs and paths are indexed in one way, and all other, - # usually web URIs, are indexed in another way because there may - # be domain name etc. 
in there we want to rank differently - scheme, host, path = self._split_uri (url_unescape (uri)) - if scheme == "file" or not scheme: - path, name = os.path.split(path) - self._tokenizer.index_text(name, 5) - self._tokenizer.index_text(name, 5, "N") - - # Index parent names with descending weight - weight = 5 - while path and name: - weight = weight / 1.5 - path, name = os.path.split(path) - self._tokenizer.index_text(name, int(weight)) - - elif scheme == "mailto": - tokens = host.split("@") - name = tokens[0] - self._tokenizer.index_text(name, 6) - if len(tokens) > 1: - self._tokenizer.index_text(" ".join[1:], 1) - else: - # We're cautious about indexing the path components of - # non-file URIs as some websites practice *extremely* long - # and useless URLs - path, name = os.path.split(path) - if len(name) > 30 : name = name[:30] - if len(path) > 30 : path = path[30] - if name: - self._tokenizer.index_text(name, 5) - self._tokenizer.index_text(name, 5, "N") - if path: - self._tokenizer.index_text(path, 1) - self._tokenizer.index_text(path, 1, "N") - if host: - self._tokenizer.index_text(host, 2) - self._tokenizer.index_text(host, 2, "N") - self._tokenizer.index_text(host, 2, "S") - - def _index_text (self, text): - """ - Index `text` as raw text data for the document currently - set on self._tokenizer. The text is assumed to be a primary - description of the subject, such as the basename of a file. 
- - Primary use is for subject.text - """ - self._tokenizer.index_text(text, 5) - - def _index_contents (self, uri): - # xmlindexer doesn't extract words for URIs only for file paths - - # FIXME: IONICE and NICE on xmlindexer - - path = uri.replace("file://", "") - xmlindexer = subprocess.Popen(['xmlindexer', path], - stdout=subprocess.PIPE) - xml = xmlindexer.communicate()[0].strip() - xmlindexer.wait() - - dom = minidom.parseString(xml) - text_nodes = dom.getElementsByTagName("text") - lines = [] - if text_nodes: - for line in text_nodes[0].childNodes: - lines.append(line.data) - - if lines: - self._tokenizer.index_text (" ".join(lines)) - - - def _add_doc_filters (self, event, doc): - """Adds the filtering rules to the doc. Filtering rules will - not affect the relevancy ranking of the event/doc""" - if event.interpretation: - doc.add_boolean_term (cap_string(FILTER_PREFIX_EVENT_INTERPRETATION+event.interpretation)) - if event.manifestation: - doc.add_boolean_term (cap_string(FILTER_PREFIX_EVENT_MANIFESTATION+event.manifestation)) - if event.actor: - doc.add_boolean_term (cap_string(FILTER_PREFIX_ACTOR+mangle_uri(event.actor))) - - for su in event.subjects: - if su.uri: - doc.add_boolean_term (cap_string(FILTER_PREFIX_SUBJECT_URI+mangle_uri(su.uri))) - if su.interpretation: - doc.add_boolean_term (cap_string(FILTER_PREFIX_SUBJECT_INTERPRETATION+su.interpretation)) - if su.manifestation: - doc.add_boolean_term (cap_string(FILTER_PREFIX_SUBJECT_MANIFESTATION+su.manifestation)) - if su.origin: - doc.add_boolean_term (cap_string(FILTER_PREFIX_SUBJECT_ORIGIN+mangle_uri(su.origin))) - if su.mimetype: - doc.add_boolean_term (cap_string(FILTER_PREFIX_SUBJECT_MIMETYPE+su.mimetype)) - if su.storage: - doc.add_boolean_term (cap_string(FILTER_PREFIX_SUBJECT_STORAGE+su.storage)) - - @synchronized (INDEX_LOCK) - def _index_event_real (self, event): - if not isinstance (event, OrigEvent): - log.error("Not an Event, found: %s" % type(event)) - if not event.id: - 
log.warning("Not indexing event. Event has no id") - return - - try: - doc = xapian.Document() - doc.add_value (VALUE_EVENT_ID, - xapian.sortable_serialise(float(event.id))) - doc.add_value (VALUE_TIMESTAMP, - xapian.sortable_serialise(float(event.timestamp))) - self._tokenizer.set_document (doc) - - self._index_actor (event.actor) - - for subject in event.subjects: - if not subject.uri : continue - - # By spec URIs can have arbitrary length. In reality that's just silly. - # The general online "rule" is to keep URLs less than 2k so we just - # choose to enforce that - if len(subject.uri) > 2000: - log.info ("URI too long (%s). Discarding: %s..."% (len(subject.uri), subject.uri[:30])) - return - log.debug("Indexing '%s'" % subject.uri) - - self._index_uri (subject.uri) - self._index_text (subject.text) - - # If the subject URI is an actor, we index the .desktop also - if subject.uri.startswith ("application://"): - self._index_actor (subject.uri) - - # File contents indexing disabled for now... - #self._index_contents (subject.uri) - - # FIXME: Possibly index payloads when we have apriori knowledge - - self._add_doc_filters (event, doc) - self._index.add_document (doc) - - except Exception, e: - log.error("Error indexing event: %s" % e) - - def _compile_event_filter_query (self, events): - """Takes a list of event templates and compiles a filter query - based on their, interpretations, manifestations, and actor, - for event and subjects. - - All fields within the same event will be ANDed and each template - will be ORed with the others. Like elsewhere in Zeitgeist the - type tree of the interpretations and manifestations will be expanded - to match all child symbols as well - """ - query = [] - for event in events: - if not isinstance(event, Event): - raise TypeError("Expected Event. 
Found %s" % type(event)) - - tmpl = [] - if event.interpretation : - tmpl.append(expand_type("zgei", event.interpretation)) - if event.manifestation : - tmpl.append(expand_type("zgem", event.manifestation)) - if event.actor : tmpl.append("zga:%s" % mangle_uri(event.actor)) - for su in event.subjects: - if su.uri : - tmpl.append("zgsu:%s" % mangle_uri(su.uri)) - if su.interpretation : - tmpl.append(expand_type("zgsi", su.interpretation)) - if su.manifestation : - tmpl.append(expand_type("zgsm", su.manifestation)) - if su.origin : - tmpl.append("zgso:%s" % mangle_uri(su.origin)) - if su.mimetype : - tmpl.append("zgst:%s" % su.mimetype) - if su.storage : - tmpl.append("zgss:%s" % su.storage) - - tmpl = "(" + ") AND (".join(tmpl) + ")" - query.append(tmpl) - - return " OR ".join(query) - - def _compile_time_range_filter_query (self, time_range): - """Takes a TimeRange and compiles a range query for it""" - - if not isinstance(time_range, TimeRange): - raise TypeError("Expected TimeRange, but found %s" % type(time_range)) - - return "%s..%sms" % (time_range.begin, time_range.end) - - def _get_event_from_row(self, row): - event = Event() - event[0][Event.Id] = row["id"] # Id property is read-only in the public API - event.timestamp = row["timestamp"] - for field in ("interpretation", "manifestation", "actor"): - # Try to get event attributes from row using the attributed field id - # If attribute does not exist we break the attribute fetching and return - # None instead of of crashing - try: - setattr(event, field, getattr(self, "_" + field).value(row[field])) - except KeyError, e: - log.error("Event %i broken: Table %s has no id %i" \ - %(row["id"], field, row[field])) - return None - event.origin = row["event_origin_uri"] or "" - event.payload = row["payload"] or "" # default payload: empty string - return event - - def _get_subject_from_row(self, row): - subject = Subject() - for field in ("uri", "text", "storage"): - setattr(subject, field, row["subj_" + field]) - 
subject.origin = row["subj_origin_uri"] - if row["subj_current_uri"]: - subject.current_uri = row["subj_current_uri"] - for field in ("interpretation", "manifestation", "mimetype"): - # Try to get subject attributes from row using the attributed field id - # If attribute does not exist we break the attribute fetching and return - # None instead of crashing - try: - setattr(subject, field, - getattr(self, "_" + field).value(row["subj_" + field])) - except KeyError, e: - log.error("Event %i broken: Table %s has no id %i" \ - %(row["id"], field, row["subj_" + field])) - return None - return subject - - def get_events(self, ids, sender=None): - """ - Look up a list of events. - """ - - t = time.time() - - if not ids: - return [] - - # Split ids into cached and uncached - uncached_ids = array("i") - cached_ids = array("i") - - # If ids batch greater than MAX_CACHE_BATCH_SIZE ids ignore cache - use_cache = True - if len(ids) > MAX_CACHE_BATCH_SIZE: - use_cache = False - if not use_cache: - uncached_ids = ids - else: - for id in ids: - if id in self._event_cache: - cached_ids.append(id) - else: - uncached_ids.append(id) - - id_hash = defaultdict(lambda: array("i")) - for n, id in enumerate(ids): - # the same id can be at multible places (LP: #673916) - # cache all of them - id_hash[id].append(n) - - # If we are not able to get an event by the given id - # append None instead of raising an Error. 
The client - # might simply have requested an event that has been - # deleted - events = {} - sorted_events = [None]*len(ids) - - for id in cached_ids: - event = self._event_cache[id] - if event: - if event is not None: - for n in id_hash[event.id]: - # insert the event into all necessary spots (LP: #673916) - sorted_events[n] = event - - # Get uncached events - rows = self._cursor.execute(""" - SELECT * FROM event_view - WHERE id IN (%s) - """ % ",".join("%d" % _id for _id in uncached_ids)) - - time_get_uncached = time.time() - t - t = time.time() - - t_get_event = 0 - t_get_subject = 0 - t_apply_get_hooks = 0 - - row_counter = 0 - for row in rows: - row_counter += 1 - # Assumption: all rows of a same event for its different - # subjects are in consecutive order. - t_get_event -= time.time() - event = self._get_event_from_row(row) - t_get_event += time.time() - - if event: - # Check for existing event.id in event to attach - # other subjects to it - if event.id not in events: - events[event.id] = event - else: - event = events[event.id] - - t_get_subject -= time.time() - subject = self._get_subject_from_row(row) - t_get_subject += time.time() - # Check if subject has a proper value. If none than something went - # wrong while trying to fetch the subject from the row. So instead - # of failing and raising an error. We silently skip the event. 
- if subject: - event.append_subject(subject) - if use_cache and not event.payload: - self._event_cache[event.id] = event - if event is not None: - for n in id_hash[event.id]: - # insert the event into all necessary spots (LP: #673916) - sorted_events[n] = event - # Avoid caching events with payloads to have keep the cache MB size - # at a decent level - - - log.debug("Got %d raw events in %fs" % (row_counter, time_get_uncached)) - log.debug("Got %d events in %fs" % (len(sorted_events), time.time()-t)) - log.debug(" Where time spent in _get_event_from_row in %fs" % (t_get_event)) - log.debug(" Where time spent in _get_subject_from_row in %fs" % (t_get_subject)) - log.debug(" Where time spent in apply_get_hooks in %fs" % (t_apply_get_hooks)) - return sorted_events - - def _find_events(self, return_mode, time_range, event_templates, - storage_state, max_events, order, sender=None): - """ - Accepts 'event_templates' as either a real list of Events or as - a list of tuples (event_data, subject_data) as we do in the - DBus API. - - Return modes: - - 0: IDs. - - 1: Events. - """ - t = time.time() - - where = self._build_sql_event_filter(time_range, event_templates, - storage_state) - - if not where.may_have_results(): - return [] - - if return_mode == 0: - sql = "SELECT DISTINCT id FROM event_view" - elif return_mode == 1: - sql = "SELECT id FROM event_view" - else: - raise NotImplementedError, "Unsupported return_mode." 
- - wheresql = " WHERE %s" % where.sql if where else "" - - def group_and_sort(field, wheresql, time_asc=False, count_asc=None, - aggregation_type='max'): - - args = { - 'field': field, - 'aggregation_type': aggregation_type, - 'where_sql': wheresql, - 'time_sorting': 'ASC' if time_asc else 'DESC', - 'aggregation_sql': '', - 'order_sql': '', - } - - if count_asc is not None: - args['aggregation_sql'] = ', COUNT(%s) AS num_events' % \ - field - args['order_sql'] = 'num_events %s,' % \ - ('ASC' if count_asc else 'DESC') - - return """ - NATURAL JOIN ( - SELECT %(field)s, - %(aggregation_type)s(timestamp) AS timestamp - %(aggregation_sql)s - FROM event_view %(where_sql)s - GROUP BY %(field)s) - GROUP BY %(field)s - ORDER BY %(order_sql)s timestamp %(time_sorting)s - """ % args - - if order == ResultType.MostRecentEvents: - sql += wheresql + " ORDER BY timestamp DESC" - elif order == ResultType.LeastRecentEvents: - sql += wheresql + " ORDER BY timestamp ASC" - elif order == ResultType.MostRecentEventOrigin: - sql += group_and_sort("origin", wheresql, time_asc=False) - elif order == ResultType.LeastRecentEventOrigin: - sql += group_and_sort("origin", wheresql, time_asc=True) - elif order == ResultType.MostPopularEventOrigin: - sql += group_and_sort("origin", wheresql, time_asc=False, - count_asc=False) - elif order == ResultType.LeastPopularEventOrigin: - sql += group_and_sort("origin", wheresql, time_asc=True, - count_asc=True) - elif order == ResultType.MostRecentSubjects: - # Remember, event.subj_id identifies the subject URI - sql += group_and_sort("subj_id", wheresql, time_asc=False) - elif order == ResultType.LeastRecentSubjects: - sql += group_and_sort("subj_id", wheresql, time_asc=True) - elif order == ResultType.MostPopularSubjects: - sql += group_and_sort("subj_id", wheresql, time_asc=False, - count_asc=False) - elif order == ResultType.LeastPopularSubjects: - sql += group_and_sort("subj_id", wheresql, time_asc=True, - count_asc=True) - elif order == 
ResultType.MostRecentCurrentUri: - sql += group_and_sort("subj_id_current", wheresql, time_asc=False) - elif order == ResultType.LeastRecentCurrentUri: - sql += group_and_sort("subj_id_current", wheresql, time_asc=True) - elif order == ResultType.MostPopularCurrentUri: - sql += group_and_sort("subj_id_current", wheresql, time_asc=False, - count_asc=False) - elif order == ResultType.LeastPopularCurrentUri: - sql += group_and_sort("subj_id_current", wheresql, time_asc=True, - count_asc=True) - elif order == ResultType.MostRecentActor: - sql += group_and_sort("actor", wheresql, time_asc=False) - elif order == ResultType.LeastRecentActor: - sql += group_and_sort("actor", wheresql, time_asc=True) - elif order == ResultType.MostPopularActor: - sql += group_and_sort("actor", wheresql, time_asc=False, - count_asc=False) - elif order == ResultType.LeastPopularActor: - sql += group_and_sort("actor", wheresql, time_asc=True, - count_asc=True) - elif order == ResultType.OldestActor: - sql += group_and_sort("actor", wheresql, time_asc=True, - aggregation_type="min") - elif order == ResultType.MostRecentOrigin: - sql += group_and_sort("subj_origin", wheresql, time_asc=False) - elif order == ResultType.LeastRecentOrigin: - sql += group_and_sort("subj_origin", wheresql, time_asc=True) - elif order == ResultType.MostPopularOrigin: - sql += group_and_sort("subj_origin", wheresql, time_asc=False, - count_asc=False) - elif order == ResultType.LeastPopularOrigin: - sql += group_and_sort("subj_origin", wheresql, time_asc=True, - count_asc=True) - elif order == ResultType.MostRecentSubjectInterpretation: - sql += group_and_sort("subj_interpretation", wheresql, - time_asc=False) - elif order == ResultType.LeastRecentSubjectInterpretation: - sql += group_and_sort("subj_interpretation", wheresql, - time_asc=True) - elif order == ResultType.MostPopularSubjectInterpretation: - sql += group_and_sort("subj_interpretation", wheresql, - time_asc=False, count_asc=False) - elif order == 
ResultType.LeastPopularSubjectInterpretation: - sql += group_and_sort("subj_interpretation", wheresql, - time_asc=True, count_asc=True) - elif order == ResultType.MostRecentMimeType: - sql += group_and_sort("subj_mimetype", wheresql, time_asc=False) - elif order == ResultType.LeastRecentMimeType: - sql += group_and_sort("subj_mimetype", wheresql, time_asc=True) - elif order == ResultType.MostPopularMimeType: - sql += group_and_sort("subj_mimetype", wheresql, time_asc=False, - count_asc=False) - elif order == ResultType.LeastPopularMimeType: - sql += group_and_sort("subj_mimetype", wheresql, time_asc=True, - count_asc=True) - - if max_events > 0: - sql += " LIMIT %d" % max_events - result = array("i", self._cursor.execute(sql, where.arguments).fetch(0)) - - if return_mode == 0: - log.debug("Found %d event IDs in %fs" % (len(result), time.time()- t)) - elif return_mode == 1: - log.debug("Found %d events in %fs" % (len(result), time.time()- t)) - result = self.get_events(ids=result, sender=sender) - else: - raise Exception("%d" % return_mode) - - return result - - @staticmethod - def _build_templates(templates): - for event_template in templates: - event_data = event_template[0] - for subject in (event_template[1] or (Subject(),)): - yield Event((event_data, [], None)), Subject(subject) - - def _build_sql_from_event_templates(self, templates): - - where_or = WhereClause(WhereClause.OR) - - for template in templates: - event_template = Event((template[0], [], None)) - if template[1]: - subject_templates = [Subject(data) for data in template[1]] - else: - subject_templates = None - - subwhere = WhereClause(WhereClause.AND) - - if event_template.id: - subwhere.add("id = ?", event_template.id) - - try: - value, negation, wildcard = parse_operators(Event, Event.Interpretation, event_template.interpretation) - # Expand event interpretation children - event_interp_where = WhereClause(WhereClause.OR, negation) - for child_interp in (Symbol.find_child_uris_extended(value)): - 
if child_interp: - event_interp_where.add_text_condition("interpretation", - child_interp, like=wildcard, cache=self._interpretation) - if event_interp_where: - subwhere.extend(event_interp_where) - - value, negation, wildcard = parse_operators(Event, Event.Manifestation, event_template.manifestation) - # Expand event manifestation children - event_manif_where = WhereClause(WhereClause.OR, negation) - for child_manif in (Symbol.find_child_uris_extended(value)): - if child_manif: - event_manif_where.add_text_condition("manifestation", - child_manif, like=wildcard, cache=self._manifestation) - if event_manif_where: - subwhere.extend(event_manif_where) - - value, negation, wildcard = parse_operators(Event, Event.Actor, event_template.actor) - if value: - subwhere.add_text_condition("actor", value, wildcard, negation, cache=self._actor) - - value, negation, wildcard = parse_operators(Event, Event.Origin, event_template.origin) - if value: - subwhere.add_text_condition("origin", value, wildcard, negation) - - if subject_templates is not None: - for subject_template in subject_templates: - value, negation, wildcard = parse_operators(Subject, Subject.Interpretation, subject_template.interpretation) - # Expand subject interpretation children - su_interp_where = WhereClause(WhereClause.OR, negation) - for child_interp in (Symbol.find_child_uris_extended(value)): - if child_interp: - su_interp_where.add_text_condition("subj_interpretation", - child_interp, like=wildcard, cache=self._interpretation) - if su_interp_where: - subwhere.extend(su_interp_where) - - value, negation, wildcard = parse_operators(Subject, Subject.Manifestation, subject_template.manifestation) - # Expand subject manifestation children - su_manif_where = WhereClause(WhereClause.OR, negation) - for child_manif in (Symbol.find_child_uris_extended(value)): - if child_manif: - su_manif_where.add_text_condition("subj_manifestation", - child_manif, like=wildcard, cache=self._manifestation) - if su_manif_where: 
- subwhere.extend(su_manif_where) - - # FIXME: Expand mime children as well. - # Right now we only do exact matching for mimetypes - # thekorn: this will be fixed when wildcards are supported - value, negation, wildcard = parse_operators(Subject, Subject.Mimetype, subject_template.mimetype) - if value: - subwhere.add_text_condition("subj_mimetype", - value, wildcard, negation, cache=self._mimetype) - - for key in ("uri", "origin", "text"): - value = getattr(subject_template, key) - if value: - value, negation, wildcard = parse_operators(Subject, getattr(Subject, key.title()), value) - subwhere.add_text_condition("subj_%s" % key, value, wildcard, negation) - - if subject_template.current_uri: - value, negation, wildcard = parse_operators(Subject, - Subject.CurrentUri, subject_template.current_uri) - subwhere.add_text_condition("subj_current_uri", value, wildcard, negation) - - if subject_template.storage: - subwhere.add_text_condition("subj_storage", subject_template.storage) - - except KeyError, e: - # Value not in DB - log.debug("Unknown entity in query: %s" % e) - where_or.register_no_result() - continue - where_or.extend(subwhere) - return where_or - - def _build_sql_event_filter(self, time_range, templates, storage_state): - - where = WhereClause(WhereClause.AND) - - # thekorn: we are using the unary operator here to tell sql to not use - # the index on the timestamp column at the first place. This `fix` for - # (LP: #672965) is based on some benchmarks, which suggest a performance - # win, but we might not oversee all implications. - # (see http://www.sqlite.org/optoverview.html section 6.0) - min_time, max_time = time_range - if min_time != 0: - where.add("+timestamp >= ?", min_time) - if max_time != sys.maxint: - where.add("+timestamp <= ?", max_time) - - if storage_state in (StorageState.Available, StorageState.NotAvailable): - where.add("(subj_storage_state = ? 
OR subj_storage_state IS NULL)", - storage_state) - elif storage_state != StorageState.Any: - raise ValueError, "Unknown storage state '%d'" % storage_state - - where.extend(self._build_sql_from_event_templates(templates)) - - return where - -if __name__ == "__main__": - mainloop = gobject.MainLoop(is_running=True) - search_engine = SearchEngineExtension() - ZG_CLIENT._iface.connect_exit(lambda: mainloop.quit ()) - mainloop.run() - diff --git a/extensions/fts-python/lrucache.py b/extensions/fts-python/lrucache.py deleted file mode 100644 index 265ed401..00000000 --- a/extensions/fts-python/lrucache.py +++ /dev/null @@ -1,125 +0,0 @@ -# -.- coding: utf-8 -.- - -# lrucache.py -# -# Copyright © 2009 Mikkel Kamstrup Erlandsen <mikkel.kamstrup@gmail.com> -# Copyright © 2009 Markus Korn <thekorn@gmx.de> -# Copyright © 2011 Seif Lotfy <seif@lotfy.com> -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU Lesser General Public License as published by -# the Free Software Foundation, either version 2.1 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public License -# along with this program. If not, see <http://www.gnu.org/licenses/>. - -class LRUCache: - """ - A simple LRUCache implementation backed by a linked list and a dict. - It can be accessed and updated just like a dict. 
To check if an element - exists in the cache the following type of statements can be used: - if "foo" in cache - """ - - class _Item: - """ - A container for each item in LRUCache which knows about the - item's position and relations - """ - def __init__(self, item_key, item_value): - self.value = item_value - self.key = item_key - self.next = None - self.prev = None - - def __init__(self, max_size): - """ - The size of the cache (in number of cached items) is guaranteed to - never exceed 'size' - """ - self._max_size = max_size - self.clear() - - - def clear(self): - self._list_end = None # The newest item - self._list_start = None # Oldest item - self._map = {} - - def __len__(self): - return len(self._map) - - def __contains__(self, key): - return key in self._map - - def __delitem__(self, key): - item = self._map[key] - if item.prev: - item.prev.next = item.next - else: - # we are deleting the first item, so we need a new first one - self._list_start = item.next - if item.next: - item.next.prev = item.prev - else: - # we are deleting the last item, get a new last one - self._list_end = item.prev - del self._map[key], item - - def __setitem__(self, key, value): - if key in self._map: - item = self._map[key] - item.value = value - self._move_item_to_end(item) - else: - new = LRUCache._Item(key, value) - self._append_to_list(new) - - if len(self._map) > self._max_size : - # Remove eldest entry from list - self.remove_eldest_item() - - def __getitem__(self, key): - item = self._map[key] - self._move_item_to_end(item) - return item.value - - def __iter__(self): - """ - Iteration is in order from eldest to newest, - and returns (key,value) tuples - """ - iter = self._list_start - while iter != None: - yield (iter.key, iter.value) - iter = iter.next - - def _move_item_to_end(self, item): - del self[item.key] - self._append_to_list(item) - - def _append_to_list(self, item): - self._map[item.key] = item - if not self._list_start: - self._list_start = item - if 
self._list_end: - self._list_end.next = item - item.prev = self._list_end - item.next = None - self._list_end = item - - def remove_eldest_item(self): - if self._list_start == self._list_end: - self._list_start = None - self._list_end = None - return - old = self._list_start - old.next.prev = None - self._list_start = old.next - del self[old.key], old diff --git a/extensions/fts-python/sql.py b/extensions/fts-python/sql.py deleted file mode 100644 index 20f1bc30..00000000 --- a/extensions/fts-python/sql.py +++ /dev/null @@ -1,301 +0,0 @@ -# -.- coding: utf-8 -.- - -# Zeitgeist -# -# Copyright © 2009-2010 Siegfried-Angel Gevatter Pujals <rainct@ubuntu.com> -# Copyright © 2009 Mikkel Kamstrup Erlandsen <mikkel.kamstrup@gmail.com> -# Copyright © 2009-2011 Markus Korn <thekorn@gmx.net> -# Copyright © 2009 Seif Lotfy <seif@lotfy.com> -# Copyright © 2011 J.P. Lacerda <jpaflacerda@gmail.com> -# Copyright © 2011 Collabora Ltd. -# By Siegfried-Angel Gevatter Pujals <rainct@ubuntu.com> -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU Lesser General Public License as published by -# the Free Software Foundation, either version 2.1 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public License -# along with this program. If not, see <http://www.gnu.org/licenses/>. 
- -import sqlite3 -import logging -import time -import os -import shutil - -from constants import constants - -log = logging.getLogger("siis.zeitgeist.sql") - -TABLE_MAP = { - "origin": "uri", - "subj_mimetype": "mimetype", - "subj_origin": "uri", - "subj_uri": "uri", - "subj_current_uri": "uri", -} - -def explain_query(cursor, statement, arguments=()): - plan = "" - for r in cursor.execute("EXPLAIN QUERY PLAN "+statement, arguments).fetchall(): - plan += str(list(r)) + "\n" - log.debug("Got query:\nQUERY:\n%s (%s)\nPLAN:\n%s" % (statement, arguments, plan)) - -class UnicodeCursor(sqlite3.Cursor): - - debug_explain = os.getenv("ZEITGEIST_DEBUG_QUERY_PLANS") - - @staticmethod - def fix_unicode(obj): - if isinstance(obj, (int, long)): - # thekorn: as long as we are using the unary operator for timestamp - # related queries we have to make sure that integers are not - # converted to strings, same applies for long numbers. - return obj - if isinstance(obj, str): - obj = obj.decode("UTF-8") - # seif: Python’s default encoding is ASCII, so whenever a character with - # an ASCII value > 127 is in the input data, you’ll get a UnicodeDecodeError - # because that character can’t be handled by the ASCII encoding. - try: - obj = unicode(obj) - except UnicodeDecodeError, ex: - pass - return obj - - def execute(self, statement, parameters=()): - parameters = [self.fix_unicode(p) for p in parameters] - if UnicodeCursor.debug_explain: - explain_query(super(UnicodeCursor, self), statement, parameters) - return super(UnicodeCursor, self).execute(statement, parameters) - - def fetch(self, index=None): - if index is not None: - for row in self: - yield row[index] - else: - for row in self: - yield row - -def _get_schema_version (cursor, schema_name): - """ - Returns the schema version for schema_name or returns 0 in case - the schema doesn't exist. - """ - try: - schema_version_result = cursor.execute(""" - SELECT version FROM schema_version WHERE schema=? 
- """, (schema_name,)) - result = schema_version_result.fetchone() - return result[0] if result else 0 - except sqlite3.OperationalError, e: - # The schema isn't there... - log.debug ("Schema '%s' not found: %s" % (schema_name, e)) - return 0 - -def _connect_to_db(file_path): - conn = sqlite3.connect(file_path) - conn.row_factory = sqlite3.Row - cursor = conn.cursor(UnicodeCursor) - return cursor - -_cursor = None -def get_default_cursor(): - global _cursor - if not _cursor: - dbfile = constants.DATABASE_FILE - start = time.time() - log.info("Using database: %s" % dbfile) - new_database = not os.path.exists(dbfile) - _cursor = _connect_to_db(dbfile) - core_schema_version = _get_schema_version(_cursor, constants.CORE_SCHEMA) - if core_schema_version < constants.CORE_SCHEMA_VERSION: - log.exception( - "Database '%s' is on version %s, but %s is required" % \ - (constants.CORE_SCHEMA, core_schema_version, - constants.CORE_SCHEMA_VERSION)) - raise SystemExit(27) - return _cursor -def unset_cursor(): - global _cursor - _cursor = None - -class TableLookup(dict): - - # We are not using an LRUCache as pressumably there won't be thousands - # of manifestations/interpretations/mimetypes/actors on most - # installations, so we can save us the overhead of tracking their usage. - - def __init__(self, cursor, table): - - self._cursor = cursor - self._table = table - - for row in cursor.execute("SELECT id, value FROM %s" % table): - self[row["value"]] = row["id"] - - self._inv_dict = dict((value, key) for key, value in self.iteritems()) - - def __getitem__(self, name): - # Use this for inserting new properties into the database - if name in self: - return super(TableLookup, self).__getitem__(name) - id = self._cursor.execute("SELECT id FROM %s WHERE value=?" 
- % self._table, (name,)).fetchone()[0] - # If we are here it's a newly inserted value, insert it into cache - self[name] = id - self._inv_dict[id] = name - return id - - def value(self, id): - # When we fetch an event, it either was already in the database - # at the time Zeitgeist started or it was inserted later -using - # Zeitgeist-, so here we always have the data in memory already. - return self._inv_dict[id] - - def id(self, name): - # Use this when fetching values which are supposed to be in the - # database already. Eg., in find_eventids. - return super(TableLookup, self).__getitem__(name) - - def remove_id(self, id): - value = self.value(id) - del self._inv_dict[id] - del self[value] - -def get_right_boundary(text): - """ returns the smallest string which is greater than `text` """ - if not text: - # if the search prefix is empty we query for the whole range - # of 'utf-8 'unicode chars - return unichr(0x10ffff) - if isinstance(text, str): - # we need to make sure the text is decoded as 'utf-8' unicode - text = unicode(text, "UTF-8") - charpoint = ord(text[-1]) - if charpoint == 0x10ffff: - # if the last character is the biggest possible char we need to - # look at the second last - return get_right_boundary(text[:-1]) - return text[:-1] + unichr(charpoint+1) - -class WhereClause: - """ - This class provides a convenient representation a SQL `WHERE' clause, - composed of a set of conditions joined together. - - The relation between conditions can be either of type *AND* or *OR*, but - not both. To create more complex clauses, use several :class:`WhereClause` - instances and joining them together using :meth:`extend`. 
- - Instances of this class can then be used to obtain a line of SQL code and - a list of arguments, for use with the SQLite3 module, accessing the - appropriate properties: - >>> where.sql, where.arguments - """ - - AND = " AND " - OR = " OR " - NOT = "NOT " - - @staticmethod - def optimize_glob(column, table, prefix): - """returns an optimized version of the GLOB statement as described - in http://www.sqlite.org/optoverview.html `4.0 The LIKE optimization` - """ - if isinstance(prefix, str): - # we need to make sure the text is decoded as 'utf-8' unicode - prefix = unicode(prefix, "UTF-8") - if not prefix: - # empty prefix means 'select all', no way to optimize this - sql = "SELECT %s FROM %s" %(column, table) - return sql, () - elif all([i == unichr(0x10ffff) for i in prefix]): - sql = "SELECT %s FROM %s WHERE value >= ?" %(column, table) - return sql, (prefix,) - else: - sql = "SELECT %s FROM %s WHERE (value >= ? AND value < ?)" %(column, table) - return sql, (prefix, get_right_boundary(prefix)) - - def __init__(self, relation, negation=False): - self._conditions = [] - self.arguments = [] - self._relation = relation - self._no_result_member = False - self._negation = negation - - def __len__(self): - return len(self._conditions) - - def add(self, condition, arguments=None): - if not condition: - return - self._conditions.append(condition) - if arguments is not None: - if not hasattr(arguments, "__iter__"): - self.arguments.append(arguments) - else: - self.arguments.extend(arguments) - - def add_text_condition(self, column, value, like=False, negation=False, cache=None): - if like: - assert column in ("origin", "subj_uri", "subj_current_uri", - "subj_origin", "actor", "subj_mimetype"), \ - "prefix search on the %r column is not supported by zeitgeist" % column - if column == "subj_uri": - # subj_id directly points to the id of an uri entry - view_column = "subj_id" - elif column == "subj_current_uri": - view_column = "subj_id_current" - else: - view_column = 
column - optimized_glob, value = self.optimize_glob("id", TABLE_MAP.get(column, column), value) - sql = "%s %sIN (%s)" %(view_column, self.NOT if negation else "", optimized_glob) - if negation: - sql += " OR %s IS NULL" % view_column - else: - if column == "origin": - column ="event_origin_uri" - elif column == "subj_origin": - column = "subj_origin_uri" - sql = "%s %s= ?" %(column, "!" if negation else "") - if cache is not None: - value = cache[value] - self.add(sql, value) - - def extend(self, where): - self.add(where.sql, where.arguments) - if not where.may_have_results(): - if self._relation == self.AND: - self.clear() - self.register_no_result() - - @property - def sql(self): - if self: # Do not return "()" if there are no conditions - negation = self.NOT if self._negation else "" - return "%s(%s)" %(negation, self._relation.join(self._conditions)) - - def register_no_result(self): - self._no_result_member = True - - def may_have_results(self): - """ - Return False if we know from our cached data that the query - will give no results. - """ - return len(self._conditions) > 0 or not self._no_result_member - - def clear(self): - """ - Reset this WhereClause to the state of a newly created one. - """ - self._conditions = [] - self.arguments = [] - self._no_result_member = False diff --git a/extensions/fts.vala b/extensions/fts.vala index e6435927..0c614996 100644 --- a/extensions/fts.vala +++ b/extensions/fts.vala @@ -52,8 +52,11 @@ namespace Zeitgeist class SearchEngine: Extension, RemoteSearchEngine { + private const string INDEXER_NAME = "org.gnome.zeitgeist.SimpleIndexer"; + private RemoteSimpleIndexer siin; private uint registration_id; + private MonitorManager? 
notifier; SearchEngine () { @@ -64,6 +67,15 @@ namespace Zeitgeist { if (Utils.using_in_memory_database ()) return; + // installing a monitor from the daemon will ensure that we don't + // miss any notifications that would be emitted in between + // zeitgeist start and fts daemon start + notifier = MonitorManager.get_default (); + notifier.install_monitor (new BusName (INDEXER_NAME), + "/org/gnome/zeitgeist/monitor/special", + new TimeRange.anytime (), + new GenericArray<Event> ()); + try { var connection = Bus.get_sync (BusType.SESSION, null); @@ -73,7 +85,7 @@ namespace Zeitgeist // FIXME: shouldn't we delay this to next idle callback? // Get SimpleIndexer Bus.watch_name_on_connection (connection, - "org.gnome.zeitgeist.SimpleIndexer", + INDEXER_NAME, BusNameWatcherFlags.AUTO_START, (conn) => { diff --git a/src/datamodel.vala b/src/datamodel.vala index 1c10a07f..4dd4a6b3 100644 --- a/src/datamodel.vala +++ b/src/datamodel.vala @@ -288,7 +288,6 @@ namespace Zeitgeist if (property.has_prefix (parsed)) matches = true; } - debug ("Checking matches for %s", parsed); return (is_negated) ? 
!matches : matches; } @@ -481,8 +480,6 @@ namespace Zeitgeist */ //Check if interpretation is child of template_event or same - debug("Checking if event %u matches template_event %u\n", - this.id, template_event.id); if (!check_field_match (this.interpretation, template_event.interpretation, true)) return false; //Check if manifestation is child of template_event or same diff --git a/src/engine.vala b/src/engine.vala index 2de9849c..a6bac29b 100644 --- a/src/engine.vala +++ b/src/engine.vala @@ -44,6 +44,7 @@ public class Engine : DbReader Object (database: new Zeitgeist.SQLite.Database ()); // TODO: take care of this if we decide to subclass Engine + // (we need to propagate the error, so it can't go to construct {}) last_id = database.get_last_id (); extension_collection = new ExtensionCollection (this); } diff --git a/src/notify.vala b/src/notify.vala index bc9ae669..c3526bad 100644 --- a/src/notify.vala +++ b/src/notify.vala @@ -26,11 +26,32 @@ namespace Zeitgeist public class MonitorManager : Object { + private static unowned MonitorManager? instance; + private HashTable<string, Monitor> monitors; private HashTable<string, GenericArray<string>> connections; + // ref-counted singleton - it can get destroyed easily, but has + // singleton semantics as long as some top-level instance keeps + // a reference to it + public static MonitorManager get_default () + { + return instance ?? 
new MonitorManager (); + } + + private MonitorManager () + { + } + + ~MonitorManager () + { + instance = null; + } + construct { + instance = this; + monitors = new HashTable<string, Monitor> (str_hash, str_equal); connections = new HashTable<string, GenericArray<string>> (str_hash, str_equal); @@ -53,7 +74,8 @@ namespace Zeitgeist foreach (var owner in connections.get_keys()) { - if (arg0 == owner) + // Don't disconnect monitors using service names + if (arg0 == owner && g_dbus_is_unique_name (arg0)) { var paths = connections.lookup (arg0); debug("Client disconnected %s", owner); @@ -120,32 +142,49 @@ namespace Zeitgeist { queued_notifications = new SList<QueuedNotification> (); Bus.get_proxy<RemoteMonitor> (BusType.SESSION, peer, - object_path, DBusProxyFlags.DO_NOT_LOAD_PROPERTIES | - DBusProxyFlags.DO_NOT_CONNECT_SIGNALS, + object_path, + DBusProxyFlags.DO_NOT_LOAD_PROPERTIES + | DBusProxyFlags.DO_NOT_CONNECT_SIGNALS + | DBusProxyFlags.DO_NOT_AUTO_START, null, (obj, res) => { try { proxy_object = Bus.get_proxy.end (res); + // Process queued notifications... + flush_notifications (); + + proxy_object.notify["g-name-owner"].connect (name_owner_changed); } catch (IOError err) { warning ("%s", err.message); } - - // Process queued notifications... - queued_notifications.reverse (); - foreach (unowned QueuedNotification notification - in queued_notifications) - { - notification.send (proxy_object); - } - queued_notifications = null; }); time_range = tr; event_templates = templates; } + private void name_owner_changed () + requires (proxy_object != null) + { + // FIXME: can we use this to actually remove the monitor? 
+ // (instead of using NameOwnerChanged signal) + DBusProxy p = proxy_object as DBusProxy; + if (p.g_name_owner != null) flush_notifications (); + } + + private void flush_notifications () + { + queued_notifications.reverse (); + foreach (unowned QueuedNotification notification + in queued_notifications) + { + notification.send (proxy_object); + } + queued_notifications = null; + } + private bool matches (Event event) { if (event_templates.length == 0) @@ -182,8 +221,15 @@ namespace Zeitgeist // between monitors? Variant events_v = Events.to_variant (matching_events); + string? name_owner = null; if (proxy_object != null) { + DBusProxy p = proxy_object as DBusProxy; + if (p != null) name_owner = p.g_name_owner; + } + + if (proxy_object != null && name_owner != null) + { DBusProxy p = (DBusProxy) proxy_object; debug ("Notifying %s about %d insertions", p.get_name (), matching_events.length); @@ -208,8 +254,15 @@ namespace Zeitgeist { Variant time_v = intersect_tr.to_variant (); + string? 
name_owner = null; if (proxy_object != null) { + DBusProxy p = proxy_object as DBusProxy; + if (p != null) name_owner = p.g_name_owner; + } + + if (proxy_object != null && name_owner != null) + { proxy_object.notify_delete (time_v, event_ids); } else diff --git a/src/sql.vala b/src/sql.vala index feea64bb..8f5bd2be 100644 --- a/src/sql.vala +++ b/src/sql.vala @@ -111,10 +111,10 @@ namespace Zeitgeist.SQLite if (is_read_only) { int ver = DatabaseSchema.get_schema_version (database); - if (ver != DatabaseSchema.CORE_SCHEMA_VERSION) + if (ver < DatabaseSchema.CORE_SCHEMA_VERSION) { throw new EngineError.DATABASE_CANTOPEN ( - "Unable to open database"); + "Unable to open database: old schema version"); } } else diff --git a/src/table-lookup.vala b/src/table-lookup.vala index 0d59c92d..642bfba6 100644 --- a/src/table-lookup.vala +++ b/src/table-lookup.vala @@ -90,6 +90,26 @@ namespace Zeitgeist.SQLite // When we fetch an event, it either was already in the database // at the time Zeitgeist started or it was inserted later -using // Zeitgeist-, so here we always have the data in memory already. + unowned string val = id_to_value.lookup (id); + if (val != null) return val; + + // The above statement isn't exactly true. If this is a standalone + // reader in a separate process, the values won't be kept updated + // so we need to query the DB if we don't find it. 
+ int rc; + + rc = db.exec ("SELECT value FROM %s WHERE id=%d".printf (table, id), + (n_columns, values, column_names) => + { + id_to_value.insert (id, values[0]); + value_to_id.insert (values[0], id); + return 0; + }, null); + if (rc != Sqlite.OK) + { + critical ("Can't get data from table %s: %d, %s\n", table, + rc, db.errmsg ()); + } return id_to_value.lookup (id); } diff --git a/src/zeitgeist-daemon.vala b/src/zeitgeist-daemon.vala index 2f3d32f7..fd8c39f5 100644 --- a/src/zeitgeist-daemon.vala +++ b/src/zeitgeist-daemon.vala @@ -122,7 +122,7 @@ namespace Zeitgeist public Daemon () throws EngineError { engine = new Engine (); - notifications = new MonitorManager (); + notifications = MonitorManager.get_default (); } public Variant get_events (uint32[] event_ids, BusName sender) |