From ce97c491eb4bc3cbf7cf0d6f39d43a3a3538d473 Mon Sep 17 00:00:00 2001 From: "Dirk-Jan C. Binnema" Date: Mon, 29 Aug 2011 23:35:12 +0300 Subject: [PATCH] * mu-store refactoring --- src/mu-store-priv.hh | 197 ++++++++++ src/mu-store-read.cc | 172 ++++++++ src/mu-store-write.cc | 640 ++++++++++++++++++++++++++++++ src/mu-store.cc | 892 +++++------------------------------------- src/mu-store.h | 191 +++++++-- src/mu-util-db.cc | 150 ------- 6 files changed, 1250 insertions(+), 992 deletions(-) create mode 100644 src/mu-store-priv.hh create mode 100644 src/mu-store-read.cc create mode 100644 src/mu-store-write.cc delete mode 100644 src/mu-util-db.cc diff --git a/src/mu-store-priv.hh b/src/mu-store-priv.hh new file mode 100644 index 00000000..af386135 --- /dev/null +++ b/src/mu-store-priv.hh @@ -0,0 +1,197 @@ +/* -*-mode: c++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8-*- */ +/* +** Copyright (C) 2011 +** +** This program is free software; you can redistribute it and/or modify it +** under the terms of the GNU General Public License as published by the +** Free Software Foundation; either version 3, or (at your option) any +** later version. +** +** This program is distributed in the hope that it will be useful, +** but WITHOUT ANY WARRANTY; without even the implied warranty of +** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +** GNU General Public License for more details. +** +** You should have received a copy of the GNU General Public License +** along with this program; if not, write to the Free Software Foundation, +** Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. +** +*/ + +#ifndef __MU_STORE_PRIV_HH__ +#define __MU_STORE_PRIV_HH__ + +#if HAVE_CONFIG_H +#include "config.h" +#endif /*HAVE_CONFIG_H*/ + +#include +#include +#include +#include + +#include "mu-store.h" +#include "mu-contacts.h" + +struct _MuStore { + +/* by default, use transactions of 30000 messages */ +#define MU_STORE_DEFAULT_BATCH_SIZE 30000 + /* http://article.gmane.org/gmane.comp.search.xapian.general/3656 */ +#define MU_STORE_MAX_TERM_LENGTH 240 + + _MuStore (const char *xpath, const char *contacts_cache, bool read_only): + _in_transaction (false), _processed (0), + _batch_size(MU_STORE_DEFAULT_BATCH_SIZE), + _contacts(0), _version(0), _db(0), _read_only(read_only) { + + if (read_only) + _db = new Xapian::Database (xpath); + else + _db = new Xapian::WritableDatabase (xpath, + Xapian::DB_CREATE_OR_OPEN); + if (!check_version ()) + throw std::runtime_error + ("xapian db version check failed"); + + if (contacts_cache) { + _contacts = mu_contacts_new (contacts_cache); + if (!_contacts) /* don't bail-out for this */ + throw std::runtime_error + ("failed to init contacts cache"); + } + + MU_WRITE_LOG ("%s: opened %s (batch size: %u)", + __FUNCTION__, xpath, batch_size()); + } + + ~_MuStore () { + try { + g_free (_version); + + mu_contacts_destroy (_contacts); + + if (!_read_only) + mu_store_flush (this); + + MU_WRITE_LOG ("closing xapian database with %d documents", + (int)db_read_only()->get_doccount()); + delete _db; + + } MU_XAPIAN_CATCH_BLOCK; + } + + /* get a unique id for this message; note, this function returns a + * static buffer -- not reentrant */ + const char* get_message_uid (const char* path) { + char pfx = 0; + static char buf[PATH_MAX + 10]; + if (G_UNLIKELY(!pfx)) { + pfx = mu_msg_field_xapian_prefix(MU_MSG_FIELD_ID_PATH); + buf[0]=pfx; + } + std::strcpy (buf + 1, path); + return buf; + } + + /* get a unique id for this message; note, this function returns a + * static buffer -- not reentrant */ + const char* get_message_uid (MuMsg *msg) { + return get_message_uid (mu_msg_get_path(msg)); + } + + MuContacts* contacts() { return _contacts; } + + const char* version () { + g_free (_version); + return _version = + mu_store_get_metadata (this, MU_STORE_VERSION_KEY); + } + + void begin_transaction () { + try { + db_writable()->begin_transaction(); + set_in_transaction (true); + } MU_XAPIAN_CATCH_BLOCK; + } + + + void commit_transaction () { + try { + set_in_transaction (false); + db_writable()->commit_transaction(); + } MU_XAPIAN_CATCH_BLOCK; + } + + void rollback_transaction () { + try { + set_in_transaction (false); + db_writable()->cancel_transaction(); + } MU_XAPIAN_CATCH_BLOCK; + } + + Xapian::WritableDatabase* db_writable() { + if (G_UNLIKELY(_read_only)) + throw std::runtime_error + ("database is read-only"); + return (Xapian::WritableDatabase*)_db; + } + + Xapian::Database* db_read_only() const { + return _db; + } + + size_t batch_size () const { return _batch_size;} + size_t set_batch_size (size_t n) { + return _batch_size = ( n == 0) ? MU_STORE_DEFAULT_BATCH_SIZE : n; + } + + bool in_transaction () const { return _in_transaction; } + bool set_in_transaction (bool in_tx) { return _in_transaction = in_tx; } + + int processed () const { return _processed; } + int set_processed (int n) { return _processed = n;} + int inc_processed () { return ++_processed; } + +private: + + bool check_version () { + const gchar *version; + version = mu_store_version (this); + + /* no version yet? it must be a new db then; we'll set the version */ + if (!version) { + if (!mu_store_set_metadata (this, MU_STORE_VERSION_KEY, + MU_XAPIAN_DB_VERSION)) { + g_warning ("failed to set database version"); + return FALSE; + } + return TRUE; /* ok, done. */ + } + + /* we have a version, but is it the right one? */ + if (std::strcmp (version, MU_XAPIAN_DB_VERSION) != 0) { + g_warning ("expected db version %s, but got %s", + MU_XAPIAN_DB_VERSION, + version ? version : "" ); + return FALSE; + } + + return TRUE; + } + + /* transaction handling */ + bool _in_transaction; + int _processed; + size_t _batch_size; /* batch size of a xapian transaction */ + + /* contacts object to cache all the contact information */ + MuContacts *_contacts; + mutable char *_version; + + Xapian::Database *_db; + bool _read_only; +}; + + +#endif /*__MU_STORE_PRIV_HH__*/ diff --git a/src/mu-store-read.cc b/src/mu-store-read.cc new file mode 100644 index 00000000..c384da30 --- /dev/null +++ b/src/mu-store-read.cc @@ -0,0 +1,172 @@ +/* -*-mode: c++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8-*- */ +/* +** Copyright (C) 2008-2011 Dirk-Jan C. Binnema +** +** This program is free software; you can redistribute it and/or modify it +** under the terms of the GNU General Public License as published by the +** Free Software Foundation; either version 3, or (at your option) any +** later version. +** +** This program is distributed in the hope that it will be useful, +** but WITHOUT ANY WARRANTY; without even the implied warranty of +** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +** GNU General Public License for more details. +** +** You should have received a copy of the GNU General Public License +** along with this program; if not, write to the Free Software Foundation, +** Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. +** +*/ + +#if HAVE_CONFIG_H +#include "config.h" +#endif /*HAVE_CONFIG_H*/ + +#include +#include +#include +#include + +#include "mu-store.h" +#include "mu-store-priv.hh" /* _MuStore */ + +#include "mu-msg.h" +#include "mu-msg-part.h" +#include "mu-store.h" +#include "mu-util.h" +#include "mu-str.h" +#include "mu-date.h" +#include "mu-flags.h" +#include "mu-contacts.h" + + +MuStore* +mu_store_new_read_only (const char* xpath, GError **err) +{ + g_return_val_if_fail (xpath, NULL); + + try { + return new _MuStore (xpath, NULL, true); + + } MU_XAPIAN_CATCH_BLOCK_G_ERROR(err,MU_ERROR_XAPIAN); + + return NULL; +} + + + +unsigned +mu_store_count (MuStore *store) +{ + g_return_val_if_fail (store, 0); + + try { + return store->db_read_only()->get_doccount(); + + } MU_XAPIAN_CATCH_BLOCK; + + return 0; +} + + +const char* +mu_store_version (MuStore *store) +{ + g_return_val_if_fail (store, NULL); + return store->version (); +} + + + +char* +mu_store_get_metadata (MuStore *store, const char *key) +{ + g_return_val_if_fail (store, NULL); + g_return_val_if_fail (key, NULL); + + try { + const std::string val (store->db_read_only()->get_metadata (key)); + return val.empty() ? NULL : g_strdup (val.c_str()); + + } MU_XAPIAN_CATCH_BLOCK; + + return NULL; +} + + +XapianDatabase* +mu_store_get_read_only_database (MuStore *store) +{ + g_return_val_if_fail (store, NULL); + return (XapianWritableDatabase*)store->db_read_only(); +} + + + +gboolean +mu_store_contains_message (MuStore *store, const char* path) +{ + g_return_val_if_fail (store, FALSE); + g_return_val_if_fail (path, FALSE); + + try { + const std::string uid (store->get_message_uid(path)); + return store->db_read_only()->term_exists (uid) ? TRUE: FALSE; + + } MU_XAPIAN_CATCH_BLOCK_RETURN (FALSE); +} + + + +time_t +mu_store_get_timestamp (MuStore *store, const char* msgpath) +{ + char *stampstr; + time_t rv; + + g_return_val_if_fail (store, 0); + g_return_val_if_fail (msgpath, 0); + + stampstr = mu_store_get_metadata (store, msgpath); + if (!stampstr) + return (time_t)0; + + rv = (time_t) g_ascii_strtoull (stampstr, NULL, 10); + g_free (stampstr); + + return rv; +} + + + +MuError +mu_store_foreach (MuStore *self, + MuStoreForeachFunc func, void *user_data) +{ + g_return_val_if_fail (self, MU_ERROR); + g_return_val_if_fail (func, MU_ERROR); + + try { + Xapian::Enquire enq (*self->db_read_only()); + + enq.set_query (Xapian::Query::MatchAll); + enq.set_cutoff (0,0); + + Xapian::MSet matches + (enq.get_mset (0, self->db_read_only()->get_doccount())); + if (matches.empty()) + return MU_OK; /* database is empty */ + + for (Xapian::MSet::iterator iter = matches.begin(); + iter != matches.end(); ++iter) { + Xapian::Document doc (iter.get_document()); + const std::string path(doc.get_value(MU_MSG_FIELD_ID_PATH)); + MuError res = func (path.c_str(), user_data); + if (res != MU_OK) + return res; + } + + } MU_XAPIAN_CATCH_BLOCK_RETURN (MU_ERROR); + + return MU_OK; +} diff --git a/src/mu-store-write.cc b/src/mu-store-write.cc new file mode 100644 index 00000000..103ab17d --- /dev/null +++ b/src/mu-store-write.cc @@ -0,0 +1,640 @@ +/* -*-mode: c++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8-*- */ +/* +** Copyright (C) 2008-2011 Dirk-Jan C. Binnema +** +** This program is free software; you can redistribute it and/or modify it +** under the terms of the GNU General Public License as published by the +** Free Software Foundation; either version 3, or (at your option) any +** later version. +** +** This program is distributed in the hope that it will be useful, +** but WITHOUT ANY WARRANTY; without even the implied warranty of +** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +** GNU General Public License for more details. +** +** You should have received a copy of the GNU General Public License +** along with this program; if not, write to the Free Software Foundation, +** Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. +** +*/ + +#if HAVE_CONFIG_H +#include "config.h" +#endif /*HAVE_CONFIG_H*/ + +#include +#include +#include +#include + +#include "mu-store.h" +#include "mu-store-priv.hh" /* _MuStore */ + +#include "mu-msg.h" +#include "mu-msg-part.h" +#include "mu-store.h" +#include "mu-util.h" +#include "mu-str.h" +#include "mu-date.h" +#include "mu-flags.h" +#include "mu-contacts.h" + + + +/* we cache these prefix strings, so we don't have to allocate the all + * the time; this should save 10-20 string allocs per message */ +G_GNUC_CONST static const std::string& +prefix (MuMsgFieldId mfid) +{ + static std::string fields[MU_MSG_FIELD_ID_NUM]; + static bool initialized = false; + + if (G_UNLIKELY(!initialized)) { + for (int i = 0; i != MU_MSG_FIELD_ID_NUM; ++i) + fields[i] = std::string (1, mu_msg_field_xapian_prefix + ((MuMsgFieldId)i)); + initialized = true; + } + + return fields[mfid]; +} + + +static void +add_synonym_for_flag (MuFlags flag, Xapian::WritableDatabase *db) +{ + const std::string pfx(prefix(MU_MSG_FIELD_ID_FLAGS)); + + db->clear_synonyms (pfx + mu_flag_name (flag)); + db->add_synonym (pfx + mu_flag_name (flag), pfx + + (std::string(1, tolower(mu_flag_char (flag))))); +} + + +static void +add_synonym_for_prio (MuMsgPrio prio, Xapian::WritableDatabase *db) +{ + const std::string pfx (prefix(MU_MSG_FIELD_ID_PRIO)); + + std::string s1 (pfx + mu_msg_prio_name (prio)); + std::string s2 (pfx + (std::string(1, mu_msg_prio_char (prio)))); + + db->clear_synonyms (s1); + db->clear_synonyms (s2); + + db->add_synonym (s1, s2); +} + + +static void +add_synonyms (MuStore *store) +{ + mu_flags_foreach ((MuFlagsForeachFunc)add_synonym_for_flag, + store->db_writable()); + mu_msg_prio_foreach ((MuMsgPrioForeachFunc)add_synonym_for_prio, + store->db_writable()); +} + + +MuStore* +mu_store_new_writable (const char* xpath, const char *contacts_cache, + GError **err) +{ + g_return_val_if_fail (xpath, NULL); + + try { + MuStore *store; + + store = new _MuStore (xpath, contacts_cache, false); + add_synonyms (store); + + return store; + + } MU_XAPIAN_CATCH_BLOCK_G_ERROR(err,MU_ERROR_XAPIAN); + + return NULL; +} + + + +void +mu_store_set_batch_size (MuStore *store, guint batchsize) +{ + g_return_if_fail (store); + store->set_batch_size (batchsize); +} + + +gboolean +mu_store_set_metadata (MuStore *store, const char *key, const char *val) +{ + g_return_val_if_fail (store, FALSE); + g_return_val_if_fail (key, FALSE); + g_return_val_if_fail (val, FALSE); + + try { + store->db_writable()->set_metadata (key, val); + return TRUE; + + } MU_XAPIAN_CATCH_BLOCK; + + return FALSE; +} + + +void +mu_store_flush (MuStore *store) +{ + g_return_if_fail (store); + + try { + if (store->in_transaction()) + store->commit_transaction (); + store->db_writable()->flush (); /* => commit, post X 1.1.x */ + + } MU_XAPIAN_CATCH_BLOCK; +} + + +static void +add_terms_values_date (Xapian::Document& doc, MuMsg *msg, MuMsgFieldId mfid) +{ + time_t t; + const char *datestr; + + t = (time_t)mu_msg_get_field_numeric (msg, mfid); + if (t != 0) { + datestr = mu_date_time_t_to_str_s (t, FALSE /*UTC*/); + doc.add_value ((Xapian::valueno)mfid, datestr); + } +} + +/* TODO: we could pre-calculate the add_term values for FLAGS */ + +/* pre-calculate; optimization */ +G_GNUC_CONST static const std::string& +flag_val (char flagchar) +{ + static const std::string + pfx (prefix(MU_MSG_FIELD_ID_FLAGS)), + /* xapian wants lowercase */ + draftstr (pfx + (char)tolower(mu_flag_char(MU_FLAG_DRAFT))), + flaggedstr (pfx + (char)tolower(mu_flag_char(MU_FLAG_FLAGGED))), + passedstr (pfx + (char)tolower(mu_flag_char(MU_FLAG_PASSED))), + repliedstr (pfx + (char)tolower(mu_flag_char(MU_FLAG_REPLIED))), + seenstr (pfx + (char)tolower(mu_flag_char(MU_FLAG_SEEN))), + trashedstr (pfx + (char)tolower(mu_flag_char(MU_FLAG_TRASHED))), + newstr (pfx + (char)tolower(mu_flag_char(MU_FLAG_NEW))), + + signedstr (pfx + mu_flag_char(MU_FLAG_SIGNED)), + encryptedstr (pfx + mu_flag_char(MU_FLAG_ENCRYPTED)), + has_attachstr (pfx + mu_flag_char(MU_FLAG_HAS_ATTACH)), + unreadstr (pfx + mu_flag_char(MU_FLAG_UNREAD)); + + switch (flagchar) { + + case 'D': return draftstr; + case 'F': return flaggedstr; + case 'P': return passedstr; + case 'R': return repliedstr; + case 'S': return seenstr; + case 'T': return trashedstr; + + case 'N': return newstr; + + case 's': return signedstr; + case 'x': return encryptedstr; + case 'a': return has_attachstr; + + case 'u': return unreadstr; + + default: + g_return_val_if_reached (flaggedstr); + return flaggedstr; + } +} + + + + +/* pre-calculate; optimization */ +G_GNUC_CONST static const std::string& +prio_val (MuMsgPrio prio) +{ + static const std::string pfx (prefix(MU_MSG_FIELD_ID_PRIO)); + + static const std::string + lowstr (pfx + std::string(1, mu_msg_prio_char(MU_MSG_PRIO_LOW))), + normalstr (pfx + std::string(1, mu_msg_prio_char(MU_MSG_PRIO_NORMAL))), + highstr (pfx + std::string(1, mu_msg_prio_char(MU_MSG_PRIO_HIGH))); + + switch (prio) { + case MU_MSG_PRIO_LOW: return lowstr; + case MU_MSG_PRIO_NORMAL: return normalstr; + case MU_MSG_PRIO_HIGH: return highstr; + default: + g_return_val_if_reached (normalstr); + return normalstr; + } +} + + + +static void +add_terms_values_number (Xapian::Document& doc, MuMsg *msg, MuMsgFieldId mfid) +{ + gint64 num = mu_msg_get_field_numeric (msg, mfid); + + const std::string numstr (Xapian::sortable_serialise((double)num)); + doc.add_value ((Xapian::valueno)mfid, numstr); + + if (mfid == MU_MSG_FIELD_ID_FLAGS) { + const char *cur = mu_flags_to_str_s + ((MuFlags)num,(MuFlagType)MU_FLAG_TYPE_ANY); + g_return_if_fail (cur); + while (*cur) { + doc.add_term (flag_val(*cur)); + ++cur; + } + + } else if (mfid == MU_MSG_FIELD_ID_PRIO) + doc.add_term (prio_val((MuMsgPrio)num)); +} + + +/* for string and string-list */ +static void +add_terms_values_str (Xapian::Document& doc, char *val, + MuMsgFieldId mfid) +{ + /* the value is what we'll display; the unchanged original */ + if (mu_msg_field_xapian_value(mfid)) + doc.add_value ((Xapian::valueno)mfid, val); + + /* now, let's create some search terms... */ + if (mu_msg_field_normalize (mfid)) + mu_str_normalize_in_place (val, TRUE); + if (mu_msg_field_xapian_escape (mfid)) + mu_str_ascii_xapian_escape_in_place (val); + + if (mu_msg_field_xapian_index (mfid)) { + Xapian::TermGenerator termgen; + termgen.set_document (doc); + termgen.index_text_without_positions (val, 1, prefix(mfid)); + } + + if (mu_msg_field_xapian_term(mfid)) + doc.add_term (prefix(mfid) + + std::string(val, 0, MU_STORE_MAX_TERM_LENGTH)); +} + + +static void +add_terms_values_string (Xapian::Document& doc, MuMsg *msg, + MuMsgFieldId mfid) +{ + const char *orig; + char *val; + size_t len; + + if (!(orig = mu_msg_get_field_string (msg, mfid))) + return; /* nothing to do */ + + /* try stack-allocation, it's much faster*/ + len = strlen (orig); + val = (char*)(G_LIKELY(len < 1024)?g_alloca(len+1):g_malloc(len+1)); + strcpy (val, orig); + + add_terms_values_str (doc, val, mfid); + + if (!(G_LIKELY(len < 1024))) + g_free (val); +} + + + +static void +add_terms_values_string_list (Xapian::Document& doc, MuMsg *msg, + MuMsgFieldId mfid) +{ + const GSList *lst; + + lst = mu_msg_get_field_string_list (msg, mfid); + + if (lst && mu_msg_field_xapian_value (mfid)) { + gchar *str; + str = mu_str_from_list (lst, ','); + if (str) + doc.add_value ((Xapian::valueno)mfid, str); + g_free (str); + } + + if (lst && mu_msg_field_xapian_term (mfid)) { + while (lst) { + size_t len; + char *val; + /* try stack-allocation, it's much faster*/ + len = strlen ((char*)lst->data); + if (G_LIKELY(len < 1024)) + val = (char*)g_alloca(len+1); + else + val = (char*)g_malloc(len+1); + strcpy (val, (char*)lst->data); + + add_terms_values_str (doc, val, mfid); + + if (!(G_LIKELY(len < 1024))) + g_free (val); + + lst = g_slist_next ((GSList*)lst); + } + } +} + + +struct PartData { + PartData (Xapian::Document& doc, MuMsgFieldId mfid): + _doc (doc), _mfid(mfid) {} + Xapian::Document _doc; + MuMsgFieldId _mfid; +}; + +static void +each_part (MuMsg *msg, MuMsgPart *part, PartData *pdata) +{ + if (mu_msg_part_looks_like_attachment (part, TRUE) && + (part->file_name)) { + + char val[MU_STORE_MAX_TERM_LENGTH + 1]; + strncpy (val, part->file_name, sizeof(val)); + + /* now, let's create a terms... */ + mu_str_normalize_in_place (val, TRUE); + mu_str_ascii_xapian_escape_in_place (val); + + pdata->_doc.add_term (prefix(pdata->_mfid) + + std::string(val, 0, MU_STORE_MAX_TERM_LENGTH)); + } +} + + +static void +add_terms_values_attach (Xapian::Document& doc, MuMsg *msg, + MuMsgFieldId mfid) +{ + PartData pdata (doc, mfid); + mu_msg_part_foreach (msg, (MuMsgPartForeachFunc)each_part, &pdata); +} + + +static void +add_terms_values_body (Xapian::Document& doc, MuMsg *msg, + MuMsgFieldId mfid) +{ + const char *str; + char *norm; + + if (mu_msg_get_flags(msg) & MU_FLAG_ENCRYPTED) + return; /* ignore encrypted bodies */ + + str = mu_msg_get_body_text (msg); + if (!str) /* FIXME: html->txt fallback needed */ + str = mu_msg_get_body_html (msg); + + if (!str) + return; /* no body... */ + + Xapian::TermGenerator termgen; + termgen.set_document(doc); + + norm = mu_str_normalize (str, TRUE); + termgen.index_text_without_positions + (norm, 1, prefix(mfid)); + + g_free (norm); +} + +struct _MsgDoc { + Xapian::Document *_doc; + MuMsg *_msg; + MuStore *_store; +}; +typedef struct _MsgDoc MsgDoc; + +static void +add_terms_values (MuMsgFieldId mfid, MsgDoc* msgdoc) +{ + /* note: contact-stuff (To/Cc/From) will handled in + * add_contact_info, not here */ + if (!mu_msg_field_xapian_index(mfid) && + !mu_msg_field_xapian_term(mfid) && + !mu_msg_field_xapian_value(mfid)) + return; + + switch (mfid) { + case MU_MSG_FIELD_ID_DATE: + add_terms_values_date (*msgdoc->_doc, msgdoc->_msg, mfid); + break; + case MU_MSG_FIELD_ID_BODY_TEXT: + add_terms_values_body (*msgdoc->_doc, msgdoc->_msg, mfid); + break; + case MU_MSG_FIELD_ID_ATTACH: + add_terms_values_attach (*msgdoc->_doc, msgdoc->_msg, mfid); + break; + default: + if (mu_msg_field_is_numeric (mfid)) + add_terms_values_number (*msgdoc->_doc, msgdoc->_msg, + mfid); + else if (mu_msg_field_is_string (mfid)) + add_terms_values_string (*msgdoc->_doc, + msgdoc->_msg, + mfid); + else if (mu_msg_field_is_string_list(mfid)) + add_terms_values_string_list (*msgdoc->_doc, + msgdoc->_msg, + mfid); + else + g_return_if_reached (); + } +} + + +static const std::string& +xapian_pfx (MuMsgContact *contact) +{ + static const std::string empty; + + /* use ptr to string to prevent copy... */ + switch (contact->type) { + case MU_MSG_CONTACT_TYPE_TO: + return prefix(MU_MSG_FIELD_ID_TO); + case MU_MSG_CONTACT_TYPE_FROM: + return prefix(MU_MSG_FIELD_ID_FROM); + case MU_MSG_CONTACT_TYPE_CC: + return prefix(MU_MSG_FIELD_ID_CC); + case MU_MSG_CONTACT_TYPE_BCC: + return prefix(MU_MSG_FIELD_ID_BCC); + default: + g_warning ("unsupported contact type %u", + (unsigned)contact->type); + return empty; + } +} + + +static void +each_contact_info (MuMsgContact *contact, MsgDoc *msgdoc) +{ + const std::string pfx (xapian_pfx(contact)); + if (pfx.empty()) + return; /* unsupported contact type */ + + if (!mu_str_is_empty(contact->name)) { + Xapian::TermGenerator termgen; + termgen.set_document (*msgdoc->_doc); + char *norm = mu_str_normalize (contact->name, TRUE); + termgen.index_text_without_positions (norm, 1, pfx); + g_free (norm); + } + + /* don't normalize e-mail address, but do lowercase it */ + if (!mu_str_is_empty(contact->address)) { + char *escaped = mu_str_ascii_xapian_escape (contact->address); + msgdoc->_doc->add_term + (std::string (pfx + escaped, 0, + MU_STORE_MAX_TERM_LENGTH)); + g_free (escaped); + + /* store it also in our contacts cache */ + if (msgdoc->_store->contacts()) + mu_contacts_add (msgdoc->_store->contacts(), + contact->address, contact->name, + mu_msg_get_date(msgdoc->_msg)); + } +} + + +gboolean +mu_store_store_msg (MuStore *store, MuMsg *msg, gboolean replace) +{ + g_return_val_if_fail (store, FALSE); + g_return_val_if_fail (msg, FALSE); + + try { + Xapian::Document newdoc; + Xapian::docid id; + MsgDoc msgdoc = { &newdoc, msg, store }; + const std::string uid(store->get_message_uid(msg)); + + if (!store->in_transaction()) + store->begin_transaction(); + + /* we must add a unique term, so we can replace + * matching documents */ + newdoc.add_term (uid); + mu_msg_field_foreach + ((MuMsgFieldForEachFunc)add_terms_values, &msgdoc); + /* also store the contact-info as separate terms */ + mu_msg_contact_foreach + (msg, + (MuMsgContactForeachFunc)each_contact_info, + &msgdoc); + + /* add_document is slightly + faster, we can use it when + * we know the document does not exist yet, eg., in + * case of a rebuild */ + if (replace) /* we replace all existing documents for this file */ + id = store->db_writable()->replace_document (uid, newdoc); + else + id = store->db_writable()->add_document (newdoc); + + if (store->inc_processed() % store->batch_size() == 0) + store->commit_transaction(); + + return TRUE; + + } MU_XAPIAN_CATCH_BLOCK; + + if (store->in_transaction()) + store->rollback_transaction(); + + return FALSE; +} + + +gboolean +mu_store_store_path (MuStore *store, const char *path) +{ + MuMsg *msg; + GError *err; + gboolean rv; + + g_return_val_if_fail (store, FALSE); + g_return_val_if_fail (path, FALSE); + + err = NULL; + msg = mu_msg_new_from_file (path, NULL, &err); + + if (!msg) { + if (err) { + g_warning ("failed to create message %s to store: %s", + path, err->message); + g_error_free (err); + } else + g_warning ("failed to create message %s to store", path); + + return FALSE; + } + + rv = mu_store_store_msg (store, msg, TRUE); + if (!rv) + g_warning ("failed to store %s", path); + + mu_msg_unref (msg); + + return rv; +} + + +XapianWritableDatabase* +mu_store_get_writable_database (MuStore *store) +{ + g_return_val_if_fail (store, NULL); + + return (XapianWritableDatabase*)store->db_writable(); +} + + +gboolean +mu_store_remove_path (MuStore *store, const char *msgpath) +{ + g_return_val_if_fail (store, FALSE); + g_return_val_if_fail (msgpath, FALSE); + + try { + const std::string uid (store->get_message_uid (msgpath)); + + store->db_writable()->delete_document (uid); + store->inc_processed(); + + return TRUE; + + } MU_XAPIAN_CATCH_BLOCK_RETURN (FALSE); +} + + +gboolean +mu_store_set_timestamp (MuStore *store, const char* msgpath, + time_t stamp) +{ + char buf[21]; + + g_return_val_if_fail (store, FALSE); + g_return_val_if_fail (msgpath, FALSE); + + sprintf (buf, "%" G_GUINT64_FORMAT, (guint64)stamp); + return mu_store_set_metadata (store, msgpath, buf); +} + + diff --git a/src/mu-store.cc b/src/mu-store.cc index f1ef64fd..9cbee9e5 100644 --- a/src/mu-store.cc +++ b/src/mu-store.cc @@ -27,6 +27,12 @@ #include #include +#include + +#include "mu-store.h" +#include "mu-store-priv.hh" /* _MuStore */ + + #include "mu-msg.h" #include "mu-msg-part.h" #include "mu-store.h" @@ -36,163 +42,6 @@ #include "mu-flags.h" #include "mu-contacts.h" -/* by default, use transactions of 30000 messages */ -#define MU_STORE_DEFAULT_TRX_SIZE 30000 - -/* http://article.gmane.org/gmane.comp.search.xapian.general/3656 */ -#define MU_STORE_MAX_TERM_LENGTH 240 - -static void add_synonyms (MuStore *store); -static gboolean check_version (MuStore *store); - -struct _MuStore { - _MuStore (const char *xpath, const char *contacts_cache) : - _db (xpath, Xapian::DB_CREATE_OR_OPEN), _in_transaction(0), - _processed (0), _trx_size(MU_STORE_DEFAULT_TRX_SIZE), _contacts (0), - _version (0) { - - if (!check_version (this)) - throw std::runtime_error - ("xapian db version check failed"); - - if (contacts_cache) { - _contacts = mu_contacts_new (contacts_cache); - if (!_contacts) /* don't bail-out for this */ - throw std::runtime_error - ("failed to init contacts cache"); - } - - add_synonyms (this); - MU_WRITE_LOG ("%s: opened %s (batch size: %u)", - __FUNCTION__, xpath, _trx_size); - } - - ~_MuStore () { - try { - g_free (_version); - mu_contacts_destroy (_contacts); - mu_store_flush (this); - - MU_WRITE_LOG ("closing xapian database with %d documents", - (int)_db.get_doccount()); - - } MU_XAPIAN_CATCH_BLOCK; - } - - Xapian::WritableDatabase _db; - - /* transaction handling */ - bool _in_transaction; - int _processed; - size_t _trx_size; - guint _batchsize; /* batch size of a xapian transaction */ - - /* contacts object to cache all the contact information */ - MuContacts *_contacts; - - char *_version; -}; - - -/* we cache these prefix strings, so we don't have to allocate the all - * the time; this should save 10-20 string allocs per message */ -G_GNUC_CONST static const std::string& -prefix (MuMsgFieldId mfid) -{ - static std::string fields[MU_MSG_FIELD_ID_NUM]; - static bool initialized = false; - - if (G_UNLIKELY(!initialized)) { - for (int i = 0; i != MU_MSG_FIELD_ID_NUM; ++i) - fields[i] = std::string (1, mu_msg_field_xapian_prefix - ((MuMsgFieldId)i)); - initialized = true; - } - - return fields[mfid]; -} - - - -static void -add_synonym_for_flag (MuFlags flag, Xapian::WritableDatabase *db) -{ - const std::string pfx(prefix(MU_MSG_FIELD_ID_FLAGS)); - - db->clear_synonyms (pfx + mu_flag_name (flag)); - db->add_synonym (pfx + mu_flag_name (flag), pfx + - (std::string(1, tolower(mu_flag_char (flag))))); -} - - -static void -add_synonym_for_prio (MuMsgPrio prio, Xapian::WritableDatabase *db) -{ - const std::string pfx (prefix(MU_MSG_FIELD_ID_PRIO)); - - std::string s1 (pfx + mu_msg_prio_name (prio)); - std::string s2 (pfx + (std::string(1, mu_msg_prio_char (prio)))); - - db->clear_synonyms (s1); - db->clear_synonyms (s2); - - db->add_synonym (s1, s2); -} - - -static void -add_synonyms (MuStore *store) -{ - mu_flags_foreach ((MuFlagsForeachFunc)add_synonym_for_flag, - &store->_db); - mu_msg_prio_foreach ((MuMsgPrioForeachFunc)add_synonym_for_prio, - &store->_db); -} - -static gboolean -check_version (MuStore *store) -{ - /* FIXME clear up versioning semantics */ - const gchar *version; - - version = mu_store_version (store); - - /* no version yet? it must be a new db then; we'll set the version */ - if (!version) { - if (!mu_store_set_metadata (store, MU_STORE_VERSION_KEY, - MU_XAPIAN_DB_VERSION)) { - g_warning ("failed to set database version"); - return FALSE; - } - return TRUE; /* ok, done. */ - } - - /* we have a version, but is it the right one? */ - if (std::strcmp (version, MU_XAPIAN_DB_VERSION) != 0) { - g_warning ("expected db version %s, but got %s", - MU_XAPIAN_DB_VERSION, - version ? version : "" ); - return FALSE; - } - - return TRUE; -} - -MuStore* -mu_store_new (const char* xpath, - const char *contacts_cache, - GError **err) -{ - g_return_val_if_fail (xpath, NULL); - - try { - return new _MuStore (xpath, contacts_cache); - - } MU_XAPIAN_CATCH_BLOCK_G_ERROR(err,MU_ERROR_XAPIAN); - - return NULL; -} - void mu_store_destroy (MuStore *store) @@ -201,65 +50,20 @@ mu_store_destroy (MuStore *store) } -void -mu_store_set_batch_size (MuStore *store, guint batchsize) +static char* +xapian_get_metadata (const gchar *xpath, const gchar *key) { - g_return_if_fail (store); - - store->_trx_size = batchsize ? batchsize : MU_STORE_DEFAULT_TRX_SIZE; -} - - - -unsigned -mu_store_count (MuStore *store) -{ - g_return_val_if_fail (store, 0); - - try { - return store->_db.get_doccount(); - - } MU_XAPIAN_CATCH_BLOCK; - - return 0; -} - - -const char* -mu_store_version (MuStore *store) -{ - g_return_val_if_fail (store, NULL); - - g_free (store->_version); - return store->_version = - mu_store_get_metadata (store, MU_STORE_VERSION_KEY); -} - -gboolean -mu_store_set_metadata (MuStore *store, const char *key, const char *val) -{ - g_return_val_if_fail (store, FALSE); - g_return_val_if_fail (key, FALSE); - g_return_val_if_fail (val, FALSE); - - try { - store->_db.set_metadata (key, val); - return TRUE; - - } MU_XAPIAN_CATCH_BLOCK; - - return FALSE; -} - - -char* -mu_store_get_metadata (MuStore *store, const char *key) -{ - g_return_val_if_fail (store, NULL); + g_return_val_if_fail (xpath, NULL); g_return_val_if_fail (key, NULL); - + + if (!access(xpath, F_OK) == 0) { + g_warning ("cannot access %s: %s", xpath, strerror(errno)); + return NULL; + } + try { - const std::string val (store->_db.get_metadata (key)); + Xapian::Database db (xpath); + const std::string val(db.get_metadata (key)); return val.empty() ? NULL : g_strdup (val.c_str()); } MU_XAPIAN_CATCH_BLOCK; @@ -267,613 +71,99 @@ mu_store_get_metadata (MuStore *store, const char *key) return NULL; } - - -static void -begin_trx_if (MuStore *store, gboolean cond) +char* +mu_store_database_version (const gchar *xpath) { - if (cond) { - g_debug ("beginning Xapian transaction"); - store->_db.begin_transaction(); - store->_in_transaction = true; - } -} + g_return_val_if_fail (xpath, NULL); -static void -commit_trx_if (MuStore *store, gboolean cond) -{ - if (cond) { - g_debug ("comitting Xapian transaction"); - store->_in_transaction = false; - store->_db.commit_transaction(); - } -} - -static void -rollback_trx_if (MuStore *store, gboolean cond) -{ - if (cond) { - g_debug ("rolling back Xapian transaction"); - store->_in_transaction = false; - store->_db.cancel_transaction(); - } -} - -void -mu_store_flush (MuStore *store) -{ - g_return_if_fail (store); - - try { - commit_trx_if (store, store->_in_transaction); - store->_db.flush (); /* => commit, post X 1.1.x */ - - } MU_XAPIAN_CATCH_BLOCK; -} - - -static void -add_terms_values_date (Xapian::Document& doc, MuMsg *msg, MuMsgFieldId mfid) -{ - time_t t; - const char *datestr; - - t = (time_t)mu_msg_get_field_numeric (msg, mfid); - if (t != 0) { - datestr = mu_date_time_t_to_str_s (t, FALSE /*UTC*/); - doc.add_value ((Xapian::valueno)mfid, datestr); - } -} - -/* TODO: we could pre-calculate the add_term values for FLAGS */ - -/* pre-calculate; optimization */ -G_GNUC_CONST static const std::string& -flag_val (char flagchar) -{ - static const std::string - pfx (prefix(MU_MSG_FIELD_ID_FLAGS)), - /* xapian wants lowercase */ - draftstr (pfx + (char)tolower(mu_flag_char(MU_FLAG_DRAFT))), - flaggedstr (pfx + (char)tolower(mu_flag_char(MU_FLAG_FLAGGED))), - passedstr (pfx + (char)tolower(mu_flag_char(MU_FLAG_PASSED))), - repliedstr (pfx + (char)tolower(mu_flag_char(MU_FLAG_REPLIED))), - seenstr (pfx + (char)tolower(mu_flag_char(MU_FLAG_SEEN))), - trashedstr (pfx + (char)tolower(mu_flag_char(MU_FLAG_TRASHED))), - newstr (pfx + (char)tolower(mu_flag_char(MU_FLAG_NEW))), - - signedstr (pfx + mu_flag_char(MU_FLAG_SIGNED)), - encryptedstr (pfx + mu_flag_char(MU_FLAG_ENCRYPTED)), - has_attachstr (pfx + mu_flag_char(MU_FLAG_HAS_ATTACH)), - unreadstr (pfx + mu_flag_char(MU_FLAG_UNREAD)); - - switch (flagchar) { - - case 'D': return draftstr; - case 'F': return flaggedstr; - case 'P': return passedstr; - case 'R': return repliedstr; - case 'S': return seenstr; - case 'T': return trashedstr; - - case 'N': return newstr; - - case 's': return signedstr; - case 'x': return encryptedstr; - case 'a': return has_attachstr; - - case 'u': return unreadstr; - - default: - g_return_val_if_reached (flaggedstr); - return flaggedstr; - } -} - - - - -/* pre-calculate; optimization */ -G_GNUC_CONST static const std::string& -prio_val (MuMsgPrio prio) -{ - static const std::string pfx (prefix(MU_MSG_FIELD_ID_PRIO)); - - static const std::string - lowstr (pfx + std::string(1, mu_msg_prio_char(MU_MSG_PRIO_LOW))), - normalstr (pfx + std::string(1, mu_msg_prio_char(MU_MSG_PRIO_NORMAL))), - highstr (pfx + std::string(1, mu_msg_prio_char(MU_MSG_PRIO_HIGH))); - - switch (prio) { - case MU_MSG_PRIO_LOW: return lowstr; - case MU_MSG_PRIO_NORMAL: return normalstr; - case MU_MSG_PRIO_HIGH: return highstr; - default: - g_return_val_if_reached (normalstr); - return normalstr; - } -} - - - -static void -add_terms_values_number (Xapian::Document& doc, MuMsg *msg, MuMsgFieldId mfid) -{ - gint64 num = mu_msg_get_field_numeric (msg, mfid); - - const std::string numstr (Xapian::sortable_serialise((double)num)); - doc.add_value ((Xapian::valueno)mfid, numstr); - - if (mfid == MU_MSG_FIELD_ID_FLAGS) { - const char *cur = mu_flags_to_str_s - ((MuFlags)num,(MuFlagType)MU_FLAG_TYPE_ANY); - g_return_if_fail (cur); - while (*cur) { - doc.add_term (flag_val(*cur)); - ++cur; - } - - } else if (mfid == MU_MSG_FIELD_ID_PRIO) - doc.add_term (prio_val((MuMsgPrio)num)); -} - - -/* for string and string-list */ -static void -add_terms_values_str (Xapian::Document& doc, char *val, - MuMsgFieldId mfid) -{ - /* the value is what we'll display; the unchanged original */ - if (mu_msg_field_xapian_value(mfid)) - doc.add_value ((Xapian::valueno)mfid, val); - - /* now, let's create some search terms... */ - if (mu_msg_field_normalize (mfid)) - mu_str_normalize_in_place (val, TRUE); - if (mu_msg_field_xapian_escape (mfid)) - mu_str_ascii_xapian_escape_in_place (val); - - if (mu_msg_field_xapian_index (mfid)) { - Xapian::TermGenerator termgen; - termgen.set_document (doc); - termgen.index_text_without_positions (val, 1, prefix(mfid)); - } - - if (mu_msg_field_xapian_term(mfid)) - doc.add_term (prefix(mfid) + - std::string(val, 0, MU_STORE_MAX_TERM_LENGTH)); -} - - -static void -add_terms_values_string (Xapian::Document& doc, MuMsg *msg, - MuMsgFieldId mfid) -{ - const char *orig; - char *val; - size_t len; - - if (!(orig = mu_msg_get_field_string (msg, mfid))) - return; /* nothing to do */ - - /* try stack-allocation, it's much faster*/ - len = strlen (orig); - val = (char*)(G_LIKELY(len < 1024)?g_alloca(len+1):g_malloc(len+1)); - strcpy (val, orig); - - add_terms_values_str (doc, val, mfid); - - if (!(G_LIKELY(len < 1024))) - g_free (val); -} - - - -static void -add_terms_values_string_list (Xapian::Document& doc, MuMsg *msg, - MuMsgFieldId mfid) -{ - const GSList *lst; - - lst = mu_msg_get_field_string_list (msg, mfid); - - if (lst && mu_msg_field_xapian_value (mfid)) { - gchar *str; - str = mu_str_from_list (lst, ','); - if (str) - doc.add_value ((Xapian::valueno)mfid, str); - g_free (str); - } - - if (lst && mu_msg_field_xapian_term (mfid)) { - while (lst) { - size_t len; - char *val; - /* try stack-allocation, it's much faster*/ - len = strlen ((char*)lst->data); - if (G_LIKELY(len < 1024)) - val = (char*)g_alloca(len+1); - else - val = (char*)g_malloc(len+1); - strcpy (val, (char*)lst->data); - - add_terms_values_str (doc, val, mfid); - - if (!(G_LIKELY(len < 1024))) - g_free (val); - - lst = g_slist_next ((GSList*)lst); - } - } -} - - -struct PartData { - PartData (Xapian::Document& doc, MuMsgFieldId mfid): - _doc (doc), _mfid(mfid) {} - Xapian::Document _doc; - MuMsgFieldId _mfid; -}; - -static void -each_part (MuMsg *msg, MuMsgPart *part, PartData *pdata) -{ - if (mu_msg_part_looks_like_attachment (part, TRUE) && - (part->file_name)) { - - char val[MU_STORE_MAX_TERM_LENGTH + 1]; - strncpy (val, part->file_name, sizeof(val)); - - /* now, let's create a terms... */ - mu_str_normalize_in_place (val, TRUE); - mu_str_ascii_xapian_escape_in_place (val); - - pdata->_doc.add_term (prefix(pdata->_mfid) + - std::string(val, 0, MU_STORE_MAX_TERM_LENGTH)); - } -} - - -static void -add_terms_values_attach (Xapian::Document& doc, MuMsg *msg, - MuMsgFieldId mfid) -{ - PartData pdata (doc, mfid); - mu_msg_part_foreach (msg, (MuMsgPartForeachFunc)each_part, &pdata); -} - - -static void -add_terms_values_body (Xapian::Document& doc, MuMsg *msg, - MuMsgFieldId mfid) -{ - const char *str; - char *norm; - - if (mu_msg_get_flags(msg) & MU_FLAG_ENCRYPTED) - return; /* ignore encrypted bodies */ - - str = mu_msg_get_body_text (msg); - if (!str) /* FIXME: html->txt fallback needed */ - str = mu_msg_get_body_html (msg); - - if (!str) - return; /* no body... */ - - Xapian::TermGenerator termgen; - termgen.set_document(doc); - - norm = mu_str_normalize (str, TRUE); - termgen.index_text_without_positions - (norm, 1, prefix(mfid)); - - g_free (norm); -} - -struct _MsgDoc { - Xapian::Document *_doc; - MuMsg *_msg; - MuStore *_store; -}; -typedef struct _MsgDoc MsgDoc; - -static void -add_terms_values (MuMsgFieldId mfid, MsgDoc* msgdoc) -{ - /* note: contact-stuff (To/Cc/From) will handled in - * add_contact_info, not here */ - if (!mu_msg_field_xapian_index(mfid) && - !mu_msg_field_xapian_term(mfid) && - !mu_msg_field_xapian_value(mfid)) - return; - - switch (mfid) { - case MU_MSG_FIELD_ID_DATE: - add_terms_values_date (*msgdoc->_doc, msgdoc->_msg, mfid); - break; - case MU_MSG_FIELD_ID_BODY_TEXT: - add_terms_values_body (*msgdoc->_doc, msgdoc->_msg, mfid); - break; - case MU_MSG_FIELD_ID_ATTACH: - add_terms_values_attach (*msgdoc->_doc, msgdoc->_msg, mfid); - break; - default: - if (mu_msg_field_is_numeric (mfid)) - add_terms_values_number (*msgdoc->_doc, msgdoc->_msg, - mfid); - else if (mu_msg_field_is_string (mfid)) - add_terms_values_string (*msgdoc->_doc, - msgdoc->_msg, - mfid); - else if (mu_msg_field_is_string_list(mfid)) - add_terms_values_string_list (*msgdoc->_doc, - msgdoc->_msg, - mfid); - else - g_return_if_reached (); - } -} - - -static const std::string& -xapian_pfx (MuMsgContact *contact) -{ - static const std::string empty; - - /* use ptr to string to prevent copy... */ - switch (contact->type) { - case MU_MSG_CONTACT_TYPE_TO: - return prefix(MU_MSG_FIELD_ID_TO); - case MU_MSG_CONTACT_TYPE_FROM: - return prefix(MU_MSG_FIELD_ID_FROM); - case MU_MSG_CONTACT_TYPE_CC: - return prefix(MU_MSG_FIELD_ID_CC); - case MU_MSG_CONTACT_TYPE_BCC: - return prefix(MU_MSG_FIELD_ID_BCC); - default: - g_warning ("unsupported contact type %u", - (unsigned)contact->type); - return empty; - } -} - - -static void -each_contact_info (MuMsgContact *contact, MsgDoc *msgdoc) -{ - const std::string pfx (xapian_pfx(contact)); - if (pfx.empty()) - return; /* unsupported contact type */ - - if (!mu_str_is_empty(contact->name)) { - Xapian::TermGenerator termgen; - termgen.set_document (*msgdoc->_doc); - char *norm = mu_str_normalize (contact->name, TRUE); - termgen.index_text_without_positions (norm, 1, pfx); - g_free (norm); - } - - /* don't normalize e-mail address, but do lowercase it */ - if (!mu_str_is_empty(contact->address)) { - char *escaped = mu_str_ascii_xapian_escape (contact->address); - msgdoc->_doc->add_term - (std::string (pfx + escaped, 0, - MU_STORE_MAX_TERM_LENGTH)); - g_free (escaped); - - /* store it also in our contacts cache */ - if (msgdoc->_store->_contacts) - mu_contacts_add (msgdoc->_store->_contacts, - contact->address, contact->name, - mu_msg_get_date(msgdoc->_msg)); - } -} - -/* get a unique id for this message; note, this function returns a - * static buffer -- not reentrant */ -static const char* -get_message_uid (const char* path) -{ - char pfx = 0; - static char buf[PATH_MAX + 10]; - - if (G_UNLIKELY(!pfx)) { - pfx = mu_msg_field_xapian_prefix(MU_MSG_FIELD_ID_PATH); - buf[0]=pfx; - } - - std::strcpy (buf + 1, path); - - return buf; -} - -/* get a unique id for this message; note, this function returns a - * static buffer -- not reentrant */ -static const char* -get_message_uid (MuMsg *msg) -{ - return get_message_uid (mu_msg_get_path(msg)); + return xapian_get_metadata (xpath, MU_STORE_VERSION_KEY); } gboolean -mu_store_store_msg (MuStore *store, MuMsg *msg, gboolean replace) -{ - g_return_val_if_fail (store, FALSE); - g_return_val_if_fail (msg, FALSE); +mu_store_database_needs_upgrade (const gchar *xpath) +{ + char *version; + gboolean rv; + + g_return_val_if_fail (xpath, TRUE); + + version = mu_store_database_version (xpath); + + if (g_strcmp0 (version, MU_XAPIAN_DB_VERSION) == 0) + rv = FALSE; + else + rv = TRUE; + + g_free (version); + + return rv; +} + + +gboolean +mu_store_database_is_empty (const gchar* xpath) +{ + g_return_val_if_fail (xpath, TRUE); + + /* it's 'empty' (non-existant) */ + if (access(xpath, F_OK) != 0 && errno == ENOENT) + return TRUE; try { - Xapian::Document newdoc; - Xapian::docid id; - MsgDoc msgdoc = { &newdoc, msg, store }; - const std::string uid(get_message_uid(msg)); - - begin_trx_if (store, !store->_in_transaction); - /* we must add a unique term, so we can replace - * matching documents */ - newdoc.add_term (uid); - mu_msg_field_foreach - ((MuMsgFieldForEachFunc)add_terms_values, &msgdoc); - /* also store the contact-info as separate terms */ - mu_msg_contact_foreach - (msg, - (MuMsgContactForeachFunc)each_contact_info, - &msgdoc); - - /* add_document is slightly - faster, we can use it when - * we know the document does not exist yet, eg., in - * case of a rebuild */ - if (replace) /* we replace all existing documents for this file */ - id = store->_db.replace_document (uid, newdoc); - else - id = store->_db.add_document (newdoc); - - ++store->_processed; - commit_trx_if (store, - store->_processed % store->_trx_size == 0); + Xapian::Database db (xpath); + return db.get_doccount() == 0 ? TRUE : FALSE; + + } MU_XAPIAN_CATCH_BLOCK; + + return FALSE; +} + +gboolean +mu_store_database_clear (const gchar *xpath, const char *ccache) +{ + g_return_val_if_fail (xpath, FALSE); + g_return_val_if_fail (ccache, FALSE); + + try { + int rv; + + /* clear the database */ + Xapian::WritableDatabase db + (xpath, Xapian::DB_CREATE_OR_OVERWRITE); + db.flush (); + MU_WRITE_LOG ("emptied database %s", xpath); + + /* clear the contacts cache; this is not totally + * fail-safe, as some other process may still have it + * open... */ + rv = unlink (ccache); + if (rv != 0 && errno != ENOENT) { + g_warning ("failed to remove contacts-cache: %s", + strerror(errno)); + return FALSE; + } return TRUE; } MU_XAPIAN_CATCH_BLOCK; - rollback_trx_if (store, store->_in_transaction); - return FALSE; } gboolean -mu_store_store_path (MuStore *store, const char *path) +mu_store_database_is_locked (const gchar *xpath) { - MuMsg *msg; - GError *err; - gboolean rv; + g_return_val_if_fail (xpath, FALSE); - g_return_val_if_fail (store, FALSE); - g_return_val_if_fail (path, FALSE); - - err = NULL; - msg = mu_msg_new_from_file (path, NULL, &err); - - if (!msg) { - if (err) { - g_warning ("failed to create message %s to store: %s", - path, err->message); - g_error_free (err); - } else - g_warning ("failed to create message %s to store", path); - - return FALSE; + try { + Xapian::WritableDatabase db (xpath, Xapian::DB_OPEN); + } catch (const Xapian::DatabaseLockError& xer) { + return TRUE; + } catch (const Xapian::Error &xer) { + g_warning ("%s: error: %s", __FUNCTION__, + xer.get_msg().c_str()); } - rv = mu_store_store_msg (store, msg, TRUE); - if (!rv) - g_warning ("failed to store %s", path); - - mu_msg_unref (msg); - - return rv; -} - - -gboolean -mu_store_remove_path (MuStore *store, const char *msgpath) -{ - g_return_val_if_fail (store, FALSE); - g_return_val_if_fail (msgpath, FALSE); - - try { - const std::string uid (get_message_uid (msgpath)); - - begin_trx_if (store, !store->_in_transaction); - - store->_db.delete_document (uid); - ++store->_processed; - - /* do we need to commit now? */ - bool commit_now = store->_processed % store->_trx_size == 0; - commit_trx_if (store, commit_now); - - return TRUE; - - } MU_XAPIAN_CATCH_BLOCK_RETURN (FALSE); -} - -gboolean -mu_store_contains_message (MuStore *store, const char* path) -{ - g_return_val_if_fail (store, FALSE); - g_return_val_if_fail (path, FALSE); - - try { - const std::string uid (get_message_uid(path)); - return store->_db.term_exists (uid) ? TRUE: FALSE; - - } MU_XAPIAN_CATCH_BLOCK_RETURN (FALSE); -} - - - -time_t -mu_store_get_timestamp (MuStore *store, const char* msgpath) -{ - char *stampstr; - time_t rv; - - g_return_val_if_fail (store, 0); - g_return_val_if_fail (msgpath, 0); - - stampstr = mu_store_get_metadata (store, msgpath); - if (!stampstr) - return (time_t)0; - - rv = (time_t) g_ascii_strtoull (stampstr, NULL, 10); - g_free (stampstr); - - return rv; -} - -gboolean -mu_store_set_timestamp (MuStore *store, const char* msgpath, - time_t stamp) -{ - char buf[21]; - - g_return_val_if_fail (store, FALSE); - g_return_val_if_fail (msgpath, FALSE); - - sprintf (buf, "%" G_GUINT64_FORMAT, (guint64)stamp); - return mu_store_set_metadata (store, msgpath, buf); -} - - -MuError -mu_store_foreach (MuStore *self, - MuStoreForeachFunc func, void *user_data) -{ - g_return_val_if_fail (self, MU_ERROR); - g_return_val_if_fail (func, MU_ERROR); - - try { - Xapian::Enquire enq (self->_db); - - enq.set_query (Xapian::Query::MatchAll); - enq.set_cutoff (0,0); - - Xapian::MSet matches (enq.get_mset (0, self->_db.get_doccount())); - if (matches.empty()) - return MU_OK; /* database is empty */ - - for (Xapian::MSet::iterator iter = matches.begin(); - iter != matches.end(); ++iter) { - Xapian::Document doc (iter.get_document()); - const std::string path(doc.get_value(MU_MSG_FIELD_ID_PATH)); - MuError res = func (path.c_str(), user_data); - if (res != MU_OK) - return res; - } - - } MU_XAPIAN_CATCH_BLOCK_RETURN (MU_ERROR); - - return MU_OK; + return FALSE; } diff --git a/src/mu-store.h b/src/mu-store.h index 8d4e2385..f9b29cd7 100644 --- a/src/mu-store.h +++ b/src/mu-store.h @@ -1,4 +1,4 @@ -/* +/* ** Copyright (C) 2008-2010 Dirk-Jan C. Binnema ** ** This program is free software; you can redistribute it and/or modify it @@ -13,8 +13,8 @@ ** ** You should have received a copy of the GNU General Public License ** along with this program; if not, write to the Free Software Foundation, -** Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. -** +** Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. +** */ #ifndef __MU_STORE_H__ @@ -32,33 +32,83 @@ typedef struct _MuStore MuStore; /** - * create a new Xapian store, a place to store documents - * + * create a new writable Xapian store, a place to store documents + * * @param path the path to the database * @param ccachepath path where to cache the contacts information, or NULL - * @param err to receive error info or NULL. err->code can be found in - * mu-error.h - * + * @param err to receive error info or NULL. err->code is MuError value + * * @return a new MuStore object, or NULL in case of error */ -MuStore* mu_store_new (const char *xpath, const char *ccachepath, - GError **err) G_GNUC_MALLOC G_GNUC_WARN_UNUSED_RESULT; +MuStore* mu_store_new_writable (const char *xpath, const char *ccachepath, + GError **err) + G_GNUC_MALLOC G_GNUC_WARN_UNUSED_RESULT; + + +/** + * create a new read-only Xapian store, for querying documents + * + * @param path the path to the database + * @param err to receive error info or NULL. err->code is MuError value + * + * @return a new MuStore object, or NULL in case of error + */ +MuStore* mu_store_new_read_only (const char* xpath, GError **err) + G_GNUC_MALLOC G_GNUC_WARN_UNUSED_RESULT; + /** * destroy the MuStore object and free resources - * + * * @param store a valid store, or NULL */ void mu_store_destroy (MuStore *store); +/** + * we need this when using Xapian::WritableDatabase* from C + */ +typedef gpointer XapianWritableDatabase; + + + +/** + * get the underlying writable database object for this store; not + * that this pointer becomes in valid after mu_store_destroy + * + * @param store a valid store + * + * @return a Xapian::WritableDatabase (you'll need to cast in C++), or + * NULL in case of error. + */ +XapianWritableDatabase* mu_store_get_writable_database (MuStore *store); + + +/** + * we need this when using Xapian::WritableDatabase* from C + */ +typedef gpointer XapianDatabase; + + +/** + * get the underlying read-only database object for this store; not that this + * pointer becomes in valid after mu_store_destroy + * + * @param store a valid store + * + * @return a Xapian::Database (you'll need to cast in C++), or + * NULL in case of error. + */ +XapianDatabase* mu_store_get_read_only_database (MuStore *store); + + /** * set the Xapian batch size for this store. Normally, there's no need * to use this function as the default is good enough; however, if you * use mu in a very memory-constrained environment, you can set the * batchsize to e.g. 1000 at the cost of significant slow-down. - * + * * @param store a valid store object * @param batchsize the new batch size; or 0 to reset to * the default batch size @@ -68,9 +118,9 @@ void mu_store_set_batch_size (MuStore *store, guint batchsize); /** * get the numbers of documents in the database - * + * * @param index a valid MuStore instance - * + * * @return the number of documents in the database; 0 in case of error * or an empty database */ @@ -80,9 +130,9 @@ unsigned mu_store_count (MuStore *store); * get a version string for the database; it's a const string, which * is valid as long MuStore exists and mu_store_version is not called * again. - * + * * @param store a valid MuStore - * + * * @return the version string or NULL in case of error */ const char* mu_store_version (MuStore *store); @@ -90,46 +140,48 @@ const char* mu_store_version (MuStore *store); /** * try to flush/commit all outstanding work - * + * * @param store a valid xapian store */ void mu_store_flush (MuStore *store); /** * store an email message in the XapianStore - * + * * @param store a valid store - * @param msg a valid message + * @param msg a valid message * @param replace whether or not we should try to replace the same * message if it already exists; that is usually desirable, but when * we're sure already that the document does not exist (e.g, in case * of a initial fill or rebuild of the database), we can set 'replace' * to FALSE for a couple% performance gain - * + * * @return TRUE if it succeeded, FALSE otherwise */ gboolean mu_store_store_msg (MuStore *store, MuMsg *msg, gboolean replace); /** - * store an email message in the XapianStore; similar to mu_store_store, but instead takes a path as parameter instead of a MuMsg* - * + * store an email message in the XapianStore; similar to + * mu_store_store, but instead takes a path as parameter instead of a + * MuMsg* + * * @param store a valid store - * @param path full filesystem path to a valid message - * + * @param path full filesystem path to a valid message + * * @return TRUE if it succeeded, FALSE otherwise */ gboolean mu_store_store_path (MuStore *store, const char *path); /** * remove a message from the database - * + * * @param store a valid store * @param msgpath path of the message (note, this is only used to * *identify* the message; a common use of this function is to remove * a message from the database, for which there is no message anymore * in the filesystem. - * + * * @return TRUE if it succeeded, FALSE otherwise */ gboolean mu_store_remove_path (MuStore *store, const char* msgpath); @@ -137,32 +189,32 @@ gboolean mu_store_remove_path (MuStore *store, const char* msgpath); /** * does a certain message exist in the database already? - * + * * @param store a store * @param path the message path - * + * * @return TRUE if the message exists, FALSE otherwise */ gboolean mu_store_contains_message (MuStore *store, const char* path); /** * store a timestamp for a directory - * + * * @param store a valid store * @param msgpath path to a maildir * @param stamp a timestamp * * @return TRUE if setting the timestamp succeeded, FALSE otherwise */ -gboolean mu_store_set_timestamp (MuStore *store, const char* msgpath, +gboolean mu_store_set_timestamp (MuStore *store, const char* msgpath, time_t stamp); /** * get the timestamp for a directory - * + * * @param store a valid store * @param msgpath path to a maildir - * + * * @return the timestamp, or 0 in case of error */ time_t mu_store_get_timestamp (MuStore *store, @@ -174,37 +226,36 @@ time_t mu_store_get_timestamp (MuStore *store, /** * call a function for each document in the database - * + * * @param self a valid store * @param func a callback function to to call for each document * @param user_data a user pointer passed to the callback function - * + * * @return MU_OK if all went well, MU_STOP if the foreach was interrupted, * MU_ERROR in case of error */ typedef MuError (*MuStoreForeachFunc) (const char* path, void *user_data); -MuError mu_store_foreach (MuStore *self, - MuStoreForeachFunc func, - void *user_data); +MuError mu_store_foreach (MuStore *self, MuStoreForeachFunc func, + void *user_data); /** * set metadata for this MuStore - * + * * @param store a store * @param key metadata key * @param val metadata value - * + * * @return TRUE if succeeded, FALSE otherwise */ gboolean mu_store_set_metadata (MuStore *store, const char *key, const char *val); /** * get metadata for this MuStore - * + * * @param store a store * @param key the metadata key - * + * * @return the value of the metadata (gfree when done with it), or * NULL in case of error */ @@ -212,6 +263,64 @@ char* mu_store_get_metadata (MuStore *store, const char *key) G_GNUC_WARN_UNUSED_RESULT; + +/** +" * get the version of the xapian database (ie., the version of the + * 'schema' we are using). If this version != MU_XAPIAN_DB_VERSION, + * it's means we need to a full reindex. + * + * @param xpath path to the xapian database + * + * @return the version of the database as a newly allocated string + * (free with g_free); if there is no version yet, it will return NULL + */ +gchar* mu_store_database_version (const gchar *xpath) G_GNUC_WARN_UNUSED_RESULT; + + +/** + * check whether the database needs to be upgraded, e.g., when it was + * created with a different version of mu + * + * @param xpath path to the database dir + * + * @return TRUE if the database needs upgrading, FALSE otherwise + */ +gboolean mu_store_database_needs_upgrade (const gchar *xpath); + + +/** + * check whether the database is empty (contains 0 documents); in + * addition, a non-existing database is considered 'empty' too + * + * @param xpath path to the xapian database + * + * @return TRUE if the database is empty, FALSE otherwise + */ +gboolean mu_store_database_is_empty (const gchar *xpath); + +/** + * clear the database, ie., remove all of the contents. This is a + * destructive operation, but the database can be restored be doing a + * full scan of the maildirs. Also, clear the contacts cache file + * + * @param xpath path to the database + * @param ccache path to the contacts cache file + * + * @return TRUE if the clearing succeeded, FALSE otherwise. + */ +gboolean mu_store_database_clear (const gchar *xpath, + const gchar *ccache); + + +/** + * check if the database is locked for writing + * + * @param xpath path to a xapian database + * + * @return TRUE if it is locked, FALSE otherwise (or in case of error) + */ +gboolean mu_store_database_is_locked (const gchar *xpath); + G_END_DECLS #endif /*__MU_STORE_H__*/ diff --git a/src/mu-util-db.cc b/src/mu-util-db.cc deleted file mode 100644 index 138f32b2..00000000 --- a/src/mu-util-db.cc +++ /dev/null @@ -1,150 +0,0 @@ -/* -*-mode: c++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8-*- */ - -/* -** Copyright (C) 2008-2011 Dirk-Jan C. Binnema -** -** This program is free software; you can redistribute it and/or modify -** it under the terms of the GNU General Public License as published by -** the Free Software Foundation; either version 3 of the License, or -** (at your option) any later version. -** -** This program is distributed in the hope that it will be useful, -** but WITHOUT ANY WARRANTY; without even the implied warranty of -** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -** GNU General Public License for more details. -** -** You should have received a copy of the GNU General Public License -** along with this program; if not, write to the Free Software Foundation, -** Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. -** -*/ - -#if HAVE_CONFIG_H -#include "config.h" -#endif /*HAVE_CONFIG_H*/ - -#include -#include -#include - -#include "mu-util.h" - - -static char* -xapian_get_metadata (const gchar *xpath, const gchar *key) -{ - g_return_val_if_fail (xpath, NULL); - g_return_val_if_fail (key, NULL); - - if (!access(xpath, F_OK) == 0) { - g_warning ("cannot access %s: %s", xpath, strerror(errno)); - return NULL; - } - - try { - Xapian::Database db (xpath); - const std::string val(db.get_metadata (key)); - return val.empty() ? NULL : g_strdup (val.c_str()); - - } MU_XAPIAN_CATCH_BLOCK; - - return NULL; -} - -char* -mu_util_xapian_dbversion (const gchar *xpath) -{ - g_return_val_if_fail (xpath, NULL); - - return xapian_get_metadata (xpath, MU_STORE_VERSION_KEY); -} - -gboolean -mu_util_xapian_needs_upgrade (const gchar *xpath) -{ - char *version; - gboolean rv; - - g_return_val_if_fail (xpath, TRUE); - - version = mu_util_xapian_dbversion (xpath); - - if (g_strcmp0 (version, MU_XAPIAN_DB_VERSION) == 0) - rv = FALSE; - else - rv = TRUE; - - g_free (version); - - return rv; -} - - -gboolean -mu_util_xapian_is_empty (const gchar* xpath) -{ - g_return_val_if_fail (xpath, TRUE); - - /* it's 'empty' (non-existant) */ - if (access(xpath, F_OK) != 0 && errno == ENOENT) - return TRUE; - - try { - Xapian::Database db (xpath); - return db.get_doccount() == 0 ? TRUE : FALSE; - - } MU_XAPIAN_CATCH_BLOCK; - - return FALSE; -} - -gboolean -mu_util_xapian_clear (const gchar *xpath, - const char *ccache) -{ - g_return_val_if_fail (xpath, FALSE); - g_return_val_if_fail (ccache, FALSE); - - try { - int rv; - - /* clear the database */ - Xapian::WritableDatabase db - (xpath, Xapian::DB_CREATE_OR_OVERWRITE); - db.flush (); - MU_WRITE_LOG ("emptied database %s", xpath); - - /* clear the contacts cache; this is not totally - * fail-safe, as some other process may still have it - * open... */ - rv = unlink (ccache); - if (rv != 0 && errno != ENOENT) { - g_warning ("failed to remove contacts-cache: %s", - strerror(errno)); - return FALSE; - } - - return TRUE; - - } MU_XAPIAN_CATCH_BLOCK; - - return FALSE; -} - - -gboolean -mu_util_xapian_is_locked (const gchar *xpath) -{ - g_return_val_if_fail (xpath, FALSE); - - try { - Xapian::WritableDatabase db (xpath, Xapian::DB_OPEN); - } catch (const Xapian::DatabaseLockError& xer) { - return TRUE; - } catch (const Xapian::Error &xer) { - g_warning ("%s: error: %s", __FUNCTION__, - xer.get_msg().c_str()); - } - - return FALSE; -}