From 89b85cf7f7476b899e65c6796fb1c5eecf9767f8 Mon Sep 17 00:00:00 2001 From: "Dirk-Jan C. Binnema" Date: Tue, 23 Nov 2010 00:33:15 +0200 Subject: [PATCH] * mu-store.cc, mu-query.cc: improved date-range matching --- src/mu-query.cc | 162 +++++++++++++++++++++++++++++++++++++----------- src/mu-store.cc | 24 +++---- 2 files changed, 139 insertions(+), 47 deletions(-) diff --git a/src/mu-query.cc b/src/mu-query.cc index 1666cde9..5e39d1ec 100644 --- a/src/mu-query.cc +++ b/src/mu-query.cc @@ -1,4 +1,4 @@ -make/* +/* ** Copyright (C) 2008-2010 Dirk-Jan C. Binnema ** ** This program is free software; you can redistribute it and/or modify @@ -17,13 +17,18 @@ make/* ** */ -#include -#include -#include -#include +#include #include +#include +#include +#include + +#include +#include + #include "mu-query.h" +#include "mu-msg-fields.h" #include "mu-msg-iter.h" #include "mu-msg-iter-priv.hh" @@ -32,29 +37,126 @@ make/* #include "mu-util-db.h" #include "mu-msg-str.h" +/* + * a xapian value date processor with the following properties: + * + * - dates are specified in ISO-8601 format, so: '201001132345' + * means 'January 13th 2010, 23:45' + * + * - with dates, you can add '.', '/', ':' and '-', ',' where you + * want; they are ignored thus, the above could also be written as + * e.g.: 2010-01-13/23:45 + * + * - You can leave out the month, the day, the time or the seconds; + the value range processor handles these as follows: + * - 'begin': + * 2010 => 2010-01-01/00:00 + * 201002 => 2010-02-01/00:00 + * 20100311 => 2010-03-11/00:00 + * 2010031104 => 2010-03-11/04:00 + * - 'end': + * 2010 => 2010-31-12/23:59 + * 201002 => 2010-02-31/23:59 + * 20100311 => 2010-03-11/23:59 + * 2010031104 => 2010-03-11/04:59 + * + * - then we have some special dates: + * - 'begin': + * today => date of today, 00:00 + * epoch => 1970-01-01/00:00 + * - 'end': + * today => date of today, 23:59 + * now => date time of right now + * + * - all dates are in local time + * + */ -struct ISODateRangeProcessor : public Xapian::ValueRangeProcessor { - ISODateRangeProcessor() {} +class MuDateRangeProcessor : public Xapian::ValueRangeProcessor { +public: + MuDateRangeProcessor() {} + + Xapian::valueno operator()(std::string &begin, std::string &end) { + + if (!clear_prefix (begin)) + return Xapian::BAD_VALUENO; + + substitute_date (begin, true); + substitute_date (end, false); + + normalize_date (begin); + normalize_date (end); + + complete_date (begin, true); + complete_date (end, false); + + return (Xapian::valueno)MU_MSG_PSEUDO_FIELD_ID_DATESTR; + } +private: + bool clear_prefix (std::string& begin) { + + const std::string colon (":"); + const std::string name (mu_msg_field_name (MU_MSG_FIELD_ID_DATE) + colon); + const std::string shortcut ( + std::string(1, mu_msg_field_shortcut + (MU_MSG_FIELD_ID_DATE)) + colon); + + if (begin.find (name) == 0) { + begin.erase (0, name.length()); + return true; + } else if (begin.find (shortcut) == 0) { + begin.erase (0, shortcut.length()); + return true; + } else + return false; + } - Xapian::valueno operator()(std::string &begin, std::string &end) { - static const std::string colon (":"); - static const std::string name ( - mu_msg_field_name (MU_MSG_FIELD_ID_DATE) + colon); - static const std::string shortcut ( - std::string(1, mu_msg_field_shortcut - (MU_MSG_FIELD_ID_DATE)) + colon); + void substitute_date (std::string& date, bool is_begin) { + char datebuf[13]; + time_t now = time(NULL); + if (is_begin) { + if (date == "today") { + strftime(datebuf, sizeof(datebuf), "%Y%m%d0000", + localtime(&now)); + date = datebuf; + } else if (date == "epoch") + date = "197001010000"; + } else { /* end */ + if (date == "now") { + strftime(datebuf, sizeof(datebuf), "%Y%m%d%H%M", + localtime(&now)); + date = datebuf; + } + } + } + + void normalize_date (std::string& date) { + std::string cleanup; + for (unsigned i = 0; i != date.length(); ++i) { + char k = date[i]; + if (std::isdigit(k)) + cleanup += date[i]; + else if (k != ':' && k != '-' && k != '/' && k != '.' && + k != ',') + throw std::runtime_error ("error in date str"); + } + date = cleanup; + } + + void complete_date (std::string& date, bool is_begin) { - if (begin.find (name) == 0) - begin.erase (0, name.length()); - else if (begin.find (shortcut) == 0) - begin.erase (0, shortcut.length()); - else - return Xapian::BAD_VALUENO; - - return (Xapian::valueno)MU_MSG_FIELD_ID_DATESTR; - } + const size_t len (date.length()); + if (len % 2 || len < 4 || len > 12) + throw std::runtime_error ("error in date str"); + + const std::string bsuffix ("01010000"); + const std::string esuffix ("12312359"); + + date += is_begin ? bsuffix.substr(len-4) : esuffix.substr (len-4); + } }; + static void add_prefix (MuMsgFieldId field, Xapian::QueryParser* qparser); struct _MuQuery { @@ -77,24 +179,14 @@ init_mu_query (MuQuery *mqx, const char* dbpath) mqx->_qparser->set_database (*mqx->_db); mqx->_qparser->set_default_op (Xapian::Query::OP_AND); - mqx->_range_processor = new ISODateRangeProcessor (); + mqx->_range_processor = new MuDateRangeProcessor (); mqx->_qparser->add_valuerangeprocessor (mqx->_range_processor); memset (mqx->_sorters, 0, sizeof(mqx->_sorters)); mu_msg_field_foreach ((MuMsgFieldForEachFunc)add_prefix, (gpointer)mqx->_qparser); - - // // ////// FIXME - // g_print ("\nsynonyms:\n"); - // for (Xapian::TermIterator iter = mqx->_db->synonym_keys_begin(); - // iter != mqx->_db->synonym_keys_end(); ++iter) { - // for (Xapian::TermIterator jter = mqx->_db->synonyms_begin(*iter); - // jter != mqx->_db->synonyms_end(*iter); ++jter) { - // g_print ("%s => %s\n", (*iter).c_str(), (*jter).c_str()); - // } - // } - + return TRUE; } MU_XAPIAN_CATCH_BLOCK; diff --git a/src/mu-store.cc b/src/mu-store.cc index f41dcf41..501dcbc4 100644 --- a/src/mu-store.cc +++ b/src/mu-store.cc @@ -114,8 +114,6 @@ check_version (MuStore *store) return TRUE; } - - MuStore* mu_store_new (const char* xpath) { @@ -131,12 +129,12 @@ mu_store_new (const char* xpath) mu_store_destroy (store); return NULL; } - + /* keep count of processed docs */ store->_trx_size = MU_STORE_TRX_SIZE; store->_in_transaction = false; store->_processed = 0; - + add_synonyms (store); MU_WRITE_LOG ("%s: opened %s", __FUNCTION__, xpath); @@ -270,17 +268,18 @@ static void add_terms_values_date (Xapian::Document& doc, MuMsg *msg, MuMsgFieldId mfid) { - char datebuf[9]; + char datebuf[13]; /* YYYYMMDDHHMMSS */ static const std::string pfx (1, mu_msg_field_xapian_prefix(mfid)); gint64 num = mu_msg_get_field_numeric (msg, mfid); - if (G_UNLIKELY(strftime(datebuf, sizeof(datebuf), "%Y%m%d", - gmtime((const time_t*)&num)) == 0)) + if (G_UNLIKELY(strftime(datebuf, sizeof(datebuf), "%Y%m%d%H%M", + localtime((const time_t*)&num)) == 0)) g_return_if_reached(); - + const std::string numstr (Xapian::sortable_serialise((double)num)); doc.add_value ((Xapian::valueno)mfid, numstr); - doc.add_value ((Xapian::valueno)MU_MSG_FIELD_ID_DATESTR, datebuf); + doc.add_value ((Xapian::valueno)MU_MSG_PSEUDO_FIELD_ID_DATESTR, + datebuf); } @@ -487,9 +486,10 @@ mu_store_store (MuStore *store, MuMsg *msg) mu_msg_field_foreach ((MuMsgFieldForEachFunc)add_terms_values, &msgdoc); /* also store the contact-info as separate terms */ - mu_msg_contact_foreach (msg, - (MuMsgContactForeachFunc)each_contact_info, - &msgdoc); + mu_msg_contact_foreach + (msg, + (MuMsgContactForeachFunc)each_contact_info, + &msgdoc); /* we replace all existing documents for this file */ id = store->_db->replace_document (uid, newdoc);