* mu-store.cc, mu-query.cc: improved date-range matching
This commit is contained in:
162
src/mu-query.cc
162
src/mu-query.cc
@ -1,4 +1,4 @@
|
|||||||
make/*
|
/*
|
||||||
** Copyright (C) 2008-2010 Dirk-Jan C. Binnema <djcb@djcbsoftware.nl>
|
** Copyright (C) 2008-2010 Dirk-Jan C. Binnema <djcb@djcbsoftware.nl>
|
||||||
**
|
**
|
||||||
** This program is free software; you can redistribute it and/or modify
|
** This program is free software; you can redistribute it and/or modify
|
||||||
@ -17,13 +17,18 @@ make/*
|
|||||||
**
|
**
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include <stdlib.h>
|
#include <stdexcept>
|
||||||
#include <xapian.h>
|
|
||||||
#include <glib/gstdio.h>
|
|
||||||
#include <string.h>
|
|
||||||
#include <string>
|
#include <string>
|
||||||
|
|
||||||
|
#include <cctype>
|
||||||
|
#include <cstring>
|
||||||
|
#include <stdlib.h>
|
||||||
|
|
||||||
|
#include <xapian.h>
|
||||||
|
#include <glib/gstdio.h>
|
||||||
|
|
||||||
#include "mu-query.h"
|
#include "mu-query.h"
|
||||||
|
#include "mu-msg-fields.h"
|
||||||
|
|
||||||
#include "mu-msg-iter.h"
|
#include "mu-msg-iter.h"
|
||||||
#include "mu-msg-iter-priv.hh"
|
#include "mu-msg-iter-priv.hh"
|
||||||
@ -32,29 +37,126 @@ make/*
|
|||||||
#include "mu-util-db.h"
|
#include "mu-util-db.h"
|
||||||
#include "mu-msg-str.h"
|
#include "mu-msg-str.h"
|
||||||
|
|
||||||
|
/*
|
||||||
|
* a xapian value date processor with the following properties:
|
||||||
|
*
|
||||||
|
* - dates are specified in ISO-8601 format, so: '201001132345'
|
||||||
|
* means 'January 13th 2010, 23:45'
|
||||||
|
*
|
||||||
|
* - with dates, you can add '.', '/', ':', '-' and ',' where you
|
||||||
|
* want; they are ignored. Thus, the above could also be written as
|
||||||
|
* e.g.: 2010-01-13/23:45
|
||||||
|
*
|
||||||
|
* - You can leave out the month, the day, the time or the minutes;
|
||||||
|
* the value range processor handles these as follows:
|
||||||
|
* - 'begin':
|
||||||
|
* 2010 => 2010-01-01/00:00
|
||||||
|
* 201002 => 2010-02-01/00:00
|
||||||
|
* 20100311 => 2010-03-11/00:00
|
||||||
|
* 2010031104 => 2010-03-11/04:00
|
||||||
|
* - 'end':
|
||||||
|
* 2010 => 2010-12-31/23:59
|
||||||
|
* 201002 => 2010-02-31/23:59
|
||||||
|
* 20100311 => 2010-03-11/23:59
|
||||||
|
* 2010031104 => 2010-03-11/04:59
|
||||||
|
*
|
||||||
|
* - then we have some special dates:
|
||||||
|
* - 'begin':
|
||||||
|
* today => date of today, 00:00
|
||||||
|
* epoch => 1970-01-01/00:00
|
||||||
|
* - 'end':
|
||||||
|
* today => date of today, 23:59
|
||||||
|
* now => date time of right now
|
||||||
|
*
|
||||||
|
* - all dates are in local time
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
struct ISODateRangeProcessor : public Xapian::ValueRangeProcessor {
|
class MuDateRangeProcessor : public Xapian::ValueRangeProcessor {
|
||||||
ISODateRangeProcessor() {}
|
public:
|
||||||
|
MuDateRangeProcessor() {}
|
||||||
|
|
||||||
|
Xapian::valueno operator()(std::string &begin, std::string &end) {
|
||||||
|
|
||||||
|
if (!clear_prefix (begin))
|
||||||
|
return Xapian::BAD_VALUENO;
|
||||||
|
|
||||||
|
substitute_date (begin, true);
|
||||||
|
substitute_date (end, false);
|
||||||
|
|
||||||
|
normalize_date (begin);
|
||||||
|
normalize_date (end);
|
||||||
|
|
||||||
|
complete_date (begin, true);
|
||||||
|
complete_date (end, false);
|
||||||
|
|
||||||
|
return (Xapian::valueno)MU_MSG_PSEUDO_FIELD_ID_DATESTR;
|
||||||
|
}
|
||||||
|
private:
|
||||||
|
bool clear_prefix (std::string& begin) {
|
||||||
|
|
||||||
|
const std::string colon (":");
|
||||||
|
const std::string name (mu_msg_field_name (MU_MSG_FIELD_ID_DATE) + colon);
|
||||||
|
const std::string shortcut (
|
||||||
|
std::string(1, mu_msg_field_shortcut
|
||||||
|
(MU_MSG_FIELD_ID_DATE)) + colon);
|
||||||
|
|
||||||
|
if (begin.find (name) == 0) {
|
||||||
|
begin.erase (0, name.length());
|
||||||
|
return true;
|
||||||
|
} else if (begin.find (shortcut) == 0) {
|
||||||
|
begin.erase (0, shortcut.length());
|
||||||
|
return true;
|
||||||
|
} else
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
Xapian::valueno operator()(std::string &begin, std::string &end) {
|
void substitute_date (std::string& date, bool is_begin) {
|
||||||
static const std::string colon (":");
|
char datebuf[13];
|
||||||
static const std::string name (
|
time_t now = time(NULL);
|
||||||
mu_msg_field_name (MU_MSG_FIELD_ID_DATE) + colon);
|
if (is_begin) {
|
||||||
static const std::string shortcut (
|
if (date == "today") {
|
||||||
std::string(1, mu_msg_field_shortcut
|
strftime(datebuf, sizeof(datebuf), "%Y%m%d0000",
|
||||||
(MU_MSG_FIELD_ID_DATE)) + colon);
|
localtime(&now));
|
||||||
|
date = datebuf;
|
||||||
|
} else if (date == "epoch")
|
||||||
|
date = "197001010000";
|
||||||
|
} else { /* end */
|
||||||
|
if (date == "now") {
|
||||||
|
strftime(datebuf, sizeof(datebuf), "%Y%m%d%H%M",
|
||||||
|
localtime(&now));
|
||||||
|
date = datebuf;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void normalize_date (std::string& date) {
|
||||||
|
std::string cleanup;
|
||||||
|
for (unsigned i = 0; i != date.length(); ++i) {
|
||||||
|
char k = date[i];
|
||||||
|
if (std::isdigit(k))
|
||||||
|
cleanup += date[i];
|
||||||
|
else if (k != ':' && k != '-' && k != '/' && k != '.' &&
|
||||||
|
k != ',')
|
||||||
|
throw std::runtime_error ("error in date str");
|
||||||
|
}
|
||||||
|
date = cleanup;
|
||||||
|
}
|
||||||
|
|
||||||
|
void complete_date (std::string& date, bool is_begin) {
|
||||||
|
|
||||||
if (begin.find (name) == 0)
|
const size_t len (date.length());
|
||||||
begin.erase (0, name.length());
|
if (len % 2 || len < 4 || len > 12)
|
||||||
else if (begin.find (shortcut) == 0)
|
throw std::runtime_error ("error in date str");
|
||||||
begin.erase (0, shortcut.length());
|
|
||||||
else
|
const std::string bsuffix ("01010000");
|
||||||
return Xapian::BAD_VALUENO;
|
const std::string esuffix ("12312359");
|
||||||
|
|
||||||
return (Xapian::valueno)MU_MSG_FIELD_ID_DATESTR;
|
date += is_begin ? bsuffix.substr(len-4) : esuffix.substr (len-4);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
static void add_prefix (MuMsgFieldId field, Xapian::QueryParser* qparser);
|
static void add_prefix (MuMsgFieldId field, Xapian::QueryParser* qparser);
|
||||||
|
|
||||||
struct _MuQuery {
|
struct _MuQuery {
|
||||||
@ -77,24 +179,14 @@ init_mu_query (MuQuery *mqx, const char* dbpath)
|
|||||||
mqx->_qparser->set_database (*mqx->_db);
|
mqx->_qparser->set_database (*mqx->_db);
|
||||||
mqx->_qparser->set_default_op (Xapian::Query::OP_AND);
|
mqx->_qparser->set_default_op (Xapian::Query::OP_AND);
|
||||||
|
|
||||||
mqx->_range_processor = new ISODateRangeProcessor ();
|
mqx->_range_processor = new MuDateRangeProcessor ();
|
||||||
mqx->_qparser->add_valuerangeprocessor
|
mqx->_qparser->add_valuerangeprocessor
|
||||||
(mqx->_range_processor);
|
(mqx->_range_processor);
|
||||||
|
|
||||||
memset (mqx->_sorters, 0, sizeof(mqx->_sorters));
|
memset (mqx->_sorters, 0, sizeof(mqx->_sorters));
|
||||||
mu_msg_field_foreach ((MuMsgFieldForEachFunc)add_prefix,
|
mu_msg_field_foreach ((MuMsgFieldForEachFunc)add_prefix,
|
||||||
(gpointer)mqx->_qparser);
|
(gpointer)mqx->_qparser);
|
||||||
|
|
||||||
// // ////// FIXME
|
|
||||||
// g_print ("\nsynonyms:\n");
|
|
||||||
// for (Xapian::TermIterator iter = mqx->_db->synonym_keys_begin();
|
|
||||||
// iter != mqx->_db->synonym_keys_end(); ++iter) {
|
|
||||||
// for (Xapian::TermIterator jter = mqx->_db->synonyms_begin(*iter);
|
|
||||||
// jter != mqx->_db->synonyms_end(*iter); ++jter) {
|
|
||||||
// g_print ("%s => %s\n", (*iter).c_str(), (*jter).c_str());
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
|
|
||||||
return TRUE;
|
return TRUE;
|
||||||
|
|
||||||
} MU_XAPIAN_CATCH_BLOCK;
|
} MU_XAPIAN_CATCH_BLOCK;
|
||||||
|
|||||||
@ -114,8 +114,6 @@ check_version (MuStore *store)
|
|||||||
return TRUE;
|
return TRUE;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
MuStore*
|
MuStore*
|
||||||
mu_store_new (const char* xpath)
|
mu_store_new (const char* xpath)
|
||||||
{
|
{
|
||||||
@ -131,12 +129,12 @@ mu_store_new (const char* xpath)
|
|||||||
mu_store_destroy (store);
|
mu_store_destroy (store);
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* keep count of processed docs */
|
/* keep count of processed docs */
|
||||||
store->_trx_size = MU_STORE_TRX_SIZE;
|
store->_trx_size = MU_STORE_TRX_SIZE;
|
||||||
store->_in_transaction = false;
|
store->_in_transaction = false;
|
||||||
store->_processed = 0;
|
store->_processed = 0;
|
||||||
|
|
||||||
add_synonyms (store);
|
add_synonyms (store);
|
||||||
|
|
||||||
MU_WRITE_LOG ("%s: opened %s", __FUNCTION__, xpath);
|
MU_WRITE_LOG ("%s: opened %s", __FUNCTION__, xpath);
|
||||||
@ -270,17 +268,18 @@ static void
|
|||||||
add_terms_values_date (Xapian::Document& doc, MuMsg *msg,
|
add_terms_values_date (Xapian::Document& doc, MuMsg *msg,
|
||||||
MuMsgFieldId mfid)
|
MuMsgFieldId mfid)
|
||||||
{
|
{
|
||||||
char datebuf[9];
|
char datebuf[13]; /* YYYYMMDDHHMM + terminating NUL */
|
||||||
static const std::string pfx (1, mu_msg_field_xapian_prefix(mfid));
|
static const std::string pfx (1, mu_msg_field_xapian_prefix(mfid));
|
||||||
gint64 num = mu_msg_get_field_numeric (msg, mfid);
|
gint64 num = mu_msg_get_field_numeric (msg, mfid);
|
||||||
|
|
||||||
if (G_UNLIKELY(strftime(datebuf, sizeof(datebuf), "%Y%m%d",
|
if (G_UNLIKELY(strftime(datebuf, sizeof(datebuf), "%Y%m%d%H%M",
|
||||||
gmtime((const time_t*)&num)) == 0))
|
localtime((const time_t*)&num)) == 0))
|
||||||
g_return_if_reached();
|
g_return_if_reached();
|
||||||
|
|
||||||
const std::string numstr (Xapian::sortable_serialise((double)num));
|
const std::string numstr (Xapian::sortable_serialise((double)num));
|
||||||
doc.add_value ((Xapian::valueno)mfid, numstr);
|
doc.add_value ((Xapian::valueno)mfid, numstr);
|
||||||
doc.add_value ((Xapian::valueno)MU_MSG_FIELD_ID_DATESTR, datebuf);
|
doc.add_value ((Xapian::valueno)MU_MSG_PSEUDO_FIELD_ID_DATESTR,
|
||||||
|
datebuf);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -487,9 +486,10 @@ mu_store_store (MuStore *store, MuMsg *msg)
|
|||||||
mu_msg_field_foreach
|
mu_msg_field_foreach
|
||||||
((MuMsgFieldForEachFunc)add_terms_values, &msgdoc);
|
((MuMsgFieldForEachFunc)add_terms_values, &msgdoc);
|
||||||
/* also store the contact-info as separate terms */
|
/* also store the contact-info as separate terms */
|
||||||
mu_msg_contact_foreach (msg,
|
mu_msg_contact_foreach
|
||||||
(MuMsgContactForeachFunc)each_contact_info,
|
(msg,
|
||||||
&msgdoc);
|
(MuMsgContactForeachFunc)each_contact_info,
|
||||||
|
&msgdoc);
|
||||||
|
|
||||||
/* we replace all existing documents for this file */
|
/* we replace all existing documents for this file */
|
||||||
id = store->_db->replace_document (uid, newdoc);
|
id = store->_db->replace_document (uid, newdoc);
|
||||||
|
|||||||
Reference in New Issue
Block a user