* mu-store/mu-msg-fields: special handling for To:/Cc:/From: so we can better

search for them.
This commit is contained in:
Dirk-Jan C. Binnema
2010-09-12 16:28:34 +03:00
parent d9b5f2514a
commit bb3d9cc0b0
3 changed files with 107 additions and 36 deletions

View File

@ -20,13 +20,22 @@
#include <string.h> #include <string.h>
#include "mu-msg-fields.h" #include "mu-msg-fields.h"
/*
* note: the differences for our purposes between a xapian value and a term:
* - there is only a single value for a given item per document (msg), i.e.
* one value containing the list of To: addresses
* - there can be multiple terms, each containing e.g. one of the To: addresses
* - searching uses terms, but to display some field, it must be stored in
* the value (at least when using MuMsgIter)
*/
enum _FieldFlags { enum _FieldFlags {
FLAG_GMIME = 1 << 1, /* field retrieved through gmime */ FLAG_GMIME = 1 << 1, /* field retrieved through gmime */
FLAG_XAPIAN_INDEX = 1 << 2, /* field is indexed in xapian */ FLAG_XAPIAN_INDEX = 1 << 2, /* field is indexed in xapian */
FLAG_XAPIAN_TERM = 1 << 3, /* field stored as term in xapian */ FLAG_XAPIAN_TERM = 1 << 3, /* field stored as term in xapian */
FLAG_XAPIAN_VALUE = 1 << 4 /* field stored as value in xapian */ FLAG_XAPIAN_VALUE = 1 << 4, /* field stored as value in xapian */
FLAG_XAPIAN_CONTACT = 1 << 5 /* field contains e-mail address */
}; };
typedef enum _FieldFlags FieldFlags; typedef enum _FieldFlags FieldFlags;
/* /*
* this struct describes the fields of an e-mail * this struct describes the fields of an e-mail
@ -40,7 +49,9 @@ struct _MuMsgField {
FieldFlags _flags; /* the flags that tells us what to do */ FieldFlags _flags; /* the flags that tells us what to do */
}; };
/* the name and shortcut fields must be lower case, or they might be
* misinterpreted by the query preprocessor, which converts queries to
* lowercase */
static const MuMsgField FIELD_DATA[] = { static const MuMsgField FIELD_DATA[] = {
{ {
MU_MSG_FIELD_ID_BODY_TEXT, MU_MSG_FIELD_ID_BODY_TEXT,
@ -60,7 +71,7 @@ static const MuMsgField FIELD_DATA[] = {
MU_MSG_FIELD_ID_CC, MU_MSG_FIELD_ID_CC,
MU_MSG_FIELD_TYPE_STRING, MU_MSG_FIELD_TYPE_STRING,
"cc", "c", "C", "cc", "c", "C",
FLAG_GMIME | FLAG_XAPIAN_INDEX | FLAG_XAPIAN_VALUE FLAG_GMIME | FLAG_XAPIAN_CONTACT | FLAG_XAPIAN_VALUE
}, },
{ {
@ -81,7 +92,7 @@ static const MuMsgField FIELD_DATA[] = {
MU_MSG_FIELD_ID_FROM, MU_MSG_FIELD_ID_FROM,
MU_MSG_FIELD_TYPE_STRING, MU_MSG_FIELD_TYPE_STRING,
"from", "f", "F", "from", "f", "F",
FLAG_GMIME | FLAG_XAPIAN_INDEX | FLAG_XAPIAN_VALUE FLAG_GMIME | FLAG_XAPIAN_CONTACT | FLAG_XAPIAN_VALUE
}, },
{ {
@ -123,7 +134,7 @@ static const MuMsgField FIELD_DATA[] = {
MU_MSG_FIELD_ID_TO, MU_MSG_FIELD_ID_TO,
MU_MSG_FIELD_TYPE_STRING, MU_MSG_FIELD_TYPE_STRING,
"to", "t", "T", "to", "t", "T",
FLAG_GMIME | FLAG_XAPIAN_INDEX | FLAG_XAPIAN_VALUE FLAG_GMIME | FLAG_XAPIAN_CONTACT | FLAG_XAPIAN_VALUE
}, },
{ {
@ -232,7 +243,12 @@ mu_msg_field_xapian_term (const MuMsgField *field)
return field->_flags & FLAG_XAPIAN_TERM; return field->_flags & FLAG_XAPIAN_TERM;
} }
/**
 * should this field be stored as contact information?
 *
 * @param field a MuMsgField
 *
 * @return TRUE if FLAG_XAPIAN_CONTACT is set in the field's flags,
 * FALSE otherwise (g_return_val_if_fail also yields FALSE when field
 * is NULL)
 */
gboolean
mu_msg_field_xapian_contact (const MuMsgField *field)
{
	g_return_val_if_fail (field, FALSE);
	/* FLAG_XAPIAN_CONTACT is 1 << 5; compare against 0 so the result
	 * is exactly 1 or 0 rather than the raw masked bit (32), which
	 * would break callers that compare the result with TRUE */
	return (field->_flags & FLAG_XAPIAN_CONTACT) != 0;
}
gboolean gboolean

View File

@ -66,7 +66,7 @@ static const guint MU_MSG_FIELD_TYPE_NONE = MU_MSG_FIELD_TYPE_NUM + 1;
typedef void (*MuMsgFieldForEachFunc) (const MuMsgField *field, typedef void (*MuMsgFieldForEachFunc) (const MuMsgField *field,
gconstpointer data); gconstpointer data);
/** /**
* iterator over all possible message fields * iterator over all possible message fields
* *
* @param func a function called for each field * @param func a function called for each field
@ -75,7 +75,7 @@ typedef void (*MuMsgFieldForEachFunc) (const MuMsgField *field,
void mu_msg_field_foreach (MuMsgFieldForEachFunc func, gconstpointer data); void mu_msg_field_foreach (MuMsgFieldForEachFunc func, gconstpointer data);
/** /**
* get the name of the field -- this a name that can be use in queries, * get the name of the field -- this a name that can be use in queries,
* ie. 'subject:foo', with 'subject' being the name * ie. 'subject:foo', with 'subject' being the name
* *
@ -86,7 +86,7 @@ void mu_msg_field_foreach (MuMsgFieldForEachFunc func, gconstpointer data);
*/ */
const char* mu_msg_field_name (const MuMsgField *field) G_GNUC_CONST; const char* mu_msg_field_name (const MuMsgField *field) G_GNUC_CONST;
/** /**
* get the shortcut of the field -- this a shortcut that can be use in * get the shortcut of the field -- this a shortcut that can be use in
* queries, ie. 's:foo', with 's' meaning 'subject' being the name * queries, ie. 's:foo', with 's' meaning 'subject' being the name
* *
@ -97,7 +97,7 @@ const char* mu_msg_field_name (const MuMsgField *field) G_GNUC_CONST;
*/ */
const char* mu_msg_field_shortcut (const MuMsgField *field) G_GNUC_CONST; const char* mu_msg_field_shortcut (const MuMsgField *field) G_GNUC_CONST;
/** /**
* get the xapian prefix of the field -- that is, the prefix used in * get the xapian prefix of the field -- that is, the prefix used in
* the Xapian database to identify the field * the Xapian database to identify the field
* *
@ -108,7 +108,7 @@ const char* mu_msg_field_shortcut (const MuMsgField *field) G_GNUC_CONST;
*/ */
const char* mu_msg_field_xapian_prefix (const MuMsgField *field) G_GNUC_PURE; const char* mu_msg_field_xapian_prefix (const MuMsgField *field) G_GNUC_PURE;
/** /**
* get the numerical ID of the field * get the numerical ID of the field
* *
* @param field a MuMsgField * @param field a MuMsgField
@ -118,7 +118,7 @@ const char* mu_msg_field_xapian_prefix (const MuMsgField *field) G_GNUC_PURE;
MuMsgFieldId mu_msg_field_id (const MuMsgField *field) G_GNUC_CONST; MuMsgFieldId mu_msg_field_id (const MuMsgField *field) G_GNUC_CONST;
/** /**
* get the type of the field (string, size, time etc.) * get the type of the field (string, size, time etc.)
* *
* @param field a MuMsgField * @param field a MuMsgField
@ -129,7 +129,7 @@ MuMsgFieldId mu_msg_field_id (const MuMsgField *field) G_GNUC_CONST;
MuMsgFieldType mu_msg_field_type (const MuMsgField *field) G_GNUC_CONST; MuMsgFieldType mu_msg_field_type (const MuMsgField *field) G_GNUC_CONST;
/** /**
* is the field numeric (has type MU_MSG_FIELD_TYPE_(BYTESIZE|TIME_T|INT))? * is the field numeric (has type MU_MSG_FIELD_TYPE_(BYTESIZE|TIME_T|INT))?
* *
* @param field a MuMsgField * @param field a MuMsgField
@ -140,7 +140,7 @@ gboolean mu_msg_field_is_numeric (const MuMsgField *field) G_GNUC_CONST
/** /**
* is the field Xapian-indexable? That is, should this field be * is the field Xapian-indexable? That is, should this field be
* indexed in the in the Xapian database, so we can use the all the * indexed in the in the Xapian database, so we can use the all the
* phrasing, stemming etc. magic * phrasing, stemming etc. magic
@ -151,7 +151,7 @@ gboolean mu_msg_field_is_numeric (const MuMsgField *field) G_GNUC_CONST
*/ */
gboolean mu_msg_field_xapian_index (const MuMsgField *field) G_GNUC_PURE; gboolean mu_msg_field_xapian_index (const MuMsgField *field) G_GNUC_PURE;
/** /**
* should this field be stored as a xapian term? * should this field be stored as a xapian term?
* *
* @param field a MuMsgField * @param field a MuMsgField
@ -160,7 +160,7 @@ gboolean mu_msg_field_xapian_index (const MuMsgField *field) G_GNUC_PURE;
*/ */
gboolean mu_msg_field_xapian_term (const MuMsgField *field) G_GNUC_PURE; gboolean mu_msg_field_xapian_term (const MuMsgField *field) G_GNUC_PURE;
/** /**
* should this field be stored as a xapian value? * should this field be stored as a xapian value?
* *
* @param field a MuMsgField * @param field a MuMsgField
@ -169,8 +169,18 @@ gboolean mu_msg_field_xapian_term (const MuMsgField *field) G_GNUC_PURE;
*/ */
gboolean mu_msg_field_xapian_value (const MuMsgField *field) G_GNUC_PURE; gboolean mu_msg_field_xapian_value (const MuMsgField *field) G_GNUC_PURE;
/**
* should this field be stored as contact information? This means that
* e-mail addresses will be stored as terms, and names will be indexed
*
* @param field a MuMsgField
*
* @return TRUE if the field should be stored as contact information,
* FALSE otherwise
*/
gboolean mu_msg_field_xapian_contact (const MuMsgField *field) G_GNUC_PURE;
/** /**
* is the field gmime-enabled? That is, can be field be retrieved * is the field gmime-enabled? That is, can be field be retrieved
* using GMime? * using GMime?
* *
@ -181,7 +191,7 @@ gboolean mu_msg_field_xapian_value (const MuMsgField *field) G_GNUC_PURE;
gboolean mu_msg_field_gmime (const MuMsgField *field) G_GNUC_PURE; gboolean mu_msg_field_gmime (const MuMsgField *field) G_GNUC_PURE;
/** /**
* get the corresponding MuMsgField for a name (as in mu_msg_field_name) * get the corresponding MuMsgField for a name (as in mu_msg_field_name)
* *
* @param str a name * @param str a name
@ -191,7 +201,7 @@ gboolean mu_msg_field_gmime (const MuMsgField *field) G_GNUC_PURE;
const MuMsgField* mu_msg_field_from_name (const char* str) G_GNUC_PURE; const MuMsgField* mu_msg_field_from_name (const char* str) G_GNUC_PURE;
/** /**
* get the corresponding MuMsgField for a shortcut (as in mu_msg_field_shortcut) * get the corresponding MuMsgField for a shortcut (as in mu_msg_field_shortcut)
* *
* @param kar a shortcut character * @param kar a shortcut character
@ -200,7 +210,7 @@ const MuMsgField* mu_msg_field_from_name (const char* str) G_GNUC_PURE;
*/ */
const MuMsgField* mu_msg_field_from_shortcut (char kar) G_GNUC_CONST; const MuMsgField* mu_msg_field_from_shortcut (char kar) G_GNUC_CONST;
/** /**
* get the corresponding MuMsgField for an id (as in mu_msg_field_id) * get the corresponding MuMsgField for an id (as in mu_msg_field_id)
* *
* @param id an id * @param id an id

View File

@ -23,8 +23,10 @@
#include <cstdio> #include <cstdio>
#include <xapian.h> #include <xapian.h>
#include <cstring>
#include "mu-msg.h" #include "mu-msg.h"
#include "mu-msg-contact.h"
#include "mu-store.h" #include "mu-store.h"
#include "mu-util.h" #include "mu-util.h"
@ -146,7 +148,7 @@ mu_store_destroy (MuStore *store)
mu_store_flush (store); mu_store_flush (store);
MU_WRITE_LOG ("closing xapian database with %d documents", MU_WRITE_LOG ("closing xapian database with %d documents",
(int)store->_db->get_doccount()); (int)store->_db->get_doccount());
delete store->_db; delete store->_db;
g_free (store); g_free (store);
@ -164,7 +166,6 @@ mu_store_flush (MuStore *store)
store->_db->flush (); store->_db->flush ();
} MU_XAPIAN_CATCH_BLOCK; } MU_XAPIAN_CATCH_BLOCK;
} }
@ -200,9 +201,8 @@ add_terms_values_string (Xapian::Document& doc, MuMsg *msg,
} }
if (mu_msg_field_xapian_term(field)) if (mu_msg_field_xapian_term(field))
/* terms can be up to MU_STORE_MAX_TERM_LENGTH /* terms can be up to MU_STORE_MAX_TERM_LENGTH (240)
* (240) long; this is a Xapian limit * long; this is a Xapian limit */
* */
doc.add_term (std::string (prefix + value, 0, doc.add_term (std::string (prefix + value, 0,
MU_STORE_MAX_TERM_LENGTH)); MU_STORE_MAX_TERM_LENGTH));
@ -233,21 +233,23 @@ add_terms_values_body (Xapian::Document& doc, MuMsg *msg,
} }
struct _MsgDoc { struct _MsgDoc {
Xapian::Document *_doc; Xapian::Document *_doc;
MuMsg *_msg; MuMsg *_msg;
}; };
typedef struct _MsgDoc MsgDoc; typedef struct _MsgDoc MsgDoc;
static void static void
add_terms_values (const MuMsgField* field, MsgDoc* msgdoc) add_terms_values (const MuMsgField* field, MsgDoc* msgdoc)
{ {
MuMsgFieldType type; MuMsgFieldType type;
/* note: contact-stuff (To/Cc/From) will be handled in
* add_contact_info, not here */
if (!mu_msg_field_xapian_index(field) && if (!mu_msg_field_xapian_index(field) &&
!mu_msg_field_xapian_term(field) && !mu_msg_field_xapian_term(field) &&
!mu_msg_field_xapian_value(field)) !mu_msg_field_xapian_value(field))
return; return;
type = mu_msg_field_type (field); type = mu_msg_field_type (field);
if (type == MU_MSG_FIELD_TYPE_STRING) { if (type == MU_MSG_FIELD_TYPE_STRING) {
@ -270,6 +272,45 @@ add_terms_values (const MuMsgField* field, MsgDoc* msgdoc)
g_return_if_reached (); g_return_if_reached ();
} }
/* callback for mu_msg_contact_foreach: add the To/From/Cc contact in
 * @contact to the xapian document referenced by @data. A non-empty name
 * is run through a Xapian::TermGenerator (without positions); a non-empty
 * address is added as one term. Both use the xapian prefix of the
 * matching message field. */
static void
each_contact_info (MuMsgContact *contact, MsgDoc *data)
{
	/* function-local statics: fields and their prefixes are looked up
	 * only on the first call */
	static const MuMsgField *to_field =
		mu_msg_field_from_id (MU_MSG_FIELD_ID_TO);
	static const MuMsgField *from_field =
		mu_msg_field_from_id (MU_MSG_FIELD_ID_FROM);
	static const MuMsgField *cc_field =
		mu_msg_field_from_id (MU_MSG_FIELD_ID_CC);

	static const std::string to_pfx (mu_msg_field_xapian_prefix(to_field));
	static const std::string from_pfx (mu_msg_field_xapian_prefix(from_field));
	static const std::string cc_pfx (mu_msg_field_xapian_prefix(cc_field));

	/* select the prefix that belongs to this kind of contact */
	const std::string *prefix;
	if (contact->type == MU_MSG_CONTACT_TYPE_TO)
		prefix = &to_pfx;
	else if (contact->type == MU_MSG_CONTACT_TYPE_FROM)
		prefix = &from_pfx;
	else if (contact->type == MU_MSG_CONTACT_TYPE_CC)
		prefix = &cc_pfx;
	else
		return; /* other types (like bcc) are ignored */

	/* the name: let the term generator index it under the prefix */
	const char *name = contact->name;
	if (name && name[0] != '\0') {
		Xapian::TermGenerator indexer;
		indexer.set_document (*data->_doc);
		indexer.index_text_without_positions (name, 1, *prefix);
	}

	/* the address: one prefixed term, cut off at
	 * MU_STORE_MAX_TERM_LENGTH characters */
	const char *addr = contact->address;
	if (addr && addr[0] != '\0') {
		const std::string term (*prefix + addr);
		data->_doc->add_term
			(term.substr (0, MU_STORE_MAX_TERM_LENGTH));
	}
}
/* get a unique id for this message */ /* get a unique id for this message */
static std::string static std::string
get_message_uid (const char* path) get_message_uid (const char* path)
@ -282,7 +323,7 @@ get_message_uid (const char* path)
return pathprefix + path; return pathprefix + path;
} }
static std::string static const std::string
get_message_uid (MuMsg *msg) get_message_uid (MuMsg *msg)
{ {
return get_message_uid (mu_msg_get_path(msg)); return get_message_uid (mu_msg_get_path(msg));
@ -304,10 +345,14 @@ mu_store_store (MuStore *store, MuMsg *msg)
begin_trx_if (store, !store->_in_transaction); begin_trx_if (store, !store->_in_transaction);
/* we must add a unique term, so we can replace /* we must add a unique term, so we can replace
* matching documents */ * matching documents */
newdoc.add_term (uid); newdoc.add_term (uid);
mu_msg_field_foreach ((MuMsgFieldForEachFunc)add_terms_values, mu_msg_field_foreach ((MuMsgFieldForEachFunc)add_terms_values,
&msgdoc); &msgdoc);
/* also store the contact-info as separate terms */
mu_msg_contact_foreach (msg,
(MuMsgContactForeachFunc)each_contact_info,
&msgdoc);
/* we replace all existing documents for this file */ /* we replace all existing documents for this file */
id = store->_db->replace_document (uid, newdoc); id = store->_db->replace_document (uid, newdoc);