diff --git a/src/mu-msg-fields.c b/src/mu-msg-fields.c index 5324b954..902ac61f 100644 --- a/src/mu-msg-fields.c +++ b/src/mu-msg-fields.c @@ -20,13 +20,22 @@ #include #include "mu-msg-fields.h" +/* + * note: the differences for our purposes between a xapian value and a term: + * - there is only a single value for some item per document (msg), ie. + * one value containing the list of To: addresses + * - there can be multiple terms, each containing e.g. one of the To: addresses + * - searching uses terms, but to display some field, it must be in the + * value (at least when using MuMsgIter) + */ enum _FieldFlags { - FLAG_GMIME = 1 << 1, /* field retrieved through gmime */ - FLAG_XAPIAN_INDEX = 1 << 2, /* field is indexed in xapian */ - FLAG_XAPIAN_TERM = 1 << 3, /* field stored as term in xapian */ - FLAG_XAPIAN_VALUE = 1 << 4 /* field stored as value in xapian */ + FLAG_GMIME = 1 << 1, /* field retrieved through gmime */ + FLAG_XAPIAN_INDEX = 1 << 2, /* field is indexed in xapian */ + FLAG_XAPIAN_TERM = 1 << 3, /* field stored as term in xapian */ + FLAG_XAPIAN_VALUE = 1 << 4, /* field stored as value in xapian */ + FLAG_XAPIAN_CONTACT = 1 << 5 /* field contains e-mail address */ }; -typedef enum _FieldFlags FieldFlags; +typedef enum _FieldFlags FieldFlags; /* * this struct describes the fields of an e-mail @@ -40,7 +49,9 @@ struct _MuMsgField { FieldFlags _flags; /* the flags that tells us what to do */ }; - +/* the name and shortcut fields must be lower case, or they might be + * misinterpreted by the query-preprocessor which turns queries into + * lowercase */ static const MuMsgField FIELD_DATA[] = { { MU_MSG_FIELD_ID_BODY_TEXT, @@ -60,7 +71,7 @@ static const MuMsgField FIELD_DATA[] = { MU_MSG_FIELD_ID_CC, MU_MSG_FIELD_TYPE_STRING, "cc", "c", "C", - FLAG_GMIME | FLAG_XAPIAN_INDEX | FLAG_XAPIAN_VALUE + FLAG_GMIME | FLAG_XAPIAN_CONTACT | FLAG_XAPIAN_VALUE }, { @@ -81,7 +92,7 @@ static const MuMsgField FIELD_DATA[] = { MU_MSG_FIELD_ID_FROM, 
MU_MSG_FIELD_TYPE_STRING, "from", "f", "F", - FLAG_GMIME | FLAG_XAPIAN_INDEX | FLAG_XAPIAN_VALUE + FLAG_GMIME | FLAG_XAPIAN_CONTACT | FLAG_XAPIAN_VALUE }, { @@ -123,7 +134,7 @@ static const MuMsgField FIELD_DATA[] = { MU_MSG_FIELD_ID_TO, MU_MSG_FIELD_TYPE_STRING, "to", "t", "T", - FLAG_GMIME | FLAG_XAPIAN_INDEX | FLAG_XAPIAN_VALUE + FLAG_GMIME | FLAG_XAPIAN_CONTACT | FLAG_XAPIAN_VALUE }, { @@ -232,7 +243,12 @@ mu_msg_field_xapian_term (const MuMsgField *field) return field->_flags & FLAG_XAPIAN_TERM; } - +gboolean +mu_msg_field_xapian_contact (const MuMsgField *field) +{ + g_return_val_if_fail (field, FALSE); + return field->_flags & FLAG_XAPIAN_CONTACT; +} gboolean diff --git a/src/mu-msg-fields.h b/src/mu-msg-fields.h index a79635b6..8774029a 100644 --- a/src/mu-msg-fields.h +++ b/src/mu-msg-fields.h @@ -66,7 +66,7 @@ static const guint MU_MSG_FIELD_TYPE_NONE = MU_MSG_FIELD_TYPE_NUM + 1; typedef void (*MuMsgFieldForEachFunc) (const MuMsgField *field, gconstpointer data); -/** +/** * iterator over all possible message fields * * @param func a function called for each field @@ -75,7 +75,7 @@ typedef void (*MuMsgFieldForEachFunc) (const MuMsgField *field, void mu_msg_field_foreach (MuMsgFieldForEachFunc func, gconstpointer data); -/** +/** * get the name of the field -- this a name that can be use in queries, * ie. 'subject:foo', with 'subject' being the name * @@ -86,7 +86,7 @@ void mu_msg_field_foreach (MuMsgFieldForEachFunc func, gconstpointer data); */ const char* mu_msg_field_name (const MuMsgField *field) G_GNUC_CONST; -/** +/** * get the shortcut of the field -- this a shortcut that can be use in * queries, ie. 
's:foo', with 's' meaning 'subject' being the name * @@ -97,7 +97,7 @@ const char* mu_msg_field_name (const MuMsgField *field) G_GNUC_CONST; */ const char* mu_msg_field_shortcut (const MuMsgField *field) G_GNUC_CONST; -/** +/** * get the xapian prefix of the field -- that is, the prefix used in * the Xapian database to identify the field * @@ -108,7 +108,7 @@ const char* mu_msg_field_shortcut (const MuMsgField *field) G_GNUC_CONST; */ const char* mu_msg_field_xapian_prefix (const MuMsgField *field) G_GNUC_PURE; -/** +/** * get the numerical ID of the field * * @param field a MuMsgField @@ -118,7 +118,7 @@ const char* mu_msg_field_xapian_prefix (const MuMsgField *field) G_GNUC_PURE; MuMsgFieldId mu_msg_field_id (const MuMsgField *field) G_GNUC_CONST; -/** +/** * get the type of the field (string, size, time etc.) * * @param field a MuMsgField @@ -129,7 +129,7 @@ MuMsgFieldId mu_msg_field_id (const MuMsgField *field) G_GNUC_CONST; MuMsgFieldType mu_msg_field_type (const MuMsgField *field) G_GNUC_CONST; -/** +/** * is the field numeric (has type MU_MSG_FIELD_TYPE_(BYTESIZE|TIME_T|INT))? * * @param field a MuMsgField @@ -140,7 +140,7 @@ gboolean mu_msg_field_is_numeric (const MuMsgField *field) G_GNUC_CONST -/** +/** * is the field Xapian-indexable? That is, should this field be * indexed in the in the Xapian database, so we can use the all the * phrasing, stemming etc. magic @@ -151,7 +151,7 @@ gboolean mu_msg_field_is_numeric (const MuMsgField *field) G_GNUC_CONST */ gboolean mu_msg_field_xapian_index (const MuMsgField *field) G_GNUC_PURE; -/** +/** * should this field be stored as a xapian term? * * @param field a MuMsgField @@ -160,7 +160,7 @@ gboolean mu_msg_field_xapian_index (const MuMsgField *field) G_GNUC_PURE; */ gboolean mu_msg_field_xapian_term (const MuMsgField *field) G_GNUC_PURE; -/** +/** * should this field be stored as a xapian value? 
* * @param field a MuMsgField @@ -169,8 +169,18 @@ gboolean mu_msg_field_xapian_term (const MuMsgField *field) G_GNUC_PURE; */ gboolean mu_msg_field_xapian_value (const MuMsgField *field) G_GNUC_PURE; +/** + * should this field be stored as contact information? This means that + * e-mail addresses will be stored as terms, and names will be indexed + * + * @param field a MuMsgField + * + * @return TRUE if the field should be stored as contact information, + * FALSE otherwise + */ +gboolean mu_msg_field_xapian_contact (const MuMsgField *field) G_GNUC_PURE; -/** +/** * is the field gmime-enabled? That is, can be field be retrieved * using GMime? * @@ -181,7 +191,7 @@ gboolean mu_msg_field_xapian_value (const MuMsgField *field) G_GNUC_PURE; gboolean mu_msg_field_gmime (const MuMsgField *field) G_GNUC_PURE; -/** +/** * get the corresponding MuMsgField for a name (as in mu_msg_field_name) * * @param str a name @@ -191,7 +201,7 @@ gboolean mu_msg_field_gmime (const MuMsgField *field) G_GNUC_PURE; const MuMsgField* mu_msg_field_from_name (const char* str) G_GNUC_PURE; -/** +/** * get the corresponding MuMsgField for a shortcut (as in mu_msg_field_shortcut) * * @param kar a shortcut character @@ -200,7 +210,7 @@ const MuMsgField* mu_msg_field_from_name (const char* str) G_GNUC_PURE; */ const MuMsgField* mu_msg_field_from_shortcut (char kar) G_GNUC_CONST; -/** +/** * get the corresponding MuMsgField for an id (as in mu_msg_field_id) * * @param id an id diff --git a/src/mu-store.cc b/src/mu-store.cc index 7d42e875..8d702be6 100644 --- a/src/mu-store.cc +++ b/src/mu-store.cc @@ -23,8 +23,10 @@ #include #include +#include #include "mu-msg.h" +#include "mu-msg-contact.h" #include "mu-store.h" #include "mu-util.h" @@ -146,7 +148,7 @@ mu_store_destroy (MuStore *store) mu_store_flush (store); MU_WRITE_LOG ("closing xapian database with %d documents", - (int)store->_db->get_doccount()); + (int)store->_db->get_doccount()); delete store->_db; g_free (store); @@ -164,7 +166,6 @@ 
mu_store_flush (MuStore *store) store->_db->flush (); } MU_XAPIAN_CATCH_BLOCK; - } @@ -200,9 +201,8 @@ add_terms_values_string (Xapian::Document& doc, MuMsg *msg, } if (mu_msg_field_xapian_term(field)) - /* terms can be up to MU_STORE_MAX_TERM_LENGTH - * (240) long; this is a Xapian limit - * */ + /* terms can be up to MU_STORE_MAX_TERM_LENGTH (240) + * long; this is a Xapian limit */ doc.add_term (std::string (prefix + value, 0, MU_STORE_MAX_TERM_LENGTH)); @@ -233,21 +233,23 @@ add_terms_values_body (Xapian::Document& doc, MuMsg *msg, } struct _MsgDoc { - Xapian::Document *_doc; - MuMsg *_msg; + Xapian::Document *_doc; + MuMsg *_msg; }; -typedef struct _MsgDoc MsgDoc; +typedef struct _MsgDoc MsgDoc; static void add_terms_values (const MuMsgField* field, MsgDoc* msgdoc) { MuMsgFieldType type; - + + /* note: contact-stuff (To/Cc/From) will be handled in + * each_contact_info, not here */ if (!mu_msg_field_xapian_index(field) && !mu_msg_field_xapian_term(field) && !mu_msg_field_xapian_value(field)) return; - + type = mu_msg_field_type (field); if (type == MU_MSG_FIELD_TYPE_STRING) { @@ -270,6 +272,45 @@ add_terms_values (const MuMsgField* field, MsgDoc* msgdoc) g_return_if_reached (); } + +static void +each_contact_info (MuMsgContact *contact, MsgDoc *data) +{ + std::string pfx; + + static const MuMsgField *to_field = + mu_msg_field_from_id (MU_MSG_FIELD_ID_TO); + static const MuMsgField *from_field = + mu_msg_field_from_id (MU_MSG_FIELD_ID_FROM); + static const MuMsgField *cc_field = + mu_msg_field_from_id (MU_MSG_FIELD_ID_CC); + + static const std::string to_pfx (mu_msg_field_xapian_prefix(to_field)); + static const std::string from_pfx (mu_msg_field_xapian_prefix(from_field)); + static const std::string cc_pfx (mu_msg_field_xapian_prefix(cc_field)); + + switch (contact->type) { + case MU_MSG_CONTACT_TYPE_TO: pfx = to_pfx; break; + case MU_MSG_CONTACT_TYPE_FROM: pfx = from_pfx; break; + case MU_MSG_CONTACT_TYPE_CC: pfx = cc_pfx; break; + default: return; /* other types 
(like bcc) are ignored */ + } + + // g_print ("[%s %s]\n", pfx.c_str(), contact->address); + + if (contact->name && strlen(contact->name) > 0) { + Xapian::TermGenerator termgen; + termgen.set_document (*data->_doc); + termgen.index_text_without_positions (contact->name, 1, pfx); + } + + if (contact->address && strlen (contact->address)) + data->_doc->add_term (std::string (pfx + contact->address, 0, + MU_STORE_MAX_TERM_LENGTH)); +} + + + /* get a unique id for this message */ static std::string get_message_uid (const char* path) @@ -282,7 +323,7 @@ get_message_uid (const char* path) return pathprefix + path; } -static std::string +static const std::string get_message_uid (MuMsg *msg) { return get_message_uid (mu_msg_get_path(msg)); @@ -304,10 +345,14 @@ mu_store_store (MuStore *store, MuMsg *msg) begin_trx_if (store, !store->_in_transaction); /* we must add a unique term, so we can replace - * matching documents */ + * matching documents */ newdoc.add_term (uid); mu_msg_field_foreach ((MuMsgFieldForEachFunc)add_terms_values, &msgdoc); + /* also store the contact-info as separate terms */ + mu_msg_contact_foreach (msg, + (MuMsgContactForeachFunc)each_contact_info, + &msgdoc); /* we replace all existing documents for this file */ id = store->_db->replace_document (uid, newdoc);