From b4680817d8e7f27590ed8bcc2f4b142bcf98591b Mon Sep 17 00:00:00 2001 From: djcb Date: Thu, 1 Dec 2011 21:23:13 +0200 Subject: [PATCH] * add support for mime:, file: and embed: searches --- src/mu-msg-fields.c | 16 ++++----- src/mu-msg-fields.h | 4 +-- src/mu-msg-file.c | 2 +- src/mu-msg-part.c | 82 +++++++++++++++++++++++++++++++------------ src/mu-msg-part.h | 6 ++-- src/mu-store-write.cc | 50 ++++++++++++++------------ 6 files changed, 102 insertions(+), 58 deletions(-) diff --git a/src/mu-msg-fields.c b/src/mu-msg-fields.c index e396c261..7d698937 100644 --- a/src/mu-msg-fields.c +++ b/src/mu-msg-fields.c @@ -87,14 +87,6 @@ typedef struct _MuMsgField MuMsgField; * lowercase */ static const MuMsgField FIELD_DATA[] = { - { - MU_MSG_FIELD_ID_ATTACH_TEXT, - MU_MSG_FIELD_TYPE_STRING, - "attach", 'a', 'A', - FLAG_GMIME | FLAG_XAPIAN_INDEX | FLAG_NORMALIZE | - FLAG_DONT_CACHE - }, - { MU_MSG_FIELD_ID_BCC, MU_MSG_FIELD_TYPE_STRING, @@ -133,6 +125,14 @@ static const MuMsgField FIELD_DATA[] = { FLAG_XAPIAN_BOOLEAN | FLAG_XAPIAN_PREFIX_ONLY }, + { + MU_MSG_FIELD_ID_EMBEDDED_TEXT, + MU_MSG_FIELD_TYPE_STRING, + "embed", 'e', 'E', + FLAG_GMIME | FLAG_XAPIAN_INDEX | FLAG_NORMALIZE | + FLAG_DONT_CACHE + }, + { MU_MSG_FIELD_ID_FILE, MU_MSG_FIELD_TYPE_STRING, diff --git a/src/mu-msg-fields.h b/src/mu-msg-fields.h index 6e5f875d..317a82e4 100644 --- a/src/mu-msg-fields.h +++ b/src/mu-msg-fields.h @@ -29,11 +29,11 @@ G_BEGIN_DECLS enum _MuMsgFieldId { /* first all the string-based ones */ - MU_MSG_FIELD_ID_ATTACH_TEXT = 0, - MU_MSG_FIELD_ID_BCC, + MU_MSG_FIELD_ID_BCC = 0, MU_MSG_FIELD_ID_BODY_HTML, MU_MSG_FIELD_ID_BODY_TEXT, MU_MSG_FIELD_ID_CC, + MU_MSG_FIELD_ID_EMBEDDED_TEXT, MU_MSG_FIELD_ID_FILE, MU_MSG_FIELD_ID_FROM, MU_MSG_FIELD_ID_MAILDIR, diff --git a/src/mu-msg-file.c b/src/mu-msg-file.c index fbdb8e7a..fd7e84b9 100644 --- a/src/mu-msg-file.c +++ b/src/mu-msg-file.c @@ -709,7 +709,7 @@ mu_msg_file_get_str_field (MuMsgFile *self, MuMsgFieldId mfid, switch (mfid) { - case 
MU_MSG_FIELD_ID_ATTACH_TEXT: *do_free = TRUE; + case MU_MSG_FIELD_ID_EMBEDDED_TEXT: *do_free = TRUE; return NULL; /* FIXME */ case MU_MSG_FIELD_ID_BCC: diff --git a/src/mu-msg-part.c b/src/mu-msg-part.c index 8a5d247f..75ee0466 100644 --- a/src/mu-msg-part.c +++ b/src/mu-msg-part.c @@ -71,21 +71,45 @@ struct _PartData { typedef struct _PartData PartData; - char* -mu_msg_part_to_string (MuMsgPart *part, gboolean *err) +mu_msg_part_get_text (MuMsgPart *self, gboolean *err) { - char *txt; + GMimeObject *mobj; - g_return_val_if_fail (part && part->data, NULL); + g_return_val_if_fail (self && self->data, NULL); - if (GMIME_IS_PART(part->data)) - txt = mu_msg_mime_part_to_string (GMIME_PART(part->data), - err); - else - txt = NULL; + mobj = (GMimeObject*)self->data; - return txt; + if (GMIME_IS_PART(mobj)) + return mu_msg_mime_part_to_string ((GMimePart*)mobj, err); + else if (GMIME_IS_MESSAGE(mobj)) { + /* when it's an (embedded) message, the text is just + * of the message metadata, so we can index it. 
+ */ + GString *data; + GMimeMessage *msg; + InternetAddressList *addresses; + gchar *adrs; + + msg = (GMimeMessage*)mobj; + data = g_string_sized_new (512); /* just a guess */ + + g_string_append (data, g_mime_message_get_sender(msg)); + g_string_append_c (data, '\n'); + g_string_append (data, g_mime_message_get_subject(msg)); + g_string_append_c (data, '\n'); + + addresses = g_mime_message_get_all_recipients (msg); + adrs = internet_address_list_to_string (addresses, FALSE); + g_object_unref(G_OBJECT(addresses)); + + g_string_append (data, adrs); + g_free (adrs); + + return g_string_free (data, FALSE); + } else + + return NULL; } @@ -110,34 +134,46 @@ get_part_size (GMimePart *part) } - static void -part_foreach_cb (GMimeObject *parent, GMimeObject *part, PartData *pdata) +part_foreach_cb (GMimeObject *parent, GMimeObject *mobj, PartData *pdata) { GMimeContentType *ct; MuMsgPart pi; memset (&pi, 0, sizeof pi); pi.index = pdata->_idx++; - pi.content_id = (char*)g_mime_object_get_content_id (part); - pi.data = (gpointer)part; + pi.content_id = (char*)g_mime_object_get_content_id (mobj); + pi.data = (gpointer)mobj; /* check if this is the body part */ - pi.is_body = ((void*)pdata->_body_part == (void*)part); + pi.is_body = ((void*)pdata->_body_part == (void*)mobj); - ct = g_mime_object_get_content_type (part); + ct = g_mime_object_get_content_type (mobj); if (GMIME_IS_CONTENT_TYPE(ct)) { pi.type = (char*)g_mime_content_type_get_media_type (ct); pi.subtype = (char*)g_mime_content_type_get_media_subtype (ct); + + /* g_print ("==> [%s: %s / %s ]\n", pi.type, pi.subtype, */ + /* G_OBJECT_TYPE_NAME(mobj)); */ } - if (GMIME_IS_PART(part)) { - pi.disposition = (char*)g_mime_object_get_disposition (part); - pi.file_name = (char*)g_mime_part_get_filename (GMIME_PART(part)); - pi.size = get_part_size (GMIME_PART(part)); - } else - return; /* only deal with GMimePart */ + if (GMIME_IS_PART(mobj)) { + GMimePart *part; + part = (GMimePart*)mobj; + pi.disposition = 
(char*)g_mime_object_get_disposition (mobj); + pi.file_name = (char*)g_mime_part_get_filename (part); + pi.size = get_part_size (part); + pi.is_leaf = TRUE; + + } else if (GMIME_IS_MESSAGE_PART(mobj)) { + GMimeMessage *mmsg; + mmsg = g_mime_message_part_get_message ((GMimeMessagePart*)mobj); + if (mmsg) + g_mime_message_foreach /* recurse */ + (mmsg, (GMimeObjectForeachFunc)part_foreach_cb, + pdata); + } pdata->_func(pdata->_msg, &pi, pdata->_user_data); } @@ -177,7 +213,7 @@ mu_msg_part_foreach (MuMsg *msg, MuMsgPartForeachFunc func, PartData pdata; GMimeMessage *mime_msg; - g_return_if_fail (msg && msg->_file && msg->_file->_mime_msg); + g_return_if_fail (msg); if (!load_msg_file_maybe (msg)) return; diff --git a/src/mu-msg-part.h b/src/mu-msg-part.h index 526d2b37..70762169 100644 --- a/src/mu-msg-part.h +++ b/src/mu-msg-part.h @@ -53,6 +53,8 @@ struct _MuMsgPart { gboolean is_body; /* TRUE if this is probably the * message body*/ + gboolean is_leaf; /* if the body is a leaf part (MIME + * Part), not eg. a multipart/ */ /* if TRUE, mu_msg_part_destroy will free the member vars * as well*/ @@ -81,14 +83,14 @@ typedef struct _MuMsgPart MuMsgPart; /** - * convert a MuMsgPart to a string + * get the text in the MuMsgPart (ie. 
in its GMimePart) * * @param part a MuMsgPart * @param err will receive TRUE if there was an error, FALSE otherwise * * @return utf8 string for this MIME part, to be freed by caller */ -char* mu_msg_part_to_string (MuMsgPart *part, gboolean *err); +char* mu_msg_part_get_text (MuMsgPart *part, gboolean *err); /** diff --git a/src/mu-store-write.cc b/src/mu-store-write.cc index ae0a9932..d34f2dea 100644 --- a/src/mu-store-write.cc +++ b/src/mu-store-write.cc @@ -403,41 +403,43 @@ struct PartData { MuMsgFieldId _mfid; }; +static gboolean +is_textual (MuMsgPart *part) +{ + /* all text parts except text/html */ + if ((strcasecmp (part->type, "text") == 0) && + (strcasecmp (part->subtype, "html") != 0)) + return TRUE; + + /* message/rfc822 */ + if ((strcasecmp (part->type, "message") == 0) && + (strcasecmp (part->subtype, "rfc822") == 0)) + return TRUE; + + return FALSE; +} + static gboolean index_text_part (MuMsgPart *part, PartData *pdata) { - unsigned u; - gboolean txt_type, err; + gboolean err; char *txt, *norm; Xapian::TermGenerator termgen; - /* check wether it's a type we need to store */ - struct { const char* type; const char *subtype; } txt_types[] = { - { "text", "plain"}, - { "message", "rfc822"}, - }; + if (!is_textual (part)) + return FALSE; - txt_type = FALSE; - for (u = 0; u != G_N_ELEMENTS(txt_types) && !txt_type; ++u) { - if ((strcasecmp (part->type, txt_types[u].type) == 0) && - ((strcasecmp (part->subtype, txt_types[u].subtype) == 0) || - (strcmp (txt_types[u].subtype, "*") == 0))) - txt_type = TRUE; - } - - if (!txt_type) - return FALSE; /* not a supported text type */ - - txt = mu_msg_part_to_string (part, &err); + txt = mu_msg_part_get_text (part, &err); if (!txt || err) return FALSE; termgen.set_document(pdata->_doc); norm = mu_str_normalize (txt, TRUE); + termgen.index_text_without_positions - (norm, 1, prefix(MU_MSG_FIELD_ID_ATTACH_TEXT)); + (norm, 1, prefix(MU_MSG_FIELD_ID_EMBEDDED_TEXT)); g_free (norm); g_free (txt); @@ -462,6 +464,8 @@ 
each_part (MuMsg *msg, MuMsgPart *part, PartData *pdata) char ctype[MuStore::MAX_TERM_LENGTH + 1]; snprintf (ctype, sizeof(ctype), "%s_%s", part->type, part->subtype); + // g_print ("store: %s\n", ctype); + pdata->_doc.add_term (mime + std::string(ctype, 0, MuStore::MAX_TERM_LENGTH)); } @@ -564,11 +568,13 @@ add_terms_values (MuMsgFieldId mfid, MsgDoc* msgdoc) add_terms_values_body (*msgdoc->_doc, msgdoc->_msg, mfid); break; - /* note: add_terms_values_attach handles these three msgfields */ + /* note: add_terms_values_attach handles _FILE, _MIME and + * _EMBEDDED_TEXT msgfields */ case MU_MSG_FIELD_ID_FILE: add_terms_values_attach (*msgdoc->_doc, msgdoc->_msg, mfid); + break; case MU_MSG_FIELD_ID_MIME: - case MU_MSG_FIELD_ID_ATTACH_TEXT: + case MU_MSG_FIELD_ID_EMBEDDED_TEXT: break; ///////////////////////////////////////////