* add support for mime:, file: and embed: searches

This commit is contained in:
djcb
2011-12-01 21:23:13 +02:00
parent 5834c62d32
commit b4680817d8
6 changed files with 102 additions and 58 deletions

View File

@ -87,14 +87,6 @@ typedef struct _MuMsgField MuMsgField;
* lowercase */ * lowercase */
static const MuMsgField FIELD_DATA[] = { static const MuMsgField FIELD_DATA[] = {
{
MU_MSG_FIELD_ID_ATTACH_TEXT,
MU_MSG_FIELD_TYPE_STRING,
"attach", 'a', 'A',
FLAG_GMIME | FLAG_XAPIAN_INDEX | FLAG_NORMALIZE |
FLAG_DONT_CACHE
},
{ {
MU_MSG_FIELD_ID_BCC, MU_MSG_FIELD_ID_BCC,
MU_MSG_FIELD_TYPE_STRING, MU_MSG_FIELD_TYPE_STRING,
@ -133,6 +125,14 @@ static const MuMsgField FIELD_DATA[] = {
FLAG_XAPIAN_BOOLEAN | FLAG_XAPIAN_PREFIX_ONLY FLAG_XAPIAN_BOOLEAN | FLAG_XAPIAN_PREFIX_ONLY
}, },
{
MU_MSG_FIELD_ID_EMBEDDED_TEXT,
MU_MSG_FIELD_TYPE_STRING,
"embed", 'e', 'E',
FLAG_GMIME | FLAG_XAPIAN_INDEX | FLAG_NORMALIZE |
FLAG_DONT_CACHE
},
{ {
MU_MSG_FIELD_ID_FILE, MU_MSG_FIELD_ID_FILE,
MU_MSG_FIELD_TYPE_STRING, MU_MSG_FIELD_TYPE_STRING,

View File

@ -29,11 +29,11 @@ G_BEGIN_DECLS
enum _MuMsgFieldId { enum _MuMsgFieldId {
/* first all the string-based ones */ /* first all the string-based ones */
MU_MSG_FIELD_ID_ATTACH_TEXT = 0, MU_MSG_FIELD_ID_BCC = 0,
MU_MSG_FIELD_ID_BCC,
MU_MSG_FIELD_ID_BODY_HTML, MU_MSG_FIELD_ID_BODY_HTML,
MU_MSG_FIELD_ID_BODY_TEXT, MU_MSG_FIELD_ID_BODY_TEXT,
MU_MSG_FIELD_ID_CC, MU_MSG_FIELD_ID_CC,
MU_MSG_FIELD_ID_EMBEDDED_TEXT,
MU_MSG_FIELD_ID_FILE, MU_MSG_FIELD_ID_FILE,
MU_MSG_FIELD_ID_FROM, MU_MSG_FIELD_ID_FROM,
MU_MSG_FIELD_ID_MAILDIR, MU_MSG_FIELD_ID_MAILDIR,

View File

@ -709,7 +709,7 @@ mu_msg_file_get_str_field (MuMsgFile *self, MuMsgFieldId mfid,
switch (mfid) { switch (mfid) {
case MU_MSG_FIELD_ID_ATTACH_TEXT: *do_free = TRUE; case MU_MSG_FIELD_ID_EMBEDDED_TEXT: *do_free = TRUE;
return NULL; /* FIXME */ return NULL; /* FIXME */
case MU_MSG_FIELD_ID_BCC: case MU_MSG_FIELD_ID_BCC:

View File

@ -71,21 +71,45 @@ struct _PartData {
typedef struct _PartData PartData; typedef struct _PartData PartData;
char* char*
mu_msg_part_to_string (MuMsgPart *part, gboolean *err) mu_msg_part_get_text (MuMsgPart *self, gboolean *err)
{ {
char *txt; GMimeObject *mobj;
g_return_val_if_fail (part && part->data, NULL); g_return_val_if_fail (self && self->data, NULL);
if (GMIME_IS_PART(part->data)) mobj = (GMimeObject*)self->data;
txt = mu_msg_mime_part_to_string (GMIME_PART(part->data),
err);
else
txt = NULL;
return txt; if (GMIME_IS_PART(mobj))
return mu_msg_mime_part_to_string ((GMimePart*)mobj, err);
else if (GMIME_IS_MESSAGE(mobj)) {
/* when it's an (embedded) message, the text is just
* of the message metadata, so we can index it.
*/
GString *data;
GMimeMessage *msg;
InternetAddressList *addresses;
gchar *adrs;
msg = (GMimeMessage*)mobj;
data = g_string_sized_new (512); /* just a guess */
g_string_append (data, g_mime_message_get_sender(msg));
g_string_append_c (data, '\n');
g_string_append (data, g_mime_message_get_subject(msg));
g_string_append_c (data, '\n');
addresses = g_mime_message_get_all_recipients (msg);
adrs = internet_address_list_to_string (addresses, FALSE);
g_object_unref(G_OBJECT(addresses));
g_string_append (data, adrs);
g_free (adrs);
return g_string_free (data, FALSE);
} else
return NULL;
} }
@ -110,34 +134,46 @@ get_part_size (GMimePart *part)
} }
static void static void
part_foreach_cb (GMimeObject *parent, GMimeObject *part, PartData *pdata) part_foreach_cb (GMimeObject *parent, GMimeObject *mobj, PartData *pdata)
{ {
GMimeContentType *ct; GMimeContentType *ct;
MuMsgPart pi; MuMsgPart pi;
memset (&pi, 0, sizeof pi); memset (&pi, 0, sizeof pi);
pi.index = pdata->_idx++; pi.index = pdata->_idx++;
pi.content_id = (char*)g_mime_object_get_content_id (part); pi.content_id = (char*)g_mime_object_get_content_id (mobj);
pi.data = (gpointer)part; pi.data = (gpointer)mobj;
/* check if this is the body part */ /* check if this is the body part */
pi.is_body = ((void*)pdata->_body_part == (void*)part); pi.is_body = ((void*)pdata->_body_part == (void*)mobj);
ct = g_mime_object_get_content_type (part); ct = g_mime_object_get_content_type (mobj);
if (GMIME_IS_CONTENT_TYPE(ct)) { if (GMIME_IS_CONTENT_TYPE(ct)) {
pi.type = (char*)g_mime_content_type_get_media_type (ct); pi.type = (char*)g_mime_content_type_get_media_type (ct);
pi.subtype = (char*)g_mime_content_type_get_media_subtype (ct); pi.subtype = (char*)g_mime_content_type_get_media_subtype (ct);
/* g_print ("==> [%s: %s / %s ]\n", pi.type, pi.subtype, */
/* G_OBJECT_TYPE_NAME(mobj)); */
} }
if (GMIME_IS_PART(part)) { if (GMIME_IS_PART(mobj)) {
pi.disposition = (char*)g_mime_object_get_disposition (part); GMimePart *part;
pi.file_name = (char*)g_mime_part_get_filename (GMIME_PART(part)); part = (GMimePart*)mobj;
pi.size = get_part_size (GMIME_PART(part)); pi.disposition = (char*)g_mime_object_get_disposition (mobj);
} else pi.file_name = (char*)g_mime_part_get_filename (part);
return; /* only deal with GMimePart */ pi.size = get_part_size (part);
pi.is_leaf = TRUE;
} else if (GMIME_IS_MESSAGE_PART(mobj)) {
GMimeMessage *mmsg;
mmsg = g_mime_message_part_get_message ((GMimeMessagePart*)mobj);
if (mmsg)
g_mime_message_foreach /* recurse */
(mmsg, (GMimeObjectForeachFunc)part_foreach_cb,
pdata);
}
pdata->_func(pdata->_msg, &pi, pdata->_user_data); pdata->_func(pdata->_msg, &pi, pdata->_user_data);
} }
@ -177,7 +213,7 @@ mu_msg_part_foreach (MuMsg *msg, MuMsgPartForeachFunc func,
PartData pdata; PartData pdata;
GMimeMessage *mime_msg; GMimeMessage *mime_msg;
g_return_if_fail (msg && msg->_file && msg->_file->_mime_msg); g_return_if_fail (msg);
if (!load_msg_file_maybe (msg)) if (!load_msg_file_maybe (msg))
return; return;

View File

@ -53,6 +53,8 @@ struct _MuMsgPart {
gboolean is_body; /* TRUE if this is probably the gboolean is_body; /* TRUE if this is probably the
* message body*/ * message body*/
gboolean is_leaf; /* if the body is a leaf part (MIME
* Part), not eg. a multipart/ */
/* if TRUE, mu_msg_part_destroy will free the member vars /* if TRUE, mu_msg_part_destroy will free the member vars
* as well*/ * as well*/
@ -81,14 +83,14 @@ typedef struct _MuMsgPart MuMsgPart;
/** /**
* convert a MuMsgPart to a string * get the text in the MuMsgPart (ie. in its GMimePart)
* *
* @param part a MuMsgPart * @param part a MuMsgPart
* @param err will receive TRUE if there was an error, FALSE otherwise * @param err will receive TRUE if there was an error, FALSE otherwise
* *
* @return utf8 string for this MIME part, to be freed by caller * @return utf8 string for this MIME part, to be freed by caller
*/ */
char* mu_msg_part_to_string (MuMsgPart *part, gboolean *err); char* mu_msg_part_get_text (MuMsgPart *part, gboolean *err);
/** /**

View File

@ -403,41 +403,43 @@ struct PartData {
MuMsgFieldId _mfid; MuMsgFieldId _mfid;
}; };
/**
 * Decide whether a message part carries text that should be indexed.
 *
 * A part qualifies when its MIME type is text/<anything except html>
 * or message/rfc822; both comparisons are case-insensitive.
 *
 * @param part a MuMsgPart; its type and subtype members may be NULL
 * (e.g. when the MIME object had no usable content-type)
 *
 * @return TRUE if the part is considered textual, FALSE otherwise
 * (including when part, its type or its subtype is NULL)
 */
static gboolean
is_textual (MuMsgPart *part)
{
	/* passing NULL to strcasecmp is undefined behavior; a part
	 * without a known type/subtype cannot be classified as text */
	if (!part || !part->type || !part->subtype)
		return FALSE;

	/* all text parts except text/html */
	if ((strcasecmp (part->type, "text") == 0) &&
	    (strcasecmp (part->subtype, "html") != 0))
		return TRUE;

	/* message/rfc822 */
	if ((strcasecmp (part->type, "message") == 0) &&
	    (strcasecmp (part->subtype, "rfc822") == 0))
		return TRUE;

	return FALSE;
}
static gboolean static gboolean
index_text_part (MuMsgPart *part, PartData *pdata) index_text_part (MuMsgPart *part, PartData *pdata)
{ {
unsigned u; gboolean err;
gboolean txt_type, err;
char *txt, *norm; char *txt, *norm;
Xapian::TermGenerator termgen; Xapian::TermGenerator termgen;
/* check wether it's a type we need to store */ if (!is_textual (part))
struct { const char* type; const char *subtype; } txt_types[] = { return FALSE;
{ "text", "plain"},
{ "message", "rfc822"},
};
txt_type = FALSE; txt = mu_msg_part_get_text (part, &err);
for (u = 0; u != G_N_ELEMENTS(txt_types) && !txt_type; ++u) {
if ((strcasecmp (part->type, txt_types[u].type) == 0) &&
((strcasecmp (part->subtype, txt_types[u].subtype) == 0) ||
(strcmp (txt_types[u].subtype, "*") == 0)))
txt_type = TRUE;
}
if (!txt_type)
return FALSE; /* not a supported text type */
txt = mu_msg_part_to_string (part, &err);
if (!txt || err) if (!txt || err)
return FALSE; return FALSE;
termgen.set_document(pdata->_doc); termgen.set_document(pdata->_doc);
norm = mu_str_normalize (txt, TRUE); norm = mu_str_normalize (txt, TRUE);
termgen.index_text_without_positions termgen.index_text_without_positions
(norm, 1, prefix(MU_MSG_FIELD_ID_ATTACH_TEXT)); (norm, 1, prefix(MU_MSG_FIELD_ID_EMBEDDED_TEXT));
g_free (norm); g_free (norm);
g_free (txt); g_free (txt);
@ -462,6 +464,8 @@ each_part (MuMsg *msg, MuMsgPart *part, PartData *pdata)
char ctype[MuStore::MAX_TERM_LENGTH + 1]; char ctype[MuStore::MAX_TERM_LENGTH + 1];
snprintf (ctype, sizeof(ctype), "%s_%s", snprintf (ctype, sizeof(ctype), "%s_%s",
part->type, part->subtype); part->type, part->subtype);
// g_print ("store: %s\n", ctype);
pdata->_doc.add_term pdata->_doc.add_term
(mime + std::string(ctype, 0, MuStore::MAX_TERM_LENGTH)); (mime + std::string(ctype, 0, MuStore::MAX_TERM_LENGTH));
} }
@ -564,11 +568,13 @@ add_terms_values (MuMsgFieldId mfid, MsgDoc* msgdoc)
add_terms_values_body (*msgdoc->_doc, msgdoc->_msg, mfid); add_terms_values_body (*msgdoc->_doc, msgdoc->_msg, mfid);
break; break;
/* note: add_terms_values_attach handles these three msgfields */ /* note: add_terms_values_attach handles _FILE, _MIME and
* _EMBEDDED_TEXT msgfields */
case MU_MSG_FIELD_ID_FILE: case MU_MSG_FIELD_ID_FILE:
add_terms_values_attach (*msgdoc->_doc, msgdoc->_msg, mfid); add_terms_values_attach (*msgdoc->_doc, msgdoc->_msg, mfid);
break;
case MU_MSG_FIELD_ID_MIME: case MU_MSG_FIELD_ID_MIME:
case MU_MSG_FIELD_ID_ATTACH_TEXT: case MU_MSG_FIELD_ID_EMBEDDED_TEXT:
break; break;
/////////////////////////////////////////// ///////////////////////////////////////////