* add support for mime:, file: and embed: searches

This commit is contained in:
djcb
2011-12-01 21:23:13 +02:00
parent 5834c62d32
commit b4680817d8
6 changed files with 102 additions and 58 deletions

View File

@ -87,14 +87,6 @@ typedef struct _MuMsgField MuMsgField;
* lowercase */
static const MuMsgField FIELD_DATA[] = {
{
MU_MSG_FIELD_ID_ATTACH_TEXT,
MU_MSG_FIELD_TYPE_STRING,
"attach", 'a', 'A',
FLAG_GMIME | FLAG_XAPIAN_INDEX | FLAG_NORMALIZE |
FLAG_DONT_CACHE
},
{
MU_MSG_FIELD_ID_BCC,
MU_MSG_FIELD_TYPE_STRING,
@ -133,6 +125,14 @@ static const MuMsgField FIELD_DATA[] = {
FLAG_XAPIAN_BOOLEAN | FLAG_XAPIAN_PREFIX_ONLY
},
{
MU_MSG_FIELD_ID_EMBEDDED_TEXT,
MU_MSG_FIELD_TYPE_STRING,
"embed", 'e', 'E',
FLAG_GMIME | FLAG_XAPIAN_INDEX | FLAG_NORMALIZE |
FLAG_DONT_CACHE
},
{
MU_MSG_FIELD_ID_FILE,
MU_MSG_FIELD_TYPE_STRING,

View File

@ -29,11 +29,11 @@ G_BEGIN_DECLS
enum _MuMsgFieldId {
/* first all the string-based ones */
MU_MSG_FIELD_ID_ATTACH_TEXT = 0,
MU_MSG_FIELD_ID_BCC,
MU_MSG_FIELD_ID_BCC = 0,
MU_MSG_FIELD_ID_BODY_HTML,
MU_MSG_FIELD_ID_BODY_TEXT,
MU_MSG_FIELD_ID_CC,
MU_MSG_FIELD_ID_EMBEDDED_TEXT,
MU_MSG_FIELD_ID_FILE,
MU_MSG_FIELD_ID_FROM,
MU_MSG_FIELD_ID_MAILDIR,

View File

@ -709,7 +709,7 @@ mu_msg_file_get_str_field (MuMsgFile *self, MuMsgFieldId mfid,
switch (mfid) {
case MU_MSG_FIELD_ID_ATTACH_TEXT: *do_free = TRUE;
case MU_MSG_FIELD_ID_EMBEDDED_TEXT: *do_free = TRUE;
return NULL; /* FIXME */
case MU_MSG_FIELD_ID_BCC:

View File

@ -71,21 +71,45 @@ struct _PartData {
typedef struct _PartData PartData;
/**
 * get the text in the MuMsgPart: for a leaf MIME part (GMimePart)
 * this is whatever mu_msg_mime_part_to_string returns for it; for an
 * (embedded) message it is a summary of the message metadata --
 * sender, subject and all recipients, separated by newlines -- so
 * that metadata can be indexed.
 *
 * @param self a MuMsgPart
 * @param err for leaf MIME parts, passed on to
 * mu_msg_mime_part_to_string; on all other paths it is set to FALSE
 * (when non-NULL), so callers never read an unset value
 *
 * @return a newly allocated string, to be freed by the caller with
 * g_free, or NULL if this kind of part has no text
 */
char*
mu_msg_part_get_text (MuMsgPart *self, gboolean *err)
{
	GMimeObject *mobj;
	GMimeMessage *msg;
	InternetAddressList *addresses;
	const char *str;
	GString *data;

	g_return_val_if_fail (self && self->data, NULL);

	mobj = (GMimeObject*)self->data;

	if (GMIME_IS_PART(mobj))
		return mu_msg_mime_part_to_string ((GMimePart*)mobj, err);

	/* nothing below can fail; make sure the caller's error flag
	 * has a defined value */
	if (err)
		*err = FALSE;

	/* the part walker hands us the message/rfc822 container (a
	 * GMimeMessagePart); unwrap it to get at the message itself */
	if (GMIME_IS_MESSAGE(mobj))
		msg = (GMimeMessage*)mobj;
	else if (GMIME_IS_MESSAGE_PART(mobj))
		msg = g_mime_message_part_get_message
			((GMimeMessagePart*)mobj);
	else
		msg = NULL;

	if (!msg)
		return NULL; /* e.g. a multipart; no text of its own */

	/* when it's an (embedded) message, the text is just the
	 * message metadata, so we can index it. Each of the headers
	 * may be missing, in which case we add an empty line.
	 */
	data = g_string_sized_new (512); /* just a guess */

	str = g_mime_message_get_sender (msg);
	if (str)
		g_string_append (data, str);
	g_string_append_c (data, '\n');

	str = g_mime_message_get_subject (msg);
	if (str)
		g_string_append (data, str);
	g_string_append_c (data, '\n');

	/* NULL when the message has no recipients at all */
	addresses = g_mime_message_get_all_recipients (msg);
	if (addresses) {
		gchar *adrs;

		adrs = internet_address_list_to_string (addresses, FALSE);
		g_object_unref (G_OBJECT(addresses));

		if (adrs) {
			g_string_append (data, adrs);
			g_free (adrs);
		}
	}

	return g_string_free (data, FALSE);
}
@ -110,34 +134,46 @@ get_part_size (GMimePart *part)
}
/*
 * callback for g_mime_message_foreach: describe one MIME object as a
 * MuMsgPart and pass that to the user-supplied function in
 * pdata->_func.
 *
 * NOTE(review): this listing is a diff rendering; it shows both the
 * replaced lines (where the object parameter is called `part') and
 * their replacements (where it is called `mobj').
 */
static void
part_foreach_cb (GMimeObject *parent, GMimeObject *part, PartData *pdata)
part_foreach_cb (GMimeObject *parent, GMimeObject *mobj, PartData *pdata)
{
GMimeContentType *ct;
MuMsgPart pi;
/* start from an all-zero MuMsgPart; fields that are not set below
 * stay 0 / NULL */
memset (&pi, 0, sizeof pi);
/* number the parts in the order in which they are visited */
pi.index = pdata->_idx++;
pi.content_id = (char*)g_mime_object_get_content_id (part);
pi.data = (gpointer)part;
pi.content_id = (char*)g_mime_object_get_content_id (mobj);
pi.data = (gpointer)mobj;
/* check if this is the body part */
pi.is_body = ((void*)pdata->_body_part == (void*)part);
pi.is_body = ((void*)pdata->_body_part == (void*)mobj);
ct = g_mime_object_get_content_type (part);
ct = g_mime_object_get_content_type (mobj);
/* type and subtype remain NULL when there is no valid content-type */
if (GMIME_IS_CONTENT_TYPE(ct)) {
pi.type = (char*)g_mime_content_type_get_media_type (ct);
pi.subtype = (char*)g_mime_content_type_get_media_subtype (ct);
/* g_print ("==> [%s: %s / %s ]\n", pi.type, pi.subtype, */
/* G_OBJECT_TYPE_NAME(mobj)); */
}
if (GMIME_IS_PART(part)) {
pi.disposition = (char*)g_mime_object_get_disposition (part);
pi.file_name = (char*)g_mime_part_get_filename (GMIME_PART(part));
pi.size = get_part_size (GMIME_PART(part));
} else
return; /* only deal with GMimePart */
/* leaf part: record its disposition, file name and size */
if (GMIME_IS_PART(mobj)) {
GMimePart *part;
part = (GMimePart*)mobj;
pi.disposition = (char*)g_mime_object_get_disposition (mobj);
pi.file_name = (char*)g_mime_part_get_filename (part);
pi.size = get_part_size (part);
pi.is_leaf = TRUE;
/* embedded message: visit the parts inside it as well; note that
 * those are reported to pdata->_func before the message part itself */
} else if (GMIME_IS_MESSAGE_PART(mobj)) {
GMimeMessage *mmsg;
mmsg = g_mime_message_part_get_message ((GMimeMessagePart*)mobj);
if (mmsg)
g_mime_message_foreach /* recurse */
(mmsg, (GMimeObjectForeachFunc)part_foreach_cb,
pdata);
}
/* hand the description to the user-supplied callback */
pdata->_func(pdata->_msg, &pi, pdata->_user_data);
}
@ -177,7 +213,7 @@ mu_msg_part_foreach (MuMsg *msg, MuMsgPartForeachFunc func,
PartData pdata;
GMimeMessage *mime_msg;
g_return_if_fail (msg && msg->_file && msg->_file->_mime_msg);
g_return_if_fail (msg);
if (!load_msg_file_maybe (msg))
return;

View File

@ -53,6 +53,8 @@ struct _MuMsgPart {
gboolean is_body; /* TRUE if this is probably the
* message body*/
gboolean is_leaf; /* if the body is a leaf part (MIME
* Part), not eg. a multipart/ */
/* if TRUE, mu_msg_part_destroy will free the member vars
* as well*/
@ -81,14 +83,14 @@ typedef struct _MuMsgPart MuMsgPart;
/**
* convert a MuMsgPart to a string
* get the text in the MuMsgPart (ie. in its GMimePart); when the part
* is an (embedded) message rather than a GMimePart, the text consists
* of the sender, subject and recipients of that message
*
* @param part a MuMsgPart
* @param err will receive TRUE if there was an error, FALSE otherwise
*
* @return utf8 string for this MIME part, to be freed by caller, or
* NULL if the part is neither a GMimePart nor a message
*/
char* mu_msg_part_to_string (MuMsgPart *part, gboolean *err);
char* mu_msg_part_get_text (MuMsgPart *part, gboolean *err);
/**

View File

@ -403,41 +403,43 @@ struct PartData {
MuMsgFieldId _mfid;
};
/*
 * check whether we should index the text of this part: that is the
 * case for all text/ parts except text/html, and for message/rfc822
 * parts. Type and subtype are compared case-insensitively.
 *
 * @param part a MuMsgPart
 *
 * @return TRUE if the part is textual, FALSE otherwise (including for
 * parts without a content-type)
 */
static gboolean
is_textual (MuMsgPart *part)
{
	/* type and subtype are NULL for parts for which no valid
	 * content-type was found; strcasecmp must not see those */
	if (!part || !part->type || !part->subtype)
		return FALSE;

	/* all text parts except text/html */
	if ((strcasecmp (part->type, "text") == 0) &&
	    (strcasecmp (part->subtype, "html") != 0))
		return TRUE;

	/* message/rfc822 */
	if ((strcasecmp (part->type, "message") == 0) &&
	    (strcasecmp (part->subtype, "rfc822") == 0))
		return TRUE;

	return FALSE;
}
static gboolean
index_text_part (MuMsgPart *part, PartData *pdata)
{
unsigned u;
gboolean txt_type, err;
gboolean err;
char *txt, *norm;
Xapian::TermGenerator termgen;
/* check wether it's a type we need to store */
struct { const char* type; const char *subtype; } txt_types[] = {
{ "text", "plain"},
{ "message", "rfc822"},
};
if (!is_textual (part))
return FALSE;
txt_type = FALSE;
for (u = 0; u != G_N_ELEMENTS(txt_types) && !txt_type; ++u) {
if ((strcasecmp (part->type, txt_types[u].type) == 0) &&
((strcasecmp (part->subtype, txt_types[u].subtype) == 0) ||
(strcmp (txt_types[u].subtype, "*") == 0)))
txt_type = TRUE;
}
if (!txt_type)
return FALSE; /* not a supported text type */
txt = mu_msg_part_to_string (part, &err);
txt = mu_msg_part_get_text (part, &err);
if (!txt || err)
return FALSE;
termgen.set_document(pdata->_doc);
norm = mu_str_normalize (txt, TRUE);
termgen.index_text_without_positions
(norm, 1, prefix(MU_MSG_FIELD_ID_ATTACH_TEXT));
(norm, 1, prefix(MU_MSG_FIELD_ID_EMBEDDED_TEXT));
g_free (norm);
g_free (txt);
@ -462,6 +464,8 @@ each_part (MuMsg *msg, MuMsgPart *part, PartData *pdata)
char ctype[MuStore::MAX_TERM_LENGTH + 1];
snprintf (ctype, sizeof(ctype), "%s_%s",
part->type, part->subtype);
// g_print ("store: %s\n", ctype);
pdata->_doc.add_term
(mime + std::string(ctype, 0, MuStore::MAX_TERM_LENGTH));
}
@ -564,11 +568,13 @@ add_terms_values (MuMsgFieldId mfid, MsgDoc* msgdoc)
add_terms_values_body (*msgdoc->_doc, msgdoc->_msg, mfid);
break;
/* note: add_terms_values_attach handles these three msgfields */
/* note: add_terms_values_attach handles _FILE, _MIME and
* _EMBEDDED_TEXT msgfields */
case MU_MSG_FIELD_ID_FILE:
add_terms_values_attach (*msgdoc->_doc, msgdoc->_msg, mfid);
break;
case MU_MSG_FIELD_ID_MIME:
case MU_MSG_FIELD_ID_ATTACH_TEXT:
case MU_MSG_FIELD_ID_EMBEDDED_TEXT:
break;
///////////////////////////////////////////