* add indexing/searching for text-based mime parts
This commit is contained in:
@ -512,8 +512,8 @@ stream_to_string (GMimeStream *stream, size_t buflen)
|
||||
}
|
||||
|
||||
|
||||
static gchar*
|
||||
part_to_string (GMimePart *part, gboolean *err)
|
||||
gchar*
|
||||
mu_msg_mime_part_to_string (GMimePart *part, gboolean *err)
|
||||
{
|
||||
GMimeDataWrapper *wrapper;
|
||||
GMimeStream *stream = NULL;
|
||||
@ -557,39 +557,53 @@ cleanup:
|
||||
}
|
||||
|
||||
|
||||
static char*
|
||||
get_body (MuMsgFile *self, gboolean want_html)
|
||||
GMimePart*
|
||||
mu_msg_mime_get_body_part (GMimeMessage *msg, gboolean want_html)
|
||||
{
|
||||
GetBodyData data;
|
||||
char *str;
|
||||
gboolean err;
|
||||
|
||||
g_return_val_if_fail (self, NULL);
|
||||
g_return_val_if_fail (GMIME_IS_MESSAGE(self->_mime_msg), NULL);
|
||||
g_return_val_if_fail (GMIME_IS_MESSAGE(msg), NULL);
|
||||
|
||||
memset (&data, 0, sizeof(GetBodyData));
|
||||
data._want_html = want_html;
|
||||
|
||||
err = FALSE;
|
||||
g_mime_message_foreach (self->_mime_msg,
|
||||
g_mime_message_foreach (msg,
|
||||
(GMimeObjectForeachFunc)get_body_cb,
|
||||
&data);
|
||||
if (want_html)
|
||||
str = data._html_part ?
|
||||
part_to_string (GMIME_PART(data._html_part), &err) :
|
||||
NULL;
|
||||
return (GMimePart*)data._html_part;
|
||||
else
|
||||
str = data._txt_part ?
|
||||
part_to_string (GMIME_PART(data._txt_part), &err) :
|
||||
NULL;
|
||||
return (GMimePart*)data._txt_part;
|
||||
}
|
||||
|
||||
/* note, str may be NULL (no body), but that's not necessarily
|
||||
* an error; we only warn when an actual error occured */
|
||||
if (err)
|
||||
g_warning ("error occured while retrieving %s body "
|
||||
"for message %s",
|
||||
want_html ? "html" : "text", self->_path);
|
||||
return str;
|
||||
|
||||
|
||||
static char*
|
||||
get_body (MuMsgFile *self, gboolean want_html)
|
||||
{
|
||||
GMimePart *part;
|
||||
|
||||
g_return_val_if_fail (self, NULL);
|
||||
g_return_val_if_fail (GMIME_IS_MESSAGE(self->_mime_msg), NULL);
|
||||
|
||||
part = mu_msg_mime_get_body_part (self->_mime_msg, want_html);
|
||||
if (GMIME_IS_PART(part)) {
|
||||
gboolean err;
|
||||
gchar *str;
|
||||
|
||||
err = FALSE;
|
||||
str = mu_msg_mime_part_to_string (part, &err);
|
||||
|
||||
/* note, str may be NULL (no body), but that's not necessarily
|
||||
* an error; we only warn when an actual error occured */
|
||||
if (err)
|
||||
g_warning ("error occured while retrieving %s body "
|
||||
"for message %s",
|
||||
want_html ? "html" : "text", self->_path);
|
||||
return str;
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
||||
@ -672,6 +686,18 @@ maybe_cleanup (const char* str, const char *path, gboolean *do_free)
|
||||
}
|
||||
|
||||
|
||||
G_GNUC_CONST static GMimeRecipientType
|
||||
recipient_type (MuMsgFieldId mfid)
|
||||
{
|
||||
switch (mfid) {
|
||||
case MU_MSG_FIELD_ID_BCC: return GMIME_RECIPIENT_TYPE_BCC;
|
||||
case MU_MSG_FIELD_ID_CC: return GMIME_RECIPIENT_TYPE_CC;
|
||||
case MU_MSG_FIELD_ID_TO: return GMIME_RECIPIENT_TYPE_TO;
|
||||
default: g_return_val_if_reached (-1);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
char*
|
||||
mu_msg_file_get_str_field (MuMsgFile *self, MuMsgFieldId mfid,
|
||||
gboolean *do_free)
|
||||
@ -683,17 +709,18 @@ mu_msg_file_get_str_field (MuMsgFile *self, MuMsgFieldId mfid,
|
||||
|
||||
switch (mfid) {
|
||||
|
||||
case MU_MSG_FIELD_ID_BCC: *do_free = TRUE;
|
||||
return get_recipient (self, GMIME_RECIPIENT_TYPE_BCC);
|
||||
case MU_MSG_FIELD_ID_ATTACH_TEXT: *do_free = TRUE;
|
||||
return NULL; /* FIXME */
|
||||
|
||||
case MU_MSG_FIELD_ID_BODY_TEXT: *do_free = TRUE;
|
||||
return get_body (self, FALSE);
|
||||
case MU_MSG_FIELD_ID_BCC:
|
||||
case MU_MSG_FIELD_ID_CC:
|
||||
case MU_MSG_FIELD_ID_TO: *do_free = TRUE;
|
||||
return get_recipient (self, recipient_type(mfid));
|
||||
|
||||
case MU_MSG_FIELD_ID_BODY_TEXT:
|
||||
case MU_MSG_FIELD_ID_BODY_HTML: *do_free = TRUE;
|
||||
return get_body (self, TRUE);
|
||||
|
||||
case MU_MSG_FIELD_ID_CC: *do_free = TRUE;
|
||||
return get_recipient (self, GMIME_RECIPIENT_TYPE_CC);
|
||||
return get_body
|
||||
(self, mfid == MU_MSG_FIELD_ID_BODY_HTML ? TRUE : FALSE);
|
||||
|
||||
case MU_MSG_FIELD_ID_FROM:
|
||||
return (char*)maybe_cleanup
|
||||
@ -707,9 +734,6 @@ mu_msg_file_get_str_field (MuMsgFile *self, MuMsgFieldId mfid,
|
||||
(g_mime_message_get_subject (self->_mime_msg),
|
||||
self->_path, do_free);
|
||||
|
||||
case MU_MSG_FIELD_ID_TO: *do_free = TRUE;
|
||||
return get_recipient (self, GMIME_RECIPIENT_TYPE_TO);
|
||||
|
||||
case MU_MSG_FIELD_ID_MSGID:
|
||||
return (char*)g_mime_message_get_message_id (self->_mime_msg);
|
||||
|
||||
|
||||
@ -66,10 +66,30 @@ struct _PartData {
|
||||
unsigned _idx;
|
||||
MuMsgPartForeachFunc _func;
|
||||
gpointer _user_data;
|
||||
GMimePart *_body_part;
|
||||
};
|
||||
typedef struct _PartData PartData;
|
||||
|
||||
|
||||
|
||||
char*
|
||||
mu_msg_part_to_string (MuMsgPart *part, gboolean *err)
|
||||
{
|
||||
char *txt;
|
||||
|
||||
g_return_val_if_fail (part && part->data, NULL);
|
||||
|
||||
if (GMIME_IS_PART(part->data))
|
||||
txt = mu_msg_mime_part_to_string (GMIME_PART(part->data),
|
||||
err);
|
||||
else
|
||||
txt = NULL;
|
||||
|
||||
return txt;
|
||||
}
|
||||
|
||||
|
||||
|
||||
static ssize_t
|
||||
get_part_size (GMimePart *part)
|
||||
{
|
||||
@ -100,6 +120,10 @@ part_foreach_cb (GMimeObject *parent, GMimeObject *part, PartData *pdata)
|
||||
memset (&pi, 0, sizeof pi);
|
||||
pi.index = pdata->_idx++;
|
||||
pi.content_id = (char*)g_mime_object_get_content_id (part);
|
||||
pi.data = (gpointer)part;
|
||||
|
||||
/* check if this is the body part */
|
||||
pi.is_body = ((void*)pdata->_body_part == (void*)part);
|
||||
|
||||
ct = g_mime_object_get_content_type (part);
|
||||
|
||||
@ -112,11 +136,13 @@ part_foreach_cb (GMimeObject *parent, GMimeObject *part, PartData *pdata)
|
||||
pi.disposition = (char*)g_mime_object_get_disposition (part);
|
||||
pi.file_name = (char*)g_mime_part_get_filename (GMIME_PART(part));
|
||||
pi.size = get_part_size (GMIME_PART(part));
|
||||
}
|
||||
} else
|
||||
return; /* only deal with GMimePart */
|
||||
|
||||
pdata->_func(pdata->_msg, &pi, pdata->_user_data);
|
||||
}
|
||||
|
||||
|
||||
static gboolean
|
||||
load_msg_file_maybe (MuMsg *msg)
|
||||
{
|
||||
@ -149,14 +175,18 @@ mu_msg_part_foreach (MuMsg *msg, MuMsgPartForeachFunc func,
|
||||
gpointer user_data)
|
||||
{
|
||||
PartData pdata;
|
||||
GMimeMessage *mime_msg;
|
||||
|
||||
g_return_if_fail (msg);
|
||||
g_return_if_fail (msg && msg->_file && msg->_file->_mime_msg);
|
||||
|
||||
if (!load_msg_file_maybe (msg))
|
||||
return;
|
||||
|
||||
mime_msg = msg->_file->_mime_msg;
|
||||
|
||||
pdata._msg = msg;
|
||||
pdata._idx = 0;
|
||||
pdata._body_part = mu_msg_mime_get_body_part (mime_msg, FALSE);
|
||||
pdata._func = func;
|
||||
pdata._user_data = user_data;
|
||||
|
||||
@ -269,6 +299,7 @@ mu_msg_part_filepath (MuMsg *msg, const char* targetdir, guint partidx)
|
||||
|
||||
|
||||
|
||||
|
||||
gchar*
|
||||
mu_msg_part_filepath_cache (MuMsg *msg, guint partid)
|
||||
{
|
||||
|
||||
@ -51,6 +51,9 @@ struct _MuMsgPart {
|
||||
|
||||
gpointer data; /* opaque data */
|
||||
|
||||
gboolean is_body; /* TRUE if this is probably the
|
||||
* message body*/
|
||||
|
||||
/* if TRUE, mu_msg_part_destroy will free the member vars
|
||||
* as well*/
|
||||
gboolean own_members;
|
||||
@ -77,6 +80,17 @@ typedef struct _MuMsgPart MuMsgPart;
|
||||
#define mu_msg_part_content_id(pi) ((pi)->content_id)
|
||||
|
||||
|
||||
/**
|
||||
* convert a MuMsgPart to a string
|
||||
*
|
||||
* @param part a MuMsgPart
|
||||
* @param err will receive TRUE if there was an error, FALSE otherwise
|
||||
*
|
||||
* @return utf8 string for this MIME part, to be freed by caller
|
||||
*/
|
||||
char* mu_msg_part_to_string (MuMsgPart *part, gboolean *err);
|
||||
|
||||
|
||||
/**
|
||||
* does this msg part look like an attachment?
|
||||
*
|
||||
|
||||
@ -66,7 +66,7 @@ _MuStore::rollback_transaction () {
|
||||
}
|
||||
|
||||
|
||||
/* we cache these prefix strings, so we don't have to allocate the all
|
||||
/* we cache these prefix strings, so we don't have to allocate them all
|
||||
* the time; this should save 10-20 string allocs per message */
|
||||
G_GNUC_CONST static const std::string&
|
||||
prefix (MuMsgFieldId mfid)
|
||||
@ -162,6 +162,7 @@ mu_store_set_metadata (MuStore *store, const char *key, const char *val,
|
||||
try {
|
||||
store->db_writable()->set_metadata (key, val);
|
||||
return TRUE;
|
||||
|
||||
} MU_STORE_CATCH_BLOCK_RETURN(err, FALSE);
|
||||
|
||||
} MU_XAPIAN_CATCH_BLOCK_G_ERROR_RETURN(err, MU_ERROR_XAPIAN, FALSE);
|
||||
@ -212,8 +213,6 @@ add_terms_values_date (Xapian::Document& doc, MuMsg *msg, MuMsgFieldId mfid)
|
||||
}
|
||||
}
|
||||
|
||||
/* TODO: we could pre-calculate the add_term values for FLAGS */
|
||||
|
||||
/* pre-calculate; optimization */
|
||||
G_GNUC_CONST static const std::string&
|
||||
flag_val (char flagchar)
|
||||
@ -310,7 +309,8 @@ static void
|
||||
add_terms_values_str (Xapian::Document& doc, char *val,
|
||||
MuMsgFieldId mfid)
|
||||
{
|
||||
/* the value is what we'll display; the unchanged original */
|
||||
/* the value is what we display in search results; the
|
||||
* unchanged original */
|
||||
if (mu_msg_field_xapian_value(mfid))
|
||||
doc.add_value ((Xapian::valueno)mfid, val);
|
||||
|
||||
@ -318,7 +318,8 @@ add_terms_values_str (Xapian::Document& doc, char *val,
|
||||
if (mu_msg_field_normalize (mfid))
|
||||
mu_str_normalize_in_place (val, TRUE);
|
||||
if (mu_msg_field_xapian_escape (mfid))
|
||||
mu_str_ascii_xapian_escape_in_place (val);
|
||||
mu_str_ascii_xapian_escape_in_place (val,
|
||||
TRUE /*esc_space*/);
|
||||
|
||||
if (mu_msg_field_xapian_index (mfid)) {
|
||||
Xapian::TermGenerator termgen;
|
||||
@ -402,36 +403,86 @@ struct PartData {
|
||||
MuMsgFieldId _mfid;
|
||||
};
|
||||
|
||||
|
||||
/*
 * Index the textual content of PART in the document held by PDATA,
 * using the prefix for MU_MSG_FIELD_ID_ATTACH_TEXT.
 *
 * Only parts whose MIME type appears in the txt_types table below are
 * considered. The part is converted to a string, normalized, and handed
 * to a Xapian term generator (without positional information).
 *
 * Returns TRUE when the text was indexed. Returns FALSE when the part
 * has no (complete) content type, is not one of the supported text
 * types, or could not be converted or normalized.
 */
static gboolean
index_text_part (MuMsgPart *part, PartData *pdata)
{
	/* the MIME types whose content we index; a subtype of "*" in
	 * this table matches any subtype */
	static const struct { const char *type; const char *subtype; }
	txt_types[] = {
		{ "text",    "plain"  },
		{ "message", "rfc822" },
	};

	unsigned u;
	gboolean txt_type, err;
	char *txt, *norm;

	/* a part without a content type cannot match anything, and
	 * NULL must never reach strcasecmp */
	if (!part->type || !part->subtype)
		return FALSE;

	/* check whether it's a type we need to store */
	txt_type = FALSE;
	for (u = 0; u != G_N_ELEMENTS(txt_types) && !txt_type; ++u) {
		if ((strcasecmp (part->type, txt_types[u].type) == 0) &&
		    ((strcasecmp (part->subtype, txt_types[u].subtype) == 0) ||
		     (strcmp (txt_types[u].subtype, "*") == 0)))
			txt_type = TRUE;
	}

	if (!txt_type)
		return FALSE; /* not a supported text type */

	/* mu_msg_part_to_string does not write to err on every path,
	 * so start from a defined value */
	err = FALSE;
	txt = mu_msg_part_to_string (part, &err);
	if (!txt || err) {
		g_free (txt); /* a string may come back even on error */
		return FALSE;
	}

	norm = mu_str_normalize (txt, TRUE);
	if (!norm) {
		g_free (txt);
		return FALSE;
	}

	{
		Xapian::TermGenerator termgen;

		termgen.set_document (pdata->_doc);
		termgen.index_text_without_positions
			(norm, 1, prefix(MU_MSG_FIELD_ID_ATTACH_TEXT));
	}

	g_free (norm);
	g_free (txt);

	return TRUE;
}
|
||||
|
||||
|
||||
static void
|
||||
each_part (MuMsg *msg, MuMsgPart *part, PartData *pdata)
|
||||
{
|
||||
static const std::string
|
||||
att (prefix(MU_MSG_FIELD_ID_ATTACH)),
|
||||
mime (prefix(MU_MSG_FIELD_ID_ATTACH_MIME_TYPE));
|
||||
file (prefix(MU_MSG_FIELD_ID_FILE)),
|
||||
mime (prefix(MU_MSG_FIELD_ID_MIME));
|
||||
|
||||
if (mu_msg_part_looks_like_attachment (part, TRUE) &&
|
||||
(part->file_name)) {
|
||||
/* save the mime type of any part */
|
||||
if (part->type) {
|
||||
/* note, we use '_' instead of '/' to separate
|
||||
* type/subtype -- Xapian doesn't treat '/' as
|
||||
* desired, so we use '_' and pre-process queries; see
|
||||
* mu_query_preprocess */
|
||||
char ctype[MuStore::MAX_TERM_LENGTH + 1];
|
||||
snprintf (ctype, sizeof(ctype), "%s_%s",
|
||||
part->type, part->subtype);
|
||||
pdata->_doc.add_term
|
||||
(mime + std::string(ctype, 0, MuStore::MAX_TERM_LENGTH));
|
||||
}
|
||||
|
||||
/* save the name of anything that has a filename */
|
||||
if (part->file_name) {
|
||||
char val[MuStore::MAX_TERM_LENGTH + 1];
|
||||
strncpy (val, part->file_name, sizeof(val));
|
||||
|
||||
/* now, let's create a term... */
|
||||
mu_str_normalize_in_place (val, TRUE);
|
||||
mu_str_ascii_xapian_escape_in_place (val);
|
||||
mu_str_ascii_xapian_escape_in_place (val, TRUE /*esc space*/);
|
||||
|
||||
pdata->_doc.add_term
|
||||
(att + std::string(val, 0, MuStore::MAX_TERM_LENGTH));
|
||||
|
||||
/* save the mime type */
|
||||
if (part->type) {
|
||||
gchar *str;
|
||||
str = g_strdup_printf ("%s/%s", part->type, part->subtype);
|
||||
pdata->_doc.add_term
|
||||
(mime + std::string(str, 0, MuStore::MAX_TERM_LENGTH));
|
||||
g_free (str);
|
||||
} else
|
||||
pdata->_doc.add_term (mime + "application/octet-stream");
|
||||
(file + std::string(val, 0, MuStore::MAX_TERM_LENGTH));
|
||||
}
|
||||
|
||||
/* now, for non-body parts with some MIME-types, index the
|
||||
* content as well */
|
||||
if (!part->is_body)
|
||||
index_text_part (part, pdata);
|
||||
}
|
||||
|
||||
|
||||
@ -465,8 +516,7 @@ add_terms_values_body (Xapian::Document& doc, MuMsg *msg,
|
||||
termgen.set_document(doc);
|
||||
|
||||
norm = mu_str_normalize (str, TRUE);
|
||||
termgen.index_text_without_positions
|
||||
(norm, 1, prefix(mfid));
|
||||
termgen.index_text_without_positions (norm, 1, prefix(mfid));
|
||||
|
||||
g_free (norm);
|
||||
}
|
||||
@ -478,6 +528,24 @@ struct _MsgDoc {
|
||||
};
|
||||
typedef struct _MsgDoc MsgDoc;
|
||||
|
||||
|
||||
static void
|
||||
add_terms_values_default (MuMsgFieldId mfid, MsgDoc* msgdoc)
|
||||
{
|
||||
if (mu_msg_field_is_numeric (mfid))
|
||||
add_terms_values_number
|
||||
(*msgdoc->_doc, msgdoc->_msg, mfid);
|
||||
else if (mu_msg_field_is_string (mfid))
|
||||
add_terms_values_string
|
||||
(*msgdoc->_doc, msgdoc->_msg, mfid);
|
||||
else if (mu_msg_field_is_string_list(mfid))
|
||||
add_terms_values_string_list
|
||||
(*msgdoc->_doc, msgdoc->_msg, mfid);
|
||||
else
|
||||
g_return_if_reached ();
|
||||
|
||||
}
|
||||
|
||||
static void
|
||||
add_terms_values (MuMsgFieldId mfid, MsgDoc* msgdoc)
|
||||
{
|
||||
@ -495,26 +563,19 @@ add_terms_values (MuMsgFieldId mfid, MsgDoc* msgdoc)
|
||||
case MU_MSG_FIELD_ID_BODY_TEXT:
|
||||
add_terms_values_body (*msgdoc->_doc, msgdoc->_msg, mfid);
|
||||
break;
|
||||
case MU_MSG_FIELD_ID_ATTACH: /* also takes care of MU_MSG_FIELD_ID_ATTACH_MIME */
|
||||
|
||||
/* note: add_terms_values_attach handles these three msgfields */
|
||||
case MU_MSG_FIELD_ID_FILE:
|
||||
add_terms_values_attach (*msgdoc->_doc, msgdoc->_msg, mfid);
|
||||
case MU_MSG_FIELD_ID_MIME:
|
||||
case MU_MSG_FIELD_ID_ATTACH_TEXT:
|
||||
break;
|
||||
case MU_MSG_FIELD_ID_ATTACH_MIME_TYPE:
|
||||
///////////////////////////////////////////
|
||||
|
||||
case MU_MSG_FIELD_ID_UID:
|
||||
break; /* already taken care of elsewhere */
|
||||
default:
|
||||
if (mu_msg_field_is_numeric (mfid))
|
||||
add_terms_values_number (*msgdoc->_doc, msgdoc->_msg,
|
||||
mfid);
|
||||
else if (mu_msg_field_is_string (mfid))
|
||||
add_terms_values_string (*msgdoc->_doc,
|
||||
msgdoc->_msg,
|
||||
mfid);
|
||||
else if (mu_msg_field_is_string_list(mfid))
|
||||
add_terms_values_string_list (*msgdoc->_doc,
|
||||
msgdoc->_msg,
|
||||
mfid);
|
||||
else
|
||||
g_return_if_reached ();
|
||||
return add_terms_values_default (mfid, msgdoc);
|
||||
}
|
||||
}
|
||||
|
||||
@ -559,7 +620,11 @@ each_contact_info (MuMsgContact *contact, MsgDoc *msgdoc)
|
||||
|
||||
/* don't normalize e-mail address, but do lowercase it */
|
||||
if (!mu_str_is_empty(contact->address)) {
|
||||
char *escaped = mu_str_ascii_xapian_escape (contact->address);
|
||||
|
||||
char *escaped;
|
||||
|
||||
escaped = mu_str_ascii_xapian_escape (contact->address,
|
||||
FALSE /*dont esc space*/);
|
||||
msgdoc->_doc->add_term
|
||||
(std::string (pfx + escaped, 0, MuStore::MAX_TERM_LENGTH));
|
||||
g_free (escaped);
|
||||
|
||||
Reference in New Issue
Block a user