message: refactor/improve attachment heuristic a bit
Also check for X-MS-Has-Attach
This commit is contained in:
@ -158,3 +158,31 @@ MessagePart::is_encrypted() const noexcept
|
|||||||
{
|
{
|
||||||
return mime_object().is_multipart_encrypted();
|
return mime_object().is_multipart_encrypted();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool /* heuristic */
|
||||||
|
MessagePart::looks_like_attachment() const noexcept
|
||||||
|
{
|
||||||
|
auto matches=[](const MimeContentType& ctype,
|
||||||
|
const std::initializer_list<std::pair<const char*, const char*>>& ctypes) {
|
||||||
|
return std::find_if(ctypes.begin(), ctypes.end(), [&](auto&& item){
|
||||||
|
return ctype.is_type(item.first, item.second); }) != ctypes.end();
|
||||||
|
};
|
||||||
|
|
||||||
|
const auto ctype{mime_object().content_type()};
|
||||||
|
if (!ctype)
|
||||||
|
return false; // no content-type: not an attachment.
|
||||||
|
|
||||||
|
// we consider some parts _not_ to be attachments regardless of disposition
|
||||||
|
if (matches(*ctype,{{"application", "pgp-keys"}}))
|
||||||
|
return false;
|
||||||
|
|
||||||
|
// we consider some parts to be attachments regardless of disposition
|
||||||
|
if (matches(*ctype,{{"image", "*"},
|
||||||
|
{"audio", "*"},
|
||||||
|
{"application", "*"},
|
||||||
|
{"application", "x-patch"}}))
|
||||||
|
return true;
|
||||||
|
|
||||||
|
// otherwise, rely on the disposition
|
||||||
|
return is_attachment();
|
||||||
|
}
|
||||||
|
|||||||
@ -105,13 +105,23 @@ public:
|
|||||||
/**
|
/**
|
||||||
* Does this part have an "attachment" disposition? Otherwise it is
|
* Does this part have an "attachment" disposition? Otherwise it is
|
||||||
* "inline". Note that does *not* map 1:1 to a message's HasAttachment
|
* "inline". Note that does *not* map 1:1 to a message's HasAttachment
|
||||||
* flag.
|
* flag (which uses looks_like_attachment()).
|
||||||
*
|
*
|
||||||
* @return true or false.
|
* @return true or false.
|
||||||
*/
|
*/
|
||||||
bool is_attachment() const noexcept;
|
bool is_attachment() const noexcept;
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Does this part appear to be an attachment from an end-user's point of
|
||||||
|
* view? This uses some heuristics to guess. Some parts for which
|
||||||
|
* is_attachment() is true may not "really" be attachments, and
|
||||||
|
* vice versa.
|
||||||
|
*
|
||||||
|
* @return true or false.
|
||||||
|
*/
|
||||||
|
bool looks_like_attachment() const noexcept;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Is this part signed?
|
* Is this part signed?
|
||||||
*
|
*
|
||||||
|
|||||||
@ -340,45 +340,6 @@ get_mailing_list(const MimeMessage& mime_msg)
|
|||||||
return to_string_opt_gchar(std::move(res));
|
return to_string_opt_gchar(std::move(res));
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool /* heuristic */
|
|
||||||
looks_like_attachment(const MimeObject& parent,
|
|
||||||
const MimePart& part, const MimeContentType& ctype)
|
|
||||||
{
|
|
||||||
constexpr std::array<std::pair<const char*, const char*>, 4> att_types = {{
|
|
||||||
{"image", "*"},
|
|
||||||
{"audio", "*"},
|
|
||||||
{"application", "*"},
|
|
||||||
{"application", "x-patch"}
|
|
||||||
}};
|
|
||||||
|
|
||||||
if (parent) { /* crypto multipart children are not considered attachments */
|
|
||||||
if (const auto parent_ctype{parent.content_type()}; parent_ctype) {
|
|
||||||
if (parent_ctype->is_type("multipart", "signed") ||
|
|
||||||
parent_ctype->is_type("multipart", "encrypted"))
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* we also consider patches, images, audio, and non-pgp-signature
|
|
||||||
* application attachments to be attachments... */
|
|
||||||
if (ctype.is_type("*", "pgp-signature"))
|
|
||||||
return false; /* don't consider as a signature */
|
|
||||||
|
|
||||||
if (ctype.is_type("text", "*") &&
|
|
||||||
(ctype.is_type("*", "plain") || ctype.is_type("*", "html")))
|
|
||||||
return false; /* not a signature */
|
|
||||||
|
|
||||||
/* if not one of those special types, consider it any attachment
|
|
||||||
* if it says so */
|
|
||||||
if (part.is_attachment())
|
|
||||||
return true;
|
|
||||||
|
|
||||||
const auto it = seq_find_if(att_types, [&](auto&& item){
|
|
||||||
return ctype.is_type(item.first, item.second);
|
|
||||||
});
|
|
||||||
return it != att_types.cend(); /* if found, it's an attachment */
|
|
||||||
}
|
|
||||||
|
|
||||||
static void
|
static void
|
||||||
append_text(Option<std::string>& str, Option<std::string> app)
|
append_text(Option<std::string>& str, Option<std::string> app)
|
||||||
{
|
{
|
||||||
@ -403,19 +364,38 @@ accumulate_text(const MimePart& part, Message::Private& info,
|
|||||||
append_text(info.body_html, part.to_string());
|
append_text(info.body_html, part.to_string());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static bool /* heuristic */
|
||||||
|
looks_like_attachment(const MimeObject& parent, const MessagePart& mpart)
|
||||||
|
{
|
||||||
|
if (parent) { /* crypto multipart children are not considered attachments */
|
||||||
|
if (const auto parent_ctype{parent.content_type()}; parent_ctype) {
|
||||||
|
if (parent_ctype->is_type("multipart", "signed") ||
|
||||||
|
parent_ctype->is_type("multipart", "encrypted"))
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return mpart.looks_like_attachment();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
static void
|
static void
|
||||||
process_part(const MimeObject& parent, const MimePart& part,
|
process_part(const MimeObject& parent, const MimePart& part,
|
||||||
Message::Private& info)
|
Message::Private& info, const MessagePart& mpart)
|
||||||
{
|
{
|
||||||
const auto ctype{part.content_type()};
|
const auto ctype{part.content_type()};
|
||||||
if (!ctype)
|
if (!ctype)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
if (looks_like_attachment(parent, part, *ctype))
|
|
||||||
// flag as calendar, if not already
|
// flag as calendar, if not already
|
||||||
if (none_of(info.flags & Flags::Calendar) &&
|
if (none_of(info.flags & Flags::Calendar) &&
|
||||||
ctype->is_type("text", "calendar"))
|
ctype->is_type("text", "calendar"))
|
||||||
info.flags |= Flags::Calendar;
|
info.flags |= Flags::Calendar;
|
||||||
|
|
||||||
|
// flag as attachment, if not already.
|
||||||
|
if (none_of(info.flags & Flags::HasAttachment) &&
|
||||||
|
looks_like_attachment(parent, mpart))
|
||||||
info.flags |= Flags::HasAttachment;
|
info.flags |= Flags::HasAttachment;
|
||||||
|
|
||||||
// if there are text parts, gather.
|
// if there are text parts, gather.
|
||||||
@ -499,7 +479,7 @@ handle_object(const MimeObject& parent,
|
|||||||
info.parts.emplace_back(obj);
|
info.parts.emplace_back(obj);
|
||||||
|
|
||||||
if (obj.is_part())
|
if (obj.is_part())
|
||||||
process_part(parent, obj, info);
|
process_part(parent, obj, info, info.parts.back());
|
||||||
else if (obj.is_message_part())
|
else if (obj.is_message_part())
|
||||||
process_message_part(obj, info);
|
process_message_part(obj, info);
|
||||||
else if (obj.is_multipart_signed())
|
else if (obj.is_multipart_signed())
|
||||||
@ -553,6 +533,16 @@ process_message(const MimeMessage& mime_msg, const std::string& path,
|
|||||||
info.mailing_list = get_mailing_list(mime_msg);
|
info.mailing_list = get_mailing_list(mime_msg);
|
||||||
if (info.mailing_list)
|
if (info.mailing_list)
|
||||||
info.flags |= Flags::MailingList;
|
info.flags |= Flags::MailingList;
|
||||||
|
|
||||||
|
// Microsoft override; Outlook messages can tell us directly
|
||||||
|
// whether they have attachments, through the X-MS-Has-Attach header.
|
||||||
|
const auto ms_atthdr{mime_msg.header("X-MS-Has-Attach")};
|
||||||
|
if (ms_atthdr) {
|
||||||
|
if (*ms_atthdr == "yes")
|
||||||
|
info.flags |= Flags::HasAttachment;
|
||||||
|
else
|
||||||
|
info.flags &= ~Flags::HasAttachment;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static Mu::Result<std::string>
|
static Mu::Result<std::string>
|
||||||
|
|||||||
@ -20,6 +20,7 @@
|
|||||||
#include "mu-message.hh"
|
#include "mu-message.hh"
|
||||||
#include "mu-mime-object.hh"
|
#include "mu-mime-object.hh"
|
||||||
#include <glib.h>
|
#include <glib.h>
|
||||||
|
#include <regex>
|
||||||
|
|
||||||
using namespace Mu;
|
using namespace Mu;
|
||||||
|
|
||||||
@ -569,7 +570,45 @@ Moi,
|
|||||||
part.mime_type().value_or("boo").c_str());
|
part.mime_type().value_or("boo").c_str());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
test_message_ms_attach()
|
||||||
|
{
|
||||||
|
const std::string msgtext =
|
||||||
|
R"(Date: Thu, 31 Jul 2008 14:57:25 -0400
|
||||||
|
From: "John Milton" <jm@example.com>
|
||||||
|
Subject: Fere libenter homines id quod volunt credunt
|
||||||
|
To: "Julius Caesar" <jc@example.com>
|
||||||
|
Message-id: <3BE9E6535E3029448670913581E7A1A20D852173@emss35m06.us.lmco.com>
|
||||||
|
X-MS-Has-Attach:
|
||||||
|
MIME-version: 1.0
|
||||||
|
Content-type: text/plain; charset=us-ascii
|
||||||
|
Content-transfer-encoding: 7BIT
|
||||||
|
|
||||||
|
OF Mans First Disobedience, and the Fruit
|
||||||
|
Of that Forbidden Tree, whose mortal tast
|
||||||
|
Brought Death into the World, and all our woe,
|
||||||
|
With loss of Eden, till one greater Man
|
||||||
|
)";
|
||||||
|
|
||||||
|
{
|
||||||
|
auto message{Message::make_from_text(msgtext)};
|
||||||
|
g_assert_true(!!message);
|
||||||
|
g_assert_true(message->flags() == (Flags::None));
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
const auto text2 = std::regex_replace(
|
||||||
|
msgtext, std::regex{"X-MS-Has-Attach:"},
|
||||||
|
"X-MS-Has-Attach: yes");
|
||||||
|
|
||||||
|
g_message("%s", text2.c_str());
|
||||||
|
|
||||||
|
auto message{Message::make_from_text(text2)};
|
||||||
|
|
||||||
|
g_assert_true(!!message);
|
||||||
|
g_assert_true(message->flags() == (Flags::HasAttachment));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
static void
|
static void
|
||||||
@ -841,6 +880,8 @@ main(int argc, char* argv[])
|
|||||||
test_message_multipart_mixed_rfc822);
|
test_message_multipart_mixed_rfc822);
|
||||||
g_test_add_func("/message/message/detect-attachment",
|
g_test_add_func("/message/message/detect-attachment",
|
||||||
test_message_detect_attachment);
|
test_message_detect_attachment);
|
||||||
|
g_test_add_func("/message/message/x-ms-has-attach",
|
||||||
|
test_message_ms_attach);
|
||||||
g_test_add_func("/message/message/calendar",
|
g_test_add_func("/message/message/calendar",
|
||||||
test_message_calendar);
|
test_message_calendar);
|
||||||
g_test_add_func("/message/message/fail",
|
g_test_add_func("/message/message/fail",
|
||||||
|
|||||||
Reference in New Issue
Block a user