message/contact: ensure valid email address in cache

Filter out the (rare but existent) invalid email addresses from the
cache; use the new method Contact::has_valid_email for that.
This commit is contained in:
Dirk-Jan C. Binnema
2022-12-29 19:03:21 +02:00
parent 5187d9eaa5
commit 2229e2e77e
4 changed files with 75 additions and 10 deletions

View File

@ -20,6 +20,7 @@
#include "mu-contact.hh" #include "mu-contact.hh"
#include "mu-message.hh" #include "mu-message.hh"
#include "utils/mu-utils.hh" #include "utils/mu-utils.hh"
#include "utils/mu-regex.hh"
#include "mu-mime-object.hh" #include "mu-mime-object.hh"
#include <gmime/gmime.h> #include <gmime/gmime.h>
@ -49,6 +50,35 @@ Contact::display_name(bool quote) const
return address_rfc2047(*this); return address_rfc2047(*this);
} }
static Regex email_rx;
bool
Contact::has_valid_email() const
{
/* regexp as per:
* https://html.spec.whatwg.org/multipage/input.html#valid-e-mail-address
*
* "This requirement is a willful violation of RFC 5322, which defines a
* syntax for email addresses that is simultaneously too strict (before
* the "@" character), too vague (after the "@" character), and too lax
* (allowing comments, whitespace characters, and quoted strings in
* manners unfamiliar to most users) to be of practical use here."
*/
constexpr auto email_rx_str =
R"(^[a-zA-Z0-9.!#$%&'*+\/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*$)";
/* avoid std::regex here, since speed matters. PCRE is faster */
if (!email_rx) {
if (auto&& res = Regex::make(email_rx_str, G_REGEX_OPTIMIZE); !res) {
g_error("BUG: error in regex: %s", res.error().what()); /* terminates process */
} else
email_rx = res.value();
}
return email_rx.matches(email);
}
std::string std::string
Mu::to_string(const Mu::Contacts& contacts) Mu::to_string(const Mu::Contacts& contacts)
{ {
@ -94,6 +124,7 @@ test_ctor_foo()
assert_equal(c.email, "foo@example.com"); assert_equal(c.email, "foo@example.com");
assert_equal(c.name, "Foo Bar"); assert_equal(c.name, "Foo Bar");
g_assert_true(c.has_valid_email());
g_assert_true(*c.field_id() == Field::Id::Bcc); g_assert_true(*c.field_id() == Field::Id::Bcc);
g_assert_cmpuint(c.message_date,==,1645214647); g_assert_cmpuint(c.message_date,==,1645214647);
@ -115,6 +146,7 @@ test_ctor_blinky()
assert_equal(c.email, "bar@example.com"); assert_equal(c.email, "bar@example.com");
assert_equal(c.name, "Blinky"); assert_equal(c.name, "Blinky");
g_assert_true(c.has_valid_email());
g_assert_true(c.personal); g_assert_true(c.personal);
g_assert_cmpuint(c.frequency,==,13); g_assert_cmpuint(c.frequency,==,13);
g_assert_cmpuint(c.tstamp,==,12345); g_assert_cmpuint(c.tstamp,==,12345);
@ -138,6 +170,7 @@ test_ctor_cleanup()
assert_equal(c.email, "bar@example.com"); assert_equal(c.email, "bar@example.com");
assert_equal(c.name, "Bli ky"); assert_equal(c.name, "Bli ky");
g_assert_true(c.personal); g_assert_true(c.personal);
g_assert_true(c.has_valid_email());
g_assert_cmpuint(c.frequency,==,13); g_assert_cmpuint(c.frequency,==,13);
g_assert_cmpuint(c.tstamp,==,12345); g_assert_cmpuint(c.tstamp,==,12345);
g_assert_cmpuint(c.message_date,==,1645215014); g_assert_cmpuint(c.message_date,==,1645215014);
@ -159,6 +192,7 @@ test_encode()
assert_equal(c.email, "cassius@example.com"); assert_equal(c.email, "cassius@example.com");
assert_equal(c.name, "Ali, Muhammad \"The Greatest\""); assert_equal(c.name, "Ali, Muhammad \"The Greatest\"");
g_assert_true(c.has_valid_email());
g_assert_false(c.personal); g_assert_false(c.personal);
g_assert_cmpuint(c.frequency,==,333); g_assert_cmpuint(c.frequency,==,333);
g_assert_cmpuint(c.tstamp,==,768); g_assert_cmpuint(c.tstamp,==,768);
@ -177,6 +211,7 @@ test_sender()
assert_equal(c.email, "aa@example.com"); assert_equal(c.email, "aa@example.com");
assert_equal(c.name, "Anders Ångström"); assert_equal(c.name, "Anders Ångström");
g_assert_true(c.has_valid_email());
g_assert_false(c.personal); g_assert_false(c.personal);
g_assert_cmpuint(c.frequency,==,1); g_assert_cmpuint(c.frequency,==,1);
g_assert_cmpuint(c.message_date,==,54321); g_assert_cmpuint(c.message_date,==,54321);
@ -191,6 +226,14 @@ test_misc()
g_assert_false(!!contact_type_from_field_id(Field::Id::Subject)); g_assert_false(!!contact_type_from_field_id(Field::Id::Subject));
} }
static void
test_broken_email()
{
Contact c{"a***@@booa@example..com", "abcdef",
Contact::Type::Sender, 54321};
g_assert_false(c.has_valid_email());
}
int int
main(int argc, char* argv[]) main(int argc, char* argv[])
@ -205,6 +248,7 @@ main(int argc, char* argv[])
g_test_add_func("/message/contact/sender", test_sender); g_test_add_func("/message/contact/sender", test_sender);
g_test_add_func("/message/contact/misc", test_misc); g_test_add_func("/message/contact/misc", test_misc);
g_test_add_func("/message/contact/broken-email", test_broken_email);
return g_test_run(); return g_test_run();
} }

View File

@ -96,6 +96,15 @@ struct Contact {
std::string display_name(bool quote_if_needed=false) const; std::string display_name(bool quote_if_needed=false) const;
/**
* Does the contact contain a valid email address as per
* https://html.spec.whatwg.org/multipage/input.html#valid-e-mail-address
* ?
*
* @return true or false
*/
bool has_valid_email() const;
/** /**
* Operator==; based on the hash values (ie. lowercase e-mail address) * Operator==; based on the hash values (ie. lowercase e-mail address)
* *
@ -151,13 +160,13 @@ struct Contact {
* data members * data members
*/ */
std::string email; /**< Email address for this contact.Not empty */ std::string email; /**< Email address for this contact.Not empty */
std::string name; /**< Name for this contact; can be empty. */ std::string name; /**< Name for this contact; can be empty. */
Type type; /**< Type of contact */ Type type; /**< Type of contact */
int64_t message_date; /**< date of the contact's message */ int64_t message_date; /**< Date of the contact's message */
bool personal; /**< A personal message? */ bool personal; /**< A personal message? */
size_t frequency; /**< Frequency of this contact */ size_t frequency; /**< Frequency of this contact */
int64_t tstamp; /**< Timestamp for this contact (internal use) */ int64_t tstamp; /**< Timestamp for this contact (internal use) */
private: private:
void cleanup_name() { // replace control characters by spaces. void cleanup_name() { // replace control characters by spaces.

View File

@ -128,7 +128,7 @@ ContactsCache::Private::deserialize(const std::string& serialized) const
g_warning("error: '%s'", line.c_str()); g_warning("error: '%s'", line.c_str());
continue; continue;
} }
Contact ci(parts[1], // email Contact ci(parts[1], // email
std::move(parts[2]), // name std::move(parts[2]), // name
(time_t)g_ascii_strtoll(parts[4].c_str(), NULL, 10), // message_date (time_t)g_ascii_strtoll(parts[4].c_str(), NULL, 10), // message_date
parts[3][0] == '1' ? true : false, // personal parts[3][0] == '1' ? true : false, // personal
@ -152,6 +152,9 @@ ContactsCache::serialize() const
std::lock_guard<std::mutex> l_{priv_->mtx_}; std::lock_guard<std::mutex> l_{priv_->mtx_};
std::string s; std::string s;
// XXX: 'display_name' is cached but unused; remove it, the next time we
// update the database schema.
for (auto& item : priv_->contacts_) { for (auto& item : priv_->contacts_) {
const auto& ci{item.second}; const auto& ci{item.second};
s += Mu::format("%s%s" s += Mu::format("%s%s"
@ -184,10 +187,16 @@ ContactsCache::dirty() const
return priv_->dirty_; return priv_->dirty_;
} }
//const Contact
void void
ContactsCache::add(Contact&& contact) ContactsCache::add(Contact&& contact)
{ {
/* we do _not_ cache invalid email addresses, so we won't offer them in completions etc. It
* should be _rare_, but we've seen cases ( broken local messages) */
if (!contact.has_valid_email()) {
g_warning("not caching invalid e-mail address '%s'", contact.email.c_str());
return;
}
std::lock_guard<std::mutex> l_{priv_->mtx_}; std::lock_guard<std::mutex> l_{priv_->mtx_};
++priv_->dirty_; ++priv_->dirty_;

View File

@ -53,6 +53,8 @@ public:
/** /**
* Add a contact * Add a contact
* *
* Invalid email address are not cached (but we log a warning)
*
* @param contact a Contact object * @param contact a Contact object
* *
*/ */
@ -65,6 +67,8 @@ public:
* if any of the contacts matches one of the personal addresses, * if any of the contacts matches one of the personal addresses,
* any of the senders/recipients are considered "personal" * any of the senders/recipients are considered "personal"
* *
* Invalid email address are not cached (but we log a warning)
*
* @param contacts a Contact object sequence * @param contacts a Contact object sequence
* @param is_personal receives true if any of the contacts was personal; * @param is_personal receives true if any of the contacts was personal;
* false otherwise * false otherwise
@ -75,7 +79,6 @@ public:
add(std::move(contacts), _ignore); add(std::move(contacts), _ignore);
} }
/** /**
* Clear all contacts * Clear all contacts
* *