From 2229e2e77ea087e5d66cf8fc6aaa9965303b6168 Mon Sep 17 00:00:00 2001 From: "Dirk-Jan C. Binnema" Date: Thu, 29 Dec 2022 19:03:21 +0200 Subject: [PATCH] message/contact: ensure valid email address in cache Filter out the (rare but existent) invalid email addresses from the cache; use the new method Contact::has_valid_email for that. --- lib/message/mu-contact.cc | 44 +++++++++++++++++++++++++++++++++++++++ lib/message/mu-contact.hh | 23 +++++++++++++------- lib/mu-contacts-cache.cc | 13 ++++++++++-- lib/mu-contacts-cache.hh | 5 ++++- 4 files changed, 75 insertions(+), 10 deletions(-) diff --git a/lib/message/mu-contact.cc b/lib/message/mu-contact.cc index 711f4d27..d9e03ddb 100644 --- a/lib/message/mu-contact.cc +++ b/lib/message/mu-contact.cc @@ -20,6 +20,7 @@ #include "mu-contact.hh" #include "mu-message.hh" #include "utils/mu-utils.hh" +#include "utils/mu-regex.hh" #include "mu-mime-object.hh" #include @@ -49,6 +50,35 @@ Contact::display_name(bool quote) const return address_rfc2047(*this); } + +static Regex email_rx; + +bool +Contact::has_valid_email() const +{ + /* regexp as per: + * https://html.spec.whatwg.org/multipage/input.html#valid-e-mail-address + * + * "This requirement is a willful violation of RFC 5322, which defines a + * syntax for email addresses that is simultaneously too strict (before + * the "@" character), too vague (after the "@" character), and too lax + * (allowing comments, whitespace characters, and quoted strings in + * manners unfamiliar to most users) to be of practical use here." + */ + constexpr auto email_rx_str = + R"(^[a-zA-Z0-9.!#$%&'*+\/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*$)"; + /* avoid std::regex here, since speed matters. PCRE is faster */ + if (!email_rx) { + if (auto&& res = Regex::make(email_rx_str, G_REGEX_OPTIMIZE); !res) { + g_error("BUG: error in regex: %s", res.error().what()); /* terminates process */ + } else + email_rx = res.value(); + } + + return email_rx.matches(email); +} + + std::string Mu::to_string(const Mu::Contacts& contacts) { @@ -94,6 +124,7 @@ test_ctor_foo() assert_equal(c.email, "foo@example.com"); assert_equal(c.name, "Foo Bar"); + g_assert_true(c.has_valid_email()); g_assert_true(*c.field_id() == Field::Id::Bcc); g_assert_cmpuint(c.message_date,==,1645214647); @@ -115,6 +146,7 @@ test_ctor_blinky() assert_equal(c.email, "bar@example.com"); assert_equal(c.name, "Blinky"); + g_assert_true(c.has_valid_email()); g_assert_true(c.personal); g_assert_cmpuint(c.frequency,==,13); g_assert_cmpuint(c.tstamp,==,12345); @@ -138,6 +170,7 @@ test_ctor_cleanup() assert_equal(c.email, "bar@example.com"); assert_equal(c.name, "Bli ky"); g_assert_true(c.personal); + g_assert_true(c.has_valid_email()); g_assert_cmpuint(c.frequency,==,13); g_assert_cmpuint(c.tstamp,==,12345); g_assert_cmpuint(c.message_date,==,1645215014); @@ -159,6 +192,7 @@ test_encode() assert_equal(c.email, "cassius@example.com"); assert_equal(c.name, "Ali, Muhammad \"The Greatest\""); + g_assert_true(c.has_valid_email()); g_assert_false(c.personal); g_assert_cmpuint(c.frequency,==,333); g_assert_cmpuint(c.tstamp,==,768); @@ -177,6 +211,7 @@ test_sender() assert_equal(c.email, "aa@example.com"); assert_equal(c.name, "Anders Ångström"); + g_assert_true(c.has_valid_email()); g_assert_false(c.personal); g_assert_cmpuint(c.frequency,==,1); g_assert_cmpuint(c.message_date,==,54321); @@ -191,6 +226,14 @@ test_misc() g_assert_false(!!contact_type_from_field_id(Field::Id::Subject)); } +static void +test_broken_email() +{ + Contact c{"a***@@booa@example..com", "abcdef", + Contact::Type::Sender, 54321}; + + g_assert_false(c.has_valid_email()); +} int main(int argc, char* argv[]) @@ -205,6 +248,7 @@ main(int argc, char* argv[]) g_test_add_func("/message/contact/sender", test_sender); g_test_add_func("/message/contact/misc", test_misc); + g_test_add_func("/message/contact/broken-email", test_broken_email); return g_test_run(); } diff --git a/lib/message/mu-contact.hh b/lib/message/mu-contact.hh index 06d9d16b..91d1555b 100644 --- a/lib/message/mu-contact.hh +++ b/lib/message/mu-contact.hh @@ -96,6 +96,15 @@ struct Contact { std::string display_name(bool quote_if_needed=false) const; + /** + * Does the contact contain a valid email address as per + * https://html.spec.whatwg.org/multipage/input.html#valid-e-mail-address + * ? + * + * @return true or false + */ + bool has_valid_email() const; + /** * Operator==; based on the hash values (ie. lowercase e-mail address) * @@ -151,13 +160,13 @@ struct Contact { * data members */ - std::string email; /**< Email address for this contact.Not empty */ - std::string name; /**< Name for this contact; can be empty. */ - Type type; /**< Type of contact */ - int64_t message_date; /**< date of the contact's message */ - bool personal; /**< A personal message? */ - size_t frequency; /**< Frequency of this contact */ - int64_t tstamp; /**< Timestamp for this contact (internal use) */ + std::string email; /**< Email address for this contact.Not empty */ + std::string name; /**< Name for this contact; can be empty. */ + Type type; /**< Type of contact */ + int64_t message_date; /**< Date of the contact's message */ + bool personal; /**< A personal message? */ + size_t frequency; /**< Frequency of this contact */ + int64_t tstamp; /**< Timestamp for this contact (internal use) */ private: void cleanup_name() { // replace control characters by spaces. diff --git a/lib/mu-contacts-cache.cc b/lib/mu-contacts-cache.cc index cef0291b..c84a72ca 100644 --- a/lib/mu-contacts-cache.cc +++ b/lib/mu-contacts-cache.cc @@ -128,7 +128,7 @@ ContactsCache::Private::deserialize(const std::string& serialized) const g_warning("error: '%s'", line.c_str()); continue; } - Contact ci(parts[1], // email + Contact ci(parts[1], // email std::move(parts[2]), // name (time_t)g_ascii_strtoll(parts[4].c_str(), NULL, 10), // message_date parts[3][0] == '1' ? true : false, // personal @@ -152,6 +152,9 @@ ContactsCache::serialize() const std::lock_guard l_{priv_->mtx_}; std::string s; + // XXX: 'display_name' is cached but unused; remove it, the next time we + // update the database schema. + for (auto& item : priv_->contacts_) { const auto& ci{item.second}; s += Mu::format("%s%s" @@ -184,10 +187,16 @@ ContactsCache::dirty() const return priv_->dirty_; } -//const Contact void ContactsCache::add(Contact&& contact) { + /* we do _not_ cache invalid email addresses, so we won't offer them in completions etc. It + * should be _rare_, but we've seen cases ( broken local messages) */ + if (!contact.has_valid_email()) { + g_warning("not caching invalid e-mail address '%s'", contact.email.c_str()); + return; + } + std::lock_guard l_{priv_->mtx_}; ++priv_->dirty_; diff --git a/lib/mu-contacts-cache.hh b/lib/mu-contacts-cache.hh index 7c871d75..dc54d4bd 100644 --- a/lib/mu-contacts-cache.hh +++ b/lib/mu-contacts-cache.hh @@ -53,6 +53,8 @@ public: /** * Add a contact * + * Invalid email address are not cached (but we log a warning) + * * @param contact a Contact object * */ @@ -65,6 +67,8 @@ public: * if any of the contacts matches one of the personal addresses, * any of the senders/recipients are considered "personal" * + * Invalid email address are not cached (but we log a warning) + * * @param contacts a Contact object sequence * @param is_personal receives true if any of the contacts was personal; * false otherwise @@ -75,7 +79,6 @@ public: add(std::move(contacts), _ignore); } - /** * Clear all contacts *