message/contact: ensure valid email address in cache

Filter out the (rare but existent) invalid email addresses from the
cache; use the new method Contact::has_valid_email for that.
This commit is contained in:
Dirk-Jan C. Binnema
2022-12-29 19:03:21 +02:00
parent 5187d9eaa5
commit 2229e2e77e
4 changed files with 75 additions and 10 deletions

View File

@ -20,6 +20,7 @@
#include "mu-contact.hh"
#include "mu-message.hh"
#include "utils/mu-utils.hh"
#include "utils/mu-regex.hh"
#include "mu-mime-object.hh"
#include <gmime/gmime.h>
@ -49,6 +50,35 @@ Contact::display_name(bool quote) const
return address_rfc2047(*this);
}
static Regex email_rx;
bool
Contact::has_valid_email() const
{
/* regexp as per:
* https://html.spec.whatwg.org/multipage/input.html#valid-e-mail-address
*
* "This requirement is a willful violation of RFC 5322, which defines a
* syntax for email addresses that is simultaneously too strict (before
* the "@" character), too vague (after the "@" character), and too lax
* (allowing comments, whitespace characters, and quoted strings in
* manners unfamiliar to most users) to be of practical use here."
*/
constexpr auto email_rx_str =
R"(^[a-zA-Z0-9.!#$%&'*+\/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*$)";
/* avoid std::regex here, since speed matters. PCRE is faster */
if (!email_rx) {
if (auto&& res = Regex::make(email_rx_str, G_REGEX_OPTIMIZE); !res) {
g_error("BUG: error in regex: %s", res.error().what()); /* terminates process */
} else
email_rx = res.value();
}
return email_rx.matches(email);
}
std::string
Mu::to_string(const Mu::Contacts& contacts)
{
@ -94,6 +124,7 @@ test_ctor_foo()
assert_equal(c.email, "foo@example.com");
assert_equal(c.name, "Foo Bar");
g_assert_true(c.has_valid_email());
g_assert_true(*c.field_id() == Field::Id::Bcc);
g_assert_cmpuint(c.message_date,==,1645214647);
@ -115,6 +146,7 @@ test_ctor_blinky()
assert_equal(c.email, "bar@example.com");
assert_equal(c.name, "Blinky");
g_assert_true(c.has_valid_email());
g_assert_true(c.personal);
g_assert_cmpuint(c.frequency,==,13);
g_assert_cmpuint(c.tstamp,==,12345);
@ -138,6 +170,7 @@ test_ctor_cleanup()
assert_equal(c.email, "bar@example.com");
assert_equal(c.name, "Bli ky");
g_assert_true(c.personal);
g_assert_true(c.has_valid_email());
g_assert_cmpuint(c.frequency,==,13);
g_assert_cmpuint(c.tstamp,==,12345);
g_assert_cmpuint(c.message_date,==,1645215014);
@ -159,6 +192,7 @@ test_encode()
assert_equal(c.email, "cassius@example.com");
assert_equal(c.name, "Ali, Muhammad \"The Greatest\"");
g_assert_true(c.has_valid_email());
g_assert_false(c.personal);
g_assert_cmpuint(c.frequency,==,333);
g_assert_cmpuint(c.tstamp,==,768);
@ -177,6 +211,7 @@ test_sender()
assert_equal(c.email, "aa@example.com");
assert_equal(c.name, "Anders Ångström");
g_assert_true(c.has_valid_email());
g_assert_false(c.personal);
g_assert_cmpuint(c.frequency,==,1);
g_assert_cmpuint(c.message_date,==,54321);
@ -191,6 +226,14 @@ test_misc()
g_assert_false(!!contact_type_from_field_id(Field::Id::Subject));
}
static void
test_broken_email()
{
Contact c{"a***@@booa@example..com", "abcdef",
Contact::Type::Sender, 54321};
g_assert_false(c.has_valid_email());
}
int
main(int argc, char* argv[])
@ -205,6 +248,7 @@ main(int argc, char* argv[])
g_test_add_func("/message/contact/sender", test_sender);
g_test_add_func("/message/contact/misc", test_misc);
g_test_add_func("/message/contact/broken-email", test_broken_email);
return g_test_run();
}

View File

@ -96,6 +96,15 @@ struct Contact {
std::string display_name(bool quote_if_needed=false) const;
/**
* Does the contact contain a valid email address as per
* https://html.spec.whatwg.org/multipage/input.html#valid-e-mail-address
* ?
*
* @return true or false
*/
bool has_valid_email() const;
/**
* Operator==; based on the hash values (ie. lowercase e-mail address)
*
@ -151,13 +160,13 @@ struct Contact {
* data members
*/
std::string email; /**< Email address for this contact.Not empty */
std::string name; /**< Name for this contact; can be empty. */
Type type; /**< Type of contact */
int64_t message_date; /**< date of the contact's message */
bool personal; /**< A personal message? */
size_t frequency; /**< Frequency of this contact */
int64_t tstamp; /**< Timestamp for this contact (internal use) */
std::string email; /**< Email address for this contact.Not empty */
std::string name; /**< Name for this contact; can be empty. */
Type type; /**< Type of contact */
int64_t message_date; /**< Date of the contact's message */
bool personal; /**< A personal message? */
size_t frequency; /**< Frequency of this contact */
int64_t tstamp; /**< Timestamp for this contact (internal use) */
private:
void cleanup_name() { // replace control characters by spaces.