From d13feb2d992d52e36eb3e767619945ac02163326 Mon Sep 17 00:00:00 2001 From: "Dirk-Jan C. Binnema" Date: Mon, 1 Apr 2024 20:51:40 +0300 Subject: [PATCH] mu-contact: move email validation to contacts cache So we can be sure the regexp is initialized. This _may_ help for https://bugzilla.opensuse.org/show_bug.cgi?id=1221861 though it is very hard to tell! --- lib/message/mu-contact.cc | 46 +------------------------------- lib/mu-contacts-cache.cc | 56 ++++++++++++++++++++++++++++++++------- lib/mu-contacts-cache.hh | 10 ++++++- mu/mu-cmd-cfind.cc | 2 +- 4 files changed, 58 insertions(+), 56 deletions(-) diff --git a/lib/message/mu-contact.cc b/lib/message/mu-contact.cc index 4ce9a5c9..c6439b06 100644 --- a/lib/message/mu-contact.cc +++ b/lib/message/mu-contact.cc @@ -1,5 +1,5 @@ /* -** Copyright (C) 2022-2023 Dirk-Jan C. Binnema +** Copyright (C) 2022-2024 Dirk-Jan C. Binnema ** ** This program is free software; you can redistribute it and/or modify it ** under the terms of the GNU General Public License as published by the @@ -20,7 +20,6 @@ #include "mu-contact.hh" #include "mu-message.hh" #include "utils/mu-utils.hh" -#include "utils/mu-regex.hh" #include "mu-mime-object.hh" #include @@ -47,34 +46,6 @@ Contact::display_name() const return Mu::quote(name) + " <" + email + '>'; } -static Regex email_rx; - -bool -Contact::has_valid_email() const -{ - /* regexp as per: - * https://html.spec.whatwg.org/multipage/input.html#valid-e-mail-address - * - * "This requirement is a willful violation of RFC 5322, which defines a - * syntax for email addresses that is simultaneously too strict (before - * the "@" character), too vague (after the "@" character), and too lax - * (allowing comments, whitespace characters, and quoted strings in - * manners unfamiliar to most users) to be of practical use here." - */ - constexpr auto email_rx_str = - R"(^[a-zA-Z0-9.!#$%&'*+\/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*$)"; - /* avoid std::regex here, since speed matters. PCRE is faster */ - if (!email_rx) { - if (auto&& res = Regex::make(email_rx_str, G_REGEX_OPTIMIZE); !res) { - g_error("BUG: error in regex: %s", res.error().what()); /* terminates process */ - } else - email_rx = res.value(); - } - - return email_rx.matches(email); -} - - std::string Mu::to_string(const Mu::Contacts& contacts) { @@ -120,7 +91,6 @@ test_ctor_foo() assert_equal(c.email, "foo@example.com"); assert_equal(c.name, "Foo Bar"); - g_assert_true(c.has_valid_email()); g_assert_true(*c.field_id() == Field::Id::Bcc); g_assert_cmpuint(c.message_date,==,1645214647); @@ -142,7 +112,6 @@ test_ctor_blinky() assert_equal(c.email, "bar@example.com"); assert_equal(c.name, "Blinky"); - g_assert_true(c.has_valid_email()); g_assert_true(c.personal); g_assert_cmpuint(c.frequency,==,13); g_assert_cmpuint(c.tstamp,==,12345); @@ -166,7 +135,6 @@ test_ctor_cleanup() assert_equal(c.email, "bar@example.com"); assert_equal(c.name, "Bli ky"); g_assert_true(c.personal); - g_assert_true(c.has_valid_email()); g_assert_cmpuint(c.frequency,==,13); g_assert_cmpuint(c.tstamp,==,12345); g_assert_cmpuint(c.message_date,==,1645215014); @@ -188,7 +156,6 @@ test_encode() assert_equal(c.email, "cassius@example.com"); assert_equal(c.name, "Ali, Muhammad \"The Greatest\""); - g_assert_true(c.has_valid_email()); g_assert_false(c.personal); g_assert_cmpuint(c.frequency,==,333); g_assert_cmpuint(c.tstamp,==,768); @@ -207,7 +174,6 @@ test_sender() assert_equal(c.email, "aa@example.com"); assert_equal(c.name, "Anders Ångström"); - g_assert_true(c.has_valid_email()); g_assert_false(c.personal); g_assert_cmpuint(c.frequency,==,1); g_assert_cmpuint(c.message_date,==,54321); @@ -222,15 +188,6 @@ test_misc() g_assert_false(!!contact_type_from_field_id(Field::Id::Subject)); } -static void -test_broken_email() -{ - Contact c{"a***@@booa@example..com", "abcdef", - Contact::Type::Sender, 54321}; - - g_assert_false(c.has_valid_email()); -} - int main(int argc, char* argv[]) { @@ -244,7 +201,6 @@ main(int argc, char* argv[]) g_test_add_func("/message/contact/sender", test_sender); g_test_add_func("/message/contact/misc", test_misc); - g_test_add_func("/message/contact/broken-email", test_broken_email); return g_test_run(); } diff --git a/lib/mu-contacts-cache.cc b/lib/mu-contacts-cache.cc index 4e60ce66..b9b9b50f 100644 --- a/lib/mu-contacts-cache.cc +++ b/lib/mu-contacts-cache.cc @@ -1,5 +1,5 @@ /* -** Copyright (C) 2019-2022 Dirk-Jan C. Binnema +** Copyright (C) 2019-2024 Dirk-Jan C. Binnema ** ** This program is free software; you can redistribute it and/or modify it ** under the terms of the GNU General Public License as published by the @@ -53,13 +53,19 @@ struct ContactsCache::Private { personal_rx_{make_rx_matchers()}, ignored_plain_{make_matchers()}, ignored_rx_{make_rx_matchers()}, - dirty_{0} {} + dirty_{0}, + email_rx_{unwrap(Regex::make(email_rx_str, G_REGEX_OPTIMIZE))} + {} ~Private() { serialize(); } ContactUMap deserialize(const std::string&) const; void serialize() const; + bool is_valid_email(const std::string& email) const { + return email_rx_.matches(email); + } + Config& config_db_; ContactUMap contacts_; mutable std::mutex mtx_; @@ -70,7 +76,8 @@ struct ContactsCache::Private { const StringVec ignored_plain_; const std::vector ignored_rx_; - mutable size_t dirty_; + mutable size_t dirty_; + Regex email_rx_; private: static bool is_rx(const std::string& p) { @@ -100,6 +107,19 @@ private: } return rxvec; } + + /* regexp as per: + * https://html.spec.whatwg.org/multipage/input.html#valid-e-mail-address + * + * "This requirement is a willful violation of RFC 5322, which defines a + * syntax for email addresses that is simultaneously too strict (before + * the "@" character), too vague (after the "@" character), and too lax + * (allowing comments, whitespace characters, and quoted strings in + * manners unfamiliar to most users) to be of practical use here." + */ + static constexpr auto email_rx_str = + R"(^[a-zA-Z0-9.!#$%&'*+\/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*$)"; + }; ContactUMap @@ -176,7 +196,7 @@ ContactsCache::add(Contact&& contact) { /* we do _not_ cache invalid email addresses, so we won't offer them in completions etc. It * should be _rare_, but we've seen cases ( broken local messages) */ - if (!contact.has_valid_email()) { + if (!is_valid(contact.email)) { mu_warning("not caching invalid e-mail address '{}'", contact.email); return; } @@ -346,6 +366,13 @@ ContactsCache::is_ignored(const std::string& addr) const return address_matches(addr, priv_->ignored_plain_, priv_->ignored_rx_); } +bool +ContactsCache::is_valid(const std::string& addr) const +{ + return priv_->is_valid_email(addr); +} + + #ifdef BUILD_TESTS /* * Tests. @@ -554,17 +581,28 @@ test_mu_contacts_cache_sort() } } +static void +test_mu_contacts_valid_address() +{ + MemDb xdb{}; + Config cdb{xdb}; + ContactsCache ccache(cdb); + + g_assert_true(ccache.is_valid("a@example.com")); + g_assert_false(ccache.is_valid("a***@@booa@example..com")); +} int main(int argc, char* argv[]) { mu_test_init(&argc, &argv); - g_test_add_func("/lib/contacts-cache/base", test_mu_contacts_cache_base); - g_test_add_func("/lib/contacts-cache/personal", test_mu_contacts_cache_personal); - g_test_add_func("/lib/contacts-cache/ignored", test_mu_contacts_cache_ignored); - g_test_add_func("/lib/contacts-cache/for-each", test_mu_contacts_cache_foreach); - g_test_add_func("/lib/contacts-cache/sort", test_mu_contacts_cache_sort); + g_test_add_func("/contacts-cache/base", test_mu_contacts_cache_base); + g_test_add_func("/contacts-cache/personal", test_mu_contacts_cache_personal); + g_test_add_func("/contacts-cache/ignored", test_mu_contacts_cache_ignored); + g_test_add_func("/contacts-cache/for-each", test_mu_contacts_cache_foreach); + g_test_add_func("/contacts-cache/sort", test_mu_contacts_cache_sort); + g_test_add_func("/contacts-cache/valid-address", test_mu_contacts_valid_address); return g_test_run(); } diff --git a/lib/mu-contacts-cache.hh b/lib/mu-contacts-cache.hh index 090195b5..d31c9dc3 100644 --- a/lib/mu-contacts-cache.hh +++ b/lib/mu-contacts-cache.hh @@ -116,7 +116,6 @@ public: */ bool is_personal(const std::string& addr) const; - /** * Does this look like an email-address that should be ignored? * @@ -126,6 +125,15 @@ public: */ bool is_ignored(const std::string& addr) const; + /** + * Does this look like a valid email-address? + * + * @param addr some e-mail address + * + * @return true or false + */ + bool is_valid(const std::string& addr) const; + /** * Find a contact based on the email address. This is not safe, since * the returned ptr can be invalidated at any time; only for unit-tests. diff --git a/mu/mu-cmd-cfind.cc b/mu/mu-cmd-cfind.cc index 5385c9c6..9c61595f 100644 --- a/mu/mu-cmd-cfind.cc +++ b/mu/mu-cmd-cfind.cc @@ -278,7 +278,7 @@ Mu::mu_cmd_cfind(const Mu::Store& store, const Mu::Options& opts) if (opts.cfind.maxnum && num > *opts.cfind.maxnum) return false; /* stop the loop */ - if (!contact.has_valid_email()) + if (!store.contacts_cache().is_valid(contact.email)) return true; /* next */ // filter for maxnum, personal & "after"