contacts/contacts-cache: cleanups

refactor code a bit; move sorting to Contact

remove unneeded hashing in Contact (just use the email address)
This commit is contained in:
Dirk-Jan C. Binnema
2025-05-25 11:20:52 +03:00
parent 9f08397fdd
commit 668d5ffb99
4 changed files with 105 additions and 132 deletions

View File

@ -61,16 +61,6 @@ Mu::to_string(const Mu::Contacts& contacts)
return res; return res;
} }
size_t
Mu::lowercase_hash(const std::string& s)
{
std::size_t djb = 5381; // djb hash
for (const auto c : s)
djb = ((djb << 5) + djb) +
static_cast<size_t>(g_ascii_tolower(c));
return djb;
}
#ifdef BUILD_TESTS #ifdef BUILD_TESTS
/* /*
* Tests. * Tests.

View File

@ -1,5 +1,5 @@
/* /*
** Copyright (C) 2022 Dirk-Jan C. Binnema <djcb@djcbsoftware.nl> ** Copyright (C) 2022-2025 Dirk-Jan C. Binnema <djcb@djcbsoftware.nl>
** **
** This program is free software; you can redistribute it and/or modify it ** This program is free software; you can redistribute it and/or modify it
** under the terms of the GNU General Public License as published by the ** under the terms of the GNU General Public License as published by the
@ -32,101 +32,118 @@
#include <utils/mu-option.hh> #include <utils/mu-option.hh>
#include "mu-fields.hh" #include "mu-fields.hh"
struct _InternetAddressList;
namespace Mu { namespace Mu {
/**
* Get the hash value for a lowercase value of s; useful for email-addresses
*
* @param s a string
*
* @return a hash value.
*/
size_t lowercase_hash(const std::string& s);
struct Contact { struct Contact {
enum struct Type { enum struct Type:char {
None, Sender, From, ReplyTo, To, Cc, Bcc None='\0', Sender='s', From='f',
ReplyTo='r', To='t', Cc='c', Bcc='b'
}; };
/** /**
* Construct a new Contact * Construct a new Contact
* *
* @param email_ email address * @param email email address
* @param name_ name or empty * @param name name or empty
* @param type_ contact field type * @param type contact field type
* @param message_date_ date of the message for this contact * @param message_date date of the message for this contact
*/ */
Contact(const std::string& email_, const std::string& name_ = "", Contact(const std::string& email, const std::string& name = {},
Type type_ = Type::None, ::time_t message_date_ = 0) Type type = {}, int64_t message_date ={})
: email{email_}, name{name_}, type{type_}, : email{email}, name{name}, type{type},
message_date{message_date_}, personal{}, frequency{1}, tstamp{} message_date{message_date}, personal{}, frequency{1}, tstamp{}
{ cleanup_name(); } { cleanup_name(); }
/** /**
* Construct a new Contact * Construct a new Contact
* *
* @param email_ email address * @param email email address
* @param name_ name or empty * @param name name or empty
* @param message_date_ date of the message this contact originates from * @param message_date date of the message this contact originates from
* @param personal_ is this a personal contact? * @param personal is this a personal contact?
* @param freq_ how often was this contact seen? * @param freq how often was this contact seen?
* @param tstamp_ timestamp for last change * @param tstamp timestamp for last change
*/ */
Contact(const std::string& email_, const std::string& name_, Contact(const std::string& email, const std::string& name,
time_t message_date_, bool personal_, size_t freq_, int64_t message_date, bool personal, size_t freq,
int64_t tstamp_) int64_t tstamp)
: email{email_}, name{name_}, type{Type::None}, : email{email}, name{name}, type{},
message_date{message_date_}, personal{personal_}, frequency{freq_}, message_date{message_date}, personal{personal}, frequency{freq},
tstamp{tstamp_} tstamp{tstamp}
{ cleanup_name(); } { cleanup_name(); }
/** /**
* Get the "display name" for this contact: * Get the "display name" for this contact:
* *
* If there's a non-empty name, it's Jane Doe <email@example.com> * If there is a non-empty name, it is of the form
* otherwise it's just the e-mail address. Names with commas are quoted * Jane Doe <email@example.com>
* Otherwise it is just the e-mail address. Names with commas are quoted
* (with the quotes escaped). * (with the quotes escaped).
* *
* @return the display name * @return the display name
*/ */
std::string display_name() const; std::string display_name() const;
/** /**
* Does the contact contain a valid email address as per * Does the contact contain a valid email address as per
* https://html.spec.whatwg.org/multipage/input.html#valid-e-mail-address * https://html.spec.whatwg.org/multipage/input.html#valid-e-mail-address
* ? * ?
*
* @return true or false * @return true or false
*/ */
bool has_valid_email() const; bool has_valid_email() const;
/** /**
* Operator==; based on the hash values (ie. lowercase e-mail address) * Operator==; based on the e-mail address only
* *
* @param rhs some other Contact * @param rhs some other Contact
* *
* @return true orf false. * @return true or false.
*/ */
bool operator== (const Contact& rhs) const noexcept { bool operator== (const Contact& rhs) const noexcept {
return hash() == rhs.hash(); return email == rhs.email;
}
/**
* Operator!=
*
* @param rhs some other Contact
*
* @return true or false.
*/
bool operator!= (const Contact& rhs) const noexcept {
return !(*this == rhs);
} }
static constexpr int64_t RecentOffset{15 * 24 * 3600};
/**< Contacts seen after now - RecentOffset seconds are considered
* "recent" */
/** /**
* Get a hash-value for this contact, which gets lazily calculated. This * operator<
* * is for use with container classes. This uses the _lowercase_ email
* address.
* *
* @return the hash * Less "relevant" contacts are smaller than more relevant.
*
* Used as a somewhat over-engineered way to sort by relevance
*
* This is currently used for the ordering in mu-cfind and
* auto-completion in mu4e, if the various completion methods don't
* override it...
*
* @param rhs some other contact
*
* @return true or false
*/ */
size_t hash() const { bool operator<(const Contact& rhs) const noexcept {
static size_t cached_hash; // non-personal is less relevant.
if (cached_hash == 0) { if (personal != rhs.personal)
cached_hash = lowercase_hash(email); return personal < rhs.personal;
} // older is less relevant for recent messages
return cached_hash; if (message_date != rhs.message_date &&
(message_date > recently() || rhs.message_date > recently()))
return message_date < rhs.message_date;
// less frequent is less relevant
if (frequency != rhs.frequency)
return frequency < rhs.frequency;
// if all else fails, alphabetically
return email < rhs.email;
} }
/** /**
@ -153,18 +170,16 @@ struct Contact {
#pragma GCC diagnostic pop #pragma GCC diagnostic pop
} }
/* /*
* data members * data members
*/ */
std::string email{}; /**< Email address for this contact. Not empty */
std::string email; /**< Email address for this contact. Not empty */ std::string name{}; /**< Name for this contact; can be empty. */
std::string name; /**< Name for this contact; can be empty. */ Type type{Type::None};/**< Type of contact */
Type type; /**< Type of contact */ int64_t message_date{}; /**< Date of the contact's message */
int64_t message_date; /**< Date of the contact's message */ bool personal{}; /**< A personal message? */
bool personal; /**< A personal message? */ size_t frequency{}; /**< Frequency of this contact */
size_t frequency; /**< Frequency of this contact */ int64_t tstamp{}; /**< Timestamp for this contact (internal use) */
int64_t tstamp; /**< Timestamp for this contact (internal use) */
private: private:
void cleanup_name() { // replace control characters by spaces. void cleanup_name() { // replace control characters by spaces.
@ -172,6 +187,18 @@ private:
if (iscntrl(c)) if (iscntrl(c))
c = ' '; c = ' ';
} }
/**
* Oldest timestamp considered "recent"
*
* This is arbitrary of course
*
* @return timestamp
*/
static int64_t recently() {
static const auto recent{::time({}) - RecentOffset};
return recent;
}
}; };
constexpr Option<Contact::Type> constexpr Option<Contact::Type>
@ -212,7 +239,7 @@ std::string to_string(const Contacts& contacts);
*/ */
template<> struct std::hash<Mu::Contact> { template<> struct std::hash<Mu::Contact> {
std::size_t operator()(const Mu::Contact& c) const noexcept { std::size_t operator()(const Mu::Contact& c) const noexcept {
return c.hash(); return std::hash<std::string>{}(c.email);
} }
}; };

View File

@ -323,8 +323,7 @@ MimeMessage::make_from_text(const std::string& text)
Option<int64_t> Option<int64_t>
MimeMessage::date() const noexcept MimeMessage::date() const noexcept
{ {
GDateTime *dt{g_mime_message_get_date(self())}; if (/*const*/GDateTime *dt{g_mime_message_get_date(self())}; !dt)
if (!dt)
return Nothing; return Nothing;
else else
return g_date_time_to_unix(dt); return g_date_time_to_unix(dt);
@ -395,12 +394,10 @@ MimeMessage::contacts(Contact::Type ctype) const noexcept
contacts.reserve(lst_len); contacts.reserve(lst_len);
for (auto i = 0; i != lst_len; ++i) { for (auto i = 0; i != lst_len; ++i) {
auto&& addr{internet_address_list_get_address(addrs, i)}; const auto addr{internet_address_list_get_address(addrs, i)};
const auto name{internet_address_get_name(addr)}; const auto name{internet_address_get_name(addr)};
if (G_UNLIKELY(!INTERNET_ADDRESS_IS_MAILBOX(addr))) if (G_UNLIKELY(!INTERNET_ADDRESS_IS_MAILBOX(addr)))
continue; continue;
const auto email{internet_address_mailbox_get_addr ( const auto email{internet_address_mailbox_get_addr (
INTERNET_ADDRESS_MAILBOX(addr))}; INTERNET_ADDRESS_MAILBOX(addr))};
if (G_UNLIKELY(!email)) if (G_UNLIKELY(!email))

View File

@ -1,5 +1,5 @@
/* /*
** Copyright (C) 2019-2024 Dirk-Jan C. Binnema <djcb@djcbsoftware.nl> ** Copyright (C) 2019-2025 Dirk-Jan C. Binnema <djcb@djcbsoftware.nl>
** **
** This program is free software; you can redistribute it and/or modify it ** This program is free software; you can redistribute it and/or modify it
** under the terms of the GNU General Public License as published by the ** under the terms of the GNU General Public License as published by the
@ -33,18 +33,7 @@
using namespace Mu; using namespace Mu;
struct EmailHash { using ContactUMap = std::unordered_map<std::string, Contact>;
std::size_t operator()(const std::string& email) const {
return lowercase_hash(email);
}
};
struct EmailEqual {
bool operator()(const std::string& email1, const std::string& email2) const {
return lowercase_hash(email1) == lowercase_hash(email2);
}
};
using ContactUMap = std::unordered_map<const std::string, Contact, EmailHash, EmailEqual>;
struct ContactsCache::Private { struct ContactsCache::Private {
Private(Config& config_db) Private(Config& config_db)
:config_db_{config_db}, :config_db_{config_db},
@ -137,7 +126,7 @@ ContactsCache::Private::deserialize(const std::string& serialized) const
} }
Contact ci(parts[0], // email Contact ci(parts[0], // email
std::move(parts[1]), // name std::move(parts[1]), // name
(time_t)g_ascii_strtoll(parts[3].c_str(), NULL, 10), // message_date static_cast<int64_t>(g_ascii_strtoll(parts[3].c_str(), NULL, 10)), // message_date
parts[2][0] == '1' ? true : false, // personal parts[2][0] == '1' ? true : false, // personal
(std::size_t)g_ascii_strtoll(parts[4].c_str(), NULL, 10), // frequency (std::size_t)g_ascii_strtoll(parts[4].c_str(), NULL, 10), // frequency
g_get_monotonic_time()); // tstamp g_get_monotonic_time()); // tstamp
@ -147,7 +136,6 @@ ContactsCache::Private::deserialize(const std::string& serialized) const
return contacts; return contacts;
} }
void void
ContactsCache::Private::serialize() const ContactsCache::Private::serialize() const
{ {
@ -287,41 +275,15 @@ ContactsCache::size() const
/** /**
* This is used for sorting the Contacts in order of relevance. A highly * Wrapper to allow for Contact sorting
* specific algorithm, but the details don't matter _too_ much. * (needed due to reference_wrapper)
*
* This is currently used for the ordering in mu-cfind and auto-completion in
* mu4e, if the various completion methods don't override it...
*/ */
constexpr auto RecentOffset{15 * 24 * 3600};
struct ContactLessThan { struct ContactLessThan {
ContactLessThan() bool operator()(const Mu::Contact& c1, const Mu::Contact& c2) const {
: recently_{::time({}) - RecentOffset} {} return c1 < c2;
bool operator()(const Mu::Contact& ci1, const Mu::Contact& ci2) const {
// non-personal is less relevant.
if (ci1.personal != ci2.personal)
return ci1.personal < ci2.personal;
// older is less relevant for recent messages
if (std::max(ci1.message_date, ci2.message_date) > recently_ &&
ci1.message_date != ci2.message_date)
return ci1.message_date < ci2.message_date;
// less frequent is less relevant
if (ci1.frequency != ci2.frequency)
return ci1.frequency < ci2.frequency;
// if all else fails, alphabetically
return ci1.email < ci2.email;
} }
// only sort recently seen contacts by recency; approx 15 days.
// this changes during the lifetime, but that's all fine.
const time_t recently_;
}; };
using ContactSet = std::set<std::reference_wrapper<const Contact>, ContactLessThan>;
using ContactSet = std::set<std::reference_wrapper<const Contact>,
ContactLessThan>;
void void
ContactsCache::for_each(const EachContactFunc& each_contact) const ContactsCache::for_each(const EachContactFunc& each_contact) const
@ -382,6 +344,8 @@ ContactsCache::is_valid(const std::string& addr) const
#include "utils/mu-test-utils.hh" #include "utils/mu-test-utils.hh"
constexpr auto RecentOffset = Contact::RecentOffset;
static void static void
test_mu_contacts_cache_base() test_mu_contacts_cache_base()
{ {
@ -406,10 +370,8 @@ test_mu_contacts_cache_base()
Mu::Contact("foo.bar@example.com", "Foo", {}, 77777)); Mu::Contact("foo.bar@example.com", "Foo", {}, 77777));
g_assert_cmpuint(contacts.size(), ==, 2); g_assert_cmpuint(contacts.size(), ==, 2);
contacts.add( contacts.add(Mu::Contact("Foo.Bar@Example.Com", "Foo", {}, 88888));
Mu::Contact("Foo.Bar@Example.Com", "Foo", {}, 88888)); g_assert_cmpuint(contacts.size(), ==, 3);
g_assert_cmpuint(contacts.size(), ==, 2);
// note: replaces first.
{ {
const auto info = contacts._find("bla@example.com"); const auto info = contacts._find("bla@example.com");
@ -417,9 +379,8 @@ test_mu_contacts_cache_base()
} }
{ {
const auto info = contacts._find("foo.BAR@example.com"); const auto info = contacts._find("Foo.Bar@Example.Com");
g_assert_true(info); g_assert_true(info);
g_assert_cmpstr(info->email.c_str(), ==, "Foo.Bar@Example.Com"); g_assert_cmpstr(info->email.c_str(), ==, "Foo.Bar@Example.Com");
} }
@ -515,8 +476,6 @@ test_mu_contacts_cache_foreach()
} }
} }
static void static void
test_mu_contacts_cache_sort() test_mu_contacts_cache_sort()
{ {