contacts/contacts-cache: cleanups

refactor code a bit; move sorting to Contact

remove unneeded hashing in Contact (just use the email address)
This commit is contained in:
Dirk-Jan C. Binnema
2025-05-25 11:20:52 +03:00
parent 9f08397fdd
commit 668d5ffb99
4 changed files with 105 additions and 132 deletions

View File

@ -61,16 +61,6 @@ Mu::to_string(const Mu::Contacts& contacts)
return res;
}
size_t
Mu::lowercase_hash(const std::string& s)
{
std::size_t djb = 5381; // djb hash
for (const auto c : s)
djb = ((djb << 5) + djb) +
static_cast<size_t>(g_ascii_tolower(c));
return djb;
}
#ifdef BUILD_TESTS
/*
* Tests.

View File

@ -1,5 +1,5 @@
/*
** Copyright (C) 2022 Dirk-Jan C. Binnema <djcb@djcbsoftware.nl>
** Copyright (C) 2022-2025 Dirk-Jan C. Binnema <djcb@djcbsoftware.nl>
**
** This program is free software; you can redistribute it and/or modify it
** under the terms of the GNU General Public License as published by the
@ -32,101 +32,118 @@
#include <utils/mu-option.hh>
#include "mu-fields.hh"
struct _InternetAddressList;
namespace Mu {
/**
* Get the hash value for a lowercase value of s; useful for email-addresses
*
* @param s a string
*
* @return a hash value.
*/
size_t lowercase_hash(const std::string& s);
struct Contact {
enum struct Type {
None, Sender, From, ReplyTo, To, Cc, Bcc
enum struct Type:char {
None='\0', Sender='s', From='f',
ReplyTo='r', To='t', Cc='c', Bcc='b'
};
/**
* Construct a new Contact
*
* @param email_ email address
* @param name_ name or empty
* @param type_ contact field type
* @param message_date_ data for the message for this contact
* @param email email address
* @param name name or empty
* @param type contact field type
* @param message_date date of the message for this contact
*/
Contact(const std::string& email_, const std::string& name_ = "",
Type type_ = Type::None, ::time_t message_date_ = 0)
: email{email_}, name{name_}, type{type_},
message_date{message_date_}, personal{}, frequency{1}, tstamp{}
Contact(const std::string& email, const std::string& name = {},
Type type = {}, int64_t message_date ={})
: email{email}, name{name}, type{type},
message_date{message_date}, personal{}, frequency{1}, tstamp{}
{ cleanup_name(); }
/**
* Construct a new Contact
*
* @param email_ email address
* @param name_ name or empty
* @param message_date_ date of message this contact originate from
* @param personal_ is this a personal contact?
* @param freq_ how often was this contact seen?
* @param tstamp_ timestamp for last change
* @param email email address
* @param name name or empty
* @param message_date date of the message this contact originates from
* @param personal is this a personal contact?
* @param freq how often was this contact seen?
* @param tstamp timestamp for last change
*/
Contact(const std::string& email_, const std::string& name_,
time_t message_date_, bool personal_, size_t freq_,
int64_t tstamp_)
: email{email_}, name{name_}, type{Type::None},
message_date{message_date_}, personal{personal_}, frequency{freq_},
tstamp{tstamp_}
Contact(const std::string& email, const std::string& name,
int64_t message_date, bool personal, size_t freq,
int64_t tstamp)
: email{email}, name{name}, type{},
message_date{message_date}, personal{personal}, frequency{freq},
tstamp{tstamp}
{ cleanup_name(); }
/**
* Get the "display name" for this contact:
*
* If there's a non-empty name, it's Jane Doe <email@example.com>
* otherwise it's just the e-mail address. Names with commas are quoted
* If there is a non-empty name, it is of the form
* Jane Doe <email@example.com>
* Otherwise it is just the e-mail address. Names with commas are quoted
* (with the quotes escaped).
*
* @return the display name
*/
std::string display_name() const;
/**
* Does the contact contain a valid email address as per
* https://html.spec.whatwg.org/multipage/input.html#valid-e-mail-address
* ?
*
* @return true or false
*/
bool has_valid_email() const;
/**
* Operator==; based on the hash values (ie. lowercase e-mail address)
* Operator==; based on the e-mail address only
*
* @param rhs some other Contact
*
* @return true orf false.
* @return true or false.
*/
bool operator== (const Contact& rhs) const noexcept {
return hash() == rhs.hash();
return email == rhs.email;
}
/**
* Operator!=
*
* @param rhs some other Contact
*
* @return true or false.
*/
bool operator!= (const Contact& rhs) const noexcept {
return !(*this == rhs);
}
static constexpr int64_t RecentOffset{15 * 24 * 3600};
/**< Contacts seen after now - RecentOffset seconds are considered
* "recent" */
/**
* Get a hash-value for this contact, which gets lazily calculated. This
* * is for use with container classes. This uses the _lowercase_ email
* address.
* operator<
*
* @return the hash
* Less "relevant" contacts are smaller than more relevant.
*
* Used as a somewhat over-engineered way to sort by relevance
*
* This is currently used for the ordering in mu-cfind and
* auto-completion in mu4e, if the various completion methods don't
* override it...
*
* @param rhs some other contact
*
* @return true or false
*/
size_t hash() const {
static size_t cached_hash;
if (cached_hash == 0) {
cached_hash = lowercase_hash(email);
}
return cached_hash;
// Relevance ordering: returns true when *this is less relevant than rhs.
// The criteria are tried in order; each one is only consulted when all
// earlier ones could not decide.
bool operator<(const Contact& rhs) const noexcept {
// non-personal is less relevant.
if (personal != rhs.personal)
return personal < rhs.personal;
// older is less relevant for recent messages; the date only decides
// when the dates differ and at least one of them is newer than the
// recently() cut-off
if (message_date != rhs.message_date &&
(message_date > recently() || rhs.message_date > recently()))
return message_date < rhs.message_date;
// less frequent is less relevant
if (frequency != rhs.frequency)
return frequency < rhs.frequency;
// if all else fails, alphabetically (std::string comparison of the
// e-mail addresses, so it is case-sensitive)
return email < rhs.email;
}
/**
@ -153,18 +170,16 @@ struct Contact {
#pragma GCC diagnostic pop
}
/*
* data members
*/
std::string email; /**< Email address for this contact.Not empty */
std::string name; /**< Name for this contact; can be empty. */
Type type; /**< Type of contact */
int64_t message_date; /**< Date of the contact's message */
bool personal; /**< A personal message? */
size_t frequency; /**< Frequency of this contact */
int64_t tstamp; /**< Timestamp for this contact (internal use) */
std::string email{}; /**< Email address for this contact.Not empty */
std::string name{}; /**< Name for this contact; can be empty. */
Type type{Type::None};/**< Type of contact */
int64_t message_date{}; /**< Date of the contact's message */
bool personal{}; /**< A personal message? */
size_t frequency{}; /**< Frequency of this contact */
int64_t tstamp{}; /**< Timestamp for this contact (internal use) */
private:
void cleanup_name() { // replace control characters by spaces.
@ -172,6 +187,18 @@ private:
if (iscntrl(c))
c = ' ';
}
/**
* Oldest timestamp considered "recent"
*
* This is arbitrary of course
*
* @return timestamp
*/
static int64_t recently() {
// function-local static: initialized once, on the first call, as
// "current time minus RecentOffset"; later calls return that same
// value, so the cut-off does not advance while the process runs
static const auto recent{::time({}) - RecentOffset};
return recent;
}
};
constexpr Option<Contact::Type>
@ -212,7 +239,7 @@ std::string to_string(const Contacts& contacts);
*/
template<> struct std::hash<Mu::Contact> {
std::size_t operator()(const Mu::Contact& c) const noexcept {
return c.hash();
return std::hash<std::string>{}(c.email);
}
};

View File

@ -323,8 +323,7 @@ MimeMessage::make_from_text(const std::string& text)
Option<int64_t>
MimeMessage::date() const noexcept
{
GDateTime *dt{g_mime_message_get_date(self())};
if (!dt)
if (/*const*/GDateTime *dt{g_mime_message_get_date(self())}; !dt)
return Nothing;
else
return g_date_time_to_unix(dt);
@ -395,12 +394,10 @@ MimeMessage::contacts(Contact::Type ctype) const noexcept
contacts.reserve(lst_len);
for (auto i = 0; i != lst_len; ++i) {
auto&& addr{internet_address_list_get_address(addrs, i)};
const auto addr{internet_address_list_get_address(addrs, i)};
const auto name{internet_address_get_name(addr)};
if (G_UNLIKELY(!INTERNET_ADDRESS_IS_MAILBOX(addr)))
continue;
const auto email{internet_address_mailbox_get_addr (
INTERNET_ADDRESS_MAILBOX(addr))};
if (G_UNLIKELY(!email))

View File

@ -1,5 +1,5 @@
/*
** Copyright (C) 2019-2024 Dirk-Jan C. Binnema <djcb@djcbsoftware.nl>
** Copyright (C) 2019-2025 Dirk-Jan C. Binnema <djcb@djcbsoftware.nl>
**
** This program is free software; you can redistribute it and/or modify it
** under the terms of the GNU General Public License as published by the
@ -33,18 +33,7 @@
using namespace Mu;
struct EmailHash {
std::size_t operator()(const std::string& email) const {
return lowercase_hash(email);
}
};
struct EmailEqual {
bool operator()(const std::string& email1, const std::string& email2) const {
return lowercase_hash(email1) == lowercase_hash(email2);
}
};
using ContactUMap = std::unordered_map<const std::string, Contact, EmailHash, EmailEqual>;
using ContactUMap = std::unordered_map<std::string, Contact>;
struct ContactsCache::Private {
Private(Config& config_db)
:config_db_{config_db},
@ -137,7 +126,7 @@ ContactsCache::Private::deserialize(const std::string& serialized) const
}
Contact ci(parts[0], // email
std::move(parts[1]), // name
(time_t)g_ascii_strtoll(parts[3].c_str(), NULL, 10), // message_date
static_cast<int64_t>(g_ascii_strtoll(parts[3].c_str(), NULL, 10)), // message_date
parts[2][0] == '1' ? true : false, // personal
(std::size_t)g_ascii_strtoll(parts[4].c_str(), NULL, 10), // frequency
g_get_monotonic_time()); // tstamp
@ -147,7 +136,6 @@ ContactsCache::Private::deserialize(const std::string& serialized) const
return contacts;
}
void
ContactsCache::Private::serialize() const
{
@ -287,41 +275,15 @@ ContactsCache::size() const
/**
* This is used for sorting the Contacts in order of relevance. A highly
* specific algorithm, but the details don't matter _too_ much.
*
* This is currently used for the ordering in mu-cfind and auto-completion in
* mu4e, if the various completion methods don't override it...
* Wrapper to allow for Contact sorting
* (needed due to reference_wrapper)
*/
constexpr auto RecentOffset{15 * 24 * 3600};
struct ContactLessThan {
ContactLessThan()
: recently_{::time({}) - RecentOffset} {}
bool operator()(const Mu::Contact& ci1, const Mu::Contact& ci2) const {
// non-personal is less relevant.
if (ci1.personal != ci2.personal)
return ci1.personal < ci2.personal;
// older is less relevant for recent messages
if (std::max(ci1.message_date, ci2.message_date) > recently_ &&
ci1.message_date != ci2.message_date)
return ci1.message_date < ci2.message_date;
// less frequent is less relevant
if (ci1.frequency != ci2.frequency)
return ci1.frequency < ci2.frequency;
// if all else fails, alphabetically
return ci1.email < ci2.email;
bool operator()(const Mu::Contact& c1, const Mu::Contact& c2) const {
return c1 < c2;
}
// only sort recently seen contacts by recency; approx 15 days.
// this changes during the lifetime, but that's all fine.
const time_t recently_;
};
using ContactSet = std::set<std::reference_wrapper<const Contact>,
ContactLessThan>;
using ContactSet = std::set<std::reference_wrapper<const Contact>, ContactLessThan>;
void
ContactsCache::for_each(const EachContactFunc& each_contact) const
@ -382,6 +344,8 @@ ContactsCache::is_valid(const std::string& addr) const
#include "utils/mu-test-utils.hh"
constexpr auto RecentOffset = Contact::RecentOffset;
static void
test_mu_contacts_cache_base()
{
@ -406,10 +370,8 @@ test_mu_contacts_cache_base()
Mu::Contact("foo.bar@example.com", "Foo", {}, 77777));
g_assert_cmpuint(contacts.size(), ==, 2);
contacts.add(
Mu::Contact("Foo.Bar@Example.Com", "Foo", {}, 88888));
g_assert_cmpuint(contacts.size(), ==, 2);
// note: replaces first.
contacts.add(Mu::Contact("Foo.Bar@Example.Com", "Foo", {}, 88888));
g_assert_cmpuint(contacts.size(), ==, 3);
{
const auto info = contacts._find("bla@example.com");
@ -417,9 +379,8 @@ test_mu_contacts_cache_base()
}
{
const auto info = contacts._find("foo.BAR@example.com");
const auto info = contacts._find("Foo.Bar@Example.Com");
g_assert_true(info);
g_assert_cmpstr(info->email.c_str(), ==, "Foo.Bar@Example.Com");
}
@ -515,8 +476,6 @@ test_mu_contacts_cache_foreach()
}
}
static void
test_mu_contacts_cache_sort()
{