lib: support 'personal' regexp, move to mu-contacts

Move the determination of "personal" to MuContacts; add support for
regexps (POSIX basic syntax, written between slashes, as in /pattern/)
This commit is contained in:
Dirk-Jan C. Binnema
2020-10-13 23:38:26 +03:00
parent 5cd6226ebd
commit dbff5671dd
7 changed files with 186 additions and 99 deletions

View File

@ -25,6 +25,7 @@
#include <sstream>
#include <functional>
#include <algorithm>
#include <regex>
#include <utils/mu-utils.hh>
#include <glib.h>
@ -34,7 +35,21 @@ using namespace Mu;
ContactInfo::ContactInfo (const std::string& _full_address,
const std::string& _email,
const std::string& _name,
bool _personal, time_t _last_seen, size_t _freq):
time_t _last_seen):
full_address{_full_address},
email{_email},
name{_name},
last_seen{_last_seen},
freq{1},
tstamp{g_get_monotonic_time()} {}
ContactInfo::ContactInfo (const std::string& _full_address,
const std::string& _email,
const std::string& _name,
bool _personal,
time_t _last_seen,
size_t _freq):
full_address{_full_address},
email{_email},
name{_name},
@ -43,7 +58,6 @@ ContactInfo::ContactInfo (const std::string& _full_address,
freq{_freq},
tstamp{g_get_monotonic_time()} {}
struct EmailHash {
std::size_t operator()(const std::string& email) const {
std::size_t djb = 5381; // djb hash
@ -95,19 +109,55 @@ using ContactUMap = std::unordered_map<const std::string, ContactInfo, EmailHash
using ContactSet = std::set<std::reference_wrapper<const ContactInfo>, ContactInfoLessThan>;
// Private implementation data of Contacts: the table of known contacts,
// the mutex that guards it, and the configured personal addresses.
struct Contacts::Private {
// Build the contact table from its serialized form.
Private(const std::string& serialized):
contacts_{deserialize(serialized)}
{}
// Build the contact table from its serialized form, then split the
// personal entries into plain addresses and regexp patterns.
Private(const std::string& serialized,
const StringVec& personal):
contacts_{deserialize(serialized)} {
make_personal(personal);
}
// Fills personal_plain_ and personal_rx_ from the given entries.
void make_personal(const StringVec& personal);
ContactUMap deserialize(const std::string&) const;
std::string serialize() const;
ContactUMap contacts_; // contacts, keyed by e-mail address string
std::mutex mtx_; // locked by Contacts::add and Contacts::_find
StringVec personal_plain_; // personal addresses given as plain strings
std::vector<std::regex> personal_rx_; // personal addresses given as /regexp/
};
constexpr auto Separator = "\xff"; // Invalid in UTF-8
/**
 * Sort the configured personal entries into plain addresses and regexp
 * patterns. An entry of at least two characters that starts and ends
 * with '/' is compiled as a case-insensitive POSIX-basic regexp (without
 * the slashes); every other non-empty entry is kept as a plain address.
 * Patterns that fail to compile are reported with a warning and dropped.
 *
 * @param personal the personal addresses / patterns
 */
void
Contacts::Private::make_personal (const StringVec& personal)
{
	// an entry is a pattern when it is wrapped in slashes: /.../
	const auto is_pattern = [](const std::string& entry) {
		return entry.size() >= 2 &&
			entry.front() == '/' && entry.back() == '/';
	};

	for (const auto& entry: personal) {

		if (entry.empty())
			continue; // invalid

		if (!is_pattern(entry)) {
			personal_plain_.emplace_back(entry); // normal address
			continue;
		}

		try {
			// drop the leading and trailing '/'
			const std::string pattern = entry.substr(1, entry.length() - 2);
			personal_rx_.emplace_back(
				std::regex(pattern,
					   std::regex::basic |
					   std::regex::optimize |
					   std::regex::icase));
		} catch (const std::regex_error& rex) {
			g_warning ("invalid personal address regexp '%s': %s",
				   entry.c_str(), rex.what());
		}
	}
}
ContactUMap
Contacts::Private::deserialize(const std::string& serialized) const
{
@ -131,15 +181,14 @@ Contacts::Private::deserialize(const std::string& serialized) const
(std::size_t)g_ascii_strtoll(parts[5].c_str(), NULL, 10)); // freq
contacts.emplace(std::move(parts[1]), std::move(ci));
}
return contacts;
}
Contacts::Contacts (const std::string& serialized) :
priv_{std::make_unique<Private>(serialized)}
Contacts::Contacts (const std::string& serialized, const StringVec& personal) :
priv_{std::make_unique<Private>(serialized, personal)}
{}
Contacts::~Contacts() = default;
@ -170,44 +219,42 @@ Contacts::serialize() const
}
/**
 * Clean up a string in place; for now this only means removing every
 * newline character.
 *
 * @param str the string to clean up
 */
static void
wash (std::string& str)
{
	const auto kept_end = std::remove_if(
		str.begin(), str.end(),
		[](char c) { return c == '\n'; });

	str.erase(kept_end, str.end());
}
/**
 * Add a contact to the set, or update the stored entry when its e-mail
 * address is already present.
 *
 * NOTE(review): this span comes from a diff view and appears to
 * interleave the previous and the new body of the function; confirm the
 * statement order against the actual file before relying on it.
 *
 * @param ci the contact to add
 */
void
Contacts::add (ContactInfo&& ci)
{
std::lock_guard<std::mutex> l_{priv_->mtx_};
auto down = g_ascii_strdown (ci.email.c_str(), -1);
std::string email{down};
g_free(down);
auto it = priv_->contacts_.find(ci.email);
auto it = priv_->contacts_.find(email);
if (it != priv_->contacts_.end()) {
auto& ci2 = it->second;
++ci2.freq;
if (ci.last_seen > ci2.last_seen) {
ci2.last_seen = ci.last_seen;
wash(ci.email);
ci2.email = std::move(ci.email);
if (!ci.name.empty()) {
wash(ci.name);
ci2.name = std::move(ci.name);
}
if (it == priv_->contacts_.end()) { // completely new contact
wash(ci.name);
wash(ci.full_address);
ci.freq = 1;
ci.personal = is_personal(ci.email);
auto email{ci.email};
priv_->contacts_.emplace(ContactUMap::value_type(email, std::move(ci)));
} else { // existing contact.
auto& ci_existing{it->second};
++ci_existing.freq;
if (ci.last_seen > ci_existing.last_seen) {
// update.
wash(ci.name);
ci_existing.name = std::move(ci.name);
ci_existing.email = std::move(ci.email);
wash(ci.full_address);
ci_existing.full_address = std::move(ci.full_address);
ci_existing.tstamp = g_get_monotonic_time();
}
}
wash(ci.name);
wash(ci.email);
wash(ci.full_address);
priv_->contacts_.emplace(
ContactUMap::value_type(std::move(email), std::move(ci)));
}
@ -216,8 +263,7 @@ Contacts::_find (const std::string& email) const
{
std::lock_guard<std::mutex> l_{priv_->mtx_};
ContactInfo ci{"", email, "", false, 0};
const auto it = priv_->contacts_.find(ci.email);
const auto it = priv_->contacts_.find(email);
if (it == priv_->contacts_.end())
return {};
else
@ -260,6 +306,23 @@ Contacts::for_each(const EachContactFunc& each_contact) const
each_contact (ci);
}
/**
 * Does this look like a 'personal' address?
 *
 * An address is personal when it equals one of the plain personal
 * addresses (ASCII case-insensitive comparison), or when one of the
 * personal regexp patterns matches the address as a whole.
 *
 * @param addr some e-mail address
 *
 * @return true if the address is personal, false otherwise
 */
bool
Contacts::is_personal(const std::string& addr) const
{
	const auto& plain{priv_->personal_plain_};
	const auto is_plain = std::any_of(
		plain.begin(), plain.end(),
		[&addr](const std::string& p) {
			return g_ascii_strcasecmp(addr.c_str(), p.c_str()) == 0;
		});
	if (is_plain)
		return true;

	// Only the yes/no answer is needed, so use the regex_match overload
	// without match results rather than filling a std::smatch per pattern.
	// (perhaps cache addr in personal_plain_?)
	const auto& patterns{priv_->personal_rx_};
	return std::any_of(
		patterns.begin(), patterns.end(),
		[&addr](const std::regex& rx) {
			return std::regex_match(addr, rx);
		});
}
/// C binding
size_t

View File

@ -34,6 +34,7 @@ typedef struct _MuContacts MuContacts;
#include <string>
#include <time.h>
#include <inttypes.h>
#include <utils/mu-utils.hh>
namespace Mu {
@ -46,25 +47,38 @@ struct ContactInfo {
* @param _full_address the full email address + name.
* @param _email email address
* @param _name name or empty
* @param _personal is this a personal contact?
* @param _last_seen when was this contact last seen?
* @param _freq how often was this contact seen?
*
* @return
*/
ContactInfo (const std::string& _full_address,
const std::string& _email,
const std::string& _name,
bool _personal, time_t _last_seen, size_t _freq=1);
time_t _last_seen);
/**
* Construct a new ContactInfo
*
* @param _full_address the full email address + name.
* @param _email email address
* @param _name name or empty
* @param _personal is this a personal contact?
* @param _last_seen when was this contact last seen?
* @param _freq how often was this contact seen?
*/
ContactInfo (const std::string& _full_address,
const std::string& _email,
const std::string& _name,
bool personal,
time_t _last_seen,
size_t freq);
std::string full_address; /**< Full name <email> */
std::string email; /**< email address */
std::string name; /**< name (or empty) */
bool personal; /**< is this a personal contact? */
time_t last_seen; /**< when was this contact last seen? */
std::size_t freq; /**< how often was this contact seen? */
bool personal{}; /**< is this a personal contact? */
time_t last_seen{}; /**< when was this contact last seen? */
std::size_t freq{}; /**< how often was this contact seen? */
int64_t tstamp; /**< Time-stamp, as per g_get_monotonic_time */
int64_t tstamp{}; /**< Time-stamp, as per g_get_monotonic_time */
};
/// All contacts
@ -74,8 +88,10 @@ public:
* Construct a new contacts objects
*
* @param serialized serialized contacts
* @param personal personal addresses
*/
Contacts (const std::string& serialized = "");
Contacts (const std::string& serialized = "",
const StringVec& personal={});
/**
* DTOR
@ -118,6 +134,16 @@ public:
*/
std::string serialize() const;
/**
* Does this look like a 'personal' address?
*
* @param addr some e-mail address
*
* @return true or false
*/
bool is_personal(const std::string& addr) const;
/**
* Find a contact based on the email address. This is not safe, since
* the returned ptr can be invalidated at any time; only for unit-tests.

View File

@ -114,7 +114,7 @@ struct Store::Private {
Private (const std::string& path, bool readonly):
db_{make_xapian(path, readonly ? XapianOpts::ReadOnly : XapianOpts::Open)},
mdata_{make_metadata(path)},
contacts_{db()->get_metadata(ContactsKey)} {
contacts_{db()->get_metadata(ContactsKey), mdata_.personal_addresses} {
if (!readonly)
wdb()->begin_transaction();
@ -123,7 +123,8 @@ struct Store::Private {
Private (const std::string& path, const std::string& root_maildir,
const StringVec& personal_addresses, const Store::Config& conf):
db_{make_xapian(path, XapianOpts::CreateOverwrite)},
mdata_{init_metadata(conf, path, root_maildir, personal_addresses)} {
mdata_{init_metadata(conf, path, root_maildir, personal_addresses)},
contacts_{"", mdata_.personal_addresses} {
wdb()->begin_transaction();
}
@ -307,7 +308,6 @@ Store::metadata() const
/**
 * Get the Contacts object held by this store.
 *
 * @return a const reference to the store's contacts
 */
const Contacts&
Store::contacts() const
{
LOCKED; // NOTE(review): macro defined outside this view; presumably takes the store lock -- confirm
return priv_->contacts_;
}
@ -1045,32 +1045,11 @@ each_contact_info (MuMsgContact *contact, MsgDoc *msgdoc)
contacts.add(Mu::ContactInfo(contact->full_address,
contact->email,
contact->name ? contact->name : "",
msgdoc->_personal,
mu_msg_get_date(msgdoc->_msg)));
}
return TRUE;
}
/**
 * Contact callback: flag the message as personal when the contact's
 * e-mail address equals (ASCII case-insensitively) one of the addresses
 * in msgdoc->_my_addresses.
 *
 * @param contact the contact to check
 * @param msgdoc the message document state; _personal may be set
 *
 * @return always TRUE
 */
static gboolean
each_contact_check_if_personal (MuMsgContact *contact, MsgDoc *msgdoc)
{
	// nothing to do when already personal, or without an address
	const bool skip = msgdoc->_personal || !contact->email;
	if (skip)
		return TRUE;

	for (const auto& mine : *msgdoc->_my_addresses) {
		if (g_ascii_strcasecmp (contact->email, mine.c_str()) != 0)
			continue; // not this one

		msgdoc->_personal = TRUE;
		break;
	}

	return TRUE;
}
static Xapian::Document
new_doc_from_message (MuStore *store, MuMsg *msg)
{
@ -1079,17 +1058,20 @@ new_doc_from_message (MuStore *store, MuMsg *msg)
mu_msg_field_foreach ((MuMsgFieldForeachFunc)add_terms_values, &docinfo);
/* determine whether this is 'personal' email, ie. one of my
* e-mail addresses is explicitly mentioned -- it's not a
* mailing list message. Callback will update docinfo->_personal */
const auto& personal_addresses = self(store)->metadata().personal_addresses;
if (personal_addresses.size()) {
docinfo._my_addresses = &personal_addresses;
mu_msg_contact_foreach
(msg,
(MuMsgContactForeachFunc)each_contact_check_if_personal,
&docinfo);
}
mu_msg_contact_foreach
(msg, [](auto contact, gpointer msgdocptr)->gboolean {
auto msgdoc{reinterpret_cast<MsgDoc*>(msgdocptr)};
if (!contact->email)
return FALSE; // invalid contact
else if (msgdoc->_personal)
return TRUE; // already deemed personal
if (msgdoc->_store->contacts().is_personal(contact->email))
msgdoc->_personal = true; // this one's personal.
return TRUE;
}, &docinfo);
/* also store the contact-info as separate terms, and add it
* to the cache */

View File

@ -96,8 +96,6 @@ public:
* @return the metadata
*/
const Metadata& metadata() const;
/**
* Get the Contacts object for this store
*
@ -105,7 +103,6 @@ public:
*/
const Contacts& contacts() const;
/**
* Get the Indexer associated with this store. It is an error
* to call this on a read-only store.
@ -177,7 +174,6 @@ public:
*/
bool contains_message (const std::string& path) const;
/**
* Prototype for the ForEachFunc
*

View File

@ -33,25 +33,21 @@ test_mu_contacts_01()
g_assert_cmpuint (contacts.size(), ==, 0);
contacts.add(std::move(Mu::ContactInfo ("Foo <foo.bar@example.com>",
"foo.bar@example.com", "Foo",
false, 12345)));
"foo.bar@example.com", "Foo", 12345)));
g_assert_false (contacts.empty());
g_assert_cmpuint (contacts.size(), ==, 1);
contacts.add(std::move(Mu::ContactInfo ("Cuux <cuux.fnorb@example.com>",
"cuux@example.com", "Cuux", true,
54321)));
"cuux@example.com", "Cuux", 54321)));
g_assert_cmpuint (contacts.size(), ==, 2);
contacts.add(std::move(Mu::ContactInfo ("foo.bar@example.com",
"foo.bar@example.com", "Foo",
false, 77777)));
"foo.bar@example.com", "Foo", 77777)));
g_assert_cmpuint (contacts.size(), ==, 2);
contacts.add(std::move(Mu::ContactInfo ("Foo.Bar@Example.Com",
"Foo.Bar@Example.Com", "Foo",
false, 88888)));
"Foo.Bar@Example.Com", "Foo", 88888)));
g_assert_cmpuint (contacts.size(), ==, 2);
// note: replaces first.
@ -60,7 +56,6 @@ test_mu_contacts_01()
g_assert_false (info);
}
{
const auto info = contacts._find("foo.BAR@example.com");
g_assert_true (info);
@ -73,6 +68,27 @@ test_mu_contacts_01()
g_assert_cmpuint (contacts.size(), ==, 0);
}
static void
test_mu_contacts_02()
{
Mu::StringVec personal = {
"foo@example.com",
"bar@cuux.org",
"/bar-.*@fnorb.f./"
};
Mu::Contacts contacts{"", personal};
g_assert_true (contacts.is_personal("foo@example.com"));
g_assert_true (contacts.is_personal("Bar@CuuX.orG"));
g_assert_true (contacts.is_personal("bar-123abc@fnorb.fi"));
g_assert_true (contacts.is_personal("bar-zzz@fnorb.fr"));
g_assert_false (contacts.is_personal("foo@bar.com"));
g_assert_false (contacts.is_personal("BÂr@CuuX.orG"));
g_assert_false (contacts.is_personal("bar@fnorb.fi"));
g_assert_false (contacts.is_personal("bar-zzz@fnorb.xr"));
}
int
@ -81,6 +97,7 @@ main (int argc, char *argv[])
g_test_init (&argc, &argv, NULL);
g_test_add_func ("/mu-contacts/01", test_mu_contacts_01);
g_test_add_func ("/mu-contacts/02", test_mu_contacts_02);
g_log_set_handler (NULL,
(GLogLevelFlags)

View File

@ -79,7 +79,6 @@ test_store_add_count_remove ()
}
int
main (int argc, char *argv[])
{