support xapian ngrams

Xapian supports an "ngrams" option to help with languages/scripts
without explicit wordbreaks, such as Chinese / Japanese / Korean.

Add some plumbing for supporting this in mu as well. Experimental for
now.
This commit is contained in:
Dirk-Jan C. Binnema
2023-09-09 11:57:05 +03:00
parent f6122ecc9e
commit 264bb092f0
20 changed files with 207 additions and 81 deletions

View File

@ -70,7 +70,8 @@ struct Store::Private {
: XapianDb::Flavor::Open)},
config_{xapian_db_},
contacts_cache_{config_},
root_maildir_{remove_slash(config_.get<Config::Id::RootMaildir>())}
root_maildir_{remove_slash(config_.get<Config::Id::RootMaildir>())},
message_opts_{make_message_options(config_)}
{}
Private(const std::string& path, const std::string& root_maildir,
@ -78,7 +79,8 @@ struct Store::Private {
xapian_db_{XapianDb(path, XapianDb::Flavor::CreateOverwrite)},
config_{make_config(xapian_db_, root_maildir, conf)},
contacts_cache_{config_},
root_maildir_{remove_slash(config_.get<Config::Id::RootMaildir>())}
root_maildir_{remove_slash(config_.get<Config::Id::RootMaildir>())},
message_opts_{make_message_options(config_)}
{}
~Private() try {
@ -133,6 +135,13 @@ struct Store::Private {
return config;
}
Message::Options make_message_options(const Config& conf) {
if (conf.get<Config::Id::SupportNgrams>())
return Message::Options::SupportNgrams;
else
return Message::Options::None;
}
Option<Message> find_message_unlocked(Store::Id docid) const;
Store::IdVec find_duplicates_unlocked(const Store& store,
const std::string& message_id) const;
@ -150,7 +159,8 @@ struct Store::Private {
ContactsCache contacts_cache_;
std::unique_ptr<Indexer> indexer_;
const std::string root_maildir_;
const std::string root_maildir_;
const Message::Options message_opts_;
size_t transaction_size_{};
std::mutex lock_;
@ -340,6 +350,11 @@ Store::add_message(Message& msg, bool use_transaction, bool is_new)
if (auto&& res = msg.set_maildir(mdir.value()); !res)
return Err(res.error());
// we shouldn't mix ngrams/non-ngrams messages.
if (any_of(msg.options() & Message::Options::SupportNgrams) !=
any_of(message_options() & Message::Options::SupportNgrams))
return Err(Error::Code::InvalidArgument, "incompatible message options");
/* add contacts from this message to cache; this cache
* also determines whether those contacts are _personal_, i.e. match
* our personal addresses.
@ -371,6 +386,16 @@ Store::add_message(Message& msg, bool use_transaction, bool is_new)
return res;
}
Result<Store::Id>
Store::add_message(const std::string& path, bool use_transaction, bool is_new)
{
if (auto msg{Message::make_from_path(path, priv_->message_opts_)}; !msg)
return Err(msg.error());
else
return add_message(msg.value(), use_transaction, is_new);
}
bool
Store::remove_message(const std::string& path)
{
@ -649,3 +674,9 @@ Store::maildirs() const
return mdirs;
}
Message::Options
Store::message_options() const
{
return priv_->message_opts_;
}