support Xapian ngrams

Xapian supports an "ngrams" option to help with languages/scripts
without explicit word breaks, such as Chinese, Japanese, and Korean.

Add some plumbing to support this in mu as well. This is experimental
for now.
This commit is contained in:
Dirk-Jan C. Binnema
2023-09-09 11:57:05 +03:00
parent f6122ecc9e
commit 264bb092f0
20 changed files with 207 additions and 81 deletions

View File

@ -87,10 +87,11 @@ struct Indexer::Private {
was_empty_{store.empty()} {
mu_message("created indexer for {} -> "
"{} (batch-size: {}; was-empty: {})",
"{} (batch-size: {}; was-empty: {}; ngrams: {})",
store.root_maildir(), store.path(),
store.config().get<Mu::Config::Id::BatchSize>(),
was_empty_);
was_empty_,
store.config().get<Mu::Config::Id::SupportNgrams>());
}
~Private() {
@ -238,7 +239,7 @@ Indexer::Private::add_message(const std::string& path)
*
* std::unique_lock lock{w_lock_};
*/
auto msg{Message::make_from_path(path)};
auto msg{Message::make_from_path(path, store_.message_options())};
if (!msg) {
mu_warning("failed to create message from {}: {}", path, msg.error().what());
return false;