indexer: make lazy check even lazier

In lazy-mode, we were skipping directories that did not change; however,
this didn't help for the case where users received new messages in big
maildirs.

So, add another check where we compare the ctime of message files with
the time of the last indexing operation. If the ctime is earlier, ignore the
message-file. This is faster than having to consult the Xapian database
for each message.

Note that this requires in mu4e:
      (setq mu4e-index-lazy-check t)
or
   --lazy-check
as a parameter for 'mu index'.
This commit is contained in:
Dirk-Jan C. Binnema
2024-11-10 13:37:17 +02:00
parent fa59040ebc
commit b0d8d42dd2
3 changed files with 30 additions and 15 deletions

View File

@ -145,6 +145,8 @@ struct Indexer::Private {
std::mutex lock_, w_lock_;
std::atomic<time_t> completed_{};
bool was_empty_{};
uint64_t last_index_{};
};
bool
@ -206,12 +208,16 @@ Indexer::Private::handler(const std::string& fullpath, struct stat* statbuf,
case Scanner::HandleType::File: {
++progress_.checked;
if ((size_t)statbuf->st_size > max_message_size_) {
mu_debug("skip {} (too big: {} bytes)", fullpath, statbuf->st_size);
if (conf_.lazy_check && static_cast<uint64_t>(statbuf->st_ctime) < last_index_) {
// in lazy mode, ignore the file if it has not changed
// since the last indexing op.
return false;
}
if (static_cast<size_t>(statbuf->st_size) > max_message_size_) {
mu_debug("skip {} (too big: {} bytes)", fullpath, statbuf->st_size);
return false;
}
// if the message is not in the db yet, or not up-to-date, queue
// it for updating/inserting.
if (statbuf->st_ctime <= dirstamp_ && store_.contains_message(fullpath))
@ -414,6 +420,10 @@ Indexer::Private::start(const Indexer::Config& conf, bool block)
mu_debug("indexing: {}; clean-up: {}", conf_.scan ? "yes" : "no",
conf_.cleanup ? "yes" : "no");
// remember the _previous_ indexing, so in lazy mode we can skip
// those files.
last_index_ = store_.config().get<Mu::Config::Id::LastIndex>();
state_.change_to(IndexState::Scanning);
/* kick off the first worker, which will spawn more if needed. */
workers_.emplace_back(std::thread([this] { item_worker(); }));