index/scanner: implement i-node sorting

On rotational devices (HDDs), processing directory entries is much faster
when they are handled sorted by i-node. This is an old optimization
(perhaps mu <= 1.6 or so?) that had not yet been re-implemented after
indexing changed, likely because my systems use SSDs instead!

But let's restore that optimization; the sorting is fast enough that it
does not matter on SSDs, and on HDDs it should be quite a bit faster.
This commit is contained in:
Dirk-Jan C. Binnema
2023-07-25 22:39:12 +03:00
parent 23ba61a650
commit 4d8ba5f579
2 changed files with 73 additions and 72 deletions

View File

@ -39,17 +39,30 @@
using namespace Mu; using namespace Mu;
struct Scanner::Private { struct Scanner::Private {
Private(const std::string& root_dir, Scanner::Handler handler) Private(const std::string& root_dir, Scanner::Handler handler):
: root_dir_{root_dir}, handler_{handler} root_dir_{root_dir}, handler_{handler} {
{ if (root_dir_.length() > PATH_MAX)
throw Mu::Error{Error::Code::InvalidArgument,
"path is too long"};
if (!handler_) if (!handler_)
throw Mu::Error{Error::Code::Internal, "missing handler"}; throw Mu::Error{Error::Code::InvalidArgument,
"missing handler"};
} }
~Private() { stop(); } ~Private() { stop(); }
bool start(); Result<void> start();
bool stop(); void stop();
bool process_dentry(const std::string& path, struct dirent* dentry, bool is_maildir);
struct dentry_t {
dentry_t(const struct dirent *dentry):
d_ino{dentry->d_ino},
d_name{static_cast<const char*>(dentry->d_name)} {}
ino_t d_ino;
std::string d_name;
};
bool process_dentry(const std::string& path, const dentry_t& dentry,
bool is_maildir);
bool process_dir(const std::string& path, bool is_maildir); bool process_dir(const std::string& path, bool is_maildir);
const std::string root_dir_; const std::string root_dir_;
@ -88,10 +101,10 @@ do_ignore(const char *d_name)
} }
bool bool
Scanner::Private::process_dentry(const std::string& path, struct dirent *dentry, Scanner::Private::process_dentry(const std::string& path, const dentry_t& dentry,
bool is_maildir) bool is_maildir)
{ {
const auto d_name{dentry->d_name}; const auto d_name{dentry.d_name.c_str()};
if (is_dotdir(d_name) || std::strcmp(d_name, "tmp") == 0) if (is_dotdir(d_name) || std::strcmp(d_name, "tmp") == 0)
return true; // ignore. return true; // ignore.
@ -111,13 +124,14 @@ Scanner::Private::process_dentry(const std::string& path, struct dirent *dentry,
const auto new_cur = const auto new_cur =
std::strcmp(d_name, "cur") == 0 || std::strcmp(d_name, "new") == 0; std::strcmp(d_name, "cur") == 0 || std::strcmp(d_name, "new") == 0;
const auto htype = const auto htype =
new_cur ? Scanner::HandleType::EnterNewCur : Scanner::HandleType::EnterDir; new_cur ?
Scanner::HandleType::EnterNewCur :
Scanner::HandleType::EnterDir;
const auto res = handler_(fullpath, &statbuf, htype); const auto res = handler_(fullpath, &statbuf, htype);
if (!res) if (!res)
return true; // skip return true; // skip
process_dir(fullpath, new_cur); process_dir(fullpath, new_cur);
return handler_(fullpath, &statbuf, Scanner::HandleType::LeaveDir); return handler_(fullpath, &statbuf, Scanner::HandleType::LeaveDir);
} else if (S_ISREG(statbuf.st_mode) && is_maildir) } else if (S_ISREG(statbuf.st_mode) && is_maildir)
@ -135,9 +149,8 @@ Scanner::Private::process_dir(const std::string& path, bool is_maildir)
return true; /* we're done */ return true; /* we're done */
if (G_UNLIKELY(path.length() > PATH_MAX)) { if (G_UNLIKELY(path.length() > PATH_MAX)) {
// note: unlikely to hit this, one case would be a // note: unlikely to hit this, one case would be a self-referential
// self-referential symlink; that should be caught earlier, // symlink; that should be caught earlier, so this is just a backstop.
// so this is just a backstop.
mu_warning("path is too long: {}", path); mu_warning("path is too long: {}", path);
return false; return false;
} }
@ -148,63 +161,57 @@ Scanner::Private::process_dir(const std::string& path, bool is_maildir)
return false; return false;
} }
// TODO: sort dentries by inode order, which makes things faster for extfs. std::vector<dentry_t> dir_entries;
// see mu-maildir.c
while (running_) { while (running_) {
errno = 0; errno = 0;
const auto dentry{::readdir(dir)}; if (const auto& dentry{::readdir(dir)}; dentry) {
dir_entries.emplace_back(dentry);
if (G_LIKELY(dentry)) {
process_dentry(path, dentry, is_maildir);
continue; continue;
} } else if (errno != 0) {
if (errno != 0) {
mu_warning("failed to read {}: {}", path, g_strerror(errno)); mu_warning("failed to read {}: {}", path, g_strerror(errno));
continue; continue;
} }
break; break;
} }
closedir(dir); ::closedir(dir);
// sort by i-node; much faster on rotational (HDDs) devices and on SSDs
// sort is quick enough to not matter much
std::sort(dir_entries.begin(), dir_entries.end(),
[](auto&& d1, auto&& d2){ return d1.d_ino < d2.d_ino; });
// now process...
for (auto&& dentry: dir_entries)
process_dentry(path, dentry, is_maildir);
return true; return true;
} }
bool Result<void>
Scanner::Private::start() Scanner::Private::start()
{ {
const auto& path{root_dir_};
if (G_UNLIKELY(path.length() > PATH_MAX)) {
mu_warning("path is too long: {}", path);
return false;
}
const auto mode{F_OK | R_OK}; const auto mode{F_OK | R_OK};
if (G_UNLIKELY(access(path.c_str(), mode) != 0)) { if (G_UNLIKELY(::access(root_dir_.c_str(), mode) != 0))
mu_warning("'{}' is not readable: {}", path, g_strerror(errno)); return Err(Error::Code::File,
return false; "'{}' is not readable: {}", root_dir_,
} g_strerror(errno));
struct stat statbuf {}; struct stat statbuf {};
if (G_UNLIKELY(stat(path.c_str(), &statbuf) != 0)) { if (G_UNLIKELY(::stat(root_dir_.c_str(), &statbuf) != 0))
mu_warning("'{}' is not stat'able: {}", path, g_strerror(errno)); return Err(Error::Code::File,
return false; "'{}' is not stat'able: {}",
} root_dir_, g_strerror(errno));
if (G_UNLIKELY(!S_ISDIR(statbuf.st_mode))) { if (G_UNLIKELY(!S_ISDIR(statbuf.st_mode)))
mu_warning("'{}' is not a directory", path); return Err(Error::Code::File,
return false; "'{}' is not a directory", root_dir_);
}
running_ = true; running_ = true;
mu_debug("starting scan @ {}", root_dir_); mu_debug("starting scan @ {}", root_dir_);
auto basename{g_path_get_basename(root_dir_.c_str())}; auto basename{to_string_gchar(g_path_get_basename(root_dir_.c_str()))};
const auto is_maildir = const auto is_maildir = basename == "cur" || basename == "new";
(g_strcmp0(basename, "cur") == 0 || g_strcmp0(basename, "new") == 0);
g_free(basename);
const auto start{std::chrono::steady_clock::now()}; const auto start{std::chrono::steady_clock::now()};
process_dir(root_dir_, is_maildir); process_dir(root_dir_, is_maildir);
@ -212,19 +219,16 @@ Scanner::Private::start()
mu_debug("finished scan of {} in {} ms", root_dir_, to_ms(elapsed)); mu_debug("finished scan of {} in {} ms", root_dir_, to_ms(elapsed));
running_ = false; running_ = false;
return true; return Ok();
} }
bool void
Scanner::Private::stop() Scanner::Private::stop()
{ {
if (!running_) if (running_) {
return true; // nothing to do
mu_debug("stopping scan"); mu_debug("stopping scan");
running_ = false; running_ = false;
}
return true;
} }
Scanner::Scanner(const std::string& root_dir, Scanner::Handler handler) Scanner::Scanner(const std::string& root_dir, Scanner::Handler handler)
@ -234,24 +238,23 @@ Scanner::Scanner(const std::string& root_dir, Scanner::Handler handler)
Scanner::~Scanner() = default; Scanner::~Scanner() = default;
bool Result<void>
Scanner::start() Scanner::start()
{ {
if (priv_->running_) if (priv_->running_)
return true; // nothing to do return Ok(); // nothing to do
const auto res = priv_->start(); /* blocks */ auto res = priv_->start(); /* blocks */
priv_->running_ = false; priv_->running_ = false;
return res; return res;
} }
bool void
Scanner::stop() Scanner::stop()
{ {
std::lock_guard l(priv_->lock_); std::lock_guard l(priv_->lock_);
priv_->stop();
return priv_->stop();
} }
bool bool

View File

@ -1,5 +1,5 @@
/* /*
** Copyright (C) 2020 Dirk-Jan C. Binnema <djcb@djcbsoftware.nl> ** Copyright (C) 2020-2023 Dirk-Jan C. Binnema <djcb@djcbsoftware.nl>
** **
** This program is free software; you can redistribute it and/or modify it ** This program is free software; you can redistribute it and/or modify it
** under the terms of the GNU General Public License as published by the ** under the terms of the GNU General Public License as published by the
@ -22,6 +22,7 @@
#include <functional> #include <functional>
#include <memory> #include <memory>
#include <utils/mu-result.hh>
#include <dirent.h> #include <dirent.h>
#include <sys/types.h> #include <sys/types.h>
@ -57,7 +58,6 @@ class Scanner {
* *
* If handler is a directory * If handler is a directory
* *
*
* @param root_dir root dir to start scanning * @param root_dir root dir to start scanning
* @param handler handler function for some direntry * @param handler handler function for some direntry
*/ */
@ -72,16 +72,14 @@ class Scanner {
* Start the scan; this is a blocking call that runs until * Start the scan; this is a blocking call that runs until
* finished or (from another thread) stop() is called. * finished or (from another thread) stop() is called.
* *
* @return true if starting worked; false otherwise * @return Ok if starting worked; an Error otherwise
*/ */
bool start(); Result<void> start();
/** /**
* Stop the scan * Request stopping the scan if it's running; otherwise do nothing
*
* @return true if stopping worked; false otherwi%sse
*/ */
bool stop(); void stop();
/** /**
* Is a scan currently running? * Is a scan currently running?