mu: add '--lazy-check' option for indexing

Add an option --lazy-check to ignore any directories whose ctime has not
changed since the last indexing operation.

There are a few corner-cases (such as editing a message outside mu's
control) where this might miss a change, but apart from that, it makes
indexing a maildir (and its sub-maildirs) almost a no-op if there
were no changes.
This commit is contained in:
djcb
2016-07-23 19:18:09 +03:00
parent 2a83b02ce2
commit 9477071e63
7 changed files with 115 additions and 72 deletions

View File

@ -95,6 +95,7 @@ struct _MuIndexCallbackData {
void* _user_data; void* _user_data;
MuIndexStats* _stats; MuIndexStats* _stats;
gboolean _reindex; gboolean _reindex;
gboolean _lazy_check;
time_t _dirstamp; time_t _dirstamp;
guint _max_filesize; guint _max_filesize;
}; };
@ -216,30 +217,51 @@ on_run_maildir_msg (const char *fullpath, const char *mdir,
return result; return result;
} }
static time_t
get_dir_timestamp (const char *path)
{
struct stat statbuf;
if (stat (path, &statbuf) != 0) {
g_warning ("failed to stat %s: %s",
path, strerror(errno));
return 0;
}
return statbuf.st_ctime;
}
static MuError static MuError
on_run_maildir_dir (const char* fullpath, gboolean enter, on_run_maildir_dir (const char* fullpath, gboolean enter,
MuIndexCallbackData *data) MuIndexCallbackData *data)
{ {
GError *err; GError *err;
err = NULL; err = NULL;
/* xapian stores a per-dir timestamp; we use this timestamp /* xapian stores a per-dir timestamp; we use this timestamp to determine
* to determine whether a message is up-to-data * whether a message is up-to-date
*/ */
if (enter) { if (enter) {
data->_dirstamp = data->_dirstamp =
mu_store_get_timestamp (data->_store, fullpath, &err); mu_store_get_timestamp (data->_store, fullpath, &err);
g_debug ("entering %s (ts==%u)", /* in 'lazy' mode, we only check the dir timestamp, and if it's
fullpath, (unsigned)data->_dirstamp); * up to date, we don't bother with this dir. This fails to
* account for messages below this dir that have merely
* _changed_ though */
if (data->_lazy_check && mu_maildir_is_leaf_dir(fullpath)) {
time_t dirstamp;
dirstamp = get_dir_timestamp (fullpath);
if (dirstamp <= data->_dirstamp) {
g_debug ("ignore %s (up-to-date)", fullpath);
return MU_IGNORE;
}
}
g_debug ("entering %s", fullpath);
} else { } else {
time_t now;
now = time (NULL);
mu_store_set_timestamp (data->_store, fullpath, mu_store_set_timestamp (data->_store, fullpath,
now, &err); time(NULL), &err);
g_debug ("leaving %s (ts=%u)", g_debug ("leaving %s", fullpath);
fullpath, (unsigned)data->_dirstamp);
} }
if (data->_idx_dir_cb) if (data->_idx_dir_cb)
@ -276,7 +298,8 @@ check_path (const char *path)
static void static void
init_cb_data (MuIndexCallbackData *cb_data, MuStore *xapian, init_cb_data (MuIndexCallbackData *cb_data, MuStore *xapian,
gboolean reindex, guint max_filesize, MuIndexStats *stats, gboolean reindex, gboolean lazycheck,
guint max_filesize, MuIndexStats *stats,
MuIndexMsgCallback msg_cb, MuIndexDirCallback dir_cb, MuIndexMsgCallback msg_cb, MuIndexDirCallback dir_cb,
void *user_data) void *user_data)
{ {
@ -287,6 +310,7 @@ init_cb_data (MuIndexCallbackData *cb_data, MuStore *xapian,
cb_data->_store = xapian; cb_data->_store = xapian;
cb_data->_reindex = reindex; cb_data->_reindex = reindex;
cb_data->_lazy_check = lazycheck;
cb_data->_dirstamp = 0; cb_data->_dirstamp = 0;
cb_data->_max_filesize = max_filesize; cb_data->_max_filesize = max_filesize;
@ -318,7 +342,8 @@ mu_index_set_xbatch_size (MuIndex *index, guint xbatchsize)
MuError MuError
mu_index_run (MuIndex *index, const char *path, mu_index_run (MuIndex *index, const char *path,
gboolean reindex, MuIndexStats *stats, gboolean reindex, gboolean lazycheck,
MuIndexStats *stats,
MuIndexMsgCallback msg_cb, MuIndexDirCallback dir_cb, MuIndexMsgCallback msg_cb, MuIndexDirCallback dir_cb,
void *user_data) void *user_data)
{ {
@ -336,7 +361,7 @@ mu_index_run (MuIndex *index, const char *path,
return MU_ERROR; return MU_ERROR;
} }
init_cb_data (&cb_data, index->_store, reindex, init_cb_data (&cb_data, index->_store, reindex, lazycheck,
index->_max_filesize, stats, index->_max_filesize, stats,
msg_cb, dir_cb, user_data); msg_cb, dir_cb, user_data);

View File

@ -119,6 +119,8 @@ typedef MuError (*MuIndexDirCallback) (const char* path, gboolean enter,
* @param path the path to index. This must be an absolute path * @param path the path to index. This must be an absolute path
* @param force if != 0, force re-indexing already index messages; this is * @param force if != 0, force re-indexing already index messages; this is
* obviously a lot slower than only indexing new/changed messages * obviously a lot slower than only indexing new/changed messages
* @param lazycheck whether to ignore subdirectories that have up-to-date
* timestamps.
* @param stats a structure with some statistics about the results; * @param stats a structure with some statistics about the results;
* note that this function does *not* reset the struct values to allow * note that this function does *not* reset the struct values to allow
* for cumulative stats from multiple calls. If needed, you can use * for cumulative stats from multiple calls. If needed, you can use
@ -132,12 +134,13 @@ typedef MuError (*MuIndexDirCallback) (const char* path, gboolean enter,
* case of some error. * case of some error.
*/ */
MuError mu_index_run (MuIndex *index, const char *path, gboolean force, MuError mu_index_run (MuIndex *index, const char *path, gboolean force,
MuIndexStats *stats, MuIndexMsgCallback msg_cb, gboolean lazycheck, MuIndexStats *stats,
MuIndexMsgCallback msg_cb,
MuIndexDirCallback dir_cb, void *user_data); MuIndexDirCallback dir_cb, void *user_data);
/** /**
* gather some statistics about the Maildir; this is usually much faster * gather some statistics about the Maildir; this is usually much faster than
* than mu_index_run, and can thus be used to provide some information to the user * mu_index_run, and can thus be used to provide some information to the user
* note though that the statistics may be different from the reality that * note though that the statistics may be different from the reality that
* mu_index_run sees, when there are updates in the Maildir * mu_index_run sees, when there are updates in the Maildir
* *

View File

@ -248,21 +248,20 @@ process_file (const char* fullpath, const gchar* mdir,
* determine if path is a maildir leaf-dir; ie. if it's 'cur' or 'new' * determine if path is a maildir leaf-dir; ie. if it's 'cur' or 'new'
* (we're skipping 'tmp' for obvious reasons) * (we're skipping 'tmp' for obvious reasons)
*/ */
G_GNUC_CONST static gboolean gboolean
is_maildir_new_or_cur (const char *path) mu_maildir_is_leaf_dir (const char *path)
{ {
size_t len; size_t len;
g_return_val_if_fail (path, FALSE);
/* path is the full path; it cannot possibly be shorter /* path is the full path; it cannot possibly be shorter
* than 4 for a maildir (/cur or /new) */ * than 4 for a maildir (/cur or /new) */
len = strlen (path); len = path ? strlen (path) : 0;
if (G_UNLIKELY(len < 4)) if (G_UNLIKELY(len < 4))
return FALSE; return FALSE;
/* optimization; one further idea would be cast the 4 bytes to an integer /* optimization; one further idea would be cast the 4 bytes to an
* and compare that -- need to think about alignment, endianness */ * integer and compare that -- need to think about alignment,
* endianness */
if (path[len - 4] == G_DIR_SEPARATOR && if (path[len - 4] == G_DIR_SEPARATOR &&
path[len - 3] == 'c' && path[len - 3] == 'c' &&
@ -415,7 +414,7 @@ process_dir_entry (const char* path, const char* mdir, struct dirent *entry,
switch (d_type) { switch (d_type) {
case DT_REG: /* we only want files in cur/ and new/ */ case DT_REG: /* we only want files in cur/ and new/ */
if (!is_maildir_new_or_cur (path)) if (!mu_maildir_is_leaf_dir (path))
return MU_OK; return MU_OK;
return process_file (fullpath, mdir, cb_msg, data); return process_file (fullpath, mdir, cb_msg, data);
@ -531,27 +530,30 @@ process_dir (const char* path, const char* mdir,
return MU_OK; return MU_OK;
} }
if (dir_cb) {
MuError rv;
rv = dir_cb (path, TRUE/*enter*/, data);
/* ignore this dir; not necessarily an _error_, dir might
* be up-to-date and return MU_IGNORE */
if (rv == MU_IGNORE)
return MU_OK;
else if (rv != MU_OK)
return rv;
}
dir = opendir (path); dir = opendir (path);
if (!dir) { if (!dir) {
g_warning ("cannot access %s: %s", path, strerror(errno)); g_warning ("cannot access %s: %s", path, strerror(errno));
return MU_OK; return MU_OK;
} }
if (dir_cb) { result = process_dir_entries (dir, path, mdir, msg_cb, dir_cb,
MuError rv; full, data);
rv = dir_cb (path, TRUE, data);
if (rv != MU_OK) {
closedir (dir);
return rv;
}
}
result = process_dir_entries (dir, path, mdir, msg_cb, dir_cb, full, data);
closedir (dir); closedir (dir);
/* only run dir_cb if it exists and so far, things went ok */ /* only run dir_cb if it exists and so far, things went ok */
if (dir_cb && result == MU_OK) if (dir_cb && result == MU_OK)
return dir_cb (path, FALSE, data); return dir_cb (path, FALSE/*leave*/, data);
return result; return result;
} }
@ -798,15 +800,16 @@ mu_maildir_get_new_path (const char *oldpath, const char *new_mdir,
if (new_name) if (new_name)
mfile = get_new_basename (); mfile = get_new_basename ();
else { else {
/* determine the name of the mailfile, stripped of its flags, as well /* determine the name of the mailfile, stripped of its flags, as
* as any custom (non-standard) flags */ * well as any custom (non-standard) flags */
char *cur; char *cur;
mfile = g_path_get_basename (oldpath); mfile = g_path_get_basename (oldpath);
for (cur = &mfile[strlen(mfile)-1]; cur > mfile; --cur) { for (cur = &mfile[strlen(mfile)-1]; cur > mfile; --cur) {
if ((*cur == ':' || *cur == '!') && if ((*cur == ':' || *cur == '!') &&
(cur[1] == '2' && cur[2] == ',')) { (cur[1] == '2' && cur[2] == ',')) {
/* get the custom flags (if any) */ /* get the custom flags (if any) */
custom_flags = mu_flags_custom_from_str (cur + 3); custom_flags =
mu_flags_custom_from_str (cur + 3);
cur[0] = '\0'; /* strip the flags */ cur[0] = '\0'; /* strip the flags */
break; break;
} }
@ -839,8 +842,6 @@ get_file_size (const char* path)
} }
static gboolean static gboolean
msg_move_check_pre (const gchar *src, const gchar *dst, GError **err) msg_move_check_pre (const gchar *src, const gchar *dst, GError **err)
{ {

View File

@ -129,6 +129,15 @@ MuError mu_maildir_walk (const char *path, MuMaildirWalkMsgCallback cb_msg,
gboolean mu_maildir_clear_links (const gchar* dir, GError **err); gboolean mu_maildir_clear_links (const gchar* dir, GError **err);
/**
* whether the directory path ends in '/cur' or '/new'
*
* @param path some path
*/
gboolean mu_maildir_is_leaf_dir (const char *path);
/** /**
* get the Maildir flags from the full path of a mailfile. The flags * get the Maildir flags from the full path of a mailfile. The flags
* are as specified in http://cr.yp.to/proto/maildir.html, plus * are as specified in http://cr.yp.to/proto/maildir.html, plus

View File

@ -1,7 +1,7 @@
/* -*-mode: c; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*-*/ /* -*-mode: c; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*-*/
/* /*
** Copyright (C) 2008-2013 Dirk-Jan C. Binnema <djcb@djcbsoftware.nl> ** Copyright (C) 2008-2016 Dirk-Jan C. Binnema <djcb@djcbsoftware.nl>
** **
** This program is free software; you can redistribute it and/or modify it ** This program is free software; you can redistribute it and/or modify it
** under the terms of the GNU General Public License as published by the ** under the terms of the GNU General Public License as published by the
@ -319,7 +319,8 @@ cmd_index (MuIndex *midx, MuConfig *opts, MuIndexStats *stats, GError **err)
newline_before_on(); newline_before_on();
rv = mu_index_run (midx, opts->maildir, opts->rebuild, stats, rv = mu_index_run (midx, opts->maildir, opts->rebuild,
opts->lazycheck, stats,
show_progress ? show_progress ?
(MuIndexMsgCallback)index_msg_cb : (MuIndexMsgCallback)index_msg_cb :
(MuIndexMsgCallback)index_msg_silent_cb, (MuIndexMsgCallback)index_msg_silent_cb,

View File

@ -148,6 +148,8 @@ config_options_group_index (void)
"top of the maildir", "<maildir>"}, "top of the maildir", "<maildir>"},
{"rebuild", 0, 0, G_OPTION_ARG_NONE, &MU_CONFIG.rebuild, {"rebuild", 0, 0, G_OPTION_ARG_NONE, &MU_CONFIG.rebuild,
"rebuild the database from scratch (false)", NULL}, "rebuild the database from scratch (false)", NULL},
{"lazy-check", 0, 0, G_OPTION_ARG_NONE, &MU_CONFIG.lazycheck,
"only check dir-timestamps (false)", NULL},
{"my-address", 0, 0, G_OPTION_ARG_STRING_ARRAY, {"my-address", 0, 0, G_OPTION_ARG_STRING_ARRAY,
&MU_CONFIG.my_addresses, &MU_CONFIG.my_addresses,
"my e-mail address (regexp); can be used multiple times", "my e-mail address (regexp); can be used multiple times",

View File

@ -112,6 +112,8 @@ struct _MuConfig {
gboolean rebuild; /* empty the database before indexing */ gboolean rebuild; /* empty the database before indexing */
gboolean autoupgrade; /* automatically upgrade db gboolean autoupgrade; /* automatically upgrade db
* when needed */ * when needed */
gboolean lazycheck; /* don't check dirs with up-to-date
* timestamps */
int xbatchsize; /* batchsize for xapian int xbatchsize; /* batchsize for xapian
* commits, or 0 for * commits, or 0 for
* default */ * default */