diff --git a/src/Makefile.am b/src/Makefile.am index 34d425ce..ecb6ba8d 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -27,8 +27,6 @@ mu_SOURCES= \ mu-query.c \ mu-query-xapian.cc \ mu-query-xapian.h \ - mu-walk.c \ - mu-walk.h \ mu-result.h \ mu-store-xapian.cc \ mu-store-xapian.h \ diff --git a/src/mu-index.c b/src/mu-index.c index 4202734e..317f1a17 100644 --- a/src/mu-index.c +++ b/src/mu-index.c @@ -28,7 +28,7 @@ #include #include -#include "mu-walk.h" +#include "mu-maildir.h" #include "mu-index.h" #include "mu-store-xapian.h" @@ -227,9 +227,9 @@ mu_index_run (MuIndex *index, const char* path, cb_data._dirstamp = 0; - return mu_walk_maildir (path, - (MuWalkMsgCallback)on_run_maildir_msg, - (MuWalkDirCallback)on_run_maildir_dir, + return mu_maildir_walk (path, + (MuMaildirWalkMsgCallback)on_run_maildir_msg, + (MuMaildirWalkDirCallback)on_run_maildir_dir, &cb_data); } @@ -276,8 +276,8 @@ mu_index_stats (MuIndex *index, const char* path, cb_data._dirstamp = 0; - return mu_walk_maildir (path, - (MuWalkMsgCallback)on_stats_maildir_file, + return mu_maildir_walk (path, + (MuMaildirWalkMsgCallback)on_stats_maildir_file, NULL,&cb_data); } diff --git a/src/mu-maildir.c b/src/mu-maildir.c index 4b5205f7..1f38d96d 100644 --- a/src/mu-maildir.c +++ b/src/mu-maildir.c @@ -21,12 +21,18 @@ #include #include #include +#include #include #include +#include #include "mu-maildir.h" +#define MU_MAILDIR_WALK_MAX_FILE_SIZE (15*1000*1000) +#define MU_MAILDIR_NOINDEX_FILE ".noindex" + + static gboolean _create_maildir (const char *path, int mode, GError **err) { @@ -70,7 +76,7 @@ _create_noindex (const char *path, GError **err) gchar *noindexpath; noindexpath = g_strdup_printf ("%s%c%s", path, G_DIR_SEPARATOR, - ".noindex"); + MU_MAILDIR_NOINDEX_FILE); fd = creat (noindexpath, 0644); g_free (noindexpath); if (fd < 0 || close (fd) != 0) { @@ -180,3 +186,259 @@ mu_maildir_link (const char* src, const char *targetpath, return TRUE; } + +static MuResult process_dir (const char* 
path, MuMaildirWalkMsgCallback msg_cb,
+			     MuMaildirWalkDirCallback dir_cb, void *data);
+
+/*
+ * Hand a single file to the message callback.
+ *
+ * Returns MU_OK without doing anything when cb is NULL. Unreadable
+ * files and files bigger than MU_MAILDIR_WALK_MAX_FILE_SIZE are skipped
+ * with a warning and reported as MU_OK: process_dir stops at the first
+ * result that is not MU_OK, so returning an error here would abort the
+ * walk because of one file we only want to ignore (the documentation of
+ * mu_maildir_walk says unreadable files are ignored).
+ *
+ * Returns MU_ERROR when stat() fails; otherwise returns whatever cb
+ * returns, logging a warning when that is neither MU_OK nor MU_STOP.
+ */
+static MuResult
+process_file (const char* fullpath, MuMaildirWalkMsgCallback cb, void *data)
+{
+	MuResult result;
+	struct stat statbuf;
+
+	if (!cb)
+		return MU_OK;
+
+	if (G_UNLIKELY(access(fullpath, R_OK) != 0)) {
+		g_warning ("cannot access %s: %s", fullpath, strerror(errno));
+		return MU_OK; /* skipped, not an error */
+	}
+
+	if (G_UNLIKELY(stat (fullpath, &statbuf) != 0)) {
+		g_warning ("cannot stat %s: %s", fullpath, strerror(errno));
+		return MU_ERROR;
+	}
+
+	if (G_UNLIKELY(statbuf.st_size > MU_MAILDIR_WALK_MAX_FILE_SIZE)) {
+		g_warning ("ignoring because bigger than %d bytes: %s",
+			   MU_MAILDIR_WALK_MAX_FILE_SIZE, fullpath);
+		return MU_OK; /* skipped, not an error */
+	}
+
+	result = (cb)(fullpath,statbuf.st_mtime,data);
+
+	/* MU_STOP is a normal way for the callback to end the walk;
+	 * anything else that is not MU_OK is worth a warning */
+	if (G_UNLIKELY(result != MU_OK && result != MU_STOP))
+		g_warning ("%s: failed %d in callback (%s)",
+			   __FUNCTION__, result, fullpath);
+
+	return result;
+}
+
+
+/*
+ * determine if path is a maildir leaf-dir; ie.
if it's 'cur' or 'new'
+ * (we're skipping 'tmp' for obvious reasons)
+ *
+ * returns TRUE only when path ends in exactly "/cur" or "/new"
+ * (G_DIR_SEPARATOR followed by the name); NULL and paths shorter than
+ * four characters give FALSE.
+ *
+ * the function reads the string path points to, so it is marked
+ * G_GNUC_PURE rather than G_GNUC_CONST: a 'const' function must not
+ * examine memory through its pointer arguments.
+ */
+G_GNUC_PURE static gboolean
+is_maildir_new_or_cur (const char *path)
+{
+	size_t len;
+	const char *sfx;
+
+	/* path is the full path; it cannot possibly be shorter
+	 * than 4 for a maildir (/cur or /new)
+	 */
+	if (!path||(len = strlen(path)) < 4)
+		return FALSE;
+
+	sfx = &path[len - 4];
+
+	if (sfx[0] != G_DIR_SEPARATOR) /* small optimization */
+		return FALSE;
+	else if (sfx[1] != 'c' && sfx[1] != 'n')
+		return FALSE; /* optimization */
+	else
+		return (strcmp (sfx + 1, "cur") == 0 ||
+			strcmp (sfx + 1, "new") == 0);
+}
+
+/*
+ * check if there is a noindex file (MU_MAILDIR_NOINDEX_FILE) in the
+ * dir 'path'. returns TRUE when the file exists; FALSE otherwise. a
+ * failure of access() other than ENOENT is logged and also gives
+ * FALSE. the file name is built in a stack buffer (g_newa).
+ */
+static gboolean
+has_noindex_file (const char *path)
+{
+	char *fname;
+
+	/* room for path, the separator, the file name and the final NUL */
+	fname = g_newa (char, strlen(path) + 1 +
+			strlen(MU_MAILDIR_NOINDEX_FILE) + 1);
+	g_sprintf (fname, "%s%c%s", path, G_DIR_SEPARATOR, MU_MAILDIR_NOINDEX_FILE);
+
+	if (access (fname, F_OK) == 0)
+		return TRUE;
+	else if (errno != ENOENT)
+		g_warning ("error testing for noindex file: %s",
+			   strerror(errno));
+
+	return FALSE;
+}
+
+static gboolean
+_ignore_dir_entry (struct dirent *entry)
+{
+	const char *name;
+
+	/* if it's not a dir and not a file, ignore it.
+	 * note, this means also symlinks (DT_LNK) are ignored,
+	 * maybe make this optional. Also note that entry->d_type is
+	 * defined on Linux and the BSDs, but is not part of POSIX;
+	 * this needs a configure check */
+	if (entry->d_type != DT_REG &&
+	    entry->d_type != DT_DIR)
+		return TRUE;
+
+	name = entry->d_name;
+
+	if (name[0] != '.')
+		return FALSE;
+
+	/* ignore . and .. */
+	if (name[1] == '\0' ||
+	    (name[1] == '.'
&& name[2] == '\0'))
+		return TRUE;
+
+	/* ignore .notmuch, .nnmaildir */
+	if (name[1] == 'n') {/* optimization */
+		if (strcmp (name, ".notmuch") == 0 ||
+		    strcmp (name, ".nnmaildir") == 0)
+			return TRUE;
+	}
+
+	return FALSE;
+}
+
+/*
+ * handle one entry of the dir 'path': entries rejected by
+ * _ignore_dir_entry are skipped; regular files are passed to
+ * process_file, but only when 'path' itself is a cur/ or new/ dir;
+ * subdirs are walked recursively with process_dir unless they contain
+ * a noindex file. skipped entries give MU_OK, otherwise the result of
+ * process_file / process_dir is returned.
+ */
+static MuResult
+process_dir_entry (const char* path,struct dirent *entry,
+		   MuMaildirWalkMsgCallback cb_msg, MuMaildirWalkDirCallback cb_dir,
+		   void *data)
+{
+	char* fullpath;
+
+	/* ignore special dirs: */
+	if (_ignore_dir_entry (entry))
+		return MU_OK;
+
+	/* stack buffer with room for path, separator, name and NUL */
+	fullpath = g_newa (char, strlen(path) + 1 + strlen(entry->d_name) + 1);
+	sprintf (fullpath, "%s%c%s", path, G_DIR_SEPARATOR, entry->d_name);
+
+	switch (entry->d_type) {
+	case DT_REG:
+		/* we only want files in cur/ and new/ */
+		if (!is_maildir_new_or_cur (path))
+			return MU_OK;
+
+		return process_file (fullpath, cb_msg, data);
+
+	case DT_DIR: {
+		/* if it has a noindex file, we ignore this dir */
+		if (has_noindex_file (fullpath)) {
+			g_message ("ignoring dir %s", fullpath);
+			return MU_OK;
+		}
+
+		return process_dir (fullpath, cb_msg, cb_dir, data);
+	}
+
+
+	default:
+		return MU_OK; /* ignore other types */
+	}
+}
+
+/*
+ * make a slice-allocated copy of a dirent returned by readdir; free it
+ * with dirent_destroy.
+ */
+static struct dirent*
+dirent_copy (struct dirent *entry)
+{
+	/* zero-filled, so bytes beyond the copied record are defined */
+	struct dirent *d = g_slice_new0 (struct dirent);
+
+	/* NOTE: simply memcpy'ing sizeof(struct dirent) bytes will
+	 * give memory errors, so only d_reclen bytes are copied; the
+	 * length is clamped so a record longer than struct dirent can
+	 * never overflow the slice */
+	return (struct dirent*)memcpy (d, entry,
+				       G_MIN ((size_t)entry->d_reclen,
+					      sizeof *d));
+}
+
+/* free a dirent obtained from dirent_copy */
+static void
+dirent_destroy (struct dirent *entry)
+{
+	g_slice_free(struct dirent, entry);
+}
+
+/*
+ * order two dirents by inode number; returns <0, 0 or >0.
+ *
+ * the inode numbers are compared rather than subtracted: d_ino can be
+ * wider than gint, and a truncated difference may have the wrong sign.
+ */
+static gint
+dirent_cmp (struct dirent *d1, struct dirent *d2)
+{
+	if (d1->d_ino < d2->d_ino)
+		return -1;
+
+	return d1->d_ino > d2->d_ino ? 1 : 0;
+}
+
+/*
+ * walk the dir 'path': dir_cb (when set) is called on entering, the
+ * entries are handled one by one with process_dir_entry, and dir_cb is
+ * called again on leaving. returns MU_ERROR when the dir cannot be
+ * opened.
+ */
+static MuResult
+process_dir (const char* path, MuMaildirWalkMsgCallback msg_cb,
+	     MuMaildirWalkDirCallback dir_cb, void *data)
+{
+	MuResult result = MU_OK;
+	GList *lst, *c;
+	struct dirent *entry;
+	DIR* dir;
+
+	dir = opendir (path);
+	if (G_UNLIKELY(!dir)) {
+		g_warning ("failed to open %s: %s", path, strerror(errno));
+		return MU_ERROR;
+	}
+
+	if (dir_cb) {
+		MuResult rv = dir_cb (path, TRUE,
data);
+		if (rv != MU_OK) {
+			closedir (dir);
+			return rv;
+		}
+	}
+
+	/* we sort the inodes, which makes file-access much faster on
+	   some filesystems, such as ext3fs */
+	lst = NULL;
+	while ((entry = readdir (dir)))
+		lst = g_list_prepend (lst, dirent_copy(entry));
+
+	lst = g_list_sort (lst, (GCompareFunc)dirent_cmp);
+
+	/* stop at the first entry that does not return MU_OK */
+	for (c = lst; c && result == MU_OK; c = c->next)
+		result = process_dir_entry (path, (struct dirent*)c->data,
+					    msg_cb, dir_cb, data);
+
+	g_list_foreach (lst, (GFunc)dirent_destroy, NULL);
+	g_list_free (lst);
+
+	closedir (dir);
+
+	/* only tell the callback we are leaving the dir when everything
+	 * in it went ok; otherwise the MU_STOP or error result of an
+	 * entry would be replaced by the result of the leave callback
+	 * and the walk would carry on in the parent dir */
+	if (dir_cb && result == MU_OK)
+		return dir_cb (path, FALSE, data);
+
+	return result;
+}
+
+/*
+ * public entry point of the walker, see mu-maildir.h. path and cb_msg
+ * must not be NULL. a regular file is passed straight to
+ * process_file, a dir is walked with process_dir; any other file type
+ * is logged and gives MU_ERROR, as does a path that cannot be accessed
+ * or stat'ed.
+ */
+MuResult
+mu_maildir_walk (const char *path, MuMaildirWalkMsgCallback cb_msg,
+		 MuMaildirWalkDirCallback cb_dir, void *data)
+{
+	struct stat statbuf;
+
+	g_return_val_if_fail (path && cb_msg, MU_ERROR);
+
+	if (access(path, R_OK) != 0) {
+		g_warning ("cannot access %s: %s", path, strerror(errno));
+		return MU_ERROR;
+	}
+
+	if (stat (path, &statbuf) != 0) {
+		g_warning ("cannot stat %s: %s", path, strerror(errno));
+		return MU_ERROR;
+	}
+
+	if (S_ISREG(statbuf.st_mode))
+		return process_file (path, cb_msg, data);
+	else if (S_ISDIR(statbuf.st_mode))
+		return process_dir (path, cb_msg, cb_dir, data);
+	else
+		g_warning ("%s: unsupported file type for %s",
+			   __FUNCTION__, path);
+
+	return MU_ERROR;
+}
+
+
+
diff --git a/src/mu-maildir.h b/src/mu-maildir.h
index ee38fa01..ad4a694c 100644
--- a/src/mu-maildir.h
+++ b/src/mu-maildir.h
@@ -21,6 +21,10 @@
 #define __MU_MAILDIR_H__

 #include
+#include
+#include /* for mode_t */
+#include "mu-result.h" /* for MuResult */
+

 /**
  * create a new maildir. Note, if the function fails 'halfway', it
@@ -53,4 +57,50 @@ gboolean mu_maildir_mkmdir (const char* path, int mode,
 gboolean mu_maildir_link (const char* src, const char *targetpath,
 			  GError **err);

+
+
+/**
+ * MuMaildirWalkMsgCallback -- callback function for mu_maildir_walk;
+ * see the documentation there.
It will be called for each message
+ * found, with fullpath containing the full path to the message, a
+ * timestamp of the last modification time of this file, and a user_data
+ * pointer
+ */
+typedef MuResult (*MuMaildirWalkMsgCallback)
+(const char* fullpath, time_t timestamp, void *user_data);
+
+/**
+ * MuMaildirWalkDirCallback -- callback function for mu_maildir_walk; see the
+ * documentation there. It will be called each time a dir is entered or left,
+ * with 'enter' being TRUE upon entering, FALSE otherwise
+ */
+typedef MuResult (*MuMaildirWalkDirCallback)
+(const char* fullpath, gboolean enter, void *user_data);
+
+/**
+ * start a recursive walk of a maildir; for each file found, we call
+ * cb_msg with the full path of the file, the timestamp (mtime) of the
+ * file, and the *data pointer, for user data. the entries '.', '..',
+ * '.notmuch' and '.nnmaildir' are ignored, as well as
+ * files outside cur/ and new/ dirs and unreadable files; however,
+ * dotdirs are visited (ie. '.dotdir/cur'), so this enables Maildir++.
+ * (http://www.inter7.com/courierimap/README.maildirquota.html, search
+ * for 'Mission statement'). dirs containing a '.noindex' file are
+ * skipped as well.
+ *
+ * mu_maildir_walk stops if the callbacks return something different
+ * from MU_OK. For example, a callback can return MU_STOP to stop the
+ * scan, or some error.
+ *
+ * @param path the maildir path to scan; must not be NULL
+ * @param cb_msg the callback function called for each msg; must not
+ * be NULL
+ * @param cb_dir the callback function called for each dir, on entering
+ * and on leaving it; may be NULL
+ * @param data user data pointer, passed on to the callbacks
+ *
+ * @return a scanner result; MU_OK if everything went ok,
+ * MU_STOP if we want to stop, or MU_ERROR in
+ * case of error
+ */
+MuResult mu_maildir_walk (const char *path, MuMaildirWalkMsgCallback cb_msg,
+			  MuMaildirWalkDirCallback cb_dir, void *data);
+
 #endif /*__MU_MAILDIR_H__*/