* make max-msg-size a configurable parameter; refactor the setting of the

xapian batch size
This commit is contained in:
Dirk-Jan C. Binnema
2011-01-15 13:27:41 +02:00
parent 62acc7739d
commit c76af05a7a
11 changed files with 145 additions and 71 deletions

View File

@ -94,7 +94,12 @@ check_index_or_cleanup_params (MuConfig *opts)
g_warning ("the Xapian batch size must be non-negative");
return FALSE;
}
if (opts->max_msg_size < 0) {
g_warning ("the maximum message size must be non-negative");
return FALSE;
}
return TRUE;
}
@ -346,10 +351,12 @@ cmd_index_or_cleanup (MuConfig *opts)
return MU_EXITCODE_ERROR;
err = NULL;
if (!(midx = mu_index_new
(mu_runtime_xapian_dir(), opts->xbatchsize, &err)))
if (!(midx = mu_index_new (mu_runtime_xapian_dir(), &err)))
return handle_index_error_and_free (err);
mu_index_set_max_msg_size (midx, opts->max_msg_size);
mu_index_set_xbatch_size (midx, opts->xbatchsize);
/* we determine the maildir path only here, as it may depend on
* mu_index_last_used_maildir
*/

View File

@ -96,6 +96,8 @@ config_options_group_index (MuConfig * opts)
"don't clean up the database after indexing (false)", NULL},
{"xbatchsize", 0, 0, G_OPTION_ARG_INT, &opts->xbatchsize,
"set transaction batchsize for xapian commits (0)", NULL},
{"max-msg-size", 0, 0, G_OPTION_ARG_INT, &opts->max_msg_size,
"set the maximum size for message files", NULL},
{NULL, 0, 0, 0, NULL, NULL, NULL}
};

View File

@ -72,9 +72,10 @@ struct _MuConfig {
gboolean rebuild; /* empty the database before indexing */
gboolean autoupgrade; /* automatically upgrade db
* when needed */
int xbatchsize; /* batchsize for xapian
int xbatchsize; /* batchsize for xapian
* commits, or 0 for
* default */
int max_msg_size; /* maximum size for message files */
/* options for querying */
gboolean xquery; /* (obsolete) give the Xapian

View File

@ -35,16 +35,17 @@
#include "mu-util.h"
#define MU_LAST_USED_MAILDIR_KEY "last_used_maildir"
#define MU_MAILDIR_WALK_MAX_FILE_SIZE (64*1000*1000)
#define MU_INDEX_MAX_FILE_SIZE (50*1000*1000) /* 50 Mb */
/* Per-instance state of an index object; allocated and filled in by
 * mu_index_new. */
struct _MuIndex {
MuStore *_store; /* store opened with mu_store_new in mu_index_new */
gboolean _needs_reindex; /* NOTE(review): presumably the result of the
* reindex check in mu_index_new -- the
* assignment is not visible here */
gchar *_last_used_maildir; /* NOTE(review): presumably backs
* mu_index_last_used_maildir -- confirm */
guint _max_filesize; /* message files larger than this many bytes
* are ignored while indexing; initialised to
* MU_INDEX_MAX_FILE_SIZE */
};
MuIndex*
mu_index_new (const char *xpath, guint xbatchsize, GError **err)
mu_index_new (const char *xpath, GError **err)
{
MuIndex *index;
@ -52,7 +53,7 @@ mu_index_new (const char *xpath, guint xbatchsize, GError **err)
index = g_new0 (MuIndex, 1);
index->_store = mu_store_new (xpath, xbatchsize, err);
index->_store = mu_store_new (xpath, err);
if (!index->_store) {
g_warning ("%s: failed to open xapian store (%s)",
__FUNCTION__, xpath);
@ -60,6 +61,9 @@ mu_index_new (const char *xpath, guint xbatchsize, GError **err)
return NULL;
}
/* set the default max file size */
index->_max_filesize = MU_INDEX_MAX_FILE_SIZE;
/* see if we need to reindex the database; note, there is a small
* race-condition here, between mu_index_new and
* mu_index_run. Maybe do the check in mu_index_run
@ -73,7 +77,6 @@ mu_index_new (const char *xpath, guint xbatchsize, GError **err)
return index;
}
void
mu_index_destroy (MuIndex *index)
{
@ -88,13 +91,14 @@ mu_index_destroy (MuIndex *index)
struct _MuIndexCallbackData {
MuIndexMsgCallback _idx_msg_cb;
MuIndexDirCallback _idx_dir_cb;
MuStore* _store;
void* _user_data;
MuIndexStats* _stats;
gboolean _reindex;
time_t _dirstamp;
MuIndexMsgCallback _idx_msg_cb;
MuIndexDirCallback _idx_dir_cb;
MuStore* _store;
void* _user_data;
MuIndexStats* _stats;
gboolean _reindex;
time_t _dirstamp;
guint _max_filesize;
};
typedef struct _MuIndexCallbackData MuIndexCallbackData;
@ -181,9 +185,9 @@ on_run_maildir_msg (const char* fullpath, const char* mdir,
gboolean updated;
/* protect against too big messages */
if (G_UNLIKELY(statbuf->st_size > MU_MAILDIR_WALK_MAX_FILE_SIZE)) {
g_warning ("ignoring because bigger than %d bytes: %s",
MU_MAILDIR_WALK_MAX_FILE_SIZE, fullpath);
if (G_UNLIKELY(statbuf->st_size > data->_max_filesize)) {
g_warning ("ignoring because bigger than %u bytes: %s",
data->_max_filesize, fullpath);
return MU_OK; /* not an error */
}
@ -200,10 +204,6 @@ on_run_maildir_msg (const char* fullpath, const char* mdir,
if (result == MU_OK && data && data->_stats) { /* update statistics */
++data->_stats->_processed;
updated ? ++data->_stats->_updated : ++data->_stats->_uptodate;
/* if (updated) */
/* ++data->_stats->_updated; */
/* else */
/* ++data->_stats->_uptodate; */
}
return result;
@ -260,7 +260,7 @@ check_path (const char* path)
static void
init_cb_data (MuIndexCallbackData *cb_data, MuStore *xapian,
gboolean reindex, MuIndexStats *stats,
gboolean reindex, guint max_filesize, MuIndexStats *stats,
MuIndexMsgCallback msg_cb, MuIndexDirCallback dir_cb,
void *user_data)
{
@ -268,11 +268,12 @@ init_cb_data (MuIndexCallbackData *cb_data, MuStore *xapian,
cb_data->_idx_dir_cb = dir_cb;
cb_data->_user_data = user_data;
cb_data->_store = xapian;
cb_data->_store = xapian;
cb_data->_reindex = reindex;
cb_data->_dirstamp = 0;
cb_data->_max_filesize = max_filesize;
cb_data->_stats = stats;
if (cb_data->_stats)
memset (cb_data->_stats, 0, sizeof(MuIndexStats));
@ -298,6 +299,26 @@ mu_index_last_used_maildir (MuIndex *index)
MU_LAST_USED_MAILDIR_KEY);
}
/* Set the size limit (in bytes) above which message files are ignored
 * during indexing. Passing 0 for max_size restores the built-in
 * default, MU_INDEX_MAX_FILE_SIZE. Does nothing when index is NULL
 * (apart from the g_return_if_fail diagnostic). */
void
mu_index_set_max_msg_size (MuIndex *index, guint max_size)
{
	g_return_if_fail (index);

	/* 0 is the "use the default" sentinel, not a real limit */
	index->_max_filesize =
		(max_size != 0) ? max_size : MU_INDEX_MAX_FILE_SIZE;
}
/* Forward a new Xapian transaction batch size to the store held by
 * this index. Interpretation of the value (including 0) is left to
 * mu_store_set_batch_size. */
void
mu_index_set_xbatch_size (MuIndex *index, guint xbatchsize)
{
	MuStore *store;

	g_return_if_fail (index);

	store = index->_store;
	mu_store_set_batch_size (store, xbatchsize);
}
MuResult
mu_index_run (MuIndex *index, const char* path,
@ -319,7 +340,8 @@ mu_index_run (MuIndex *index, const char* path,
return MU_ERROR;
}
init_cb_data (&cb_data, index->_store, reindex, stats,
init_cb_data (&cb_data, index->_store, reindex,
index->_max_filesize, stats,
msg_cb, dir_cb, user_data);
update_last_used_maildir (index, path);
@ -400,15 +422,11 @@ typedef struct _CleanupData CleanupData;
static MuResult
foreach_doc_cb (const char* path, CleanupData *cudata)
{
MuResult rv;
if (access (path, R_OK) != 0) {
g_debug ("not readable: %s; removing", path);
rv = mu_store_remove (cudata->_store, path);
if (rv != MU_OK)
return rv; /* something went wrong... bail out */
if (errno != EACCES)
g_warning ("cannot access %s: %s", path, strerror(errno));
if (!mu_store_remove (cudata->_store, path))
return MU_ERROR; /* something went wrong... bail out */
if (cudata->_stats)
++cudata->_stats->_cleaned_up;
}

View File

@ -46,13 +46,12 @@ typedef struct _MuIndexStats MuIndexStats;
*
* @param xpath path to the 'homedir'; the xapian directory will be
* this homedir/xapian
* @param batchsize for Xapian queries, or 0 for the default
* @param err to receive error or NULL; there are only errors when this
* function returns NULL. Possible errors: see mu-error.h
*
* @return a new MuIndex instance, or NULL in case of error
*/
MuIndex* mu_index_new (const char* muhome, guint batchsize, GError **err)
MuIndex* mu_index_new (const char* muhome, GError **err)
G_GNUC_MALLOC G_GNUC_WARN_UNUSED_RESULT;
@ -64,6 +63,28 @@ MuIndex* mu_index_new (const char* muhome, guint batchsize, GError **err)
void mu_index_destroy (MuIndex *index);
/**
* change the maximum file size that mu-index considers from its
* default (MU_INDEX_MAX_FILE_SIZE). Note that the maximum size is a
* protection against mu (or the libraries it uses) allocating too
* much memory, which can lead to problems
*
* @param index a mu index object
* @param max_size the maximum msg size, or 0 to reset to the default
*/
void mu_index_set_max_msg_size (MuIndex *index, guint max_size);
/**
* change batch size for Xapian store transaction (see
* 'mu_store_set_batch_size')
*
* @param index a mu index object
* @param xbatchsize the batch size, or 0 to reset to the default
*/
void mu_index_set_xbatch_size (MuIndex *index, guint xbatchsize);
/**
* get the maildir for the last run of indexing for the
* current database

View File

@ -1,5 +1,5 @@
/*
** Copyright (C) 2008-2010 Dirk-Jan C. Binnema <djcb@djcbsoftware.nl>
** Copyright (C) 2008-2011 Dirk-Jan C. Binnema <djcb@djcbsoftware.nl>
**
** This program is free software; you can redistribute it and/or modify it
** under the terms of the GNU General Public License as published by the
@ -119,7 +119,7 @@ check_version (MuStore *store)
}
MuStore*
mu_store_new (const char* xpath, guint batchsize, GError **err)
mu_store_new (const char* xpath, GError **err)
{
MuStore *store (0);
@ -139,7 +139,7 @@ mu_store_new (const char* xpath, guint batchsize, GError **err)
/* keep count of processed docs */
store->_in_transaction = false;
store->_processed = 0;
store->_trx_size = batchsize ? batchsize : MU_STORE_DEFAULT_TRX_SIZE;
store->_trx_size = MU_STORE_DEFAULT_TRX_SIZE;
add_synonyms (store);
@ -177,6 +177,18 @@ mu_store_destroy (MuStore *store)
}
/* Set the transaction size of the store, i.e. the value the processed
 * counter is taken modulo of when deciding whether to commit. A
 * batchsize of 0 selects MU_STORE_DEFAULT_TRX_SIZE. */
void
mu_store_set_batch_size (MuStore *store, guint batchsize)
{
	g_return_if_fail (store);

	/* 0 means "no explicit size requested": fall back to the default */
	store->_trx_size = batchsize ? batchsize : MU_STORE_DEFAULT_TRX_SIZE;
}
unsigned
mu_store_count (MuStore *store)
@ -521,11 +533,11 @@ mu_store_store (MuStore *store, MuMsg *msg)
}
MuResult
mu_store_remove (MuStore *store, const char* msgpath)
gboolean
mu_store_remove (MuStore *store, const char *msgpath)
{
g_return_val_if_fail (store, MU_ERROR);
g_return_val_if_fail (msgpath, MU_ERROR);
g_return_val_if_fail (store, FALSE);
g_return_val_if_fail (msgpath, FALSE);
try {
const std::string uid (get_message_uid (msgpath));
@ -533,18 +545,15 @@ mu_store_remove (MuStore *store, const char* msgpath)
begin_trx_if (store, !store->_in_transaction);
store->_db->delete_document (uid);
g_debug ("deleting %s", msgpath);
++store->_processed;
/* do we need to commit now? */
bool commit_now = store->_processed % store->_trx_size == 0;
commit_trx_if (store, commit_now);
return MU_OK;
return TRUE;
} MU_XAPIAN_CATCH_BLOCK_RETURN (MU_ERROR);
} MU_XAPIAN_CATCH_BLOCK_RETURN (FALSE);
}
gboolean

View File

@ -41,7 +41,7 @@ typedef struct _MuStore MuStore;
*
* @return a new MuStore object, or NULL in case of error
*/
MuStore* mu_store_new (const char *path, guint batchsize, GError **err)
MuStore* mu_store_new (const char *path, GError **err)
G_GNUC_MALLOC G_GNUC_WARN_UNUSED_RESULT;
@ -53,6 +53,20 @@ MuStore* mu_store_new (const char *path, guint batchsize, GError **err)
void mu_store_destroy (MuStore *store);
/**
* set the Xapian batch size for this store. Normally, there's no need
* to use this function as the default is good enough; however, if you
* use mu in a very memory-constrained environment, you can set the
* batchsize to e.g. 1000 at the cost of significant slow-down.
*
* @param store a valid store object
* @param batchsize the new batch size; or 0 to reset to
* the default batch size
*/
void mu_store_set_batch_size (MuStore *store, guint batchsize);
/**
* get the number of documents in the database
*
@ -104,8 +118,7 @@ MuResult mu_store_store (MuStore *store, MuMsg *msg);
*
* @return TRUE if it succeeded, FALSE otherwise
*/
MuResult mu_store_remove (MuStore *store,
const char* msgpath);
gboolean mu_store_remove (MuStore *store, const char* msgpath);
/**
@ -114,10 +127,9 @@ MuResult mu_store_remove (MuStore *store,
* @param store a store
* @param path the message path
*
* @return
* @return TRUE if the message exists, FALSE otherwise
*/
gboolean mu_store_contains_message (MuStore *store,
const char* path);
gboolean mu_store_contains_message (MuStore *store, const char* path);
/**
* store a timestamp for a directory

View File

@ -110,7 +110,7 @@ test_mu_index (void)
xpath = g_strdup_printf ("%s%c%s", muhome, G_DIR_SEPARATOR, "xapian");
store = mu_store_new (xpath, 0, NULL);
store = mu_store_new (xpath, NULL);
g_assert (store);
g_assert_cmpuint (mu_store_count (store), ==, 5);

View File

@ -42,7 +42,7 @@ test_mu_store_new_destroy (void)
g_assert (tmpdir);
err = NULL;
store = mu_store_new (tmpdir, 12345, &err);
store = mu_store_new (tmpdir, &err);
g_assert (store);
g_assert (err == NULL);
@ -67,7 +67,7 @@ test_mu_store_version (void)
g_assert (tmpdir);
err = NULL;
store = mu_store_new (tmpdir, 789, &err);
store = mu_store_new (tmpdir, &err);
g_assert (store);
g_assert (err == NULL);
@ -90,7 +90,7 @@ test_mu_store_store_and_count (void)
tmpdir = test_mu_common_get_random_tmpdir();
g_assert (tmpdir);
store = mu_store_new (tmpdir, 1, NULL);
store = mu_store_new (tmpdir, NULL);
g_assert (store);
g_assert_cmpuint (0,==,mu_store_count (store));
@ -138,7 +138,7 @@ test_mu_store_store_remove_and_count (void)
tmpdir = test_mu_common_get_random_tmpdir();
g_assert (tmpdir);
store = mu_store_new (tmpdir, 0, NULL);
store = mu_store_new (tmpdir, NULL);
g_assert (store);
g_assert_cmpuint (0,==,mu_store_count (store));