* mu-query: pre-process queries to work around some mismatches with xapian

query parsing
This commit is contained in:
Dirk-Jan C. Binnema
2010-09-12 16:30:29 +03:00
parent bb3d9cc0b0
commit 80d3fb22b2
2 changed files with 103 additions and 27 deletions

View File

@ -54,7 +54,7 @@ init_mu_query (MuQuery *mqx, const char* dbpath)
mqx->_qparser->set_database(*mqx->_db); mqx->_qparser->set_database(*mqx->_db);
mqx->_qparser->set_default_op(Xapian::Query::OP_AND); mqx->_qparser->set_default_op(Xapian::Query::OP_AND);
mqx->_qparser->set_stemming_strategy mqx->_qparser->set_stemming_strategy
(Xapian::QueryParser::STEM_ALL); (Xapian::QueryParser::STEM_NONE);
memset (mqx->_sorters, 0, sizeof(mqx->_sorters)); memset (mqx->_sorters, 0, sizeof(mqx->_sorters));
mu_msg_field_foreach ((MuMsgFieldForEachFunc)add_prefix, mu_msg_field_foreach ((MuMsgFieldForEachFunc)add_prefix,
@ -86,6 +86,7 @@ uninit_mu_query (MuQuery *mqx)
} MU_XAPIAN_CATCH_BLOCK; } MU_XAPIAN_CATCH_BLOCK;
} }
static Xapian::Query static Xapian::Query
get_query (MuQuery * mqx, const char* searchexpr, int *err = 0) { get_query (MuQuery * mqx, const char* searchexpr, int *err = 0) {
@ -94,10 +95,7 @@ get_query (MuQuery * mqx, const char* searchexpr, int *err = 0) {
(searchexpr, (searchexpr,
Xapian::QueryParser::FLAG_BOOLEAN | Xapian::QueryParser::FLAG_BOOLEAN |
Xapian::QueryParser::FLAG_PHRASE | Xapian::QueryParser::FLAG_PHRASE |
Xapian::QueryParser::FLAG_BOOLEAN_ANY_CASE | Xapian::QueryParser::FLAG_BOOLEAN_ANY_CASE);
Xapian::QueryParser::FLAG_WILDCARD |
Xapian::QueryParser::FLAG_PURE_NOT |
Xapian::QueryParser::FLAG_PARTIAL);
} MU_XAPIAN_CATCH_BLOCK; } MU_XAPIAN_CATCH_BLOCK;
@ -111,7 +109,8 @@ static void
add_prefix (const MuMsgField* field, Xapian::QueryParser* qparser) add_prefix (const MuMsgField* field, Xapian::QueryParser* qparser)
{ {
if (!mu_msg_field_xapian_index(field) && if (!mu_msg_field_xapian_index(field) &&
!mu_msg_field_xapian_term(field)) !mu_msg_field_xapian_term(field) &&
!mu_msg_field_xapian_contact(field))
return; return;
const std::string prefix (mu_msg_field_xapian_prefix(field)); const std::string prefix (mu_msg_field_xapian_prefix(field));
@ -151,7 +150,7 @@ mu_query_new (const char* xpath)
mqx = g_new (MuQuery, 1); mqx = g_new (MuQuery, 1);
if (!init_mu_query (mqx, xpath)) { if (!init_mu_query (mqx, xpath)) {
g_warning ("failed to initialize the Xapian query"); g_critical ("failed to initialize the Xapian query object");
g_free (mqx); g_free (mqx);
return NULL; return NULL;
} }
@ -163,20 +162,85 @@ mu_query_new (const char* xpath)
void void
mu_query_destroy (MuQuery *self) mu_query_destroy (MuQuery *self)
{ {
if (!self) if (self) {
uninit_mu_query (self);
g_free (self);
}
}
/* State handed to each_check_prefix through mu_msg_field_foreach while
 * a candidate prefix is compared against the message fields. */
typedef struct _CheckPrefix {
	const char *pfx;   /* start of the candidate prefix; only the
			    * first 'len' characters belong to it */
	guint       len;   /* number of characters in the candidate */
	gboolean    match; /* becomes TRUE once a field has matched */
} CheckPrefix;
/* Callback for mu_msg_field_foreach: sets cpfx->match to TRUE when the
 * candidate prefix (the first cpfx->len characters at cpfx->pfx) is
 * exactly the shortcut or exactly the name of 'field'. Once a match has
 * been recorded, further invocations return immediately. */
static void
each_check_prefix (const MuMsgField *field, CheckPrefix *cpfx)
{
	const char *field_name, *field_shortcut;

	if (cpfx->match)
		return; /* an earlier field already matched */

	/* strncmp alone only proves that the first cpfx->len characters
	 * agree, which would accept e.g. "sub" for "subject"; so also
	 * require the field string to end right there.
	 *
	 * Indexing with [cpfx->len] relies on the candidate containing
	 * no NUL within its len characters (it is expected to be a span
	 * taken from inside a string, in front of a ':'): a successful
	 * strncmp then implies the field string is at least len
	 * characters long. */
	field_shortcut = mu_msg_field_shortcut (field);
	if (field_shortcut &&
	    strncmp (cpfx->pfx, field_shortcut, cpfx->len) == 0 &&
	    field_shortcut[cpfx->len] == '\0') {
		cpfx->match = TRUE;
		return;
	}

	field_name = mu_msg_field_name (field);
	if (field_name &&
	    strncmp (cpfx->pfx, field_name, cpfx->len) == 0 &&
	    field_name[cpfx->len] == '\0') {
		cpfx->match = TRUE;
		return;
	}
}
/* colon is a position inside q pointing at a ':' character. function
 * determines whether the word directly in front of the colon is a
 * registered prefix (like 'subject' or 'from' or 's'), by offering it
 * to each_check_prefix for every message field.
 *
 * Returns TRUE if some field matched; FALSE otherwise, including when
 * the ':' is the very first character of q. */
static gboolean
is_xapian_prefix (const char *q, const char *colon)
{
	const char *start;
	CheckPrefix cpfx;

	if (colon == q)
		return FALSE; /* : at beginning, not a prefix */

	/* track back from the character before the colon for as long as
	 * the preceding character is a letter, stopping at the beginning
	 * of the str. The cast is required: handing isalpha a negative
	 * char (e.g. a byte of a multi-byte UTF-8 sequence) is undefined
	 * behavior. */
	start = colon - 1;
	while (start > q && isalpha ((unsigned char)start[-1]))
		--start;

	cpfx.pfx   = start;
	cpfx.len   = (guint)(colon - start);
	cpfx.match = FALSE;

	mu_msg_field_foreach ((MuMsgFieldForEachFunc)each_check_prefix,
			      &cpfx);

	return cpfx.match;
}
/* preprocess a query to make them a bit more permissive */ /* preprocess a query to make them a bit more permissive */
gchar* char*
query_preprocess (const char *query) mu_query_preprocess (const char *query)
{ {
gchar *my_query; gchar *my_query;
//gchar *cur; gchar *cur;
g_return_val_if_fail (query, NULL);
/* translate the searchexpr to all lowercase; this /* translate the searchexpr to all lowercase; this
* fixes some of the false-negatives. A full fix * fixes some of the false-negatives. A full fix
@ -184,12 +248,15 @@ query_preprocess (const char *query)
*/ */
my_query = g_utf8_strdown (query, -1); my_query = g_utf8_strdown (query, -1);
/* replace @ with ' '; this fixes some other Xapian issues. for (cur = my_query; *cur; ++cur) {
* should be done in a bit nice way though... if (*cur == ':') /* we found a ':' */
/* if there's a registered xapian prefix before the
* ':', don't touch it. Otherwise replace ':' with
* a space
*/ */
// for (cur = my_query; *cur; ++cur) if (!is_xapian_prefix (my_query, cur))
// if (*cur == '@') *cur = ' ';
// *cur = ' '; }
return my_query; return my_query;
} }
@ -207,7 +274,7 @@ mu_query_run (MuQuery *self, const char* searchexpr,
char *preprocessed; char *preprocessed;
int err (0); int err (0);
preprocessed = query_preprocess (searchexpr); preprocessed = mu_query_preprocess (searchexpr);
Xapian::Query q(get_query(self, preprocessed, &err)); Xapian::Query q(get_query(self, preprocessed, &err));
if (err) { if (err) {

View File

@ -92,6 +92,15 @@ MuMsgIter* mu_query_run (MuQuery *self,
char* mu_query_as_string (MuQuery *self, char* mu_query_as_string (MuQuery *self,
const char* searchexpr) G_GNUC_WARN_UNUSED_RESULT; const char* searchexpr) G_GNUC_WARN_UNUSED_RESULT;
/**
 * pre-process the query; this function is useful mainly for debugging mu
 *
 * The pre-processed query is a lowercased copy of the input in which
 * every ':' that is not directly preceded by a registered field prefix
 * has been replaced by a space. The input string itself is not modified.
 *
 * @param query a query string; must not be NULL
 *
 * @return a newly allocated pre-processed query, free it with g_free;
 * NULL if query is NULL
 */
char* mu_query_preprocess (const char *query) G_GNUC_WARN_UNUSED_RESULT;
G_END_DECLS G_END_DECLS