* mu-query: pre-process queries to work around some mismatches with xapian
query parsing
This commit is contained in:
105
src/mu-query.cc
105
src/mu-query.cc
@ -54,7 +54,7 @@ init_mu_query (MuQuery *mqx, const char* dbpath)
|
|||||||
mqx->_qparser->set_database(*mqx->_db);
|
mqx->_qparser->set_database(*mqx->_db);
|
||||||
mqx->_qparser->set_default_op(Xapian::Query::OP_AND);
|
mqx->_qparser->set_default_op(Xapian::Query::OP_AND);
|
||||||
mqx->_qparser->set_stemming_strategy
|
mqx->_qparser->set_stemming_strategy
|
||||||
(Xapian::QueryParser::STEM_ALL);
|
(Xapian::QueryParser::STEM_NONE);
|
||||||
|
|
||||||
memset (mqx->_sorters, 0, sizeof(mqx->_sorters));
|
memset (mqx->_sorters, 0, sizeof(mqx->_sorters));
|
||||||
mu_msg_field_foreach ((MuMsgFieldForEachFunc)add_prefix,
|
mu_msg_field_foreach ((MuMsgFieldForEachFunc)add_prefix,
|
||||||
@ -86,6 +86,7 @@ uninit_mu_query (MuQuery *mqx)
|
|||||||
} MU_XAPIAN_CATCH_BLOCK;
|
} MU_XAPIAN_CATCH_BLOCK;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
static Xapian::Query
|
static Xapian::Query
|
||||||
get_query (MuQuery * mqx, const char* searchexpr, int *err = 0) {
|
get_query (MuQuery * mqx, const char* searchexpr, int *err = 0) {
|
||||||
|
|
||||||
@ -94,10 +95,7 @@ get_query (MuQuery * mqx, const char* searchexpr, int *err = 0) {
|
|||||||
(searchexpr,
|
(searchexpr,
|
||||||
Xapian::QueryParser::FLAG_BOOLEAN |
|
Xapian::QueryParser::FLAG_BOOLEAN |
|
||||||
Xapian::QueryParser::FLAG_PHRASE |
|
Xapian::QueryParser::FLAG_PHRASE |
|
||||||
Xapian::QueryParser::FLAG_BOOLEAN_ANY_CASE |
|
Xapian::QueryParser::FLAG_BOOLEAN_ANY_CASE);
|
||||||
Xapian::QueryParser::FLAG_WILDCARD |
|
|
||||||
Xapian::QueryParser::FLAG_PURE_NOT |
|
|
||||||
Xapian::QueryParser::FLAG_PARTIAL);
|
|
||||||
|
|
||||||
} MU_XAPIAN_CATCH_BLOCK;
|
} MU_XAPIAN_CATCH_BLOCK;
|
||||||
|
|
||||||
@ -111,7 +109,8 @@ static void
|
|||||||
add_prefix (const MuMsgField* field, Xapian::QueryParser* qparser)
|
add_prefix (const MuMsgField* field, Xapian::QueryParser* qparser)
|
||||||
{
|
{
|
||||||
if (!mu_msg_field_xapian_index(field) &&
|
if (!mu_msg_field_xapian_index(field) &&
|
||||||
!mu_msg_field_xapian_term(field))
|
!mu_msg_field_xapian_term(field) &&
|
||||||
|
!mu_msg_field_xapian_contact(field))
|
||||||
return;
|
return;
|
||||||
|
|
||||||
const std::string prefix (mu_msg_field_xapian_prefix(field));
|
const std::string prefix (mu_msg_field_xapian_prefix(field));
|
||||||
@ -151,7 +150,7 @@ mu_query_new (const char* xpath)
|
|||||||
mqx = g_new (MuQuery, 1);
|
mqx = g_new (MuQuery, 1);
|
||||||
|
|
||||||
if (!init_mu_query (mqx, xpath)) {
|
if (!init_mu_query (mqx, xpath)) {
|
||||||
g_warning ("failed to initialize the Xapian query");
|
g_critical ("failed to initialize the Xapian query object");
|
||||||
g_free (mqx);
|
g_free (mqx);
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
@ -163,20 +162,85 @@ mu_query_new (const char* xpath)
|
|||||||
void
|
void
|
||||||
mu_query_destroy (MuQuery *self)
|
mu_query_destroy (MuQuery *self)
|
||||||
{
|
{
|
||||||
if (!self)
|
if (self) {
|
||||||
|
uninit_mu_query (self);
|
||||||
|
g_free (self);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
struct _CheckPrefix {
|
||||||
|
const char *pfx;
|
||||||
|
guint len;
|
||||||
|
gboolean match;
|
||||||
|
};
|
||||||
|
typedef struct _CheckPrefix CheckPrefix;
|
||||||
|
|
||||||
|
static void
|
||||||
|
each_check_prefix (const MuMsgField *field, CheckPrefix *cpfx)
|
||||||
|
{
|
||||||
|
const char *field_name, *field_shortcut;
|
||||||
|
|
||||||
|
if (cpfx->match)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
uninit_mu_query (self);
|
field_shortcut = mu_msg_field_shortcut (field);
|
||||||
|
if (field_shortcut &&
|
||||||
|
strncmp (cpfx->pfx, field_shortcut, cpfx->len) == 0) {
|
||||||
|
cpfx->match = TRUE;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
g_free (self);
|
field_name = mu_msg_field_name (field);
|
||||||
|
if (field_name &&
|
||||||
|
strncmp (cpfx->pfx, field_name, cpfx->len) == 0) {
|
||||||
|
cpfx->match = TRUE;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/* colon points at a ':' character inside q. This function
|
||||||
|
* determines whether the word before it is a registered prefix (like
|
||||||
|
* 'subject' or 'from' or 's') */
|
||||||
|
static gboolean
|
||||||
|
is_xapian_prefix (const char *q, const char *colon)
|
||||||
|
{
|
||||||
|
const char *cur;
|
||||||
|
|
||||||
|
if (colon == q)
|
||||||
|
return FALSE; /* : at beginning, not a prefix */
|
||||||
|
|
||||||
|
/* track back from colon until a boundary or beginning of the
|
||||||
|
* str */
|
||||||
|
for (cur = colon - 1; cur >= q; --cur) {
|
||||||
|
|
||||||
|
if (cur == q || !isalpha (*(cur-1))) {
|
||||||
|
|
||||||
|
CheckPrefix cpfx;
|
||||||
|
memset (&cpfx, 0, sizeof(CheckPrefix));
|
||||||
|
|
||||||
|
cpfx.pfx = cur;
|
||||||
|
cpfx.len = (colon - cur);
|
||||||
|
cpfx.match = FALSE;
|
||||||
|
|
||||||
|
mu_msg_field_foreach ((MuMsgFieldForEachFunc)each_check_prefix,
|
||||||
|
&cpfx);
|
||||||
|
|
||||||
|
return (cpfx.match);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return FALSE;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* preprocess a query to make it a bit more permissive */
|
/* preprocess a query to make it a bit more permissive */
|
||||||
gchar*
|
char*
|
||||||
query_preprocess (const char *query)
|
mu_query_preprocess (const char *query)
|
||||||
{
|
{
|
||||||
gchar *my_query;
|
gchar *my_query;
|
||||||
//gchar *cur;
|
gchar *cur;
|
||||||
|
|
||||||
|
g_return_val_if_fail (query, NULL);
|
||||||
|
|
||||||
/* translate the searchexpr to all lowercase; this
|
/* translate the searchexpr to all lowercase; this
|
||||||
* will fix some of the false-negatives. A full fix
|
* will fix some of the false-negatives. A full fix
|
||||||
@ -184,12 +248,15 @@ query_preprocess (const char *query)
|
|||||||
*/
|
*/
|
||||||
my_query = g_utf8_strdown (query, -1);
|
my_query = g_utf8_strdown (query, -1);
|
||||||
|
|
||||||
/* replace @ with ' '; this fixes some other Xapian issues.
|
for (cur = my_query; *cur; ++cur) {
|
||||||
* should be done in a bit nice way though...
|
if (*cur == ':') /* we found a ':' */
|
||||||
|
/* if there's a registered xapian prefix before the
|
||||||
|
* ':', don't touch it. Otherwise replace ':' with
|
||||||
|
* a space
|
||||||
*/
|
*/
|
||||||
// for (cur = my_query; *cur; ++cur)
|
if (!is_xapian_prefix (my_query, cur))
|
||||||
// if (*cur == '@')
|
*cur = ' ';
|
||||||
// *cur = ' ';
|
}
|
||||||
|
|
||||||
return my_query;
|
return my_query;
|
||||||
}
|
}
|
||||||
@ -207,7 +274,7 @@ mu_query_run (MuQuery *self, const char* searchexpr,
|
|||||||
char *preprocessed;
|
char *preprocessed;
|
||||||
int err (0);
|
int err (0);
|
||||||
|
|
||||||
preprocessed = query_preprocess (searchexpr);
|
preprocessed = mu_query_preprocess (searchexpr);
|
||||||
|
|
||||||
Xapian::Query q(get_query(self, preprocessed, &err));
|
Xapian::Query q(get_query(self, preprocessed, &err));
|
||||||
if (err) {
|
if (err) {
|
||||||
|
|||||||
@ -92,6 +92,15 @@ MuMsgIter* mu_query_run (MuQuery *self,
|
|||||||
char* mu_query_as_string (MuQuery *self,
|
char* mu_query_as_string (MuQuery *self,
|
||||||
const char* searchexpr) G_GNUC_WARN_UNUSED_RESULT;
|
const char* searchexpr) G_GNUC_WARN_UNUSED_RESULT;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* pre-process the query; this function is useful mainly for debugging mu
|
||||||
|
*
|
||||||
|
* @param query a query string
|
||||||
|
*
|
||||||
|
* @return a pre-processed query, free it with g_free
|
||||||
|
*/
|
||||||
|
char* mu_query_preprocess (const char *query) G_GNUC_WARN_UNUSED_RESULT;
|
||||||
|
|
||||||
|
|
||||||
G_END_DECLS
|
G_END_DECLS
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user