integrate new query parser
This commit is contained in:
418
lib/mu-str.c
418
lib/mu-str.c
@ -175,30 +175,6 @@ mu_str_display_contact (const char *str)
|
||||
}
|
||||
|
||||
|
||||
gint64
|
||||
mu_str_size_parse_bkm (const char* str)
|
||||
{
|
||||
gint64 num;
|
||||
|
||||
g_return_val_if_fail (str, -1);
|
||||
|
||||
if (!isdigit(str[0]))
|
||||
return -1;
|
||||
|
||||
num = atoi(str);
|
||||
for (++str; isdigit(*str); ++str);
|
||||
|
||||
switch (tolower(*str)) {
|
||||
case '\0':
|
||||
case 'b' : return num; /* bytes */
|
||||
case 'k': return num * 1000; /* kilobyte */
|
||||
case 'm': return num * 1000 * 1000; /* megabyte */
|
||||
default:
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
char*
|
||||
mu_str_replace (const char *str, const char *substr, const char *repl)
|
||||
{
|
||||
@ -224,9 +200,6 @@ mu_str_replace (const char *str, const char *substr, const char *repl)
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
char*
|
||||
mu_str_from_list (const GSList *lst, char sepa)
|
||||
{
|
||||
@ -396,397 +369,6 @@ mu_str_subject_normalize (const gchar* str)
|
||||
}
|
||||
|
||||
|
||||
struct _CheckPrefix {
|
||||
const char *str;
|
||||
gboolean match;
|
||||
gboolean range_field;
|
||||
};
|
||||
typedef struct _CheckPrefix CheckPrefix;
|
||||
|
||||
|
||||
|
||||
static void
|
||||
each_check_prefix (MuMsgFieldId mfid, CheckPrefix *cpfx)
|
||||
{
|
||||
const char *pfx;
|
||||
char pfx_short[3] = { 'X', ':', '\0'};
|
||||
char k;
|
||||
|
||||
if (!cpfx || cpfx->match)
|
||||
return;
|
||||
|
||||
k = pfx_short[0] = mu_msg_field_shortcut (mfid);
|
||||
if (k && g_str_has_prefix (cpfx->str, pfx_short)) {
|
||||
cpfx->match = TRUE;
|
||||
cpfx->range_field = mu_msg_field_is_range_field (mfid);
|
||||
}
|
||||
|
||||
pfx = mu_msg_field_name (mfid);
|
||||
if (pfx && g_str_has_prefix (cpfx->str, pfx) &&
|
||||
cpfx->str[strlen(pfx)] == ':') {
|
||||
cpfx->match = TRUE;
|
||||
cpfx->range_field = mu_msg_field_is_range_field (mfid);
|
||||
}
|
||||
}
|
||||
|
||||
/* check if it looks like either i:<msgid> or msgid:<msgid> */
|
||||
static gboolean
|
||||
is_msgid_field (const char *str)
|
||||
{
|
||||
const char *name;
|
||||
|
||||
if (!str || strlen(str) < 3)
|
||||
return FALSE;
|
||||
|
||||
if (str[0] == mu_msg_field_shortcut (MU_MSG_FIELD_ID_MSGID) &&
|
||||
str[1] == ':')
|
||||
return TRUE;
|
||||
|
||||
name = mu_msg_field_name (MU_MSG_FIELD_ID_MSGID);
|
||||
if (g_str_has_prefix (str, name) && str[strlen(name)] == ':')
|
||||
return TRUE;
|
||||
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
/* message-ids need a bit more massaging -- we replace all
|
||||
* non-alphanum with '_'. Note, this function assumes we're looking at
|
||||
* a msg-id field, ie. i:<msgid> or msgid:<msgid> */
|
||||
char*
|
||||
mu_str_process_msgid (const char *str, gboolean query)
|
||||
{
|
||||
char *s, *c;
|
||||
|
||||
g_return_val_if_fail (str, NULL);
|
||||
g_return_val_if_fail (!query || strchr(str, ':'), NULL);
|
||||
|
||||
if (!str)
|
||||
return NULL;
|
||||
|
||||
s = g_strdup (str);
|
||||
|
||||
if (query)
|
||||
c = strchr (s, ':') + 1;
|
||||
else
|
||||
c = s;
|
||||
|
||||
for (; *c; ++c)
|
||||
*c = isalnum (*c) ? tolower (*c) : '_';
|
||||
|
||||
return s;
|
||||
}
|
||||
|
||||
|
||||
|
||||
static void
|
||||
check_for_field (const char *str, gboolean *is_field,
|
||||
gboolean *is_range_field)
|
||||
{
|
||||
CheckPrefix pfx;
|
||||
|
||||
pfx.str = str;
|
||||
|
||||
/* skip any non-alphanum starts in cpfx->str; this is to
|
||||
* handle the case where we have e.g. "(maildir:/abc)"
|
||||
*/
|
||||
while (pfx.str && *pfx.str && !isalnum(*pfx.str))
|
||||
++pfx.str;
|
||||
|
||||
pfx.match = pfx.range_field = FALSE;
|
||||
|
||||
mu_msg_field_foreach ((MuMsgFieldForeachFunc)each_check_prefix,
|
||||
&pfx);
|
||||
/* also check special prefixes... */
|
||||
if (!pfx.match)
|
||||
pfx.match =
|
||||
g_str_has_prefix
|
||||
(str, MU_MSG_FIELD_PSEUDO_CONTACT ":") ||
|
||||
g_str_has_prefix
|
||||
(str, MU_MSG_FIELD_PSEUDO_RECIP ":");
|
||||
|
||||
*is_field = pfx.match;
|
||||
*is_range_field = pfx.range_field;
|
||||
}
|
||||
|
||||
|
||||
static gboolean
|
||||
handle_esc_maybe (GString *gstr, char **cur, gunichar uc,
|
||||
gboolean query_esc, gboolean range_field)
|
||||
{
|
||||
char kar;
|
||||
|
||||
kar = *cur[0];
|
||||
|
||||
if (query_esc) {
|
||||
switch (kar) {
|
||||
case ':':
|
||||
case '(':
|
||||
case ')':
|
||||
case '*':
|
||||
case '&':
|
||||
case '"':
|
||||
g_string_append_c (gstr, kar);
|
||||
return TRUE;
|
||||
case '.':
|
||||
if (!range_field)
|
||||
break;
|
||||
|
||||
if ((*cur)[1] == '.' && (*cur)[2] != '.') {
|
||||
g_string_append (gstr, "..");
|
||||
*cur = g_utf8_next_char (*cur);
|
||||
return TRUE;
|
||||
}
|
||||
default: break;
|
||||
}
|
||||
}
|
||||
|
||||
if (g_unichar_ispunct(uc) || isblank(kar)) {
|
||||
g_string_append_c (gstr, '_');
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
|
||||
static char*
|
||||
process_str (const char *str, gboolean xapian_esc, gboolean query_esc)
|
||||
{
|
||||
GString *gstr;
|
||||
char *norm, *cur;
|
||||
gboolean is_field, is_range_field;
|
||||
|
||||
norm = g_utf8_normalize (str, -1, G_NORMALIZE_ALL);
|
||||
if (G_UNLIKELY(!norm)) { /* not valid utf8? */
|
||||
char *u8;
|
||||
u8 = mu_str_utf8ify (str);
|
||||
norm = g_utf8_normalize (u8, -1, G_NORMALIZE_ALL);
|
||||
g_free (u8);
|
||||
}
|
||||
|
||||
if (!norm)
|
||||
return NULL;
|
||||
|
||||
/* msg-id needs some special care in queries */
|
||||
if (query_esc && is_msgid_field (str))
|
||||
return mu_str_process_msgid (str, TRUE);
|
||||
|
||||
check_for_field (str, &is_field, &is_range_field);
|
||||
gstr = g_string_sized_new (strlen (norm));
|
||||
|
||||
for (cur = norm; cur && *cur; cur = g_utf8_next_char (cur)) {
|
||||
|
||||
gunichar uc;
|
||||
uc = g_utf8_get_char (cur);
|
||||
if (xapian_esc)
|
||||
if (handle_esc_maybe (gstr, &cur, uc, query_esc,
|
||||
is_range_field))
|
||||
continue;
|
||||
|
||||
if (g_unichar_ismark(uc))
|
||||
continue;
|
||||
|
||||
if (!is_range_field)
|
||||
uc = g_unichar_tolower (uc);
|
||||
|
||||
g_string_append_unichar (gstr, uc);
|
||||
}
|
||||
|
||||
g_free (norm);
|
||||
return g_string_free (gstr, FALSE);
|
||||
}
|
||||
|
||||
|
||||
char*
|
||||
mu_str_process_text (const char *str)
|
||||
{
|
||||
g_return_val_if_fail (str, NULL);
|
||||
|
||||
return process_str (str, FALSE, FALSE);
|
||||
|
||||
}
|
||||
|
||||
|
||||
char*
|
||||
mu_str_process_term (const char *str)
|
||||
{
|
||||
g_return_val_if_fail (str, NULL);
|
||||
|
||||
return process_str (str, TRUE, FALSE);
|
||||
|
||||
}
|
||||
|
||||
|
||||
char*
|
||||
mu_str_process_query_term (const char *str)
|
||||
{
|
||||
g_return_val_if_fail (str, NULL);
|
||||
|
||||
return process_str (str, TRUE, TRUE);
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* Split simple search term into prefix, expression and suffix.
|
||||
* Meant to handle cases like "(maildir:/abc)", prefix and
|
||||
* suffix are the non-alphanumeric stuff at the beginning
|
||||
* and the end of string.
|
||||
*
|
||||
* Values of *pfx, *cond and *sfx will be allocated from heap
|
||||
* and must be g_free()d.
|
||||
*
|
||||
* Returns TRUE if all went fine and FALSE if some error was
|
||||
* occured.
|
||||
*/
|
||||
static gboolean
|
||||
split_term (const gchar *term,
|
||||
const gchar **pfx, const gchar **cond, const gchar **sfx)
|
||||
{
|
||||
size_t l;
|
||||
const gchar *start, *tail;
|
||||
const gchar *p, *c, *s;
|
||||
|
||||
g_return_val_if_fail (term, FALSE);
|
||||
g_return_val_if_fail (pfx, FALSE);
|
||||
g_return_val_if_fail (cond, FALSE);
|
||||
g_return_val_if_fail (sfx, FALSE);
|
||||
|
||||
l = strlen (term);
|
||||
if (l == 0) {
|
||||
p = g_strdup ("");
|
||||
c = g_strdup ("");
|
||||
s = g_strdup ("");
|
||||
goto _done;
|
||||
}
|
||||
|
||||
/*
|
||||
* Invariants:
|
||||
* - start will point to the first symbol after leading
|
||||
* non-alphanumerics (can be alphanumeric or '\0');
|
||||
* - tail will point to the beginning of trailing
|
||||
* non-alphanumerics or '\0'.
|
||||
* So:
|
||||
* - len (prefix) = start - term;
|
||||
* - len (cond) = tail - start;
|
||||
* - len (suffix) = term + len (term) - tail.
|
||||
*/
|
||||
for (start = term; *start && !isalnum (*start); start++);
|
||||
for (tail = term + l; tail > start && !isalnum (*(tail-1)); tail--);
|
||||
|
||||
p = g_strndup (term, start - term);
|
||||
c = g_strndup (start, tail - start);
|
||||
s = g_strndup (tail, term + l - tail);
|
||||
|
||||
_done:
|
||||
if (!p || !c || !s) {
|
||||
g_free ((gchar *)p);
|
||||
g_free ((gchar *)c);
|
||||
g_free ((gchar *)s);
|
||||
return FALSE;
|
||||
} else {
|
||||
*pfx = p;
|
||||
*cond = c;
|
||||
*sfx = s;
|
||||
return TRUE;
|
||||
}
|
||||
/* NOTREACHED */
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Fixup handlers.
|
||||
*
|
||||
* Every fixup handler will take three string arguments,
|
||||
* prefix, condition and suffix (as split by split_term).
|
||||
*
|
||||
* It will either return NULL that means "no fixup was done"
|
||||
* or the pointer to the newly-allocated string with the
|
||||
* new contents.
|
||||
*/
|
||||
typedef gchar *
|
||||
(*fixup_handler_t)(const gchar *pfx, const gchar *cond, const gchar *sfx);
|
||||
|
||||
/*
 * Fixup handler for "date:" conditions: a condition without a ".." range,
 * "date:<x>", is rewritten to "date:<x>..<x>" (wrapped in PFX and SFX
 * again). COND must start with the five-character "date:" prefix.
 *
 * Returns NULL when the value already contains "..", otherwise a newly
 * allocated string.
 */
static gchar*
fixup_date(const gchar *pfx, const gchar *cond, const gchar *sfx)
{
	/* the part of the condition after "date:" */
	const gchar *value = cond + strlen ("date:");

	if (strstr (value, "..") != NULL)
		return NULL;

	return g_strdup_printf ("%s%s..%s%s", pfx, cond, value, sfx);
}
|
||||
|
||||
|
||||
/*
|
||||
* Looks up fixup handler for the given condition.
|
||||
*
|
||||
* Returns fixup handler if we can and NULL if there is
|
||||
* no fixup for this condition.
|
||||
*/
|
||||
static fixup_handler_t
|
||||
find_fixup (const gchar *cond)
|
||||
{
|
||||
size_t n;
|
||||
/* NULL-terminated list of term names for fixups. */
|
||||
static struct {
|
||||
const char *name;
|
||||
size_t len;
|
||||
fixup_handler_t handler;
|
||||
} fixups[] = {
|
||||
{"date:", sizeof("date:") - 1, fixup_date},
|
||||
{NULL, 0, NULL}
|
||||
};
|
||||
|
||||
g_return_val_if_fail (cond, NULL);
|
||||
|
||||
for (n = 0; fixups[n].name; n++) {
|
||||
if (!strncasecmp (cond, fixups[n].name, fixups[n].len))
|
||||
break;
|
||||
}
|
||||
|
||||
return fixups[n].handler;
|
||||
}
|
||||
|
||||
|
||||
gchar*
|
||||
mu_str_xapian_fixup_terms (const gchar *term)
|
||||
{
|
||||
gboolean is_field, is_range_field;
|
||||
const gchar *cond, *pfx, *sfx;
|
||||
gchar *retval;
|
||||
fixup_handler_t fixup;
|
||||
|
||||
g_return_val_if_fail (term, NULL);
|
||||
|
||||
if (strlen (term) == 0)
|
||||
return g_strdup (term);
|
||||
|
||||
check_for_field (term, &is_field, &is_range_field);
|
||||
if (!is_field || !is_range_field)
|
||||
return g_strdup (term);
|
||||
|
||||
if (!split_term (term, &pfx, &cond, &sfx))
|
||||
return g_strdup (term);
|
||||
|
||||
retval = NULL;
|
||||
fixup = find_fixup (cond);
|
||||
if (fixup)
|
||||
retval = fixup (pfx, cond, sfx);
|
||||
if (!retval)
|
||||
retval = g_strdup (term);
|
||||
|
||||
/* At this point retval should contain the result */
|
||||
g_free ((gchar *)pfx);
|
||||
g_free ((gchar *)sfx);
|
||||
g_free ((gchar *)cond);
|
||||
|
||||
return retval;
|
||||
}
|
||||
|
||||
/* note: this function is *not* re-entrant, it returns a static buffer */
|
||||
const char*
|
||||
mu_str_fullpath_s (const char* path, const char* name)
|
||||
|
||||
Reference in New Issue
Block a user