integrate new query parser

2017-10-24 22:57:57 +03:00
parent b75f9f508b
commit 5e9cafea59
17 changed files with 373 additions and 979 deletions
--- a/lib/mu-str.c
+++ b/lib/mu-str.c
@@ -175,30 +175,6 @@ mu_str_display_contact (const char *str)
 }


-gint64
-mu_str_size_parse_bkm (const char* str)
-{
-	gint64 num;
-
-	g_return_val_if_fail (str, -1);
-
-	if (!isdigit(str[0]))
-		return -1;
-
-	num = atoi(str);
-	for (++str; isdigit(*str); ++str);
-
-	switch (tolower(*str)) {
-	case '\0':
-	case 'b' : return num;                      /* bytes */
-	case 'k':  return num * 1000;               /* kilobyte */
-	case 'm':  return num * 1000 * 1000;        /* megabyte */
-	default:
-		return -1;
-	}
-}
-
-
 char*
 mu_str_replace (const char *str, const char *substr, const char *repl)
 {
@@ -224,9 +200,6 @@ mu_str_replace (const char *str, const char *substr, const char *repl)



-
-
-
 char*
 mu_str_from_list (const GSList *lst, char sepa)
 {
@@ -396,397 +369,6 @@ mu_str_subject_normalize (const gchar* str)
 }


-struct _CheckPrefix {
-	const char *str;
-	gboolean   match;
-	gboolean   range_field;
-};
-typedef struct _CheckPrefix	 CheckPrefix;
-
-
-
-static void
-each_check_prefix (MuMsgFieldId mfid, CheckPrefix *cpfx)
-{
-	const char *pfx;
-	char pfx_short[3] = { 'X', ':', '\0'};
-	char k;
-
-	if (!cpfx || cpfx->match)
-		return;
-
-	k = pfx_short[0] = mu_msg_field_shortcut (mfid);
-	if (k && g_str_has_prefix (cpfx->str, pfx_short)) {
-		cpfx->match = TRUE;
-		cpfx->range_field = mu_msg_field_is_range_field (mfid);
-	}
-
-	pfx = mu_msg_field_name (mfid);
-	if (pfx && g_str_has_prefix (cpfx->str, pfx) &&
-	    cpfx->str[strlen(pfx)] == ':') {
-		cpfx->match = TRUE;
-		cpfx->range_field = mu_msg_field_is_range_field (mfid);
-	}
-}
-
-/* check if it looks like either i:<msgid> or msgid:<msgid> */
-static gboolean
-is_msgid_field (const char *str)
-{
-	const char *name;
-
-	if (!str || strlen(str) < 3)
-		return FALSE;
-
-	if (str[0] == mu_msg_field_shortcut (MU_MSG_FIELD_ID_MSGID) &&
-	    str[1] == ':')
-		return TRUE;
-
-	name = mu_msg_field_name (MU_MSG_FIELD_ID_MSGID);
-	if (g_str_has_prefix (str, name) && str[strlen(name)] == ':')
-		return TRUE;
-
-	return FALSE;
-}
-
-/* message-ids need a bit more massaging -- we replace all
- * non-alphanum with '_'. Note, this function assumes we're looking at
- * a msg-id field, ie. i:<msgid> or msgid:<msgid> */
-char*
-mu_str_process_msgid (const char *str, gboolean query)
-{
-	char *s, *c;
-
-	g_return_val_if_fail (str, NULL);
-	g_return_val_if_fail (!query || strchr(str, ':'), NULL);
-
-	if (!str)
-		return NULL;
-
-	s = g_strdup (str);
-
-	if (query)
-		c = strchr (s, ':') + 1;
-	else
-		c = s;
-
-	for (; *c; ++c)
-		*c = isalnum (*c) ? tolower (*c) : '_';
-
-	return s;
-}
-
-
-
-static void
-check_for_field (const char *str, gboolean *is_field,
-		 gboolean *is_range_field)
-{
-	CheckPrefix pfx;
-
-	pfx.str   = str;
-
-	/* skip any non-alphanum starts in cpfx->str; this is to
-	 * handle the case where we have e.g. "(maildir:/abc)"
-	 */
-	while (pfx.str && *pfx.str && !isalnum(*pfx.str))
-		++pfx.str;
-
-	pfx.match = pfx.range_field = FALSE;
-
-	mu_msg_field_foreach ((MuMsgFieldForeachFunc)each_check_prefix,
-			      &pfx);
-	/* also check special prefixes... */
-	if (!pfx.match)
-		pfx.match =
-			g_str_has_prefix
-			(str, MU_MSG_FIELD_PSEUDO_CONTACT ":") ||
-			g_str_has_prefix
-			(str, MU_MSG_FIELD_PSEUDO_RECIP ":");
-
-	*is_field	= pfx.match;
-	*is_range_field = pfx.range_field;
-}
-
-
-static gboolean
-handle_esc_maybe (GString *gstr, char **cur, gunichar uc,
-		  gboolean query_esc, gboolean range_field)
-{
-	char kar;
-
-	kar = *cur[0];
-
-	if (query_esc) {
-		switch (kar) {
-		case ':':
-		case '(':
-		case ')':
-		case '*':
-		case '&':
-		case '"':
-			g_string_append_c (gstr, kar);
-			return TRUE;
-		case '.':
-			if (!range_field)
-				break;
-
-			if ((*cur)[1] == '.' && (*cur)[2] != '.') {
-				g_string_append (gstr, "..");
-				*cur = g_utf8_next_char (*cur);
-				return TRUE;
-			}
-		default: break;
-		}
-	}
-
-	if (g_unichar_ispunct(uc) || isblank(kar)) {
-		g_string_append_c (gstr, '_');
-		return TRUE;
-	}
-
-	return FALSE;
-}
-
-
-static char*
-process_str (const char *str, gboolean xapian_esc, gboolean query_esc)
-{
-	GString *gstr;
-	char *norm, *cur;
-	gboolean is_field, is_range_field;
-
-	norm = g_utf8_normalize (str, -1, G_NORMALIZE_ALL);
-	if (G_UNLIKELY(!norm)) {  /* not valid utf8? */
-		char *u8;
-		u8 = mu_str_utf8ify (str);
-		norm = g_utf8_normalize (u8, -1, G_NORMALIZE_ALL);
-		g_free (u8);
-	}
-
- 	if (!norm)
-		return NULL;
-
-	/* msg-id needs some special care in queries */
-	if (query_esc && is_msgid_field (str))
-		return mu_str_process_msgid (str, TRUE);
-
-	check_for_field (str, &is_field, &is_range_field);
-	gstr = g_string_sized_new (strlen (norm));
-
-	for (cur = norm; cur && *cur; cur = g_utf8_next_char (cur)) {
-
-		gunichar uc;
-		uc = g_utf8_get_char (cur);
-		if (xapian_esc)
-			if (handle_esc_maybe (gstr, &cur, uc, query_esc,
-					      is_range_field))
-				continue;
-
-		if (g_unichar_ismark(uc))
-			continue;
-
-		if (!is_range_field)
-			uc = g_unichar_tolower (uc);
-
-		g_string_append_unichar (gstr, uc);
-	}
-
-	g_free (norm);
-	return g_string_free (gstr, FALSE);
-}
-
-
-char*
-mu_str_process_text (const char *str)
-{
-	g_return_val_if_fail (str, NULL);
-
-	return process_str (str, FALSE, FALSE);
-
-}
-
-
-char*
-mu_str_process_term (const char *str)
-{
-	g_return_val_if_fail (str, NULL);
-
-	return process_str (str, TRUE, FALSE);
-
-}
-
-
-char*
-mu_str_process_query_term (const char *str)
-{
-	g_return_val_if_fail (str, NULL);
-
-	return process_str (str, TRUE, TRUE);
-
-}
-
-
-
-/*
- * Split simple search term into prefix, expression and suffix.
- * Meant to handle cases like "(maildir:/abc)", prefix and
- * suffix are the non-alphanumeric stuff at the beginning
- * and the end of string.
- *
- * Values of *pfx, *cond and *sfx will be allocated from heap
- * and must be g_free()d.
- *
- * Returns TRUE if all went fine and FALSE if some error was
- * occured.
- */
-static gboolean
-split_term (const gchar *term,
-	    const gchar **pfx, const gchar **cond, const gchar **sfx)
-{
-	size_t l;
-	const gchar *start, *tail;
-	const gchar *p, *c, *s;
-
-	g_return_val_if_fail (term, FALSE);
-	g_return_val_if_fail (pfx, FALSE);
-	g_return_val_if_fail (cond, FALSE);
-	g_return_val_if_fail (sfx, FALSE);
-
-	l = strlen (term);
-	if (l == 0) {
-		p = g_strdup ("");
-		c = g_strdup ("");
-		s = g_strdup ("");
-		goto _done;
-	}
-
-	/*
-	 * Invariants:
-	 * - start will point to the first symbol after leading
-	 *   non-alphanumerics (can be alphanumeric or '\0');
-	 * - tail will point to the beginning of trailing
-	 *   non-alphanumerics or '\0'.
-	 * So:
-	 * - len (prefix) = start - term;
-	 * - len (cond) = tail - start;
-	 * - len (suffix) = term + len (term) - tail.
-	 */
-	for (start = term; *start && !isalnum (*start); start++);
-	for (tail = term + l; tail > start && !isalnum (*(tail-1)); tail--);
-
-	p = g_strndup (term, start - term);
-	c = g_strndup (start, tail - start);
-	s = g_strndup (tail, term + l - tail);
-
-_done:
-	if (!p || !c || !s) {
-		g_free ((gchar *)p);
-		g_free ((gchar *)c);
-		g_free ((gchar *)s);
-		return FALSE;
-	} else {
-		*pfx = p;
-		*cond = c;
-		*sfx = s;
-		return TRUE;
-	}
-	/* NOTREACHED */
-}
-
-
-/*
- * Fixup handlers.
- *
- * Every fixup handler will take three string arguments,
- * prefix, condition and suffix (as split by split_term).
- *
- * It will either return NULL that means "no fixup was done"
- * or the pointer to the newly-allocated string with the
- * new contents.
- */
-typedef gchar *
-  (*fixup_handler_t)(const gchar *pfx, const gchar *cond, const gchar *sfx);
-
-static gchar*
-fixup_date(const gchar *pfx, const gchar *cond, const gchar *sfx)
-{
-	const gchar *p;
-
-	p = cond + sizeof ("date:") - 1;
-
-	if (strstr (p, ".."))
-		return NULL;
-	return g_strdup_printf ("%s%s..%s%s", pfx, cond, p, sfx);
-}
-
-
-/*
- * Looks up fixup handler for the given condition.
- *
- * Returns fixup handler if we can and NULL if there is
- * no fixup for this condition.
- */
-static fixup_handler_t
-find_fixup (const gchar *cond)
-{
-	size_t n;
-	/* NULL-terminated list of term names for fixups. */
-	static struct {
-		const char *name;
-		size_t len;
-		fixup_handler_t handler;
-	} fixups[] = {
-	  {"date:", sizeof("date:") - 1, fixup_date},
-	  {NULL, 0, NULL}
-	};
-
-	g_return_val_if_fail (cond, NULL);
-
-	for (n = 0; fixups[n].name; n++) {
-		if (!strncasecmp (cond, fixups[n].name, fixups[n].len))
-			break;
-	}
-
-	return fixups[n].handler;
-}
-
-
-gchar*
-mu_str_xapian_fixup_terms (const gchar *term)
-{
-	gboolean is_field, is_range_field;
-	const gchar *cond, *pfx, *sfx;
-	gchar *retval;
-	fixup_handler_t fixup;
-
-	g_return_val_if_fail (term, NULL);
-
-	if (strlen (term) == 0)
-		return g_strdup (term);
-
-	check_for_field (term, &is_field, &is_range_field);
-	if (!is_field || !is_range_field)
-		return g_strdup (term);
-
-	if (!split_term (term, &pfx, &cond, &sfx))
-		return g_strdup (term);
-
-	retval = NULL;
-	fixup = find_fixup (cond);
-	if (fixup)
-		retval = fixup (pfx, cond, sfx);
-	if (!retval)
-		retval = g_strdup (term);
-
-	/* At this point retval should contain the result */
-	g_free ((gchar *)pfx);
-	g_free ((gchar *)sfx);
-	g_free ((gchar *)cond);
-
-	return retval;
-}
-
 /* note: this function is *not* re-entrant, it returns a static buffer */
 const char*
 mu_str_fullpath_s (const char* path, const char* name)