lib: implement new query parser
mu's query parser is the piece of software that turns your queries
into something the Xapian database can understand. So, if you query
"maildir:/inbox and subject:bla" this must be translated into a
Xapian::Query object which will retrieve the sought after messages.
Since mu's beginning, almost a decade ago, this parser was based on
Xapian's default Xapian::QueryParser. It works okay, but wasn't really
designed for the mu use-case, and had a bit of trouble with anything
that's not A..Z (think: spaces, special characters, unicode etc.).
Over the years, mu added quite a bit of pre-processing trickery to
deal with that. Still, there were corner cases and bugs that were
practically unfixable.
The solution to all of this is to have a custom query processor that
replaces Xapian's, and write it from the ground up to deal with the
special characters etc. I wrote one, as part of my "future, post-1.0
mu" research project, and I have now backported it to the mu 0.9.19.
From a technical perspective, this is a major cleanup, and allows us
to get rid of much of the fragile preprocessing both for indexing and
querying. From an end-user perspective this (hopefully) means that
many of the little parsing issues are gone, and it opens the way for
some new features.
From an end-user perspective:
- better support for special characters.
- regexp search! yes, you can now search for regular expressions, e.g.
subject:/h.ll?o/
will find subjects with hallo, hello, halo, philosophy, ...
As you can imagine, this can be a _heavy_ operation on the database,
and might take quite a bit longer than a normal query; but it can be
quite useful.
This commit is contained in:
121
lib/parser/test-parser.cc
Normal file
121
lib/parser/test-parser.cc
Normal file
@ -0,0 +1,121 @@
|
||||
/*
|
||||
** Copyright (C) 2017 Dirk-Jan C. Binnema <djcb@djcbsoftware.nl>
|
||||
**
|
||||
** This library is free software; you can redistribute it and/or
|
||||
** modify it under the terms of the GNU Lesser General Public License
|
||||
** as published by the Free Software Foundation; either version 2.1
|
||||
** of the License, or (at your option) any later version.
|
||||
**
|
||||
** This library is distributed in the hope that it will be useful,
|
||||
** but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
** Lesser General Public License for more details.
|
||||
**
|
||||
** You should have received a copy of the GNU Lesser General Public
|
||||
** License along with this library; if not, write to the Free
|
||||
** Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
|
||||
** 02110-1301, USA.
|
||||
*/
|
||||
|
||||
#include <vector>
|
||||
#include <glib.h>
|
||||
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
|
||||
#include "parser.hh"
|
||||
using namespace Mux;
|
||||
|
||||
struct Case {
|
||||
const std::string expr;
|
||||
const std::string expected;
|
||||
WarningVec warnings;
|
||||
};
|
||||
|
||||
using CaseVec = std::vector<Case>;
|
||||
|
||||
static void
|
||||
test_cases(const CaseVec& cases)
|
||||
{
|
||||
for (const auto& casus : cases ) {
|
||||
|
||||
WarningVec warnings;
|
||||
const auto tree = parse (casus.expr, warnings);
|
||||
|
||||
std::stringstream ss;
|
||||
ss << tree;
|
||||
|
||||
if (g_test_verbose()) {
|
||||
std::cout << "\n";
|
||||
std::cout << casus.expr << std::endl;
|
||||
std::cout << "exp:" << casus.expected << std::endl;
|
||||
std::cout << "got:" << ss.str() << std::endl;
|
||||
}
|
||||
g_assert_true (casus.expected == ss.str());
|
||||
|
||||
// g_assert_cmpuint (casus.warnings.size(), ==, warnings.size());
|
||||
// for (auto i = 0; i != (int)casus.warnings.size(); ++i) {
|
||||
// std::cout << "exp:" << casus.warnings[i] << std::endl;
|
||||
// std::cout << "got:" << warnings[i] << std::endl;
|
||||
// g_assert_true (casus.warnings[i] == warnings[i]);
|
||||
// }
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
test_basic ()
|
||||
{
|
||||
CaseVec cases = {
|
||||
//{ "", R"#((atom :value ""))#"},
|
||||
{ "foo", R"#((value "" "foo"))#", },
|
||||
{ "foo or bar",
|
||||
R"#((or(value "" "foo")(value "" "bar")))#" },
|
||||
{ "foo and bar",
|
||||
R"#((and(value "" "foo")(value "" "bar")))#"},
|
||||
};
|
||||
|
||||
test_cases (cases);
|
||||
}
|
||||
|
||||
static void
|
||||
test_complex ()
|
||||
{
|
||||
CaseVec cases = {
|
||||
{ "foo and bar or cuux",
|
||||
R"#((or(and(value "" "foo")(value "" "bar")))#" +
|
||||
std::string(R"#((value "" "cuux")))#") },
|
||||
{ "a and not b",
|
||||
R"#((andnot(value "" "a")(value "" "b")))#"
|
||||
},
|
||||
{ "a and b and c",
|
||||
R"#((and(value "" "a")(and(value "" "b")(value "" "c"))))#"
|
||||
},
|
||||
{ "(a or b) and c",
|
||||
R"#((and(or(value "" "a")(value "" "b"))(value "" "c")))#"
|
||||
}
|
||||
};
|
||||
|
||||
test_cases (cases);
|
||||
}
|
||||
|
||||
static void
|
||||
test_flatten ()
|
||||
{
|
||||
CaseVec cases = {
|
||||
{ " Mötørhęåđ", R"#((value "" "motorhead"))#" }
|
||||
};
|
||||
|
||||
test_cases (cases);
|
||||
}
|
||||
|
||||
int
|
||||
main (int argc, char *argv[])
|
||||
{
|
||||
g_test_init (&argc, &argv, NULL);
|
||||
|
||||
g_test_add_func ("/parser/basic", test_basic);
|
||||
g_test_add_func ("/parser/complex", test_complex);
|
||||
g_test_add_func ("/parser/flatten", test_flatten);
|
||||
|
||||
return g_test_run ();
|
||||
}
|
||||
Reference in New Issue
Block a user