From 6290e4ad9a3f4c1101b49c90736d7f7e617ea1bf Mon Sep 17 00:00:00 2001 From: djcb Date: Fri, 18 May 2018 15:55:40 +0300 Subject: [PATCH] query-parser: special-case wildcards We were transforming wildcard searches into regular-expression searches; while that works, it is significantly slower. Instead, special-case wildcards and use the Xapian machinery for wildcard queries. --- lib/parser/parser.cc | 9 ++------- lib/parser/xapian.cc | 16 ++++++++++++++-- 2 files changed, 16 insertions(+), 9 deletions(-) diff --git a/lib/parser/parser.cc b/lib/parser/parser.cc index 253825f6..72ae6524 100644 --- a/lib/parser/parser.cc +++ b/lib/parser/parser.cc @@ -167,14 +167,9 @@ data (Mux::Tokens& tokens, ProcPtr proc, WarningVec& warnings) } // does it look like a regexp? - if (val.length()>=2) { - if (val[0]=='/' && val[val.length()-1] == '/') + if (val.length() >=2 ) + if (val[0] == '/' && val[val.length()-1] == '/') return regex (fields, val, token.pos, proc, warnings); - else if (val[val.length()-1] == '*') - return regex (fields, // transfrom wildcard into regexp - "/" + val.substr(0, val.length()-1) + ".*/", - token.pos, proc, warnings); - } // does it look like a range? 
const auto dotdot = val.find(".."); diff --git a/lib/parser/xapian.cc b/lib/parser/xapian.cc index b95ff569..08bdebe7 100644 --- a/lib/parser/xapian.cc +++ b/lib/parser/xapian.cc @@ -48,18 +48,30 @@ xapian_query_op (const Mux::Tree& tree) return Xapian::Query(op, childvec.begin(), childvec.end()); } + +static Xapian::Query +maybe_wildcard (const Value* val, const std::string& str) +{ + const auto vlen = str.length(); + if (vlen <= 1 || str[vlen-1] != '*') + return Xapian::Query(val->prefix + str); + else + return Xapian::Query(Xapian::Query::OP_WILDCARD, + val->prefix + str.substr(0, vlen-1)); +} + static Xapian::Query xapian_query_value (const Mux::Tree& tree) { const auto v = dynamic_cast (tree.node.data.get()); if (!v->phrase) - return Xapian::Query(v->prefix + v->value); + return maybe_wildcard(v, v->value); const auto parts = split (v->value, " "); std::vector phvec; for (const auto p: parts) - phvec.push_back(Xapian::Query(v->prefix + p)); + phvec.emplace_back(maybe_wildcard(v, p)); if (parts.empty()) return Xapian::Query::MatchNothing; // shouldn't happen