move parser/utils to utils, Mux->Mu
Move the parser utils to utils/ and rename the Mux namespace into Mu.
This commit is contained in:
@ -1,4 +1,4 @@
|
||||
## Copyright (C) 2017 Dirk-Jan C. Binnema <djcb@djcbsoftware.nl>
|
||||
## Copyright (C) 2017-2019 Dirk-Jan C. Binnema <djcb@djcbsoftware.nl>
|
||||
##
|
||||
## This program is free software; you can redistribute it and/or modify
|
||||
## it under the terms of the GNU General Public License as published by
|
||||
@ -47,7 +47,8 @@ tokenize_SOURCES= \
|
||||
tokenize_LDADD= \
|
||||
$(WARN_LDFLAGS) \
|
||||
$(GCOV_LDADD) \
|
||||
libmuxparser.la
|
||||
libmu-parser.la \
|
||||
../utils/libmu-utils.la
|
||||
|
||||
parse_SOURCES= \
|
||||
parse.cc
|
||||
@ -55,13 +56,13 @@ parse_SOURCES= \
|
||||
parse_LDADD= \
|
||||
$(WARN_LDFLAGS) \
|
||||
$(GCOV_LDADD) \
|
||||
libmuxparser.la
|
||||
|
||||
libmu-parser.la \
|
||||
../utils/libmu-utils.la
|
||||
|
||||
noinst_LTLIBRARIES= \
|
||||
libmuxparser.la
|
||||
libmu-parser.la
|
||||
|
||||
libmuxparser_la_SOURCES= \
|
||||
libmu_parser_la_SOURCES= \
|
||||
data.hh \
|
||||
parser.cc \
|
||||
parser.hh \
|
||||
@ -69,30 +70,37 @@ libmuxparser_la_SOURCES= \
|
||||
tokenizer.cc \
|
||||
tokenizer.hh \
|
||||
tree.hh \
|
||||
utils.cc \
|
||||
utils.hh \
|
||||
xapian.cc \
|
||||
xapian.hh
|
||||
|
||||
libmuxparser_la_LIBADD= \
|
||||
libmu_parser_la_LIBADD= \
|
||||
$(WARN_LDFLAGS) \
|
||||
$(GLIB_LIBS) \
|
||||
$(XAPIAN_LIBS)
|
||||
|
||||
VALGRIND_SUPPRESSIONS_FILES= ${top_srcdir}/mux.supp
|
||||
|
||||
VALGRIND_SUPPRESSIONS_FILES= \
|
||||
${top_srcdir}/mu.supp
|
||||
|
||||
noinst_PROGRAMS+=$(TEST_PROGS)
|
||||
|
||||
TEST_PROGS += test-tokenizer
|
||||
test_tokenizer_SOURCES=test-tokenizer.cc
|
||||
test_tokenizer_LDADD=$(GCOV_LDADD) libmuxparser.la
|
||||
TEST_PROGS+= \
|
||||
test-tokenizer
|
||||
test_tokenizer_SOURCES= \
|
||||
test-tokenizer.cc
|
||||
test_tokenizer_LDADD= \
|
||||
$(GCOV_LDADD) \
|
||||
libmu-parser.la \
|
||||
../utils/libmu-utils.la
|
||||
|
||||
TEST_PROGS += test-parser
|
||||
test_parser_SOURCES=test-parser.cc
|
||||
test_parser_LDADD=$(GCOV_LDADD) libmuxparser.la
|
||||
TEST_PROGS+= \
|
||||
test-parser
|
||||
test_parser_SOURCES= \
|
||||
test-parser.cc
|
||||
test_parser_LDADD= \
|
||||
$(GCOV_LDADD) \
|
||||
libmu-parser.la \
|
||||
../utils/libmu-utils.la
|
||||
|
||||
TEST_PROGS += test-utils
|
||||
test_utils_SOURCES=test-utils.cc
|
||||
test_utils_LDADD= $(GCOV_LDADD) libmuxparser.la
|
||||
|
||||
TESTS=$(TEST_PROGS)
|
||||
|
||||
@ -24,9 +24,9 @@
|
||||
#include <iostream>
|
||||
#include <regex>
|
||||
|
||||
#include <parser/utils.hh>
|
||||
#include <utils//mu-utils.hh>
|
||||
|
||||
namespace Mux {
|
||||
namespace Mu {
|
||||
|
||||
// class representing some data item; either a Value or a Range. A Value can still be a Regex (but
|
||||
// that's not a separate type here)
|
||||
@ -149,7 +149,7 @@ operator<< (std::ostream& os, const std::unique_ptr<Data>& v)
|
||||
return os;
|
||||
}
|
||||
|
||||
} // namespace Mux
|
||||
} // namespace Mu
|
||||
|
||||
|
||||
#endif /* __DATA_HH__ */
|
||||
|
||||
@ -1,30 +0,0 @@
|
||||
/*
|
||||
** Copyright (C) 2017 Dirk-Jan C. Binnema <djcb@djcbsoftware.nl>
|
||||
**
|
||||
** This library is free software; you can redistribute it and/or
|
||||
** modify it under the terms of the GNU Lesser General Public License
|
||||
** as published by the Free Software Foundation; either version 2.1
|
||||
** of the License, or (at your option) any later version.
|
||||
**
|
||||
** This library is distributed in the hope that it will be useful,
|
||||
** but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
** Lesser General Public License for more details.
|
||||
**
|
||||
** You should have received a copy of the GNU Lesser General Public
|
||||
** License along with this library; if not, write to the Free
|
||||
** Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
|
||||
** 02110-1301, USA.
|
||||
*/
|
||||
#ifndef __DUMMY_PROCESSOR_HH__
|
||||
#define __DUMMY_PROCESSOR_HH__
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <tuple>
|
||||
|
||||
namespace Mux {
|
||||
|
||||
|
||||
|
||||
#endif /* __FIELDS_HH__ */
|
||||
@ -29,9 +29,9 @@ main (int argc, char *argv[])
|
||||
for (auto i = 1; i < argc; ++i)
|
||||
s += " " + std::string(argv[i]);
|
||||
|
||||
Mux::WarningVec warnings;
|
||||
Mu::WarningVec warnings;
|
||||
|
||||
const auto tree = Mux::parse (s, warnings);
|
||||
const auto tree = Mu::parse (s, warnings);
|
||||
for (const auto& w: warnings)
|
||||
std::cerr << "1:" << w.pos << ": " << w.msg << std::endl;
|
||||
|
||||
|
||||
@ -18,9 +18,9 @@
|
||||
*/
|
||||
#include "parser.hh"
|
||||
#include "tokenizer.hh"
|
||||
#include "utils.hh"
|
||||
#include "utils/mu-utils.hh"
|
||||
|
||||
using namespace Mux;
|
||||
using namespace Mu;
|
||||
|
||||
// 3 precedence levels: units (NOT,()) > factors (OR) > terms (AND)
|
||||
|
||||
@ -40,21 +40,21 @@ using namespace Mux;
|
||||
+ format(__VA_ARGS__))
|
||||
|
||||
static Token
|
||||
look_ahead (const Mux::Tokens& tokens)
|
||||
look_ahead (const Mu::Tokens& tokens)
|
||||
{
|
||||
return tokens.front();
|
||||
}
|
||||
|
||||
static Mux::Tree
|
||||
static Mu::Tree
|
||||
empty()
|
||||
{
|
||||
return {{Node::Type::Empty}};
|
||||
}
|
||||
|
||||
static Mux::Tree term_1 (Mux::Tokens& tokens, ProcPtr proc, WarningVec& warnings);
|
||||
static Mu::Tree term_1 (Mu::Tokens& tokens, ProcPtr proc, WarningVec& warnings);
|
||||
|
||||
|
||||
static Mux::Tree
|
||||
static Mu::Tree
|
||||
value (const ProcIface::FieldInfoVec& fields, const std::string& v,
|
||||
size_t pos, ProcPtr proc, WarningVec& warnings)
|
||||
{
|
||||
@ -83,7 +83,7 @@ value (const ProcIface::FieldInfoVec& fields, const std::string& v,
|
||||
return tree;
|
||||
}
|
||||
|
||||
static Mux::Tree
|
||||
static Mu::Tree
|
||||
regex (const ProcIface::FieldInfoVec& fields, const std::string& v,
|
||||
size_t pos, ProcPtr proc, WarningVec& warnings)
|
||||
{
|
||||
@ -119,7 +119,7 @@ regex (const ProcIface::FieldInfoVec& fields, const std::string& v,
|
||||
|
||||
|
||||
|
||||
static Mux::Tree
|
||||
static Mu::Tree
|
||||
range (const ProcIface::FieldInfoVec& fields, const std::string& lower,
|
||||
const std::string& upper, size_t pos, ProcPtr proc,
|
||||
WarningVec& warnings)
|
||||
@ -141,8 +141,8 @@ range (const ProcIface::FieldInfoVec& fields, const std::string& lower,
|
||||
}
|
||||
|
||||
|
||||
static Mux::Tree
|
||||
data (Mux::Tokens& tokens, ProcPtr proc, WarningVec& warnings)
|
||||
static Mu::Tree
|
||||
data (Mu::Tokens& tokens, ProcPtr proc, WarningVec& warnings)
|
||||
{
|
||||
const auto token = look_ahead(tokens);
|
||||
if (token.type != Token::Type::Data)
|
||||
@ -185,8 +185,8 @@ data (Mux::Tokens& tokens, ProcPtr proc, WarningVec& warnings)
|
||||
return value (fields, val, token.pos, proc, warnings);
|
||||
}
|
||||
|
||||
static Mux::Tree
|
||||
unit (Mux::Tokens& tokens, ProcPtr proc, WarningVec& warnings)
|
||||
static Mu::Tree
|
||||
unit (Mu::Tokens& tokens, ProcPtr proc, WarningVec& warnings)
|
||||
{
|
||||
if (tokens.empty()) {
|
||||
warnings.push_back ({0, "expected: unit"});
|
||||
@ -225,11 +225,11 @@ unit (Mux::Tokens& tokens, ProcPtr proc, WarningVec& warnings)
|
||||
return data (tokens, proc, warnings);
|
||||
}
|
||||
|
||||
static Mux::Tree factor_1 (Mux::Tokens& tokens, ProcPtr proc,
|
||||
static Mu::Tree factor_1 (Mu::Tokens& tokens, ProcPtr proc,
|
||||
WarningVec& warnings);
|
||||
|
||||
static Mux::Tree
|
||||
factor_2 (Mux::Tokens& tokens, Node::Type& op, ProcPtr proc,
|
||||
static Mu::Tree
|
||||
factor_2 (Mu::Tokens& tokens, Node::Type& op, ProcPtr proc,
|
||||
WarningVec& warnings)
|
||||
{
|
||||
if (tokens.empty())
|
||||
@ -256,8 +256,8 @@ factor_2 (Mux::Tokens& tokens, Node::Type& op, ProcPtr proc,
|
||||
return factor_1 (tokens, proc, warnings);
|
||||
}
|
||||
|
||||
static Mux::Tree
|
||||
factor_1 (Mux::Tokens& tokens, ProcPtr proc, WarningVec& warnings)
|
||||
static Mu::Tree
|
||||
factor_1 (Mu::Tokens& tokens, ProcPtr proc, WarningVec& warnings)
|
||||
{
|
||||
Node::Type op { Node::Type::Invalid };
|
||||
|
||||
@ -275,8 +275,8 @@ factor_1 (Mux::Tokens& tokens, ProcPtr proc, WarningVec& warnings)
|
||||
}
|
||||
|
||||
|
||||
static Mux::Tree
|
||||
term_2 (Mux::Tokens& tokens, Node::Type& op, ProcPtr proc,
|
||||
static Mu::Tree
|
||||
term_2 (Mu::Tokens& tokens, Node::Type& op, ProcPtr proc,
|
||||
WarningVec& warnings)
|
||||
{
|
||||
if (tokens.empty())
|
||||
@ -302,8 +302,8 @@ term_2 (Mux::Tokens& tokens, Node::Type& op, ProcPtr proc,
|
||||
return term_1 (tokens, proc, warnings);
|
||||
}
|
||||
|
||||
static Mux::Tree
|
||||
term_1 (Mux::Tokens& tokens, ProcPtr proc, WarningVec& warnings)
|
||||
static Mu::Tree
|
||||
term_1 (Mu::Tokens& tokens, ProcPtr proc, WarningVec& warnings)
|
||||
{
|
||||
Node::Type op { Node::Type::Invalid };
|
||||
|
||||
@ -320,8 +320,8 @@ term_1 (Mux::Tokens& tokens, ProcPtr proc, WarningVec& warnings)
|
||||
}
|
||||
}
|
||||
|
||||
static Mux::Tree
|
||||
query (Mux::Tokens& tokens, ProcPtr proc, WarningVec& warnings)
|
||||
static Mu::Tree
|
||||
query (Mu::Tokens& tokens, ProcPtr proc, WarningVec& warnings)
|
||||
{
|
||||
if (tokens.empty())
|
||||
return empty ();
|
||||
@ -329,8 +329,8 @@ query (Mux::Tokens& tokens, ProcPtr proc, WarningVec& warnings)
|
||||
return term_1 (tokens, proc, warnings);
|
||||
}
|
||||
|
||||
Mux::Tree
|
||||
Mux::parse (const std::string& expr, WarningVec& warnings, ProcPtr proc)
|
||||
Mu::Tree
|
||||
Mu::parse (const std::string& expr, WarningVec& warnings, ProcPtr proc)
|
||||
{
|
||||
try {
|
||||
auto tokens = tokenize (expr);
|
||||
|
||||
@ -32,7 +32,7 @@
|
||||
// A simple recursive-descent parser for queries. Follows the Xapian syntax,
|
||||
// but better handles non-alphanum; also implements regexp
|
||||
|
||||
namespace Mux {
|
||||
namespace Mu {
|
||||
|
||||
/**
|
||||
* A parser warning
|
||||
@ -84,6 +84,6 @@ using ProcPtr = const std::unique_ptr<ProcIface>&;
|
||||
Tree parse (const std::string& query, WarningVec& warnings,
|
||||
ProcPtr proc = std::make_unique<DummyProc>());
|
||||
|
||||
} // namespace Mux
|
||||
} // namespace Mu
|
||||
|
||||
#endif /* __PARSER_HH__ */
|
||||
|
||||
@ -24,7 +24,7 @@
|
||||
#include <tuple>
|
||||
#include <regex>
|
||||
|
||||
namespace Mux {
|
||||
namespace Mu {
|
||||
|
||||
struct ProcIface {
|
||||
|
||||
@ -127,6 +127,6 @@ struct DummyProc: public ProcIface { // For testing
|
||||
}; //Dummy
|
||||
|
||||
|
||||
} // Mux
|
||||
} // Mu
|
||||
|
||||
#endif /* __PROC_IFACE_HH__ */
|
||||
|
||||
@ -24,7 +24,7 @@
|
||||
#include <sstream>
|
||||
|
||||
#include "parser.hh"
|
||||
using namespace Mux;
|
||||
using namespace Mu;
|
||||
|
||||
struct Case {
|
||||
const std::string expr;
|
||||
|
||||
@ -26,12 +26,12 @@
|
||||
|
||||
struct Case {
|
||||
const char *str;
|
||||
const Mux::Tokens tokens;
|
||||
const Mu::Tokens tokens;
|
||||
};
|
||||
|
||||
using CaseVec = std::vector<Case>;
|
||||
|
||||
using namespace Mux;
|
||||
using namespace Mu;
|
||||
using TT = Token::Type;
|
||||
|
||||
static void
|
||||
|
||||
@ -1,170 +0,0 @@
|
||||
/*
|
||||
** Copyright (C) 2017 Dirk-Jan C. Binnema <djcb@djcbsoftware.nl>
|
||||
**
|
||||
** This library is free software; you can redistribute it and/or
|
||||
** modify it under the terms of the GNU Lesser General Public License
|
||||
** as published by the Free Software Foundation; either version 2.1
|
||||
** of the License, or (at your option) any later version.
|
||||
**
|
||||
** This library is distributed in the hope that it will be useful,
|
||||
** but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
** Lesser General Public License for more details.
|
||||
**
|
||||
** You should have received a copy of the GNU Lesser General Public
|
||||
** License along with this library; if not, write to the Free
|
||||
** Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
|
||||
** 02110-1301, USA.
|
||||
*/
|
||||
|
||||
#include <vector>
|
||||
#include <glib.h>
|
||||
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
|
||||
#include "parser.hh"
|
||||
using namespace Mux;
|
||||
|
||||
// One table-driven test case: the input expression, the flag that is passed
// along with it, and the string the function under test should produce.
struct Case {
	const std::string	expr;      // input handed to the function under test
	bool			is_first;  // forwarded as the second argument
	const std::string	expected;  // exact result we expect back
};

// A list of cases, and the uniform signature every function under test is
// adapted to before being run over such a list.
typedef std::vector<Case>					CaseVec;
typedef std::function<std::string(std::string, bool)>	ProcFunc;
|
||||
|
||||
|
||||
static void
|
||||
test_cases(const CaseVec& cases, ProcFunc proc)
|
||||
{
|
||||
for (const auto& casus : cases ) {
|
||||
|
||||
const auto res = proc(casus.expr, casus.is_first);
|
||||
if (g_test_verbose()) {
|
||||
std::cout << "\n";
|
||||
std::cout << casus.expr << ' ' << casus.is_first << std::endl;
|
||||
std::cout << "exp: '" << casus.expected << "'" << std::endl;
|
||||
std::cout << "got: '" << res << "'" << std::endl;
|
||||
}
|
||||
|
||||
g_assert_true (casus.expected == res);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
test_date_basic ()
|
||||
{
|
||||
g_setenv ("TZ", "Europe/Helsinki", TRUE);
|
||||
|
||||
CaseVec cases = {
|
||||
{ "2015-09-18T09:10:23", true, "1442556623" },
|
||||
{ "1972-12-14T09:10:23", true, "0093165023" },
|
||||
{ "1854-11-18T17:10:23", true, "0000000000" },
|
||||
|
||||
{ "2000-02-31T09:10:23", true, "0951861599" },
|
||||
{ "2000-02-29T23:59:59", true, "0951861599" },
|
||||
|
||||
{ "2016", true, "1451599200" },
|
||||
{ "2016", false, "1483221599" },
|
||||
|
||||
{ "fnorb", true, "0000000000" },
|
||||
{ "fnorb", false, "9999999999" },
|
||||
{ "", false, "9999999999" },
|
||||
{ "", true, "0000000000" }
|
||||
};
|
||||
|
||||
test_cases (cases, [](auto s, auto f){ return date_to_time_t_string(s,f); });
|
||||
}
|
||||
|
||||
static void
|
||||
test_date_ymwdhMs (void)
|
||||
{
|
||||
struct {
|
||||
std::string expr;
|
||||
long diff;
|
||||
int tolerance;
|
||||
} tests[] = {
|
||||
{ "3h", 3 * 60 * 60, 1 },
|
||||
{ "21d", 21 * 24 * 60 * 60, 3600 + 1 },
|
||||
{ "2w", 2 * 7 * 24 * 60 * 60, 3600 + 1 },
|
||||
|
||||
{ "2y", 2 * 365 * 24 * 60 * 60, 24 * 3600 + 1 },
|
||||
{ "3m", 3 * 30 * 24 * 60 * 60, 3 * 24 * 3600 + 1 }
|
||||
};
|
||||
|
||||
for (auto i = 0; i != G_N_ELEMENTS(tests); ++i) {
|
||||
const auto diff = time(NULL) -
|
||||
strtol(Mux::date_to_time_t_string(tests[i].expr, true).c_str(),
|
||||
NULL, 10);
|
||||
if (g_test_verbose())
|
||||
std::cerr << tests[i].expr << ' '
|
||||
<< diff << ' '
|
||||
<< tests[i].diff << std::endl;
|
||||
|
||||
g_assert_true (tests[i].diff - diff <= tests[i].tolerance);
|
||||
}
|
||||
|
||||
g_assert_true (strtol(Mux::date_to_time_t_string("-1y", true).c_str(),
|
||||
NULL, 10) == 0);
|
||||
}
|
||||
|
||||
static void
|
||||
test_size ()
|
||||
{
|
||||
CaseVec cases = {
|
||||
{ "456", true, "0000000456" },
|
||||
{ "", false, "9999999999" },
|
||||
{ "", true, "0000000000" },
|
||||
};
|
||||
|
||||
test_cases (cases, [](auto s, auto f){ return size_to_string(s,f); });
|
||||
}
|
||||
|
||||
|
||||
static void
|
||||
test_flatten ()
|
||||
{
|
||||
CaseVec cases = {
|
||||
{ "Менделе́ев", true, "менделеев" },
|
||||
{ "", false, "" },
|
||||
{ "Ångström", true, "angstrom" },
|
||||
};
|
||||
|
||||
test_cases (cases, [](auto s, auto f){ return utf8_flatten(s); });
|
||||
}
|
||||
|
||||
static void
|
||||
test_clean ()
|
||||
{
|
||||
CaseVec cases = {
|
||||
{ "\t a\t\nb ", true, "a b" },
|
||||
{ "", false, "" },
|
||||
{ "Ångström", true, "Ångström" },
|
||||
};
|
||||
|
||||
test_cases (cases, [](auto s, auto f){ return utf8_clean(s); });
|
||||
}
|
||||
|
||||
|
||||
static void
|
||||
test_format ()
|
||||
{
|
||||
g_assert_true (format ("hello %s, %u", "world", 123) ==
|
||||
"hello world, 123");
|
||||
}
|
||||
|
||||
int
|
||||
main (int argc, char *argv[])
|
||||
{
|
||||
g_test_init (&argc, &argv, NULL);
|
||||
|
||||
g_test_add_func ("/utils/date-basic", test_date_basic);
|
||||
g_test_add_func ("/utils/date-ymwdhMs", test_date_ymwdhMs);
|
||||
g_test_add_func ("/utils/size", test_size);
|
||||
g_test_add_func ("/utils/flatten", test_flatten);
|
||||
g_test_add_func ("/utils/clean", test_clean);
|
||||
g_test_add_func ("/utils/format", test_format);
|
||||
|
||||
return g_test_run ();
|
||||
}
|
||||
@ -30,7 +30,7 @@ main (int argc, char *argv[])
|
||||
for (auto i = 1; i < argc; ++i)
|
||||
s += " " + std::string(argv[i]);
|
||||
|
||||
const auto tvec = Mux::tokenize (s);
|
||||
const auto tvec = Mu::tokenize (s);
|
||||
for (const auto& t : tvec)
|
||||
std::cout << t << std::endl;
|
||||
|
||||
|
||||
@ -18,13 +18,13 @@
|
||||
*/
|
||||
|
||||
#include "tokenizer.hh"
|
||||
#include "utils.hh"
|
||||
#include "utils/mu-utils.hh"
|
||||
|
||||
#include <cctype>
|
||||
#include <iostream>
|
||||
#include <algorithm>
|
||||
|
||||
using namespace Mux;
|
||||
using namespace Mu;
|
||||
|
||||
static bool
|
||||
is_separator (char c)
|
||||
@ -37,7 +37,7 @@ is_separator (char c)
|
||||
}
|
||||
|
||||
|
||||
static Mux::Token
|
||||
static Mu::Token
|
||||
op_or_value (size_t pos, const std::string& val)
|
||||
{
|
||||
auto s = val;
|
||||
@ -62,7 +62,7 @@ unread_char (std::string& food, char kar, size_t& pos)
|
||||
--pos;
|
||||
}
|
||||
|
||||
static Mux::Token
|
||||
static Mu::Token
|
||||
eat_token (std::string& food, size_t& pos)
|
||||
{
|
||||
bool quoted{};
|
||||
@ -115,8 +115,8 @@ eat_token (std::string& food, size_t& pos)
|
||||
}
|
||||
|
||||
|
||||
Mux::Tokens
|
||||
Mux::tokenize (const std::string& s)
|
||||
Mu::Tokens
|
||||
Mu::tokenize (const std::string& s)
|
||||
{
|
||||
Tokens tokens{};
|
||||
|
||||
|
||||
@ -36,7 +36,7 @@
|
||||
// Furthermore, we detect ranges ("a..b") and regexps (/../) at the parser level, since we need a
|
||||
// bit more context to resolve ambiguities.
|
||||
|
||||
namespace Mux {
|
||||
namespace Mu {
|
||||
|
||||
// A token
|
||||
struct Token {
|
||||
@ -135,6 +135,6 @@ operator<< (std::ostream& os, const Token& t)
|
||||
using Tokens = std::deque<Token>;
|
||||
Tokens tokenize (const std::string& s);
|
||||
|
||||
} // namespace Mux
|
||||
} // namespace Mu
|
||||
|
||||
#endif /* __TOKENIZER_HH__ */
|
||||
|
||||
@ -17,13 +17,16 @@
|
||||
** 02110-1301, USA.
|
||||
*/
|
||||
|
||||
#ifndef TREE_HH__
|
||||
#define TREE_HH__
|
||||
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <iostream>
|
||||
|
||||
#include <parser/data.hh>
|
||||
|
||||
namespace Mux {
|
||||
namespace Mu {
|
||||
|
||||
// A node in the parse tree
|
||||
struct Node {
|
||||
@ -101,4 +104,7 @@ operator<< (std::ostream& os, const Tree& tree)
|
||||
return os;
|
||||
}
|
||||
|
||||
} // namespace Mux
|
||||
} // namespace Mu
|
||||
|
||||
|
||||
#endif /* TREE_HH__ */
|
||||
|
||||
@ -1,435 +0,0 @@
|
||||
/*
|
||||
** Copyright (C) 2017 Dirk-Jan C. Binnema <djcb@djcbsoftware.nl>
|
||||
**
|
||||
** This library is free software; you can redistribute it and/or
|
||||
** modify it under the terms of the GNU Lesser General Public License
|
||||
** as published by the Free Software Foundation; either version 2.1
|
||||
** of the License, or (at your option) any later version.
|
||||
**
|
||||
** This library is distributed in the hope that it will be useful,
|
||||
** but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
** Lesser General Public License for more details.
|
||||
**
|
||||
** You should have received a copy of the GNU Lesser General Public
|
||||
** License along with this library; if not, write to the Free
|
||||
** Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
|
||||
** 02110-1301, USA.
|
||||
*/
|
||||
|
||||
#define _XOPEN_SOURCE
|
||||
#include <time.h>
|
||||
|
||||
#define GNU_SOURCE
|
||||
#include <stdio.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#include "utils.hh"
|
||||
|
||||
#include <string.h>
|
||||
#include <iostream>
|
||||
#include <algorithm>
|
||||
|
||||
#include <glib.h>
|
||||
#include <glib/gprintf.h>
|
||||
|
||||
using namespace Mux;
|
||||
|
||||
namespace {
|
||||
|
||||
static gunichar
|
||||
unichar_tolower (gunichar uc)
|
||||
{
|
||||
if (!g_unichar_isalpha(uc))
|
||||
return uc;
|
||||
|
||||
if (g_unichar_get_script (uc) != G_UNICODE_SCRIPT_LATIN)
|
||||
return g_unichar_tolower (uc);
|
||||
|
||||
switch (uc)
|
||||
{
|
||||
case 0x00e6:
|
||||
case 0x00c6: return 'e'; /* æ */
|
||||
case 0x00f8: return 'o'; /* ø */
|
||||
case 0x0110:
|
||||
case 0x0111: return 'd'; /* đ */
|
||||
/* todo: many more */
|
||||
default: return g_unichar_tolower (uc);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* gx_utf8_flatten:
|
||||
* @str: a UTF-8 string
|
||||
* @len: the length of @str, or -1 if it is %NULL-terminated
|
||||
*
|
||||
* Flatten some UTF-8 string; that is, downcase it and remove any diacritics.
|
||||
*
|
||||
* Returns: (transfer full): a flattened string, free with g_free().
|
||||
*/
|
||||
static char*
|
||||
gx_utf8_flatten (const gchar *str, gssize len)
|
||||
{
|
||||
GString *gstr;
|
||||
char *norm, *cur;
|
||||
|
||||
g_return_val_if_fail (str, NULL);
|
||||
|
||||
norm = g_utf8_normalize (str, len, G_NORMALIZE_ALL);
|
||||
if (!norm)
|
||||
return NULL;
|
||||
|
||||
gstr = g_string_sized_new (strlen (norm));
|
||||
|
||||
for (cur = norm; cur && *cur; cur = g_utf8_next_char (cur))
|
||||
{
|
||||
gunichar uc;
|
||||
|
||||
uc = g_utf8_get_char (cur);
|
||||
if (g_unichar_combining_class (uc) != 0)
|
||||
continue;
|
||||
|
||||
g_string_append_unichar (gstr, unichar_tolower(uc));
|
||||
}
|
||||
|
||||
g_free (norm);
|
||||
|
||||
return g_string_free (gstr, FALSE);
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
std::string // gx_utf8_flatten
|
||||
Mux::utf8_flatten (const char *str)
|
||||
{
|
||||
if (!str)
|
||||
return {};
|
||||
|
||||
// the pure-ascii case
|
||||
if (g_str_is_ascii(str)) {
|
||||
auto l = g_ascii_strdown (str, -1);
|
||||
std::string s{l};
|
||||
g_free (l);
|
||||
return s;
|
||||
}
|
||||
|
||||
// seems we need the big guns
|
||||
char *flat = gx_utf8_flatten (str, -1);
|
||||
if (!flat)
|
||||
return {};
|
||||
|
||||
std::string s{flat};
|
||||
g_free (flat);
|
||||
|
||||
return s;
|
||||
}
|
||||
|
||||
std::string
|
||||
Mux::utf8_clean (const std::string& dirty)
|
||||
{
|
||||
GString *gstr = g_string_sized_new (dirty.length());
|
||||
|
||||
for (auto cur = dirty.c_str(); cur && *cur; cur = g_utf8_next_char (cur)) {
|
||||
|
||||
const gunichar uc = g_utf8_get_char (cur);
|
||||
if (g_unichar_iscntrl (uc))
|
||||
g_string_append_c (gstr, ' ');
|
||||
else
|
||||
g_string_append_unichar (gstr, uc);
|
||||
}
|
||||
|
||||
std::string clean(gstr->str, gstr->len);
|
||||
g_string_free (gstr, TRUE);
|
||||
|
||||
clean.erase (0, clean.find_first_not_of(" "));
|
||||
clean.erase (clean.find_last_not_of(" ") + 1); // remove trailing space
|
||||
|
||||
return clean;
|
||||
}
|
||||
|
||||
std::vector<std::string>
|
||||
Mux::split (const std::string& str, const std::string& sepa)
|
||||
{
|
||||
char **parts = g_strsplit(str.c_str(), sepa.c_str(), -1);
|
||||
std::vector<std::string> vec;
|
||||
for (auto part = parts; part && *part; ++part)
|
||||
vec.push_back (*part);
|
||||
|
||||
g_strfreev(parts);
|
||||
|
||||
return vec;
|
||||
}
|
||||
|
||||
std::string
|
||||
Mux::quote (const std::string& str)
|
||||
{
|
||||
char *s = g_strescape (str.c_str(), NULL);
|
||||
if (!s)
|
||||
return {};
|
||||
|
||||
std::string res (s);
|
||||
g_free (s);
|
||||
|
||||
return "\"" + res + "\"";
|
||||
}
|
||||
|
||||
std::string
|
||||
Mux::format (const char *frm, ...)
|
||||
{
|
||||
va_list args;
|
||||
|
||||
va_start (args, frm);
|
||||
|
||||
char *s = {};
|
||||
const auto res = g_vasprintf (&s, frm, args);
|
||||
va_end (args);
|
||||
if (res == -1) {
|
||||
std::cerr << "string format failed" << std::endl;
|
||||
return {};
|
||||
}
|
||||
|
||||
std::string str = s;
|
||||
free (s);
|
||||
|
||||
return str;
|
||||
}
|
||||
|
||||
// Dates are stored as zero-padded 10-digit strings; these are the format
// used to produce them and the smallest and largest such values.
constexpr const auto InternalDateFormat = "%010" G_GINT64_FORMAT;
constexpr const char InternalDateMin[]  = "0000000000";
constexpr const char InternalDateMax[]  = "9999999999";
static_assert(sizeof(InternalDateMin) == 10 + 1, "invalid");
static_assert(sizeof(InternalDateMax) == 10 + 1, "invalid");

// The fallback value for a date: the minimum when it is the first (lower)
// bound, the maximum otherwise.
static std::string
date_boundary (bool is_first)
{
	if (is_first)
		return InternalDateMin;

	return InternalDateMax;
}
|
||||
|
||||
std::string
|
||||
Mux::date_to_time_t_string (int64_t t)
|
||||
{
|
||||
char buf[sizeof(InternalDateMax)];
|
||||
snprintf (buf, sizeof(buf), InternalDateFormat, t);
|
||||
|
||||
return buf;
|
||||
}
|
||||
|
||||
static std::string
|
||||
delta_ymwdhMs (const std::string& expr)
|
||||
{
|
||||
char *endptr;
|
||||
auto num = strtol (expr.c_str(), &endptr, 10);
|
||||
if (num <= 0 || num > 9999 || !endptr || !*endptr)
|
||||
return date_boundary (true);
|
||||
|
||||
int years, months, weeks, days, hours, minutes, seconds;
|
||||
years = months = weeks = days = hours = minutes = seconds = 0;
|
||||
|
||||
switch (endptr[0]) {
|
||||
case 's': seconds = num; break;
|
||||
case 'M': minutes = num; break;
|
||||
case 'h': hours = num; break;
|
||||
case 'd': days = num; break;
|
||||
case 'w': weeks = num; break;
|
||||
case 'm': months = num; break;
|
||||
case 'y': years = num; break;
|
||||
default:
|
||||
return date_boundary (true);
|
||||
}
|
||||
|
||||
GDateTime *then, *now = g_date_time_new_now_local ();
|
||||
if (weeks != 0)
|
||||
then = g_date_time_add_weeks (now, -weeks);
|
||||
else
|
||||
then = g_date_time_add_full (now, -years, -months,-days,
|
||||
-hours, -minutes, -seconds);
|
||||
|
||||
time_t t = MAX (0, (gint64)g_date_time_to_unix (then));
|
||||
|
||||
g_date_time_unref (then);
|
||||
g_date_time_unref (now);
|
||||
|
||||
return date_to_time_t_string (t);
|
||||
}
|
||||
|
||||
static std::string
|
||||
special_date (const std::string& d, bool is_first)
|
||||
{
|
||||
if (d == "now")
|
||||
return date_to_time_t_string (time(NULL));
|
||||
|
||||
else if (d == "today") {
|
||||
|
||||
GDateTime *dt, *midnight;
|
||||
dt = g_date_time_new_now_local ();
|
||||
|
||||
if (!is_first) {
|
||||
GDateTime *tmp = dt;
|
||||
dt = g_date_time_add_days (dt, 1);
|
||||
g_date_time_unref (tmp);
|
||||
}
|
||||
|
||||
midnight = g_date_time_add_full (dt, 0, 0, 0,
|
||||
-g_date_time_get_hour(dt),
|
||||
-g_date_time_get_minute (dt),
|
||||
-g_date_time_get_second (dt));
|
||||
time_t t = MAX(0, (gint64)g_date_time_to_unix (midnight));
|
||||
g_date_time_unref (dt);
|
||||
g_date_time_unref (midnight);
|
||||
return date_to_time_t_string ((time_t)t);
|
||||
|
||||
} else
|
||||
return date_boundary (is_first);
|
||||
}
|
||||
|
||||
// If the day-of-month in @tbuf lies beyond the end of its month, move the
// time to the last second of that month's last day; otherwise leave @tbuf
// untouched. February follows the Gregorian leap-year rule.
static void
fixup_month (struct tm *tbuf)
{
	const auto year  = tbuf->tm_year + 1900;
	const auto month = tbuf->tm_mon + 1;

	const bool leap_year =
		(year % 4 == 0) && (year % 100 != 0 || year % 400 == 0);

	decltype(tbuf->tm_mday) last_day = 31;
	if (month == 2)
		last_day = leap_year ? 29 : 28;
	else if (month == 4 || month == 6 || month == 9 || month == 11)
		last_day = 30;

	if (tbuf->tm_mday <= last_day)
		return; // already a valid day

	tbuf->tm_mday = last_day;
	tbuf->tm_hour = 23;
	tbuf->tm_min  = 59;
	tbuf->tm_sec  = 59;
}
|
||||
|
||||
std::string
|
||||
Mux::date_to_time_t_string (const std::string& dstr, bool is_first)
|
||||
{
|
||||
gint64 t;
|
||||
struct tm tbuf;
|
||||
GDateTime *dtime;
|
||||
|
||||
/* one-sided dates */
|
||||
if (dstr.empty())
|
||||
return date_boundary (is_first);
|
||||
else if (dstr == "today" || dstr == "now")
|
||||
return special_date (dstr, is_first);
|
||||
|
||||
else if (dstr.find_first_of("ymdwhMs") != std::string::npos)
|
||||
return delta_ymwdhMs (dstr);
|
||||
|
||||
constexpr char UserDateMin[] = "19700101000000";
|
||||
constexpr char UserDateMax[] = "29991231235959";
|
||||
|
||||
std::string date (is_first ? UserDateMin : UserDateMax);
|
||||
std::copy_if (dstr.begin(), dstr.end(), date.begin(),[](auto c){return isdigit(c);});
|
||||
|
||||
memset (&tbuf, 0, sizeof tbuf);
|
||||
if (!strptime (date.c_str(), "%Y%m%d%H%M%S", &tbuf) &&
|
||||
!strptime (date.c_str(), "%Y%m%d%H%M", &tbuf) &&
|
||||
!strptime (date.c_str(), "%Y%m%d", &tbuf) &&
|
||||
!strptime (date.c_str(), "%Y%m", &tbuf) &&
|
||||
!strptime (date.c_str(), "%Y", &tbuf))
|
||||
return date_boundary (is_first);
|
||||
|
||||
fixup_month(&tbuf);
|
||||
|
||||
dtime = g_date_time_new_local (tbuf.tm_year + 1900,
|
||||
tbuf.tm_mon + 1,
|
||||
tbuf.tm_mday,
|
||||
tbuf.tm_hour,
|
||||
tbuf.tm_min,
|
||||
tbuf.tm_sec);
|
||||
if (!dtime) {
|
||||
g_warning ("invalid %s date '%s'",
|
||||
is_first ? "lower" : "upper", date.c_str());
|
||||
return date_boundary (is_first);
|
||||
}
|
||||
|
||||
t = g_date_time_to_unix (dtime);
|
||||
g_date_time_unref (dtime);
|
||||
|
||||
if (t < 0 || t > 9999999999)
|
||||
return date_boundary (is_first);
|
||||
else
|
||||
return date_to_time_t_string (t);
|
||||
}
|
||||
|
||||
// Sizes are stored as zero-padded 10-digit strings; these are the format
// used to produce them and the smallest and largest such values.
constexpr const auto SizeFormat = "%010" G_GINT64_FORMAT;
constexpr const char SizeMin[]  = "0000000000";
constexpr const char SizeMax[]  = "9999999999";
static_assert(sizeof(SizeMin) == 10 + 1, "invalid");
static_assert(sizeof(SizeMax) == 10 + 1, "invalid");

// The fallback value for a size: the minimum when it is the first (lower)
// bound, the maximum otherwise.
static std::string
size_boundary (bool is_first)
{
	if (is_first)
		return SizeMin;

	return SizeMax;
}
|
||||
|
||||
std::string
|
||||
Mux::size_to_string (int64_t size)
|
||||
{
|
||||
char buf[sizeof(SizeMax)];
|
||||
snprintf (buf, sizeof(buf), SizeFormat, size);
|
||||
|
||||
return buf;
|
||||
}
|
||||
|
||||
std::string
|
||||
Mux::size_to_string (const std::string& val, bool is_first)
|
||||
{
|
||||
std::string str;
|
||||
GRegex *rx;
|
||||
GMatchInfo *minfo;
|
||||
|
||||
/* one-sided ranges */
|
||||
if (val.empty())
|
||||
return size_boundary (is_first);
|
||||
|
||||
rx = g_regex_new ("(\\d+)(b|k|kb|m|mb|g|gb)?",
|
||||
G_REGEX_CASELESS, (GRegexMatchFlags)0, NULL);
|
||||
minfo = NULL;
|
||||
if (g_regex_match (rx, val.c_str(), (GRegexMatchFlags)0, &minfo)) {
|
||||
gint64 size;
|
||||
char *s;
|
||||
|
||||
s = g_match_info_fetch (minfo, 1);
|
||||
size = atoll (s);
|
||||
g_free (s);
|
||||
|
||||
s = g_match_info_fetch (minfo, 2);
|
||||
switch (s ? g_ascii_tolower(s[0]) : 0) {
|
||||
case 'k': size *= 1024; break;
|
||||
case 'm': size *= (1024 * 1024); break;
|
||||
case 'g': size *= (1024 * 1024 * 1024); break;
|
||||
default: break;
|
||||
}
|
||||
|
||||
g_free (s);
|
||||
str = size_to_string (size);
|
||||
} else
|
||||
str = size_boundary (is_first);
|
||||
|
||||
g_regex_unref (rx);
|
||||
g_match_info_unref (minfo);
|
||||
|
||||
return str;
|
||||
}
|
||||
@ -1,126 +0,0 @@
|
||||
/*
|
||||
** Copyright (C) 2017 Dirk-Jan C. Binnema <djcb@djcbsoftware.nl>
|
||||
**
|
||||
** This library is free software; you can redistribute it and/or
|
||||
** modify it under the terms of the GNU Lesser General Public License
|
||||
** as published by the Free Software Foundation; either version 2.1
|
||||
** of the License, or (at your option) any later version.
|
||||
**
|
||||
** This library is distributed in the hope that it will be useful,
|
||||
** but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
** Lesser General Public License for more details.
|
||||
**
|
||||
** You should have received a copy of the GNU Lesser General Public
|
||||
** License along with this library; if not, write to the Free
|
||||
** Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
|
||||
** 02110-1301, USA.
|
||||
*/
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#ifndef __UTILS_HH__
|
||||
#define __UTILS_HH__
|
||||
|
||||
namespace Mux {
|
||||
|
||||
/**
|
||||
* Flatten a string -- downcase and fold diacritics etc.
|
||||
*
|
||||
* @param str a string
|
||||
*
|
||||
* @return a flattened string
|
||||
*/
|
||||
std::string utf8_flatten (const char *str);
|
||||
inline std::string utf8_flatten (const std::string& s) { return utf8_flatten(s.c_str()); }
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Replace all control characters with spaces, and remove leading and trailing space.
|
||||
*
|
||||
* @param dirty an unclean string
|
||||
*
|
||||
* @return a cleaned-up string.
|
||||
*/
|
||||
std::string utf8_clean (const std::string& dirty);
|
||||
|
||||
|
||||
/**
|
||||
* Split a string into parts
|
||||
*
|
||||
* @param str a string
|
||||
* @param sepa the separator
|
||||
*
|
||||
* @return the parts.
|
||||
*/
|
||||
std::vector<std::string> split (const std::string& str,
|
||||
const std::string& sepa);
|
||||
|
||||
/**
|
||||
* Quote & escape a string
|
||||
*
|
||||
* @param str a string
|
||||
*
|
||||
* @return quoted string
|
||||
*/
|
||||
std::string quote (const std::string& str);
|
||||
|
||||
/**
|
||||
* Format a string, printf style
|
||||
*
|
||||
* @param frm format string
|
||||
* @param ... parameters
|
||||
*
|
||||
* @return a formatted string
|
||||
*/
|
||||
std::string format (const char *frm, ...)
|
||||
__attribute__((format(printf, 1, 2)));
|
||||
|
||||
/**
|
||||
* Convert an ISO date to the corresponding time expressed as a string
|
||||
* with a 10-digit time_t
|
||||
*
|
||||
* @param date an ISO date string
|
||||
* @param first
|
||||
*
|
||||
* @return the corresponding time, expressed as a 10-digit time_t string
|
||||
*/
|
||||
std::string date_to_time_t_string (const std::string& date, bool first);
|
||||
|
||||
/**
|
||||
* 64-bit incarnation of time_t expressed as a 10-digit string. Uses 64-bit for the time-value,
|
||||
* regardless of the size of time_t.
|
||||
*
|
||||
* @param t some time value
|
||||
*
|
||||
* @return the time value expressed as a 10-digit string
|
||||
*/
|
||||
std::string date_to_time_t_string (int64_t t);
|
||||
|
||||
|
||||
|
||||
/**
|
||||
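* Convert a size string (digits with an optional case-insensitive b, k/kb, m/mb or g/gb suffix; k, m and g are powers of 1024) to a size in bytes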
|
||||
*
|
||||
* @param sizestr the size string
|
||||
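* @param first whether this is the first value of a (possibly one-sided) range; selects the boundary value used when sizestr is empty or cannot be parsed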
|
||||
*
|
||||
* @return the size expressed as a string with the decimal number of bytes
|
||||
*/
|
||||
std::string size_to_string (const std::string& sizestr, bool first);
|
||||
|
||||
/**
|
||||
* Convert a numeric size into a string with the decimal number of bytes
|
||||
*
|
||||
* @param size the size
|
||||
*
|
||||
*
|
||||
* @return the size expressed as a string with the decimal number of bytes
|
||||
*/
|
||||
std::string size_to_string (int64_t size);
|
||||
|
||||
} // namespace Mux
|
||||
|
||||
#endif /* __UTILS_HH__ */
|
||||
@ -24,10 +24,10 @@
|
||||
#include <xapian.h>
|
||||
#include "parser/xapian.hh"
|
||||
|
||||
using namespace Mux;
|
||||
using namespace Mu;
|
||||
|
||||
static Xapian::Query
|
||||
xapian_query_op (const Mux::Tree& tree)
|
||||
xapian_query_op (const Mu::Tree& tree)
|
||||
{
|
||||
Xapian::Query::op op;
|
||||
|
||||
@ -68,7 +68,7 @@ make_query (const Value* val, const std::string& str, bool maybe_wildcard)
|
||||
}
|
||||
|
||||
static Xapian::Query
|
||||
xapian_query_value (const Mux::Tree& tree)
|
||||
xapian_query_value (const Mu::Tree& tree)
|
||||
{
|
||||
const auto v = dynamic_cast<Value*> (tree.node.data.get());
|
||||
if (!v->phrase)
|
||||
@ -89,7 +89,7 @@ xapian_query_value (const Mux::Tree& tree)
|
||||
}
|
||||
|
||||
static Xapian::Query
|
||||
xapian_query_range (const Mux::Tree& tree)
|
||||
xapian_query_range (const Mu::Tree& tree)
|
||||
{
|
||||
const auto r { dynamic_cast<Range *>(tree.node.data.get()) };
|
||||
|
||||
@ -98,7 +98,7 @@ xapian_query_range (const Mux::Tree& tree)
|
||||
}
|
||||
|
||||
Xapian::Query
|
||||
Mux::xapian_query (const Mux::Tree& tree)
|
||||
Mu::xapian_query (const Mu::Tree& tree)
|
||||
{
|
||||
switch (tree.node.type) {
|
||||
case Node::Type::Empty:
|
||||
|
||||
@ -24,7 +24,7 @@
|
||||
#include <xapian.h>
|
||||
#include <parser/parser.hh>
|
||||
|
||||
namespace Mux {
|
||||
namespace Mu {
|
||||
|
||||
/**
|
||||
* Transform a parse-tree into a Xapian query object
|
||||
@ -33,8 +33,8 @@ namespace Mux {
|
||||
*
|
||||
* @return a Xapian query object
|
||||
*/
|
||||
Xapian::Query xapian_query (const Mux::Tree& tree);
|
||||
Xapian::Query xapian_query (const Mu::Tree& tree);
|
||||
|
||||
};
|
||||
} // namespace Mu
|
||||
|
||||
#endif /* __XAPIAN_H__ */
|
||||
|
||||
Reference in New Issue
Block a user