many: use Mu::Regex instead of std::regex

Mu::Regex is PCRE-compatible and faster than std::regex.
This commit is contained in:
Dirk-Jan C. Binnema
2022-12-30 09:58:54 +02:00
parent e97bbb83e3
commit 27ecbbdd65
8 changed files with 45 additions and 66 deletions

View File

@ -289,14 +289,14 @@ extract_tags(const MimeMessage& mime_msg)
constexpr std::array<std::pair<const char*, char>, 3> tag_headers = {{ constexpr std::array<std::pair<const char*, char>, 3> tag_headers = {{
{"X-Label", ' '}, {"X-Keywords", ','}, {"Keywords", ' '} {"X-Label", ' '}, {"X-Keywords", ','}, {"Keywords", ' '}
}}; }};
static const auto strip_rx{std::regex("^\\s+| +$|( )\\s+")};
std::vector<std::string> tags; std::vector<std::string> tags;
seq_for_each(tag_headers, [&](auto&& item) { seq_for_each(tag_headers, [&](auto&& item) {
if (auto&& hdr = mime_msg.header(item.first); hdr) { if (auto&& hdr = mime_msg.header(item.first); hdr) {
for (auto&& tagval : split(*hdr, item.second)) { for (auto&& tagval : split(*hdr, item.second)) {
tags.emplace_back( tagval.erase(0, tagval.find_first_not_of(' '));
std::regex_replace(tagval, strip_rx, "$1")); tagval.erase(tagval.find_last_not_of(' ')+1);
tags.emplace_back(std::move(tagval));
} }
} }
}); });

View File

@ -25,10 +25,10 @@
#include <sstream> #include <sstream>
#include <functional> #include <functional>
#include <algorithm> #include <algorithm>
#include <regex>
#include <ctime> #include <ctime>
#include <utils/mu-utils.hh> #include <utils/mu-utils.hh>
#include <utils/mu-regex.hh>
#include <glib.h> #include <glib.h>
using namespace Mu; using namespace Mu;
@ -60,7 +60,7 @@ struct ContactsCache::Private {
std::mutex mtx_; std::mutex mtx_;
const StringVec personal_plain_; const StringVec personal_plain_;
const std::vector<std::regex> personal_rx_; const std::vector<Regex> personal_rx_;
size_t dirty_; size_t dirty_;
@ -76,8 +76,8 @@ private:
StringVec svec; StringVec svec;
std::copy_if(personal.begin(), personal.end(), std::copy_if(personal.begin(), personal.end(),
std::back_inserter(svec), [&](auto&& p) { std::back_inserter(svec), [&](auto&& p) {
return p.size() < 2 return p.size() < 2 || p.at(0) != '/' ||
|| p.at(0) != '/' || p.at(p.length() - 1) != '/'; p.at(p.length() - 1) != '/';
}); });
return svec; return svec;
} }
@ -89,18 +89,20 @@ private:
* *
* @return * @return
*/ */
std::vector<std::regex> make_personal_rx(const StringVec& personal) const { std::vector<Regex> make_personal_rx(const StringVec& personal) const {
std::vector<std::regex> rxvec; std::vector<Regex> rxvec;
for(auto&& p: personal) { for(auto&& p: personal) {
if (p.size() < 2 || p[0] != '/' || p[p.length()- 1] != '/') if (p.size() < 2 || p[0] != '/' || p[p.length()- 1] != '/')
continue; continue;
// a regex pattern. // a regex pattern.
try { try {
const auto rxstr{p.substr(1, p.length() - 2)}; const auto rxstr{p.substr(1, p.length() - 2)};
rxvec.emplace_back(std::regex( auto opts = static_cast<GRegexCompileFlags>(G_REGEX_OPTIMIZE|G_REGEX_CASELESS);
rxstr, std::regex::basic | std::regex::optimize | auto rx = Regex::make(rxstr, opts);
std::regex::icase)); if (!rx)
} catch (const std::regex_error& rex) { throw rx.error();
rxvec.emplace_back(rx.value());
} catch (const Error& rex) {
g_warning("invalid personal address regexp '%s': %s", g_warning("invalid personal address regexp '%s': %s",
p.c_str(), p.c_str(),
rex.what()); rex.what());
@ -328,8 +330,7 @@ ContactsCache::is_personal(const std::string& addr) const
return true; return true;
for (auto&& rx : priv_->personal_rx_) { for (auto&& rx : priv_->personal_rx_) {
std::smatch m; // perhaps cache addr in personal_plain_? if (rx.matches(addr))
if (std::regex_match(addr, m, rx))
return true; return true;
} }

View File

@ -19,12 +19,12 @@
#include "mu-parser.hh" #include "mu-parser.hh"
#include <algorithm> #include <algorithm>
#include <regex>
#include <limits> #include <limits>
#include "mu-tokenizer.hh" #include "mu-tokenizer.hh"
#include "utils/mu-utils.hh" #include "utils/mu-utils.hh"
#include "utils/mu-error.hh" #include "utils/mu-error.hh"
#include "utils/mu-regex.hh"
#include "message/mu-message.hh" #include "message/mu-message.hh"
using namespace Mu; using namespace Mu;
@ -64,7 +64,7 @@ struct Parser::Private {
Private(const Store& store, Parser::Flags flags) : store_{store}, flags_{flags} {} Private(const Store& store, Parser::Flags flags) : store_{store}, flags_{flags} {}
std::vector<std::string> process_regex(const std::string& field, std::vector<std::string> process_regex(const std::string& field,
const std::regex& rx) const; const Regex& rx) const;
Mu::Tree term_1(Mu::Tokens& tokens, WarningVec& warnings) const; Mu::Tree term_1(Mu::Tokens& tokens, WarningVec& warnings) const;
Mu::Tree term_2(Mu::Tokens& tokens, Node::Type& op, WarningVec& warnings) const; Mu::Tree term_2(Mu::Tokens& tokens, Node::Type& op, WarningVec& warnings) const;
@ -194,7 +194,7 @@ process_range(const std::string& field_str,
std::vector<std::string> std::vector<std::string>
Parser::Private::process_regex(const std::string& field_str, Parser::Private::process_regex(const std::string& field_str,
const std::regex& rx) const const Regex& rx) const
{ {
const auto field_opt{field_from_name(field_str)}; const auto field_opt{field_from_name(field_str)};
if (!field_opt) if (!field_opt)
@ -204,7 +204,7 @@ Parser::Private::process_regex(const std::string& field_str,
std::vector<std::string> terms; std::vector<std::string> terms;
store_.for_each_term(field_opt->id, [&](auto&& str) { store_.for_each_term(field_opt->id, [&](auto&& str) {
auto val{str.c_str() + 1}; // strip off the Xapian prefix. auto val{str.c_str() + 1}; // strip off the Xapian prefix.
if (std::regex_search(val, rx)) if (rx.matches(val))
terms.emplace_back(std::move(val)); terms.emplace_back(std::move(val));
return true; return true;
}); });
@ -263,9 +263,11 @@ Parser::Private::regex(const FieldInfoVec& fields,
try { try {
Tree tree(Node{Node::Type::OpOr}); Tree tree(Node{Node::Type::OpOr});
const auto rx = std::regex(rxstr); const auto rx = Regex::make(rxstr, G_REGEX_OPTIMIZE);
if (!rx)
throw rx.error();
for (const auto& field : fields) { for (const auto& field : fields) {
const auto terms = process_regex(field.field, rx); const auto terms = process_regex(field.field, *rx);
for (const auto& term : terms) { for (const auto& term : terms) {
tree.add_child(Tree({Node::Type::ValueAtomic, tree.add_child(Tree({Node::Type::ValueAtomic,
FieldValue{field.id, term}})); FieldValue{field.id, term}}));

View File

@ -21,10 +21,10 @@
#include <thread> #include <thread>
#include <vector> #include <vector>
#include <iostream> #include <iostream>
#include <regex>
#include <fstream> #include <fstream>
#include <utils/mu-utils.hh> #include <utils/mu-utils.hh>
#include <utils/mu-regex.hh>
#include <mu-store.hh> #include <mu-store.hh>
#include "mu-maildir.hh" #include "mu-maildir.hh"
@ -390,13 +390,15 @@ The archives can be found at:
https://lists.cs.wisc.edu/archive/htcondor-users/ https://lists.cs.wisc.edu/archive/htcondor-users/
--===============0678627779074767862==--)"; --===============0678627779074767862==--)";
static std::string static std::string
message(const std::regex& rx, size_t id) message(const Regex& rx, size_t id)
{ {
char buf[16]; char buf[16];
::snprintf(buf, sizeof(buf), "%zu", id); ::snprintf(buf, sizeof(buf), "%zu", id);
return std::regex_replace(test_msg, rx, buf);
return to_string_gchar(
g_regex_replace(rx, test_msg, -1, 0, buf,
G_REGEX_MATCH_DEFAULT, {}));
} }
struct TestData { struct TestData {
@ -420,7 +422,7 @@ setup(const TestData& tdata)
auto res = maildir_mkdir(mdir); auto res = maildir_mkdir(mdir);
g_assert(!!res); g_assert(!!res);
} }
const auto rx = std::regex("@ID@"); const auto rx = Regex::make("@ID@");
/* create messages */ /* create messages */
for (size_t n = 0; n != tdata.num_messages; ++n) { for (size_t n = 0; n != tdata.num_messages; ++n) {
auto mpath = format("%s/maildir-%zu/cur/msg-%zu:2,S", auto mpath = format("%s/maildir-%zu/cur/msg-%zu:2,S",
@ -428,7 +430,7 @@ setup(const TestData& tdata)
n % tdata.num_maildirs, n % tdata.num_maildirs,
n); n);
std::ofstream stream(mpath); std::ofstream stream(mpath);
auto msg = message(rx, n); auto msg = message(*rx, n);
stream.write(msg.c_str(), msg.size()); stream.write(msg.c_str(), msg.size());
g_assert_true(stream.good()); g_assert_true(stream.good());
} }

View File

@ -39,7 +39,6 @@
#include <cinttypes> #include <cinttypes>
#include <charconv> #include <charconv>
#include <limits> #include <limits>
#include <regex>
#include <glib.h> #include <glib.h>
#include <glib/gprintf.h> #include <glib/gprintf.h>
@ -261,15 +260,6 @@ Mu::split(const std::string& str, char sepa)
return vec; return vec;
} }
std::vector<std::string>
Mu::split(const std::string& str, const std::regex& sepa_rx)
{
std::sregex_token_iterator it(str.begin(), str.end(), sepa_rx, -1);
std::sregex_token_iterator end;
return {it, end};
}
std::string std::string
Mu::join(const std::vector<std::string>& svec, const std::string& sepa) Mu::join(const std::vector<std::string>& svec, const std::string& sepa)
{ {

View File

@ -33,7 +33,6 @@
#include <type_traits> #include <type_traits>
#include <algorithm> #include <algorithm>
#include <numeric> #include <numeric>
#include <regex>
#include "mu-utils-format.hh" #include "mu-utils-format.hh"
#include "mu-option.hh" #include "mu-option.hh"
@ -97,16 +96,6 @@ std::vector<std::string> split(const std::string& str, const std::string& sepa);
*/ */
std::vector<std::string> split(const std::string& str, char sepa); std::vector<std::string> split(const std::string& str, char sepa);
/**
* Split a string in parts
*
* @param str a string
* @param sepa the separator regex
*
* @return the parts.
*/
std::vector<std::string> split(const std::string& str, const std::regex& sepa_rx);
/** /**
* Join the strings in svec into a string, separated by sepa * Join the strings in svec into a string, separated by sepa
* *
@ -173,8 +162,6 @@ bool locale_workaround();
*/ */
bool timezone_available(const std::string& tz); bool timezone_available(const std::string& tz);
/** /**
* Well-known runtime paths * Well-known runtime paths
* *

View File

@ -220,9 +220,6 @@ test_split()
// char sepa // char sepa
assert_equal_svec(split("axbxc", 'x'), {"a", "b", "c"}); assert_equal_svec(split("axbxc", 'x'), {"a", "b", "c"});
assert_equal_svec(split("axbxcx", 'x'), {"a", "b", "c", ""}); assert_equal_svec(split("axbxcx", 'x'), {"a", "b", "c", ""});
// rx sexp
assert_equal_svec(split("axbyc", std::regex("[xy]")), {"a", "b", "c"});
} }
static void static void

View File

@ -21,12 +21,11 @@
#include "mu-cmd.hh" #include "mu-cmd.hh"
#include "utils/mu-util.h" #include "utils/mu-util.h"
#include "utils/mu-utils.hh" #include "utils/mu-utils.hh"
#include "utils/mu-regex.hh"
#include <message/mu-message.hh> #include <message/mu-message.hh>
#include <regex>
using namespace Mu; using namespace Mu;
static Result<void> static Result<void>
save_part(const Message::Part& part, size_t idx, const Options& opts) save_part(const Message::Part& part, size_t idx, const Options& opts)
{ {
@ -73,11 +72,12 @@ save_parts(const std::string& path, const std::string& filename_rx,
else if (seq_some(opts.extract.parts, else if (seq_some(opts.extract.parts,
[&](auto&& num){return num==partnum;})) [&](auto&& num){return num==partnum;}))
return true; return true;
else if (!filename_rx.empty() && part.raw_filename() && else if (!filename_rx.empty() && part.raw_filename()) {
std::regex_match(*part.raw_filename(), if (auto rx = Regex::make(filename_rx); !rx)
std::regex{filename_rx})) throw rx.error();
else if (rx->matches(*part.raw_filename()))
return true; return true;
else }
return false; return false;
}); });