many: use Mu::Regex instead of std::regex

The former is PCRE-compatible, and faster than std::regex.
This commit is contained in:
Dirk-Jan C. Binnema
2022-12-30 09:58:54 +02:00
parent e97bbb83e3
commit 27ecbbdd65
8 changed files with 45 additions and 66 deletions

View File

@ -289,14 +289,14 @@ extract_tags(const MimeMessage& mime_msg)
constexpr std::array<std::pair<const char*, char>, 3> tag_headers = {{
{"X-Label", ' '}, {"X-Keywords", ','}, {"Keywords", ' '}
}};
static const auto strip_rx{std::regex("^\\s+| +$|( )\\s+")};
std::vector<std::string> tags;
seq_for_each(tag_headers, [&](auto&& item) {
if (auto&& hdr = mime_msg.header(item.first); hdr) {
for (auto&& tagval : split(*hdr, item.second)) {
tags.emplace_back(
std::regex_replace(tagval, strip_rx, "$1"));
tagval.erase(0, tagval.find_first_not_of(' '));
tagval.erase(tagval.find_last_not_of(' ')+1);
tags.emplace_back(std::move(tagval));
}
}
});
@ -631,10 +631,10 @@ fill_document(Message::Private& priv)
const auto path{doc.string_value(Field::Id::Path)};
const auto refs{mime_msg.references()};
const auto& raw_message_id = mime_msg.message_id();
const auto message_id = raw_message_id.has_value() && !raw_message_id->empty()
? *raw_message_id
: fake_message_id(path);
const auto& raw_message_id = mime_msg.message_id();
const auto message_id = raw_message_id.has_value() && !raw_message_id->empty()
? *raw_message_id
: fake_message_id(path);
process_message(mime_msg, path, priv);

View File

@ -25,10 +25,10 @@
#include <sstream>
#include <functional>
#include <algorithm>
#include <regex>
#include <ctime>
#include <utils/mu-utils.hh>
#include <utils/mu-regex.hh>
#include <glib.h>
using namespace Mu;
@ -59,8 +59,8 @@ struct ContactsCache::Private {
ContactUMap contacts_;
std::mutex mtx_;
const StringVec personal_plain_;
const std::vector<std::regex> personal_rx_;
const StringVec personal_plain_;
const std::vector<Regex> personal_rx_;
size_t dirty_;
@ -76,8 +76,8 @@ private:
StringVec svec;
std::copy_if(personal.begin(), personal.end(),
std::back_inserter(svec), [&](auto&& p) {
return p.size() < 2
|| p.at(0) != '/' || p.at(p.length() - 1) != '/';
return p.size() < 2 || p.at(0) != '/' ||
p.at(p.length() - 1) != '/';
});
return svec;
}
@ -89,18 +89,20 @@ private:
*
* @return
*/
std::vector<std::regex> make_personal_rx(const StringVec& personal) const {
std::vector<std::regex> rxvec;
std::vector<Regex> make_personal_rx(const StringVec& personal) const {
std::vector<Regex> rxvec;
for(auto&& p: personal) {
if (p.size() < 2 || p[0] != '/' || p[p.length()- 1] != '/')
continue;
// a regex pattern.
try {
const auto rxstr{p.substr(1, p.length() - 2)};
rxvec.emplace_back(std::regex(
rxstr, std::regex::basic | std::regex::optimize |
std::regex::icase));
} catch (const std::regex_error& rex) {
auto opts = static_cast<GRegexCompileFlags>(G_REGEX_OPTIMIZE|G_REGEX_CASELESS);
auto rx = Regex::make(rxstr, opts);
if (!rx)
throw rx.error();
rxvec.emplace_back(rx.value());
} catch (const Error& rex) {
g_warning("invalid personal address regexp '%s': %s",
p.c_str(),
rex.what());
@ -328,8 +330,7 @@ ContactsCache::is_personal(const std::string& addr) const
return true;
for (auto&& rx : priv_->personal_rx_) {
std::smatch m; // perhaps cache addr in personal_plain_?
if (std::regex_match(addr, m, rx))
if (rx.matches(addr))
return true;
}

View File

@ -19,12 +19,12 @@
#include "mu-parser.hh"
#include <algorithm>
#include <regex>
#include <limits>
#include "mu-tokenizer.hh"
#include "utils/mu-utils.hh"
#include "utils/mu-error.hh"
#include "utils/mu-regex.hh"
#include "message/mu-message.hh"
using namespace Mu;
@ -64,7 +64,7 @@ struct Parser::Private {
Private(const Store& store, Parser::Flags flags) : store_{store}, flags_{flags} {}
std::vector<std::string> process_regex(const std::string& field,
const std::regex& rx) const;
const Regex& rx) const;
Mu::Tree term_1(Mu::Tokens& tokens, WarningVec& warnings) const;
Mu::Tree term_2(Mu::Tokens& tokens, Node::Type& op, WarningVec& warnings) const;
@ -194,7 +194,7 @@ process_range(const std::string& field_str,
std::vector<std::string>
Parser::Private::process_regex(const std::string& field_str,
const std::regex& rx) const
const Regex& rx) const
{
const auto field_opt{field_from_name(field_str)};
if (!field_opt)
@ -204,7 +204,7 @@ Parser::Private::process_regex(const std::string& field_str,
std::vector<std::string> terms;
store_.for_each_term(field_opt->id, [&](auto&& str) {
auto val{str.c_str() + 1}; // strip off the Xapian prefix.
if (std::regex_search(val, rx))
if (rx.matches(val))
terms.emplace_back(std::move(val));
return true;
});
@ -263,9 +263,11 @@ Parser::Private::regex(const FieldInfoVec& fields,
try {
Tree tree(Node{Node::Type::OpOr});
const auto rx = std::regex(rxstr);
const auto rx = Regex::make(rxstr, G_REGEX_OPTIMIZE);
if (!rx)
throw rx.error();
for (const auto& field : fields) {
const auto terms = process_regex(field.field, rx);
const auto terms = process_regex(field.field, *rx);
for (const auto& term : terms) {
tree.add_child(Tree({Node::Type::ValueAtomic,
FieldValue{field.id, term}}));

View File

@ -21,10 +21,10 @@
#include <thread>
#include <vector>
#include <iostream>
#include <regex>
#include <fstream>
#include <utils/mu-utils.hh>
#include <utils/mu-regex.hh>
#include <mu-store.hh>
#include "mu-maildir.hh"
@ -390,13 +390,15 @@ The archives can be found at:
https://lists.cs.wisc.edu/archive/htcondor-users/
--===============0678627779074767862==--)";
// Produce the text of one test message: id is formatted as decimal into buf,
// and that text replaces the matches of rx in test_msg.
//
// NOTE(review): this is a diff rendering without +/- markers, so the
// std::regex and Regex variants of the signature and of the return statement
// appear back to back; presumably only the Regex variant is in the new
// revision.
static std::string
message(const std::regex& rx, size_t id)
message(const Regex& rx, size_t id)
{
char buf[16];
::snprintf(buf, sizeof(buf), "%zu", id);
return std::regex_replace(test_msg, rx, buf);
// NOTE(review): G_REGEX_MATCH_DEFAULT appears to exist only since GLib 2.74;
// with older GLib the flags argument would need to be
// static_cast<GRegexMatchFlags>(0) -- confirm the minimum supported GLib.
// NOTE(review): the GError out-parameter is {} (null), so a failed replace
// is not reported here; presumably to_string_gchar copes with a null
// result and releases the returned buffer -- TODO confirm.
return to_string_gchar(
g_regex_replace(rx, test_msg, -1, 0, buf,
G_REGEX_MATCH_DEFAULT, {}));
}
struct TestData {
@ -420,7 +422,7 @@ setup(const TestData& tdata)
auto res = maildir_mkdir(mdir);
g_assert(!!res);
}
const auto rx = std::regex("@ID@");
const auto rx = Regex::make("@ID@");
/* create messages */
for (size_t n = 0; n != tdata.num_messages; ++n) {
auto mpath = format("%s/maildir-%zu/cur/msg-%zu:2,S",
@ -428,7 +430,7 @@ setup(const TestData& tdata)
n % tdata.num_maildirs,
n);
std::ofstream stream(mpath);
auto msg = message(rx, n);
auto msg = message(*rx, n);
stream.write(msg.c_str(), msg.size());
g_assert_true(stream.good());
}

View File

@ -39,7 +39,6 @@
#include <cinttypes>
#include <charconv>
#include <limits>
#include <regex>
#include <glib.h>
#include <glib/gprintf.h>
@ -261,15 +260,6 @@ Mu::split(const std::string& str, char sepa)
return vec;
}
// Split str at every match of sepa_rx and return the pieces in order.
// The submatch selector -1 makes the token iterator yield the stretches of
// text that were NOT matched (the fields) rather than the separators; the
// returned vector is built from that iterator range.
// NOTE(review): judging by the hunk header (-261,15 +260,6), this overload
// appears to be the code this commit removes.
std::vector<std::string>
Mu::split(const std::string& str, const std::regex& sepa_rx)
{
std::sregex_token_iterator it(str.begin(), str.end(), sepa_rx, -1);
std::sregex_token_iterator end;
return {it, end};
}
std::string
Mu::join(const std::vector<std::string>& svec, const std::string& sepa)
{

View File

@ -33,7 +33,6 @@
#include <type_traits>
#include <algorithm>
#include <numeric>
#include <regex>
#include "mu-utils-format.hh"
#include "mu-option.hh"
@ -97,16 +96,6 @@ std::vector<std::string> split(const std::string& str, const std::string& sepa);
*/
std::vector<std::string> split(const std::string& str, char sepa);
/**
* Split a string into parts, using a regular expression as the separator
*
* @param str a string
* @param sepa_rx the separator regex
*
* @return the parts.
*/
std::vector<std::string> split(const std::string& str, const std::regex& sepa_rx);
/**
* Join the strings in svec into a string, separated by sepa
*
@ -173,8 +162,6 @@ bool locale_workaround();
*/
bool timezone_available(const std::string& tz);
/**
* Well-known runtime paths
*

View File

@ -220,9 +220,6 @@ test_split()
// char sepa
assert_equal_svec(split("axbxc", 'x'), {"a", "b", "c"});
assert_equal_svec(split("axbxcx", 'x'), {"a", "b", "c", ""});
// rx sexp
assert_equal_svec(split("axbyc", std::regex("[xy]")), {"a", "b", "c"});
}
static void

View File

@ -21,12 +21,11 @@
#include "mu-cmd.hh"
#include "utils/mu-util.h"
#include "utils/mu-utils.hh"
#include "utils/mu-regex.hh"
#include <message/mu-message.hh>
#include <regex>
using namespace Mu;
static Result<void>
save_part(const Message::Part& part, size_t idx, const Options& opts)
{
@ -73,12 +72,13 @@ save_parts(const std::string& path, const std::string& filename_rx,
else if (seq_some(opts.extract.parts,
[&](auto&& num){return num==partnum;}))
return true;
else if (!filename_rx.empty() && part.raw_filename() &&
std::regex_match(*part.raw_filename(),
std::regex{filename_rx}))
return true;
else
return false;
else if (!filename_rx.empty() && part.raw_filename()) {
if (auto rx = Regex::make(filename_rx); !rx)
throw rx.error();
else if (rx->matches(*part.raw_filename()))
return true;
}
return false;
});
if (!do_extract)