From 85fed37870e648dd205b2f2c902d82f8a1bead78 Mon Sep 17 00:00:00 2001 From: "Dirk-Jan C. Binnema" Date: Thu, 5 May 2022 01:22:14 +0300 Subject: [PATCH] message/document: update sexp on the fly Keep the sexp for the document up to date during scan / change, instead of having a separate step. --- lib/message/mu-document.cc | 156 ++++++++++++++++++++++++++--- lib/message/mu-document.hh | 77 ++++++++------- lib/message/mu-fields.hh | 157 +++++++++++++++-------------- lib/message/mu-message-sexp.cc | 176 --------------------------------- lib/message/mu-message.cc | 86 +++++++++++++--- lib/mu-parser.cc | 2 +- lib/tests/test-parser.cc | 24 ++--- mu/tests/test-mu-query.cc | 15 ++- 8 files changed, 364 insertions(+), 329 deletions(-) delete mode 100644 lib/message/mu-message-sexp.cc diff --git a/lib/message/mu-document.cc b/lib/message/mu-document.cc index 9413c846..16b55205 100644 --- a/lib/message/mu-document.cc +++ b/lib/message/mu-document.cc @@ -27,6 +27,7 @@ #include #include +#include #include @@ -48,12 +49,13 @@ add_search_term(Xapian::Document& doc, const Field& field, const std::string& va termgen.index_text(utf8_flatten(val),1,field.xapian_term()); } else throw std::logic_error("not a search term"); +} - if (field.id == Field::Id::Tags) - for (auto tag = doc.termlist_begin(); tag != doc.termlist_end(); ++tag) - if ((*tag)[0] == 'X') - g_message("%u: %s", doc.get_docid(), (*tag).c_str()); +static std::string +make_prop_name(const Field& field) +{ + return ":" + std::string(field.name); } void @@ -66,6 +68,10 @@ Document::add(Field::Id id, const std::string& val) if (field.is_searchable()) add_search_term(xdoc_, field, val); + + if (field.include_in_sexp()) + sexp_list().add_prop(make_prop_name(field), + Sexp::make_string(std::move(val))); } void @@ -82,6 +88,14 @@ Document::add(Field::Id id, const std::vector& vals) std::for_each(vals.begin(), vals.end(), [&](const auto& val) { add_search_term(xdoc_, field, val); }); + + if (field.include_in_sexp()) { + Sexp::List elms; + for(auto&& val: vals) + elms.add(Sexp::make_string(val)); + sexp_list().add_prop(make_prop_name(field), + Sexp::make_list(std::move(elms))); + } } @@ -91,6 +105,24 @@ Document::string_vec_value(Field::Id field_id) const noexcept return Mu::split(string_value(field_id), SepaChar1); } +static Sexp +make_contacts_sexp(const Contacts& contacts) +{ + Sexp::List clist; + + seq_for_each(contacts, [&](auto&& c) { + if (!c.name.empty()) + clist.add(Sexp::make_prop_list( + ":name", Sexp::make_string(c.name), + ":email", Sexp::make_string(c.email))); + else + clist.add(Sexp::make_prop_list( + ":email", Sexp::make_string(c.email))); + }); + + return Sexp::make_list(std::move(clist)); +} + void Document::add(Field::Id id, const Contacts& contacts) { @@ -122,6 +154,11 @@ Document::add(Field::Id id, const Contacts& contacts) if (!cvec.empty()) xdoc_.add_value(field.value_no(), join(cvec, SepaChar1)); + + if (field.include_in_sexp()) + sexp_list().add_prop(make_prop_name(field), + make_contacts_sexp(contacts)); + } Contacts @@ -152,6 +189,26 @@ Document::contacts_value(Field::Id id) const noexcept return contacts; } +void +Document::add_extra_contacts(const std::string& propname, const Contacts& contacts) +{ + if (!contacts.empty()) + sexp_list().add_prop(std::string{propname}, + make_contacts_sexp(contacts)); +} + + +static Sexp +make_emacs_time_sexp(::time_t t) +{ + Sexp::List dlist; + dlist.add(Sexp::make_number(static_cast(t >> 16))); + dlist.add(Sexp::make_number(static_cast(t & 0xffff))); + dlist.add(Sexp::make_number(0)); + + return Sexp::make_list(std::move(dlist)); +} + void Document::add(Field::Id id, int64_t val) { @@ -166,6 +223,15 @@ Document::add(Field::Id id, int64_t val) if (field.is_value()) xdoc_.add_value(field.value_no(), to_lexnum(val)); + + if (field.include_in_sexp()) { + if (field.is_time_t()) + sexp_list().add_prop(make_prop_name(field), + make_emacs_time_sexp(val)); + else + sexp_list().add_prop(make_prop_name(field), + Sexp::make_number(val)); + } } int64_t @@ -184,8 +250,11 @@ Document::add(Priority prio) xdoc_.add_value(field.value_no(), std::string(1, to_char(prio))); xdoc_.add_boolean_term(field.xapian_term(to_char(prio))); -} + if (field.include_in_sexp()) + sexp_list().add_prop(make_prop_name(field), + Sexp::make_symbol_sv(priority_name(prio))); +} Priority Document::priority_value() const noexcept @@ -198,19 +267,51 @@ void Document::add(Flags flags) { constexpr auto field{field_from_id(Field::Id::Flags)}; - const auto old_flags{flags_value()}; + Sexp::List flaglist; xdoc_.add_value(field.value_no(), to_lexnum(static_cast(flags))); flag_infos_for_each([&](auto&& flag_info) { auto term=[&](){return field.xapian_term(flag_info.shortcut_lower());}; - if (any_of(flag_info.flag & flags)) + if (any_of(flag_info.flag & flags)) { xdoc_.add_boolean_term(term()); - else if(any_of(flag_info.flag & old_flags)) { - /* field can be _updated_, so clear out any removed flags */ - xdoc_.remove_term(term()); + flaglist.add(Sexp::make_symbol_sv(flag_info.name)); } - }); + + if (field.include_in_sexp()) + sexp_list().add_prop(make_prop_name(field), + Sexp::make_list(std::move(flaglist))); +} + + +Sexp::List& +Document::sexp_list() +{ + /* perhaps we need get the sexp_ from the document first? */ + if (sexp_list_.empty()) { + const auto str{xdoc_.get_data()}; + if (!str.empty()) { + Sexp sexp{Sexp::make_parse(str)}; + sexp_list_ = sexp.list(); + } + } + + return sexp_list_; +} + +std::string +Document::cached_sexp() const +{ + return xdoc_.get_data(); +} + +void +Document::update_cached_sexp(void) +{ + if (sexp_list_.empty()) + return; /* nothing to do; i.e. the exisiting sexp is still up to + * date */ + xdoc_.set_data(Sexp::make_list(Sexp::List{sexp_list()}).to_sexp_string()); } Flags @@ -219,6 +320,39 @@ Document::flags_value() const noexcept return static_cast(integer_value(Field::Id::Flags)); } +void +Document::remove(Field::Id field_id) +{ + const auto field{field_from_id(field_id)}; + const auto pfx{field.xapian_prefix()}; + + xapian_try([&]{ + + if (auto&& val{xdoc_.get_value(field.value_no())}; !val.empty()) { + g_debug("removing value<%u>: '%s'", field.value_no(), + val.c_str()); + xdoc_.remove_value(field.value_no()); + } + + std::vector kill_list; + for (auto&& it = xdoc_.termlist_begin(); + it != xdoc_.termlist_end(); ++it) { + const auto term{*it}; + if (!term.empty() && term.at(0) == pfx) + kill_list.emplace_back(term); + } + + for (auto&& term: kill_list) { + g_debug("removing term '%s'", term.c_str()); + try { + xdoc_.remove_term(term); + } catch(const Xapian::InvalidArgumentError& xe) { + g_critical("failed to remove '%s'", term.c_str()); + } + } + }); + +} #ifdef BUILD_TESTS diff --git a/lib/message/mu-document.hh b/lib/message/mu-document.hh index eb99328b..ffef95a6 100644 --- a/lib/message/mu-document.hh +++ b/lib/message/mu-document.hh @@ -30,6 +30,7 @@ #include "mu-flags.hh" #include "mu-contact.hh" #include +#include namespace Mu { @@ -53,46 +54,12 @@ public: */ Document(const Xapian::Document& doc): xdoc_{doc} {} - /** - * Copy CTOR - */ - Document(const Document& rhs) { *this = rhs; } - /** - * Move CTOR - */ - Document(Document&& rhs) {*this = std::move(rhs); } - /** * Get a reference to the underlying Xapian document. * */ const Xapian::Document& xapian_document() const { return xdoc_; } - /* Copy assignment operator - * - * @param rhs some message - * - * @return a message ref - */ - Document& operator=(const Document& rhs) { - if (this != &rhs) - xdoc_ = rhs.xdoc_; - return *this; - } - - /** - * Move assignment operator - * - * @param rhs some message - * - * @return a message ref - */ - Document& operator=(Document&& rhs) { - if (this != &rhs) - xdoc_ = std::move(rhs.xdoc_); - return *this; - } - /** * Get the doc-id for this document * @@ -129,6 +96,17 @@ public: */ void add(Field::Id id, const Contacts& contacts); + /** + * Addd some extra contacts with the given propname; + * this is useful for ":reply-to" and ":list-post" which don't + * have a Field::Id and are only present in the sexp, + * not in the terms/values + * + * @param propname property name (e.g.,. ":reply-to") + * @param contacts contacts for this property. + */ + void add_extra_contacts(const std::string& propname, + const Contacts& contacts); /** * Add an integer value to the document @@ -153,6 +131,33 @@ public: */ void add(Flags flags); + /** + * Remove values and terms for some field. + * + * @param field_id + */ + void remove(Field::Id field_id); + + /** + * Update the cached sexp from the sexp_list_ + */ + void update_cached_sexp(); + + /** + * Get the cached s-expression + * + * @return a string + */ + std::string cached_sexp() const; + + /** + * Get the cached s-expressionl useful for changing + * it (call update_sexp_cache() when done) + * + * @return the cache s-expression + */ + Sexp::List& sexp_list(); + /** * Generically adds an optional value, if set, to the document * @@ -225,7 +230,9 @@ public: Flags flags_value() const noexcept; private: - Xapian::Document xdoc_; + Xapian::Document xdoc_; + Sexp::List sexp_list_; + }; } // namepace Mu diff --git a/lib/message/mu-fields.hh b/lib/message/mu-fields.hh index 2d354106..72475040 100644 --- a/lib/message/mu-fields.hh +++ b/lib/message/mu-fields.hh @@ -42,6 +42,7 @@ struct Field { Bcc = 0, /**< Blind Carbon-Copy */ BodyText, /**< Text body */ Cc, /**< Carbon-Copy */ + Changed, /**< Last change time (think 'ctime') */ Date, /**< Message date */ EmbeddedText, /**< Embedded text in message */ File, /**< Filename */ @@ -50,8 +51,7 @@ struct Field { Maildir, /**< Maildir path */ MailingList, /**< Mailing list */ MessageId, /**< Message Id */ - Mime, /**< MIME-Type */ - Modified, /**< Last modification time */ + MimeType, /**< MIME-Type */ Path, /**< File-system Path */ Priority, /**< Message priority */ References, /**< All references (incl. Reply-To:) */ @@ -64,7 +64,6 @@ struct Field { * */ XBodyHtml, /**< HTML Body */ - XCachedSexp, /**< Cached message s-expression */ _count_ /**< Number of FieldIds */ }; @@ -138,8 +137,12 @@ struct Field { /**< Field value is stored (so the literal value can be retrieved) */ Range = 1 << 21, + + IncludeInSexp = 1 << 24, + /**< whether to include this field in the cached sexp. */ + /**< whether this is a range field (e.g., date, size)*/ - Internal = 1 << 26 + Internal = 1 << 26 }; constexpr bool any_of(Flag some_flag) const{ @@ -159,6 +162,8 @@ struct Field { constexpr bool is_contact() const { return any_of(Flag::Contact); } constexpr bool is_range() const { return any_of(Flag::Range); } + constexpr bool include_in_sexp() const { return any_of(Flag::IncludeInSexp);} + /** * Field members * @@ -166,6 +171,7 @@ struct Field { Id id; /**< Id of the message field */ Type type; /**< Type of the message field */ std::string_view name; /**< Name of the message field */ + std::string_view alias; /**< Alternative name for the message field */ std::string_view description; /**< Decription of the message field */ std::string_view example_query; /**< Example query */ char shortcut; /**< Shortcut for the message field; a..z */ @@ -209,17 +215,18 @@ static constexpr std::array { Field::Id::Bcc, Field::Type::ContactList, - "bcc", + "bcc", {}, "Blind carbon-copy recipient", "bcc:foo@example.com", 'h', Field::Flag::Contact | - Field::Flag::Value + Field::Flag::Value | + Field::Flag::IncludeInSexp }, { Field::Id::BodyText, Field::Type::String, - "body", + "body", {}, "Message plain-text body", "body:capybara", 'b', @@ -228,27 +235,41 @@ static constexpr std::array { Field::Id::Cc, Field::Type::ContactList, - "cc", + "cc", {}, "Carbon-copy recipient", "cc:quinn@example.com", 'c', Field::Flag::Contact | - Field::Flag::Value + Field::Flag::Value | + Field::Flag::IncludeInSexp + }, + + { + Field::Id::Changed, + Field::Type::TimeT, + "changed", {}, + "Last change time", + "change:30m..now", + 'k', + Field::Flag::Value | + Field::Flag::Range | + Field::Flag::IncludeInSexp }, { Field::Id::Date, Field::Type::TimeT, - "date", + "date", {}, "Message date", "date:20220101..20220505", 'd', Field::Flag::Value | - Field::Flag::Range + Field::Flag::Range | + Field::Flag::IncludeInSexp }, { Field::Id::EmbeddedText, Field::Type::String, - "embed", + "embed", {}, "Embedded text", "embed:war OR embed:peace", 'e', @@ -257,105 +278,102 @@ static constexpr std::array { Field::Id::File, Field::Type::String, - "file", + "file", {}, "Attachment file name", "file:/image\\.*.jpg/", 'j', - Field::Flag::NormalTerm + Field::Flag::BooleanTerm }, { Field::Id::Flags, Field::Type::Integer, - "flag", + "flags", "flag", "Message properties", "flag:unread", 'g', - Field::Flag::NormalTerm | - Field::Flag::Value + Field::Flag::BooleanTerm | + Field::Flag::Value | + Field::Flag::IncludeInSexp }, { Field::Id::From, Field::Type::ContactList, - "from", + "from", {}, "Message sender", "from:jimbo", 'f', Field::Flag::Contact | - Field::Flag::Value + Field::Flag::Value | + Field::Flag::IncludeInSexp }, { Field::Id::Maildir, Field::Type::String, - "maildir", + "maildir", {}, "Maildir path for message", "maildir:/private/archive", 'm', Field::Flag::BooleanTerm | - Field::Flag::Value + Field::Flag::Value | + Field::Flag::IncludeInSexp }, { Field::Id::MailingList, Field::Type::String, - "list", + "list", {}, "Mailing list (List-Id:)", "list:mu-discuss.example.com", 'v', Field::Flag::BooleanTerm | - Field::Flag::Value + Field::Flag::Value | + Field::Flag::IncludeInSexp }, { Field::Id::MessageId, Field::Type::String, - "msgid", - "Message-Id", + "message-id", "msgid", + "message-Id", "msgid:abc@123", 'i', Field::Flag::BooleanTerm | - Field::Flag::Value + Field::Flag::Value | + Field::Flag::IncludeInSexp }, { - Field::Id::Mime, + Field::Id::MimeType, Field::Type::String, - "mime", + "mime", "mime-type", "Attachment MIME-type", "mime:image/jpeg", 'y', - Field::Flag::NormalTerm - }, - { - Field::Id::Modified, - Field::Type::TimeT, - "modified", - "Last modification time", - "modified:30m..now", - 'k', - Field::Flag::Value | - Field::Flag::Range + Field::Flag::BooleanTerm }, { Field::Id::Path, Field::Type::String, - "path", + "path", {}, "File system path to message", "path:/a/b/Maildir/cur/msg:2,S", 'l', Field::Flag::BooleanTerm | - Field::Flag::Value + Field::Flag::Value | + Field::Flag::IncludeInSexp }, { Field::Id::Priority, Field::Type::Integer, - "prio", + "priority", "prio", "Priority", "prio:high", 'p', Field::Flag::BooleanTerm | - Field::Flag::Value + Field::Flag::Value | + Field::Flag::IncludeInSexp }, { Field::Id::References, Field::Type::StringList, - "refs", + "references", "refs", "References to related messages", {}, 'r', @@ -364,37 +382,40 @@ static constexpr std::array { Field::Id::Size, Field::Type::ByteSize, - "size", + "size", {}, "Message size in bytes", "size:1M..5M", 'z', Field::Flag::Value | - Field::Flag::Range + Field::Flag::Range | + Field::Flag::IncludeInSexp }, { Field::Id::Subject, Field::Type::String, - "subject", + "subject", {}, "Message subject", "subject:wombat", 's', Field::Flag::Value | - Field::Flag::IndexableTerm + Field::Flag::IndexableTerm | + Field::Flag::IncludeInSexp }, { Field::Id::Tags, Field::Type::StringList, - "tag", + "tags", "tag", "Message tags", "tag:projectx", 'x', - Field::Flag::NormalTerm | - Field::Flag::Value + Field::Flag::BooleanTerm | + Field::Flag::Value | + Field::Flag::IncludeInSexp }, { Field::Id::ThreadId, Field::Type::String, - "thread", + "thread", {}, "Thread a message belongs to", {}, 'w', @@ -404,37 +425,25 @@ static constexpr std::array { Field::Id::To, Field::Type::ContactList, - "to", + "to", {}, "Message recipient", "to:flimflam@example.com", 't', Field::Flag::Contact | - Field::Flag::Value + Field::Flag::Value | + Field::Flag::IncludeInSexp }, - - /* internal */ { Field::Id::XBodyHtml, Field::Type::String, - "htmlbody", + "htmlbody", {}, "Message html body", {}, {}, Field::Flag::Internal }, - - { - Field::Id::XCachedSexp, - Field::Type::String, - "sexp", - "Cached message s-expression", - {}, - {}, - Field::Flag::Internal | - Field::Flag::Value - }, }}; /* @@ -495,12 +504,16 @@ Option field_from_shortcut(char shortcut) { } static inline Option field_from_name(const std::string& name) { - if (name.length() == 1) + switch(name.length()) { + case 0: + return Nothing; + case 1: return field_from_shortcut(name[0]); - else + default: return field_find_if([&](auto&& field){ - return field.name == name; - }); + return name == field.name || name == field.alias; + }); + } } /** diff --git a/lib/message/mu-message-sexp.cc b/lib/message/mu-message-sexp.cc deleted file mode 100644 index ca8cd4a2..00000000 --- a/lib/message/mu-message-sexp.cc +++ /dev/null @@ -1,176 +0,0 @@ -/* -** Copyright (C) 2011-2022 Dirk-Jan C. Binnema -** -** This program is free software; you can redistribute it and/or modify it -** under the terms of the GNU General Public License as published by the -** Free Software Foundation; either version 3, or (at your option) any -** later version. -** -** This program is distributed in the hope that it will be useful, -** but WITHOUT ANY WARRANTY; without even the implied warranty of -** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -** GNU General Public License for more details. -** -** You should have received a copy of the GNU General Public License -** along with this program; if not, write to the Free Software Foundation, -** Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. -** -*/ -#include -#include - -#include "mu-message.hh" -#include "mu-query-results.hh" -#include "utils/mu-str.h" -#include "mu-maildir.hh" -#include "utils/mu-utils.hh" - -using namespace Mu; - -static void -add_prop_nonempty(Sexp::List& list, std::string&& elm, - std::vector&& items) -{ - if (items.empty()) - return; - - Sexp::List elms; - for(auto&& item: items) - elms.add(Sexp::make_string(std::move(item))); - - list.add_prop(std::move(elm), Sexp::make_list(std::move(elms))); -} - -static void -add_prop_nonempty(Sexp::List& list, std::string&& name, std::string&& value) -{ - if (!value.empty()) - list.add_prop(std::move(name), - Sexp::make_string(std::move(value))); -} - -static Mu::Sexp -make_contact_sexp(const Contact& contact) -{ - return Sexp::make_list( - /* name */ - Sexp::make_string(contact.name, true/*?nil*/), - /* dot */ - Sexp::make_symbol("."), - /* email */ - Sexp::make_string(contact.email)); -} - -static void -add_list_post(Sexp::List& list, const Message& message) -{ - if (!message.has_mime_message()) - return; - - /* some mailing lists do not set the reply-to; see pull #1278. So for - * those cases, check the List-Post address and use that instead */ - - GMatchInfo* minfo; - GRegex* rx; - const auto list_post{message.header("List-Post")}; - if (!list_post) - return; - - rx = g_regex_new("?", - G_REGEX_CASELESS, (GRegexMatchFlags)0, {}); - g_return_if_fail(rx); - - if (g_regex_match(rx, list_post->c_str(), (GRegexMatchFlags)0, &minfo)) { - auto address = (char*)g_match_info_fetch(minfo, 1); - list.add_prop(":list-post", make_contact_sexp(Contact{address})); - g_free(address); - } - - g_match_info_free(minfo); - g_regex_unref(rx); -} - -static void -add_contacts(Sexp::List& list, const Message& message) -{ - auto add_contact_type = [&](const Contacts& contacts, std::string&& prop) { - Sexp::List clist; - seq_for_each(contacts, [&](auto&& c) { clist.add(make_contact_sexp(c)); }); - if (!clist.empty()) - list.add_prop(std::move(prop), - Sexp::make_list(std::move(clist))); - }; - - add_contact_type(message.from(),":from"); - add_contact_type(message.to(), ":to"); - add_contact_type(message.cc(), ":cc"); - add_contact_type(message.bcc(), ":bcc"); - - const auto reply_to = seq_filter(message.all_contacts(),[](auto &&c) { - return c.type == Contact::Type::ReplyTo; }); - add_contact_type(reply_to, ":reply-to"); -} - -static void -add_flags(Sexp::List& list, const Message& message) -{ - Sexp::List flaglist; - const auto flags{message.flags()}; - for (auto&& info: AllMessageFlagInfos) - if (any_of(flags & info.flag)) - flaglist.add(Sexp::make_symbol_sv(info.name)); - - if (!flaglist.empty()) - list.add_prop(":flags", Sexp::make_list(std::move(flaglist))); -} - -static void -add_date_and_size(Sexp::List& items, const Message& message) -{ - auto emacs_tstamp = [](::time_t t) { - Sexp::List dlist; - dlist.add(Sexp::make_number(static_cast(t >> 16))); - dlist.add(Sexp::make_number(static_cast(t & 0xffff))); - dlist.add(Sexp::make_number(0)); - return Sexp::make_list(std::move(dlist)); - }; - - items.add_prop(":date", emacs_tstamp(message.date())); - items.add_prop(":modified", emacs_tstamp(message.modified())); - - auto size{message.size()}; - if (size != 0) - items.add_prop(":size", Sexp::make_number(size)); -} - -Mu::Sexp::List -Message::to_sexp_list() const -{ - Sexp::List items; - - add_prop_nonempty(items, ":subject", subject()); - add_prop_nonempty(items, ":message-id", message_id()); - add_prop_nonempty(items, ":mailing-list", mailing_list()); - add_prop_nonempty(items, ":path", path()); - add_prop_nonempty(items, ":maildir",maildir()); - - items.add_prop(":priority", - Sexp::make_symbol_sv(priority_name(priority()))); - - add_contacts(items, *this); - add_list_post(items, *this); - - add_prop_nonempty(items, ":references", references()); - add_prop_nonempty(items, ":tags", tags()); - - add_date_and_size(items, *this); - add_flags(items, *this); - - return items; -} - -Mu::Sexp -Message::to_sexp() const -{ - return Sexp::make_list(to_sexp_list()); -} diff --git a/lib/message/mu-message.cc b/lib/message/mu-message.cc index 584c9ade..74904031 100644 --- a/lib/message/mu-message.cc +++ b/lib/message/mu-message.cc @@ -53,8 +53,9 @@ struct Message::Private { Option mailing_list; std::vector parts; - ::time_t mtime{}; + ::time_t ctime{}; + std::string cache_path; /* * we only need to index these, so we don't * really need these copy if we re-arrange things @@ -134,9 +135,6 @@ Message::Message(const std::string& text, const std::string& path, priv_->mime_msg = std::move(msg.value()); fill_document(*priv_); - - /* cache the sexp */ - priv_->doc.add(Field::Id::XCachedSexp, to_sexp().to_sexp_string()); } @@ -170,10 +168,23 @@ Message::document() const } +unsigned +Message::docid() const +{ + return priv_->doc.xapian_document().get_docid(); +} + + +const Mu::Sexp::List& +Message::to_sexp_list() const +{ + return priv_->doc.sexp_list(); +} + void Message::update_cached_sexp() { - priv_->doc.add(Field::Id::XCachedSexp, to_sexp().to_sexp_string()); + priv_->doc.update_cached_sexp(); } Result @@ -554,6 +565,47 @@ fake_message_id(const std::string& path) return format("%s%s", sha256_res.value().c_str(), mu_suffix); } +/* many of the doc.add(fiels ....) automatically update the sexp-list as well; + * however, there are some _extra_ values in the sexp-list that are not + * based on a field. So we add them here. + */ + + + +static void +doc_add_list_post(Document& doc, const MimeMessage& mime_msg) +{ + /* some mailing lists do not set the reply-to; see pull #1278. So for + * those cases, check the List-Post address and use that instead */ + + GMatchInfo* minfo; + GRegex* rx; + const auto list_post{mime_msg.header("List-Post")}; + if (!list_post) + return; + + rx = g_regex_new("?", + G_REGEX_CASELESS, (GRegexMatchFlags)0, {}); + g_return_if_fail(rx); + + Contacts contacts; + if (g_regex_match(rx, list_post->c_str(), (GRegexMatchFlags)0, &minfo)) { + auto address = (char*)g_match_info_fetch(minfo, 1); + contacts.push_back(Contact(address)); + g_free(address); + } + + g_match_info_free(minfo); + g_regex_unref(rx); + + doc.add_extra_contacts(":list-post", contacts); +} + +static void +doc_add_reply_to(Document& doc, const MimeMessage& mime_msg) +{ + doc.add_extra_contacts(":reply-to", mime_msg.contacts(Contact::Type::ReplyTo)); +} static void fill_document(Message::Private& priv) @@ -568,6 +620,9 @@ fill_document(Message::Private& priv) process_message(mime_msg, path, priv); + doc_add_list_post(doc, mime_msg); /* only in sexp */ + doc_add_reply_to(doc, mime_msg); /* only in sexp */ + field_for_each([&](auto&& field) { /* insist on expliclity handling each */ #pragma GCC diagnostic push @@ -578,10 +633,14 @@ fill_document(Message::Private& priv) break; case Field::Id::BodyText: doc.add(field.id, priv.body_txt); + break; case Field::Id::Cc: doc.add(field.id, mime_msg.contacts(Contact::Type::Cc)); break; + case Field::Id::Changed: + doc.add(field.id, priv.ctime); + break; case Field::Id::Date: doc.add(field.id, mime_msg.date()); break; @@ -606,13 +665,10 @@ fill_document(Message::Private& priv) case Field::Id::MessageId: doc.add(field.id, message_id); break; - case Field::Id::Mime: + case Field::Id::MimeType: for (auto&& part: priv.parts) doc.add(field.id, part.mime_type()); break; - case Field::Id::Modified: - doc.add(field.id, priv.mtime); - break; case Field::Id::Path: /* already */ break; case Field::Id::Priority: @@ -720,15 +776,19 @@ Message::update_after_move(const std::string& new_path, const auto statbuf{get_statbuf(new_path)}; if (!statbuf) return Err(statbuf.error()); + else + priv_->ctime = statbuf->st_ctime; + + priv_->doc.remove(Field::Id::Path); + priv_->doc.remove(Field::Id::Changed); priv_->doc.add(Field::Id::Path, new_path); - priv_->doc.add(Field::Id::Modified, statbuf->st_mtime); - priv_->doc.add(new_flags); + priv_->doc.add(Field::Id::Changed, priv_->ctime); + + set_flags(new_flags); if (const auto res = set_maildir(new_maildir); !res) return res; - priv_->doc.add(Field::Id::XCachedSexp, to_sexp().to_sexp_string()); - return Ok(); } diff --git a/lib/mu-parser.cc b/lib/mu-parser.cc index d95bcdbe..3ecb07c2 100644 --- a/lib/mu-parser.cc +++ b/lib/mu-parser.cc @@ -177,7 +177,7 @@ process_range(const std::string& field_str, std::string u2 = upper; constexpr auto upper_limit = std::numeric_limits::max(); - if (field_opt->id == Field::Id::Date || field_opt->id == Field::Id::Modified) { + if (field_opt->id == Field::Id::Date || field_opt->id == Field::Id::Changed) { l2 = to_lexnum(parse_date_time(lower, true).value_or(0)); u2 = to_lexnum(parse_date_time(upper, false).value_or(upper_limit)); } else if (field_opt->id == Field::Id::Size) { diff --git a/lib/tests/test-parser.cc b/lib/tests/test-parser.cc index 2e5fbfd2..79d06769 100644 --- a/lib/tests/test-parser.cc +++ b/lib/tests/test-parser.cc @@ -72,10 +72,10 @@ test_basic() //{ "", R"#((atom :value ""))#"}, { "foo", - R"#((value "msgid" "foo"))#", + R"#((value "message-id" "foo"))#", }, - {"foo or bar", R"#((or(value "msgid" "foo")(value "msgid" "bar")))#"}, - {"foo and bar", R"#((and(value "msgid" "foo")(value "msgid" "bar")))#"}, + {"foo or bar", R"#((or(value "message-id" "foo")(value "message-id" "bar")))#"}, + {"foo and bar", R"#((and(value "message-id" "foo")(value "message-id" "bar")))#"}, }; test_cases(cases); @@ -86,19 +86,19 @@ test_complex() { CaseVec cases = { {"foo and bar or cuux", - R"#((or(and(value "msgid" "foo")(value "msgid" "bar")))#" + - std::string(R"#((value "msgid" "cuux")))#")}, - {"a and not b", R"#((and(value "msgid" "a")(not(value "msgid" "b"))))#"}, + R"#((or(and(value "message-id" "foo")(value "message-id" "bar")))#" + + std::string(R"#((value "message-id" "cuux")))#")}, + {"a and not b", R"#((and(value "message-id" "a")(not(value "message-id" "b"))))#"}, {"a and b and c", - R"#((and(value "msgid" "a")(and(value "msgid" "b")(value "msgid" "c"))))#"}, + R"#((and(value "message-id" "a")(and(value "message-id" "b")(value "message-id" "c"))))#"}, {"(a or b) and c", - R"#((and(or(value "msgid" "a")(value "msgid" "b"))(value "msgid" "c")))#"}, + R"#((and(or(value "message-id" "a")(value "message-id" "b"))(value "message-id" "c")))#"}, {"a b", // implicit and - R"#((and(value "msgid" "a")(value "msgid" "b")))#"}, + R"#((and(value "message-id" "a")(value "message-id" "b")))#"}, {"a not b", // implicit and not - R"#((and(value "msgid" "a")(not(value "msgid" "b"))))#"}, + R"#((and(value "message-id" "a")(not(value "message-id" "b"))))#"}, {"not b", // implicit and not - R"#((not(value "msgid" "b")))#"}}; + R"#((not(value "message-id" "b")))#"}}; test_cases(cases); } @@ -117,7 +117,7 @@ test_range() static void test_flatten() { - CaseVec cases = {{" Mötørhęåđ", R"#((value "msgid" "motorhead"))#"}}; + CaseVec cases = {{" Mötørhęåđ", R"#((value "message-id" "motorhead"))#"}}; test_cases(cases); } diff --git a/mu/tests/test-mu-query.cc b/mu/tests/test-mu-query.cc index c094a9f2..e286805d 100644 --- a/mu/tests/test-mu-query.cc +++ b/mu/tests/test-mu-query.cc @@ -266,16 +266,11 @@ test_mu_query_accented_chars_02(void) { int i; - g_test_skip("fix me"); - return; - QResults queries[] = {{"f:mü", 1}, - //{"s:motörhead", 1}, - {"t:Helmut", 1}, - {"t:Kröger", 1}, - //{"s:MotorHeäD", 1}, - {"tag:queensryche", 1}, - {"tag:Queensrÿche", 1} + { "s:motörhead", 1}, + {"t:Helmut", 1}, + {"t:Kröger", 1}, + {"s:MotorHeäD", 1}, }; for (i = 0; i != G_N_ELEMENTS(queries); ++i) { @@ -475,6 +470,8 @@ test_mu_query_tags(void) {"tag:lost tag:paradise", 1}, {"tag:lost tag:horizon", 0}, {"tag:lost OR tag:horizon", 1}, + {"tag:queensryche", 1}, + {"tag:Queensrÿche", 1}, {"x:paradise,lost", 0}, {"x:paradise AND x:lost", 1}, {"x:\\\\backslash", 1},