From 3dbd8b97637e0b46943251a49f6897987cc019fb Mon Sep 17 00:00:00 2001 From: "Dirk-Jan C. Binnema" Date: Sun, 16 Feb 2025 17:25:57 +0200 Subject: [PATCH] mu4e: handle conversion from pcre regexps mu server uses PCRE-compatible regular expressions, but mu4e needs Emacs-style. In simple cases, the two are the same, but when it gets slightly more complicated they are not. E.g., (foo|bar) => \(foo\|bar\) Mu4e can do the conversion automatically, but for this the pcre2el package (available in MELPA) is required, and is used if the user installs it. Anyway, this code implements the automatic conversion and adds some documentation. Fixes #2816. --- NEWS.org | 4 +++ man/mu-init.1.org | 9 ++++++ mu4e/mu4e-contacts.el | 66 +++++++++++++++++++++++++++++++++---------- mu4e/mu4e-server.el | 9 +++++- mu4e/mu4e.texi | 18 ++++++++++-- 5 files changed, 87 insertions(+), 19 deletions(-) diff --git a/NEWS.org b/NEWS.org index 8b269fca..ffd0a19b 100644 --- a/NEWS.org +++ b/NEWS.org @@ -84,6 +84,10 @@ view as well, such as ~gnus-mailing-list-subscribe~, ~gnus-mailing-list-unsubscribe~. + - The mu server uses PCRE-compatible regexps (for addresses), which are not + necessarily compatible with Emacs regular expressions. However, mu4e can now + automatically translate; this depends on the ~pcre2el~ package which the user + should install when using regular expression-addresses. * 1.12 (released on February 24, 2024) diff --git a/man/mu-init.1.org b/man/mu-init.1.org index d790f2f5..b98cdb5e 100644 --- a/man/mu-init.1.org +++ b/man/mu-init.1.org @@ -39,6 +39,15 @@ _email-address-or-regex_ can be either a plain e-mail address (such as for details), wrapped in */* (such as =/foo-.*@example\\.com/=). Depending on your shell, the argument may need to be quoted. +A note of warning for *mu4e* users: any regular-expressions used with +=--with-address= are also used by *mu4e*. Unfortunately, PCRE regular expressions +are *not* generally compatible with Emacs regular expressions.
For instance, +*(foo|bar)* in PCRE syntax has *\(foo\|bar\)* as its Emacs equivalent. + +The good news is that *mu4e* can automatically translate the regular-expressions, +if you allow it to, by installing the *pcre2el* Elisp package. See the *mu4e* +documentation for further details. + ** --ignored-address _email-address-or-regex_ Specifies that some e-mail address is to be ignored from the contacts-cache (the option can be used multiple times). Such addresses then cannot be found with diff --git a/mu4e/mu4e-contacts.el b/mu4e/mu4e-contacts.el index 6a18ac1a..1a63d403 100644 --- a/mu4e/mu4e-contacts.el +++ b/mu4e/mu4e-contacts.el @@ -1,6 +1,6 @@ ;;; mu4e-contacts.el --- Dealing with contacts -*- lexical-binding: t -*- -;; Copyright (C) 2022-2024 Dirk-Jan C. Binnema +;; Copyright (C) 2022-2025 Dirk-Jan C. Binnema ;; Author: Dirk-Jan C. Binnema ;; Maintainer: Dirk-Jan C. Binnema @@ -22,14 +22,19 @@ ;;; Commentary: -;; Utility functions used in the mu4e +;; Functions and configuration for dealing with contacts in mu4e. ;;; Code: (require 'cl-lib) (require 'message) + +;; opportunistic; if we have it, try to convert pcre-style regexps (from the mu +;; server) to emacs-style. +(require 'pcre2el nil 'noerror) + + (require 'mu4e-helpers) (require 'mu4e-update) - ;;; Configuration (defcustom mu4e-compose-complete-addresses t @@ -114,23 +119,59 @@ predicate function. A value of nil keeps all the addresses." (repeat string)) :group 'mu4e-compose) - ;;; Internal variables (defvar mu4e--contacts-tstamp "0" "Timestamp for the most recent contacts update." ) (defvar mu4e--contacts-set nil "Set with the full contact addresses for autocompletion.") - + +(defun mu4e-rx-pcre-to-emacs (pcre-rx) + "Convert a PCRE regular-expression PCRE-RX to Emacs-style. + +This depends on the \"pcre2el\" package being available. If not, +simply returns its argument. The same happens for +regular-expressions that cannot be converted. See the pcre2el +documentation for further details."
+ (let ((emacs-rx + (or (and (fboundp 'pcre-to-elisp) + (with-demoted-errors "pcre2el error: %S" + (pcre-to-elisp pcre-rx))) + pcre-rx))) + (unless (string= pcre-rx emacs-rx) + (mu4e-log 'misc "converted %s => %s" pcre-rx emacs-rx)) + emacs-rx)) + +(defun mu4e--grab-rx-addr(addr) + "Grab the regexp address (if any) from ADDR. +Address string ADDR is either a normal address like +\"foo@example.com\", or a regex-address like +\"/(foo|bar)@example\\.com/\". In the former case, return nil, +and in the latter case, return the part between the slashes." + (save-match-data + (when (string-match "^/\\(.*\\)/$" addr) + (match-string 1 addr)))) + +(defun mu4e--massage-addresses (addrs) + "Convert list of addresses ADDRS into Emacs-compatible form. +This means any regexp-addresses are converted from PCRE to Emacs. +Other addresses remain unchanged." + (seq-map + (lambda (addr) + (if-let* ((rxaddr (mu4e--grab-rx-addr addr))) + (concat "/" (mu4e-rx-pcre-to-emacs rxaddr) "/") + addr)) + addrs)) + ;;; user mail address (defun mu4e-personal-addresses (&optional no-regexp) "Get the list user's personal addresses, as passed to \"mu init\". The address are either plain e-mail addresses or regexps (strings - wrapped / /). When NO-REGEXP is non-nil, do not include regexp - address patterns (if any)." +wrapped in / /). When NO-REGEXP is non-nil, do not include regexp +address patterns (if any)." (seq-remove - (lambda (addr) (and no-regexp (string-match-p "^/.*/" addr))) + (lambda (addr) (and no-regexp (mu4e--grab-rx-addr addr))) (when-let* ((props (mu4e-server-properties))) (plist-get props :personal-addresses)))) @@ -142,14 +183,11 @@ with both the plain addresses and /regular expressions/."
(when addr (seq-find (lambda (m) - (if (string-match "/\\(.*\\)/" m) - (let ((rx (match-string 1 m)) - (case-fold-search t)) - (string-match rx addr)) + (if-let* ((rxaddr (mu4e--grab-rx-addr m)) (case-fold-search t)) + (string-match rxaddr addr) (eq t (compare-strings addr nil nil m nil nil 'case-insensitive)))) (mu4e-personal-addresses)))) - (defun mu4e-personal-or-alternative-address-p (addr) "Is ADDR either a personal or an alternative address? @@ -179,7 +217,6 @@ See #2680 for further details." (or (and addr (string= addr "")) (mu4e-personal-or-alternative-address-p addr))) - ;; Helpers ;;; RFC2822 handling of phrases in mail-addresses @@ -247,7 +284,6 @@ case a phrase contains a quote, it will be escaped." (format "%s <%s>" (mu4e--rfc822-quote-phrase name) email) email))) - (defun mu4e--update-contacts (contacts &optional tstamp) "Receive a sorted list of CONTACTS newer than TSTAMP. Update an internal set with it. diff --git a/mu4e/mu4e-server.el b/mu4e/mu4e-server.el index bdf32f4f..92501eb5 100644 --- a/mu4e/mu4e-server.el +++ b/mu4e/mu4e-server.el @@ -242,6 +242,8 @@ Checks whether the server process is live." (memq (process-status mu4e--server-process) '(run open listen connect stop)) t)) +(declare-function mu4e--massage-addresses "mu4e-contacts") + (defsubst mu4e--server-eat-sexp-from-buf () "Eat the next s-expression from `mu4e--server-buf'. Note: this is a string, not an emacs-buffer. `mu4e--server-buf gets @@ -383,7 +385,12 @@ The server output is as follows: ;; received a pong message ((plist-get sexp :pong) - (setq mu4e--server-props (plist-get sexp :props)) + (let ((props (plist-get sexp :props))) + ;; attempt to translate the regex-style. 
+ (plist-put props :personal-addresses + (mu4e--massage-addresses + (plist-get props :personal-addresses))) + (setq mu4e--server-props props)) (funcall mu4e-pong-func sexp)) ;; receive queries info diff --git a/mu4e/mu4e.texi b/mu4e/mu4e.texi index 087cbb31..b642b34d 100644 --- a/mu4e/mu4e.texi +++ b/mu4e/mu4e.texi @@ -403,9 +403,8 @@ You can add some e-mail addresses, so @t{mu} recognizes them as yours: indexing messages. If you want to change them, you need to @t{init} once again. -The addresses may also be basic PCRE regular expressions, wrapped in -slashes, for example: - +The addresses may also be basic PCRE regular expressions, wrapped in slashes, +for example: @example $ mu init --maildir=~/Maildir '--my-address=/foo-.*@@example\.com/' @end example @@ -413,6 +412,19 @@ slashes, for example: If you want to see the values for your message-store, you can use @command{mu info}. +Note: unfortunately, PCRE regular expressions are not the same as Emacs regular +expressions. Very simple ones match, but e.g. @t{(foo|bar)} in PCRE syntax is +specified as @t{\(foo\|bar\)} in Emacs/@t{mu4e}. + +The good news is that @t{mu4e} can do the conversion automatically; however for +this to work, users need to install the @t{pcre2el} package (available in +MELPA), and ensure it is available in @t{load-path} when Emacs runs (a normal +package-installation takes care of this). After that, @t{mu4e} takes care of the +translation automatically. + +It is possible to come up with complicated PCRE regular expressions that are not +translatable; if necessary, experiment with @t{pcre2el} to avoid this. + @node Indexing your messages @section Indexing your messages