* extract based on filename regexp and document it

This commit is contained in:
Dirk-Jan C. Binnema
2011-05-22 13:42:19 +03:00
parent f7a39155c8
commit 48d52daf0b
5 changed files with 154 additions and 59 deletions

View File

@ -1,4 +1,4 @@
.TH MU EXTRACT 1 "April 2011" "User Manuals" .TH MU EXTRACT 1 "May 2011" "User Manuals"
.SH NAME .SH NAME
@ -6,8 +6,8 @@ mu extract\- display and save message parts (attachments)
.SH SYNOPSIS .SH SYNOPSIS
.B mu extract [options] <file> [<parts-to-save>] .B mu extract [options] <file>
.B mu extract [options] <file> <filename> .B mu extract [options] <file> <pattern>
.SH DESCRIPTION .SH DESCRIPTION
@ -16,11 +16,12 @@ attachments) from mail messages. It works on message files, and does not
require the message to be indexed. require the message to be indexed.
For attachments, the file name used for saving is the name of the attachment For attachments, the file name used for saving is the name of the attachment
in the message. If there is no such name, or when saving other mime-parts, a in the message. If there is no such name, or when saving other MIME-parts, a
name derived from the message-id of the message. name is derived from the message-id of the message.
If you specify a filename as the second argument, an attempt will be made to If you specify a pattern (a case-insensitive regular expression) as the second
find a extract the particular (first) MIME-part with that filename. argument, all attachments with filenames matching that pattern will be
extracted.
Without any options, \fBmu extract\fR simply outputs the list of MIME-parts in Without any options, \fBmu extract\fR simply outputs the list of MIME-parts in
the message. the message.
@ -56,6 +57,24 @@ allowed.
application for the particular file type. Depends on the \fBxdg-open\fR application for the particular file type. Depends on the \fBxdg-open\fR
utility. utility.
.SH EXAMPLES
To display information about all the MIME-parts in a message file:
.nf
$ mu extract msgfile
.fi
To extract MIME-parts 3 and 4 from this message, overwriting existing files
with the same name:
.nf
$ mu extract --parts=3,4 --overwrite msgfile
.fi
To extract all files ending in '.jpg' (case-insensitive):
.nf
$ mu extract msgfile '.*\e.jpg'
.fi
.SH BUGS .SH BUGS
Please report bugs if you find them: Please report bugs if you find them:

View File

@ -1,3 +1,5 @@
/* -*-mode: c; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*-*/
/* /*
** Copyright (C) 2010-2011 Dirk-Jan C. Binnema <djcb@djcbsoftware.nl> ** Copyright (C) 2010-2011 Dirk-Jan C. Binnema <djcb@djcbsoftware.nl>
** **
@ -22,6 +24,7 @@
#endif /*HAVE_CONFIG_H*/ #endif /*HAVE_CONFIG_H*/
#include <stdlib.h> #include <stdlib.h>
#include <string.h>
#include "mu-msg.h" #include "mu-msg.h"
#include "mu-msg-part.h" #include "mu-msg-part.h"
@ -94,22 +97,60 @@ save_numbered_parts (MuMsg *msg, MuConfig *opts)
return rv; return rv;
} }
static gboolean static GRegex*
save_part_with_filename (MuMsg *msg, const char *filename, MuConfig *opts) anchored_regex (const char* pattern)
{ {
int idx; GRegex *rx;
GError *err;
gchar *anchored;
idx = mu_msg_part_find_file (msg, filename);
if (idx == -1) { anchored = g_strdup_printf
g_warning ("file '%s' not found in this message", filename); ("%s%s%s",
return FALSE; pattern[0] == '^' ? "" : "^",
pattern,
pattern[strlen(pattern)-1] == '$' ? "" : "$");
err = NULL;
rx = g_regex_new (anchored, G_REGEX_CASELESS|G_REGEX_OPTIMIZE, 0,
&err);
g_free (anchored);
if (!rx) {
g_warning ("error in regular expression '%s': %s",
pattern, err->message);
g_error_free (err);
return NULL;
} }
if (!save_part (msg, opts->targetdir, idx, opts->overwrite, return rx;
opts->play)) }
static gboolean
save_part_with_filename (MuMsg *msg, const char *pattern, MuConfig *opts)
{
GSList *lst, *cur;
GRegex *rx;
gboolean rv;
/* 'anchor' the pattern with '^...$' if not already */
rx = anchored_regex (pattern);
if (!rx)
return FALSE; return FALSE;
return TRUE; lst = mu_msg_part_find_files (msg, rx);
g_regex_unref (rx);
if (!lst) {
g_warning ("no matching attachments found");
return FALSE;
}
for (cur = lst, rv = TRUE; cur; cur = g_slist_next (cur))
rv &= save_part (msg, opts->targetdir,
GPOINTER_TO_UINT(cur->data),
opts->overwrite, opts->play);
return rv;
} }
@ -180,9 +221,6 @@ save_part_if (MuMsg *msg, MuMsgPart *part, SaveData *sd)
++sd->saved_num; ++sd->saved_num;
leave: leave:
if (!sd->result)
g_warning ("failed to save/play MIME-part %u to %s",
part->index, filepath ? filepath : "<none>");
sd->result = rv; sd->result = rv;
g_free (filepath); g_free (filepath);
} }
@ -292,12 +330,13 @@ static gboolean
check_params (MuConfig *opts) check_params (MuConfig *opts)
{ {
if (!opts->params[1] || (opts->params[2] && opts->params[3])) { if (!opts->params[1] || (opts->params[2] && opts->params[3])) {
g_warning ("usage: mu extract [options] <file> [<filename>]"); g_warning ("usage: mu extract [options] <file> [<pattern>]");
return FALSE; return MU_EXITCODE_ERROR;
} }
if (opts->params[2] && (opts->save_attachments || opts->save_all)) { if (opts->params[2] && (opts->save_attachments || opts->save_all)) {
g_warning ("--save-attachments --save-all is allowed don't accept " g_warning ("--save-attachments --save-all don't accept "
"a filename"); "a filename");
return FALSE; return FALSE;
} }
@ -325,15 +364,13 @@ mu_cmd_extract (MuConfig *opts)
g_return_val_if_fail (opts, MU_EXITCODE_ERROR); g_return_val_if_fail (opts, MU_EXITCODE_ERROR);
g_return_val_if_fail (opts->cmd == MU_CONFIG_CMD_EXTRACT, g_return_val_if_fail (opts->cmd == MU_CONFIG_CMD_EXTRACT,
MU_EXITCODE_ERROR); MU_EXITCODE_ERROR);
if (!check_params (opts)) if (!check_params (opts))
return MU_EXITCODE_ERROR; return MU_EXITCODE_ERROR;
if (!opts->params[2] && if (!opts->params[2] && !opts->parts &&
!opts->parts && !opts->save_attachments && !opts->save_all)
!opts->save_attachments && rv = show_parts (opts->params[1], opts); /* show, don't save */
!opts->save_all) /* show, don't save */
rv = show_parts (opts->params[1], opts);
else { else {
rv = mu_util_check_dir(opts->targetdir, FALSE, TRUE); rv = mu_util_check_dir(opts->targetdir, FALSE, TRUE);
if (!rv) if (!rv)

View File

@ -62,12 +62,12 @@ find_part (MuMsg* msg, guint partidx)
} }
struct _PartData { struct _PartData {
MuMsg *_msg; MuMsg *_msg;
unsigned _idx; unsigned _idx;
MuMsgPartForeachFunc _func; MuMsgPartForeachFunc _func;
gpointer _user_data; gpointer _user_data;
}; };
typedef struct _PartData PartData; typedef struct _PartData PartData;
static void static void
@ -106,7 +106,7 @@ mu_msg_part_foreach (MuMsg *msg, MuMsgPartForeachFunc func,
pdata._msg = msg; pdata._msg = msg;
pdata._idx = 0; pdata._idx = 0;
pdata._func = func; pdata._func = func;
pdata._user_data = user_data; pdata._user_data = user_data;
g_mime_message_foreach (msg->_file->_mime_msg, g_mime_message_foreach (msg->_file->_mime_msg,
@ -320,7 +320,7 @@ mu_msg_part_find_cid (MuMsg *msg, const char* sought_cid)
{ {
const char* cid; const char* cid;
g_return_val_if_fail (msg, -1L); g_return_val_if_fail (msg, -1);
g_return_val_if_fail (sought_cid, -1); g_return_val_if_fail (sought_cid, -1);
cid = g_str_has_prefix (sought_cid, "cid:") ? cid = g_str_has_prefix (sought_cid, "cid:") ?
@ -331,32 +331,50 @@ mu_msg_part_find_cid (MuMsg *msg, const char* sought_cid)
(gpointer)cid); (gpointer)cid);
} }
struct _MatchData2 {
GSList *_lst;
const GRegex *_rx;
guint _idx;
};
typedef struct _MatchData2 MatchData2;
static gboolean
match_filename (GMimeObject *part, const char *sought_filename) static void
match_filename_rx (GMimeObject *parent, GMimeObject *part, MatchData2 *mdata)
{ {
const char *fname; const char *fname;
if (!GMIME_IS_PART(part)) if (!GMIME_IS_PART(part))
return FALSE; goto leave;
fname = g_mime_part_get_filename (GMIME_PART(part)); fname = g_mime_part_get_filename (GMIME_PART(part));
if (!fname) if (!fname)
return FALSE; goto leave;
return g_strcmp0 (fname, sought_filename) == 0 ? TRUE : FALSE; if (g_regex_match (mdata->_rx, fname, 0, NULL))
mdata->_lst = g_slist_prepend (mdata->_lst,
GUINT_TO_POINTER(mdata->_idx));
leave:
++mdata->_idx;
} }
int GSList*
mu_msg_part_find_file (MuMsg *msg, const char* sought_filename) mu_msg_part_find_files (MuMsg *msg, const GRegex *pattern)
{ {
g_return_val_if_fail (msg, -1); MatchData2 mdata;
g_return_val_if_fail (sought_filename, -1);
g_return_val_if_fail (msg, NULL);
return msg_part_find_idx (msg->_file->_mime_msg, g_return_val_if_fail (pattern, NULL);
(MatchFunc)match_filename,
(gpointer)sought_filename); mdata._lst = NULL;
mdata._rx = pattern;
mdata._idx = 0;
g_mime_message_foreach (msg->_file->_mime_msg,
(GMimeObjectForeachFunc)match_filename_rx,
&mdata);
return mdata._lst;
} }

View File

@ -153,15 +153,16 @@ gchar* mu_msg_part_filepath_cache (MuMsg *msg, guint partid)
int mu_msg_part_find_cid (MuMsg *msg, const char* content_id); int mu_msg_part_find_cid (MuMsg *msg, const char* content_id);
/** /**
* get the (first) part index for the message part with a certain * retrieve a list of indices for mime-parts with filenames matching a regex
* filename *
*
* @param msg a message * @param msg a message
* @param sought_filename filename to look for * @param pattern a regular expression to match the filename with
* *
* @return the part index number of the found part, or -1 if it was not found * @return a list with indices for the files matching the pattern; the
* indices are the GPOINTER_TO_UINT(lst->data) of the list. The list must
* be freed with g_slist_free
*/ */
int mu_msg_part_find_file (MuMsg *msg, const char* sought_filename); GSList* mu_msg_part_find_files (MuMsg *msg, const GRegex *pattern);
typedef void (*MuMsgPartForeachFunc) (MuMsg*, MuMsgPart*, gpointer); typedef void (*MuMsgPartForeachFunc) (MuMsg*, MuMsgPart*, gpointer);

View File

@ -28,7 +28,7 @@ Here are some tips for using =mu=. If you want to know more, please refer to the
After you have indexed your messages, you can search them. Here are some After you have indexed your messages, you can search them. Here are some
examples. examples.
*** messages about Helsinki *** messages about Helsinki (in message body, subject, sender, ...)
#+begin_src sh #+begin_src sh
$ mu find Helsinki $ mu find Helsinki
@ -52,8 +52,28 @@ Here are some tips for using =mu=. If you want to know more, please refer to the
$ mu find 'subject:soc*' flag:unread $ mu find 'subject:soc*' flag:unread
#+end_src #+end_src
** Retrieving attachments from messages
You can retrieve attachments from messages using =mu extract=, which takes a
message file as an argument. Without any other arguments, it displays the
MIME-parts of the message. You can then get specific attachments:
#+begin_src sh
$ mu extract --parts=3,4 my-msg-file
#+end_src
will get you parts 3 and 4. You can also extract files based on their name:
#+begin_src sh
$ mu extract my-msg-file '.*\.jpg'
#+end_src
The second argument is a case-insensitive regular expression, and the command
will extract any files matching the pattern -- in the example, all
=.jpg=-files.
** Further processing of matched messages ** Further processing of matched messages
@ -75,7 +95,7 @@ Here are some tips for using =mu=. If you want to know more, please refer to the
** Integration with mail clients ** Integration with mail clients
See the example in the =mu-find= man page. The =mu-find= man page contains examples for =mutt= and =wanderlust=.
#+html:<hr/><div align="center">&copy; 2011 Dirk-Jan C. Binnema</div> #+html:<hr/><div align="center">&copy; 2011 Dirk-Jan C. Binnema</div>
#+begin_html #+begin_html