guile: Add '--delete' option to find-dups.scm

Closes #592

Uses open-pipe* so that no shell escaping is needed (as already done in md5sum):
the arguments are passed directly to the program rather than through a shell.

Also see this discussion: http://thread.gmane.org/gmane.lisp.guile.user/11777

Experimental feature... be careful.
This commit is contained in:
Mark A. Hershberger
2015-03-07 12:50:19 -05:00
committed by djcb
parent 9d8dfcaedd
commit 2aac8e43cb

View File

@ -22,6 +22,7 @@ exec guile -e main -s $0 $@
;; INFO: find duplicate messages ;; INFO: find duplicate messages
;; INFO: options: ;; INFO: options:
;; INFO: --muhome=<muhome>: path to mu home dir ;; INFO: --muhome=<muhome>: path to mu home dir
;; INFO: --delete: delete all but the first one (experimental, be careful!)
(use-modules (mu) (mu script) (mu stats)) (use-modules (mu) (mu script) (mu stats))
(use-modules (ice-9 getopt-long) (ice-9 optargs) (use-modules (ice-9 getopt-long) (ice-9 optargs)
@ -29,12 +30,26 @@ exec guile -e main -s $0 $@
(define (md5sum path) (define (md5sum path)
(let* ((port (open-pipe* OPEN_READ "md5sum" path)) (let* ((port (open-pipe* OPEN_READ "md5sum" path))
(md5 (read-delimited " " port))) (md5 (read-delimited " " port)))
(close-pipe port) (close-pipe port)
md5)) md5))
(define (remove-msg filepath delete)
  "Remove the duplicate message stored at FILEPATH.
When DELETE is false nothing is changed; only a line naming the file
is printed.  When DELETE is true, run 'mu remove FILEPATH' and, if that
exits with status 0 without printing anything on its standard output,
delete the file itself.  Otherwise the output of mu and its exit status
are reported and the file is left in place."
  (if (not delete)
      ;; dry run: only report which file is affected
      (format #t "would\t~a\n" (basename filepath))
      (begin
        (format #t "\t~a ... " (basename filepath))
        (let* ((port (open-pipe* OPEN_READ "mu" "remove" filepath))
               ;; Read everything mu prints, not just the first line, so
               ;; that no diagnostics are lost before the pipe is closed.
               (output (let loop ((line (read-line port)) (acc '()))
                         (if (eof-object? line)
                             (reverse acc)
                             (loop (read-line port) (cons line acc)))))
               ;; status:exit-val yields #f when mu did not exit normally
               (err (status:exit-val (close-pipe port))))
          (if (and (equal? err 0) (null? output))
              (begin
                ;; Report success only after the file is really gone; if
                ;; delete-file raises, no misleading "ok" has been printed.
                (delete-file filepath)
                (format #t "ok\n"))
              (begin
                (format #t "\nError while deleting:\n")
                (for-each (lambda (line) (format #t "\t~a\n" line)) output)
                ;; mu may have failed without writing to stdout; show the
                ;; exit status instead of printing an EOF object.
                (if (not (equal? err 0))
                    (format #t "\tmu remove exit status: ~a\n" err))))))))
(define (find-dups) (define (find-dups delete)
(let ((id-table (make-hash-table 20000))) (let ((id-table (make-hash-table 20000)))
;; fill the hash with <msgid-size> => <list of paths> ;; fill the hash with <msgid-size> => <list of paths>
(mu:for-each-message (mu:for-each-message
@ -53,12 +68,13 @@ exec guile -e main -s $0 $@
(if (> (length paths) 1) (if (> (length paths) 1)
(let ((hash (make-hash-table 10))) (let ((hash (make-hash-table 10)))
(for-each (for-each
(lambda (path) (lambda (path)
(let* ((md5 (md5sum path)) (lst (hash-ref hash md5))) (when (file-exists? path)
(if lst (let* ((md5 (md5sum path)) (lst (hash-ref hash md5)))
(set! lst (cons path lst)) (if lst
(set! lst (list path))) (set! lst (cons path lst))
(hash-set! hash md5 lst))) (set! lst (list path)))
(hash-set! hash md5 lst))))
paths) paths)
;; hash now maps the md5sum to the messages... ;; hash now maps the md5sum to the messages...
(hash-for-each (hash-for-each
@ -68,9 +84,11 @@ exec guile -e main -s $0 $@
(format #t "md5sum: ~a:\n" md5) (format #t "md5sum: ~a:\n" md5)
(let ((num 1)) (let ((num 1))
(for-each (for-each
(lambda (path) (lambda (path)
(format #t "\t~d. ~a\n" num path) (if (equal? num 1)
(set! num (+ 1 num))) (format #t "\t~a\n" (basename path))
(remove-msg path delete))
(set! num (+ 1 num)))
mpaths))))) mpaths)))))
hash)))) hash))))
id-table))) id-table)))
@ -78,20 +96,33 @@ exec guile -e main -s $0 $@
(define (main args) (define (main args)
"Run some statistics function. "Find duplicate messages and, potentially, delete the dups.
Be careful with that!
Interpret argument-list ARGS (like command-line Interpret argument-list ARGS (like command-line
arguments). Possible arguments are: arguments). Possible arguments are:
--muhome (path to alternative mu home directory)." --muhome (path to alternative mu home directory).
--delete (delete all but the first one)."
(setlocale LC_ALL "") (setlocale LC_ALL "")
(let* ((optionspec '( (muhome (value #t)) (let* ((optionspec '( (muhome (value #t))
(delete (value #f))
(help (single-char #\h) (value #f)))) (help (single-char #\h) (value #f))))
(options (getopt-long args optionspec)) (options (getopt-long args optionspec))
(help (option-ref options 'help #f)) (help (option-ref options 'help #f))
(delete (option-ref options 'delete #f))
(muhome (option-ref options 'muhome #f))) (muhome (option-ref options 'muhome #f)))
(mu:initialize muhome) (mu:initialize muhome)
(find-dups))) (find-dups delete)))
;; Local Variables: ;; Local Variables:
;; mode: scheme ;; mode: scheme
;; End: ;; End: