Merge from gnulib
[emacs.git] / lisp / mh-e / mh-junk.el
blob4a6693c2db6cd6e34b4c87d51ad73771470ce5fb
1 ;;; mh-junk.el --- MH-E interface to anti-spam measures
3 ;; Copyright (C) 2003-2015 Free Software Foundation, Inc.
5 ;; Author: Satyaki Das <satyaki@theforce.stanford.edu>,
6 ;; Bill Wohler <wohler@newt.com>
7 ;; Maintainer: Bill Wohler <wohler@newt.com>
8 ;; Keywords: mail, spam
10 ;; This file is part of GNU Emacs.
12 ;; GNU Emacs is free software: you can redistribute it and/or modify
13 ;; it under the terms of the GNU General Public License as published by
14 ;; the Free Software Foundation, either version 3 of the License, or
15 ;; (at your option) any later version.
17 ;; GNU Emacs is distributed in the hope that it will be useful,
18 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
19 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 ;; GNU General Public License for more details.
22 ;; You should have received a copy of the GNU General Public License
23 ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
25 ;;; Commentary:
27 ;; Spam handling in MH-E.
29 ;;; Change Log:
31 ;;; Code:
33 (require 'mh-e)
34 (require 'mh-scan)
35 (mh-require-cl)
37 ;;;###mh-autoload
(defun mh-junk-blacklist (range)
  "Blacklist RANGE as spam.

This command trains the spam program in use (see the option
`mh-junk-program') with the content of RANGE and then handles the
message(s) as specified by the option `mh-junk-disposition'.

Every message in RANGE is handed to `mh-blacklist-a-msg'. Afterwards,
if the line at point carries the blacklisted mark, point moves on to
the next message.

Check the documentation of `mh-interactive-range' to see how RANGE is
read in interactive use.

For more information about using your particular spam fighting
program, see:

  - `mh-spamassassin-blacklist'
  - `mh-bogofilter-blacklist'
  - `mh-spamprobe-blacklist'"
  (interactive (list (mh-interactive-range "Blacklist")))
  ;; No loop variable is needed: the helper works on the message at point.
  (mh-iterate-on-range () range
    (mh-blacklist-a-msg nil))
  (when (looking-at mh-scan-blacklisted-msg-regexp)
    (mh-next-msg)))
(defun mh-blacklist-a-msg (message)
  "Blacklist MESSAGE.
If MESSAGE is nil then the message at point is blacklisted.
An error is signaled if the message is already marked as refiled,
deleted, or whitelisted; nothing happens if it is already marked
as blacklisted.
The hook `mh-blacklist-msg-hook' is called after you mark a message
for blacklisting."
  (save-excursion
    ;; Position point on the scan line of the message and make sure
    ;; MESSAGE holds its number.
    (if (numberp message)
        (mh-goto-msg message nil t)
      (beginning-of-line)
      (setq message (mh-get-msg-num t)))
    (cond ((looking-at mh-scan-refiled-msg-regexp)
           (error "Message %d is refiled; undo refile before blacklisting"
                  message))
          ((looking-at mh-scan-deleted-msg-regexp)
           (error "Message %d is deleted; undo delete before blacklisting"
                  message))
          ((looking-at mh-scan-whitelisted-msg-regexp)
           (error "Message %d is whitelisted; undo before blacklisting"
                  message))
          ;; Already blacklisted: nothing to do.
          ((looking-at mh-scan-blacklisted-msg-regexp) nil)
          (t
           (mh-set-folder-modified-p t)
           (setq mh-blacklist (cons message mh-blacklist))
           ;; Record the message in `mh-seen-list' only once.
           (unless (memq message mh-seen-list)
             (setq mh-seen-list (cons message mh-seen-list)))
           (mh-notate nil mh-note-blacklisted mh-cmd-note)
           (run-hooks 'mh-blacklist-msg-hook)))))
87 ;;;###mh-autoload
(defun mh-junk-blacklist-disposition ()
  "Return the disposition of blacklisted messages.
The value is computed from the option `mh-junk-disposition':

  - nil yields nil;
  - the empty string yields \"+\";
  - a string starting with \"+\" is returned unchanged;
  - a string starting with \"@\" yields `mh-current-folder', a
    slash, and the rest of the string after the \"@\";
  - any other string is returned with \"+\" prepended."
  (let ((disposition mh-junk-disposition))
    (cond ((null disposition) nil)
          ((equal disposition "") "+")
          (t
           ;; DISPOSITION is non-empty here, so it has a first character.
           (let ((prefix (aref disposition 0)))
             (cond ((eq prefix ?+) disposition)
                   ((eq prefix ?@)
                    (concat mh-current-folder "/" (substring disposition 1)))
                   (t (concat "+" disposition))))))))
99 ;;;###mh-autoload
(defun mh-junk-process-blacklist (range)
  "Blacklist RANGE as spam.
This command trains the spam program in use (see the option
`mh-junk-program') with the content of RANGE and then handles the
message(s) as specified by the option `mh-junk-disposition'.

The blacklisting function is the second element of the entry for
`mh-junk-choice' in `mh-junk-function-alist'. It is called with
each message that `mh-iterate-on-range' visits in RANGE. An error
is signaled if no such function is configured."
  (let ((blacklist-func (nth 1 (assoc mh-junk-choice mh-junk-function-alist))))
    (unless blacklist-func
      (error "Customize `mh-junk-program' appropriately"))
    (mh-iterate-on-range msg range
      (message "Blacklisting message %d..." msg)
      ;; Call through the function designator itself rather than its
      ;; raw function cell, so that normal function indirection applies.
      (funcall blacklist-func msg)
      (message "Blacklisting message %d...done" msg))
    (mh-next-msg)))
114 ;;;###mh-autoload
(defun mh-junk-whitelist (range)
  "Whitelist RANGE as ham.

This command reclassifies the RANGE as ham if it were incorrectly
classified as spam (see the option `mh-junk-program'). It then
refiles the message into the \"+inbox\" folder.

Every message in RANGE is handed to `mh-junk-whitelist-a-msg'.
Afterwards, if the line at point carries the whitelisted mark,
point moves on to the next message.

Check the documentation of `mh-interactive-range' to see how
RANGE is read in interactive use."
  (interactive (list (mh-interactive-range "Whitelist")))
  ;; No loop variable is needed: the helper works on the message at point.
  (mh-iterate-on-range () range
    (mh-junk-whitelist-a-msg nil))
  (when (looking-at mh-scan-whitelisted-msg-regexp)
    (mh-next-msg)))
(defun mh-junk-whitelist-a-msg (message)
  "Whitelist MESSAGE.
If MESSAGE is nil then the message at point is whitelisted. An
error is signaled if the message is already marked as refiled,
deleted, or blacklisted; nothing happens if it is already marked
as whitelisted. The hook `mh-whitelist-msg-hook' is called after
you mark a message for whitelisting."
  (save-excursion
    ;; Position point on the scan line of the message and make sure
    ;; MESSAGE holds its number.
    (if (numberp message)
        (mh-goto-msg message nil t)
      (beginning-of-line)
      (setq message (mh-get-msg-num t)))
    (cond ((looking-at mh-scan-refiled-msg-regexp)
           (error "Message %d is refiled; undo refile before whitelisting"
                  message))
          ((looking-at mh-scan-deleted-msg-regexp)
           (error "Message %d is deleted; undo delete before whitelisting"
                  message))
          ((looking-at mh-scan-blacklisted-msg-regexp)
           (error "Message %d is blacklisted; undo before whitelisting"
                  message))
          ;; Already whitelisted: nothing to do.
          ((looking-at mh-scan-whitelisted-msg-regexp) nil)
          (t
           (mh-set-folder-modified-p t)
           (setq mh-whitelist (cons message mh-whitelist))
           (mh-notate nil mh-note-whitelisted mh-cmd-note)
           (run-hooks 'mh-whitelist-msg-hook)))))
155 ;;;###mh-autoload
(defun mh-junk-process-whitelist (range)
  "Whitelist RANGE as ham.

This command reclassifies the RANGE as ham if it were incorrectly
classified as spam (see the option `mh-junk-program').

The whitelisting function is the third element of the entry for
`mh-junk-choice' in `mh-junk-function-alist'. It is called with
each message that `mh-iterate-on-range' visits in RANGE. An error
is signaled if no such function is configured."
  (let ((whitelist-func (nth 2 (assoc mh-junk-choice mh-junk-function-alist))))
    (unless whitelist-func
      (error "Customize `mh-junk-program' appropriately"))
    (mh-iterate-on-range msg range
      (message "Whitelisting message %d..." msg)
      ;; Call through the function designator itself rather than its
      ;; raw function cell, so that normal function indirection applies.
      (funcall whitelist-func msg)
      (message "Whitelisting message %d...done" msg))
    (mh-next-msg)))
172 ;; Spamassassin Interface
;; Result of looking up the "spamassassin" program with
;; `executable-find' when this file is loaded.  The SpamAssassin
;; commands in this file signal an error when this is nil.
(defvar mh-spamassassin-executable (executable-find "spamassassin"))
;; Result of looking up the "sa-learn" program with `executable-find'
;; when this file is loaded.  When this is nil, the SpamAssassin
;; commands in this file skip the recategorization step.
(defvar mh-sa-learn-executable (executable-find "sa-learn"))
177 ;;;###mh-autoload
(defun mh-spamassassin-blacklist (msg)
  "Blacklist MSG with SpamAssassin.

SpamAssassin is one of the more popular spam filtering programs.
Get it from your local distribution or from the SpamAssassin web
site at URL `http://spamassassin.org/'.

To use SpamAssassin, add the following recipes to
\".procmailrc\":

    PATH=$PATH:/usr/bin/mh
    MAILDIR=$HOME/`mhparam Path`

    # Fight spam with SpamAssassin.
    :0fw
    | spamc

    # Anything with a spam level of 10 or more is junked immediately.
    :0:
    * ^X-Spam-Level: ..........
    /dev/null

    :0:
    * ^X-Spam-Status: Yes
    spam/.

If you don't use \"spamc\", use \"spamassassin -P -a\".

Note that one of the recipes above throws away messages with a
score greater than or equal to 10. Here's how you can determine a
value that works best for you.

First, run \"spamassassin -t\" on every mail message in your
archive and use Gnumeric to verify that the average plus the
standard deviation of good mail is under 5, the SpamAssassin
default for \"spam\".

Using Gnumeric, sort the messages by score and view the messages
with the highest score. Determine the score which encompasses all
of your interesting messages and add a couple of points to be
conservative. Add that many dots to the \"X-Spam-Level:\" header
field above to send messages with that score down the drain.

In the example above, messages with a score of 5-9 are set aside
in the \"+spam\" folder for later review. The major weakness of
rules-based filters is a plethora of false positives so it is
worthwhile to check.

If SpamAssassin classifies a message incorrectly, or is unsure,
you can use the MH-E commands \\[mh-junk-blacklist] and
\\[mh-junk-whitelist].

The command \\[mh-junk-blacklist] adds a \"blacklist_from\" entry
to \"~/.spamassassin/user_prefs\", deletes the message, and sends
the message to the Razor, so that others might not see this spam.
If the \"sa-learn\" command is available, the message is also
recategorized as spam.

The command \\[mh-junk-whitelist] adds a \"whitelist_from\" rule
to the \"~/.spamassassin/user_prefs\" file. If the \"sa-learn\"
command is available, the message is also recategorized as ham.

Over time, you'll observe that the same host or domain occurs
repeatedly in the \"blacklist_from\" entries, so you might think
that you could avoid future spam by blacklisting all mail from a
particular domain. The utility function
`mh-spamassassin-identify-spammers' helps you do precisely that.
This function displays a frequency count of the hosts and domains
in the \"blacklist_from\" entries from the last blank line in
\"~/.spamassassin/user_prefs\" to the end of the file. This
information can be used so that you can replace multiple
\"blacklist_from\" entries with a single wildcard entry such as:

    blacklist_from *@*amazingoffersdirect2u.com

In versions of SpamAssassin (2.50 and on) that support a Bayesian
classifier, \\[mh-junk-blacklist] uses the program \"sa-learn\"
to recategorize the message as spam. Neither MH-E, nor
SpamAssassin, rebuilds the database after adding words, so you
will need to run \"sa-learn --rebuild\" periodically. This can be
done by adding the following to your crontab:

    0 * * * * sa-learn --rebuild > /dev/null 2>&1"
  (unless mh-spamassassin-executable
    (error "Unable to find the spamassassin executable"))
  ;; Capture the folder and the message file name before switching to
  ;; the log and temporary buffers below.
  (let ((current-folder mh-current-folder)
        (msg-file (mh-msg-filename msg mh-current-folder)))
    (message "Reporting message %d..." msg)
    (mh-truncate-log-buffer)
    ;; Put call-process output in log buffer if we are saving it
    ;; (this happens if mh-junk-background is t).
    (with-current-buffer mh-log-buffer
      (call-process mh-spamassassin-executable msg-file mh-junk-background nil
                    ;;"--report" "--remove-from-whitelist"
                    "-r" "-R")          ; spamassassin V2.20
      (when mh-sa-learn-executable
        (message "Recategorizing message %d as spam..." msg)
        (mh-truncate-log-buffer)
        (call-process mh-sa-learn-executable msg-file mh-junk-background nil
                      "--single" "--spam" "--local" "--no-rebuild")))
    (message "Blacklisting sender of message %d..." msg)
    (with-current-buffer (get-buffer-create mh-temp-buffer)
      (erase-buffer)
      ;; Run the scan program on MSG with a format built from the
      ;; \"from\" field and collect its output in this buffer.
      (call-process (expand-file-name mh-scan-prog mh-progs)
                    nil t nil
                    (format "%d" msg) current-folder
                    "-format" "%<(mymbox{from})%|%(addr{from})%>")
      (goto-char (point-min))
      ;; The first non-empty output line is taken as the sender to
      ;; blacklist; no such line means there is nothing to add.
      (if (search-forward-regexp "^\\(.+\\)$" nil t)
          (progn
            (mh-spamassassin-add-rule "blacklist_from" (match-string 0))
            (message "Blacklisting sender of message %d...done" msg))
        (message "Blacklisting sender of message %d...not done (from my address)" msg)))))
294 ;;;###mh-autoload
(defun mh-spamassassin-whitelist (msg)
  "Whitelist MSG with SpamAssassin.

The \\[mh-junk-whitelist] command adds a \"whitelist_from\" rule to
the \"~/.spamassassin/user_prefs\" file. If the \"sa-learn\" command
is available, the message is also recategorized as ham.

The message file is first rewritten with the output of
\"spamassassin -d\" run on it.

See `mh-spamassassin-blacklist' for more information."
  (unless mh-spamassassin-executable
    (error "Unable to find the spamassassin executable"))
  ;; Resolve the message file and the show buffer before leaving the
  ;; current buffer.
  (let ((msg-file (mh-msg-filename msg mh-current-folder))
        (show-buffer (get-buffer mh-show-buffer))
        from)
    (with-current-buffer (get-buffer-create mh-temp-buffer)
      (erase-buffer)
      (message "Removing spamassassin markup from message %d..." msg)
      ;; The program output is inserted into this (temporary) buffer.
      (call-process mh-spamassassin-executable msg-file t nil
                    ;; "--remove-markup"
                    "-d")               ; spamassassin V2.20
      (if show-buffer
          (kill-buffer show-buffer))
      ;; Replace the message file with the buffer contents.
      ;; NOTE(review): the exit status of the call above is not
      ;; checked, so whatever was produced is written -- confirm that
      ;; this is acceptable when the program fails.
      (write-file msg-file)
      (when mh-sa-learn-executable
        (message "Recategorizing message %d as ham..." msg)
        (mh-truncate-log-buffer)
        ;; Put call-process output in log buffer if we are saving it
        ;; (this happens if mh-junk-background is t).
        (with-current-buffer mh-log-buffer
          (call-process mh-sa-learn-executable msg-file mh-junk-background nil
                        "--single" "--ham" "--local" "--no-rebuild")))
      (message "Whitelisting sender of message %d..." msg)
      ;; Read the sender from the \"From:\" field of the text in this
      ;; buffer; this has to happen before the buffer is killed below.
      (setq from
            (car (mh-funcall-if-exists
                  ietf-drums-parse-address (mh-get-header-field "From:"))))
      (kill-buffer nil)
      ;; Only add a rule when a non-empty sender was found.
      (unless (or (null from) (equal from ""))
        (mh-spamassassin-add-rule "whitelist_from" from))
      (message "Whitelisting sender of message %d...done" msg))))
(defun mh-spamassassin-add-rule (rule body)
  "Add a new rule to \"~/.spamassassin/user_prefs\".
The name of the rule is RULE and its body is BODY.

The rule is written on a line of its own as RULE, a tab, and
BODY. Nothing is added if the file already contains exactly that
line (compared ignoring case) anywhere, including on its first
line. The file is saved after a rule is added. The buffer
visiting the file is killed afterwards unless the file was
already being visited when this function was called."
  (let* ((entry (format "%s\t%s" rule body))
         (file (expand-file-name "~/.spamassassin/user_prefs"))
         (buffer-exists (find-buffer-visiting file))
         (buffer (find-file-noselect file)))
    (with-current-buffer buffer
      ;; Look at the whole file regardless of where point or a
      ;; restriction happens to be in a buffer that already existed.
      (save-excursion
        (save-restriction
          (widen)
          (goto-char (point-min))
          (let ((case-fold-search t))
            (unless (re-search-forward
                     (concat "^" (regexp-quote entry) "$") nil t)
              (goto-char (point-max))
              ;; Start a fresh line if the file lacks a final newline.
              (insert (if (bolp) "" "\n") entry "\n")
              (save-buffer))))))
    (unless buffer-exists
      (kill-buffer buffer))))
351 ;;;###mh-autoload
(defun mh-spamassassin-identify-spammers ()
  "Identify spammers who are repeat offenders.

This function displays a frequency count of the hosts and domains
in the \"blacklist_from\" entries from the last blank line in
\"~/.spamassassin/user_prefs\" to the end of the file. If the file
contains no blank line, all of its entries are counted. Only
names that occur more than twice are listed, in the buffer
\"*MH-E Spammer Frequencies*\". This information can be used so
that you can replace multiple \"blacklist_from\" entries with a
single wildcard entry such as:

    blacklist_from *@*amazingoffersdirect2u.com"
  (interactive)
  (let ((file (expand-file-name "~/.spamassassin/user_prefs"))
        (domains (make-hash-table :test 'equal)))
    (find-file file)
    ;; Only consider entries between last blank line and end of file.
    ;; Start one character before the end so that the empty line that
    ;; follows a final newline is not taken for the separator.
    (goto-char (max (point-min) (1- (point-max))))
    ;; Without a blank line, fall back to the beginning of the file
    ;; instead of signaling a search failure.
    (unless (search-backward-regexp "^$" nil t)
      (goto-char (point-min)))
    ;; Perform frequency count.
    (save-excursion
      (while (search-forward-regexp "^blacklist_from\\s-*\\(.*\\)@\\(.*\\)$"
                                    nil t)
        ;; Remove top-level-domain from hostname, then add one to the
        ;; count of each remaining host and domain part.
        (dolist (part (cdr (reverse (split-string (match-string 2) "\\."))))
          (puthash part (1+ (gethash part domains 0)) domains))))

    ;; Output
    (delete-other-windows)
    (pop-to-buffer (get-buffer-create "*MH-E Spammer Frequencies*"))
    (erase-buffer)
    (maphash (lambda (key value)
               (if (> value 2)
                   (insert (format "%s %s\n" key value))))
             domains)
    ;; Sort by count and flip the result so the largest count is first.
    (sort-numeric-fields 2 (point-min) (point-max))
    (reverse-region (point-min) (point-max))
    (goto-char (point-min))))
397 ;; Bogofilter Interface
;; Result of looking up the "bogofilter" program with
;; `executable-find' when this file is loaded.  The bogofilter
;; commands in this file signal an error when this is nil.
(defvar mh-bogofilter-executable (executable-find "bogofilter"))
401 ;;;###mh-autoload
(defun mh-bogofilter-blacklist (msg)
  "Blacklist MSG with bogofilter.

Bogofilter is a Bayesian spam filtering program. Get it from your
local distribution or from the bogofilter web site at URL
`http://bogofilter.sourceforge.net/'.

Bogofilter is taught by running:

    bogofilter -n < good-message

on every good message, and

    bogofilter -s < spam-message

on every spam message. This is called a full training; three other
training methods are described in the FAQ that is distributed with
bogofilter. Note that most Bayesian filters need 1000 to 5000 of each
type of message to start doing a good job.

To use bogofilter, add the following recipes to \".procmailrc\":

    PATH=$PATH:/usr/bin/mh
    MAILDIR=$HOME/`mhparam Path`

    # Fight spam with bogofilter.
    :0fw
    | bogofilter -3 -e -p

    :0:
    * ^X-Bogosity: Yes, tests=bogofilter
    spam/.

    :0:
    * ^X-Bogosity: Unsure, tests=bogofilter
    spam/unsure/.

If bogofilter classifies a message incorrectly, or is unsure, you can
use the MH-E commands \\[mh-junk-blacklist] and \\[mh-junk-whitelist]
to update bogofilter's training.

The \"Bogofilter FAQ\" suggests that you run the following
occasionally to shrink the database:

    bogoutil -d wordlist.db | bogoutil -l wordlist.db.new
    mv wordlist.db wordlist.db.prv
    mv wordlist.db.new wordlist.db

The \"Bogofilter tuning HOWTO\" describes how you can fine-tune Bogofilter."
  (if (null mh-bogofilter-executable)
      (error "Unable to find the bogofilter executable")
    ;; Resolve the message file before switching to the log buffer.
    (let ((input-file (mh-msg-filename msg mh-current-folder)))
      (mh-truncate-log-buffer)
      ;; The log buffer receives the program output when
      ;; `mh-junk-background' is t.
      (with-current-buffer mh-log-buffer
        ;; The message file is the program's standard input.
        (call-process mh-bogofilter-executable input-file
                      mh-junk-background nil
                      "-s")))))
461 ;;;###mh-autoload
(defun mh-bogofilter-whitelist (msg)
  "Whitelist MSG with bogofilter.

The file of MSG in the current folder is fed to
\"bogofilter -n\". An error is signaled if the bogofilter
executable was not found.

See `mh-bogofilter-blacklist' for more information."
  (if (null mh-bogofilter-executable)
      (error "Unable to find the bogofilter executable")
    ;; Resolve the message file before switching to the log buffer.
    (let ((input-file (mh-msg-filename msg mh-current-folder)))
      (mh-truncate-log-buffer)
      ;; The log buffer receives the program output when
      ;; `mh-junk-background' is t.
      (with-current-buffer mh-log-buffer
        ;; The message file is the program's standard input.
        (call-process mh-bogofilter-executable input-file
                      mh-junk-background nil
                      "-n")))))
478 ;; Spamprobe Interface
;; Result of looking up the "spamprobe" program with
;; `executable-find' when this file is loaded.  The SpamProbe
;; commands in this file signal an error when this is nil.
(defvar mh-spamprobe-executable (executable-find "spamprobe"))
482 ;;;###mh-autoload
(defun mh-spamprobe-blacklist (msg)
  "Blacklist MSG with SpamProbe.

SpamProbe is a Bayesian spam filtering program. Get it from your
local distribution or from the SpamProbe web site at URL
`http://spamprobe.sourceforge.net'.

To use SpamProbe, add the following recipes to \".procmailrc\":

    PATH=$PATH:/usr/bin/mh
    MAILDIR=$HOME/`mhparam Path`

    # Fight spam with SpamProbe.
    :0
    SCORE=| spamprobe receive

    :0 wf
    | formail -I \"X-SpamProbe: $SCORE\"

    :0:
    *^X-SpamProbe: SPAM
    spam/.

If SpamProbe classifies a message incorrectly, you can use the
MH-E commands \\[mh-junk-blacklist] and \\[mh-junk-whitelist] to
update SpamProbe's training."
  (if (null mh-spamprobe-executable)
      (error "Unable to find the spamprobe executable")
    ;; Resolve the message file before switching to the log buffer.
    (let ((input-file (mh-msg-filename msg mh-current-folder)))
      (mh-truncate-log-buffer)
      ;; The log buffer receives the program output when
      ;; `mh-junk-background' is t.
      (with-current-buffer mh-log-buffer
        ;; The message file is the program's standard input.
        (call-process mh-spamprobe-executable input-file
                      mh-junk-background nil
                      "spam")))))
519 ;;;###mh-autoload
(defun mh-spamprobe-whitelist (msg)
  "Whitelist MSG with SpamProbe.

The file of MSG in the current folder is fed to
\"spamprobe good\". An error is signaled if the spamprobe
executable was not found.

See `mh-spamprobe-blacklist' for more information."
  (if (null mh-spamprobe-executable)
      (error "Unable to find the spamprobe executable")
    ;; Resolve the message file before switching to the log buffer.
    (let ((input-file (mh-msg-filename msg mh-current-folder)))
      (mh-truncate-log-buffer)
      ;; The log buffer receives the program output when
      ;; `mh-junk-background' is t.
      (with-current-buffer mh-log-buffer
        ;; The message file is the program's standard input.
        (call-process mh-spamprobe-executable input-file
                      mh-junk-background nil
                      "good")))))
534 (provide 'mh-junk)
536 ;; Local Variables:
537 ;; indent-tabs-mode: nil
538 ;; sentence-end-double-space: nil
539 ;; End:
541 ;;; mh-junk.el ends here