Change release version from 21.4 to 22.1 throughout.
[emacs.git] / lisp / gnus / deuglify.el
blob0dc6e6d002ab7488299cf6c485c6f0107931e43f
1 ;;; deuglify.el --- deuglify broken Outlook (Express) articles
3 ;; Copyright (C) 2002, 2003, 2004 Free Software Foundation, Inc.
4 ;; Copyright (C) 2001, 2002 Raymond Scholz
6 ;; Author: Raymond Scholz <rscholz@zonix.de>
7 ;; Thomas Steffen (unwrapping algorithm,
8 ;; based on an idea of Stefan Monnier)
9 ;; Keywords: mail, news
11 ;; This file is part of GNU Emacs.
13 ;; GNU Emacs is free software; you can redistribute it and/or modify
14 ;; it under the terms of the GNU General Public License as published by
15 ;; the Free Software Foundation; either version 2, or (at your option)
16 ;; any later version.
18 ;; GNU Emacs is distributed in the hope that it will be useful,
19 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
20 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 ;; GNU General Public License for more details.
23 ;; You should have received a copy of the GNU General Public License
24 ;; along with GNU Emacs; see the file COPYING. If not, write to the
25 ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
26 ;; Boston, MA 02111-1307, USA.
28 ;;; Commentary:
30 ;; This file enables Gnus to repair broken citations produced by
31 ;; common user agents like MS Outlook (Express). It may repair
32 ;; articles of other user agents too.
34 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
37 ;; Outlook sometimes wraps cited lines before sending a message as
38 ;; seen in this example:
40 ;; Example #1
41 ;; ----------
43 ;; John Doe wrote:
45 ;; > This sentence no verb. This sentence no verb. This sentence
46 ;; no
47 ;; > verb. This sentence no verb. This sentence no verb. This
48 ;; > sentence no verb.
50 ;; The function `gnus-article-outlook-unwrap-lines' tries to recognize those
51 ;; erroneously wrapped lines and will unwrap them. I.e. putting the
52 ;; wrapped parts ("no" in this example) back where they belong (at the
53 ;; end of the cited line above).
55 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
57 ;; Note that some people not only use broken user agents but also
58 ;; practice a bad citation style by omitting blank lines between the
59 ;; cited text and their own text.
61 ;; Example #2
62 ;; ----------
64 ;; John Doe wrote:
66 ;; > This sentence no verb. This sentence no verb. This sentence no
67 ;; You forgot in all your sentences.
68 ;; > verb. This sentence no verb. This sentence no verb. This
69 ;; > sentence no verb.
71 ;; Unwrapping "You forgot in all your sentences." would be illegal as
72 ;; this part wasn't intended to be cited text.
73 ;; `gnus-article-outlook-unwrap-lines' will only unwrap lines if the resulting
74 ;; citation line will be of a certain maximum length. You can control
75 ;; this by adjusting `gnus-outlook-deuglify-unwrap-max'. Also
76 ;; unwrapping will only be done if the line above the (possibly)
77 ;; wrapped line has a minimum length of `gnus-outlook-deuglify-unwrap-min'.
79 ;; Furthermore no unwrapping will be undertaken if the last character
80 ;; is one of the chars specified in
81 ;; `gnus-outlook-deuglify-unwrap-stop-chars'. Setting this to ".?!"
82 ;; inhibits unwrapping if the cited line ends with a full stop,
83 ;; question mark or exclamation mark. Note that this variable
84 ;; defaults to `nil', triggering a few false positives but generally
85 ;; giving you better results.
87 ;; Unwrapping works on every level of citation. Thus you will be able
88 ;; to repair broken citations of broken user agents citing broken
89 ;; citations of broken user agents citing broken citations...
91 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
93 ;; Citations are commonly introduced with an attribution line
94 ;; indicating who wrote the cited text. Outlook adds superfluous
95 ;; information that can be found in the header of the message to this
96 ;; line and often wraps it.
98 ;; If that weren't enough, lots of people write their own text above
99 ;; the cited text and cite the complete original article below.
101 ;; Example #3
102 ;; ----------
104 ;; Hey, John. There's no in all your sentences!
106 ;; John Doe <john.doe@some.domain> wrote in message
107 ;; news:a87usw8$dklsssa$2@some.news.server...
108 ;; > This sentence no verb. This sentence no verb. This sentence
109 ;; no
110 ;; > verb. This sentence no verb. This sentence no verb. This
111 ;; > sentence no verb.
112 ;; >
113 ;; > Bye, John
115 ;; Repairing the attribution line will be done by function
116 ;; `gnus-article-outlook-repair-attribution', which calls other functions that
117 ;; try to recognize and repair broken attribution lines. See variable
118 ;; `gnus-outlook-deuglify-attrib-cut-regexp' for stuff that should be
119 ;; cut off from the beginning of an attribution line and variable
120 ;; `gnus-outlook-deuglify-attrib-verb-regexp' for the verbs that are
121 ;; required to be found in an attribution line. These functions return
122 ;; the point where the repaired attribution line starts.
124 ;; Rearranging the article so that the cited text appears above the
125 ;; new text will be done by function
126 ;; `gnus-article-outlook-rearrange-citation'. This function calls
127 ;; `gnus-article-outlook-repair-attribution' to find and repair an attribution
128 ;; line.
130 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
132 ;; Well, and that's what the message will look like after applying
133 ;; deuglification:
135 ;; Example #3 (deuglified)
136 ;; -----------------------
138 ;; John Doe <john.doe@some.domain> wrote:
140 ;; > This sentence no verb. This sentence no verb. This sentence no
141 ;; > verb. This sentence no verb. This sentence no verb. This
142 ;; > sentence no verb.
143 ;; >
144 ;; > Bye, John
146 ;; Hey, John. There's no in all your sentences!
148 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
150 ;; Usage
151 ;; -----
153 ;; Press `W k' in the Summary Buffer.
155 ;; Non recommended usage :-)
156 ;; ---------------------
158 ;; To automatically invoke deuglification on every article you read,
159 ;; put something like this in your .gnus:
161 ;; (add-hook 'gnus-article-decode-hook 'gnus-article-outlook-unwrap-lines)
163 ;; or _one_ of the following lines:
165 ;; ;; repair broken attribution lines
166 ;; (add-hook 'gnus-article-decode-hook 'gnus-article-outlook-repair-attribution)
168 ;; ;; repair broken attribution lines and citations
169 ;; (add-hook 'gnus-article-decode-hook 'gnus-article-outlook-rearrange-citation)
171 ;; Note that there always may be some false positives, so I suggest
172 ;; using the manual invocation. After deuglification you may want to
173 ;; refill the whole article using `W w'.
175 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
177 ;; Limitations
178 ;; -----------
180 ;; As I said before there may (or will) be a few false positives on
181 ;; unwrapping cited lines with `gnus-article-outlook-unwrap-lines'.
183 ;; `gnus-article-outlook-repair-attribution' will only fix the first
184 ;; attribution line found in the article. Furthermore it is restricted to
185 ;; certain kinds of attributions. And there may be horribly many
186 ;; false positives, vanishing lines and so on -- so don't trust your
187 ;; eyes. Again I recommend manual invocation.
189 ;; `gnus-article-outlook-rearrange-citation' carries all the limitations of
190 ;; `gnus-article-outlook-repair-attribution'.
192 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
194 ;; See ChangeLog for other changes.
196 ;; Revision 1.5 2002/01/27 14:39:17 rscholz
197 ;; * New variable `gnus-outlook-deuglify-no-wrap-chars' to inhibit
198 ;; unwrapping if one of these chars is first in the possibly wrapped line.
199 ;; * Improved rearranging of the article.
200 ;; * New function `gnus-outlook-repair-attribution-block' for repairing
201 ;; those big "Original Message (following some headers)" attributions.
203 ;; Revision 1.4 2002/01/03 14:05:00 rscholz
204 ;; Renamed `gnus-outlook-deuglify-article' to
205 ;; `gnus-article-outlook-deuglify-article'.
206 ;; Made it easier to deuglify the article while being in Gnus' Article
207 ;; Edit Mode. (suggested by Phil Nitschke)
210 ;; Revision 1.3 2002/01/02 23:35:54 rscholz
211 ;; Fix a bug that caused succeeding long attribution lines to be
212 ;; unwrapped. Minor doc fixes and regular expression tuning.
214 ;; Revision 1.2 2001/12/30 20:14:34 rscholz
215 ;; Clean up source.
217 ;; Revision 1.1 2001/12/30 20:13:32 rscholz
218 ;; Initial revision
220 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
222 ;;; Code:
224 (require 'gnus-art)
225 (require 'gnus-sum)
(defconst gnus-outlook-deuglify-version "1.5 Gnus version"
  "Version string of the gnus-outlook-deuglify code in this file.")
230 ;;; User Customizable Variables:
(defgroup gnus-outlook-deuglify nil
  "Deuglify articles generated by broken user agents like MS Outlook (Express)."
  :version "22.1"
  ;; Attach the group to a parent; a group without any parent cannot be
  ;; reached by browsing the customization tree from the top.
  :group 'gnus)
;;;###autoload
(defcustom gnus-outlook-deuglify-unwrap-min 45
  "Length a cited line must exceed before the line below it is unwrapped.
`gnus-article-outlook-unwrap-lines' appends a (possibly) wrapped line to
the cited line above it only when that cited line, citation prefix
included, is longer than this many characters."
  :group 'gnus-outlook-deuglify
  :type 'integer
  :version "22.1")
;;;###autoload
(defcustom gnus-outlook-deuglify-unwrap-max 95
  "Upper bound on the length of a cited line after unwrapping.
`gnus-article-outlook-unwrap-lines' joins two lines only when the length
of the cited line plus the length of the wrapped part is below this
value."
  :group 'gnus-outlook-deuglify
  :type 'integer
  :version "22.1")
(defcustom gnus-outlook-deuglify-cite-marks ">|#%"
  "String of characters that mark a line as cited text.
The deuglify functions splice this string into regexp bracket
expressions, so it is interpreted as a set of single characters."
  :group 'gnus-outlook-deuglify
  :type 'string
  :version "22.1")
(defcustom gnus-outlook-deuglify-unwrap-stop-chars nil ;; ".?!" or nil
  "Characters that prevent unwrapping when they end the cited line.
If the cited line above a possibly wrapped line ends with one of these
characters, the two lines are not joined.  A value of nil disables this
check; a string such as \".?!\" stops unwrapping after a full stop,
question mark or exclamation mark."
  :group 'gnus-outlook-deuglify
  :type '(radio (const :format "None " nil)
                (string :value ".?!"))
  :version "22.1")
(defcustom gnus-outlook-deuglify-no-wrap-chars "`"
  "Characters that prevent unwrapping when they begin the wrapped line.
A line whose first character is one of these is never appended to the
cited line above it by `gnus-article-outlook-unwrap-lines'."
  :group 'gnus-outlook-deuglify
  :type 'string
  :version "22.1")
(defcustom gnus-outlook-deuglify-attrib-cut-regexp
  "\\(On \\|Am \\)?\\(Mon\\|Tue\\|Wed\\|Thu\\|Fri\\|Sat\\|Sun\\),[^,]+, "
  "Regexp matching leading text to cut off an attribution line.
The default matches an optional \"On \" or \"Am \", a weekday
abbreviation, and everything up to and including the next comma and
space, i.e. a date prefix.

`gnus-outlook-repair-attribution-other' embeds this regexp in a larger
one and refers to later groups by number, counting on the two groups
contained in the default value."
  :group 'gnus-outlook-deuglify
  :type 'string
  :version "22.1")
(defcustom gnus-outlook-deuglify-attrib-verb-regexp
  "wrote\\|writes\\|says\\|schrieb\\|schreibt\\|meinte\\|skrev\\|a écrit\\|schreef\\|escribió"
  "Regexp matching the verb that must occur in an attribution line.
The attribution repair functions only treat a line as an attribution
when it contains a match for this regexp."
  :group 'gnus-outlook-deuglify
  :type 'string
  :version "22.1")
(defcustom gnus-outlook-deuglify-attrib-end-regexp
  ": *\\|\\.\\.\\."
  "Regexp matching the text that terminates an attribution line.
The default accepts a colon followed by optional spaces, or three dots."
  :group 'gnus-outlook-deuglify
  :type 'string
  :version "22.1")
;;;###autoload
(defcustom gnus-outlook-display-hook nil
  "Hook run after a deuglified article has been prepared.
It is run after `gnus-article-prepare-hook'."
  :group 'gnus-outlook-deuglify
  :type 'hook
  :version "22.1")
298 ;; Functions
(defun gnus-outlook-display-article-buffer ()
  "Re-highlight and re-treat the article, then run the display hooks.
Works on `gnus-article-buffer' when that is non-nil, otherwise on the
current buffer."
  (save-current-buffer
    (set-buffer (or gnus-article-buffer (current-buffer)))
    ;; Mimic `gnus-article-prepare-display' instead of calling it:
    ;; according to the original author, calling it on an article that
    ;; has already been prepared removes all MIME parts (possibly a bug
    ;; in that function).
    (gnus-article-highlight t)
    (gnus-treat-article nil)
    (gnus-run-hooks 'gnus-article-prepare-hook
                    'gnus-outlook-display-hook)))
;;;###autoload
(defun gnus-article-outlook-unwrap-lines (&optional nodisplay)
  "Join cited lines that appear to have been wrapped by mistake.
A line is appended to the cited line above it only if that cited line
is longer than `gnus-outlook-deuglify-unwrap-min' and the combined
length stays below `gnus-outlook-deuglify-unwrap-max'.  If NODISPLAY is
non-nil (interactively, with a prefix argument), don't redisplay the
article buffer."
  (interactive "P")
  (save-excursion
    ;; The bindings are established before switching to the article
    ;; buffer, exactly as the search pattern is built from the values
    ;; visible in the calling buffer.
    (let* ((case-fold-search nil)
           (inhibit-read-only t)
           (marks gnus-outlook-deuglify-cite-marks)
           (wrapped-re
            (concat
             ;; Group 1: outer citation prefix, which the wrapped line
             ;; must repeat (see the back-reference below).
             "^\\([ \t" marks "]*\\)"
             ;; Group 2: a cite mark plus the cited text; its last
             ;; character may be neither a space nor a stop char.
             "\\([" marks "].*[^\n " gnus-outlook-deuglify-unwrap-stop-chars "]\\)[ \t]?\n"
             ;; Group 3: the wrapped remainder; it must not begin with
             ;; a space, a cite mark or a no-wrap char.
             "\\1\\([^\n " marks gnus-outlook-deuglify-no-wrap-chars "]+.*\\)$")))
      (gnus-with-article-buffer
        (article-goto-body)
        (while (re-search-forward wrapped-re nil t)
          (let ((cited-len (- (match-end 2) (match-beginning 1)))
                (tail-len (- (match-end 3) (match-beginning 3))))
            (when (and (> cited-len gnus-outlook-deuglify-unwrap-min)
                       (< (+ cited-len tail-len)
                          gnus-outlook-deuglify-unwrap-max))
              (replace-match "\\1\\2 \\3")
              ;; Search again from the start of the joined line, so the
              ;; line after it can be considered against the result.
              (goto-char (match-beginning 0))))))))
  (unless nodisplay (gnus-outlook-display-article-buffer)))
(defun gnus-outlook-rearrange-article (attr-start)
  "Move the text from ATTR-START onwards to the top of the article body.
ATTR-START is a position in the article buffer.  The text between the
start of the body and ATTR-START is swapped with the text from
ATTR-START up to the signature, or up to the end of the buffer when no
signature is found or the signature lies before ATTR-START.  Nothing is
changed when the body already begins at ATTR-START."
  (save-excursion
    (let ((inhibit-read-only t))
      (gnus-with-article-buffer
        (article-goto-body)
        ;; Only rearrange if the article does not already start with
        ;; the attribution.
        (unless (= (point) attr-start)
          (gnus-kill-all-overlays)
          (let ((cur (point))
                ;; End of the cited part: the position reached by the
                ;; signature search, or the end of the buffer.
                (to (if (gnus-article-search-signature)
                        (point)
                      (point-max))))
            ;; Handle the case where the full quote is below the
            ;; signature.
            (when (< to attr-start)
              (setq to (point-max)))
            (transpose-regions cur attr-start attr-start to)))))))
;; Example of an attribution handled by the next function:
;;
;;   John Doe <john.doe@some.domain> wrote in message
;;   news:a87usw8$dklsssa$2@some.news.server...

(defun gnus-outlook-repair-attribution-outlook ()
  "Repair a broken attribution line as produced by Outlook.
Search the article body for an uncited line containing a match for
`gnus-outlook-deuglify-attrib-verb-regexp' that ends, on the same or the
next line, with a match for `gnus-outlook-deuglify-attrib-end-regexp'.
The text between the verb and the terminator is removed.
Return the position where the match started, or nil if none was found."
  (save-excursion
    (let ((case-fold-search nil)
          (inhibit-read-only t)
          (marks gnus-outlook-deuglify-cite-marks))
      (gnus-with-article-buffer
        (article-goto-body)
        (when (re-search-forward
               (concat "^\\([^" marks "].+\\)"
                       "\\(" gnus-outlook-deuglify-attrib-verb-regexp "\\)"
                       "\\(.*\n?[^\n" marks "].*\\)?"
                       "\\(" gnus-outlook-deuglify-attrib-end-regexp "\\)$")
               nil t)
          (gnus-kill-all-overlays)
          ;; Keep author (1), verb (2) and terminator (4); group 3, the
          ;; clutter in between, is dropped.
          (replace-match "\\1\\2\\4")
          (match-beginning 0))))))
;; Example of an attribution block handled by the next function:
;;
;;   ----- Original Message -----
;;   From: "John Doe" <john.doe@some.domain>
;;   To: "Doe Foundation" <info@doefnd.org>
;;   Sent: Monday, November 19, 2001 12:13 PM
;;   Subject: More Doenuts

(defun gnus-outlook-repair-attribution-block ()
  "Repair a big broken attribution block.
Search the article body for a dashed separator line followed by at
least two \"Name: value\" lines and replace the whole block with the
value of its first field followed by \" wrote:\".
Return the position where the block started, or nil if none was found."
  (save-excursion
    (let ((case-fold-search nil)
          (inhibit-read-only t)
          (marks gnus-outlook-deuglify-cite-marks))
      (gnus-with-article-buffer
        (article-goto-body)
        (when (re-search-forward
               (concat "^[" marks " \t]*--* ?[^-]+ [^-]+ ?--*\\s *\n"
                       "[^\n:]+:[ \t]*\\([^\n]+\\)\n"
                       "\\([^\n:]+:[ \t]*[^\n]+\n\\)+")
               nil t)
          (gnus-kill-all-overlays)
          ;; Group 1 is the value of the first field after the separator.
          (replace-match "\\1 wrote:\n")
          (match-beginning 0))))))
;; Example of an attribution handled by the next function:
;;
;;   On Wed, 16 Jan 2002 23:23:30 +0100, John Doe <john.doe@some.domain> wrote:

(defun gnus-outlook-repair-attribution-other ()
  "Repair a broken attribution line (other user agents than Outlook).
Search the article body for an attribution that is spread over two
uncited lines, optionally preceded by a match for
`gnus-outlook-deuglify-attrib-cut-regexp'.  The two lines are joined
with a single space and the leading cut match is removed.
Return the position where the match started, or nil if none was found."
  (save-excursion
    (let ((case-fold-search nil)
          (inhibit-read-only t)
          (marks gnus-outlook-deuglify-cite-marks))
      (gnus-with-article-buffer
        (article-goto-body)
        (when (re-search-forward
               (concat "^\\(" gnus-outlook-deuglify-attrib-cut-regexp "\\)?"
                       "\\([^" marks "].+\\)\n\\([^\n" marks "].*\\)?"
                       "\\(" gnus-outlook-deuglify-attrib-verb-regexp "\\).*"
                       "\\(" gnus-outlook-deuglify-attrib-end-regexp "\\)$")
               nil t)
          (gnus-kill-all-overlays)
          ;; Groups 1-3 belong to the cut regexp (with its default
          ;; value); 4 is the rest of the first line, 5 the start of the
          ;; second line, 6 the verb and 7 the terminator.
          (replace-match "\\4 \\5\\6\\7")
          (match-beginning 0))))))
;;;###autoload
(defun gnus-article-outlook-repair-attribution (&optional nodisplay)
  "Repair a broken attribution line.
The specialised repair functions are tried in turn; the first one that
recognizes an attribution ends the search.  Return the position that
function returned (the start of the repaired attribution), or nil if no
attribution was recognized.
If NODISPLAY is non-nil, don't redisplay the article buffer."
  (interactive "P")
  (let ((attrib-start
         ;; `or' stops at the first repair function that returns
         ;; non-nil, so at most one attribution gets rewritten.
         (or
          (gnus-outlook-repair-attribution-other)
          (gnus-outlook-repair-attribution-block)
          (gnus-outlook-repair-attribution-outlook))))
    (unless nodisplay (gnus-outlook-display-article-buffer))
    attrib-start))
(defun gnus-article-outlook-rearrange-citation (&optional nodisplay)
  "Repair the attribution and move the cited text above the new text.
First call `gnus-article-outlook-repair-attribution'; if it reports the
start of an attribution, hand that position to
`gnus-outlook-rearrange-article'.
If NODISPLAY is non-nil, don't redisplay the article buffer."
  (interactive "P")
  (let ((start (gnus-article-outlook-repair-attribution 'nodisplay)))
    ;; Without a recognized attribution line there is nothing to anchor
    ;; the rearrangement on, so the article is left as it is.
    (when start
      (gnus-outlook-rearrange-article start)))
  (if nodisplay
      nil
    (gnus-outlook-display-article-buffer)))
;;;###autoload
(defun gnus-outlook-deuglify-article (&optional nodisplay)
  "Apply every deuglification step to a broken Outlook (Express) article.
The steps are, in order: treat dumbquotes, unwrap wrapped cited lines,
then repair the attribution and rearrange the citation.
If NODISPLAY is non-nil, don't redisplay the article buffer."
  (interactive "P")
  ;; Dumb quotes are treated first.
  (gnus-article-treat-dumbquotes)
  ;; The repair steps are each told not to redisplay; redisplay happens
  ;; at most once, at the very end.
  (dolist (repair '(gnus-article-outlook-unwrap-lines
                    gnus-article-outlook-rearrange-citation))
    (funcall repair 'nodisplay))
  (if nodisplay
      nil
    (gnus-outlook-display-article-buffer)))
;;;###autoload
(defun gnus-article-outlook-deuglify-article ()
  "Fully deuglify a broken Outlook (Express) article and redisplay it.
This is `gnus-outlook-deuglify-article' called with a nil NODISPLAY."
  (interactive)
  (gnus-outlook-deuglify-article))
475 (provide 'deuglify)
477 ;; Local Variables:
478 ;; coding: iso-8859-1
479 ;; End:
481 ;;; arch-tag: 5f895cc9-51a9-487c-b42e-28844d79eb73
482 ;;; deuglify.el ends here