1 ;;; deuglify.el --- deuglify broken Outlook (Express) articles
3 ;; Copyright (C) 2001-2013 Free Software Foundation, Inc.
5 ;; Author: Raymond Scholz <rscholz@zonix.de>
7 ;; (unwrapping algorithm, based on an idea of Stefan Monnier)
8 ;; Keywords: mail, news
10 ;; This file is part of GNU Emacs.
12 ;; GNU Emacs is free software: you can redistribute it and/or modify
13 ;; it under the terms of the GNU General Public License as published by
14 ;; the Free Software Foundation, either version 3 of the License, or
15 ;; (at your option) any later version.
17 ;; GNU Emacs is distributed in the hope that it will be useful,
18 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
19 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 ;; GNU General Public License for more details.
22 ;; You should have received a copy of the GNU General Public License
23 ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
27 ;; This file enables Gnus to repair broken citations produced by
28 ;; common user agents like MS Outlook (Express). It may repair
29 ;; articles of other user agents too.
31 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
34 ;; Outlook sometimes wraps cited lines before sending a message as
35 ;; seen in this example:
;; > This sentence no verb. This sentence no verb. This sentence
;; no
;; > verb. This sentence no verb. This sentence no verb. This
;; > sentence no verb.
47 ;; The function `gnus-article-outlook-unwrap-lines' tries to recognize those
48 ;; erroneously wrapped lines and will unwrap them. I.e. putting the
49 ;; wrapped parts ("no" in this example) back where they belong (at the
50 ;; end of the cited line above).
52 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
54 ;; Note that some people not only use broken user agents but also
55 ;; practice a bad citation style by omitting blank lines between the
56 ;; cited text and their own text.
63 ;; > This sentence no verb. This sentence no verb. This sentence no
64 ;; You forgot in all your sentences.
65 ;; > verb. This sentence no verb. This sentence no verb. This
66 ;; > sentence no verb.
68 ;; Unwrapping "You forgot in all your sentences." would be invalid as
69 ;; this part wasn't intended to be cited text.
70 ;; `gnus-article-outlook-unwrap-lines' will only unwrap lines if the resulting
71 ;; citation line will be of a certain maximum length. You can control
72 ;; this by adjusting `gnus-outlook-deuglify-unwrap-max'. Also
73 ;; unwrapping will only be done if the line above the (possibly)
74 ;; wrapped line has a minimum length of `gnus-outlook-deuglify-unwrap-min'.
76 ;; Furthermore no unwrapping will be undertaken if the last character
77 ;; is one of the chars specified in
78 ;; `gnus-outlook-deuglify-unwrap-stop-chars'. Setting this to ".?!"
79 ;; inhibits unwrapping if the cited line ends with a full stop,
80 ;; question mark or exclamation mark. Note that this variable
81 ;; defaults to `nil', triggering a few false positives but generally
82 ;; giving you better results.
;; Unwrapping works on every level of citation. Thus you will be able
;; to repair broken citations of broken user agents citing broken
;; citations of broken user agents citing broken citations...
88 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
90 ;; Citations are commonly introduced with an attribution line
91 ;; indicating who wrote the cited text. Outlook adds superfluous
92 ;; information that can be found in the header of the message to this
93 ;; line and often wraps it.
95 ;; If that weren't enough, lots of people write their own text above
96 ;; the cited text and cite the complete original article below.
101 ;; Hey, John. There's no in all your sentences!
103 ;; John Doe <john.doe@some.domain> wrote in message
104 ;; news:a87usw8$dklsssa$2@some.news.server...
;; > This sentence no verb. This sentence no verb. This sentence
;; no
;; > verb. This sentence no verb. This sentence no verb. This
;; > sentence no verb.
;; Repairing the attribution line will be done by function
;; `gnus-article-outlook-repair-attribution', which calls other functions
;; that try to recognize and repair broken attribution lines. See variable
;; `gnus-outlook-deuglify-attrib-cut-regexp' for stuff that should be
;; cut off from the beginning of an attribution line and variable
;; `gnus-outlook-deuglify-attrib-verb-regexp' for the verbs that are
;; required to be found in an attribution line. These functions return
;; the point where the repaired attribution line starts.
;; Rearranging the article so that the cited text appears above the
;; new text will be done by function
;; `gnus-article-outlook-rearrange-citation'. This function calls
;; `gnus-article-outlook-repair-attribution' to find and repair an
;; attribution line.
127 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Well, and that's what the message will look like after applying
;; deuglification:
132 ;; Example #3 (deuglified)
133 ;; -----------------------
135 ;; John Doe <john.doe@some.domain> wrote:
137 ;; > This sentence no verb. This sentence no verb. This sentence no
138 ;; > verb. This sentence no verb. This sentence no verb. This
139 ;; > sentence no verb.
143 ;; Hey, John. There's no in all your sentences!
145 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
150 ;; Press `W k' in the Summary Buffer.
152 ;; Non recommended usage :-)
153 ;; ---------------------
155 ;; To automatically invoke deuglification on every article you read,
156 ;; put something like that in your .gnus:
158 ;; (add-hook 'gnus-article-decode-hook 'gnus-article-outlook-unwrap-lines)
160 ;; or _one_ of the following lines:
162 ;; ;; repair broken attribution lines
163 ;; (add-hook 'gnus-article-decode-hook 'gnus-article-outlook-repair-attribution)
165 ;; ;; repair broken attribution lines and citations
166 ;; (add-hook 'gnus-article-decode-hook 'gnus-article-outlook-rearrange-citation)
168 ;; Note that there always may be some false positives, so I suggest
169 ;; using the manual invocation. After deuglification you may want to
170 ;; refill the whole article using `W w'.
172 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
177 ;; As I said before there may (or will) be a few false positives on
178 ;; unwrapping cited lines with `gnus-article-outlook-unwrap-lines'.
;; `gnus-article-outlook-repair-attribution' will only fix the first
;; attribution line found in the article. Furthermore it is limited to
;; certain kinds of attributions. And there may be horribly many
;; false positives, vanishing lines and so on -- so don't trust your
;; eyes. Again I recommend manual invocation.
;; `gnus-article-outlook-rearrange-citation' carries all the limitations of
;; `gnus-article-outlook-repair-attribution'.
189 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
191 ;; See ChangeLog for other changes.
193 ;; Revision 1.5 2002/01/27 14:39:17 rscholz
;; * New variable `gnus-outlook-deuglify-no-wrap-chars' to inhibit
;; unwrapping if one of these chars is first in the possibly wrapped line.
196 ;; * Improved rearranging of the article.
197 ;; * New function `gnus-outlook-repair-attribution-block' for repairing
198 ;; those big "Original Message (following some headers)" attributions.
200 ;; Revision 1.4 2002/01/03 14:05:00 rscholz
201 ;; Renamed `gnus-outlook-deuglify-article' to
202 ;; `gnus-article-outlook-deuglify-article'.
203 ;; Made it easier to deuglify the article while being in Gnus' Article
204 ;; Edit Mode. (suggested by Phil Nitschke)
207 ;; Revision 1.3 2002/01/02 23:35:54 rscholz
208 ;; Fix a bug that caused succeeding long attribution lines to be
209 ;; unwrapped. Minor doc fixes and regular expression tuning.
211 ;; Revision 1.2 2001/12/30 20:14:34 rscholz
214 ;; Revision 1.1 2001/12/30 20:13:32 rscholz
217 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Version string of this library.
(defconst gnus-outlook-deuglify-version "1.5 Gnus version"
  "Version of gnus-outlook-deuglify.")
227 ;;; User Customizable Variables:
;; Customization group that collects every user option of this library.
;; The form must be closed here; otherwise the reader swallows the
;; option definitions that follow it.
(defgroup gnus-outlook-deuglify nil
  "Deuglify articles generated by broken user agents like MS Outlook (Express)."
  :group 'gnus)
;; Lower bound used by `gnus-article-outlook-unwrap-lines': the cited
;; line above a candidate must be longer than this to be joined.
(defcustom gnus-outlook-deuglify-unwrap-min 45
  "Minimum length of the cited line above the (possibly) wrapped line."
  :type 'integer
  :group 'gnus-outlook-deuglify)
;; Upper bound used by `gnus-article-outlook-unwrap-lines': a join is
;; only performed when the combined length stays below this value.
(defcustom gnus-outlook-deuglify-unwrap-max 95
  "Maximum length of the cited line after unwrapping."
  :type 'integer
  :group 'gnus-outlook-deuglify)
;; The value is spliced verbatim into regexp character classes
;; (\"[...]\") by the functions below, so it must be a plain string of
;; characters that is valid inside a bracket expression.
(defcustom gnus-outlook-deuglify-cite-marks ">|#%"
  "Characters that indicate cited lines."
  :type 'string
  :group 'gnus-outlook-deuglify)
;; Either nil (no restriction) or a string such as ".?!".  The value is
;; spliced into a negated character class by
;; `gnus-article-outlook-unwrap-lines'.
(defcustom gnus-outlook-deuglify-unwrap-stop-chars nil
  "Characters that, when at end of cited line, inhibit unwrapping.
When one of these characters is the last one on the cited line
above the possibly wrapped line, it disallows unwrapping."
  :type '(radio (const :format "None " nil)
                (string :value ".?!"))
  :group 'gnus-outlook-deuglify)
;; Spliced into a negated character class by
;; `gnus-article-outlook-unwrap-lines', next to the cite marks.
(defcustom gnus-outlook-deuglify-no-wrap-chars "`"
  "Characters that, when at beginning of line, inhibit unwrapping.
When one of these characters is the first one in the possibly
wrapped line, it disallows unwrapping."
  :type 'string
  :group 'gnus-outlook-deuglify)
;; Matches an optional "On "/"Am " prefix, an abbreviated weekday and
;; the text up to the second following comma, i.e. the date part that
;; some user agents put in front of the author in an attribution.
(defcustom gnus-outlook-deuglify-attrib-cut-regexp
  "\\(On \\|Am \\)?\\(Mon\\|Tue\\|Wed\\|Thu\\|Fri\\|Sat\\|Sun\\),[^,]+, "
  "Regexp matching the beginning of an attribution line that should be cut off."
  :type 'regexp
  :group 'gnus-outlook-deuglify)
;; Alternation of attribution verbs in several languages; the
;; attribution repair functions embed it as one regexp group.
(defcustom gnus-outlook-deuglify-attrib-verb-regexp
  "wrote\\|writes\\|says\\|schrieb\\|schreibt\\|meinte\\|skrev\\|a écrit\\|schreef\\|escribió"
  "Regular expression matching the verb used in an attribution line."
  :type 'regexp
  :group 'gnus-outlook-deuglify)
;; An attribution ends either with a colon (optionally followed by
;; spaces) or with an ellipsis, as in "wrote:" or "wrote in message
;; news:...".  Without an explicit default the docstring would be taken
;; as the value.
(defcustom gnus-outlook-deuglify-attrib-end-regexp
  ": *\\|\\.\\.\\."
  "Regular expression matching the end of an attribution line."
  :type 'regexp
  :group 'gnus-outlook-deuglify)
;; Run by `gnus-outlook-display-article-buffer' in the same
;; `gnus-run-hooks' call as `gnus-article-prepare-hook'.
(defcustom gnus-outlook-display-hook nil
  "A hook called after a deuglified article has been prepared.
It is run after `gnus-article-prepare-hook'."
  :type 'hook
  :group 'gnus-outlook-deuglify)
(defun gnus-outlook-display-article-buffer ()
  "Redisplay the article buffer, or the current buffer if there is none.
Highlights and treats the article, then runs
`gnus-article-prepare-hook' followed by `gnus-outlook-display-hook'."
  (let ((target (or gnus-article-buffer (current-buffer))))
    (with-current-buffer target
      ;; This imitates `gnus-article-prepare-display' instead of calling
      ;; it: running that function on an article that has already been
      ;; prepared strips all MIME parts (possibly a bug, possibly not).
      (gnus-article-highlight t)
      (gnus-treat-article nil)
      (gnus-run-hooks 'gnus-article-prepare-hook
                      'gnus-outlook-display-hook))))
(defun gnus-article-outlook-unwrap-lines (&optional nodisplay)
  "Unwrap lines that appear to be wrapped citation lines.
You can control what lines will be unwrapped by frobbing
`gnus-outlook-deuglify-unwrap-min' and `gnus-outlook-deuglify-unwrap-max',
indicating the minimum and maximum length of an unwrapped citation line.  If
NODISPLAY is non-nil, don't redisplay the article buffer."
  (interactive "P")
  (let ((case-fold-search nil)
        (inhibit-read-only t)
        (cite-marks gnus-outlook-deuglify-cite-marks)
        (no-wrap gnus-outlook-deuglify-no-wrap-chars)
        ;; May be nil; `concat' treats nil as the empty string.
        (stop-chars gnus-outlook-deuglify-unwrap-stop-chars))
    (gnus-with-article-buffer
      ;; Only look at the body, never at the headers.
      (article-goto-body)
      (while (re-search-forward
              (concat
               ;; Group 1: citation prefix (cite marks and whitespace).
               "^\\([ \t" cite-marks "]*\\)"
               ;; Group 2: rest of the cited line; it must start with a
               ;; cite mark and must not end in a stop character.
               "\\([" cite-marks "].*[^\n " stop-chars "]\\)[ \t]?\n"
               ;; Group 3: the following line, which repeats the prefix
               ;; of group 1 but is itself not cited any deeper and does
               ;; not start with a no-wrap character.
               "\\1\\([^\n " cite-marks no-wrap "]+.*\\)$")
              nil t)
        (let ((len12 (- (match-end 2) (match-beginning 1)))
              (len3 (- (match-end 3) (match-beginning 3))))
          (when (and (> len12 gnus-outlook-deuglify-unwrap-min)
                     (< (+ len12 len3) gnus-outlook-deuglify-unwrap-max))
            (replace-match "\\1\\2 \\3")
            ;; Rescan the joined line: it may have been wrapped again.
            (goto-char (match-beginning 0)))))))
  (unless nodisplay
    (gnus-outlook-display-article-buffer)))
(defun gnus-outlook-rearrange-article (attr-start)
  "Put the text from ATTR-START to the end of buffer at the top of the article buffer.
ATTR-START is the buffer position where the attribution line starts.
Nothing is done when the article body already begins there."
  ;; FIXME: 1.  (*) text/plain          ( ) text/html
  (let ((inhibit-read-only t))
    (gnus-with-article-buffer
      (article-goto-body)
      ;; article does not start with attribution
      (unless (= (point) attr-start)
        (gnus-kill-all-overlays)
        (let ((cur (point))
              ;; before signature or end of buffer
              (to (if (gnus-article-search-signature)
                      (point)
                    (point-max))))
          ;; handle the case where the full quote is below the
          ;; signature
          (when (< to attr-start)
            (setq to (point-max)))
          (save-restriction
            (narrow-to-region attr-start to)
            (goto-char attr-start)
            ;; Skip the attribution line itself; the test below is about
            ;; the first line of the quoted text.
            (forward-line)
            (unless (looking-at ">")
              ;; The quoted article is not marked as a citation yet.
              (message-indent-citation (point) (point-max) 'yank-only)
              (goto-char (point-max))
              (newline)
              ;; Indenting changed the size of the region.
              (setq to (point-max))))
          ;; Swap the new text (CUR..ATTR-START) with the attribution
          ;; and citation (ATTR-START..TO).
          (transpose-regions cur attr-start attr-start to))))))
367 ;; John Doe <john.doe@some.domain> wrote in message
368 ;; news:a87usw8$dklsssa$2@some.news.server...
(defun gnus-outlook-repair-attribution-outlook ()
  "Repair a broken attribution line (Outlook).
Return the position where the repaired attribution line starts, or
nil if no such attribution was found in the article body."
  (let ((case-fold-search nil)
        (inhibit-read-only t)
        (cite-marks gnus-outlook-deuglify-cite-marks))
    (gnus-with-article-buffer
      (article-goto-body)
      (when (re-search-forward
             (concat
              ;; Group 1: uncited text in front of the verb.
              "^\\([^" cite-marks "].+\\)"
              ;; Group 2: the attribution verb.
              "\\(" gnus-outlook-deuglify-attrib-verb-regexp "\\)"
              ;; Group 3: junk after the verb, possibly running onto one
              ;; further uncited line; it is dropped.
              "\\(.*\n?[^\n" cite-marks "].*\\)?"
              ;; Group 4: end of the attribution.
              "\\(" gnus-outlook-deuglify-attrib-end-regexp "\\)$")
             nil t)
        (gnus-kill-all-overlays)
        (replace-match "\\1\\2\\4")
        (match-beginning 0)))))
388 ;; ----- Original Message -----
389 ;; From: "John Doe" <john.doe@some.domain>
390 ;; To: "Doe Foundation" <info@doefnd.org>
391 ;; Sent: Monday, November 19, 2001 12:13 PM
392 ;; Subject: More Doenuts
(defun gnus-outlook-repair-attribution-block ()
  "Repair a big broken attribution block.
The block is a \"----- Original Message -----\" style separator
followed by at least two \"Name: value\" lines.  It is replaced by
the value of the first such line followed by \" wrote:\".
Return the position where the block started, or nil if no such
block was found in the article body."
  (let ((case-fold-search nil)
        (inhibit-read-only t)
        (cite-marks gnus-outlook-deuglify-cite-marks))
    (gnus-with-article-buffer
      (article-goto-body)
      (when (re-search-forward
             (concat
              ;; Separator line, possibly cited.
              "^[" cite-marks " \t]*--* ?[^-]+ [^-]+ ?--*\\s *\n"
              ;; First header-like line; group 1 is its value.
              "[^\n:]+:[ \t]*\\([^\n]+\\)\n"
              ;; One or more further header-like lines.
              "\\([^\n:]+:[ \t]*[^\n]+\n\\)+")
             nil t)
        (gnus-kill-all-overlays)
        (replace-match "\\1 wrote:\n")
        (match-beginning 0)))))
410 ;; On Wed, 16 Jan 2002 23:23:30 +0100, John Doe <john.doe@some.domain> wrote:
(defun gnus-outlook-repair-attribution-other ()
  "Repair a broken attribution line (other user agents than Outlook).
Return the position where the repaired attribution line starts, or
nil if no such attribution was found in the article body."
  (let ((case-fold-search nil)
        (inhibit-read-only t)
        (cite-marks gnus-outlook-deuglify-cite-marks))
    (gnus-with-article-buffer
      (article-goto-body)
      (when (re-search-forward
             (concat
              ;; Groups 1-3: optional leading part that is cut off (the
              ;; cut regexp contributes two groups of its own by default).
              "^\\(" gnus-outlook-deuglify-attrib-cut-regexp "\\)?"
              ;; Group 4: first uncited line; group 5: optional start of
              ;; the uncited continuation line.
              "\\([^" cite-marks "].+\\)\n\\([^\n" cite-marks "].*\\)?"
              ;; Group 6: the attribution verb.
              "\\(" gnus-outlook-deuglify-attrib-verb-regexp "\\).*"
              ;; Group 7: end of the attribution.
              "\\(" gnus-outlook-deuglify-attrib-end-regexp "\\)$")
             nil t)
        (gnus-kill-all-overlays)
        ;; Join both lines into one and drop the cut-off part.
        (replace-match "\\4 \\5\\6\\7")
        (match-beginning 0)))))
(defun gnus-article-outlook-repair-attribution (&optional nodisplay)
  "Repair a broken attribution line.
If NODISPLAY is non-nil, don't redisplay the article buffer.
Return the position where the repaired attribution starts, or nil
if none of the repair functions found one."
  (interactive "P")
  ;; Try the repair functions in order; the first one that reports a
  ;; position wins and the remaining ones are not run.
  (let ((attrib-start
         (or (gnus-outlook-repair-attribution-other)
             (gnus-outlook-repair-attribution-block)
             (gnus-outlook-repair-attribution-outlook))))
    (unless nodisplay
      (gnus-outlook-display-article-buffer))
    attrib-start))
(defun gnus-article-outlook-rearrange-citation (&optional nodisplay)
  "Repair broken citations.
If NODISPLAY is non-nil, don't redisplay the article buffer."
  (interactive "P")
  (let ((attrib-start (gnus-article-outlook-repair-attribution 'nodisplay)))
    ;; rearrange citations if an attribution line has been recognized;
    ;; without one there is no position to rearrange around.
    (when attrib-start
      (gnus-outlook-rearrange-article attrib-start)))
  (unless nodisplay
    (gnus-outlook-display-article-buffer)))
(defun gnus-outlook-deuglify-article (&optional nodisplay)
  "Full deuglify of broken Outlook (Express) articles.
Treat dumbquotes, unwrap lines, repair attribution and rearrange citation.  If
NODISPLAY is non-nil, don't redisplay the article buffer."
  (interactive "P")
  ;; Each step is told not to redisplay; a single redisplay happens at
  ;; the end.
  ;; apply treatment of dumb quotes
  (gnus-article-treat-dumbquotes)
  ;; repair wrapped cited lines
  (gnus-article-outlook-unwrap-lines 'nodisplay)
  ;; repair attribution line and rearrange citation.
  (gnus-article-outlook-rearrange-citation 'nodisplay)
  (unless nodisplay
    (gnus-outlook-display-article-buffer)))
(defun gnus-article-outlook-deuglify-article ()
  "Deuglify broken Outlook (Express) articles and redisplay."
  ;; Must be a command so that it can be bound to a key and called
  ;; with \\[execute-extended-command].
  (interactive)
  (gnus-outlook-deuglify-article nil))
475 ;; coding: iso-8859-1
478 ;;; deuglify.el ends here