shr.el (shr-insert): Don't insert double spaces.
[emacs.git] / lisp / gnus / nndoc.el
blob6c9ef1cef8742c7e3b716ca524c4b50877ba0faf
1 ;;; nndoc.el --- single file access for Gnus
3 ;; Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003,
4 ;; 2004, 2005, 2006, 2007, 2008, 2009, 2010 Free Software Foundation, Inc.
6 ;; Author: Lars Magne Ingebrigtsen <larsi@gnus.org>
7 ;; Masanobu UMEDA <umerin@flab.flab.fujitsu.junet>
8 ;; Keywords: news
10 ;; This file is part of GNU Emacs.
12 ;; GNU Emacs is free software: you can redistribute it and/or modify
13 ;; it under the terms of the GNU General Public License as published by
14 ;; the Free Software Foundation, either version 3 of the License, or
15 ;; (at your option) any later version.
17 ;; GNU Emacs is distributed in the hope that it will be useful,
18 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
19 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 ;; GNU General Public License for more details.
22 ;; You should have received a copy of the GNU General Public License
23 ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
25 ;;; Commentary:
27 ;; For Outlook mail boxes format, see http://mbx2mbox.sourceforge.net/
29 ;;; Code:
31 (require 'nnheader)
32 (require 'message)
33 (require 'nnmail)
34 (require 'nnoo)
35 (require 'gnus-util)
36 (require 'mm-util)
37 (eval-when-compile (require 'cl))
39 (nnoo-declare nndoc)
41 (defvoo nndoc-article-type 'guess
42 "*Type of the file.
43 One of `mbox', `babyl', `digest', `news', `rnews', `mmdf', `forward',
44 `rfc934', `rfc822-forward', `mime-parts', `standard-digest',
45 `slack-digest', `clari-briefs', `nsmail', `outlook', `oe-dbx',
46 `mailman', `exim-bounce', or `guess'.")
48 (defvoo nndoc-post-type 'mail
49 "*Whether the nndoc group is `mail' or `post'.")
51 (defvoo nndoc-open-document-hook 'nnheader-ms-strip-cr
52 "Hook run after opening a document.
53 The default function removes all trailing carriage returns
54 from the document.")
56 (defvar nndoc-type-alist
57 `((mmdf
58 (article-begin . "^\^A\^A\^A\^A\n")
59 (body-end . "^\^A\^A\^A\^A\n"))
60 (mime-digest
61 (article-begin . "")
62 (head-begin . "^ ?\n")
63 (head-end . "^ ?$")
64 (body-end . "")
65 (file-end . "")
66 (subtype digest guess))
67 (nsmail
68 (article-begin . "^From - "))
69 (news
70 (article-begin . "^Path:"))
71 (rnews
72 (article-begin . "^#! *rnews +\\([0-9]+\\) *\n")
73 (body-end-function . nndoc-rnews-body-end))
74 (mbox
75 (article-begin-function . nndoc-mbox-article-begin)
76 (body-end-function . nndoc-mbox-body-end))
77 (babyl
78 (article-begin . "\^_\^L *\n")
79 (body-end . "\^_")
80 (body-begin-function . nndoc-babyl-body-begin)
81 (head-begin-function . nndoc-babyl-head-begin))
82 (mime-parts
83 (generate-head-function . nndoc-generate-mime-parts-head)
84 (article-transform-function . nndoc-transform-mime-parts))
85 (exim-bounce
86 (article-begin . "^------ This is a copy of the message, including all the headers. ------\n\n")
87 (body-end-function . nndoc-exim-bounce-body-end-function))
88 (rfc934
89 (article-begin . "^--.*\n+")
90 (body-end . "^--.*$")
91 (prepare-body-function . nndoc-unquote-dashes))
92 (mailman
93 (article-begin . "^--__--__--\n\nMessage:")
94 (body-end . "^--__--__--$")
95 (prepare-body-function . nndoc-unquote-dashes))
96 (clari-briefs
97 (article-begin . "^ \\*")
98 (body-end . "^\t------*[ \t]^*\n^ \\*")
99 (body-begin . "^\t")
100 (head-end . "^\t")
101 (generate-head-function . nndoc-generate-clari-briefs-head)
102 (article-transform-function . nndoc-transform-clari-briefs))
104 (standard-digest
105 (first-article . ,(concat "^" (make-string 70 ?-) "\n *\n+"))
106 (article-begin . ,(concat "^\n" (make-string 30 ?-) "\n *\n+"))
107 (prepare-body-function . nndoc-unquote-dashes)
108 (body-end-function . nndoc-digest-body-end)
109 (head-end . "^ *$")
110 (body-begin . "^ *\n")
111 (file-end . "^End of .*digest.*[0-9].*\n\\*\\*\\|^End of.*Digest *$")
112 (subtype digest guess))
113 (slack-digest
114 (article-begin . "^------------------------------*[\n \t]+")
115 (head-end . "^ ?$")
116 (body-end-function . nndoc-digest-body-end)
117 (body-begin . "^ ?$")
118 (file-end . "^End of")
119 (prepare-body-function . nndoc-unquote-dashes)
120 (subtype digest guess))
121 (google
122 (pre-dissection-function . nndoc-decode-content-transfer-encoding)
123 (article-begin . "^== [0-9]+ of [0-9]+ ==$")
124 (head-begin . "^Date:")
125 (head-end . "^$")
126 (body-end-function . nndoc-digest-body-end)
127 (body-begin . "^$")
128 (file-end . "^==============================================================================$")
129 (prepare-body-function . nndoc-unquote-dashes)
130 (subtype digest guess))
131 (lanl-gov-announce
132 (article-begin . "^\\\\\\\\\n")
133 (head-begin . "^\\(Paper.*:\\|arXiv:\\)")
134 (head-end . "\\(^\\\\\\\\.*\n\\|-----------------\\)")
135 (body-begin . "")
136 (body-end . "\\(-------------------------------------------------\\|%-%-%-%-%-%-%-%-%-%-%-%-%-%-\\|%%--%%--%%--%%--%%--%%--%%--%%--\\|%%%---%%%---%%%---%%%---\\)")
137 (file-end . "\\(^Title: Recent Seminal\\|%%%---%%%---%%%---%%%---\\)")
138 (generate-head-function . nndoc-generate-lanl-gov-head)
139 (article-transform-function . nndoc-transform-lanl-gov-announce)
140 (subtype preprints guess))
141 (rfc822-forward
142 (article-begin . "^\n+")
143 (body-end-function . nndoc-rfc822-forward-body-end-function)
144 (generate-head-function . nndoc-rfc822-forward-generate-head)
145 (generate-article-function . nndoc-rfc822-forward-generate-article))
146 (outlook
147 (article-begin-function . nndoc-outlook-article-begin)
148 (body-end . "\0"))
149 (oe-dbx ;; Outlook Express DBX format
150 (dissection-function . nndoc-oe-dbx-dissection)
151 (generate-head-function . nndoc-oe-dbx-generate-head)
152 (generate-article-function . nndoc-oe-dbx-generate-article))
153 (forward
154 (article-begin . "^-+ \\(Start of \\)?forwarded message.*\n+")
155 (body-end . "^-+ End \\(of \\)?forwarded message.*$")
156 (prepare-body-function . nndoc-unquote-dashes))
157 (mail-in-mail ;; Wild guess on mailer daemon's messages or others
158 (article-begin-function . nndoc-mail-in-mail-article-begin))
159 (guess
160 (guess . t)
161 (subtype nil))
162 (digest
163 (guess . t)
164 (subtype nil))
165 (preprints
166 (guess . t)
167 (subtype nil))))
169 (defvar nndoc-binary-file-names ".[Dd][Bb][Xx]$"
170 "Regexp for binary nndoc file names.")
173 (defvoo nndoc-file-begin nil)
174 (defvoo nndoc-first-article nil)
175 (defvoo nndoc-article-begin nil)
176 (defvoo nndoc-head-begin nil)
177 (defvoo nndoc-head-end nil)
178 (defvoo nndoc-file-end nil)
179 (defvoo nndoc-body-begin nil)
180 (defvoo nndoc-body-end-function nil)
181 (defvoo nndoc-body-begin-function nil)
182 (defvoo nndoc-head-begin-function nil)
183 (defvoo nndoc-body-end nil)
184 ;; nndoc-dissection-alist is a list of sublists. Each sublist holds the
185 ;; following items. ARTICLE acts as the association key and is an ordinal
186 ;; starting at 1. HEAD-BEGIN [0], HEAD-END [1], BODY-BEGIN [2] and BODY-END
187 ;; [3] are positions in the `nndoc' buffer. LINE-COUNT [4] is a count of
188 ;; lines in the body. For MIME dissections only, ARTICLE-INSERT [5] and
189 ;; SUMMARY-INSERT [6] give headers to insert for full article or summary line
190 ;; generation, respectively. Other headers usually follow directly from the
191 ;; buffer. Value `nil' means no insert.
192 (defvoo nndoc-dissection-alist nil)
193 (defvoo nndoc-prepare-body-function nil)
194 (defvoo nndoc-generate-head-function nil)
195 (defvoo nndoc-article-transform-function nil)
196 (defvoo nndoc-article-begin-function nil)
197 (defvoo nndoc-generate-article-function nil)
198 (defvoo nndoc-dissection-function nil)
199 (defvoo nndoc-pre-dissection-function nil)
201 (defvoo nndoc-status-string "")
202 (defvoo nndoc-group-alist nil)
203 (defvoo nndoc-current-buffer nil
204 "Current nndoc news buffer.")
205 (defvoo nndoc-address nil)
207 (defconst nndoc-version "nndoc 1.0"
208 "nndoc version.")
212 ;;; Interface functions
214 (nnoo-define-basics nndoc)
216 (deffoo nndoc-retrieve-headers (articles &optional newsgroup server fetch-old)
217 (when (nndoc-possibly-change-buffer newsgroup server)
218 (with-current-buffer nntp-server-buffer
219 (erase-buffer)
220 (let (article entry)
221 (if (stringp (car articles))
222 'headers
223 (while articles
224 (when (setq entry (cdr (assq (setq article (pop articles))
225 nndoc-dissection-alist)))
226 (insert (format "221 %d Article retrieved.\n" article))
227 (if nndoc-generate-head-function
228 (funcall nndoc-generate-head-function article)
229 (insert-buffer-substring
230 nndoc-current-buffer (car entry) (nth 1 entry)))
231 (goto-char (point-max))
232 (unless (eq (char-after (1- (point))) ?\n)
233 (insert "\n"))
234 (insert (format "Lines: %d\n" (nth 4 entry)))
235 (insert ".\n")))
237 (nnheader-fold-continuation-lines)
238 'headers)))))
240 (deffoo nndoc-request-article (article &optional newsgroup server buffer)
241 (nndoc-possibly-change-buffer newsgroup server)
242 (save-excursion
243 (let ((buffer (or buffer nntp-server-buffer))
244 (entry (cdr (assq article nndoc-dissection-alist)))
245 beg)
246 (set-buffer buffer)
247 (erase-buffer)
248 (when entry
249 (cond
250 ((stringp article) nil)
251 (nndoc-generate-article-function
252 (funcall nndoc-generate-article-function article))
254 (insert-buffer-substring
255 nndoc-current-buffer (car entry) (nth 1 entry))
256 (insert "\n")
257 (setq beg (point))
258 (insert-buffer-substring
259 nndoc-current-buffer (nth 2 entry) (nth 3 entry))
260 (goto-char beg)
261 (when nndoc-prepare-body-function
262 (funcall nndoc-prepare-body-function))
263 (when nndoc-article-transform-function
264 (funcall nndoc-article-transform-function article))
265 t))))))
267 (deffoo nndoc-request-group (group &optional server dont-check info)
268 "Select news GROUP."
269 (let (number)
270 (cond
271 ((not (nndoc-possibly-change-buffer group server))
272 (nnheader-report 'nndoc "No such file or buffer: %s"
273 nndoc-address))
274 (dont-check
275 (nnheader-report 'nndoc "Selected group %s" group)
277 ((zerop (setq number (length nndoc-dissection-alist)))
278 (nndoc-close-group group)
279 (nnheader-report 'nndoc "No articles in group %s" group))
281 (nnheader-insert "211 %d %d %d %s\n" number 1 number group)))))
283 (deffoo nndoc-retrieve-groups (groups &optional server)
284 (dolist (group groups)
285 (nndoc-request-group group server))
288 (deffoo nndoc-request-type (group &optional article)
289 (cond ((not article) 'unknown)
290 (nndoc-post-type nndoc-post-type)
291 (t 'unknown)))
293 (deffoo nndoc-close-group (group &optional server)
294 (nndoc-possibly-change-buffer group server)
295 (and nndoc-current-buffer
296 (buffer-name nndoc-current-buffer)
297 (kill-buffer nndoc-current-buffer))
298 (setq nndoc-group-alist (delq (assoc group nndoc-group-alist)
299 nndoc-group-alist))
300 (setq nndoc-current-buffer nil)
301 (nnoo-close-server 'nndoc server)
302 (setq nndoc-dissection-alist nil)
305 (deffoo nndoc-request-list (&optional server)
308 (deffoo nndoc-request-newgroups (date &optional server)
309 nil)
311 (deffoo nndoc-request-list-newsgroups (&optional server)
312 nil)
315 ;;; Internal functions.
317 (defun nndoc-possibly-change-buffer (group source)
318 (let (buf)
319 (cond
320 ;; The current buffer is this group's buffer.
321 ((and nndoc-current-buffer
322 (buffer-name nndoc-current-buffer)
323 (eq nndoc-current-buffer
324 (setq buf (cdr (assoc group nndoc-group-alist))))))
325 ;; We change buffers by taking an old from the group alist.
326 ;; `source' is either a string (a file name) or a buffer object.
327 (buf
328 (setq nndoc-current-buffer buf))
329 ;; It's a totally new group.
330 ((or (and (bufferp nndoc-address)
331 (buffer-name nndoc-address))
332 (and (stringp nndoc-address)
333 (file-exists-p nndoc-address)
334 (not (file-directory-p nndoc-address))))
335 (push (cons group (setq nndoc-current-buffer
336 (get-buffer-create
337 (concat " *nndoc " group "*"))))
338 nndoc-group-alist)
339 (setq nndoc-dissection-alist nil)
340 (with-current-buffer nndoc-current-buffer
341 (erase-buffer)
342 (if (and (stringp nndoc-address)
343 (string-match nndoc-binary-file-names nndoc-address))
344 (let ((coding-system-for-read 'binary))
345 (mm-insert-file-contents nndoc-address))
346 (if (stringp nndoc-address)
347 (nnheader-insert-file-contents nndoc-address)
348 (insert-buffer-substring nndoc-address))
349 (run-hooks 'nndoc-open-document-hook)))))
350 ;; Initialize the nndoc structures according to this new document.
351 (when (and nndoc-current-buffer
352 (not nndoc-dissection-alist))
353 (with-current-buffer nndoc-current-buffer
354 (nndoc-set-delims)
355 (if (eq nndoc-article-type 'mime-parts)
356 (nndoc-dissect-mime-parts)
357 (nndoc-dissect-buffer))))
358 (unless nndoc-current-buffer
359 (nndoc-close-server))
360 ;; Return whether we managed to select a file.
361 nndoc-current-buffer))
364 ;;; Deciding what document type we have
367 (defun nndoc-set-delims ()
368 "Set the nndoc delimiter variables according to the type of the document."
369 (let ((vars '(nndoc-file-begin
370 nndoc-first-article
371 nndoc-article-begin-function
372 nndoc-head-begin nndoc-head-end
373 nndoc-file-end nndoc-article-begin
374 nndoc-body-begin nndoc-body-end-function nndoc-body-end
375 nndoc-prepare-body-function nndoc-article-transform-function
376 nndoc-generate-head-function nndoc-body-begin-function
377 nndoc-head-begin-function
378 nndoc-generate-article-function
379 nndoc-dissection-function
380 nndoc-pre-dissection-function)))
381 (while vars
382 (set (pop vars) nil)))
383 (let (defs)
384 ;; Guess away until we find the real file type.
385 (while (assq 'guess (setq defs (cdr (assq nndoc-article-type
386 nndoc-type-alist))))
387 (setq nndoc-article-type (nndoc-guess-type nndoc-article-type)))
388 ;; Set the nndoc variables.
389 (while defs
390 (set (intern (format "nndoc-%s" (caar defs)))
391 (cdr (pop defs))))))
393 (defun nndoc-guess-type (subtype)
394 (let ((alist nndoc-type-alist)
395 results result entry)
396 (while (and (not result)
397 (setq entry (pop alist)))
398 (when (memq subtype (or (cdr (assq 'subtype entry)) '(guess)))
399 (goto-char (point-min))
400 ;; Remove blank lines.
401 (while (eq (following-char) ?\n)
402 (delete-char 1))
403 (when (numberp (setq result (funcall (intern
404 (format "nndoc-%s-type-p"
405 (car entry))))))
406 (push (cons result entry) results)
407 (setq result nil))))
408 (unless (or result results)
409 (error "Document is not of any recognized type"))
410 (if result
411 (car entry)
412 (cadar (last (sort results 'car-less-than-car))))))
415 ;;; Built-in type predicates and functions
418 (defun nndoc-mbox-type-p ()
419 (when (looking-at message-unix-mail-delimiter)
422 (defun nndoc-mbox-article-begin ()
423 (when (re-search-forward (concat "^" message-unix-mail-delimiter) nil t)
424 (goto-char (match-beginning 0))))
426 (defun nndoc-mbox-body-end ()
427 (let ((beg (point))
428 len end)
429 (when
430 (save-excursion
431 (and (re-search-backward
432 (concat "^" message-unix-mail-delimiter) nil t)
433 (setq end (point))
434 (search-forward "\n\n" beg t)
435 (re-search-backward
436 "^Content-Length:[ \t]*\\([0-9]+\\) *$" end t)
437 (setq len (string-to-number (match-string 1)))
438 (search-forward "\n\n" beg t)
439 (unless (= (setq len (+ (point) len)) (point-max))
440 (and (< len (point-max))
441 (goto-char len)
442 (looking-at message-unix-mail-delimiter)))))
443 (goto-char len))))
445 (defun nndoc-mmdf-type-p ()
446 (when (looking-at "\^A\^A\^A\^A$")
449 (defun nndoc-news-type-p ()
450 (when (looking-at "^Path:.*\n")
453 (defun nndoc-rnews-type-p ()
454 (when (looking-at "#! *rnews")
457 (defun nndoc-rnews-body-end ()
458 (and (re-search-backward nndoc-article-begin nil t)
459 (forward-line 1)
460 (goto-char (+ (point) (string-to-number (match-string 1))))))
462 (defun nndoc-google-type-p ()
463 (when (re-search-forward "^=3D=3D 1 of [0-9]+ =3D=3D$" nil t)
466 (defun nndoc-decode-content-transfer-encoding ()
467 (let ((encoding
468 (save-restriction
469 (message-narrow-to-head)
470 (message-fetch-field "content-transfer-encoding"))))
471 (when (and encoding
472 (search-forward "\n\n" nil t))
473 (save-restriction
474 (narrow-to-region (point) (point-max))
475 (mm-decode-content-transfer-encoding
476 (intern (downcase (mail-header-strip encoding))))))))
478 (defun nndoc-babyl-type-p ()
479 (when (re-search-forward "\^_\^L *\n" nil t)
482 (defun nndoc-babyl-body-begin ()
483 (re-search-forward "^\n" nil t)
484 (when (looking-at "\\*\\*\\* EOOH \\*\\*\\*")
485 (let ((next (or (save-excursion
486 (re-search-forward nndoc-article-begin nil t))
487 (point-max))))
488 (unless (re-search-forward "^\n" next t)
489 (goto-char next)
490 (forward-line -1)
491 (insert "\n")
492 (forward-line -1)))))
494 (defun nndoc-babyl-head-begin ()
495 (when (re-search-forward "^[0-9].*\n" nil t)
496 (when (looking-at "\\*\\*\\* EOOH \\*\\*\\*")
497 (forward-line 1))
500 (defun nndoc-forward-type-p ()
501 (when (and (re-search-forward "^-+ \\(Start of \\)?forwarded message.*\n+"
502 nil t)
503 (looking-at "[\r\n]*[a-zA-Z][a-zA-Z0-9-]*:\\|^>?From "))
506 (defun nndoc-rfc934-type-p ()
507 (when (and (re-search-forward "^-+ Start of forwarded.*\n+" nil t)
508 (not (re-search-forward "^Subject:.*digest" nil t))
509 (not (re-search-backward "^From:" nil t 2))
510 (not (re-search-forward "^From:" nil t 2)))
513 (defun nndoc-mailman-type-p ()
514 (when (re-search-forward "^--__--__--\n+" nil t)
517 (defun nndoc-rfc822-forward-type-p ()
518 (save-restriction
519 (message-narrow-to-head)
520 (when (re-search-forward "^Content-Type: *message/rfc822" nil t)
521 t)))
523 (defun nndoc-rfc822-forward-body-end-function ()
524 (goto-char (point-max)))
526 (defun nndoc-rfc822-forward-generate-article (article &optional head)
527 (let ((entry (cdr (assq article nndoc-dissection-alist)))
528 (begin (point))
529 encoding)
530 (with-current-buffer nndoc-current-buffer
531 (save-restriction
532 (message-narrow-to-head)
533 (setq encoding (message-fetch-field "content-transfer-encoding"))))
534 (insert-buffer-substring nndoc-current-buffer (car entry) (nth 3 entry))
535 (when encoding
536 (save-restriction
537 (narrow-to-region begin (point-max))
538 (mm-decode-content-transfer-encoding
539 (intern (downcase (mail-header-strip encoding))))))
540 (when head
541 (goto-char begin)
542 (when (search-forward "\n\n" nil t)
543 (delete-region (1- (point)) (point-max)))))
546 (defun nndoc-rfc822-forward-generate-head (article)
547 (nndoc-rfc822-forward-generate-article article 'head))
549 (defun nndoc-mime-parts-type-p ()
550 (let ((case-fold-search t)
551 (limit (search-forward "\n\n" nil t)))
552 (goto-char (point-min))
553 (when (and limit
554 (re-search-forward
555 (concat "\
556 ^Content-Type:[ \t]*multipart/[a-z]+ *; *\\(\\(\n[ \t]\\)?.*;\\)*"
557 "\\(\n[ \t]\\)?[ \t]*boundary=\"?[^\"\n]*[^\" \t\n]")
558 limit t))
559 t)))
561 (defun nndoc-transform-mime-parts (article)
562 (let* ((entry (cdr (assq article nndoc-dissection-alist)))
563 (headers (nth 5 entry)))
564 (when headers
565 (goto-char (point-min))
566 (insert headers))))
568 (defun nndoc-generate-mime-parts-head (article)
569 (let* ((entry (cdr (assq article nndoc-dissection-alist)))
570 (headers (nth 6 entry)))
571 (save-restriction
572 (narrow-to-region (point) (point))
573 (insert-buffer-substring
574 nndoc-current-buffer (car entry) (nth 1 entry))
575 (goto-char (point-max)))
576 (when headers
577 (insert headers))))
579 (defun nndoc-clari-briefs-type-p ()
580 (when (let ((case-fold-search nil))
581 (re-search-forward "^\t[^a-z]+ ([^a-z]+) --" nil t))
584 (defun nndoc-transform-clari-briefs (article)
585 (goto-char (point-min))
586 (when (looking-at " *\\*\\(.*\\)\n")
587 (replace-match "" t t))
588 (nndoc-generate-clari-briefs-head article))
590 (defun nndoc-generate-clari-briefs-head (article)
591 (let ((entry (cdr (assq article nndoc-dissection-alist)))
592 subject from)
593 (with-current-buffer nndoc-current-buffer
594 (save-restriction
595 (narrow-to-region (car entry) (nth 3 entry))
596 (goto-char (point-min))
597 (when (looking-at " *\\*\\(.*\\)$")
598 (setq subject (match-string 1))
599 (when (string-match "[ \t]+$" subject)
600 (setq subject (substring subject 0 (match-beginning 0)))))
601 (when
602 (let ((case-fold-search nil))
603 (re-search-forward
604 "^\t\\([^a-z]+\\(,[^(]+\\)? ([^a-z]+)\\) --" nil t))
605 (setq from (match-string 1)))))
606 (insert "From: " "clari@clari.net (" (or from "unknown") ")"
607 "\nSubject: " (or subject "(no subject)") "\n")))
609 (defun nndoc-exim-bounce-type-p ()
610 (and (re-search-forward "^------ This is a copy of the message, including all the headers. ------" nil t)
613 (defun nndoc-exim-bounce-body-end-function ()
614 (goto-char (point-max)))
617 (defun nndoc-mime-digest-type-p ()
618 (let ((case-fold-search t)
619 boundary-id b-delimiter entry)
620 (when (and
621 (re-search-forward
622 (concat "^Content-Type: *multipart/digest;[ \t\n]*[ \t]"
623 "boundary=\"?\\([^\"\n]*[^\" \t\n]\\)")
624 nil t)
625 (match-beginning 1))
626 (setq boundary-id (match-string 1)
627 b-delimiter (concat "\n--" boundary-id "[ \t]*$"))
628 (setq entry (assq 'mime-digest nndoc-type-alist))
629 (setcdr entry
630 (list
631 (cons 'head-begin "^ ?\n")
632 (cons 'head-end "^ ?$")
633 (cons 'body-begin "^ ?\n")
634 (cons 'article-begin b-delimiter)
635 (cons 'body-end-function 'nndoc-digest-body-end)
636 (cons 'file-end (concat "^--" boundary-id "--[ \t]*$"))))
637 t)))
639 (defun nndoc-standard-digest-type-p ()
640 (when (and (re-search-forward (concat "^" (make-string 70 ?-) "\n\n") nil t)
641 (re-search-forward
642 (concat "\n\n" (make-string 30 ?-) "\n\n") nil t))
645 (defun nndoc-digest-body-end ()
646 (and (re-search-forward nndoc-article-begin nil t)
647 (goto-char (match-beginning 0))))
649 (defun nndoc-slack-digest-type-p ()
652 (defun nndoc-lanl-gov-announce-type-p ()
653 (when (let ((case-fold-search nil))
654 (re-search-forward "^\\\\\\\\\n\\(Paper\\( (\\*cross-listing\\*)\\)?: [a-zA-Z-\\.]+/[0-9]+\\|arXiv:\\)" nil t))
657 (defun nndoc-transform-lanl-gov-announce (article)
658 (let ((case-fold-search nil))
659 (goto-char (point-max))
660 (when (re-search-backward "^\\\\\\\\ +( *\\([^ ]*\\) , *\\([^ ]*\\))" nil t)
661 (replace-match "\n\nGet it at \\1 (\\2)" t nil))
662 (goto-char (point-min))
663 (while (re-search-forward "^\\\\\\\\$" nil t)
664 (replace-match "" t nil))
665 (goto-char (point-min))
666 (when (re-search-forward "^replaced with revised version +\\(.*[^ ]\\) +" nil t)
667 (replace-match "Date: \\1 (revised) " t nil))
668 (goto-char (point-min))
669 (unless (re-search-forward "^From" nil t)
670 (goto-char (point-min))
671 (when (re-search-forward "^Authors?: \\(.*\\)" nil t)
672 (goto-char (point-min))
673 (insert "From: " (match-string 1) "\n")))
674 (when (re-search-forward "^arXiv:" nil t)
675 (replace-match "Paper: arXiv:" t nil))))
677 (defun nndoc-generate-lanl-gov-head (article)
678 (let ((entry (cdr (assq article nndoc-dissection-alist)))
679 (from "<no address given>")
680 subject date)
681 (with-current-buffer nndoc-current-buffer
682 (save-restriction
683 (narrow-to-region (car entry) (nth 1 entry))
684 (goto-char (point-min))
685 (when (looking-at "^\\(Paper.*: \\|arXiv:\\)\\([0-9a-zA-Z-\\./]+\\)")
686 (setq subject (concat " (" (match-string 2) ")"))
687 (when (re-search-forward "^From: \\(.*\\)" nil t)
688 (setq from (concat "<"
689 (cadr (funcall gnus-extract-address-components
690 (match-string 1))) ">")))
691 (if (re-search-forward "^Date: +\\([^(]*\\)" nil t)
692 (setq date (match-string 1))
693 (when (re-search-forward "^replaced with revised version +\\([^(]*\\)" nil t)
694 (setq date (match-string 1))))
695 (when (re-search-forward "^Title: \\([^\f]*\\)\nAuthors?: \\(.*\\)"
696 nil t)
697 (setq subject (concat (match-string 1) subject))
698 (setq from (concat (match-string 2) " " from))))))
699 (while (and from (string-match "(\[^)\]*)" from))
700 (setq from (replace-match "" t t from)))
701 (insert "From: " (or from "unknown")
702 "\nSubject: " (or subject "(no subject)") "\n")
703 (if date (insert "Date: " date))))
705 (defun nndoc-nsmail-type-p ()
706 (when (looking-at "From - ")
709 (defun nndoc-outlook-article-begin ()
710 (prog1 (re-search-forward "From:\\|Received:" nil t)
711 (goto-char (match-beginning 0))))
713 (defun nndoc-outlook-type-p ()
714 ;; FIXME: Is JMF the magic of outlook mailbox? -- ShengHuo.
715 (looking-at "JMF"))
717 (defun nndoc-oe-dbx-type-p ()
718 (looking-at (mm-string-to-multibyte "\317\255\022\376")))
720 (defun nndoc-read-little-endian ()
721 (+ (prog1 (char-after) (forward-char 1))
722 (lsh (prog1 (char-after) (forward-char 1)) 8)
723 (lsh (prog1 (char-after) (forward-char 1)) 16)
724 (lsh (prog1 (char-after) (forward-char 1)) 24)))
726 (defun nndoc-oe-dbx-decode-block ()
727 (list
728 (nndoc-read-little-endian) ;; this address
729 (nndoc-read-little-endian) ;; next address offset
730 (nndoc-read-little-endian) ;; blocksize
731 (nndoc-read-little-endian))) ;; next address
733 (defun nndoc-oe-dbx-dissection ()
734 (let ((i 0) blk p tp)
735 (goto-char 60117) ;; 0x0000EAD4+1
736 (setq p (point))
737 (unless (eobp)
738 (setq blk (nndoc-oe-dbx-decode-block)))
739 (while (and blk (> (car blk) 0) (or (zerop (nth 3 blk))
740 (> (nth 3 blk) p)))
741 (push (list (incf i) p nil nil nil 0) nndoc-dissection-alist)
742 (while (and (> (car blk) 0) (> (nth 3 blk) p))
743 (goto-char (1+ (nth 3 blk)))
744 (setq blk (nndoc-oe-dbx-decode-block)))
745 (if (or (<= (car blk) p)
746 (<= (nth 1 blk) 0)
747 (not (zerop (nth 3 blk))))
748 (setq blk nil)
749 (setq tp (+ (car blk) (nth 1 blk) 17))
750 (if (or (<= tp p) (>= tp (point-max)))
751 (setq blk nil)
752 (goto-char tp)
753 (setq p tp
754 blk (nndoc-oe-dbx-decode-block)))))))
756 (defun nndoc-oe-dbx-generate-article (article &optional head)
757 (let ((entry (cdr (assq article nndoc-dissection-alist)))
758 (cur (current-buffer))
759 (begin (point))
760 blk p)
761 (with-current-buffer nndoc-current-buffer
762 (setq p (car entry))
763 (while (> p (point-min))
764 (goto-char p)
765 (setq blk (nndoc-oe-dbx-decode-block))
766 (setq p (point))
767 (with-current-buffer cur
768 (insert-buffer-substring nndoc-current-buffer p (+ p (nth 2 blk))))
769 (setq p (1+ (nth 3 blk)))))
770 (goto-char begin)
771 (while (re-search-forward "\r$" nil t)
772 (delete-char -1))
773 (when head
774 (goto-char begin)
775 (when (search-forward "\n\n" nil t)
776 (setcar (cddddr entry) (count-lines (point) (point-max)))
777 (delete-region (1- (point)) (point-max))))
780 (defun nndoc-oe-dbx-generate-head (article)
781 (nndoc-oe-dbx-generate-article article 'head))
783 (defun nndoc-mail-in-mail-type-p ()
784 (let (found)
785 (save-excursion
786 (catch 'done
787 (while (re-search-forward "\n\n[-A-Za-z0-9]+:" nil t)
788 (setq found 0)
789 (forward-line)
790 (while (looking-at "[ \t]\\|[-A-Za-z0-9]+:")
791 (if (looking-at "[-A-Za-z0-9]+:")
792 (setq found (1+ found)))
793 (forward-line))
794 (if (and (> found 0) (looking-at "\n"))
795 (throw 'done 9999)))
796 nil))))
798 (defun nndoc-mail-in-mail-article-begin ()
799 (let (point found)
800 (if (catch 'done
801 (while (re-search-forward "\n\n\\([-A-Za-z0-9]+:\\)" nil t)
802 (setq found 0)
803 (setq point (match-beginning 1))
804 (forward-line)
805 (while (looking-at "[ \t]\\|[-A-Za-z0-9]+:")
806 (if (looking-at "[-A-Za-z0-9]+:")
807 (setq found (1+ found)))
808 (forward-line))
809 (if (and (> found 0) (looking-at "\n"))
810 (throw 'done t)))
811 nil)
812 (goto-char point))))
814 (deffoo nndoc-request-accept-article (group &optional server last)
815 nil)
818 ;;; Functions for dissecting the documents
821 (defun nndoc-search (regexp)
822 (prog1
823 (re-search-forward regexp nil t)
824 (beginning-of-line)))
826 (defun nndoc-dissect-buffer ()
827 "Go through the document and partition it into heads/bodies/articles."
828 (let ((i 0)
829 (first t)
830 art-begin head-begin head-end body-begin body-end)
831 (setq nndoc-dissection-alist nil)
832 (with-current-buffer nndoc-current-buffer
833 (goto-char (point-min))
834 ;; Remove blank lines.
835 (while (eq (following-char) ?\n)
836 (delete-char 1))
837 (when nndoc-pre-dissection-function
838 (save-excursion
839 (funcall nndoc-pre-dissection-function)))
840 (if nndoc-dissection-function
841 (funcall nndoc-dissection-function)
842 ;; Find the beginning of the file.
843 (when nndoc-file-begin
844 (nndoc-search nndoc-file-begin))
845 ;; Go through the file.
846 (while (if (and first nndoc-first-article)
847 (nndoc-search nndoc-first-article)
848 (if art-begin
849 (goto-char art-begin)
850 (nndoc-article-begin)))
851 (setq first nil
852 art-begin nil)
853 (cond (nndoc-head-begin-function
854 (funcall nndoc-head-begin-function))
855 (nndoc-head-begin
856 (nndoc-search nndoc-head-begin)))
857 (if (or (eobp)
858 (and nndoc-file-end
859 (looking-at nndoc-file-end)))
860 (goto-char (point-max))
861 (setq head-begin (point))
862 (nndoc-search (or nndoc-head-end "^$"))
863 (setq head-end (point))
864 (if nndoc-body-begin-function
865 (funcall nndoc-body-begin-function)
866 (nndoc-search (or nndoc-body-begin "^\n")))
867 (setq body-begin (point))
868 (or (and nndoc-body-end-function
869 (funcall nndoc-body-end-function))
870 (and nndoc-body-end
871 (nndoc-search nndoc-body-end))
872 (and (nndoc-article-begin)
873 (setq art-begin (point)))
874 (progn
875 (goto-char (point-max))
876 (when nndoc-file-end
877 (and (re-search-backward nndoc-file-end nil t)
878 (beginning-of-line)))))
879 (setq body-end (point))
880 (push (list (incf i) head-begin head-end body-begin body-end
881 (count-lines body-begin body-end))
882 nndoc-dissection-alist)))))))
884 (defun nndoc-article-begin ()
885 (if nndoc-article-begin-function
886 (funcall nndoc-article-begin-function)
887 (ignore-errors
888 (nndoc-search nndoc-article-begin))))
890 (defun nndoc-unquote-dashes ()
891 "Unquote quoted non-separators in digests."
892 (while (re-search-forward "^- -"nil t)
893 (replace-match "-" t t)))
895 ;; Against compiler warnings.
896 (defvar nndoc-mime-split-ordinal)
898 (defun nndoc-dissect-mime-parts ()
899 "Go through a MIME composite article and partition it into sub-articles.
900 When a MIME entity contains sub-entities, dissection produces one article for
901 the header of this entity, and one article per sub-entity."
902 (setq nndoc-dissection-alist nil
903 nndoc-mime-split-ordinal 0)
904 (with-current-buffer nndoc-current-buffer
905 (nndoc-dissect-mime-parts-sub (point-min) (point-max) nil nil nil)))
907 (defun nndoc-dissect-mime-parts-sub (head-begin body-end article-insert
908 position parent)
909 "Dissect an entity, within a composite MIME message.
910 The complete message or MIME entity extends from HEAD-BEGIN to BODY-END.
911 ARTICLE-INSERT should be added at beginning for generating a full article.
912 The string POSITION holds a dotted decimal representation of the article
913 position in the hierarchical structure, it is nil for the outer entity.
914 PARENT is the message-ID of the parent summary line, or nil for none."
915 (let ((case-fold-search t)
916 (message-id (nnmail-message-id))
917 head-end body-begin summary-insert message-rfc822 multipart-any
918 subject content-type type subtype boundary-regexp)
919 ;; Gracefully handle a missing body.
920 (goto-char head-begin)
921 (if (or (and (eq (char-after) ?\n) (or (forward-char 1) t))
922 (search-forward "\n\n" body-end t))
923 (setq head-end (1- (point))
924 body-begin (point))
925 (setq head-end body-end
926 body-begin body-end))
927 (narrow-to-region head-begin head-end)
928 ;; Save MIME attributes.
929 (goto-char head-begin)
930 (setq content-type (message-fetch-field "Content-Type"))
931 (when content-type
932 (when (string-match
933 "^ *\\([^ \t\n/;]+\\)/\\([^ \t\n/;]+\\)" content-type)
934 (setq type (downcase (match-string 1 content-type))
935 subtype (downcase (match-string 2 content-type))
936 message-rfc822 (and (string= type "message")
937 (string= subtype "rfc822"))
938 multipart-any (string= type "multipart")))
939 (when (string-match ";[ \t\n]*name=\\([^ \t\n;]+\\)" content-type)
940 (setq subject (match-string 1 content-type)))
941 (when (string-match "boundary=\"?\\([^\"\n]*[^\" \t\n]\\)" content-type)
942 (setq boundary-regexp (concat "^--"
943 (regexp-quote
944 (match-string 1 content-type))
945 "\\(--\\)?[ \t]*\n"))))
946 (unless subject
947 (when (or multipart-any (not article-insert))
948 (setq subject (message-fetch-field "Subject"))))
949 (unless type
950 (setq type "text"
951 subtype "plain"))
952 ;; Prepare the article and summary inserts.
953 (unless article-insert
954 (setq article-insert (buffer-string)
955 head-end head-begin))
956 ;; Fix MIME-Version
957 (unless (string-match "MIME-Version:" article-insert)
958 (setq article-insert
959 (concat article-insert "MIME-Version: 1.0\n")))
960 (setq summary-insert article-insert)
961 ;; - summary Subject.
962 (setq summary-insert
963 (let ((line (concat "Subject: <" position
964 (and position multipart-any ".")
965 (and multipart-any "*")
966 (and (or position multipart-any) " ")
967 (cond ((string= subtype "plain") type)
968 ((string= subtype "basic") type)
969 (t subtype))
971 (and subject " ")
972 subject
973 "\n")))
974 (if (string-match "Subject:.*\n\\([ \t].*\n\\)*" summary-insert)
975 (replace-match line t t summary-insert)
976 (concat summary-insert line))))
977 ;; - summary Message-ID.
978 (setq summary-insert
979 (let ((line (concat "Message-ID: " message-id "\n")))
980 (if (string-match "Message-ID:.*\n\\([ \t].*\n\\)*" summary-insert)
981 (replace-match line t t summary-insert)
982 (concat summary-insert line))))
983 ;; - summary References.
984 (when parent
985 (setq summary-insert
986 (let ((line (concat "References: " parent "\n")))
987 (if (string-match "References:.*\n\\([ \t].*\n\\)*"
988 summary-insert)
989 (replace-match line t t summary-insert)
990 (concat summary-insert line)))))
991 ;; Generate dissection information for this entity.
992 (push (list (incf nndoc-mime-split-ordinal)
993 head-begin head-end body-begin body-end
994 (count-lines body-begin body-end)
995 article-insert summary-insert)
996 nndoc-dissection-alist)
997 ;; Recurse for all sub-entities, if any.
998 (widen)
999 (cond
1000 (message-rfc822
1001 (save-excursion
1002 (nndoc-dissect-mime-parts-sub body-begin body-end nil
1003 position message-id)))
1004 ((and multipart-any boundary-regexp)
1005 (let ((part-counter 0)
1006 part-begin part-end eof-flag)
1007 (while (string-match "\
1008 ^\\(Lines\\|Content-\\(Type\\|Transfer-Encoding\\|Disposition\\)\\):.*\n\\([ \t].*\n\\)*"
1009 article-insert)
1010 (setq article-insert (replace-match "" t t article-insert)))
1011 (let ((case-fold-search nil))
1012 (goto-char body-begin)
1013 (setq eof-flag (not (re-search-forward boundary-regexp body-end t)))
1014 (while (not eof-flag)
1015 (setq part-begin (point))
1016 (cond ((re-search-forward boundary-regexp body-end t)
1017 (or (not (match-string 1))
1018 (string= (match-string 1) "")
1019 (setq eof-flag t))
1020 (forward-line -1)
1021 (setq part-end (point))
1022 (forward-line 1))
1023 (t (setq part-end body-end
1024 eof-flag t)))
1025 (save-excursion
1026 (nndoc-dissect-mime-parts-sub
1027 part-begin part-end article-insert
1028 (concat position
1029 (and position ".")
1030 (format "%d" (incf part-counter)))
1031 message-id)))))))))
1033 ;;;###autoload
1034 (defun nndoc-add-type (definition &optional position)
1035 "Add document DEFINITION to the list of nndoc document definitions.
1036 If POSITION is nil or `last', the definition will be added
1037 as the last checked definition, if t or `first', add as the
1038 first definition, and if any other symbol, add after that
1039 symbol in the alist."
1040 ;; First remove any old instances.
1041 (gnus-alist-pull (car definition) nndoc-type-alist)
1042 ;; Then enter the new definition in the proper place.
1043 (cond
1044 ((or (null position) (eq position 'last))
1045 (setq nndoc-type-alist (nconc nndoc-type-alist (list definition))))
1046 ((or (eq position t) (eq position 'first))
1047 (push definition nndoc-type-alist))
1049 (let ((list (memq (assq position nndoc-type-alist)
1050 nndoc-type-alist)))
1051 (unless list
1052 (error "No such position: %s" position))
1053 (setcdr list (cons definition (cdr list)))))))
1055 (provide 'nndoc)
1057 ;;; nndoc.el ends here