Merge branch 'emacs-24' of git.sv.gnu.org:/srv/git/emacs into emacs-24
[emacs.git] / lisp / gnus / nndoc.el
blob88afe34a4108d490e55f5d1675fc04f435b9b194
1 ;;; nndoc.el --- single file access for Gnus
3 ;; Copyright (C) 1995-2014 Free Software Foundation, Inc.
5 ;; Author: Lars Magne Ingebrigtsen <larsi@gnus.org>
6 ;; Masanobu UMEDA <umerin@flab.flab.fujitsu.junet>
7 ;; Keywords: news
9 ;; This file is part of GNU Emacs.
11 ;; GNU Emacs is free software: you can redistribute it and/or modify
12 ;; it under the terms of the GNU General Public License as published by
13 ;; the Free Software Foundation, either version 3 of the License, or
14 ;; (at your option) any later version.
16 ;; GNU Emacs is distributed in the hope that it will be useful,
17 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
18 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 ;; GNU General Public License for more details.
21 ;; You should have received a copy of the GNU General Public License
22 ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
24 ;;; Commentary:
26 ;; For Outlook mail boxes format, see http://mbx2mbox.sourceforge.net/
28 ;;; Code:
30 (require 'nnheader)
31 (require 'message)
32 (require 'nnmail)
33 (require 'nnoo)
34 (require 'gnus-util)
35 (require 'mm-util)
36 (eval-when-compile (require 'cl))
38 (nnoo-declare nndoc)
40 (defvoo nndoc-article-type 'guess
41 "*Type of the file.
42 One of `mbox', `babyl', `digest', `news', `rnews', `mmdf', `forward',
43 `rfc934', `rfc822-forward', `mime-parts', `standard-digest',
44 `slack-digest', `clari-briefs', `nsmail', `outlook', `oe-dbx',
45 `mailman', `exim-bounce', or `guess'.")
47 (defvoo nndoc-post-type 'mail
48 "*Whether the nndoc group is `mail' or `post'.")
50 (defvoo nndoc-open-document-hook 'nnheader-ms-strip-cr
51 "Hook run after opening a document.
52 The default function removes all trailing carriage returns
53 from the document.")
55 (defvar nndoc-type-alist
56 `((mmdf
57 (article-begin . "^\^A\^A\^A\^A\n")
58 (body-end . "^\^A\^A\^A\^A\n"))
59 (debbugs-db
60 (file-begin . "^\005")
61 (article-begin . "^[\005\007]\n")
62 (body-end . "^\003"))
63 (mime-digest
64 (article-begin . "")
65 (head-begin . "^ ?\n")
66 (head-end . "^ ?$")
67 (body-end . "")
68 (file-end . "")
69 (subtype digest guess))
70 (nsmail
71 (article-begin . "^From - "))
72 (news
73 (article-begin . "^Path:"))
74 (rnews
75 (article-begin . "^#! *rnews +\\([0-9]+\\) *\n")
76 (body-end-function . nndoc-rnews-body-end))
77 (mbox
78 (article-begin-function . nndoc-mbox-article-begin)
79 (body-end-function . nndoc-mbox-body-end))
80 (babyl
81 (article-begin . "\^_\^L *\n")
82 (body-end . "\^_")
83 (body-begin-function . nndoc-babyl-body-begin)
84 (head-begin-function . nndoc-babyl-head-begin))
85 (mime-parts
86 (generate-head-function . nndoc-generate-mime-parts-head)
87 (article-transform-function . nndoc-transform-mime-parts))
88 (exim-bounce
89 (article-begin . "^------ This is a copy of the message, including all the headers. ------\n\n")
90 (body-end-function . nndoc-exim-bounce-body-end-function))
91 (rfc934
92 (article-begin . "^--.*\n+")
93 (body-end . "^--.*$")
94 (prepare-body-function . nndoc-unquote-dashes))
95 (mailman
96 (article-begin . "^--__--__--\n\nMessage:")
97 (body-end . "^--__--__--$")
98 (prepare-body-function . nndoc-unquote-dashes))
99 (clari-briefs
100 (article-begin . "^ \\*")
101 (body-end . "^\t------*[ \t]^*\n^ \\*")
102 (body-begin . "^\t")
103 (head-end . "^\t")
104 (generate-head-function . nndoc-generate-clari-briefs-head)
105 (article-transform-function . nndoc-transform-clari-briefs))
107 (standard-digest
108 (first-article . ,(concat "^" (make-string 70 ?-) "\n *\n+"))
109 (article-begin . ,(concat "^\n" (make-string 30 ?-) "\n *\n+"))
110 (prepare-body-function . nndoc-unquote-dashes)
111 (body-end-function . nndoc-digest-body-end)
112 (head-end . "^ *$")
113 (body-begin . "^ *\n")
114 (file-end . "^End of .*digest.*[0-9].*\n\\*\\*\\|^End of.*Digest *$")
115 (subtype digest guess))
116 (slack-digest
117 (article-begin . "^------------------------------*[\n \t]+")
118 (head-end . "^ ?$")
119 (body-end-function . nndoc-digest-body-end)
120 (body-begin . "^ ?$")
121 (file-end . "^End of")
122 (prepare-body-function . nndoc-unquote-dashes)
123 (subtype digest guess))
124 (google
125 (pre-dissection-function . nndoc-decode-content-transfer-encoding)
126 (article-begin . "^== [0-9]+ of [0-9]+ ==$")
127 (head-begin . "^Date:")
128 (head-end . "^$")
129 (body-end-function . nndoc-digest-body-end)
130 (body-begin . "^$")
131 (file-end . "^==============================================================================$")
132 (prepare-body-function . nndoc-unquote-dashes)
133 (subtype digest guess))
134 (lanl-gov-announce
135 (article-begin . "^\\\\\\\\\n")
136 (head-begin . "^\\(Paper.*:\\|arXiv:\\)")
137 (head-end . "\\(^\\\\\\\\.*\n\\|-----------------\\)")
138 (body-begin . "")
139 (body-end . "\\(-------------------------------------------------\\|%-%-%-%-%-%-%-%-%-%-%-%-%-%-\\|%%--%%--%%--%%--%%--%%--%%--%%--\\|%%%---%%%---%%%---%%%---\\)")
140 (file-end . "\\(^Title: Recent Seminal\\|%%%---%%%---%%%---%%%---\\)")
141 (generate-head-function . nndoc-generate-lanl-gov-head)
142 (article-transform-function . nndoc-transform-lanl-gov-announce)
143 (subtype preprints guess))
144 (git
145 (file-begin . "\n- Log ---.*")
146 (article-begin . "^commit ")
147 (head-begin . "^Author: ")
148 (body-begin . "^$")
149 (file-end . "\n-----------------------------------------------------------------------")
150 (article-transform-function . nndoc-transform-git-article)
151 (header-transform-function . nndoc-transform-git-headers))
152 (rfc822-forward
153 (article-begin . "^\n+")
154 (body-end-function . nndoc-rfc822-forward-body-end-function)
155 (generate-head-function . nndoc-rfc822-forward-generate-head)
156 (generate-article-function . nndoc-rfc822-forward-generate-article))
157 (outlook
158 (article-begin-function . nndoc-outlook-article-begin)
159 (body-end . "\0"))
160 (oe-dbx ;; Outlook Express DBX format
161 (dissection-function . nndoc-oe-dbx-dissection)
162 (generate-head-function . nndoc-oe-dbx-generate-head)
163 (generate-article-function . nndoc-oe-dbx-generate-article))
164 (forward
165 (article-begin . "^-+ \\(Start of \\)?forwarded message.*\n+")
166 (body-end . "^-+ End \\(of \\)?forwarded message.*$")
167 (prepare-body-function . nndoc-unquote-dashes))
168 (mail-in-mail ;; Wild guess on mailer daemon's messages or others
169 (article-begin-function . nndoc-mail-in-mail-article-begin))
170 (guess
171 (guess . t)
172 (subtype nil))
173 (digest
174 (guess . t)
175 (subtype nil))
176 (preprints
177 (guess . t)
178 (subtype nil))))
180 (defvar nndoc-binary-file-names ".[Dd][Bb][Xx]$"
181 "Regexp for binary nndoc file names.")
184 (defvoo nndoc-file-begin nil)
185 (defvoo nndoc-first-article nil)
186 (defvoo nndoc-article-begin nil)
187 (defvoo nndoc-head-begin nil)
188 (defvoo nndoc-head-end nil)
189 (defvoo nndoc-file-end nil)
190 (defvoo nndoc-body-begin nil)
191 (defvoo nndoc-body-end-function nil)
192 (defvoo nndoc-body-begin-function nil)
193 (defvoo nndoc-head-begin-function nil)
194 (defvoo nndoc-body-end nil)
195 ;; nndoc-dissection-alist is a list of sublists. Each sublist holds the
196 ;; following items. ARTICLE acts as the association key and is an ordinal
197 ;; starting at 1. HEAD-BEGIN [0], HEAD-END [1], BODY-BEGIN [2] and BODY-END
198 ;; [3] are positions in the `nndoc' buffer. LINE-COUNT [4] is a count of
199 ;; lines in the body. For MIME dissections only, ARTICLE-INSERT [5] and
200 ;; SUMMARY-INSERT [6] give headers to insert for full article or summary line
201 ;; generation, respectively. Other headers usually follow directly from the
202 ;; buffer. Value `nil' means no insert.
203 (defvoo nndoc-dissection-alist nil)
204 (defvoo nndoc-prepare-body-function nil)
205 (defvoo nndoc-generate-head-function nil)
206 (defvoo nndoc-article-transform-function nil)
207 (defvoo nndoc-header-transform-function nil)
208 (defvoo nndoc-article-begin-function nil)
209 (defvoo nndoc-generate-article-function nil)
210 (defvoo nndoc-dissection-function nil)
211 (defvoo nndoc-pre-dissection-function nil)
213 (defvoo nndoc-status-string "")
214 (defvoo nndoc-group-alist nil)
215 (defvoo nndoc-current-buffer nil
216 "Current nndoc news buffer.")
217 (defvoo nndoc-address nil)
219 (defconst nndoc-version "nndoc 1.0"
220 "nndoc version.")
224 ;;; Interface functions
226 (nnoo-define-basics nndoc)
228 (deffoo nndoc-retrieve-headers (articles &optional newsgroup server fetch-old)
229 (when (nndoc-possibly-change-buffer newsgroup server)
230 (with-current-buffer nntp-server-buffer
231 (erase-buffer)
232 (let (article entry)
233 (if (stringp (car articles))
234 'headers
235 (while articles
236 (when (setq entry (cdr (assq (setq article (pop articles))
237 nndoc-dissection-alist)))
238 (let ((start (point)))
239 (insert (format "221 %d Article retrieved.\n" article))
240 (if nndoc-generate-head-function
241 (funcall nndoc-generate-head-function article)
242 (insert-buffer-substring
243 nndoc-current-buffer (car entry) (nth 1 entry)))
244 (goto-char (point-max))
245 (unless (eq (char-after (1- (point))) ?\n)
246 (insert "\n"))
247 (insert (format "Lines: %d\n" (nth 4 entry)))
248 (insert ".\n")
249 (when nndoc-header-transform-function
250 (save-excursion
251 (save-restriction
252 (narrow-to-region start (point))
253 (funcall nndoc-header-transform-function entry)))))))
254 (nnheader-fold-continuation-lines)
255 'headers)))))
257 (deffoo nndoc-request-article (article &optional newsgroup server buffer)
258 (nndoc-possibly-change-buffer newsgroup server)
259 (save-excursion
260 (let ((buffer (or buffer nntp-server-buffer))
261 (entry (cdr (assq article nndoc-dissection-alist)))
262 beg)
263 (set-buffer buffer)
264 (erase-buffer)
265 (when entry
266 (cond
267 ((stringp article) nil)
268 (nndoc-generate-article-function
269 (funcall nndoc-generate-article-function article))
271 (insert-buffer-substring
272 nndoc-current-buffer (car entry) (nth 1 entry))
273 (insert "\n")
274 (setq beg (point))
275 (insert-buffer-substring
276 nndoc-current-buffer (nth 2 entry) (nth 3 entry))
277 (goto-char beg)
278 (when nndoc-prepare-body-function
279 (funcall nndoc-prepare-body-function))
280 (when nndoc-article-transform-function
281 (funcall nndoc-article-transform-function article))
282 t))))))
284 (deffoo nndoc-request-group (group &optional server dont-check info)
285 "Select news GROUP."
286 (let (number)
287 (cond
288 ((not (nndoc-possibly-change-buffer group server))
289 (nnheader-report 'nndoc "No such file or buffer: %s"
290 nndoc-address))
291 (dont-check
292 (nnheader-report 'nndoc "Selected group %s" group)
294 ((zerop (setq number (length nndoc-dissection-alist)))
295 (nndoc-close-group group)
296 (nnheader-report 'nndoc "No articles in group %s" group))
298 (nnheader-insert "211 %d %d %d %s\n" number 1 number group)))))
300 (deffoo nndoc-retrieve-groups (groups &optional server)
301 (dolist (group groups)
302 (nndoc-request-group group server))
305 (deffoo nndoc-request-type (group &optional article)
306 (cond ((not article) 'unknown)
307 (nndoc-post-type nndoc-post-type)
308 (t 'unknown)))
310 (deffoo nndoc-close-group (group &optional server)
311 (nndoc-possibly-change-buffer group server)
312 (and nndoc-current-buffer
313 (buffer-name nndoc-current-buffer)
314 (kill-buffer nndoc-current-buffer))
315 (setq nndoc-group-alist (delq (assoc group nndoc-group-alist)
316 nndoc-group-alist))
317 (setq nndoc-current-buffer nil)
318 (nnoo-close-server 'nndoc server)
319 (setq nndoc-dissection-alist nil)
322 (deffoo nndoc-request-list (&optional server)
325 (deffoo nndoc-request-newgroups (date &optional server)
326 nil)
328 (deffoo nndoc-request-list-newsgroups (&optional server)
329 nil)
332 ;;; Internal functions.
334 (defun nndoc-possibly-change-buffer (group source)
335 (let (buf)
336 (cond
337 ;; The current buffer is this group's buffer.
338 ((and nndoc-current-buffer
339 (buffer-name nndoc-current-buffer)
340 (eq nndoc-current-buffer
341 (setq buf (cdr (assoc group nndoc-group-alist))))))
342 ;; We change buffers by taking an old from the group alist.
343 ;; `source' is either a string (a file name) or a buffer object.
344 (buf
345 (setq nndoc-current-buffer buf))
346 ;; It's a totally new group.
347 ((or (and (bufferp nndoc-address)
348 (buffer-name nndoc-address))
349 (and (stringp nndoc-address)
350 (file-exists-p nndoc-address)
351 (not (file-directory-p nndoc-address))))
352 (push (cons group (setq nndoc-current-buffer
353 (get-buffer-create
354 (concat " *nndoc " group "*"))))
355 nndoc-group-alist)
356 (setq nndoc-dissection-alist nil)
357 (with-current-buffer nndoc-current-buffer
358 (erase-buffer)
359 (if (and (stringp nndoc-address)
360 (string-match nndoc-binary-file-names nndoc-address))
361 (let ((coding-system-for-read 'binary))
362 (mm-insert-file-contents nndoc-address))
363 (if (stringp nndoc-address)
364 (nnheader-insert-file-contents nndoc-address)
365 (insert-buffer-substring nndoc-address))
366 (run-hooks 'nndoc-open-document-hook)))))
367 ;; Initialize the nndoc structures according to this new document.
368 (when (and nndoc-current-buffer
369 (not nndoc-dissection-alist))
370 (with-current-buffer nndoc-current-buffer
371 (nndoc-set-delims)
372 (if (eq nndoc-article-type 'mime-parts)
373 (nndoc-dissect-mime-parts)
374 (nndoc-dissect-buffer))))
375 (unless nndoc-current-buffer
376 (nndoc-close-server))
377 ;; Return whether we managed to select a file.
378 nndoc-current-buffer))
381 ;;; Deciding what document type we have
384 (defun nndoc-set-delims ()
385 "Set the nndoc delimiter variables according to the type of the document."
386 (let ((vars '(nndoc-file-begin
387 nndoc-first-article
388 nndoc-article-begin-function
389 nndoc-head-begin nndoc-head-end
390 nndoc-file-end nndoc-article-begin
391 nndoc-body-begin nndoc-body-end-function nndoc-body-end
392 nndoc-prepare-body-function nndoc-article-transform-function
393 nndoc-header-transform-function
394 nndoc-generate-head-function nndoc-body-begin-function
395 nndoc-head-begin-function
396 nndoc-generate-article-function
397 nndoc-dissection-function
398 nndoc-pre-dissection-function)))
399 (while vars
400 (set (pop vars) nil)))
401 (let (defs)
402 ;; Guess away until we find the real file type.
403 (while (assq 'guess (setq defs (cdr (assq nndoc-article-type
404 nndoc-type-alist))))
405 (setq nndoc-article-type (nndoc-guess-type nndoc-article-type)))
406 ;; Set the nndoc variables.
407 (while defs
408 (set (intern (format "nndoc-%s" (caar defs)))
409 (cdr (pop defs))))))
411 (defun nndoc-guess-type (subtype)
412 (let ((alist nndoc-type-alist)
413 results result entry)
414 (while (and (not result)
415 (setq entry (pop alist)))
416 (when (memq subtype (or (cdr (assq 'subtype entry)) '(guess)))
417 (goto-char (point-min))
418 ;; Remove blank lines.
419 (while (eq (following-char) ?\n)
420 (delete-char 1))
421 (when (numberp (setq result (funcall (intern
422 (format "nndoc-%s-type-p"
423 (car entry))))))
424 (push (cons result entry) results)
425 (setq result nil))))
426 (unless (or result results)
427 (error "Document is not of any recognized type"))
428 (if result
429 (car entry)
430 (cadar (last (sort results 'car-less-than-car))))))
433 ;;; Built-in type predicates and functions
436 (defun nndoc-mbox-type-p ()
437 (when (looking-at message-unix-mail-delimiter)
440 (defun nndoc-mbox-article-begin ()
441 (when (re-search-forward (concat "^" message-unix-mail-delimiter) nil t)
442 (goto-char (match-beginning 0))))
444 (defun nndoc-mbox-body-end ()
445 (let ((beg (point))
446 len end)
447 (when
448 (save-excursion
449 (and (re-search-backward
450 (concat "^" message-unix-mail-delimiter) nil t)
451 (setq end (point))
452 (search-forward "\n\n" beg t)
453 (re-search-backward
454 "^Content-Length:[ \t]*\\([0-9]+\\) *$" end t)
455 (setq len (string-to-number (match-string 1)))
456 (search-forward "\n\n" beg t)
457 (unless (= (setq len (+ (point) len)) (point-max))
458 (and (< len (point-max))
459 (goto-char len)
460 (looking-at message-unix-mail-delimiter)))))
461 (goto-char len))))
463 (defun nndoc-mmdf-type-p ()
464 (when (looking-at "\^A\^A\^A\^A$")
467 (defun nndoc-debbugs-db-type-p ()
468 (when (looking-at "\006$")
471 (defun nndoc-news-type-p ()
472 (when (looking-at "^Path:.*\n")
475 (defun nndoc-rnews-type-p ()
476 (when (looking-at "#! *rnews")
479 (defun nndoc-rnews-body-end ()
480 (and (re-search-backward nndoc-article-begin nil t)
481 (forward-line 1)
482 (goto-char (+ (point) (string-to-number (match-string 1))))))
484 (defun nndoc-google-type-p ()
485 (when (re-search-forward "^=3D=3D 1 of [0-9]+ =3D=3D$" nil t)
488 (defun nndoc-decode-content-transfer-encoding ()
489 (let ((encoding
490 (save-restriction
491 (message-narrow-to-head)
492 (message-fetch-field "content-transfer-encoding"))))
493 (when (and encoding
494 (search-forward "\n\n" nil t))
495 (save-restriction
496 (narrow-to-region (point) (point-max))
497 (mm-decode-content-transfer-encoding
498 (intern (downcase (mail-header-strip encoding))))))))
500 (defun nndoc-babyl-type-p ()
501 (when (re-search-forward "\^_\^L *\n" nil t)
504 (defun nndoc-babyl-body-begin ()
505 (re-search-forward "^\n" nil t)
506 (when (looking-at "\\*\\*\\* EOOH \\*\\*\\*")
507 (let ((next (or (save-excursion
508 (re-search-forward nndoc-article-begin nil t))
509 (point-max))))
510 (unless (re-search-forward "^\n" next t)
511 (goto-char next)
512 (forward-line -1)
513 (insert "\n")
514 (forward-line -1)))))
516 (defun nndoc-babyl-head-begin ()
517 (when (re-search-forward "^[0-9].*\n" nil t)
518 (when (looking-at "\\*\\*\\* EOOH \\*\\*\\*")
519 (forward-line 1))
522 (defun nndoc-forward-type-p ()
523 (when (and (re-search-forward "^-+ \\(Start of \\)?forwarded message.*\n+"
524 nil t)
525 (looking-at "[\r\n]*[a-zA-Z][a-zA-Z0-9-]*:\\|^>?From "))
528 (defun nndoc-rfc934-type-p ()
529 (when (and (re-search-forward "^-+ Start of forwarded.*\n+" nil t)
530 (not (re-search-forward "^Subject:.*digest" nil t))
531 (not (re-search-backward "^From:" nil t 2))
532 (not (re-search-forward "^From:" nil t 2)))
535 (defun nndoc-mailman-type-p ()
536 (when (re-search-forward "^--__--__--\n+" nil t)
539 (defun nndoc-rfc822-forward-type-p ()
540 (save-restriction
541 (message-narrow-to-head)
542 (when (re-search-forward "^Content-Type: *message/rfc822" nil t)
543 t)))
545 (defun nndoc-rfc822-forward-body-end-function ()
546 (goto-char (point-max)))
548 (defun nndoc-rfc822-forward-generate-article (article &optional head)
549 (let ((entry (cdr (assq article nndoc-dissection-alist)))
550 (begin (point))
551 encoding)
552 (with-current-buffer nndoc-current-buffer
553 (save-restriction
554 (message-narrow-to-head)
555 (setq encoding (message-fetch-field "content-transfer-encoding"))))
556 (insert-buffer-substring nndoc-current-buffer (car entry) (nth 3 entry))
557 (when encoding
558 (save-restriction
559 (narrow-to-region begin (point-max))
560 (mm-decode-content-transfer-encoding
561 (intern (downcase (mail-header-strip encoding))))))
562 (when head
563 (goto-char begin)
564 (when (search-forward "\n\n" nil t)
565 (delete-region (1- (point)) (point-max)))))
568 (defun nndoc-rfc822-forward-generate-head (article)
569 (nndoc-rfc822-forward-generate-article article 'head))
571 (defun nndoc-mime-parts-type-p ()
572 (let ((case-fold-search t)
573 (limit (search-forward "\n\n" nil t)))
574 (goto-char (point-min))
575 (when (and limit
576 (re-search-forward
577 (concat "\
578 ^Content-Type:[ \t]*multipart/[a-z]+ *; *\\(\\(\n[ \t]\\)?.*;\\)*"
579 "\\(\n[ \t]\\)?[ \t]*boundary=\"?[^\"\n]*[^\" \t\n]")
580 limit t))
581 t)))
583 (defun nndoc-transform-mime-parts (article)
584 (let* ((entry (cdr (assq article nndoc-dissection-alist)))
585 (headers (nth 5 entry)))
586 (when headers
587 (goto-char (point-min))
588 (insert headers))))
590 (defun nndoc-generate-mime-parts-head (article)
591 (let* ((entry (cdr (assq article nndoc-dissection-alist)))
592 (headers (nth 6 entry)))
593 (save-restriction
594 (narrow-to-region (point) (point))
595 (insert-buffer-substring
596 nndoc-current-buffer (car entry) (nth 1 entry))
597 (goto-char (point-max)))
598 (when headers
599 (insert headers))))
601 (defun nndoc-clari-briefs-type-p ()
602 (when (let ((case-fold-search nil))
603 (re-search-forward "^\t[^a-z]+ ([^a-z]+) --" nil t))
606 (defun nndoc-transform-clari-briefs (article)
607 (goto-char (point-min))
608 (when (looking-at " *\\*\\(.*\\)\n")
609 (replace-match "" t t))
610 (nndoc-generate-clari-briefs-head article))
612 (defun nndoc-generate-clari-briefs-head (article)
613 (let ((entry (cdr (assq article nndoc-dissection-alist)))
614 subject from)
615 (with-current-buffer nndoc-current-buffer
616 (save-restriction
617 (narrow-to-region (car entry) (nth 3 entry))
618 (goto-char (point-min))
619 (when (looking-at " *\\*\\(.*\\)$")
620 (setq subject (match-string 1))
621 (when (string-match "[ \t]+$" subject)
622 (setq subject (substring subject 0 (match-beginning 0)))))
623 (when
624 (let ((case-fold-search nil))
625 (re-search-forward
626 "^\t\\([^a-z]+\\(,[^(]+\\)? ([^a-z]+)\\) --" nil t))
627 (setq from (match-string 1)))))
628 (insert "From: " "clari@clari.net (" (or from "unknown") ")"
629 "\nSubject: " (or subject "(no subject)") "\n")))
631 (defun nndoc-exim-bounce-type-p ()
632 (and (re-search-forward "^------ This is a copy of the message, including all the headers. ------" nil t)
635 (defun nndoc-exim-bounce-body-end-function ()
636 (goto-char (point-max)))
639 (defun nndoc-mime-digest-type-p ()
640 (let ((case-fold-search t)
641 boundary-id b-delimiter entry)
642 (when (and
643 (re-search-forward
644 (concat "^Content-Type: *multipart/digest;[ \t\n]*[ \t]"
645 "boundary=\"?\\([^\"\n]*[^\" \t\n]\\)")
646 nil t)
647 (match-beginning 1))
648 (setq boundary-id (match-string 1)
649 b-delimiter (concat "\n--" boundary-id "[ \t]*$"))
650 (setq entry (assq 'mime-digest nndoc-type-alist))
651 (setcdr entry
652 (list
653 (cons 'head-begin "^ ?\n")
654 (cons 'head-end "^ ?$")
655 (cons 'body-begin "^ ?\n")
656 (cons 'article-begin b-delimiter)
657 (cons 'body-end-function 'nndoc-digest-body-end)
658 (cons 'file-end (concat "^--" boundary-id "--[ \t]*$"))))
659 t)))
661 (defun nndoc-standard-digest-type-p ()
662 (when (and (re-search-forward (concat "^" (make-string 70 ?-) "\n\n") nil t)
663 (re-search-forward
664 (concat "\n\n" (make-string 30 ?-) "\n\n") nil t))
667 (defun nndoc-digest-body-end ()
668 (and (re-search-forward nndoc-article-begin nil t)
669 (goto-char (match-beginning 0))))
671 (defun nndoc-slack-digest-type-p ()
674 (defun nndoc-git-type-p ()
675 (and (search-forward "\n- Log ---" nil t)
676 (search-forward "\ncommit " nil t)
677 (search-forward "\nAuthor: " nil t)))
679 (defun nndoc-transform-git-article (article)
680 (goto-char (point-min))
681 (when (re-search-forward "^Author: " nil t)
682 (replace-match "From: " t t)))
684 (defun nndoc-transform-git-headers (entry)
685 (goto-char (point-min))
686 (when (re-search-forward "^Author: " nil t)
687 (replace-match "From: " t t))
688 (let (subject)
689 (with-current-buffer nndoc-current-buffer
690 (goto-char (car entry))
691 (when (search-forward "\n\n" nil t)
692 (setq subject (buffer-substring (point) (line-end-position)))))
693 (when subject
694 (goto-char (point-min))
695 (forward-line 1)
696 (insert (format "Subject: %s\n" subject)))))
698 (defun nndoc-lanl-gov-announce-type-p ()
699 (when (let ((case-fold-search nil))
700 (re-search-forward "^\\\\\\\\\n\\(Paper\\( (\\*cross-listing\\*)\\)?: [a-zA-Z-\\.]+/[0-9]+\\|arXiv:\\)" nil t))
703 (defun nndoc-transform-lanl-gov-announce (article)
704 (let ((case-fold-search nil))
705 (goto-char (point-max))
706 (when (re-search-backward "^\\\\\\\\ +( *\\([^ ]*\\) , *\\([^ ]*\\))" nil t)
707 (replace-match "\n\nGet it at \\1 (\\2)" t nil))
708 (goto-char (point-min))
709 (while (re-search-forward "^\\\\\\\\$" nil t)
710 (replace-match "" t nil))
711 (goto-char (point-min))
712 (when (re-search-forward "^replaced with revised version +\\(.*[^ ]\\) +" nil t)
713 (replace-match "Date: \\1 (revised) " t nil))
714 (goto-char (point-min))
715 (unless (re-search-forward "^From" nil t)
716 (goto-char (point-min))
717 (when (re-search-forward "^Authors?: \\(.*\\)" nil t)
718 (goto-char (point-min))
719 (insert "From: " (match-string 1) "\n")))
720 (when (re-search-forward "^arXiv:" nil t)
721 (replace-match "Paper: arXiv:" t nil))))
723 (defun nndoc-generate-lanl-gov-head (article)
724 (let ((entry (cdr (assq article nndoc-dissection-alist)))
725 (from "<no address given>")
726 subject date)
727 (with-current-buffer nndoc-current-buffer
728 (save-restriction
729 (narrow-to-region (car entry) (nth 1 entry))
730 (goto-char (point-min))
731 (when (looking-at "^\\(Paper.*: \\|arXiv:\\)\\([0-9a-zA-Z-\\./]+\\)")
732 (setq subject (concat " (" (match-string 2) ")"))
733 (when (re-search-forward "^From: \\(.*\\)" nil t)
734 (setq from (concat "<"
735 (cadr (funcall gnus-extract-address-components
736 (match-string 1))) ">")))
737 (if (re-search-forward "^Date: +\\([^(]*\\)" nil t)
738 (setq date (match-string 1))
739 (when (re-search-forward "^replaced with revised version +\\([^(]*\\)" nil t)
740 (setq date (match-string 1))))
741 (when (re-search-forward "^Title: \\([^\f]*\\)\nAuthors?: \\(.*\\)"
742 nil t)
743 (setq subject (concat (match-string 1) subject))
744 (setq from (concat (match-string 2) " " from))))))
745 (while (and from (string-match "(\[^)\]*)" from))
746 (setq from (replace-match "" t t from)))
747 (insert "From: " (or from "unknown")
748 "\nSubject: " (or subject "(no subject)") "\n")
749 (if date (insert "Date: " date))))
751 (defun nndoc-nsmail-type-p ()
752 (when (looking-at "From - ")
755 (defun nndoc-outlook-article-begin ()
756 (prog1 (re-search-forward "From:\\|Received:" nil t)
757 (goto-char (match-beginning 0))))
759 (defun nndoc-outlook-type-p ()
760 ;; FIXME: Is JMF the magic of outlook mailbox? -- ShengHuo.
761 (looking-at "JMF"))
763 (defun nndoc-oe-dbx-type-p ()
764 (looking-at (mm-string-to-multibyte "\317\255\022\376")))
766 (defun nndoc-read-little-endian ()
767 (+ (prog1 (char-after) (forward-char 1))
768 (lsh (prog1 (char-after) (forward-char 1)) 8)
769 (lsh (prog1 (char-after) (forward-char 1)) 16)
770 (lsh (prog1 (char-after) (forward-char 1)) 24)))
772 (defun nndoc-oe-dbx-decode-block ()
773 (list
774 (nndoc-read-little-endian) ;; this address
775 (nndoc-read-little-endian) ;; next address offset
776 (nndoc-read-little-endian) ;; blocksize
777 (nndoc-read-little-endian))) ;; next address
779 (defun nndoc-oe-dbx-dissection ()
780 (let ((i 0) blk p tp)
781 (goto-char 60117) ;; 0x0000EAD4+1
782 (setq p (point))
783 (unless (eobp)
784 (setq blk (nndoc-oe-dbx-decode-block)))
785 (while (and blk (> (car blk) 0) (or (zerop (nth 3 blk))
786 (> (nth 3 blk) p)))
787 (push (list (incf i) p nil nil nil 0) nndoc-dissection-alist)
788 (while (and (> (car blk) 0) (> (nth 3 blk) p))
789 (goto-char (1+ (nth 3 blk)))
790 (setq blk (nndoc-oe-dbx-decode-block)))
791 (if (or (<= (car blk) p)
792 (<= (nth 1 blk) 0)
793 (not (zerop (nth 3 blk))))
794 (setq blk nil)
795 (setq tp (+ (car blk) (nth 1 blk) 17))
796 (if (or (<= tp p) (>= tp (point-max)))
797 (setq blk nil)
798 (goto-char tp)
799 (setq p tp
800 blk (nndoc-oe-dbx-decode-block)))))))
802 (defun nndoc-oe-dbx-generate-article (article &optional head)
803 (let ((entry (cdr (assq article nndoc-dissection-alist)))
804 (cur (current-buffer))
805 (begin (point))
806 blk p)
807 (with-current-buffer nndoc-current-buffer
808 (setq p (car entry))
809 (while (> p (point-min))
810 (goto-char p)
811 (setq blk (nndoc-oe-dbx-decode-block))
812 (setq p (point))
813 (with-current-buffer cur
814 (insert-buffer-substring nndoc-current-buffer p (+ p (nth 2 blk))))
815 (setq p (1+ (nth 3 blk)))))
816 (goto-char begin)
817 (while (re-search-forward "\r$" nil t)
818 (delete-char -1))
819 (when head
820 (goto-char begin)
821 (when (search-forward "\n\n" nil t)
822 (setcar (cddddr entry) (count-lines (point) (point-max)))
823 (delete-region (1- (point)) (point-max))))
826 (defun nndoc-oe-dbx-generate-head (article)
827 (nndoc-oe-dbx-generate-article article 'head))
829 (defun nndoc-mail-in-mail-type-p ()
830 (let (found)
831 (save-excursion
832 (catch 'done
833 (while (re-search-forward "\n\n[-A-Za-z0-9]+:" nil t)
834 (setq found 0)
835 (forward-line)
836 (while (looking-at "[ \t]\\|[-A-Za-z0-9]+:")
837 (if (looking-at "[-A-Za-z0-9]+:")
838 (setq found (1+ found)))
839 (forward-line))
840 (if (and (> found 0) (looking-at "\n"))
841 (throw 'done 9999)))
842 nil))))
844 (defun nndoc-mail-in-mail-article-begin ()
845 (let (point found)
846 (if (catch 'done
847 (while (re-search-forward "\n\n\\([-A-Za-z0-9]+:\\)" nil t)
848 (setq found 0)
849 (setq point (match-beginning 1))
850 (forward-line)
851 (while (looking-at "[ \t]\\|[-A-Za-z0-9]+:")
852 (if (looking-at "[-A-Za-z0-9]+:")
853 (setq found (1+ found)))
854 (forward-line))
855 (if (and (> found 0) (looking-at "\n"))
856 (throw 'done t)))
857 nil)
858 (goto-char point))))
860 (deffoo nndoc-request-accept-article (group &optional server last)
861 nil)
864 ;;; Functions for dissecting the documents
867 (defun nndoc-search (regexp)
868 (prog1
869 (re-search-forward regexp nil t)
870 (beginning-of-line)))
872 (defun nndoc-dissect-buffer ()
873 "Go through the document and partition it into heads/bodies/articles."
874 (let ((i 0)
875 (first t)
876 art-begin head-begin head-end body-begin body-end)
877 (setq nndoc-dissection-alist nil)
878 (with-current-buffer nndoc-current-buffer
879 (goto-char (point-min))
880 ;; Remove blank lines.
881 (while (eq (following-char) ?\n)
882 (delete-char 1))
883 (when nndoc-pre-dissection-function
884 (save-excursion
885 (funcall nndoc-pre-dissection-function)))
886 (if nndoc-dissection-function
887 (funcall nndoc-dissection-function)
888 ;; Find the beginning of the file.
889 (when nndoc-file-begin
890 (nndoc-search nndoc-file-begin))
891 ;; Go through the file.
892 (while (if (and first nndoc-first-article)
893 (nndoc-search nndoc-first-article)
894 (if art-begin
895 (goto-char art-begin)
896 (nndoc-article-begin)))
897 (setq first nil
898 art-begin nil)
899 (cond (nndoc-head-begin-function
900 (funcall nndoc-head-begin-function))
901 (nndoc-head-begin
902 (nndoc-search nndoc-head-begin)))
903 (if (or (eobp)
904 (and nndoc-file-end
905 (looking-at nndoc-file-end)))
906 (goto-char (point-max))
907 (setq head-begin (point))
908 (nndoc-search (or nndoc-head-end "^$"))
909 (setq head-end (point))
910 (if nndoc-body-begin-function
911 (funcall nndoc-body-begin-function)
912 (nndoc-search (or nndoc-body-begin "^\n")))
913 (setq body-begin (point))
914 (or (and nndoc-body-end-function
915 (funcall nndoc-body-end-function))
916 (and nndoc-body-end
917 (nndoc-search nndoc-body-end))
918 (and (nndoc-article-begin)
919 (setq art-begin (point)))
920 (progn
921 (goto-char (point-max))
922 (when nndoc-file-end
923 (and (re-search-backward nndoc-file-end nil t)
924 (beginning-of-line)))))
925 (setq body-end (point))
926 (push (list (incf i) head-begin head-end body-begin body-end
927 (count-lines body-begin body-end))
928 nndoc-dissection-alist)))))
929 (setq nndoc-dissection-alist (nreverse nndoc-dissection-alist))))
931 (defun nndoc-article-begin ()
932 (if nndoc-article-begin-function
933 (funcall nndoc-article-begin-function)
934 (ignore-errors
935 (nndoc-search nndoc-article-begin))))
937 (defun nndoc-unquote-dashes ()
938 "Unquote quoted non-separators in digests."
939 (while (re-search-forward "^- -"nil t)
940 (replace-match "-" t t)))
942 ;; Against compiler warnings.
943 (defvar nndoc-mime-split-ordinal)
945 (defun nndoc-dissect-mime-parts ()
946 "Go through a MIME composite article and partition it into sub-articles.
947 When a MIME entity contains sub-entities, dissection produces one article for
948 the header of this entity, and one article per sub-entity."
949 (setq nndoc-dissection-alist nil
950 nndoc-mime-split-ordinal 0)
951 (with-current-buffer nndoc-current-buffer
952 (nndoc-dissect-mime-parts-sub (point-min) (point-max) nil nil nil)))
954 (defun nndoc-dissect-mime-parts-sub (head-begin body-end article-insert
955 position parent)
956 "Dissect an entity, within a composite MIME message.
957 The complete message or MIME entity extends from HEAD-BEGIN to BODY-END.
958 ARTICLE-INSERT should be added at beginning for generating a full article.
959 The string POSITION holds a dotted decimal representation of the article
960 position in the hierarchical structure, it is nil for the outer entity.
961 PARENT is the message-ID of the parent summary line, or nil for none."
962 (let ((case-fold-search t)
963 (message-id (nnmail-message-id))
964 head-end body-begin summary-insert message-rfc822 multipart-any
965 subject content-type type subtype boundary-regexp)
966 ;; Gracefully handle a missing body.
967 (goto-char head-begin)
968 (if (or (and (eq (char-after) ?\n) (or (forward-char 1) t))
969 (search-forward "\n\n" body-end t))
970 (setq head-end (1- (point))
971 body-begin (point))
972 (setq head-end body-end
973 body-begin body-end))
974 (narrow-to-region head-begin head-end)
975 ;; Save MIME attributes.
976 (goto-char head-begin)
977 (setq content-type (message-fetch-field "Content-Type"))
978 (when content-type
979 (when (string-match
980 "^ *\\([^ \t\n/;]+\\)/\\([^ \t\n/;]+\\)" content-type)
981 (setq type (downcase (match-string 1 content-type))
982 subtype (downcase (match-string 2 content-type))
983 message-rfc822 (and (string= type "message")
984 (string= subtype "rfc822"))
985 multipart-any (string= type "multipart")))
986 (when (string-match ";[ \t\n]*name=\\([^ \t\n;]+\\)" content-type)
987 (setq subject (match-string 1 content-type)))
988 (when (string-match "boundary=\"?\\([^\"\n]*[^\" \t\n]\\)" content-type)
989 (setq boundary-regexp (concat "^--"
990 (regexp-quote
991 (match-string 1 content-type))
992 "\\(--\\)?[ \t]*\n"))))
993 (unless subject
994 (when (or multipart-any (not article-insert))
995 (setq subject (message-fetch-field "Subject"))))
996 (unless type
997 (setq type "text"
998 subtype "plain"))
999 ;; Prepare the article and summary inserts.
1000 (unless article-insert
1001 (setq article-insert (buffer-string)
1002 head-end head-begin))
1003 ;; Fix MIME-Version
1004 (unless (string-match "MIME-Version:" article-insert)
1005 (setq article-insert
1006 (concat article-insert "MIME-Version: 1.0\n")))
1007 (setq summary-insert article-insert)
1008 ;; - summary Subject.
1009 (setq summary-insert
1010 (let ((line (concat "Subject: <" position
1011 (and position multipart-any ".")
1012 (and multipart-any "*")
1013 (and (or position multipart-any) " ")
1014 (cond ((string= subtype "plain") type)
1015 ((string= subtype "basic") type)
1016 (t subtype))
1018 (and subject " ")
1019 subject
1020 "\n")))
1021 (if (string-match "Subject:.*\n\\([ \t].*\n\\)*" summary-insert)
1022 (replace-match line t t summary-insert)
1023 (concat summary-insert line))))
1024 ;; - summary Message-ID.
1025 (setq summary-insert
1026 (let ((line (concat "Message-ID: " message-id "\n")))
1027 (if (string-match "Message-ID:.*\n\\([ \t].*\n\\)*" summary-insert)
1028 (replace-match line t t summary-insert)
1029 (concat summary-insert line))))
1030 ;; - summary References.
1031 (when parent
1032 (setq summary-insert
1033 (let ((line (concat "References: " parent "\n")))
1034 (if (string-match "References:.*\n\\([ \t].*\n\\)*"
1035 summary-insert)
1036 (replace-match line t t summary-insert)
1037 (concat summary-insert line)))))
1038 ;; Generate dissection information for this entity.
1039 (push (list (incf nndoc-mime-split-ordinal)
1040 head-begin head-end body-begin body-end
1041 (count-lines body-begin body-end)
1042 article-insert summary-insert)
1043 nndoc-dissection-alist)
1044 ;; Recurse for all sub-entities, if any.
1045 (widen)
1046 (cond
1047 (message-rfc822
1048 (save-excursion
1049 (nndoc-dissect-mime-parts-sub body-begin body-end nil
1050 position message-id)))
1051 ((and multipart-any boundary-regexp)
1052 (let ((part-counter 0)
1053 part-begin part-end eof-flag)
1054 (while (string-match "\
1055 ^\\(Lines\\|Content-\\(Type\\|Transfer-Encoding\\|Disposition\\)\\):.*\n\\([ \t].*\n\\)*"
1056 article-insert)
1057 (setq article-insert (replace-match "" t t article-insert)))
1058 (let ((case-fold-search nil))
1059 (goto-char body-begin)
1060 (setq eof-flag (not (re-search-forward boundary-regexp body-end t)))
1061 (while (not eof-flag)
1062 (setq part-begin (point))
1063 (cond ((re-search-forward boundary-regexp body-end t)
1064 (or (not (match-string 1))
1065 (string= (match-string 1) "")
1066 (setq eof-flag t))
1067 (forward-line -1)
1068 (setq part-end (point))
1069 (forward-line 1))
1070 (t (setq part-end body-end
1071 eof-flag t)))
1072 (save-excursion
1073 (nndoc-dissect-mime-parts-sub
1074 part-begin part-end article-insert
1075 (concat position
1076 (and position ".")
1077 (format "%d" (incf part-counter)))
1078 message-id)))))))))
1080 ;;;###autoload
1081 (defun nndoc-add-type (definition &optional position)
1082 "Add document DEFINITION to the list of nndoc document definitions.
1083 If POSITION is nil or `last', the definition will be added
1084 as the last checked definition, if t or `first', add as the
1085 first definition, and if any other symbol, add after that
1086 symbol in the alist."
1087 ;; First remove any old instances.
1088 (gnus-alist-pull (car definition) nndoc-type-alist)
1089 ;; Then enter the new definition in the proper place.
1090 (cond
1091 ((or (null position) (eq position 'last))
1092 (setq nndoc-type-alist (nconc nndoc-type-alist (list definition))))
1093 ((or (eq position t) (eq position 'first))
1094 (push definition nndoc-type-alist))
1096 (let ((list (memq (assq position nndoc-type-alist)
1097 nndoc-type-alist)))
1098 (unless list
1099 (error "No such position: %s" position))
1100 (setcdr list (cons definition (cdr list)))))))
1102 (provide 'nndoc)
1104 ;;; nndoc.el ends here