* nnheader.el (nnheader-update-marks-actions): Fix typo in last checkin.
[gnus.git] / lisp / nndoc.el
blob0dee06d29371fc1e3597c343b82659b1fe8f8c34
1 ;;; nndoc.el --- single file access for Gnus
3 ;; Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003,
4 ;; 2004, 2005, 2006, 2007, 2008, 2009, 2010 Free Software Foundation, Inc.
6 ;; Author: Lars Magne Ingebrigtsen <larsi@gnus.org>
7 ;; Masanobu UMEDA <umerin@flab.flab.fujitsu.junet>
8 ;; Keywords: news
10 ;; This file is part of GNU Emacs.
12 ;; GNU Emacs is free software: you can redistribute it and/or modify
13 ;; it under the terms of the GNU General Public License as published by
14 ;; the Free Software Foundation, either version 3 of the License, or
15 ;; (at your option) any later version.
17 ;; GNU Emacs is distributed in the hope that it will be useful,
18 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
19 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 ;; GNU General Public License for more details.
22 ;; You should have received a copy of the GNU General Public License
23 ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
25 ;;; Commentary:
27 ;; For Outlook mail boxes format, see http://mbx2mbox.sourceforge.net/
29 ;;; Code:
31 (require 'nnheader)
32 (require 'message)
33 (require 'nnmail)
34 (require 'nnoo)
35 (require 'gnus-util)
36 (require 'mm-util)
37 (eval-when-compile (require 'cl))
39 (nnoo-declare nndoc)
41 (defvoo nndoc-article-type 'guess
42 "*Type of the file.
43 One of `mbox', `babyl', `digest', `news', `rnews', `mmdf', `forward',
44 `rfc934', `rfc822-forward', `mime-parts', `standard-digest',
45 `slack-digest', `clari-briefs', `nsmail', `outlook', `oe-dbx',
46 `mailman', `exim-bounce', or `guess'.")
48 (defvoo nndoc-post-type 'mail
49 "*Whether the nndoc group is `mail' or `post'.")
51 (defvoo nndoc-open-document-hook 'nnheader-ms-strip-cr
52 "Hook run after opening a document.
53 The default function removes all trailing carriage returns
54 from the document.")
56 (defvar nndoc-type-alist
57 `((mmdf
58 (article-begin . "^\^A\^A\^A\^A\n")
59 (body-end . "^\^A\^A\^A\^A\n"))
60 (mime-digest
61 (article-begin . "")
62 (head-begin . "^ ?\n")
63 (head-end . "^ ?$")
64 (body-end . "")
65 (file-end . "")
66 (subtype digest guess))
67 (nsmail
68 (article-begin . "^From - "))
69 (news
70 (article-begin . "^Path:"))
71 (rnews
72 (article-begin . "^#! *rnews +\\([0-9]+\\) *\n")
73 (body-end-function . nndoc-rnews-body-end))
74 (mbox
75 (article-begin-function . nndoc-mbox-article-begin)
76 (body-end-function . nndoc-mbox-body-end))
77 (babyl
78 (article-begin . "\^_\^L *\n")
79 (body-end . "\^_")
80 (body-begin-function . nndoc-babyl-body-begin)
81 (head-begin-function . nndoc-babyl-head-begin))
82 (mime-parts
83 (generate-head-function . nndoc-generate-mime-parts-head)
84 (article-transform-function . nndoc-transform-mime-parts))
85 (exim-bounce
86 (article-begin . "^------ This is a copy of the message, including all the headers. ------\n\n")
87 (body-end-function . nndoc-exim-bounce-body-end-function))
88 (rfc934
89 (article-begin . "^--.*\n+")
90 (body-end . "^--.*$")
91 (prepare-body-function . nndoc-unquote-dashes))
92 (mailman
93 (article-begin . "^--__--__--\n\nMessage:")
94 (body-end . "^--__--__--$")
95 (prepare-body-function . nndoc-unquote-dashes))
96 (clari-briefs
97 (article-begin . "^ \\*")
98 (body-end . "^\t------*[ \t]^*\n^ \\*")
99 (body-begin . "^\t")
100 (head-end . "^\t")
101 (generate-head-function . nndoc-generate-clari-briefs-head)
102 (article-transform-function . nndoc-transform-clari-briefs))
104 (standard-digest
105 (first-article . ,(concat "^" (make-string 70 ?-) "\n *\n+"))
106 (article-begin . ,(concat "^\n" (make-string 30 ?-) "\n *\n+"))
107 (prepare-body-function . nndoc-unquote-dashes)
108 (body-end-function . nndoc-digest-body-end)
109 (head-end . "^ *$")
110 (body-begin . "^ *\n")
111 (file-end . "^End of .*digest.*[0-9].*\n\\*\\*\\|^End of.*Digest *$")
112 (subtype digest guess))
113 (slack-digest
114 (article-begin . "^------------------------------*[\n \t]+")
115 (head-end . "^ ?$")
116 (body-end-function . nndoc-digest-body-end)
117 (body-begin . "^ ?$")
118 (file-end . "^End of")
119 (prepare-body-function . nndoc-unquote-dashes)
120 (subtype digest guess))
121 (google
122 (pre-dissection-function . nndoc-decode-content-transfer-encoding)
123 (article-begin . "^== [0-9]+ of [0-9]+ ==$")
124 (head-begin . "^Date:")
125 (head-end . "^$")
126 (body-end-function . nndoc-digest-body-end)
127 (body-begin . "^$")
128 (file-end . "^==============================================================================$")
129 (prepare-body-function . nndoc-unquote-dashes)
130 (subtype digest guess))
131 (lanl-gov-announce
132 (article-begin . "^\\\\\\\\\n")
133 (head-begin . "^\\(Paper.*:\\|arXiv:\\)")
134 (head-end . "\\(^\\\\\\\\.*\n\\|-----------------\\)")
135 (body-begin . "")
136 (body-end . "\\(-------------------------------------------------\\|%-%-%-%-%-%-%-%-%-%-%-%-%-%-\\|%%--%%--%%--%%--%%--%%--%%--%%--\\|%%%---%%%---%%%---%%%---\\)")
137 (file-end . "\\(^Title: Recent Seminal\\|%%%---%%%---%%%---%%%---\\)")
138 (generate-head-function . nndoc-generate-lanl-gov-head)
139 (article-transform-function . nndoc-transform-lanl-gov-announce)
140 (subtype preprints guess))
141 (git
142 (file-begin . "\n- Log ---.*")
143 (article-begin . "^commit ")
144 (head-begin . "^Author: ")
145 (body-begin . "^$")
146 (file-end . "\n-----------------------------------------------------------------------")
147 (article-transform-function . nndoc-transform-git-article)
148 (header-transform-function . nndoc-transform-git-headers))
149 (rfc822-forward
150 (article-begin . "^\n+")
151 (body-end-function . nndoc-rfc822-forward-body-end-function)
152 (generate-head-function . nndoc-rfc822-forward-generate-head)
153 (generate-article-function . nndoc-rfc822-forward-generate-article))
154 (outlook
155 (article-begin-function . nndoc-outlook-article-begin)
156 (body-end . "\0"))
157 (oe-dbx ;; Outlook Express DBX format
158 (dissection-function . nndoc-oe-dbx-dissection)
159 (generate-head-function . nndoc-oe-dbx-generate-head)
160 (generate-article-function . nndoc-oe-dbx-generate-article))
161 (forward
162 (article-begin . "^-+ \\(Start of \\)?forwarded message.*\n+")
163 (body-end . "^-+ End \\(of \\)?forwarded message.*$")
164 (prepare-body-function . nndoc-unquote-dashes))
165 (mail-in-mail ;; Wild guess on mailer daemon's messages or others
166 (article-begin-function . nndoc-mail-in-mail-article-begin))
167 (guess
168 (guess . t)
169 (subtype nil))
170 (digest
171 (guess . t)
172 (subtype nil))
173 (preprints
174 (guess . t)
175 (subtype nil))))
177 (defvar nndoc-binary-file-names ".[Dd][Bb][Xx]$"
178 "Regexp for binary nndoc file names.")
181 (defvoo nndoc-file-begin nil)
182 (defvoo nndoc-first-article nil)
183 (defvoo nndoc-article-begin nil)
184 (defvoo nndoc-head-begin nil)
185 (defvoo nndoc-head-end nil)
186 (defvoo nndoc-file-end nil)
187 (defvoo nndoc-body-begin nil)
188 (defvoo nndoc-body-end-function nil)
189 (defvoo nndoc-body-begin-function nil)
190 (defvoo nndoc-head-begin-function nil)
191 (defvoo nndoc-body-end nil)
192 ;; nndoc-dissection-alist is a list of sublists. Each sublist holds the
193 ;; following items. ARTICLE acts as the association key and is an ordinal
194 ;; starting at 1. HEAD-BEGIN [0], HEAD-END [1], BODY-BEGIN [2] and BODY-END
195 ;; [3] are positions in the `nndoc' buffer. LINE-COUNT [4] is a count of
196 ;; lines in the body. For MIME dissections only, ARTICLE-INSERT [5] and
197 ;; SUMMARY-INSERT [6] give headers to insert for full article or summary line
198 ;; generation, respectively. Other headers usually follow directly from the
199 ;; buffer. Value `nil' means no insert.
200 (defvoo nndoc-dissection-alist nil)
201 (defvoo nndoc-prepare-body-function nil)
202 (defvoo nndoc-generate-head-function nil)
203 (defvoo nndoc-article-transform-function nil)
204 (defvoo nndoc-header-transform-function nil)
205 (defvoo nndoc-article-begin-function nil)
206 (defvoo nndoc-generate-article-function nil)
207 (defvoo nndoc-dissection-function nil)
208 (defvoo nndoc-pre-dissection-function nil)
210 (defvoo nndoc-status-string "")
211 (defvoo nndoc-group-alist nil)
212 (defvoo nndoc-current-buffer nil
213 "Current nndoc news buffer.")
214 (defvoo nndoc-address nil)
216 (defconst nndoc-version "nndoc 1.0"
217 "nndoc version.")
221 ;;; Interface functions
223 (nnoo-define-basics nndoc)
225 (deffoo nndoc-retrieve-headers (articles &optional newsgroup server fetch-old)
226 (when (nndoc-possibly-change-buffer newsgroup server)
227 (with-current-buffer nntp-server-buffer
228 (erase-buffer)
229 (let (article entry)
230 (if (stringp (car articles))
231 'headers
232 (while articles
233 (when (setq entry (cdr (assq (setq article (pop articles))
234 nndoc-dissection-alist)))
235 (let ((start (point)))
236 (insert (format "221 %d Article retrieved.\n" article))
237 (if nndoc-generate-head-function
238 (funcall nndoc-generate-head-function article)
239 (insert-buffer-substring
240 nndoc-current-buffer (car entry) (nth 1 entry)))
241 (goto-char (point-max))
242 (unless (eq (char-after (1- (point))) ?\n)
243 (insert "\n"))
244 (insert (format "Lines: %d\n" (nth 4 entry)))
245 (insert ".\n")
246 (when nndoc-header-transform-function
247 (save-excursion
248 (save-restriction
249 (narrow-to-region start (point))
250 (funcall nndoc-header-transform-function entry)))))))
251 (nnheader-fold-continuation-lines)
252 'headers)))))
254 (deffoo nndoc-request-article (article &optional newsgroup server buffer)
255 (nndoc-possibly-change-buffer newsgroup server)
256 (save-excursion
257 (let ((buffer (or buffer nntp-server-buffer))
258 (entry (cdr (assq article nndoc-dissection-alist)))
259 beg)
260 (set-buffer buffer)
261 (erase-buffer)
262 (when entry
263 (cond
264 ((stringp article) nil)
265 (nndoc-generate-article-function
266 (funcall nndoc-generate-article-function article))
268 (insert-buffer-substring
269 nndoc-current-buffer (car entry) (nth 1 entry))
270 (insert "\n")
271 (setq beg (point))
272 (insert-buffer-substring
273 nndoc-current-buffer (nth 2 entry) (nth 3 entry))
274 (goto-char beg)
275 (when nndoc-prepare-body-function
276 (funcall nndoc-prepare-body-function))
277 (when nndoc-article-transform-function
278 (funcall nndoc-article-transform-function article))
279 t))))))
281 (deffoo nndoc-request-group (group &optional server dont-check info)
282 "Select news GROUP."
283 (let (number)
284 (cond
285 ((not (nndoc-possibly-change-buffer group server))
286 (nnheader-report 'nndoc "No such file or buffer: %s"
287 nndoc-address))
288 (dont-check
289 (nnheader-report 'nndoc "Selected group %s" group)
291 ((zerop (setq number (length nndoc-dissection-alist)))
292 (nndoc-close-group group)
293 (nnheader-report 'nndoc "No articles in group %s" group))
295 (nnheader-insert "211 %d %d %d %s\n" number 1 number group)))))
297 (deffoo nndoc-retrieve-groups (groups &optional server)
298 (dolist (group groups)
299 (nndoc-request-group group server))
302 (deffoo nndoc-request-type (group &optional article)
303 (cond ((not article) 'unknown)
304 (nndoc-post-type nndoc-post-type)
305 (t 'unknown)))
307 (deffoo nndoc-close-group (group &optional server)
308 (nndoc-possibly-change-buffer group server)
309 (and nndoc-current-buffer
310 (buffer-name nndoc-current-buffer)
311 (kill-buffer nndoc-current-buffer))
312 (setq nndoc-group-alist (delq (assoc group nndoc-group-alist)
313 nndoc-group-alist))
314 (setq nndoc-current-buffer nil)
315 (nnoo-close-server 'nndoc server)
316 (setq nndoc-dissection-alist nil)
319 (deffoo nndoc-request-list (&optional server)
322 (deffoo nndoc-request-newgroups (date &optional server)
323 nil)
325 (deffoo nndoc-request-list-newsgroups (&optional server)
326 nil)
329 ;;; Internal functions.
331 (defun nndoc-possibly-change-buffer (group source)
332 (let (buf)
333 (cond
334 ;; The current buffer is this group's buffer.
335 ((and nndoc-current-buffer
336 (buffer-name nndoc-current-buffer)
337 (eq nndoc-current-buffer
338 (setq buf (cdr (assoc group nndoc-group-alist))))))
339 ;; We change buffers by taking an old from the group alist.
340 ;; `source' is either a string (a file name) or a buffer object.
341 (buf
342 (setq nndoc-current-buffer buf))
343 ;; It's a totally new group.
344 ((or (and (bufferp nndoc-address)
345 (buffer-name nndoc-address))
346 (and (stringp nndoc-address)
347 (file-exists-p nndoc-address)
348 (not (file-directory-p nndoc-address))))
349 (push (cons group (setq nndoc-current-buffer
350 (get-buffer-create
351 (concat " *nndoc " group "*"))))
352 nndoc-group-alist)
353 (setq nndoc-dissection-alist nil)
354 (with-current-buffer nndoc-current-buffer
355 (erase-buffer)
356 (if (and (stringp nndoc-address)
357 (string-match nndoc-binary-file-names nndoc-address))
358 (let ((coding-system-for-read 'binary))
359 (mm-insert-file-contents nndoc-address))
360 (if (stringp nndoc-address)
361 (nnheader-insert-file-contents nndoc-address)
362 (insert-buffer-substring nndoc-address))
363 (run-hooks 'nndoc-open-document-hook)))))
364 ;; Initialize the nndoc structures according to this new document.
365 (when (and nndoc-current-buffer
366 (not nndoc-dissection-alist))
367 (with-current-buffer nndoc-current-buffer
368 (nndoc-set-delims)
369 (if (eq nndoc-article-type 'mime-parts)
370 (nndoc-dissect-mime-parts)
371 (nndoc-dissect-buffer))))
372 (unless nndoc-current-buffer
373 (nndoc-close-server))
374 ;; Return whether we managed to select a file.
375 nndoc-current-buffer))
378 ;;; Deciding what document type we have
381 (defun nndoc-set-delims ()
382 "Set the nndoc delimiter variables according to the type of the document."
383 (let ((vars '(nndoc-file-begin
384 nndoc-first-article
385 nndoc-article-begin-function
386 nndoc-head-begin nndoc-head-end
387 nndoc-file-end nndoc-article-begin
388 nndoc-body-begin nndoc-body-end-function nndoc-body-end
389 nndoc-prepare-body-function nndoc-article-transform-function
390 nndoc-header-transform-function
391 nndoc-generate-head-function nndoc-body-begin-function
392 nndoc-head-begin-function
393 nndoc-generate-article-function
394 nndoc-dissection-function
395 nndoc-pre-dissection-function)))
396 (while vars
397 (set (pop vars) nil)))
398 (let (defs)
399 ;; Guess away until we find the real file type.
400 (while (assq 'guess (setq defs (cdr (assq nndoc-article-type
401 nndoc-type-alist))))
402 (setq nndoc-article-type (nndoc-guess-type nndoc-article-type)))
403 ;; Set the nndoc variables.
404 (while defs
405 (set (intern (format "nndoc-%s" (caar defs)))
406 (cdr (pop defs))))))
408 (defun nndoc-guess-type (subtype)
409 (let ((alist nndoc-type-alist)
410 results result entry)
411 (while (and (not result)
412 (setq entry (pop alist)))
413 (when (memq subtype (or (cdr (assq 'subtype entry)) '(guess)))
414 (goto-char (point-min))
415 ;; Remove blank lines.
416 (while (eq (following-char) ?\n)
417 (delete-char 1))
418 (when (numberp (setq result (funcall (intern
419 (format "nndoc-%s-type-p"
420 (car entry))))))
421 (push (cons result entry) results)
422 (setq result nil))))
423 (unless (or result results)
424 (error "Document is not of any recognized type"))
425 (if result
426 (car entry)
427 (cadar (last (sort results 'car-less-than-car))))))
430 ;;; Built-in type predicates and functions
433 (defun nndoc-mbox-type-p ()
434 (when (looking-at message-unix-mail-delimiter)
437 (defun nndoc-mbox-article-begin ()
438 (when (re-search-forward (concat "^" message-unix-mail-delimiter) nil t)
439 (goto-char (match-beginning 0))))
441 (defun nndoc-mbox-body-end ()
442 (let ((beg (point))
443 len end)
444 (when
445 (save-excursion
446 (and (re-search-backward
447 (concat "^" message-unix-mail-delimiter) nil t)
448 (setq end (point))
449 (search-forward "\n\n" beg t)
450 (re-search-backward
451 "^Content-Length:[ \t]*\\([0-9]+\\) *$" end t)
452 (setq len (string-to-number (match-string 1)))
453 (search-forward "\n\n" beg t)
454 (unless (= (setq len (+ (point) len)) (point-max))
455 (and (< len (point-max))
456 (goto-char len)
457 (looking-at message-unix-mail-delimiter)))))
458 (goto-char len))))
460 (defun nndoc-mmdf-type-p ()
461 (when (looking-at "\^A\^A\^A\^A$")
464 (defun nndoc-news-type-p ()
465 (when (looking-at "^Path:.*\n")
468 (defun nndoc-rnews-type-p ()
469 (when (looking-at "#! *rnews")
472 (defun nndoc-rnews-body-end ()
473 (and (re-search-backward nndoc-article-begin nil t)
474 (forward-line 1)
475 (goto-char (+ (point) (string-to-number (match-string 1))))))
477 (defun nndoc-google-type-p ()
478 (when (re-search-forward "^=3D=3D 1 of [0-9]+ =3D=3D$" nil t)
481 (defun nndoc-decode-content-transfer-encoding ()
482 (let ((encoding
483 (save-restriction
484 (message-narrow-to-head)
485 (message-fetch-field "content-transfer-encoding"))))
486 (when (and encoding
487 (search-forward "\n\n" nil t))
488 (save-restriction
489 (narrow-to-region (point) (point-max))
490 (mm-decode-content-transfer-encoding
491 (intern (downcase (mail-header-strip encoding))))))))
493 (defun nndoc-babyl-type-p ()
494 (when (re-search-forward "\^_\^L *\n" nil t)
497 (defun nndoc-babyl-body-begin ()
498 (re-search-forward "^\n" nil t)
499 (when (looking-at "\\*\\*\\* EOOH \\*\\*\\*")
500 (let ((next (or (save-excursion
501 (re-search-forward nndoc-article-begin nil t))
502 (point-max))))
503 (unless (re-search-forward "^\n" next t)
504 (goto-char next)
505 (forward-line -1)
506 (insert "\n")
507 (forward-line -1)))))
509 (defun nndoc-babyl-head-begin ()
510 (when (re-search-forward "^[0-9].*\n" nil t)
511 (when (looking-at "\\*\\*\\* EOOH \\*\\*\\*")
512 (forward-line 1))
515 (defun nndoc-forward-type-p ()
516 (when (and (re-search-forward "^-+ \\(Start of \\)?forwarded message.*\n+"
517 nil t)
518 (looking-at "[\r\n]*[a-zA-Z][a-zA-Z0-9-]*:\\|^>?From "))
521 (defun nndoc-rfc934-type-p ()
522 (when (and (re-search-forward "^-+ Start of forwarded.*\n+" nil t)
523 (not (re-search-forward "^Subject:.*digest" nil t))
524 (not (re-search-backward "^From:" nil t 2))
525 (not (re-search-forward "^From:" nil t 2)))
528 (defun nndoc-mailman-type-p ()
529 (when (re-search-forward "^--__--__--\n+" nil t)
532 (defun nndoc-rfc822-forward-type-p ()
533 (save-restriction
534 (message-narrow-to-head)
535 (when (re-search-forward "^Content-Type: *message/rfc822" nil t)
536 t)))
538 (defun nndoc-rfc822-forward-body-end-function ()
539 (goto-char (point-max)))
541 (defun nndoc-rfc822-forward-generate-article (article &optional head)
542 (let ((entry (cdr (assq article nndoc-dissection-alist)))
543 (begin (point))
544 encoding)
545 (with-current-buffer nndoc-current-buffer
546 (save-restriction
547 (message-narrow-to-head)
548 (setq encoding (message-fetch-field "content-transfer-encoding"))))
549 (insert-buffer-substring nndoc-current-buffer (car entry) (nth 3 entry))
550 (when encoding
551 (save-restriction
552 (narrow-to-region begin (point-max))
553 (mm-decode-content-transfer-encoding
554 (intern (downcase (mail-header-strip encoding))))))
555 (when head
556 (goto-char begin)
557 (when (search-forward "\n\n" nil t)
558 (delete-region (1- (point)) (point-max)))))
561 (defun nndoc-rfc822-forward-generate-head (article)
562 (nndoc-rfc822-forward-generate-article article 'head))
564 (defun nndoc-mime-parts-type-p ()
565 (let ((case-fold-search t)
566 (limit (search-forward "\n\n" nil t)))
567 (goto-char (point-min))
568 (when (and limit
569 (re-search-forward
570 (concat "\
571 ^Content-Type:[ \t]*multipart/[a-z]+ *; *\\(\\(\n[ \t]\\)?.*;\\)*"
572 "\\(\n[ \t]\\)?[ \t]*boundary=\"?[^\"\n]*[^\" \t\n]")
573 limit t))
574 t)))
576 (defun nndoc-transform-mime-parts (article)
577 (let* ((entry (cdr (assq article nndoc-dissection-alist)))
578 (headers (nth 5 entry)))
579 (when headers
580 (goto-char (point-min))
581 (insert headers))))
583 (defun nndoc-generate-mime-parts-head (article)
584 (let* ((entry (cdr (assq article nndoc-dissection-alist)))
585 (headers (nth 6 entry)))
586 (save-restriction
587 (narrow-to-region (point) (point))
588 (insert-buffer-substring
589 nndoc-current-buffer (car entry) (nth 1 entry))
590 (goto-char (point-max)))
591 (when headers
592 (insert headers))))
594 (defun nndoc-clari-briefs-type-p ()
595 (when (let ((case-fold-search nil))
596 (re-search-forward "^\t[^a-z]+ ([^a-z]+) --" nil t))
599 (defun nndoc-transform-clari-briefs (article)
600 (goto-char (point-min))
601 (when (looking-at " *\\*\\(.*\\)\n")
602 (replace-match "" t t))
603 (nndoc-generate-clari-briefs-head article))
605 (defun nndoc-generate-clari-briefs-head (article)
606 (let ((entry (cdr (assq article nndoc-dissection-alist)))
607 subject from)
608 (with-current-buffer nndoc-current-buffer
609 (save-restriction
610 (narrow-to-region (car entry) (nth 3 entry))
611 (goto-char (point-min))
612 (when (looking-at " *\\*\\(.*\\)$")
613 (setq subject (match-string 1))
614 (when (string-match "[ \t]+$" subject)
615 (setq subject (substring subject 0 (match-beginning 0)))))
616 (when
617 (let ((case-fold-search nil))
618 (re-search-forward
619 "^\t\\([^a-z]+\\(,[^(]+\\)? ([^a-z]+)\\) --" nil t))
620 (setq from (match-string 1)))))
621 (insert "From: " "clari@clari.net (" (or from "unknown") ")"
622 "\nSubject: " (or subject "(no subject)") "\n")))
624 (defun nndoc-exim-bounce-type-p ()
625 (and (re-search-forward "^------ This is a copy of the message, including all the headers. ------" nil t)
628 (defun nndoc-exim-bounce-body-end-function ()
629 (goto-char (point-max)))
632 (defun nndoc-mime-digest-type-p ()
633 (let ((case-fold-search t)
634 boundary-id b-delimiter entry)
635 (when (and
636 (re-search-forward
637 (concat "^Content-Type: *multipart/digest;[ \t\n]*[ \t]"
638 "boundary=\"?\\([^\"\n]*[^\" \t\n]\\)")
639 nil t)
640 (match-beginning 1))
641 (setq boundary-id (match-string 1)
642 b-delimiter (concat "\n--" boundary-id "[ \t]*$"))
643 (setq entry (assq 'mime-digest nndoc-type-alist))
644 (setcdr entry
645 (list
646 (cons 'head-begin "^ ?\n")
647 (cons 'head-end "^ ?$")
648 (cons 'body-begin "^ ?\n")
649 (cons 'article-begin b-delimiter)
650 (cons 'body-end-function 'nndoc-digest-body-end)
651 (cons 'file-end (concat "^--" boundary-id "--[ \t]*$"))))
652 t)))
654 (defun nndoc-standard-digest-type-p ()
655 (when (and (re-search-forward (concat "^" (make-string 70 ?-) "\n\n") nil t)
656 (re-search-forward
657 (concat "\n\n" (make-string 30 ?-) "\n\n") nil t))
660 (defun nndoc-digest-body-end ()
661 (and (re-search-forward nndoc-article-begin nil t)
662 (goto-char (match-beginning 0))))
664 (defun nndoc-slack-digest-type-p ()
667 (defun nndoc-git-type-p ()
668 (and (search-forward "\n- Log ---" nil t)
669 (search-forward "\ncommit " nil t)
670 (search-forward "\nAuthor: " nil t)))
672 (defun nndoc-transform-git-article (article)
673 (goto-char (point-min))
674 (when (re-search-forward "^Author: " nil t)
675 (replace-match "From: " t t)))
677 (defun nndoc-transform-git-headers (entry)
678 (goto-char (point-min))
679 (when (re-search-forward "^Author: " nil t)
680 (replace-match "From: " t t))
681 (let (subject)
682 (with-current-buffer nndoc-current-buffer
683 (goto-char (car entry))
684 (when (search-forward "\n\n" nil t)
685 (setq subject (buffer-substring (point) (line-end-position)))))
686 (when subject
687 (goto-char (point-min))
688 (forward-line 1)
689 (insert (format "Subject: %s\n" subject)))))
691 (defun nndoc-lanl-gov-announce-type-p ()
692 (when (let ((case-fold-search nil))
693 (re-search-forward "^\\\\\\\\\n\\(Paper\\( (\\*cross-listing\\*)\\)?: [a-zA-Z-\\.]+/[0-9]+\\|arXiv:\\)" nil t))
696 (defun nndoc-transform-lanl-gov-announce (article)
697 (let ((case-fold-search nil))
698 (goto-char (point-max))
699 (when (re-search-backward "^\\\\\\\\ +( *\\([^ ]*\\) , *\\([^ ]*\\))" nil t)
700 (replace-match "\n\nGet it at \\1 (\\2)" t nil))
701 (goto-char (point-min))
702 (while (re-search-forward "^\\\\\\\\$" nil t)
703 (replace-match "" t nil))
704 (goto-char (point-min))
705 (when (re-search-forward "^replaced with revised version +\\(.*[^ ]\\) +" nil t)
706 (replace-match "Date: \\1 (revised) " t nil))
707 (goto-char (point-min))
708 (unless (re-search-forward "^From" nil t)
709 (goto-char (point-min))
710 (when (re-search-forward "^Authors?: \\(.*\\)" nil t)
711 (goto-char (point-min))
712 (insert "From: " (match-string 1) "\n")))
713 (when (re-search-forward "^arXiv:" nil t)
714 (replace-match "Paper: arXiv:" t nil))))
716 (defun nndoc-generate-lanl-gov-head (article)
717 (let ((entry (cdr (assq article nndoc-dissection-alist)))
718 (from "<no address given>")
719 subject date)
720 (with-current-buffer nndoc-current-buffer
721 (save-restriction
722 (narrow-to-region (car entry) (nth 1 entry))
723 (goto-char (point-min))
724 (when (looking-at "^\\(Paper.*: \\|arXiv:\\)\\([0-9a-zA-Z-\\./]+\\)")
725 (setq subject (concat " (" (match-string 2) ")"))
726 (when (re-search-forward "^From: \\(.*\\)" nil t)
727 (setq from (concat "<"
728 (cadr (funcall gnus-extract-address-components
729 (match-string 1))) ">")))
730 (if (re-search-forward "^Date: +\\([^(]*\\)" nil t)
731 (setq date (match-string 1))
732 (when (re-search-forward "^replaced with revised version +\\([^(]*\\)" nil t)
733 (setq date (match-string 1))))
734 (when (re-search-forward "^Title: \\([^\f]*\\)\nAuthors?: \\(.*\\)"
735 nil t)
736 (setq subject (concat (match-string 1) subject))
737 (setq from (concat (match-string 2) " " from))))))
738 (while (and from (string-match "(\[^)\]*)" from))
739 (setq from (replace-match "" t t from)))
740 (insert "From: " (or from "unknown")
741 "\nSubject: " (or subject "(no subject)") "\n")
742 (if date (insert "Date: " date))))
744 (defun nndoc-nsmail-type-p ()
745 (when (looking-at "From - ")
748 (defun nndoc-outlook-article-begin ()
749 (prog1 (re-search-forward "From:\\|Received:" nil t)
750 (goto-char (match-beginning 0))))
752 (defun nndoc-outlook-type-p ()
753 ;; FIXME: Is JMF the magic of outlook mailbox? -- ShengHuo.
754 (looking-at "JMF"))
756 (defun nndoc-oe-dbx-type-p ()
757 (looking-at (mm-string-to-multibyte "\317\255\022\376")))
759 (defun nndoc-read-little-endian ()
760 (+ (prog1 (char-after) (forward-char 1))
761 (lsh (prog1 (char-after) (forward-char 1)) 8)
762 (lsh (prog1 (char-after) (forward-char 1)) 16)
763 (lsh (prog1 (char-after) (forward-char 1)) 24)))
765 (defun nndoc-oe-dbx-decode-block ()
766 (list
767 (nndoc-read-little-endian) ;; this address
768 (nndoc-read-little-endian) ;; next address offset
769 (nndoc-read-little-endian) ;; blocksize
770 (nndoc-read-little-endian))) ;; next address
772 (defun nndoc-oe-dbx-dissection ()
773 (let ((i 0) blk p tp)
774 (goto-char 60117) ;; 0x0000EAD4+1
775 (setq p (point))
776 (unless (eobp)
777 (setq blk (nndoc-oe-dbx-decode-block)))
778 (while (and blk (> (car blk) 0) (or (zerop (nth 3 blk))
779 (> (nth 3 blk) p)))
780 (push (list (incf i) p nil nil nil 0) nndoc-dissection-alist)
781 (while (and (> (car blk) 0) (> (nth 3 blk) p))
782 (goto-char (1+ (nth 3 blk)))
783 (setq blk (nndoc-oe-dbx-decode-block)))
784 (if (or (<= (car blk) p)
785 (<= (nth 1 blk) 0)
786 (not (zerop (nth 3 blk))))
787 (setq blk nil)
788 (setq tp (+ (car blk) (nth 1 blk) 17))
789 (if (or (<= tp p) (>= tp (point-max)))
790 (setq blk nil)
791 (goto-char tp)
792 (setq p tp
793 blk (nndoc-oe-dbx-decode-block)))))))
795 (defun nndoc-oe-dbx-generate-article (article &optional head)
796 (let ((entry (cdr (assq article nndoc-dissection-alist)))
797 (cur (current-buffer))
798 (begin (point))
799 blk p)
800 (with-current-buffer nndoc-current-buffer
801 (setq p (car entry))
802 (while (> p (point-min))
803 (goto-char p)
804 (setq blk (nndoc-oe-dbx-decode-block))
805 (setq p (point))
806 (with-current-buffer cur
807 (insert-buffer-substring nndoc-current-buffer p (+ p (nth 2 blk))))
808 (setq p (1+ (nth 3 blk)))))
809 (goto-char begin)
810 (while (re-search-forward "\r$" nil t)
811 (delete-char -1))
812 (when head
813 (goto-char begin)
814 (when (search-forward "\n\n" nil t)
815 (setcar (cddddr entry) (count-lines (point) (point-max)))
816 (delete-region (1- (point)) (point-max))))
819 (defun nndoc-oe-dbx-generate-head (article)
820 (nndoc-oe-dbx-generate-article article 'head))
822 (defun nndoc-mail-in-mail-type-p ()
823 (let (found)
824 (save-excursion
825 (catch 'done
826 (while (re-search-forward "\n\n[-A-Za-z0-9]+:" nil t)
827 (setq found 0)
828 (forward-line)
829 (while (looking-at "[ \t]\\|[-A-Za-z0-9]+:")
830 (if (looking-at "[-A-Za-z0-9]+:")
831 (setq found (1+ found)))
832 (forward-line))
833 (if (and (> found 0) (looking-at "\n"))
834 (throw 'done 9999)))
835 nil))))
837 (defun nndoc-mail-in-mail-article-begin ()
838 (let (point found)
839 (if (catch 'done
840 (while (re-search-forward "\n\n\\([-A-Za-z0-9]+:\\)" nil t)
841 (setq found 0)
842 (setq point (match-beginning 1))
843 (forward-line)
844 (while (looking-at "[ \t]\\|[-A-Za-z0-9]+:")
845 (if (looking-at "[-A-Za-z0-9]+:")
846 (setq found (1+ found)))
847 (forward-line))
848 (if (and (> found 0) (looking-at "\n"))
849 (throw 'done t)))
850 nil)
851 (goto-char point))))
853 (deffoo nndoc-request-accept-article (group &optional server last)
854 nil)
857 ;;; Functions for dissecting the documents
860 (defun nndoc-search (regexp)
861 (prog1
862 (re-search-forward regexp nil t)
863 (beginning-of-line)))
865 (defun nndoc-dissect-buffer ()
866 "Go through the document and partition it into heads/bodies/articles."
867 (let ((i 0)
868 (first t)
869 art-begin head-begin head-end body-begin body-end)
870 (setq nndoc-dissection-alist nil)
871 (with-current-buffer nndoc-current-buffer
872 (goto-char (point-min))
873 ;; Remove blank lines.
874 (while (eq (following-char) ?\n)
875 (delete-char 1))
876 (when nndoc-pre-dissection-function
877 (save-excursion
878 (funcall nndoc-pre-dissection-function)))
879 (if nndoc-dissection-function
880 (funcall nndoc-dissection-function)
881 ;; Find the beginning of the file.
882 (when nndoc-file-begin
883 (nndoc-search nndoc-file-begin))
884 ;; Go through the file.
885 (while (if (and first nndoc-first-article)
886 (nndoc-search nndoc-first-article)
887 (if art-begin
888 (goto-char art-begin)
889 (nndoc-article-begin)))
890 (setq first nil
891 art-begin nil)
892 (cond (nndoc-head-begin-function
893 (funcall nndoc-head-begin-function))
894 (nndoc-head-begin
895 (nndoc-search nndoc-head-begin)))
896 (if (or (eobp)
897 (and nndoc-file-end
898 (looking-at nndoc-file-end)))
899 (goto-char (point-max))
900 (setq head-begin (point))
901 (nndoc-search (or nndoc-head-end "^$"))
902 (setq head-end (point))
903 (if nndoc-body-begin-function
904 (funcall nndoc-body-begin-function)
905 (nndoc-search (or nndoc-body-begin "^\n")))
906 (setq body-begin (point))
907 (or (and nndoc-body-end-function
908 (funcall nndoc-body-end-function))
909 (and nndoc-body-end
910 (nndoc-search nndoc-body-end))
911 (and (nndoc-article-begin)
912 (setq art-begin (point)))
913 (progn
914 (goto-char (point-max))
915 (when nndoc-file-end
916 (and (re-search-backward nndoc-file-end nil t)
917 (beginning-of-line)))))
918 (setq body-end (point))
919 (push (list (incf i) head-begin head-end body-begin body-end
920 (count-lines body-begin body-end))
921 nndoc-dissection-alist)))))
922 (setq nndoc-dissection-alist (nreverse nndoc-dissection-alist))))
924 (defun nndoc-article-begin ()
925 (if nndoc-article-begin-function
926 (funcall nndoc-article-begin-function)
927 (ignore-errors
928 (nndoc-search nndoc-article-begin))))
930 (defun nndoc-unquote-dashes ()
931 "Unquote quoted non-separators in digests."
932 (while (re-search-forward "^- -"nil t)
933 (replace-match "-" t t)))
935 ;; Against compiler warnings.
936 (defvar nndoc-mime-split-ordinal)
938 (defun nndoc-dissect-mime-parts ()
939 "Go through a MIME composite article and partition it into sub-articles.
940 When a MIME entity contains sub-entities, dissection produces one article for
941 the header of this entity, and one article per sub-entity."
942 (setq nndoc-dissection-alist nil
943 nndoc-mime-split-ordinal 0)
944 (with-current-buffer nndoc-current-buffer
945 (nndoc-dissect-mime-parts-sub (point-min) (point-max) nil nil nil)))
947 (defun nndoc-dissect-mime-parts-sub (head-begin body-end article-insert
948 position parent)
949 "Dissect an entity, within a composite MIME message.
950 The complete message or MIME entity extends from HEAD-BEGIN to BODY-END.
951 ARTICLE-INSERT should be added at beginning for generating a full article.
952 The string POSITION holds a dotted decimal representation of the article
953 position in the hierarchical structure, it is nil for the outer entity.
954 PARENT is the message-ID of the parent summary line, or nil for none."
955 (let ((case-fold-search t)
956 (message-id (nnmail-message-id))
957 head-end body-begin summary-insert message-rfc822 multipart-any
958 subject content-type type subtype boundary-regexp)
959 ;; Gracefully handle a missing body.
960 (goto-char head-begin)
961 (if (or (and (eq (char-after) ?\n) (or (forward-char 1) t))
962 (search-forward "\n\n" body-end t))
963 (setq head-end (1- (point))
964 body-begin (point))
965 (setq head-end body-end
966 body-begin body-end))
967 (narrow-to-region head-begin head-end)
968 ;; Save MIME attributes.
969 (goto-char head-begin)
970 (setq content-type (message-fetch-field "Content-Type"))
971 (when content-type
972 (when (string-match
973 "^ *\\([^ \t\n/;]+\\)/\\([^ \t\n/;]+\\)" content-type)
974 (setq type (downcase (match-string 1 content-type))
975 subtype (downcase (match-string 2 content-type))
976 message-rfc822 (and (string= type "message")
977 (string= subtype "rfc822"))
978 multipart-any (string= type "multipart")))
979 (when (string-match ";[ \t\n]*name=\\([^ \t\n;]+\\)" content-type)
980 (setq subject (match-string 1 content-type)))
981 (when (string-match "boundary=\"?\\([^\"\n]*[^\" \t\n]\\)" content-type)
982 (setq boundary-regexp (concat "^--"
983 (regexp-quote
984 (match-string 1 content-type))
985 "\\(--\\)?[ \t]*\n"))))
986 (unless subject
987 (when (or multipart-any (not article-insert))
988 (setq subject (message-fetch-field "Subject"))))
989 (unless type
990 (setq type "text"
991 subtype "plain"))
992 ;; Prepare the article and summary inserts.
993 (unless article-insert
994 (setq article-insert (buffer-string)
995 head-end head-begin))
996 ;; Fix MIME-Version
997 (unless (string-match "MIME-Version:" article-insert)
998 (setq article-insert
999 (concat article-insert "MIME-Version: 1.0\n")))
1000 (setq summary-insert article-insert)
1001 ;; - summary Subject.
1002 (setq summary-insert
1003 (let ((line (concat "Subject: <" position
1004 (and position multipart-any ".")
1005 (and multipart-any "*")
1006 (and (or position multipart-any) " ")
1007 (cond ((string= subtype "plain") type)
1008 ((string= subtype "basic") type)
1009 (t subtype))
1011 (and subject " ")
1012 subject
1013 "\n")))
1014 (if (string-match "Subject:.*\n\\([ \t].*\n\\)*" summary-insert)
1015 (replace-match line t t summary-insert)
1016 (concat summary-insert line))))
1017 ;; - summary Message-ID.
1018 (setq summary-insert
1019 (let ((line (concat "Message-ID: " message-id "\n")))
1020 (if (string-match "Message-ID:.*\n\\([ \t].*\n\\)*" summary-insert)
1021 (replace-match line t t summary-insert)
1022 (concat summary-insert line))))
1023 ;; - summary References.
1024 (when parent
1025 (setq summary-insert
1026 (let ((line (concat "References: " parent "\n")))
1027 (if (string-match "References:.*\n\\([ \t].*\n\\)*"
1028 summary-insert)
1029 (replace-match line t t summary-insert)
1030 (concat summary-insert line)))))
1031 ;; Generate dissection information for this entity.
1032 (push (list (incf nndoc-mime-split-ordinal)
1033 head-begin head-end body-begin body-end
1034 (count-lines body-begin body-end)
1035 article-insert summary-insert)
1036 nndoc-dissection-alist)
1037 ;; Recurse for all sub-entities, if any.
1038 (widen)
1039 (cond
1040 (message-rfc822
1041 (save-excursion
1042 (nndoc-dissect-mime-parts-sub body-begin body-end nil
1043 position message-id)))
1044 ((and multipart-any boundary-regexp)
1045 (let ((part-counter 0)
1046 part-begin part-end eof-flag)
1047 (while (string-match "\
1048 ^\\(Lines\\|Content-\\(Type\\|Transfer-Encoding\\|Disposition\\)\\):.*\n\\([ \t].*\n\\)*"
1049 article-insert)
1050 (setq article-insert (replace-match "" t t article-insert)))
1051 (let ((case-fold-search nil))
1052 (goto-char body-begin)
1053 (setq eof-flag (not (re-search-forward boundary-regexp body-end t)))
1054 (while (not eof-flag)
1055 (setq part-begin (point))
1056 (cond ((re-search-forward boundary-regexp body-end t)
1057 (or (not (match-string 1))
1058 (string= (match-string 1) "")
1059 (setq eof-flag t))
1060 (forward-line -1)
1061 (setq part-end (point))
1062 (forward-line 1))
1063 (t (setq part-end body-end
1064 eof-flag t)))
1065 (save-excursion
1066 (nndoc-dissect-mime-parts-sub
1067 part-begin part-end article-insert
1068 (concat position
1069 (and position ".")
1070 (format "%d" (incf part-counter)))
1071 message-id)))))))))
1073 ;;;###autoload
1074 (defun nndoc-add-type (definition &optional position)
1075 "Add document DEFINITION to the list of nndoc document definitions.
1076 If POSITION is nil or `last', the definition will be added
1077 as the last checked definition, if t or `first', add as the
1078 first definition, and if any other symbol, add after that
1079 symbol in the alist."
1080 ;; First remove any old instances.
1081 (gnus-alist-pull (car definition) nndoc-type-alist)
1082 ;; Then enter the new definition in the proper place.
1083 (cond
1084 ((or (null position) (eq position 'last))
1085 (setq nndoc-type-alist (nconc nndoc-type-alist (list definition))))
1086 ((or (eq position t) (eq position 'first))
1087 (push definition nndoc-type-alist))
1089 (let ((list (memq (assq position nndoc-type-alist)
1090 nndoc-type-alist)))
1091 (unless list
1092 (error "No such position: %s" position))
1093 (setcdr list (cons definition (cdr list)))))))
1095 (provide 'nndoc)
1097 ;;; nndoc.el ends here