new version
[emacs.git] / lisp / gnus / nndoc.el
blobe0816e8dce81b83bb228cf1fa4ced7e2257731c5
1 ;;; nndoc.el --- single file access for Gnus
2 ;; Copyright (C) 1995,96,97 Free Software Foundation, Inc.
4 ;; Author: Lars Magne Ingebrigtsen <larsi@ifi.uio.no>
5 ;; Masanobu UMEDA <umerin@flab.flab.fujitsu.junet>
6 ;; Keywords: news
8 ;; This file is part of GNU Emacs.
10 ;; GNU Emacs is free software; you can redistribute it and/or modify
11 ;; it under the terms of the GNU General Public License as published by
12 ;; the Free Software Foundation; either version 2, or (at your option)
13 ;; any later version.
15 ;; GNU Emacs is distributed in the hope that it will be useful,
16 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
17 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 ;; GNU General Public License for more details.
20 ;; You should have received a copy of the GNU General Public License
21 ;; along with GNU Emacs; see the file COPYING. If not, write to the
22 ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
23 ;; Boston, MA 02111-1307, USA.
25 ;;; Commentary:
27 ;;; Code:
29 (require 'nnheader)
30 (require 'message)
31 (require 'nnmail)
32 (require 'nnoo)
33 (eval-when-compile (require 'cl))
35 (nnoo-declare nndoc)
37 (defvoo nndoc-article-type 'guess
38 "*Type of the file.
39 One of `mbox', `babyl', `digest', `news', `rnews', `mmdf', `forward',
40 `rfc934', `rfc822-forward', `mime-digest', `standard-digest',
41 `slack-digest', `clari-briefs' or `guess'.")
43 (defvoo nndoc-post-type 'mail
44 "*Whether the nndoc group is `mail' or `post'.")
46 (defvar nndoc-type-alist
47 `((mmdf
48 (article-begin . "^\^A\^A\^A\^A\n")
49 (body-end . "^\^A\^A\^A\^A\n"))
50 (news
51 (article-begin . "^Path:"))
52 (rnews
53 (article-begin . "^#! *rnews +\\([0-9]+\\) *\n")
54 (body-end-function . nndoc-rnews-body-end))
55 (mbox
56 (article-begin-function . nndoc-mbox-article-begin)
57 (body-end-function . nndoc-mbox-body-end))
58 (babyl
59 (article-begin . "\^_\^L *\n")
60 (body-end . "\^_")
61 (body-begin-function . nndoc-babyl-body-begin)
62 (head-begin-function . nndoc-babyl-head-begin))
63 (forward
64 (article-begin . "^-+ Start of forwarded message -+\n+")
65 (body-end . "^-+ End of forwarded message -+$")
66 (prepare-body-function . nndoc-unquote-dashes))
67 (rfc934
68 (article-begin . "^--.*\n+")
69 (body-end . "^--.*$")
70 (prepare-body-function . nndoc-unquote-dashes))
71 (clari-briefs
72 (article-begin . "^ \\*")
73 (body-end . "^\t------*[ \t]^*\n^ \\*")
74 (body-begin . "^\t")
75 (head-end . "^\t")
76 (generate-head-function . nndoc-generate-clari-briefs-head)
77 (article-transform-function . nndoc-transform-clari-briefs))
78 (mime-digest
79 (article-begin . "")
80 (head-end . "^ ?$")
81 (body-end . "")
82 (file-end . "")
83 (subtype digest guess))
84 (standard-digest
85 (first-article . ,(concat "^" (make-string 70 ?-) "\n\n+"))
86 (article-begin . ,(concat "^\n" (make-string 30 ?-) "\n\n+"))
87 (prepare-body-function . nndoc-unquote-dashes)
88 (body-end-function . nndoc-digest-body-end)
89 (head-end . "^ ?$")
90 (body-begin . "^ ?\n")
91 (file-end . "^End of .*digest.*[0-9].*\n\\*\\*\\|^End of.*Digest *$")
92 (subtype digest guess))
93 (slack-digest
94 (article-begin . "^------------------------------*[\n \t]+")
95 (head-end . "^ ?$")
96 (body-end-function . nndoc-digest-body-end)
97 (body-begin . "^ ?$")
98 (file-end . "^End of")
99 (prepare-body-function . nndoc-unquote-dashes)
100 (subtype digest guess))
101 (lanl-gov-announce
102 (article-begin . "^\\\\\\\\\n")
103 (head-begin . "^Paper.*:")
104 (head-end . "\\(^\\\\\\\\.*\n\\|-----------------\\)")
105 (body-begin . "")
106 (body-end . "-------------------------------------------------")
107 (file-end . "^Title: Recent Seminal")
108 (generate-head-function . nndoc-generate-lanl-gov-head)
109 (article-transform-function . nndoc-transform-lanl-gov-announce)
110 (subtype preprints guess))
111 (rfc822-forward
112 (article-begin . "^\n")
113 (body-end-function . nndoc-rfc822-forward-body-end-function))
114 (guess
115 (guess . t)
116 (subtype nil))
117 (digest
118 (guess . t)
119 (subtype nil))
120 (preprints
121 (guess . t)
122 (subtype nil))))
126 (defvoo nndoc-file-begin nil)
127 (defvoo nndoc-first-article nil)
128 (defvoo nndoc-article-end nil)
129 (defvoo nndoc-article-begin nil)
130 (defvoo nndoc-head-begin nil)
131 (defvoo nndoc-head-end nil)
132 (defvoo nndoc-file-end nil)
133 (defvoo nndoc-body-begin nil)
134 (defvoo nndoc-body-end-function nil)
135 (defvoo nndoc-body-begin-function nil)
136 (defvoo nndoc-head-begin-function nil)
137 (defvoo nndoc-body-end nil)
138 (defvoo nndoc-dissection-alist nil)
139 (defvoo nndoc-prepare-body-function nil)
140 (defvoo nndoc-generate-head-function nil)
141 (defvoo nndoc-article-transform-function nil)
142 (defvoo nndoc-article-begin-function nil)
144 (defvoo nndoc-status-string "")
145 (defvoo nndoc-group-alist nil)
146 (defvoo nndoc-current-buffer nil
147 "Current nndoc news buffer.")
148 (defvoo nndoc-address nil)
150 (defconst nndoc-version "nndoc 1.0"
151 "nndoc version.")
155 ;;; Interface functions
157 (nnoo-define-basics nndoc)
159 (deffoo nndoc-retrieve-headers (articles &optional newsgroup server fetch-old)
160 (when (nndoc-possibly-change-buffer newsgroup server)
161 (save-excursion
162 (set-buffer nntp-server-buffer)
163 (erase-buffer)
164 (let (article entry)
165 (if (stringp (car articles))
166 'headers
167 (while articles
168 (when (setq entry (cdr (assq (setq article (pop articles))
169 nndoc-dissection-alist)))
170 (insert (format "221 %d Article retrieved.\n" article))
171 (if nndoc-generate-head-function
172 (funcall nndoc-generate-head-function article)
173 (insert-buffer-substring
174 nndoc-current-buffer (car entry) (nth 1 entry)))
175 (goto-char (point-max))
176 (unless (= (char-after (1- (point))) ?\n)
177 (insert "\n"))
178 (insert (format "Lines: %d\n" (nth 4 entry)))
179 (insert ".\n")))
181 (nnheader-fold-continuation-lines)
182 'headers)))))
184 (deffoo nndoc-request-article (article &optional newsgroup server buffer)
185 (nndoc-possibly-change-buffer newsgroup server)
186 (save-excursion
187 (let ((buffer (or buffer nntp-server-buffer))
188 (entry (cdr (assq article nndoc-dissection-alist)))
189 beg)
190 (set-buffer buffer)
191 (erase-buffer)
192 (when entry
193 (if (stringp article)
195 (insert-buffer-substring
196 nndoc-current-buffer (car entry) (nth 1 entry))
197 (insert "\n")
198 (setq beg (point))
199 (insert-buffer-substring
200 nndoc-current-buffer (nth 2 entry) (nth 3 entry))
201 (goto-char beg)
202 (when nndoc-prepare-body-function
203 (funcall nndoc-prepare-body-function))
204 (when nndoc-article-transform-function
205 (funcall nndoc-article-transform-function article))
206 t)))))
208 (deffoo nndoc-request-group (group &optional server dont-check)
209 "Select news GROUP."
210 (let (number)
211 (cond
212 ((not (nndoc-possibly-change-buffer group server))
213 (nnheader-report 'nndoc "No such file or buffer: %s"
214 nndoc-address))
215 (dont-check
216 (nnheader-report 'nndoc "Selected group %s" group)
218 ((zerop (setq number (length nndoc-dissection-alist)))
219 (nndoc-close-group group)
220 (nnheader-report 'nndoc "No articles in group %s" group))
222 (nnheader-insert "211 %d %d %d %s\n" number 1 number group)))))
224 (deffoo nndoc-request-type (group &optional article)
225 (cond ((not article) 'unknown)
226 (nndoc-post-type nndoc-post-type)
227 (t 'unknown)))
229 (deffoo nndoc-close-group (group &optional server)
230 (nndoc-possibly-change-buffer group server)
231 (and nndoc-current-buffer
232 (buffer-name nndoc-current-buffer)
233 (kill-buffer nndoc-current-buffer))
234 (setq nndoc-group-alist (delq (assoc group nndoc-group-alist)
235 nndoc-group-alist))
236 (setq nndoc-current-buffer nil)
237 (nnoo-close-server 'nndoc server)
238 (setq nndoc-dissection-alist nil)
241 (deffoo nndoc-request-list (&optional server)
242 nil)
244 (deffoo nndoc-request-newgroups (date &optional server)
245 nil)
247 (deffoo nndoc-request-list-newsgroups (&optional server)
248 nil)
251 ;;; Internal functions.
253 (defun nndoc-possibly-change-buffer (group source)
254 (let (buf)
255 (cond
256 ;; The current buffer is this group's buffer.
257 ((and nndoc-current-buffer
258 (buffer-name nndoc-current-buffer)
259 (eq nndoc-current-buffer
260 (setq buf (cdr (assoc group nndoc-group-alist))))))
261 ;; We change buffers by taking an old from the group alist.
262 ;; `source' is either a string (a file name) or a buffer object.
263 (buf
264 (setq nndoc-current-buffer buf))
265 ;; It's a totally new group.
266 ((or (and (bufferp nndoc-address)
267 (buffer-name nndoc-address))
268 (and (stringp nndoc-address)
269 (file-exists-p nndoc-address)
270 (not (file-directory-p nndoc-address))))
271 (push (cons group (setq nndoc-current-buffer
272 (get-buffer-create
273 (concat " *nndoc " group "*"))))
274 nndoc-group-alist)
275 (setq nndoc-dissection-alist nil)
276 (save-excursion
277 (set-buffer nndoc-current-buffer)
278 (buffer-disable-undo (current-buffer))
279 (erase-buffer)
280 (if (stringp nndoc-address)
281 (nnheader-insert-file-contents nndoc-address)
282 (insert-buffer-substring nndoc-address)))))
283 ;; Initialize the nndoc structures according to this new document.
284 (when (and nndoc-current-buffer
285 (not nndoc-dissection-alist))
286 (save-excursion
287 (set-buffer nndoc-current-buffer)
288 (nndoc-set-delims)
289 (nndoc-dissect-buffer)))
290 (unless nndoc-current-buffer
291 (nndoc-close-server))
292 ;; Return whether we managed to select a file.
293 nndoc-current-buffer))
296 ;;; Deciding what document type we have
299 (defun nndoc-set-delims ()
300 "Set the nndoc delimiter variables according to the type of the document."
301 (let ((vars '(nndoc-file-begin
302 nndoc-first-article
303 nndoc-article-end nndoc-head-begin nndoc-head-end
304 nndoc-file-end nndoc-article-begin
305 nndoc-body-begin nndoc-body-end-function nndoc-body-end
306 nndoc-prepare-body-function nndoc-article-transform-function
307 nndoc-generate-head-function nndoc-body-begin-function
308 nndoc-head-begin-function)))
309 (while vars
310 (set (pop vars) nil)))
311 (let (defs)
312 ;; Guess away until we find the real file type.
313 (while (assq 'guess (setq defs (cdr (assq nndoc-article-type
314 nndoc-type-alist))))
315 (setq nndoc-article-type (nndoc-guess-type nndoc-article-type)))
316 ;; Set the nndoc variables.
317 (while defs
318 (set (intern (format "nndoc-%s" (caar defs)))
319 (cdr (pop defs))))))
321 (defun nndoc-guess-type (subtype)
322 (let ((alist nndoc-type-alist)
323 results result entry)
324 (while (and (not result)
325 (setq entry (pop alist)))
326 (when (memq subtype (or (cdr (assq 'subtype entry)) '(guess)))
327 (goto-char (point-min))
328 (when (numberp (setq result (funcall (intern
329 (format "nndoc-%s-type-p"
330 (car entry))))))
331 (push (cons result entry) results)
332 (setq result nil))))
333 (unless (or result results)
334 (error "Document is not of any recognized type"))
335 (if result
336 (car entry)
337 (cadar (sort results (lambda (r1 r2) (< (car r1) (car r2))))))))
340 ;;; Built-in type predicates and functions
343 (defun nndoc-mbox-type-p ()
344 (when (looking-at message-unix-mail-delimiter)
347 (defun nndoc-mbox-article-begin ()
348 (when (re-search-forward (concat "^" message-unix-mail-delimiter) nil t)
349 (goto-char (match-beginning 0))))
351 (defun nndoc-mbox-body-end ()
352 (let ((beg (point))
353 len end)
354 (when
355 (save-excursion
356 (and (re-search-backward
357 (concat "^" message-unix-mail-delimiter) nil t)
358 (setq end (point))
359 (search-forward "\n\n" beg t)
360 (re-search-backward
361 "^Content-Length:[ \t]*\\([0-9]+\\) *$" end t)
362 (setq len (string-to-int (match-string 1)))
363 (search-forward "\n\n" beg t)
364 (unless (= (setq len (+ (point) len)) (point-max))
365 (and (< len (point-max))
366 (goto-char len)
367 (looking-at message-unix-mail-delimiter)))))
368 (goto-char len))))
370 (defun nndoc-mmdf-type-p ()
371 (when (looking-at "\^A\^A\^A\^A$")
374 (defun nndoc-news-type-p ()
375 (when (looking-at "^Path:.*\n")
378 (defun nndoc-rnews-type-p ()
379 (when (looking-at "#! *rnews")
382 (defun nndoc-rnews-body-end ()
383 (and (re-search-backward nndoc-article-begin nil t)
384 (forward-line 1)
385 (goto-char (+ (point) (string-to-int (match-string 1))))))
387 (defun nndoc-babyl-type-p ()
388 (when (re-search-forward "\^_\^L *\n" nil t)
391 (defun nndoc-babyl-body-begin ()
392 (re-search-forward "^\n" nil t)
393 (when (looking-at "\*\*\* EOOH \*\*\*")
394 (let ((next (or (save-excursion
395 (re-search-forward nndoc-article-begin nil t))
396 (point-max))))
397 (unless (re-search-forward "^\n" next t)
398 (goto-char next)
399 (forward-line -1)
400 (insert "\n")
401 (forward-line -1)))))
403 (defun nndoc-babyl-head-begin ()
404 (when (re-search-forward "^[0-9].*\n" nil t)
405 (when (looking-at "\*\*\* EOOH \*\*\*")
406 (forward-line 1))
409 (defun nndoc-forward-type-p ()
410 (when (and (re-search-forward "^-+ Start of forwarded message -+\n+" nil t)
411 (not (re-search-forward "^Subject:.*digest" nil t))
412 (not (re-search-backward "^From:" nil t 2))
413 (not (re-search-forward "^From:" nil t 2)))
416 (defun nndoc-rfc934-type-p ()
417 (when (and (re-search-forward "^-+ Start of forwarded.*\n+" nil t)
418 (not (re-search-forward "^Subject:.*digest" nil t))
419 (not (re-search-backward "^From:" nil t 2))
420 (not (re-search-forward "^From:" nil t 2)))
423 (defun nndoc-rfc822-forward-type-p ()
424 (save-restriction
425 (message-narrow-to-head)
426 (when (re-search-forward "^Content-Type: *message/rfc822" nil t)
427 t)))
429 (defun nndoc-rfc822-forward-body-end-function ()
430 (goto-char (point-max)))
432 (defun nndoc-clari-briefs-type-p ()
433 (when (let ((case-fold-search nil))
434 (re-search-forward "^\t[^a-z]+ ([^a-z]+) --" nil t))
437 (defun nndoc-transform-clari-briefs (article)
438 (goto-char (point-min))
439 (when (looking-at " *\\*\\(.*\\)\n")
440 (replace-match "" t t))
441 (nndoc-generate-clari-briefs-head article))
443 (defun nndoc-generate-clari-briefs-head (article)
444 (let ((entry (cdr (assq article nndoc-dissection-alist)))
445 subject from)
446 (save-excursion
447 (set-buffer nndoc-current-buffer)
448 (save-restriction
449 (narrow-to-region (car entry) (nth 3 entry))
450 (goto-char (point-min))
451 (when (looking-at " *\\*\\(.*\\)$")
452 (setq subject (match-string 1))
453 (when (string-match "[ \t]+$" subject)
454 (setq subject (substring subject 0 (match-beginning 0)))))
455 (when
456 (let ((case-fold-search nil))
457 (re-search-forward
458 "^\t\\([^a-z]+\\(,[^(]+\\)? ([^a-z]+)\\) --" nil t))
459 (setq from (match-string 1)))))
460 (insert "From: " "clari@clari.net (" (or from "unknown") ")"
461 "\nSubject: " (or subject "(no subject)") "\n")))
463 (defun nndoc-mime-digest-type-p ()
464 (let ((case-fold-search t)
465 boundary-id b-delimiter entry)
466 (when (and
467 (re-search-forward
468 (concat "^Content-Type: *multipart/digest;[ \t\n]*[ \t]"
469 "boundary=\"\\([^\"\n]*[^\" \t\n]\\)\"")
470 nil t)
471 (match-beginning 1))
472 (setq boundary-id (match-string 1)
473 b-delimiter (concat "\n--" boundary-id "[\n \t]+"))
474 (setq entry (assq 'mime-digest nndoc-type-alist))
475 (setcdr entry
476 (list
477 (cons 'head-end "^ ?$")
478 (cons 'body-begin "^ ?\n")
479 (cons 'article-begin b-delimiter)
480 (cons 'body-end-function 'nndoc-digest-body-end)
481 (cons 'file-end (concat "\n--" boundary-id "--[ \t]*$"))))
482 t)))
484 (defun nndoc-standard-digest-type-p ()
485 (when (and (re-search-forward (concat "^" (make-string 70 ?-) "\n\n") nil t)
486 (re-search-forward
487 (concat "\n\n" (make-string 30 ?-) "\n\n") nil t))
490 (defun nndoc-digest-body-end ()
491 (and (re-search-forward nndoc-article-begin nil t)
492 (goto-char (match-beginning 0))))
494 (defun nndoc-slack-digest-type-p ()
497 (defun nndoc-lanl-gov-announce-type-p ()
498 (when (let ((case-fold-search nil))
499 (re-search-forward "^\\\\\\\\\nPaper: [a-z-]+/[0-9]+" nil t))
502 (defun nndoc-transform-lanl-gov-announce (article)
503 (goto-char (point-max))
504 (when (re-search-backward "^\\\\\\\\ +(\\([^ ]*\\) , *\\([^ ]*\\))" nil t)
505 (replace-match "\n\nGet it at \\1 (\\2)" t nil))
506 ;; (when (re-search-backward "^\\\\\\\\$" nil t)
507 ;; (replace-match "" t t))
510 (defun nndoc-generate-lanl-gov-head (article)
511 (let ((entry (cdr (assq article nndoc-dissection-alist)))
512 (e-mail "no address given")
513 subject from)
514 (save-excursion
515 (set-buffer nndoc-current-buffer)
516 (save-restriction
517 (narrow-to-region (car entry) (nth 1 entry))
518 (goto-char (point-min))
519 (when (looking-at "^Paper.*: \\([a-z-]+/[0-9]+\\)")
520 (setq subject (concat " (" (match-string 1) ")"))
521 (when (re-search-forward "^From: \\([^ ]+\\)" nil t)
522 (setq e-mail (match-string 1)))
523 (when (re-search-forward "^Title: \\([^\f]*\\)\nAuthors?: \\(.*\\)"
524 nil t)
525 (setq subject (concat (match-string 1) subject))
526 (setq from (concat (match-string 2) " <" e-mail ">"))))
528 (while (and from (string-match "(\[^)\]*)" from))
529 (setq from (replace-match "" t t from)))
530 (insert "From: " (or from "unknown")
531 "\nSubject: " (or subject "(no subject)") "\n")))
536 ;;; Functions for dissecting the documents
539 (defun nndoc-search (regexp)
540 (prog1
541 (re-search-forward regexp nil t)
542 (beginning-of-line)))
544 (defun nndoc-dissect-buffer ()
545 "Go through the document and partition it into heads/bodies/articles."
546 (let ((i 0)
547 (first t)
548 head-begin head-end body-begin body-end)
549 (setq nndoc-dissection-alist nil)
550 (save-excursion
551 (set-buffer nndoc-current-buffer)
552 (goto-char (point-min))
553 ;; Find the beginning of the file.
554 (when nndoc-file-begin
555 (nndoc-search nndoc-file-begin))
556 ;; Go through the file.
557 (while (if (and first nndoc-first-article)
558 (nndoc-search nndoc-first-article)
559 (nndoc-article-begin))
560 (setq first nil)
561 (cond (nndoc-head-begin-function
562 (funcall nndoc-head-begin-function))
563 (nndoc-head-begin
564 (nndoc-search nndoc-head-begin)))
565 (if (or (>= (point) (point-max))
566 (and nndoc-file-end
567 (looking-at nndoc-file-end)))
568 (goto-char (point-max))
569 (setq head-begin (point))
570 (nndoc-search (or nndoc-head-end "^$"))
571 (setq head-end (point))
572 (if nndoc-body-begin-function
573 (funcall nndoc-body-begin-function)
574 (nndoc-search (or nndoc-body-begin "^\n")))
575 (setq body-begin (point))
576 (or (and nndoc-body-end-function
577 (funcall nndoc-body-end-function))
578 (and nndoc-body-end
579 (nndoc-search nndoc-body-end))
580 (nndoc-article-begin)
581 (progn
582 (goto-char (point-max))
583 (when nndoc-file-end
584 (and (re-search-backward nndoc-file-end nil t)
585 (beginning-of-line)))))
586 (setq body-end (point))
587 (push (list (incf i) head-begin head-end body-begin body-end
588 (count-lines body-begin body-end))
589 nndoc-dissection-alist))))))
591 (defun nndoc-article-begin ()
592 (if nndoc-article-begin-function
593 (funcall nndoc-article-begin-function)
594 (ignore-errors
595 (nndoc-search nndoc-article-begin))))
597 (defun nndoc-unquote-dashes ()
598 "Unquote quoted non-separators in digests."
599 (while (re-search-forward "^- -"nil t)
600 (replace-match "-" t t)))
602 ;;;###autoload
603 (defun nndoc-add-type (definition &optional position)
604 "Add document DEFINITION to the list of nndoc document definitions.
605 If POSITION is nil or `last', the definition will be added
606 as the last checked definition, if t or `first', add as the
607 first definition, and if any other symbol, add after that
608 symbol in the alist."
609 ;; First remove any old instances.
610 (setq nndoc-type-alist
611 (delq (assq (car definition) nndoc-type-alist)
612 nndoc-type-alist))
613 ;; Then enter the new definition in the proper place.
614 (cond
615 ((or (null position) (eq position 'last))
616 (setq nndoc-type-alist (nconc nndoc-type-alist (list definition))))
617 ((or (eq position t) (eq position 'first))
618 (push definition nndoc-type-alist))
620 (let ((list (memq (assq position nndoc-type-alist)
621 nndoc-type-alist)))
622 (unless list
623 (error "No such position: %s" position))
624 (setcdr list (cons definition (cdr list)))))))
626 (provide 'nndoc)
628 ;;; nndoc.el ends here