Remove time-stamp annoyance.
[emacs.git] / lisp / gnus / nndoc.el
blobdbaaa4d71a96298b1166083ad72bf7c1e340ddd0
1 ;;; nndoc.el --- single file access for Gnus
3 ;; Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003,
4 ;; 2004, 2005, 2006, 2007, 2008 Free Software Foundation, Inc.
6 ;; Author: Lars Magne Ingebrigtsen <larsi@gnus.org>
7 ;; Masanobu UMEDA <umerin@flab.flab.fujitsu.junet>
8 ;; Keywords: news
10 ;; This file is part of GNU Emacs.
12 ;; GNU Emacs is free software; you can redistribute it and/or modify
13 ;; it under the terms of the GNU General Public License as published by
14 ;; the Free Software Foundation; either version 3, or (at your option)
15 ;; any later version.
17 ;; GNU Emacs is distributed in the hope that it will be useful,
18 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
19 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 ;; GNU General Public License for more details.
22 ;; You should have received a copy of the GNU General Public License
23 ;; along with GNU Emacs; see the file COPYING. If not, write to the
24 ;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
25 ;; Boston, MA 02110-1301, USA.
27 ;;; Commentary:
29 ;; For Outlook mail boxes format, see http://mbx2mbox.sourceforge.net/
31 ;;; Code:
33 (require 'nnheader)
34 (require 'message)
35 (require 'nnmail)
36 (require 'nnoo)
37 (require 'gnus-util)
38 (require 'mm-util)
39 (eval-when-compile (require 'cl))
41 (nnoo-declare nndoc)
43 (defvoo nndoc-article-type 'guess
44 "*Type of the file.
45 One of `mbox', `babyl', `digest', `news', `rnews', `mmdf', `forward',
46 `rfc934', `rfc822-forward', `mime-parts', `standard-digest',
47 `slack-digest', `clari-briefs', `nsmail', `outlook', `oe-dbx',
48 `mailman', `exim-bounce', or `guess'.")
50 (defvoo nndoc-post-type 'mail
51 "*Whether the nndoc group is `mail' or `post'.")
53 (defvoo nndoc-open-document-hook 'nnheader-ms-strip-cr
54 "Hook run after opening a document.
55 The default function removes all trailing carriage returns
56 from the document.")
58 (defvar nndoc-type-alist
59 `((mmdf
60 (article-begin . "^\^A\^A\^A\^A\n")
61 (body-end . "^\^A\^A\^A\^A\n"))
62 (mime-digest
63 (article-begin . "")
64 (head-begin . "^ ?\n")
65 (head-end . "^ ?$")
66 (body-end . "")
67 (file-end . "")
68 (subtype digest guess))
69 (mime-parts
70 (generate-head-function . nndoc-generate-mime-parts-head)
71 (article-transform-function . nndoc-transform-mime-parts))
72 (nsmail
73 (article-begin . "^From - "))
74 (news
75 (article-begin . "^Path:"))
76 (rnews
77 (article-begin . "^#! *rnews +\\([0-9]+\\) *\n")
78 (body-end-function . nndoc-rnews-body-end))
79 (mbox
80 (article-begin-function . nndoc-mbox-article-begin)
81 (body-end-function . nndoc-mbox-body-end))
82 (babyl
83 (article-begin . "\^_\^L *\n")
84 (body-end . "\^_")
85 (body-begin-function . nndoc-babyl-body-begin)
86 (head-begin-function . nndoc-babyl-head-begin))
87 (exim-bounce
88 (article-begin . "^------ This is a copy of the message, including all the headers. ------\n\n")
89 (body-end-function . nndoc-exim-bounce-body-end-function))
90 (rfc934
91 (article-begin . "^--.*\n+")
92 (body-end . "^--.*$")
93 (prepare-body-function . nndoc-unquote-dashes))
94 (mailman
95 (article-begin . "^--__--__--\n\nMessage:")
96 (body-end . "^--__--__--$")
97 (prepare-body-function . nndoc-unquote-dashes))
98 (clari-briefs
99 (article-begin . "^ \\*")
100 (body-end . "^\t------*[ \t]^*\n^ \\*")
101 (body-begin . "^\t")
102 (head-end . "^\t")
103 (generate-head-function . nndoc-generate-clari-briefs-head)
104 (article-transform-function . nndoc-transform-clari-briefs))
106 (standard-digest
107 (first-article . ,(concat "^" (make-string 70 ?-) "\n *\n+"))
108 (article-begin . ,(concat "^\n" (make-string 30 ?-) "\n *\n+"))
109 (prepare-body-function . nndoc-unquote-dashes)
110 (body-end-function . nndoc-digest-body-end)
111 (head-end . "^ *$")
112 (body-begin . "^ *\n")
113 (file-end . "^End of .*digest.*[0-9].*\n\\*\\*\\|^End of.*Digest *$")
114 (subtype digest guess))
115 (slack-digest
116 (article-begin . "^------------------------------*[\n \t]+")
117 (head-end . "^ ?$")
118 (body-end-function . nndoc-digest-body-end)
119 (body-begin . "^ ?$")
120 (file-end . "^End of")
121 (prepare-body-function . nndoc-unquote-dashes)
122 (subtype digest guess))
123 (lanl-gov-announce
124 (article-begin . "^\\\\\\\\\n")
125 (head-begin . "^\\(Paper.*:\\|arXiv:\\)")
126 (head-end . "\\(^\\\\\\\\.*\n\\|-----------------\\)")
127 (body-begin . "")
128 (body-end . "\\(-------------------------------------------------\\|%-%-%-%-%-%-%-%-%-%-%-%-%-%-\\|%%--%%--%%--%%--%%--%%--%%--%%--\\|%%%---%%%---%%%---%%%---\\)")
129 (file-end . "\\(^Title: Recent Seminal\\|%%%---%%%---%%%---%%%---\\)")
130 (generate-head-function . nndoc-generate-lanl-gov-head)
131 (article-transform-function . nndoc-transform-lanl-gov-announce)
132 (subtype preprints guess))
133 (rfc822-forward
134 (article-begin . "^\n+")
135 (body-end-function . nndoc-rfc822-forward-body-end-function)
136 (generate-head-function . nndoc-rfc822-forward-generate-head)
137 (generate-article-function . nndoc-rfc822-forward-generate-article))
138 (outlook
139 (article-begin-function . nndoc-outlook-article-begin)
140 (body-end . "\0"))
141 (oe-dbx ;; Outlook Express DBX format
142 (dissection-function . nndoc-oe-dbx-dissection)
143 (generate-head-function . nndoc-oe-dbx-generate-head)
144 (generate-article-function . nndoc-oe-dbx-generate-article))
145 (forward
146 (article-begin . "^-+ \\(Start of \\)?forwarded message.*\n+")
147 (body-end . "^-+ End \\(of \\)?forwarded message.*$")
148 (prepare-body-function . nndoc-unquote-dashes))
149 (mail-in-mail ;; Wild guess on mailer daemon's messages or others
150 (article-begin-function . nndoc-mail-in-mail-article-begin))
151 (guess
152 (guess . t)
153 (subtype nil))
154 (digest
155 (guess . t)
156 (subtype nil))
157 (preprints
158 (guess . t)
159 (subtype nil))))
161 (defvar nndoc-binary-file-names ".[Dd][Bb][Xx]$"
162 "Regexp for binary nndoc file names.")
165 (defvoo nndoc-file-begin nil)
166 (defvoo nndoc-first-article nil)
167 (defvoo nndoc-article-begin nil)
168 (defvoo nndoc-head-begin nil)
169 (defvoo nndoc-head-end nil)
170 (defvoo nndoc-file-end nil)
171 (defvoo nndoc-body-begin nil)
172 (defvoo nndoc-body-end-function nil)
173 (defvoo nndoc-body-begin-function nil)
174 (defvoo nndoc-head-begin-function nil)
175 (defvoo nndoc-body-end nil)
176 ;; nndoc-dissection-alist is a list of sublists. Each sublist holds the
177 ;; following items. ARTICLE acts as the association key and is an ordinal
178 ;; starting at 1. HEAD-BEGIN [0], HEAD-END [1], BODY-BEGIN [2] and BODY-END
179 ;; [3] are positions in the `nndoc' buffer. LINE-COUNT [4] is a count of
180 ;; lines in the body. For MIME dissections only, ARTICLE-INSERT [5] and
181 ;; SUMMARY-INSERT [6] give headers to insert for full article or summary line
182 ;; generation, respectively. Other headers usually follow directly from the
183 ;; buffer. Value `nil' means no insert.
184 (defvoo nndoc-dissection-alist nil)
185 (defvoo nndoc-prepare-body-function nil)
186 (defvoo nndoc-generate-head-function nil)
187 (defvoo nndoc-article-transform-function nil)
188 (defvoo nndoc-article-begin-function nil)
189 (defvoo nndoc-generate-article-function nil)
190 (defvoo nndoc-dissection-function nil)
192 (defvoo nndoc-status-string "")
193 (defvoo nndoc-group-alist nil)
194 (defvoo nndoc-current-buffer nil
195 "Current nndoc news buffer.")
196 (defvoo nndoc-address nil)
198 (defconst nndoc-version "nndoc 1.0"
199 "nndoc version.")
203 ;;; Interface functions
205 (nnoo-define-basics nndoc)
207 (deffoo nndoc-retrieve-headers (articles &optional newsgroup server fetch-old)
208 (when (nndoc-possibly-change-buffer newsgroup server)
209 (save-excursion
210 (set-buffer nntp-server-buffer)
211 (erase-buffer)
212 (let (article entry)
213 (if (stringp (car articles))
214 'headers
215 (while articles
216 (when (setq entry (cdr (assq (setq article (pop articles))
217 nndoc-dissection-alist)))
218 (insert (format "221 %d Article retrieved.\n" article))
219 (if nndoc-generate-head-function
220 (funcall nndoc-generate-head-function article)
221 (insert-buffer-substring
222 nndoc-current-buffer (car entry) (nth 1 entry)))
223 (goto-char (point-max))
224 (unless (eq (char-after (1- (point))) ?\n)
225 (insert "\n"))
226 (insert (format "Lines: %d\n" (nth 4 entry)))
227 (insert ".\n")))
229 (nnheader-fold-continuation-lines)
230 'headers)))))
232 (deffoo nndoc-request-article (article &optional newsgroup server buffer)
233 (nndoc-possibly-change-buffer newsgroup server)
234 (save-excursion
235 (let ((buffer (or buffer nntp-server-buffer))
236 (entry (cdr (assq article nndoc-dissection-alist)))
237 beg)
238 (set-buffer buffer)
239 (erase-buffer)
240 (when entry
241 (cond
242 ((stringp article) nil)
243 (nndoc-generate-article-function
244 (funcall nndoc-generate-article-function article))
246 (insert-buffer-substring
247 nndoc-current-buffer (car entry) (nth 1 entry))
248 (insert "\n")
249 (setq beg (point))
250 (insert-buffer-substring
251 nndoc-current-buffer (nth 2 entry) (nth 3 entry))
252 (goto-char beg)
253 (when nndoc-prepare-body-function
254 (funcall nndoc-prepare-body-function))
255 (when nndoc-article-transform-function
256 (funcall nndoc-article-transform-function article))
257 t))))))
259 (deffoo nndoc-request-group (group &optional server dont-check)
260 "Select news GROUP."
261 (let (number)
262 (cond
263 ((not (nndoc-possibly-change-buffer group server))
264 (nnheader-report 'nndoc "No such file or buffer: %s"
265 nndoc-address))
266 (dont-check
267 (nnheader-report 'nndoc "Selected group %s" group)
269 ((zerop (setq number (length nndoc-dissection-alist)))
270 (nndoc-close-group group)
271 (nnheader-report 'nndoc "No articles in group %s" group))
273 (nnheader-insert "211 %d %d %d %s\n" number 1 number group)))))
275 (deffoo nndoc-request-type (group &optional article)
276 (cond ((not article) 'unknown)
277 (nndoc-post-type nndoc-post-type)
278 (t 'unknown)))
280 (deffoo nndoc-close-group (group &optional server)
281 (nndoc-possibly-change-buffer group server)
282 (and nndoc-current-buffer
283 (buffer-name nndoc-current-buffer)
284 (kill-buffer nndoc-current-buffer))
285 (setq nndoc-group-alist (delq (assoc group nndoc-group-alist)
286 nndoc-group-alist))
287 (setq nndoc-current-buffer nil)
288 (nnoo-close-server 'nndoc server)
289 (setq nndoc-dissection-alist nil)
292 (deffoo nndoc-request-list (&optional server)
293 nil)
295 (deffoo nndoc-request-newgroups (date &optional server)
296 nil)
298 (deffoo nndoc-request-list-newsgroups (&optional server)
299 nil)
302 ;;; Internal functions.
304 (defun nndoc-possibly-change-buffer (group source)
305 (let (buf)
306 (cond
307 ;; The current buffer is this group's buffer.
308 ((and nndoc-current-buffer
309 (buffer-name nndoc-current-buffer)
310 (eq nndoc-current-buffer
311 (setq buf (cdr (assoc group nndoc-group-alist))))))
312 ;; We change buffers by taking an old from the group alist.
313 ;; `source' is either a string (a file name) or a buffer object.
314 (buf
315 (setq nndoc-current-buffer buf))
316 ;; It's a totally new group.
317 ((or (and (bufferp nndoc-address)
318 (buffer-name nndoc-address))
319 (and (stringp nndoc-address)
320 (file-exists-p nndoc-address)
321 (not (file-directory-p nndoc-address))))
322 (push (cons group (setq nndoc-current-buffer
323 (get-buffer-create
324 (concat " *nndoc " group "*"))))
325 nndoc-group-alist)
326 (setq nndoc-dissection-alist nil)
327 (save-excursion
328 (set-buffer nndoc-current-buffer)
329 (erase-buffer)
330 (if (and (stringp nndoc-address)
331 (string-match nndoc-binary-file-names nndoc-address))
332 (let ((coding-system-for-read 'binary))
333 (mm-insert-file-contents nndoc-address))
334 (if (stringp nndoc-address)
335 (nnheader-insert-file-contents nndoc-address)
336 (insert-buffer-substring nndoc-address))
337 (run-hooks 'nndoc-open-document-hook)))))
338 ;; Initialize the nndoc structures according to this new document.
339 (when (and nndoc-current-buffer
340 (not nndoc-dissection-alist))
341 (save-excursion
342 (set-buffer nndoc-current-buffer)
343 (nndoc-set-delims)
344 (if (eq nndoc-article-type 'mime-parts)
345 (nndoc-dissect-mime-parts)
346 (nndoc-dissect-buffer))))
347 (unless nndoc-current-buffer
348 (nndoc-close-server))
349 ;; Return whether we managed to select a file.
350 nndoc-current-buffer))
353 ;;; Deciding what document type we have
356 (defun nndoc-set-delims ()
357 "Set the nndoc delimiter variables according to the type of the document."
358 (let ((vars '(nndoc-file-begin
359 nndoc-first-article
360 nndoc-article-begin-function
361 nndoc-head-begin nndoc-head-end
362 nndoc-file-end nndoc-article-begin
363 nndoc-body-begin nndoc-body-end-function nndoc-body-end
364 nndoc-prepare-body-function nndoc-article-transform-function
365 nndoc-generate-head-function nndoc-body-begin-function
366 nndoc-head-begin-function
367 nndoc-generate-article-function
368 nndoc-dissection-function)))
369 (while vars
370 (set (pop vars) nil)))
371 (let (defs)
372 ;; Guess away until we find the real file type.
373 (while (assq 'guess (setq defs (cdr (assq nndoc-article-type
374 nndoc-type-alist))))
375 (setq nndoc-article-type (nndoc-guess-type nndoc-article-type)))
376 ;; Set the nndoc variables.
377 (while defs
378 (set (intern (format "nndoc-%s" (caar defs)))
379 (cdr (pop defs))))))
381 (defun nndoc-guess-type (subtype)
382 (let ((alist nndoc-type-alist)
383 results result entry)
384 (while (and (not result)
385 (setq entry (pop alist)))
386 (when (memq subtype (or (cdr (assq 'subtype entry)) '(guess)))
387 (goto-char (point-min))
388 ;; Remove blank lines.
389 (while (eq (following-char) ?\n)
390 (delete-char 1))
391 (when (numberp (setq result (funcall (intern
392 (format "nndoc-%s-type-p"
393 (car entry))))))
394 (push (cons result entry) results)
395 (setq result nil))))
396 (unless (or result results)
397 (error "Document is not of any recognized type"))
398 (if result
399 (car entry)
400 (cadar (last (sort results 'car-less-than-car))))))
403 ;;; Built-in type predicates and functions
406 (defun nndoc-mbox-type-p ()
407 (when (looking-at message-unix-mail-delimiter)
410 (defun nndoc-mbox-article-begin ()
411 (when (re-search-forward (concat "^" message-unix-mail-delimiter) nil t)
412 (goto-char (match-beginning 0))))
414 (defun nndoc-mbox-body-end ()
415 (let ((beg (point))
416 len end)
417 (when
418 (save-excursion
419 (and (re-search-backward
420 (concat "^" message-unix-mail-delimiter) nil t)
421 (setq end (point))
422 (search-forward "\n\n" beg t)
423 (re-search-backward
424 "^Content-Length:[ \t]*\\([0-9]+\\) *$" end t)
425 (setq len (string-to-number (match-string 1)))
426 (search-forward "\n\n" beg t)
427 (unless (= (setq len (+ (point) len)) (point-max))
428 (and (< len (point-max))
429 (goto-char len)
430 (looking-at message-unix-mail-delimiter)))))
431 (goto-char len))))
433 (defun nndoc-mmdf-type-p ()
434 (when (looking-at "\^A\^A\^A\^A$")
437 (defun nndoc-news-type-p ()
438 (when (looking-at "^Path:.*\n")
441 (defun nndoc-rnews-type-p ()
442 (when (looking-at "#! *rnews")
445 (defun nndoc-rnews-body-end ()
446 (and (re-search-backward nndoc-article-begin nil t)
447 (forward-line 1)
448 (goto-char (+ (point) (string-to-number (match-string 1))))))
450 (defun nndoc-babyl-type-p ()
451 (when (re-search-forward "\^_\^L *\n" nil t)
454 (defun nndoc-babyl-body-begin ()
455 (re-search-forward "^\n" nil t)
456 (when (looking-at "\\*\\*\\* EOOH \\*\\*\\*")
457 (let ((next (or (save-excursion
458 (re-search-forward nndoc-article-begin nil t))
459 (point-max))))
460 (unless (re-search-forward "^\n" next t)
461 (goto-char next)
462 (forward-line -1)
463 (insert "\n")
464 (forward-line -1)))))
466 (defun nndoc-babyl-head-begin ()
467 (when (re-search-forward "^[0-9].*\n" nil t)
468 (when (looking-at "\\*\\*\\* EOOH \\*\\*\\*")
469 (forward-line 1))
472 (defun nndoc-forward-type-p ()
473 (when (and (re-search-forward "^-+ \\(Start of \\)?forwarded message.*\n+"
474 nil t)
475 (looking-at "[\r\n]*[a-zA-Z][a-zA-Z0-9-]*:\\|^>?From "))
478 (defun nndoc-rfc934-type-p ()
479 (when (and (re-search-forward "^-+ Start of forwarded.*\n+" nil t)
480 (not (re-search-forward "^Subject:.*digest" nil t))
481 (not (re-search-backward "^From:" nil t 2))
482 (not (re-search-forward "^From:" nil t 2)))
485 (defun nndoc-mailman-type-p ()
486 (when (re-search-forward "^--__--__--\n+" nil t)
489 (defun nndoc-rfc822-forward-type-p ()
490 (save-restriction
491 (message-narrow-to-head)
492 (when (re-search-forward "^Content-Type: *message/rfc822" nil t)
493 t)))
495 (defun nndoc-rfc822-forward-body-end-function ()
496 (goto-char (point-max)))
498 (defun nndoc-rfc822-forward-generate-article (article &optional head)
499 (let ((entry (cdr (assq article nndoc-dissection-alist)))
500 (begin (point))
501 encoding)
502 (with-current-buffer nndoc-current-buffer
503 (save-restriction
504 (message-narrow-to-head)
505 (setq encoding (message-fetch-field "content-transfer-encoding"))))
506 (insert-buffer-substring nndoc-current-buffer (car entry) (nth 3 entry))
507 (when encoding
508 (save-restriction
509 (narrow-to-region begin (point-max))
510 (mm-decode-content-transfer-encoding
511 (intern (downcase (mail-header-strip encoding))))))
512 (when head
513 (goto-char begin)
514 (when (search-forward "\n\n" nil t)
515 (delete-region (1- (point)) (point-max)))))
518 (defun nndoc-rfc822-forward-generate-head (article)
519 (nndoc-rfc822-forward-generate-article article 'head))
521 (defun nndoc-mime-parts-type-p ()
522 (let ((case-fold-search t)
523 (limit (search-forward "\n\n" nil t)))
524 (goto-char (point-min))
525 (when (and limit
526 (re-search-forward
527 (concat "\
528 ^Content-Type:[ \t]*multipart/[a-z]+ *; *\\(\\(\n[ \t]\\)?.*;\\)*"
529 "\\(\n[ \t]\\)?[ \t]*boundary=\"?[^\"\n]*[^\" \t\n]")
530 limit t))
531 t)))
533 (defun nndoc-transform-mime-parts (article)
534 (let* ((entry (cdr (assq article nndoc-dissection-alist)))
535 (headers (nth 5 entry)))
536 (when headers
537 (goto-char (point-min))
538 (insert headers))))
540 (defun nndoc-generate-mime-parts-head (article)
541 (let* ((entry (cdr (assq article nndoc-dissection-alist)))
542 (headers (nth 6 entry)))
543 (save-restriction
544 (narrow-to-region (point) (point))
545 (insert-buffer-substring
546 nndoc-current-buffer (car entry) (nth 1 entry))
547 (goto-char (point-max)))
548 (when headers
549 (insert headers))))
551 (defun nndoc-clari-briefs-type-p ()
552 (when (let ((case-fold-search nil))
553 (re-search-forward "^\t[^a-z]+ ([^a-z]+) --" nil t))
556 (defun nndoc-transform-clari-briefs (article)
557 (goto-char (point-min))
558 (when (looking-at " *\\*\\(.*\\)\n")
559 (replace-match "" t t))
560 (nndoc-generate-clari-briefs-head article))
562 (defun nndoc-generate-clari-briefs-head (article)
563 (let ((entry (cdr (assq article nndoc-dissection-alist)))
564 subject from)
565 (save-excursion
566 (set-buffer nndoc-current-buffer)
567 (save-restriction
568 (narrow-to-region (car entry) (nth 3 entry))
569 (goto-char (point-min))
570 (when (looking-at " *\\*\\(.*\\)$")
571 (setq subject (match-string 1))
572 (when (string-match "[ \t]+$" subject)
573 (setq subject (substring subject 0 (match-beginning 0)))))
574 (when
575 (let ((case-fold-search nil))
576 (re-search-forward
577 "^\t\\([^a-z]+\\(,[^(]+\\)? ([^a-z]+)\\) --" nil t))
578 (setq from (match-string 1)))))
579 (insert "From: " "clari@clari.net (" (or from "unknown") ")"
580 "\nSubject: " (or subject "(no subject)") "\n")))
582 (defun nndoc-exim-bounce-type-p ()
583 (and (re-search-forward "^------ This is a copy of the message, including all the headers. ------" nil t)
586 (defun nndoc-exim-bounce-body-end-function ()
587 (goto-char (point-max)))
590 (defun nndoc-mime-digest-type-p ()
591 (let ((case-fold-search t)
592 boundary-id b-delimiter entry)
593 (when (and
594 (re-search-forward
595 (concat "^Content-Type: *multipart/digest;[ \t\n]*[ \t]"
596 "boundary=\"?\\([^\"\n]*[^\" \t\n]\\)")
597 nil t)
598 (match-beginning 1))
599 (setq boundary-id (match-string 1)
600 b-delimiter (concat "\n--" boundary-id "[ \t]*$"))
601 (setq entry (assq 'mime-digest nndoc-type-alist))
602 (setcdr entry
603 (list
604 (cons 'head-begin "^ ?\n")
605 (cons 'head-end "^ ?$")
606 (cons 'body-begin "^ ?\n")
607 (cons 'article-begin b-delimiter)
608 (cons 'body-end-function 'nndoc-digest-body-end)
609 (cons 'file-end (concat "^--" boundary-id "--[ \t]*$"))))
610 t)))
612 (defun nndoc-standard-digest-type-p ()
613 (when (and (re-search-forward (concat "^" (make-string 70 ?-) "\n\n") nil t)
614 (re-search-forward
615 (concat "\n\n" (make-string 30 ?-) "\n\n") nil t))
618 (defun nndoc-digest-body-end ()
619 (and (re-search-forward nndoc-article-begin nil t)
620 (goto-char (match-beginning 0))))
622 (defun nndoc-slack-digest-type-p ()
625 (defun nndoc-lanl-gov-announce-type-p ()
626 (when (let ((case-fold-search nil))
627 (re-search-forward "^\\\\\\\\\n\\(Paper\\( (\\*cross-listing\\*)\\)?: [a-zA-Z-\\.]+/[0-9]+\\|arXiv:\\)" nil t))
630 (defun nndoc-transform-lanl-gov-announce (article)
631 (let ((case-fold-search nil))
632 (goto-char (point-max))
633 (when (re-search-backward "^\\\\\\\\ +( *\\([^ ]*\\) , *\\([^ ]*\\))" nil t)
634 (replace-match "\n\nGet it at \\1 (\\2)" t nil))
635 (goto-char (point-min))
636 (while (re-search-forward "^\\\\\\\\$" nil t)
637 (replace-match "" t nil))
638 (goto-char (point-min))
639 (when (re-search-forward "^replaced with revised version +\\(.*[^ ]\\) +" nil t)
640 (replace-match "Date: \\1 (revised) " t nil))
641 (goto-char (point-min))
642 (unless (re-search-forward "^From" nil t)
643 (goto-char (point-min))
644 (when (re-search-forward "^Authors?: \\(.*\\)" nil t)
645 (goto-char (point-min))
646 (insert "From: " (match-string 1) "\n")))
647 (when (re-search-forward "^arXiv:" nil t)
648 (replace-match "Paper: arXiv:" t nil))))
650 (defun nndoc-generate-lanl-gov-head (article)
651 (let ((entry (cdr (assq article nndoc-dissection-alist)))
652 (from "<no address given>")
653 subject date)
654 (save-excursion
655 (set-buffer nndoc-current-buffer)
656 (save-restriction
657 (narrow-to-region (car entry) (nth 1 entry))
658 (goto-char (point-min))
659 (when (looking-at "^\\(Paper.*: \\|arXiv:\\)\\([0-9a-zA-Z-\\./]+\\)")
660 (setq subject (concat " (" (match-string 2) ")"))
661 (when (re-search-forward "^From: \\(.*\\)" nil t)
662 (setq from (concat "<"
663 (cadr (funcall gnus-extract-address-components
664 (match-string 1))) ">")))
665 (if (re-search-forward "^Date: +\\([^(]*\\)" nil t)
666 (setq date (match-string 1))
667 (when (re-search-forward "^replaced with revised version +\\([^(]*\\)" nil t)
668 (setq date (match-string 1))))
669 (when (re-search-forward "^Title: \\([^\f]*\\)\nAuthors?: \\(.*\\)"
670 nil t)
671 (setq subject (concat (match-string 1) subject))
672 (setq from (concat (match-string 2) " " from))))))
673 (while (and from (string-match "(\[^)\]*)" from))
674 (setq from (replace-match "" t t from)))
675 (insert "From: " (or from "unknown")
676 "\nSubject: " (or subject "(no subject)") "\n")
677 (if date (insert "Date: " date))))
679 (defun nndoc-nsmail-type-p ()
680 (when (looking-at "From - ")
683 (defun nndoc-outlook-article-begin ()
684 (prog1 (re-search-forward "From:\\|Received:" nil t)
685 (goto-char (match-beginning 0))))
687 (defun nndoc-outlook-type-p ()
688 ;; FIXME: Is JMF the magic of outlook mailbox? -- ShengHuo.
689 (looking-at "JMF"))
691 (defun nndoc-oe-dbx-type-p ()
692 (looking-at (mm-string-to-multibyte "\317\255\022\376")))
694 (defun nndoc-read-little-endian ()
695 (+ (prog1 (char-after) (forward-char 1))
696 (lsh (prog1 (char-after) (forward-char 1)) 8)
697 (lsh (prog1 (char-after) (forward-char 1)) 16)
698 (lsh (prog1 (char-after) (forward-char 1)) 24)))
700 (defun nndoc-oe-dbx-decode-block ()
701 (list
702 (nndoc-read-little-endian) ;; this address
703 (nndoc-read-little-endian) ;; next address offset
704 (nndoc-read-little-endian) ;; blocksize
705 (nndoc-read-little-endian))) ;; next address
707 (defun nndoc-oe-dbx-dissection ()
708 (let ((i 0) blk p tp)
709 (goto-char 60117) ;; 0x0000EAD4+1
710 (setq p (point))
711 (unless (eobp)
712 (setq blk (nndoc-oe-dbx-decode-block)))
713 (while (and blk (> (car blk) 0) (or (zerop (nth 3 blk))
714 (> (nth 3 blk) p)))
715 (push (list (incf i) p nil nil nil 0) nndoc-dissection-alist)
716 (while (and (> (car blk) 0) (> (nth 3 blk) p))
717 (goto-char (1+ (nth 3 blk)))
718 (setq blk (nndoc-oe-dbx-decode-block)))
719 (if (or (<= (car blk) p)
720 (<= (nth 1 blk) 0)
721 (not (zerop (nth 3 blk))))
722 (setq blk nil)
723 (setq tp (+ (car blk) (nth 1 blk) 17))
724 (if (or (<= tp p) (>= tp (point-max)))
725 (setq blk nil)
726 (goto-char tp)
727 (setq p tp
728 blk (nndoc-oe-dbx-decode-block)))))))
730 (defun nndoc-oe-dbx-generate-article (article &optional head)
731 (let ((entry (cdr (assq article nndoc-dissection-alist)))
732 (cur (current-buffer))
733 (begin (point))
734 blk p)
735 (with-current-buffer nndoc-current-buffer
736 (setq p (car entry))
737 (while (> p (point-min))
738 (goto-char p)
739 (setq blk (nndoc-oe-dbx-decode-block))
740 (setq p (point))
741 (with-current-buffer cur
742 (insert-buffer-substring nndoc-current-buffer p (+ p (nth 2 blk))))
743 (setq p (1+ (nth 3 blk)))))
744 (goto-char begin)
745 (while (re-search-forward "\r$" nil t)
746 (delete-backward-char 1))
747 (when head
748 (goto-char begin)
749 (when (search-forward "\n\n" nil t)
750 (setcar (cddddr entry) (count-lines (point) (point-max)))
751 (delete-region (1- (point)) (point-max))))
754 (defun nndoc-oe-dbx-generate-head (article)
755 (nndoc-oe-dbx-generate-article article 'head))
757 (defun nndoc-mail-in-mail-type-p ()
758 (let (found)
759 (save-excursion
760 (catch 'done
761 (while (re-search-forward "\n\n[-A-Za-z0-9]+:" nil t)
762 (setq found 0)
763 (forward-line)
764 (while (looking-at "[ \t]\\|[-A-Za-z0-9]+:")
765 (if (looking-at "[-A-Za-z0-9]+:")
766 (setq found (1+ found)))
767 (forward-line))
768 (if (and (> found 0) (looking-at "\n"))
769 (throw 'done 9999)))
770 nil))))
772 (defun nndoc-mail-in-mail-article-begin ()
773 (let (point found)
774 (if (catch 'done
775 (while (re-search-forward "\n\n\\([-A-Za-z0-9]+:\\)" nil t)
776 (setq found 0)
777 (setq point (match-beginning 1))
778 (forward-line)
779 (while (looking-at "[ \t]\\|[-A-Za-z0-9]+:")
780 (if (looking-at "[-A-Za-z0-9]+:")
781 (setq found (1+ found)))
782 (forward-line))
783 (if (and (> found 0) (looking-at "\n"))
784 (throw 'done t)))
785 nil)
786 (goto-char point))))
788 (deffoo nndoc-request-accept-article (group &optional server last)
789 nil)
792 ;;; Functions for dissecting the documents
795 (defun nndoc-search (regexp)
796 (prog1
797 (re-search-forward regexp nil t)
798 (beginning-of-line)))
800 (defun nndoc-dissect-buffer ()
801 "Go through the document and partition it into heads/bodies/articles."
802 (let ((i 0)
803 (first t)
804 art-begin head-begin head-end body-begin body-end)
805 (setq nndoc-dissection-alist nil)
806 (save-excursion
807 (set-buffer nndoc-current-buffer)
808 (goto-char (point-min))
809 ;; Remove blank lines.
810 (while (eq (following-char) ?\n)
811 (delete-char 1))
812 (if nndoc-dissection-function
813 (funcall nndoc-dissection-function)
814 ;; Find the beginning of the file.
815 (when nndoc-file-begin
816 (nndoc-search nndoc-file-begin))
817 ;; Go through the file.
818 (while (if (and first nndoc-first-article)
819 (nndoc-search nndoc-first-article)
820 (if art-begin
821 (goto-char art-begin)
822 (nndoc-article-begin)))
823 (setq first nil
824 art-begin nil)
825 (cond (nndoc-head-begin-function
826 (funcall nndoc-head-begin-function))
827 (nndoc-head-begin
828 (nndoc-search nndoc-head-begin)))
829 (if (or (eobp)
830 (and nndoc-file-end
831 (looking-at nndoc-file-end)))
832 (goto-char (point-max))
833 (setq head-begin (point))
834 (nndoc-search (or nndoc-head-end "^$"))
835 (setq head-end (point))
836 (if nndoc-body-begin-function
837 (funcall nndoc-body-begin-function)
838 (nndoc-search (or nndoc-body-begin "^\n")))
839 (setq body-begin (point))
840 (or (and nndoc-body-end-function
841 (funcall nndoc-body-end-function))
842 (and nndoc-body-end
843 (nndoc-search nndoc-body-end))
844 (and (nndoc-article-begin)
845 (setq art-begin (point)))
846 (progn
847 (goto-char (point-max))
848 (when nndoc-file-end
849 (and (re-search-backward nndoc-file-end nil t)
850 (beginning-of-line)))))
851 (setq body-end (point))
852 (push (list (incf i) head-begin head-end body-begin body-end
853 (count-lines body-begin body-end))
854 nndoc-dissection-alist)))))))
856 (defun nndoc-article-begin ()
857 (if nndoc-article-begin-function
858 (funcall nndoc-article-begin-function)
859 (ignore-errors
860 (nndoc-search nndoc-article-begin))))
862 (defun nndoc-unquote-dashes ()
863 "Unquote quoted non-separators in digests."
864 (while (re-search-forward "^- -"nil t)
865 (replace-match "-" t t)))
867 ;; Against compiler warnings.
868 (defvar nndoc-mime-split-ordinal)
870 (defun nndoc-dissect-mime-parts ()
871 "Go through a MIME composite article and partition it into sub-articles.
872 When a MIME entity contains sub-entities, dissection produces one article for
873 the header of this entity, and one article per sub-entity."
874 (setq nndoc-dissection-alist nil
875 nndoc-mime-split-ordinal 0)
876 (save-excursion
877 (set-buffer nndoc-current-buffer)
878 (nndoc-dissect-mime-parts-sub (point-min) (point-max) nil nil nil)))
880 (defun nndoc-dissect-mime-parts-sub (head-begin body-end article-insert
881 position parent)
882 "Dissect an entity, within a composite MIME message.
883 The complete message or MIME entity extends from HEAD-BEGIN to BODY-END.
884 ARTICLE-INSERT should be added at beginning for generating a full article.
885 The string POSITION holds a dotted decimal representation of the article
886 position in the hierarchical structure, it is nil for the outer entity.
887 PARENT is the message-ID of the parent summary line, or nil for none."
888 (let ((case-fold-search t)
889 (message-id (nnmail-message-id))
890 head-end body-begin summary-insert message-rfc822 multipart-any
891 subject content-type type subtype boundary-regexp)
892 ;; Gracefully handle a missing body.
893 (goto-char head-begin)
894 (if (or (and (eq (char-after) ?\n) (or (forward-char 1) t))
895 (search-forward "\n\n" body-end t))
896 (setq head-end (1- (point))
897 body-begin (point))
898 (setq head-end body-end
899 body-begin body-end))
900 (narrow-to-region head-begin head-end)
901 ;; Save MIME attributes.
902 (goto-char head-begin)
903 (setq content-type (message-fetch-field "Content-Type"))
904 (when content-type
905 (when (string-match
906 "^ *\\([^ \t\n/;]+\\)/\\([^ \t\n/;]+\\)" content-type)
907 (setq type (downcase (match-string 1 content-type))
908 subtype (downcase (match-string 2 content-type))
909 message-rfc822 (and (string= type "message")
910 (string= subtype "rfc822"))
911 multipart-any (string= type "multipart")))
912 (when (string-match ";[ \t\n]*name=\\([^ \t\n;]+\\)" content-type)
913 (setq subject (match-string 1 content-type)))
914 (when (string-match "boundary=\"?\\([^\"\n]*[^\" \t\n]\\)" content-type)
915 (setq boundary-regexp (concat "^--"
916 (regexp-quote
917 (match-string 1 content-type))
918 "\\(--\\)?[ \t]*\n"))))
919 (unless subject
920 (when (or multipart-any (not article-insert))
921 (setq subject (message-fetch-field "Subject"))))
922 (unless type
923 (setq type "text"
924 subtype "plain"))
925 ;; Prepare the article and summary inserts.
926 (unless article-insert
927 (setq article-insert (buffer-string)
928 head-end head-begin))
929 ;; Fix MIME-Version
930 (unless (string-match "MIME-Version:" article-insert)
931 (setq article-insert
932 (concat article-insert "MIME-Version: 1.0\n")))
933 (setq summary-insert article-insert)
934 ;; - summary Subject.
935 (setq summary-insert
936 (let ((line (concat "Subject: <" position
937 (and position multipart-any ".")
938 (and multipart-any "*")
939 (and (or position multipart-any) " ")
940 (cond ((string= subtype "plain") type)
941 ((string= subtype "basic") type)
942 (t subtype))
944 (and subject " ")
945 subject
946 "\n")))
947 (if (string-match "Subject:.*\n\\([ \t].*\n\\)*" summary-insert)
948 (replace-match line t t summary-insert)
949 (concat summary-insert line))))
950 ;; - summary Message-ID.
951 (setq summary-insert
952 (let ((line (concat "Message-ID: " message-id "\n")))
953 (if (string-match "Message-ID:.*\n\\([ \t].*\n\\)*" summary-insert)
954 (replace-match line t t summary-insert)
955 (concat summary-insert line))))
956 ;; - summary References.
957 (when parent
958 (setq summary-insert
959 (let ((line (concat "References: " parent "\n")))
960 (if (string-match "References:.*\n\\([ \t].*\n\\)*"
961 summary-insert)
962 (replace-match line t t summary-insert)
963 (concat summary-insert line)))))
964 ;; Generate dissection information for this entity.
965 (push (list (incf nndoc-mime-split-ordinal)
966 head-begin head-end body-begin body-end
967 (count-lines body-begin body-end)
968 article-insert summary-insert)
969 nndoc-dissection-alist)
970 ;; Recurse for all sub-entities, if any.
971 (widen)
972 (cond
973 (message-rfc822
974 (save-excursion
975 (nndoc-dissect-mime-parts-sub body-begin body-end nil
976 position message-id)))
977 ((and multipart-any boundary-regexp)
978 (let ((part-counter 0)
979 part-begin part-end eof-flag)
980 (while (string-match "\
981 ^\\(Lines\\|Content-\\(Type\\|Transfer-Encoding\\|Disposition\\)\\):.*\n\\([ \t].*\n\\)*"
982 article-insert)
983 (setq article-insert (replace-match "" t t article-insert)))
984 (let ((case-fold-search nil))
985 (goto-char body-begin)
986 (setq eof-flag (not (re-search-forward boundary-regexp body-end t)))
987 (while (not eof-flag)
988 (setq part-begin (point))
989 (cond ((re-search-forward boundary-regexp body-end t)
990 (or (not (match-string 1))
991 (string= (match-string 1) "")
992 (setq eof-flag t))
993 (forward-line -1)
994 (setq part-end (point))
995 (forward-line 1))
996 (t (setq part-end body-end
997 eof-flag t)))
998 (save-excursion
999 (nndoc-dissect-mime-parts-sub
1000 part-begin part-end article-insert
1001 (concat position
1002 (and position ".")
1003 (format "%d" (incf part-counter)))
1004 message-id)))))))))
1006 ;;;###autoload
1007 (defun nndoc-add-type (definition &optional position)
1008 "Add document DEFINITION to the list of nndoc document definitions.
1009 If POSITION is nil or `last', the definition will be added
1010 as the last checked definition, if t or `first', add as the
1011 first definition, and if any other symbol, add after that
1012 symbol in the alist."
1013 ;; First remove any old instances.
1014 (gnus-pull (car definition) nndoc-type-alist)
1015 ;; Then enter the new definition in the proper place.
1016 (cond
1017 ((or (null position) (eq position 'last))
1018 (setq nndoc-type-alist (nconc nndoc-type-alist (list definition))))
1019 ((or (eq position t) (eq position 'first))
1020 (push definition nndoc-type-alist))
1022 (let ((list (memq (assq position nndoc-type-alist)
1023 nndoc-type-alist)))
1024 (unless list
1025 (error "No such position: %s" position))
1026 (setcdr list (cons definition (cdr list)))))))
1028 (provide 'nndoc)
1030 ;; arch-tag: f5c2970e-0387-47ac-a0b3-6cc317dffabe
1031 ;;; nndoc.el ends here