merge emacs-23
[emacs.git] / lisp / gnus / nndoc.el
blobb3361bb4a945c46366225d5bf9210827d43b22c4
1 ;;; nndoc.el --- single file access for Gnus
3 ;; Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003,
4 ;; 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software Foundation, Inc.
6 ;; Author: Lars Magne Ingebrigtsen <larsi@gnus.org>
7 ;; Masanobu UMEDA <umerin@flab.flab.fujitsu.junet>
8 ;; Keywords: news
10 ;; This file is part of GNU Emacs.
12 ;; GNU Emacs is free software: you can redistribute it and/or modify
13 ;; it under the terms of the GNU General Public License as published by
14 ;; the Free Software Foundation, either version 3 of the License, or
15 ;; (at your option) any later version.
17 ;; GNU Emacs is distributed in the hope that it will be useful,
18 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
19 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 ;; GNU General Public License for more details.
22 ;; You should have received a copy of the GNU General Public License
23 ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
25 ;;; Commentary:
27 ;; For Outlook mail boxes format, see http://mbx2mbox.sourceforge.net/
29 ;;; Code:
31 (require 'nnheader)
32 (require 'message)
33 (require 'nnmail)
34 (require 'nnoo)
35 (require 'gnus-util)
36 (require 'mm-util)
37 (eval-when-compile (require 'cl))
39 (nnoo-declare nndoc)
41 (defvoo nndoc-article-type 'guess
42 "*Type of the file.
43 One of `mbox', `babyl', `digest', `news', `rnews', `mmdf', `forward',
44 `rfc934', `rfc822-forward', `mime-parts', `standard-digest',
45 `slack-digest', `clari-briefs', `nsmail', `outlook', `oe-dbx',
46 `mailman', `exim-bounce', or `guess'.")
48 (defvoo nndoc-post-type 'mail
49 "*Whether the nndoc group is `mail' or `post'.")
51 (defvoo nndoc-open-document-hook 'nnheader-ms-strip-cr
52 "Hook run after opening a document.
53 The default function removes all trailing carriage returns
54 from the document.")
56 (defvar nndoc-type-alist
57 `((mmdf
58 (article-begin . "^\^A\^A\^A\^A\n")
59 (body-end . "^\^A\^A\^A\^A\n"))
60 (mime-digest
61 (article-begin . "")
62 (head-begin . "^ ?\n")
63 (head-end . "^ ?$")
64 (body-end . "")
65 (file-end . "")
66 (subtype digest guess))
67 (mime-parts
68 (generate-head-function . nndoc-generate-mime-parts-head)
69 (article-transform-function . nndoc-transform-mime-parts))
70 (nsmail
71 (article-begin . "^From - "))
72 (news
73 (article-begin . "^Path:"))
74 (rnews
75 (article-begin . "^#! *rnews +\\([0-9]+\\) *\n")
76 (body-end-function . nndoc-rnews-body-end))
77 (mbox
78 (article-begin-function . nndoc-mbox-article-begin)
79 (body-end-function . nndoc-mbox-body-end))
80 (babyl
81 (article-begin . "\^_\^L *\n")
82 (body-end . "\^_")
83 (body-begin-function . nndoc-babyl-body-begin)
84 (head-begin-function . nndoc-babyl-head-begin))
85 (exim-bounce
86 (article-begin . "^------ This is a copy of the message, including all the headers. ------\n\n")
87 (body-end-function . nndoc-exim-bounce-body-end-function))
88 (rfc934
89 (article-begin . "^--.*\n+")
90 (body-end . "^--.*$")
91 (prepare-body-function . nndoc-unquote-dashes))
92 (mailman
93 (article-begin . "^--__--__--\n\nMessage:")
94 (body-end . "^--__--__--$")
95 (prepare-body-function . nndoc-unquote-dashes))
96 (clari-briefs
97 (article-begin . "^ \\*")
98 (body-end . "^\t------*[ \t]^*\n^ \\*")
99 (body-begin . "^\t")
100 (head-end . "^\t")
101 (generate-head-function . nndoc-generate-clari-briefs-head)
102 (article-transform-function . nndoc-transform-clari-briefs))
104 (standard-digest
105 (first-article . ,(concat "^" (make-string 70 ?-) "\n *\n+"))
106 (article-begin . ,(concat "^\n" (make-string 30 ?-) "\n *\n+"))
107 (prepare-body-function . nndoc-unquote-dashes)
108 (body-end-function . nndoc-digest-body-end)
109 (head-end . "^ *$")
110 (body-begin . "^ *\n")
111 (file-end . "^End of .*digest.*[0-9].*\n\\*\\*\\|^End of.*Digest *$")
112 (subtype digest guess))
113 (slack-digest
114 (article-begin . "^------------------------------*[\n \t]+")
115 (head-end . "^ ?$")
116 (body-end-function . nndoc-digest-body-end)
117 (body-begin . "^ ?$")
118 (file-end . "^End of")
119 (prepare-body-function . nndoc-unquote-dashes)
120 (subtype digest guess))
121 (lanl-gov-announce
122 (article-begin . "^\\\\\\\\\n")
123 (head-begin . "^\\(Paper.*:\\|arXiv:\\)")
124 (head-end . "\\(^\\\\\\\\.*\n\\|-----------------\\)")
125 (body-begin . "")
126 (body-end . "\\(-------------------------------------------------\\|%-%-%-%-%-%-%-%-%-%-%-%-%-%-\\|%%--%%--%%--%%--%%--%%--%%--%%--\\|%%%---%%%---%%%---%%%---\\)")
127 (file-end . "\\(^Title: Recent Seminal\\|%%%---%%%---%%%---%%%---\\)")
128 (generate-head-function . nndoc-generate-lanl-gov-head)
129 (article-transform-function . nndoc-transform-lanl-gov-announce)
130 (subtype preprints guess))
131 (rfc822-forward
132 (article-begin . "^\n+")
133 (body-end-function . nndoc-rfc822-forward-body-end-function)
134 (generate-head-function . nndoc-rfc822-forward-generate-head)
135 (generate-article-function . nndoc-rfc822-forward-generate-article))
136 (outlook
137 (article-begin-function . nndoc-outlook-article-begin)
138 (body-end . "\0"))
139 (oe-dbx ;; Outlook Express DBX format
140 (dissection-function . nndoc-oe-dbx-dissection)
141 (generate-head-function . nndoc-oe-dbx-generate-head)
142 (generate-article-function . nndoc-oe-dbx-generate-article))
143 (forward
144 (article-begin . "^-+ \\(Start of \\)?forwarded message.*\n+")
145 (body-end . "^-+ End \\(of \\)?forwarded message.*$")
146 (prepare-body-function . nndoc-unquote-dashes))
147 (mail-in-mail ;; Wild guess on mailer daemon's messages or others
148 (article-begin-function . nndoc-mail-in-mail-article-begin))
149 (guess
150 (guess . t)
151 (subtype nil))
152 (digest
153 (guess . t)
154 (subtype nil))
155 (preprints
156 (guess . t)
157 (subtype nil))))
159 (defvar nndoc-binary-file-names ".[Dd][Bb][Xx]$"
160 "Regexp for binary nndoc file names.")
163 (defvoo nndoc-file-begin nil)
164 (defvoo nndoc-first-article nil)
165 (defvoo nndoc-article-begin nil)
166 (defvoo nndoc-head-begin nil)
167 (defvoo nndoc-head-end nil)
168 (defvoo nndoc-file-end nil)
169 (defvoo nndoc-body-begin nil)
170 (defvoo nndoc-body-end-function nil)
171 (defvoo nndoc-body-begin-function nil)
172 (defvoo nndoc-head-begin-function nil)
173 (defvoo nndoc-body-end nil)
174 ;; nndoc-dissection-alist is a list of sublists. Each sublist holds the
175 ;; following items. ARTICLE acts as the association key and is an ordinal
176 ;; starting at 1. HEAD-BEGIN [0], HEAD-END [1], BODY-BEGIN [2] and BODY-END
177 ;; [3] are positions in the `nndoc' buffer. LINE-COUNT [4] is a count of
178 ;; lines in the body. For MIME dissections only, ARTICLE-INSERT [5] and
179 ;; SUMMARY-INSERT [6] give headers to insert for full article or summary line
180 ;; generation, respectively. Other headers usually follow directly from the
181 ;; buffer. Value `nil' means no insert.
182 (defvoo nndoc-dissection-alist nil)
183 (defvoo nndoc-prepare-body-function nil)
184 (defvoo nndoc-generate-head-function nil)
185 (defvoo nndoc-article-transform-function nil)
186 (defvoo nndoc-article-begin-function nil)
187 (defvoo nndoc-generate-article-function nil)
188 (defvoo nndoc-dissection-function nil)
190 (defvoo nndoc-status-string "")
191 (defvoo nndoc-group-alist nil)
192 (defvoo nndoc-current-buffer nil
193 "Current nndoc news buffer.")
194 (defvoo nndoc-address nil)
196 (defconst nndoc-version "nndoc 1.0"
197 "nndoc version.")
201 ;;; Interface functions
203 (nnoo-define-basics nndoc)
205 (deffoo nndoc-retrieve-headers (articles &optional newsgroup server fetch-old)
206 (when (nndoc-possibly-change-buffer newsgroup server)
207 (save-excursion
208 (set-buffer nntp-server-buffer)
209 (erase-buffer)
210 (let (article entry)
211 (if (stringp (car articles))
212 'headers
213 (while articles
214 (when (setq entry (cdr (assq (setq article (pop articles))
215 nndoc-dissection-alist)))
216 (insert (format "221 %d Article retrieved.\n" article))
217 (if nndoc-generate-head-function
218 (funcall nndoc-generate-head-function article)
219 (insert-buffer-substring
220 nndoc-current-buffer (car entry) (nth 1 entry)))
221 (goto-char (point-max))
222 (unless (eq (char-after (1- (point))) ?\n)
223 (insert "\n"))
224 (insert (format "Lines: %d\n" (nth 4 entry)))
225 (insert ".\n")))
227 (nnheader-fold-continuation-lines)
228 'headers)))))
230 (deffoo nndoc-request-article (article &optional newsgroup server buffer)
231 (nndoc-possibly-change-buffer newsgroup server)
232 (save-excursion
233 (let ((buffer (or buffer nntp-server-buffer))
234 (entry (cdr (assq article nndoc-dissection-alist)))
235 beg)
236 (set-buffer buffer)
237 (erase-buffer)
238 (when entry
239 (cond
240 ((stringp article) nil)
241 (nndoc-generate-article-function
242 (funcall nndoc-generate-article-function article))
244 (insert-buffer-substring
245 nndoc-current-buffer (car entry) (nth 1 entry))
246 (insert "\n")
247 (setq beg (point))
248 (insert-buffer-substring
249 nndoc-current-buffer (nth 2 entry) (nth 3 entry))
250 (goto-char beg)
251 (when nndoc-prepare-body-function
252 (funcall nndoc-prepare-body-function))
253 (when nndoc-article-transform-function
254 (funcall nndoc-article-transform-function article))
255 t))))))
257 (deffoo nndoc-request-group (group &optional server dont-check)
258 "Select news GROUP."
259 (let (number)
260 (cond
261 ((not (nndoc-possibly-change-buffer group server))
262 (nnheader-report 'nndoc "No such file or buffer: %s"
263 nndoc-address))
264 (dont-check
265 (nnheader-report 'nndoc "Selected group %s" group)
267 ((zerop (setq number (length nndoc-dissection-alist)))
268 (nndoc-close-group group)
269 (nnheader-report 'nndoc "No articles in group %s" group))
271 (nnheader-insert "211 %d %d %d %s\n" number 1 number group)))))
273 (deffoo nndoc-request-type (group &optional article)
274 (cond ((not article) 'unknown)
275 (nndoc-post-type nndoc-post-type)
276 (t 'unknown)))
278 (deffoo nndoc-close-group (group &optional server)
279 (nndoc-possibly-change-buffer group server)
280 (and nndoc-current-buffer
281 (buffer-name nndoc-current-buffer)
282 (kill-buffer nndoc-current-buffer))
283 (setq nndoc-group-alist (delq (assoc group nndoc-group-alist)
284 nndoc-group-alist))
285 (setq nndoc-current-buffer nil)
286 (nnoo-close-server 'nndoc server)
287 (setq nndoc-dissection-alist nil)
290 (deffoo nndoc-request-list (&optional server)
291 nil)
293 (deffoo nndoc-request-newgroups (date &optional server)
294 nil)
296 (deffoo nndoc-request-list-newsgroups (&optional server)
297 nil)
300 ;;; Internal functions.
302 (defun nndoc-possibly-change-buffer (group source)
303 (let (buf)
304 (cond
305 ;; The current buffer is this group's buffer.
306 ((and nndoc-current-buffer
307 (buffer-name nndoc-current-buffer)
308 (eq nndoc-current-buffer
309 (setq buf (cdr (assoc group nndoc-group-alist))))))
310 ;; We change buffers by taking an old from the group alist.
311 ;; `source' is either a string (a file name) or a buffer object.
312 (buf
313 (setq nndoc-current-buffer buf))
314 ;; It's a totally new group.
315 ((or (and (bufferp nndoc-address)
316 (buffer-name nndoc-address))
317 (and (stringp nndoc-address)
318 (file-exists-p nndoc-address)
319 (not (file-directory-p nndoc-address))))
320 (push (cons group (setq nndoc-current-buffer
321 (get-buffer-create
322 (concat " *nndoc " group "*"))))
323 nndoc-group-alist)
324 (setq nndoc-dissection-alist nil)
325 (save-excursion
326 (set-buffer nndoc-current-buffer)
327 (erase-buffer)
328 (if (and (stringp nndoc-address)
329 (string-match nndoc-binary-file-names nndoc-address))
330 (let ((coding-system-for-read 'binary))
331 (mm-insert-file-contents nndoc-address))
332 (if (stringp nndoc-address)
333 (nnheader-insert-file-contents nndoc-address)
334 (insert-buffer-substring nndoc-address))
335 (run-hooks 'nndoc-open-document-hook)))))
336 ;; Initialize the nndoc structures according to this new document.
337 (when (and nndoc-current-buffer
338 (not nndoc-dissection-alist))
339 (save-excursion
340 (set-buffer nndoc-current-buffer)
341 (nndoc-set-delims)
342 (if (eq nndoc-article-type 'mime-parts)
343 (nndoc-dissect-mime-parts)
344 (nndoc-dissect-buffer))))
345 (unless nndoc-current-buffer
346 (nndoc-close-server))
347 ;; Return whether we managed to select a file.
348 nndoc-current-buffer))
351 ;;; Deciding what document type we have
354 (defun nndoc-set-delims ()
355 "Set the nndoc delimiter variables according to the type of the document."
356 (let ((vars '(nndoc-file-begin
357 nndoc-first-article
358 nndoc-article-begin-function
359 nndoc-head-begin nndoc-head-end
360 nndoc-file-end nndoc-article-begin
361 nndoc-body-begin nndoc-body-end-function nndoc-body-end
362 nndoc-prepare-body-function nndoc-article-transform-function
363 nndoc-generate-head-function nndoc-body-begin-function
364 nndoc-head-begin-function
365 nndoc-generate-article-function
366 nndoc-dissection-function)))
367 (while vars
368 (set (pop vars) nil)))
369 (let (defs)
370 ;; Guess away until we find the real file type.
371 (while (assq 'guess (setq defs (cdr (assq nndoc-article-type
372 nndoc-type-alist))))
373 (setq nndoc-article-type (nndoc-guess-type nndoc-article-type)))
374 ;; Set the nndoc variables.
375 (while defs
376 (set (intern (format "nndoc-%s" (caar defs)))
377 (cdr (pop defs))))))
379 (defun nndoc-guess-type (subtype)
380 (let ((alist nndoc-type-alist)
381 results result entry)
382 (while (and (not result)
383 (setq entry (pop alist)))
384 (when (memq subtype (or (cdr (assq 'subtype entry)) '(guess)))
385 (goto-char (point-min))
386 ;; Remove blank lines.
387 (while (eq (following-char) ?\n)
388 (delete-char 1))
389 (when (numberp (setq result (funcall (intern
390 (format "nndoc-%s-type-p"
391 (car entry))))))
392 (push (cons result entry) results)
393 (setq result nil))))
394 (unless (or result results)
395 (error "Document is not of any recognized type"))
396 (if result
397 (car entry)
398 (cadar (last (sort results 'car-less-than-car))))))
401 ;;; Built-in type predicates and functions
404 (defun nndoc-mbox-type-p ()
405 (when (looking-at message-unix-mail-delimiter)
408 (defun nndoc-mbox-article-begin ()
409 (when (re-search-forward (concat "^" message-unix-mail-delimiter) nil t)
410 (goto-char (match-beginning 0))))
412 (defun nndoc-mbox-body-end ()
413 (let ((beg (point))
414 len end)
415 (when
416 (save-excursion
417 (and (re-search-backward
418 (concat "^" message-unix-mail-delimiter) nil t)
419 (setq end (point))
420 (search-forward "\n\n" beg t)
421 (re-search-backward
422 "^Content-Length:[ \t]*\\([0-9]+\\) *$" end t)
423 (setq len (string-to-number (match-string 1)))
424 (search-forward "\n\n" beg t)
425 (unless (= (setq len (+ (point) len)) (point-max))
426 (and (< len (point-max))
427 (goto-char len)
428 (looking-at message-unix-mail-delimiter)))))
429 (goto-char len))))
431 (defun nndoc-mmdf-type-p ()
432 (when (looking-at "\^A\^A\^A\^A$")
435 (defun nndoc-news-type-p ()
436 (when (looking-at "^Path:.*\n")
439 (defun nndoc-rnews-type-p ()
440 (when (looking-at "#! *rnews")
443 (defun nndoc-rnews-body-end ()
444 (and (re-search-backward nndoc-article-begin nil t)
445 (forward-line 1)
446 (goto-char (+ (point) (string-to-number (match-string 1))))))
448 (defun nndoc-babyl-type-p ()
449 (when (re-search-forward "\^_\^L *\n" nil t)
452 (defun nndoc-babyl-body-begin ()
453 (re-search-forward "^\n" nil t)
454 (when (looking-at "\\*\\*\\* EOOH \\*\\*\\*")
455 (let ((next (or (save-excursion
456 (re-search-forward nndoc-article-begin nil t))
457 (point-max))))
458 (unless (re-search-forward "^\n" next t)
459 (goto-char next)
460 (forward-line -1)
461 (insert "\n")
462 (forward-line -1)))))
464 (defun nndoc-babyl-head-begin ()
465 (when (re-search-forward "^[0-9].*\n" nil t)
466 (when (looking-at "\\*\\*\\* EOOH \\*\\*\\*")
467 (forward-line 1))
470 (defun nndoc-forward-type-p ()
471 (when (and (re-search-forward "^-+ \\(Start of \\)?forwarded message.*\n+"
472 nil t)
473 (looking-at "[\r\n]*[a-zA-Z][a-zA-Z0-9-]*:\\|^>?From "))
476 (defun nndoc-rfc934-type-p ()
477 (when (and (re-search-forward "^-+ Start of forwarded.*\n+" nil t)
478 (not (re-search-forward "^Subject:.*digest" nil t))
479 (not (re-search-backward "^From:" nil t 2))
480 (not (re-search-forward "^From:" nil t 2)))
483 (defun nndoc-mailman-type-p ()
484 (when (re-search-forward "^--__--__--\n+" nil t)
487 (defun nndoc-rfc822-forward-type-p ()
488 (save-restriction
489 (message-narrow-to-head)
490 (when (re-search-forward "^Content-Type: *message/rfc822" nil t)
491 t)))
493 (defun nndoc-rfc822-forward-body-end-function ()
494 (goto-char (point-max)))
496 (defun nndoc-rfc822-forward-generate-article (article &optional head)
497 (let ((entry (cdr (assq article nndoc-dissection-alist)))
498 (begin (point))
499 encoding)
500 (with-current-buffer nndoc-current-buffer
501 (save-restriction
502 (message-narrow-to-head)
503 (setq encoding (message-fetch-field "content-transfer-encoding"))))
504 (insert-buffer-substring nndoc-current-buffer (car entry) (nth 3 entry))
505 (when encoding
506 (save-restriction
507 (narrow-to-region begin (point-max))
508 (mm-decode-content-transfer-encoding
509 (intern (downcase (mail-header-strip encoding))))))
510 (when head
511 (goto-char begin)
512 (when (search-forward "\n\n" nil t)
513 (delete-region (1- (point)) (point-max)))))
516 (defun nndoc-rfc822-forward-generate-head (article)
517 (nndoc-rfc822-forward-generate-article article 'head))
519 (defun nndoc-mime-parts-type-p ()
520 (let ((case-fold-search t)
521 (limit (search-forward "\n\n" nil t)))
522 (goto-char (point-min))
523 (when (and limit
524 (re-search-forward
525 (concat "\
526 ^Content-Type:[ \t]*multipart/[a-z]+ *; *\\(\\(\n[ \t]\\)?.*;\\)*"
527 "\\(\n[ \t]\\)?[ \t]*boundary=\"?[^\"\n]*[^\" \t\n]")
528 limit t))
529 t)))
531 (defun nndoc-transform-mime-parts (article)
532 (let* ((entry (cdr (assq article nndoc-dissection-alist)))
533 (headers (nth 5 entry)))
534 (when headers
535 (goto-char (point-min))
536 (insert headers))))
538 (defun nndoc-generate-mime-parts-head (article)
539 (let* ((entry (cdr (assq article nndoc-dissection-alist)))
540 (headers (nth 6 entry)))
541 (save-restriction
542 (narrow-to-region (point) (point))
543 (insert-buffer-substring
544 nndoc-current-buffer (car entry) (nth 1 entry))
545 (goto-char (point-max)))
546 (when headers
547 (insert headers))))
549 (defun nndoc-clari-briefs-type-p ()
550 (when (let ((case-fold-search nil))
551 (re-search-forward "^\t[^a-z]+ ([^a-z]+) --" nil t))
554 (defun nndoc-transform-clari-briefs (article)
555 (goto-char (point-min))
556 (when (looking-at " *\\*\\(.*\\)\n")
557 (replace-match "" t t))
558 (nndoc-generate-clari-briefs-head article))
560 (defun nndoc-generate-clari-briefs-head (article)
561 (let ((entry (cdr (assq article nndoc-dissection-alist)))
562 subject from)
563 (save-excursion
564 (set-buffer nndoc-current-buffer)
565 (save-restriction
566 (narrow-to-region (car entry) (nth 3 entry))
567 (goto-char (point-min))
568 (when (looking-at " *\\*\\(.*\\)$")
569 (setq subject (match-string 1))
570 (when (string-match "[ \t]+$" subject)
571 (setq subject (substring subject 0 (match-beginning 0)))))
572 (when
573 (let ((case-fold-search nil))
574 (re-search-forward
575 "^\t\\([^a-z]+\\(,[^(]+\\)? ([^a-z]+)\\) --" nil t))
576 (setq from (match-string 1)))))
577 (insert "From: " "clari@clari.net (" (or from "unknown") ")"
578 "\nSubject: " (or subject "(no subject)") "\n")))
580 (defun nndoc-exim-bounce-type-p ()
581 (and (re-search-forward "^------ This is a copy of the message, including all the headers. ------" nil t)
584 (defun nndoc-exim-bounce-body-end-function ()
585 (goto-char (point-max)))
588 (defun nndoc-mime-digest-type-p ()
589 (let ((case-fold-search t)
590 boundary-id b-delimiter entry)
591 (when (and
592 (re-search-forward
593 (concat "^Content-Type: *multipart/digest;[ \t\n]*[ \t]"
594 "boundary=\"?\\([^\"\n]*[^\" \t\n]\\)")
595 nil t)
596 (match-beginning 1))
597 (setq boundary-id (match-string 1)
598 b-delimiter (concat "\n--" boundary-id "[ \t]*$"))
599 (setq entry (assq 'mime-digest nndoc-type-alist))
600 (setcdr entry
601 (list
602 (cons 'head-begin "^ ?\n")
603 (cons 'head-end "^ ?$")
604 (cons 'body-begin "^ ?\n")
605 (cons 'article-begin b-delimiter)
606 (cons 'body-end-function 'nndoc-digest-body-end)
607 (cons 'file-end (concat "^--" boundary-id "--[ \t]*$"))))
608 t)))
610 (defun nndoc-standard-digest-type-p ()
611 (when (and (re-search-forward (concat "^" (make-string 70 ?-) "\n\n") nil t)
612 (re-search-forward
613 (concat "\n\n" (make-string 30 ?-) "\n\n") nil t))
616 (defun nndoc-digest-body-end ()
617 (and (re-search-forward nndoc-article-begin nil t)
618 (goto-char (match-beginning 0))))
620 (defun nndoc-slack-digest-type-p ()
623 (defun nndoc-lanl-gov-announce-type-p ()
624 (when (let ((case-fold-search nil))
625 (re-search-forward "^\\\\\\\\\n\\(Paper\\( (\\*cross-listing\\*)\\)?: [a-zA-Z-\\.]+/[0-9]+\\|arXiv:\\)" nil t))
628 (defun nndoc-transform-lanl-gov-announce (article)
629 (let ((case-fold-search nil))
630 (goto-char (point-max))
631 (when (re-search-backward "^\\\\\\\\ +( *\\([^ ]*\\) , *\\([^ ]*\\))" nil t)
632 (replace-match "\n\nGet it at \\1 (\\2)" t nil))
633 (goto-char (point-min))
634 (while (re-search-forward "^\\\\\\\\$" nil t)
635 (replace-match "" t nil))
636 (goto-char (point-min))
637 (when (re-search-forward "^replaced with revised version +\\(.*[^ ]\\) +" nil t)
638 (replace-match "Date: \\1 (revised) " t nil))
639 (goto-char (point-min))
640 (unless (re-search-forward "^From" nil t)
641 (goto-char (point-min))
642 (when (re-search-forward "^Authors?: \\(.*\\)" nil t)
643 (goto-char (point-min))
644 (insert "From: " (match-string 1) "\n")))
645 (when (re-search-forward "^arXiv:" nil t)
646 (replace-match "Paper: arXiv:" t nil))))
648 (defun nndoc-generate-lanl-gov-head (article)
649 (let ((entry (cdr (assq article nndoc-dissection-alist)))
650 (from "<no address given>")
651 subject date)
652 (save-excursion
653 (set-buffer nndoc-current-buffer)
654 (save-restriction
655 (narrow-to-region (car entry) (nth 1 entry))
656 (goto-char (point-min))
657 (when (looking-at "^\\(Paper.*: \\|arXiv:\\)\\([0-9a-zA-Z-\\./]+\\)")
658 (setq subject (concat " (" (match-string 2) ")"))
659 (when (re-search-forward "^From: \\(.*\\)" nil t)
660 (setq from (concat "<"
661 (cadr (funcall gnus-extract-address-components
662 (match-string 1))) ">")))
663 (if (re-search-forward "^Date: +\\([^(]*\\)" nil t)
664 (setq date (match-string 1))
665 (when (re-search-forward "^replaced with revised version +\\([^(]*\\)" nil t)
666 (setq date (match-string 1))))
667 (when (re-search-forward "^Title: \\([^\f]*\\)\nAuthors?: \\(.*\\)"
668 nil t)
669 (setq subject (concat (match-string 1) subject))
670 (setq from (concat (match-string 2) " " from))))))
671 (while (and from (string-match "(\[^)\]*)" from))
672 (setq from (replace-match "" t t from)))
673 (insert "From: " (or from "unknown")
674 "\nSubject: " (or subject "(no subject)") "\n")
675 (if date (insert "Date: " date))))
677 (defun nndoc-nsmail-type-p ()
678 (when (looking-at "From - ")
681 (defun nndoc-outlook-article-begin ()
682 (prog1 (re-search-forward "From:\\|Received:" nil t)
683 (goto-char (match-beginning 0))))
685 (defun nndoc-outlook-type-p ()
686 ;; FIXME: Is JMF the magic of outlook mailbox? -- ShengHuo.
687 (looking-at "JMF"))
689 (defun nndoc-oe-dbx-type-p ()
690 (looking-at (mm-string-to-multibyte "\317\255\022\376")))
692 (defun nndoc-read-little-endian ()
693 (+ (prog1 (char-after) (forward-char 1))
694 (lsh (prog1 (char-after) (forward-char 1)) 8)
695 (lsh (prog1 (char-after) (forward-char 1)) 16)
696 (lsh (prog1 (char-after) (forward-char 1)) 24)))
698 (defun nndoc-oe-dbx-decode-block ()
699 (list
700 (nndoc-read-little-endian) ;; this address
701 (nndoc-read-little-endian) ;; next address offset
702 (nndoc-read-little-endian) ;; blocksize
703 (nndoc-read-little-endian))) ;; next address
705 (defun nndoc-oe-dbx-dissection ()
706 (let ((i 0) blk p tp)
707 (goto-char 60117) ;; 0x0000EAD4+1
708 (setq p (point))
709 (unless (eobp)
710 (setq blk (nndoc-oe-dbx-decode-block)))
711 (while (and blk (> (car blk) 0) (or (zerop (nth 3 blk))
712 (> (nth 3 blk) p)))
713 (push (list (incf i) p nil nil nil 0) nndoc-dissection-alist)
714 (while (and (> (car blk) 0) (> (nth 3 blk) p))
715 (goto-char (1+ (nth 3 blk)))
716 (setq blk (nndoc-oe-dbx-decode-block)))
717 (if (or (<= (car blk) p)
718 (<= (nth 1 blk) 0)
719 (not (zerop (nth 3 blk))))
720 (setq blk nil)
721 (setq tp (+ (car blk) (nth 1 blk) 17))
722 (if (or (<= tp p) (>= tp (point-max)))
723 (setq blk nil)
724 (goto-char tp)
725 (setq p tp
726 blk (nndoc-oe-dbx-decode-block)))))))
728 (defun nndoc-oe-dbx-generate-article (article &optional head)
729 (let ((entry (cdr (assq article nndoc-dissection-alist)))
730 (cur (current-buffer))
731 (begin (point))
732 blk p)
733 (with-current-buffer nndoc-current-buffer
734 (setq p (car entry))
735 (while (> p (point-min))
736 (goto-char p)
737 (setq blk (nndoc-oe-dbx-decode-block))
738 (setq p (point))
739 (with-current-buffer cur
740 (insert-buffer-substring nndoc-current-buffer p (+ p (nth 2 blk))))
741 (setq p (1+ (nth 3 blk)))))
742 (goto-char begin)
743 (while (re-search-forward "\r$" nil t)
744 (delete-backward-char 1))
745 (when head
746 (goto-char begin)
747 (when (search-forward "\n\n" nil t)
748 (setcar (cddddr entry) (count-lines (point) (point-max)))
749 (delete-region (1- (point)) (point-max))))
752 (defun nndoc-oe-dbx-generate-head (article)
753 (nndoc-oe-dbx-generate-article article 'head))
755 (defun nndoc-mail-in-mail-type-p ()
756 (let (found)
757 (save-excursion
758 (catch 'done
759 (while (re-search-forward "\n\n[-A-Za-z0-9]+:" nil t)
760 (setq found 0)
761 (forward-line)
762 (while (looking-at "[ \t]\\|[-A-Za-z0-9]+:")
763 (if (looking-at "[-A-Za-z0-9]+:")
764 (setq found (1+ found)))
765 (forward-line))
766 (if (and (> found 0) (looking-at "\n"))
767 (throw 'done 9999)))
768 nil))))
770 (defun nndoc-mail-in-mail-article-begin ()
771 (let (point found)
772 (if (catch 'done
773 (while (re-search-forward "\n\n\\([-A-Za-z0-9]+:\\)" nil t)
774 (setq found 0)
775 (setq point (match-beginning 1))
776 (forward-line)
777 (while (looking-at "[ \t]\\|[-A-Za-z0-9]+:")
778 (if (looking-at "[-A-Za-z0-9]+:")
779 (setq found (1+ found)))
780 (forward-line))
781 (if (and (> found 0) (looking-at "\n"))
782 (throw 'done t)))
783 nil)
784 (goto-char point))))
786 (deffoo nndoc-request-accept-article (group &optional server last)
787 nil)
790 ;;; Functions for dissecting the documents
793 (defun nndoc-search (regexp)
794 (prog1
795 (re-search-forward regexp nil t)
796 (beginning-of-line)))
798 (defun nndoc-dissect-buffer ()
799 "Go through the document and partition it into heads/bodies/articles."
800 (let ((i 0)
801 (first t)
802 art-begin head-begin head-end body-begin body-end)
803 (setq nndoc-dissection-alist nil)
804 (save-excursion
805 (set-buffer nndoc-current-buffer)
806 (goto-char (point-min))
807 ;; Remove blank lines.
808 (while (eq (following-char) ?\n)
809 (delete-char 1))
810 (if nndoc-dissection-function
811 (funcall nndoc-dissection-function)
812 ;; Find the beginning of the file.
813 (when nndoc-file-begin
814 (nndoc-search nndoc-file-begin))
815 ;; Go through the file.
816 (while (if (and first nndoc-first-article)
817 (nndoc-search nndoc-first-article)
818 (if art-begin
819 (goto-char art-begin)
820 (nndoc-article-begin)))
821 (setq first nil
822 art-begin nil)
823 (cond (nndoc-head-begin-function
824 (funcall nndoc-head-begin-function))
825 (nndoc-head-begin
826 (nndoc-search nndoc-head-begin)))
827 (if (or (eobp)
828 (and nndoc-file-end
829 (looking-at nndoc-file-end)))
830 (goto-char (point-max))
831 (setq head-begin (point))
832 (nndoc-search (or nndoc-head-end "^$"))
833 (setq head-end (point))
834 (if nndoc-body-begin-function
835 (funcall nndoc-body-begin-function)
836 (nndoc-search (or nndoc-body-begin "^\n")))
837 (setq body-begin (point))
838 (or (and nndoc-body-end-function
839 (funcall nndoc-body-end-function))
840 (and nndoc-body-end
841 (nndoc-search nndoc-body-end))
842 (and (nndoc-article-begin)
843 (setq art-begin (point)))
844 (progn
845 (goto-char (point-max))
846 (when nndoc-file-end
847 (and (re-search-backward nndoc-file-end nil t)
848 (beginning-of-line)))))
849 (setq body-end (point))
850 (push (list (incf i) head-begin head-end body-begin body-end
851 (count-lines body-begin body-end))
852 nndoc-dissection-alist)))))))
854 (defun nndoc-article-begin ()
855 (if nndoc-article-begin-function
856 (funcall nndoc-article-begin-function)
857 (ignore-errors
858 (nndoc-search nndoc-article-begin))))
860 (defun nndoc-unquote-dashes ()
861 "Unquote quoted non-separators in digests."
862 (while (re-search-forward "^- -"nil t)
863 (replace-match "-" t t)))
865 ;; Against compiler warnings.
866 (defvar nndoc-mime-split-ordinal)
868 (defun nndoc-dissect-mime-parts ()
869 "Go through a MIME composite article and partition it into sub-articles.
870 When a MIME entity contains sub-entities, dissection produces one article for
871 the header of this entity, and one article per sub-entity."
872 (setq nndoc-dissection-alist nil
873 nndoc-mime-split-ordinal 0)
874 (save-excursion
875 (set-buffer nndoc-current-buffer)
876 (nndoc-dissect-mime-parts-sub (point-min) (point-max) nil nil nil)))
878 (defun nndoc-dissect-mime-parts-sub (head-begin body-end article-insert
879 position parent)
880 "Dissect an entity, within a composite MIME message.
881 The complete message or MIME entity extends from HEAD-BEGIN to BODY-END.
882 ARTICLE-INSERT should be added at beginning for generating a full article.
883 The string POSITION holds a dotted decimal representation of the article
884 position in the hierarchical structure, it is nil for the outer entity.
885 PARENT is the message-ID of the parent summary line, or nil for none."
886 (let ((case-fold-search t)
887 (message-id (nnmail-message-id))
888 head-end body-begin summary-insert message-rfc822 multipart-any
889 subject content-type type subtype boundary-regexp)
890 ;; Gracefully handle a missing body.
891 (goto-char head-begin)
892 (if (or (and (eq (char-after) ?\n) (or (forward-char 1) t))
893 (search-forward "\n\n" body-end t))
894 (setq head-end (1- (point))
895 body-begin (point))
896 (setq head-end body-end
897 body-begin body-end))
898 (narrow-to-region head-begin head-end)
899 ;; Save MIME attributes.
900 (goto-char head-begin)
901 (setq content-type (message-fetch-field "Content-Type"))
902 (when content-type
903 (when (string-match
904 "^ *\\([^ \t\n/;]+\\)/\\([^ \t\n/;]+\\)" content-type)
905 (setq type (downcase (match-string 1 content-type))
906 subtype (downcase (match-string 2 content-type))
907 message-rfc822 (and (string= type "message")
908 (string= subtype "rfc822"))
909 multipart-any (string= type "multipart")))
910 (when (string-match ";[ \t\n]*name=\\([^ \t\n;]+\\)" content-type)
911 (setq subject (match-string 1 content-type)))
912 (when (string-match "boundary=\"?\\([^\"\n]*[^\" \t\n]\\)" content-type)
913 (setq boundary-regexp (concat "^--"
914 (regexp-quote
915 (match-string 1 content-type))
916 "\\(--\\)?[ \t]*\n"))))
917 (unless subject
918 (when (or multipart-any (not article-insert))
919 (setq subject (message-fetch-field "Subject"))))
920 (unless type
921 (setq type "text"
922 subtype "plain"))
923 ;; Prepare the article and summary inserts.
924 (unless article-insert
925 (setq article-insert (buffer-string)
926 head-end head-begin))
927 ;; Fix MIME-Version
928 (unless (string-match "MIME-Version:" article-insert)
929 (setq article-insert
930 (concat article-insert "MIME-Version: 1.0\n")))
931 (setq summary-insert article-insert)
932 ;; - summary Subject.
933 (setq summary-insert
934 (let ((line (concat "Subject: <" position
935 (and position multipart-any ".")
936 (and multipart-any "*")
937 (and (or position multipart-any) " ")
938 (cond ((string= subtype "plain") type)
939 ((string= subtype "basic") type)
940 (t subtype))
942 (and subject " ")
943 subject
944 "\n")))
945 (if (string-match "Subject:.*\n\\([ \t].*\n\\)*" summary-insert)
946 (replace-match line t t summary-insert)
947 (concat summary-insert line))))
948 ;; - summary Message-ID.
949 (setq summary-insert
950 (let ((line (concat "Message-ID: " message-id "\n")))
951 (if (string-match "Message-ID:.*\n\\([ \t].*\n\\)*" summary-insert)
952 (replace-match line t t summary-insert)
953 (concat summary-insert line))))
954 ;; - summary References.
955 (when parent
956 (setq summary-insert
957 (let ((line (concat "References: " parent "\n")))
958 (if (string-match "References:.*\n\\([ \t].*\n\\)*"
959 summary-insert)
960 (replace-match line t t summary-insert)
961 (concat summary-insert line)))))
962 ;; Generate dissection information for this entity.
963 (push (list (incf nndoc-mime-split-ordinal)
964 head-begin head-end body-begin body-end
965 (count-lines body-begin body-end)
966 article-insert summary-insert)
967 nndoc-dissection-alist)
968 ;; Recurse for all sub-entities, if any.
969 (widen)
970 (cond
971 (message-rfc822
972 (save-excursion
973 (nndoc-dissect-mime-parts-sub body-begin body-end nil
974 position message-id)))
975 ((and multipart-any boundary-regexp)
976 (let ((part-counter 0)
977 part-begin part-end eof-flag)
978 (while (string-match "\
979 ^\\(Lines\\|Content-\\(Type\\|Transfer-Encoding\\|Disposition\\)\\):.*\n\\([ \t].*\n\\)*"
980 article-insert)
981 (setq article-insert (replace-match "" t t article-insert)))
982 (let ((case-fold-search nil))
983 (goto-char body-begin)
984 (setq eof-flag (not (re-search-forward boundary-regexp body-end t)))
985 (while (not eof-flag)
986 (setq part-begin (point))
987 (cond ((re-search-forward boundary-regexp body-end t)
988 (or (not (match-string 1))
989 (string= (match-string 1) "")
990 (setq eof-flag t))
991 (forward-line -1)
992 (setq part-end (point))
993 (forward-line 1))
994 (t (setq part-end body-end
995 eof-flag t)))
996 (save-excursion
997 (nndoc-dissect-mime-parts-sub
998 part-begin part-end article-insert
999 (concat position
1000 (and position ".")
1001 (format "%d" (incf part-counter)))
1002 message-id)))))))))
1004 ;;;###autoload
1005 (defun nndoc-add-type (definition &optional position)
1006 "Add document DEFINITION to the list of nndoc document definitions.
1007 If POSITION is nil or `last', the definition will be added
1008 as the last checked definition, if t or `first', add as the
1009 first definition, and if any other symbol, add after that
1010 symbol in the alist."
1011 ;; First remove any old instances.
1012 (gnus-pull (car definition) nndoc-type-alist)
1013 ;; Then enter the new definition in the proper place.
1014 (cond
1015 ((or (null position) (eq position 'last))
1016 (setq nndoc-type-alist (nconc nndoc-type-alist (list definition))))
1017 ((or (eq position t) (eq position 'first))
1018 (push definition nndoc-type-alist))
1020 (let ((list (memq (assq position nndoc-type-alist)
1021 nndoc-type-alist)))
1022 (unless list
1023 (error "No such position: %s" position))
1024 (setcdr list (cons definition (cdr list)))))))
1026 (provide 'nndoc)
1028 ;; arch-tag: f5c2970e-0387-47ac-a0b3-6cc317dffabe
1029 ;;; nndoc.el ends here