Merge branch 'master' into comment-cache
[emacs.git] / lisp / gnus / nndoc.el
blob7f7db8721db8dbf3b8a73eaba7b330e100956b9a
1 ;;; nndoc.el --- single file access for Gnus
3 ;; Copyright (C) 1995-2017 Free Software Foundation, Inc.
5 ;; Author: Lars Magne Ingebrigtsen <larsi@gnus.org>
6 ;; Masanobu UMEDA <umerin@flab.flab.fujitsu.junet>
7 ;; Keywords: news
9 ;; This file is part of GNU Emacs.
11 ;; GNU Emacs is free software: you can redistribute it and/or modify
12 ;; it under the terms of the GNU General Public License as published by
13 ;; the Free Software Foundation, either version 3 of the License, or
14 ;; (at your option) any later version.
16 ;; GNU Emacs is distributed in the hope that it will be useful,
17 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
18 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 ;; GNU General Public License for more details.
21 ;; You should have received a copy of the GNU General Public License
22 ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
24 ;;; Commentary:
26 ;; For Outlook mail boxes format, see http://mbx2mbox.sourceforge.net/
28 ;;; Code:
30 (require 'nnheader)
31 (require 'message)
32 (require 'nnmail)
33 (require 'nnoo)
34 (require 'gnus-util)
35 (require 'mm-util)
36 (eval-when-compile (require 'cl))
38 (nnoo-declare nndoc)
40 (defvoo nndoc-article-type 'guess
41 "*Type of the file.
42 One of `mbox', `babyl', `digest', `news', `rnews', `mmdf', `forward',
43 `rfc934', `rfc822-forward', `mime-parts', `standard-digest',
44 `slack-digest', `clari-briefs', `nsmail', `outlook', `oe-dbx',
45 `mailman', `exim-bounce', or `guess'.")
47 (defvoo nndoc-post-type 'mail
48 "*Whether the nndoc group is `mail' or `post'.")
50 (defvoo nndoc-open-document-hook 'nnheader-ms-strip-cr
51 "Hook run after opening a document.
52 The default function removes all trailing carriage returns
53 from the document.")
55 (defvar nndoc-type-alist
56 `((mmdf
57 (article-begin . "^\^A\^A\^A\^A\n")
58 (body-end . "^\^A\^A\^A\^A\n"))
59 (debbugs-db
60 (file-begin . "^\005")
61 (article-begin . "^[\005\007]\n")
62 (body-end . "^\003"))
63 (mime-digest
64 (article-begin . "")
65 (head-begin . "^ ?\n")
66 (head-end . "^ ?$")
67 (body-end . "")
68 (file-end . "")
69 (subtype digest guess))
70 (nsmail
71 (article-begin . "^From - "))
72 (news
73 (article-begin . "^Path:"))
74 (rnews
75 (article-begin . "^#! *rnews +\\([0-9]+\\) *\n")
76 (body-end-function . nndoc-rnews-body-end))
77 (mbox
78 (article-begin-function . nndoc-mbox-article-begin)
79 (body-end-function . nndoc-mbox-body-end))
80 (babyl
81 (article-begin . "\^_\^L *\n")
82 (body-end . "\^_")
83 (body-begin-function . nndoc-babyl-body-begin)
84 (head-begin-function . nndoc-babyl-head-begin))
85 (mime-parts
86 (generate-head-function . nndoc-generate-mime-parts-head)
87 (article-transform-function . nndoc-transform-mime-parts))
88 (exim-bounce
89 (article-begin . "^------ This is a copy of the message, including all the headers. ------\n\n")
90 (body-end-function . nndoc-exim-bounce-body-end-function))
91 (rfc934
92 (article-begin . "^--.*\n+")
93 (body-end . "^--.*$")
94 (prepare-body-function . nndoc-unquote-dashes))
95 (mailman
96 (article-begin . "^--__--__--\n\nMessage:")
97 (body-end . "^--__--__--$")
98 (prepare-body-function . nndoc-unquote-dashes))
99 (clari-briefs
100 (article-begin . "^ \\*")
101 (body-end . "^\t------*[ \t]^*\n^ \\*")
102 (body-begin . "^\t")
103 (head-end . "^\t")
104 (generate-head-function . nndoc-generate-clari-briefs-head)
105 (article-transform-function . nndoc-transform-clari-briefs))
107 (standard-digest
108 (first-article . ,(concat "^" (make-string 70 ?-) "\n *\n+"))
109 (article-begin . ,(concat "^\n" (make-string 30 ?-) "\n *\n+"))
110 (prepare-body-function . nndoc-unquote-dashes)
111 (body-end-function . nndoc-digest-body-end)
112 (head-end . "^ *$")
113 (body-begin . "^ *\n")
114 (file-end . "^End of .*digest.*[0-9].*\n\\*\\*\\|^End of.*Digest *$")
115 (subtype digest guess))
116 (slack-digest
117 (article-begin . "^------------------------------*[\n \t]+")
118 (head-end . "^ ?$")
119 (body-end-function . nndoc-digest-body-end)
120 (body-begin . "^ ?$")
121 (file-end . "^End of")
122 (prepare-body-function . nndoc-unquote-dashes)
123 (subtype digest guess))
124 (google
125 (pre-dissection-function . nndoc-decode-content-transfer-encoding)
126 (article-begin . "^== [0-9]+ of [0-9]+ ==$")
127 (head-begin . "^Date:")
128 (head-end . "^$")
129 (body-end-function . nndoc-digest-body-end)
130 (body-begin . "^$")
131 (file-end . "^==============================================================================$")
132 (prepare-body-function . nndoc-unquote-dashes)
133 (subtype digest guess))
134 (lanl-gov-announce
135 (article-begin . "^\\\\\\\\\n")
136 (head-begin . "^\\(Paper.*:\\|arXiv:\\)")
137 (head-end . "\\(^\\\\\\\\.*\n\\|-----------------\\)")
138 (body-begin . "")
139 (body-end . "\\(-------------------------------------------------\\|%-%-%-%-%-%-%-%-%-%-%-%-%-%-\\|%%--%%--%%--%%--%%--%%--%%--%%--\\|%%%---%%%---%%%---%%%---\\)")
140 (file-end . "\\(^Title: Recent Seminal\\|%%%---%%%---%%%---%%%---\\)")
141 (generate-head-function . nndoc-generate-lanl-gov-head)
142 (article-transform-function . nndoc-transform-lanl-gov-announce)
143 (subtype preprints guess))
144 (git
145 (file-begin . "\n- Log ---.*")
146 (article-begin . "^commit ")
147 (head-begin . "^Author: ")
148 (body-begin . "^$")
149 (file-end . "\n-----------------------------------------------------------------------")
150 (article-transform-function . nndoc-transform-git-article)
151 (header-transform-function . nndoc-transform-git-headers))
152 (rfc822-forward
153 (article-begin . "^\n+")
154 (body-end-function . nndoc-rfc822-forward-body-end-function)
155 (generate-head-function . nndoc-rfc822-forward-generate-head)
156 (generate-article-function . nndoc-rfc822-forward-generate-article))
157 (outlook
158 (article-begin-function . nndoc-outlook-article-begin)
159 (body-end . "\0"))
160 (oe-dbx ;; Outlook Express DBX format
161 (dissection-function . nndoc-oe-dbx-dissection)
162 (generate-head-function . nndoc-oe-dbx-generate-head)
163 (generate-article-function . nndoc-oe-dbx-generate-article))
164 (forward
165 (article-begin . "^-+ \\(Start of \\)?forwarded message.*\n+")
166 (body-end . "^-+ End \\(of \\)?forwarded message.*$")
167 (prepare-body-function . nndoc-unquote-dashes))
168 (mail-in-mail ;; Wild guess on mailer daemon's messages or others
169 (article-begin-function . nndoc-mail-in-mail-article-begin))
170 (guess
171 (guess . t)
172 (subtype nil))
173 (digest
174 (guess . t)
175 (subtype nil))
176 (preprints
177 (guess . t)
178 (subtype nil))))
180 (defvar nndoc-binary-file-names ".[Dd][Bb][Xx]$"
181 "Regexp for binary nndoc file names.")
184 (defvoo nndoc-file-begin nil)
185 (defvoo nndoc-first-article nil)
186 (defvoo nndoc-article-begin nil)
187 (defvoo nndoc-head-begin nil)
188 (defvoo nndoc-head-end nil)
189 (defvoo nndoc-file-end nil)
190 (defvoo nndoc-body-begin nil)
191 (defvoo nndoc-body-end-function nil)
192 (defvoo nndoc-body-begin-function nil)
193 (defvoo nndoc-head-begin-function nil)
194 (defvoo nndoc-body-end nil)
195 ;; nndoc-dissection-alist is a list of sublists. Each sublist holds the
196 ;; following items. ARTICLE acts as the association key and is an ordinal
197 ;; starting at 1. HEAD-BEGIN [0], HEAD-END [1], BODY-BEGIN [2] and BODY-END
198 ;; [3] are positions in the `nndoc' buffer. LINE-COUNT [4] is a count of
199 ;; lines in the body. For MIME dissections only, ARTICLE-INSERT [5] and
200 ;; SUMMARY-INSERT [6] give headers to insert for full article or summary line
201 ;; generation, respectively. Other headers usually follow directly from the
202 ;; buffer. Value nil means no insert.
203 (defvoo nndoc-dissection-alist nil)
204 (defvoo nndoc-prepare-body-function nil)
205 (defvoo nndoc-generate-head-function nil)
206 (defvoo nndoc-article-transform-function nil)
207 (defvoo nndoc-header-transform-function nil)
208 (defvoo nndoc-article-begin-function nil)
209 (defvoo nndoc-generate-article-function nil)
210 (defvoo nndoc-dissection-function nil)
211 (defvoo nndoc-pre-dissection-function nil)
213 (defvoo nndoc-status-string "")
214 (defvoo nndoc-group-alist nil)
215 (defvoo nndoc-current-buffer nil
216 "Current nndoc news buffer.")
217 (defvoo nndoc-address nil)
219 (defconst nndoc-version "nndoc 1.0"
220 "nndoc version.")
224 ;;; Interface functions
226 (nnoo-define-basics nndoc)
228 (deffoo nndoc-retrieve-headers (articles &optional newsgroup server fetch-old)
229 (when (nndoc-possibly-change-buffer newsgroup server)
230 (with-current-buffer nntp-server-buffer
231 (erase-buffer)
232 (let (article entry)
233 (if (stringp (car articles))
234 'headers
235 (while articles
236 (when (setq entry (cdr (assq (setq article (pop articles))
237 nndoc-dissection-alist)))
238 (let ((start (point)))
239 (insert (format "221 %d Article retrieved.\n" article))
240 (if nndoc-generate-head-function
241 (funcall nndoc-generate-head-function article)
242 (insert-buffer-substring
243 nndoc-current-buffer (car entry) (nth 1 entry)))
244 (goto-char (point-max))
245 (unless (eq (char-after (1- (point))) ?\n)
246 (insert "\n"))
247 (insert (format "Lines: %d\n" (nth 4 entry)))
248 (insert ".\n")
249 (when nndoc-header-transform-function
250 (save-excursion
251 (save-restriction
252 (narrow-to-region start (point))
253 (funcall nndoc-header-transform-function entry)))))))
254 (nnheader-fold-continuation-lines)
255 'headers)))))
257 (deffoo nndoc-request-article (article &optional newsgroup server buffer)
258 (nndoc-possibly-change-buffer newsgroup server)
259 (save-excursion
260 (let ((buffer (or buffer nntp-server-buffer))
261 (entry (cdr (assq article nndoc-dissection-alist)))
262 beg)
263 (set-buffer buffer)
264 (erase-buffer)
265 (when entry
266 (cond
267 ((stringp article) nil)
268 (nndoc-generate-article-function
269 (funcall nndoc-generate-article-function article))
271 (insert-buffer-substring
272 nndoc-current-buffer (car entry) (nth 1 entry))
273 (insert "\n")
274 (setq beg (point))
275 (insert-buffer-substring
276 nndoc-current-buffer (nth 2 entry) (nth 3 entry))
277 (goto-char beg)
278 (when nndoc-prepare-body-function
279 (funcall nndoc-prepare-body-function))
280 (when nndoc-article-transform-function
281 (funcall nndoc-article-transform-function article))
282 t))))))
284 (deffoo nndoc-request-group (group &optional server dont-check info)
285 "Select news GROUP."
286 (let (number)
287 (cond
288 ((not (nndoc-possibly-change-buffer group server))
289 (nnheader-report 'nndoc "No such file or buffer: %s"
290 nndoc-address))
291 (dont-check
292 (nnheader-report 'nndoc "Selected group %s" group)
294 ((zerop (setq number (length nndoc-dissection-alist)))
295 (nndoc-close-group group)
296 (nnheader-report 'nndoc "No articles in group %s" group))
298 (nnheader-insert "211 %d %d %d %s\n" number 1 number group)))))
300 (deffoo nndoc-retrieve-groups (groups &optional server)
301 (dolist (group groups)
302 (nndoc-request-group group server))
305 (deffoo nndoc-request-type (group &optional article)
306 (cond ((not article) 'unknown)
307 (nndoc-post-type nndoc-post-type)
308 (t 'unknown)))
310 (deffoo nndoc-close-group (group &optional server)
311 (nndoc-possibly-change-buffer group server)
312 (and nndoc-current-buffer
313 (buffer-name nndoc-current-buffer)
314 (kill-buffer nndoc-current-buffer))
315 (setq nndoc-group-alist (delq (assoc group nndoc-group-alist)
316 nndoc-group-alist))
317 (setq nndoc-current-buffer nil)
318 (nnoo-close-server 'nndoc server)
319 (setq nndoc-dissection-alist nil)
322 (deffoo nndoc-request-list (&optional server)
325 (deffoo nndoc-request-newgroups (date &optional server)
326 nil)
328 (deffoo nndoc-request-list-newsgroups (&optional server)
329 nil)
332 ;;; Internal functions.
334 (defun nndoc-possibly-change-buffer (group source)
335 (let (buf)
336 (cond
337 ;; The current buffer is this group's buffer.
338 ((and nndoc-current-buffer
339 (buffer-name nndoc-current-buffer)
340 (eq nndoc-current-buffer
341 (setq buf (cdr (assoc group nndoc-group-alist))))))
342 ;; We change buffers by taking an old from the group alist.
343 ;; `source' is either a string (a file name) or a buffer object.
344 (buf
345 (setq nndoc-current-buffer buf))
346 ;; It's a totally new group.
347 ((or (and (bufferp nndoc-address)
348 (buffer-name nndoc-address))
349 (and (stringp nndoc-address)
350 (file-exists-p nndoc-address)
351 (not (file-directory-p nndoc-address))))
352 (push (cons group (setq nndoc-current-buffer
353 (get-buffer-create
354 (concat " *nndoc " group "*"))))
355 nndoc-group-alist)
356 (setq nndoc-dissection-alist nil)
357 (with-current-buffer nndoc-current-buffer
358 (erase-buffer)
359 (condition-case error
360 (if (and (stringp nndoc-address)
361 (string-match nndoc-binary-file-names nndoc-address))
362 (let ((coding-system-for-read 'binary))
363 (mm-insert-file-contents nndoc-address))
364 (if (stringp nndoc-address)
365 (nnheader-insert-file-contents nndoc-address)
366 (insert-buffer-substring nndoc-address))
367 (run-hooks 'nndoc-open-document-hook))
368 (file-error
369 (nnheader-report 'nndoc "Couldn't open %s: %s"
370 group error))))))
371 ;; Initialize the nndoc structures according to this new document.
372 (when (and nndoc-current-buffer
373 (not nndoc-dissection-alist))
374 (with-current-buffer nndoc-current-buffer
375 (nndoc-set-delims)
376 (if (eq nndoc-article-type 'mime-parts)
377 (nndoc-dissect-mime-parts)
378 (nndoc-dissect-buffer))))
379 (unless nndoc-current-buffer
380 (nndoc-close-server))
381 ;; Return whether we managed to select a file.
382 nndoc-current-buffer))
385 ;;; Deciding what document type we have
388 (defun nndoc-set-delims ()
389 "Set the nndoc delimiter variables according to the type of the document."
390 (let ((vars '(nndoc-file-begin
391 nndoc-first-article
392 nndoc-article-begin-function
393 nndoc-head-begin nndoc-head-end
394 nndoc-file-end nndoc-article-begin
395 nndoc-body-begin nndoc-body-end-function nndoc-body-end
396 nndoc-prepare-body-function nndoc-article-transform-function
397 nndoc-header-transform-function
398 nndoc-generate-head-function nndoc-body-begin-function
399 nndoc-head-begin-function
400 nndoc-generate-article-function
401 nndoc-dissection-function
402 nndoc-pre-dissection-function)))
403 (while vars
404 (set (pop vars) nil)))
405 (let (defs)
406 ;; Guess away until we find the real file type.
407 (while (assq 'guess (setq defs (cdr (assq nndoc-article-type
408 nndoc-type-alist))))
409 (setq nndoc-article-type (nndoc-guess-type nndoc-article-type)))
410 ;; Set the nndoc variables.
411 (while defs
412 (set (intern (format "nndoc-%s" (caar defs)))
413 (cdr (pop defs))))))
415 (defun nndoc-guess-type (subtype)
416 (let ((alist nndoc-type-alist)
417 results result entry)
418 (while (and (not result)
419 (setq entry (pop alist)))
420 (when (memq subtype (or (cdr (assq 'subtype entry)) '(guess)))
421 (goto-char (point-min))
422 ;; Remove blank lines.
423 (while (eq (following-char) ?\n)
424 (delete-char 1))
425 (when (numberp (setq result (funcall (intern
426 (format "nndoc-%s-type-p"
427 (car entry))))))
428 (push (cons result entry) results)
429 (setq result nil))))
430 (unless (or result results)
431 (error "Document is not of any recognized type"))
432 (if result
433 (car entry)
434 (cadar (last (sort results 'car-less-than-car))))))
437 ;;; Built-in type predicates and functions
440 (defun nndoc-mbox-type-p ()
441 (when (looking-at message-unix-mail-delimiter)
444 (defun nndoc-mbox-article-begin ()
445 (when (re-search-forward (concat "^" message-unix-mail-delimiter) nil t)
446 (goto-char (match-beginning 0))))
448 (defun nndoc-mbox-body-end ()
449 (let ((beg (point))
450 len end)
451 (when
452 (save-excursion
453 (and (re-search-backward
454 (concat "^" message-unix-mail-delimiter) nil t)
455 (setq end (point))
456 (search-forward "\n\n" beg t)
457 (re-search-backward
458 "^Content-Length:[ \t]*\\([0-9]+\\) *$" end t)
459 (setq len (string-to-number (match-string 1)))
460 (search-forward "\n\n" beg t)
461 (unless (= (setq len (+ (point) len)) (point-max))
462 (and (< len (point-max))
463 (goto-char len)
464 (looking-at message-unix-mail-delimiter)))))
465 (goto-char len))))
467 (defun nndoc-mmdf-type-p ()
468 (when (looking-at "\^A\^A\^A\^A$")
471 (defun nndoc-debbugs-db-type-p ()
472 (when (looking-at "\006$")
475 (defun nndoc-news-type-p ()
476 (when (looking-at "^Path:.*\n")
479 (defun nndoc-rnews-type-p ()
480 (when (looking-at "#! *rnews")
483 (defun nndoc-rnews-body-end ()
484 (and (re-search-backward nndoc-article-begin nil t)
485 (forward-line 1)
486 (goto-char (+ (point) (string-to-number (match-string 1))))))
488 (defun nndoc-google-type-p ()
489 (when (re-search-forward "^=3D=3D 1 of [0-9]+ =3D=3D$" nil t)
492 (defun nndoc-decode-content-transfer-encoding ()
493 (let ((encoding
494 (save-restriction
495 (message-narrow-to-head)
496 (message-fetch-field "content-transfer-encoding"))))
497 (when (and encoding
498 (search-forward "\n\n" nil t))
499 (save-restriction
500 (narrow-to-region (point) (point-max))
501 (mm-decode-content-transfer-encoding
502 (intern (downcase (mail-header-strip-cte encoding))))))))
504 (defun nndoc-babyl-type-p ()
505 (when (re-search-forward "\^_\^L *\n" nil t)
508 (defun nndoc-babyl-body-begin ()
509 (re-search-forward "^\n" nil t)
510 (when (looking-at "\\*\\*\\* EOOH \\*\\*\\*")
511 (let ((next (or (save-excursion
512 (re-search-forward nndoc-article-begin nil t))
513 (point-max))))
514 (unless (re-search-forward "^\n" next t)
515 (goto-char next)
516 (forward-line -1)
517 (insert "\n")
518 (forward-line -1)))))
520 (defun nndoc-babyl-head-begin ()
521 (when (re-search-forward "^[0-9].*\n" nil t)
522 (when (looking-at "\\*\\*\\* EOOH \\*\\*\\*")
523 (forward-line 1))
526 (defun nndoc-forward-type-p ()
527 (when (and (re-search-forward "^-+ \\(Start of \\)?forwarded message.*\n+"
528 nil t)
529 (looking-at "[\r\n]*[a-zA-Z][a-zA-Z0-9-]*:\\|^>?From "))
532 (defun nndoc-rfc934-type-p ()
533 (when (and (re-search-forward "^-+ Start of forwarded.*\n+" nil t)
534 (not (re-search-forward "^Subject:.*digest" nil t))
535 (not (re-search-backward "^From:" nil t 2))
536 (not (re-search-forward "^From:" nil t 2)))
539 (defun nndoc-mailman-type-p ()
540 (when (re-search-forward "^--__--__--\n+" nil t)
543 (defun nndoc-rfc822-forward-type-p ()
544 (save-restriction
545 (message-narrow-to-head)
546 (when (re-search-forward "^Content-Type: *message/rfc822" nil t)
547 t)))
549 (defun nndoc-rfc822-forward-body-end-function ()
550 (goto-char (point-max)))
552 (defun nndoc-rfc822-forward-generate-article (article &optional head)
553 (let ((entry (cdr (assq article nndoc-dissection-alist)))
554 (begin (point))
555 encoding)
556 (with-current-buffer nndoc-current-buffer
557 (save-restriction
558 (message-narrow-to-head)
559 (setq encoding (message-fetch-field "content-transfer-encoding"))))
560 (insert-buffer-substring nndoc-current-buffer (car entry) (nth 3 entry))
561 (when encoding
562 (save-restriction
563 (narrow-to-region begin (point-max))
564 (mm-decode-content-transfer-encoding
565 (intern (downcase (mail-header-strip-cte encoding))))))
566 (when head
567 (goto-char begin)
568 (when (search-forward "\n\n" nil t)
569 (delete-region (1- (point)) (point-max)))))
572 (defun nndoc-rfc822-forward-generate-head (article)
573 (nndoc-rfc822-forward-generate-article article 'head))
575 (defun nndoc-mime-parts-type-p ()
576 (let ((case-fold-search t)
577 (limit (search-forward "\n\n" nil t)))
578 (goto-char (point-min))
579 (when (and limit
580 (re-search-forward
581 (concat "\
582 ^Content-Type:[ \t]*multipart/[a-z]+ *; *\\(\\(\n[ \t]\\)?.*;\\)*"
583 "\\(\n[ \t]\\)?[ \t]*boundary=\"?[^\"\n]*[^\" \t\n]")
584 limit t))
585 t)))
587 (defun nndoc-transform-mime-parts (article)
588 (let* ((entry (cdr (assq article nndoc-dissection-alist)))
589 (headers (nth 5 entry)))
590 (when headers
591 (goto-char (point-min))
592 (insert headers))))
594 (defun nndoc-generate-mime-parts-head (article)
595 (let* ((entry (cdr (assq article nndoc-dissection-alist)))
596 (headers (nth 6 entry)))
597 (save-restriction
598 (narrow-to-region (point) (point))
599 (insert-buffer-substring
600 nndoc-current-buffer (car entry) (nth 1 entry))
601 (goto-char (point-max)))
602 (when headers
603 (insert headers))))
605 (defun nndoc-clari-briefs-type-p ()
606 (when (let ((case-fold-search nil))
607 (re-search-forward "^\t[^a-z]+ ([^a-z]+) --" nil t))
610 (defun nndoc-transform-clari-briefs (article)
611 (goto-char (point-min))
612 (when (looking-at " *\\*\\(.*\\)\n")
613 (replace-match "" t t))
614 (nndoc-generate-clari-briefs-head article))
616 (defun nndoc-generate-clari-briefs-head (article)
617 (let ((entry (cdr (assq article nndoc-dissection-alist)))
618 subject from)
619 (with-current-buffer nndoc-current-buffer
620 (save-restriction
621 (narrow-to-region (car entry) (nth 3 entry))
622 (goto-char (point-min))
623 (when (looking-at " *\\*\\(.*\\)$")
624 (setq subject (match-string 1))
625 (when (string-match "[ \t]+$" subject)
626 (setq subject (substring subject 0 (match-beginning 0)))))
627 (when
628 (let ((case-fold-search nil))
629 (re-search-forward
630 "^\t\\([^a-z]+\\(,[^(]+\\)? ([^a-z]+)\\) --" nil t))
631 (setq from (match-string 1)))))
632 (insert "From: " "clari@clari.net (" (or from "unknown") ")"
633 "\nSubject: " (or subject "(no subject)") "\n")))
635 (defun nndoc-exim-bounce-type-p ()
636 (and (re-search-forward "^------ This is a copy of the message, including all the headers. ------" nil t)
639 (defun nndoc-exim-bounce-body-end-function ()
640 (goto-char (point-max)))
643 (defun nndoc-mime-digest-type-p ()
644 (let ((case-fold-search t)
645 boundary-id b-delimiter entry)
646 (when (and
647 (re-search-forward
648 (concat "^Content-Type: *multipart/digest;[ \t\n]*[ \t]"
649 "boundary=\"?\\([^\"\n]*[^\" \t\n]\\)")
650 nil t)
651 (match-beginning 1))
652 (setq boundary-id (match-string 1)
653 b-delimiter (concat "\n--" boundary-id "[ \t]*$"))
654 (setq entry (assq 'mime-digest nndoc-type-alist))
655 (setcdr entry
656 (list
657 (cons 'head-begin "^ ?\n")
658 (cons 'head-end "^ ?$")
659 (cons 'body-begin "^ ?\n")
660 (cons 'article-begin b-delimiter)
661 (cons 'body-end-function 'nndoc-digest-body-end)
662 (cons 'file-end (concat "^--" boundary-id "--[ \t]*$"))))
663 t)))
665 (defun nndoc-standard-digest-type-p ()
666 (when (and (re-search-forward (concat "^" (make-string 70 ?-) "\n\n") nil t)
667 (re-search-forward
668 (concat "\n\n" (make-string 30 ?-) "\n\n") nil t))
671 (defun nndoc-digest-body-end ()
672 (and (re-search-forward nndoc-article-begin nil t)
673 (goto-char (match-beginning 0))))
675 (defun nndoc-slack-digest-type-p ()
678 (defun nndoc-git-type-p ()
679 (and (search-forward "\n- Log ---" nil t)
680 (search-forward "\ncommit " nil t)
681 (search-forward "\nAuthor: " nil t)))
683 (defun nndoc-transform-git-article (article)
684 (goto-char (point-min))
685 (when (re-search-forward "^Author: " nil t)
686 (replace-match "From: " t t)))
688 (defun nndoc-transform-git-headers (entry)
689 (goto-char (point-min))
690 (when (re-search-forward "^Author: " nil t)
691 (replace-match "From: " t t))
692 (let (subject)
693 (with-current-buffer nndoc-current-buffer
694 (goto-char (car entry))
695 (when (search-forward "\n\n" nil t)
696 (setq subject (buffer-substring (point) (line-end-position)))))
697 (when subject
698 (goto-char (point-min))
699 (forward-line 1)
700 (insert (format "Subject: %s\n" subject)))))
702 (defun nndoc-lanl-gov-announce-type-p ()
703 (when (let ((case-fold-search nil))
704 (re-search-forward "^\\\\\\\\\n\\(Paper\\( (\\*cross-listing\\*)\\)?: [a-zA-Z-\\.]+/[0-9]+\\|arXiv:\\)" nil t))
707 (defun nndoc-transform-lanl-gov-announce (article)
708 (let ((case-fold-search nil))
709 (goto-char (point-max))
710 (when (re-search-backward "^\\\\\\\\ +( *\\([^ ]*\\) , *\\([^ ]*\\))" nil t)
711 (replace-match "\n\nGet it at \\1 (\\2)" t nil))
712 (goto-char (point-min))
713 (while (re-search-forward "^\\\\\\\\$" nil t)
714 (replace-match "" t nil))
715 (goto-char (point-min))
716 (when (re-search-forward "^replaced with revised version +\\(.*[^ ]\\) +" nil t)
717 (replace-match "Date: \\1 (revised) " t nil))
718 (goto-char (point-min))
719 (unless (re-search-forward "^From" nil t)
720 (goto-char (point-min))
721 (when (re-search-forward "^Authors?: \\(.*\\)" nil t)
722 (goto-char (point-min))
723 (insert "From: " (match-string 1) "\n")))
724 (when (re-search-forward "^arXiv:" nil t)
725 (replace-match "Paper: arXiv:" t nil))))
727 (defun nndoc-generate-lanl-gov-head (article)
728 (let ((entry (cdr (assq article nndoc-dissection-alist)))
729 (from "<no address given>")
730 subject date)
731 (with-current-buffer nndoc-current-buffer
732 (save-restriction
733 (narrow-to-region (car entry) (nth 1 entry))
734 (goto-char (point-min))
735 (when (looking-at "^\\(Paper.*: \\|arXiv:\\)\\([0-9a-zA-Z-\\./]+\\)")
736 (setq subject (concat " (" (match-string 2) ")"))
737 (when (re-search-forward "^From: \\(.*\\)" nil t)
738 (setq from (concat "<"
739 (cadr (funcall gnus-extract-address-components
740 (match-string 1))) ">")))
741 (if (re-search-forward "^Date: +\\([^(]*\\)" nil t)
742 (setq date (match-string 1))
743 (when (re-search-forward "^replaced with revised version +\\([^(]*\\)" nil t)
744 (setq date (match-string 1))))
745 (when (re-search-forward "^Title: \\([^\f]*\\)\nAuthors?: \\(.*\\)"
746 nil t)
747 (setq subject (concat (match-string 1) subject))
748 (setq from (concat (match-string 2) " " from))))))
749 (while (and from (string-match "([^)]*)" from))
750 (setq from (replace-match "" t t from)))
751 (insert "From: " (or from "unknown")
752 "\nSubject: " (or subject "(no subject)") "\n")
753 (if date (insert "Date: " date))))
755 (defun nndoc-nsmail-type-p ()
756 (when (looking-at "From - ")
759 (defun nndoc-outlook-article-begin ()
760 (prog1 (re-search-forward "From:\\|Received:" nil t)
761 (goto-char (match-beginning 0))))
763 (defun nndoc-outlook-type-p ()
764 ;; FIXME: Is JMF the magic of outlook mailbox? -- ShengHuo.
765 (looking-at "JMF"))
767 (defun nndoc-oe-dbx-type-p ()
768 (looking-at (string-to-multibyte "\317\255\022\376")))
770 (defun nndoc-read-little-endian ()
771 (+ (prog1 (char-after) (forward-char 1))
772 (lsh (prog1 (char-after) (forward-char 1)) 8)
773 (lsh (prog1 (char-after) (forward-char 1)) 16)
774 (lsh (prog1 (char-after) (forward-char 1)) 24)))
776 (defun nndoc-oe-dbx-decode-block ()
777 (list
778 (nndoc-read-little-endian) ;; this address
779 (nndoc-read-little-endian) ;; next address offset
780 (nndoc-read-little-endian) ;; blocksize
781 (nndoc-read-little-endian))) ;; next address
783 (defun nndoc-oe-dbx-dissection ()
784 (let ((i 0) blk p tp)
785 (goto-char 60117) ;; 0x0000EAD4+1
786 (setq p (point))
787 (unless (eobp)
788 (setq blk (nndoc-oe-dbx-decode-block)))
789 (while (and blk (> (car blk) 0) (or (zerop (nth 3 blk))
790 (> (nth 3 blk) p)))
791 (push (list (incf i) p nil nil nil 0) nndoc-dissection-alist)
792 (while (and (> (car blk) 0) (> (nth 3 blk) p))
793 (goto-char (1+ (nth 3 blk)))
794 (setq blk (nndoc-oe-dbx-decode-block)))
795 (if (or (<= (car blk) p)
796 (<= (nth 1 blk) 0)
797 (not (zerop (nth 3 blk))))
798 (setq blk nil)
799 (setq tp (+ (car blk) (nth 1 blk) 17))
800 (if (or (<= tp p) (>= tp (point-max)))
801 (setq blk nil)
802 (goto-char tp)
803 (setq p tp
804 blk (nndoc-oe-dbx-decode-block)))))))
806 (defun nndoc-oe-dbx-generate-article (article &optional head)
807 (let ((entry (cdr (assq article nndoc-dissection-alist)))
808 (cur (current-buffer))
809 (begin (point))
810 blk p)
811 (with-current-buffer nndoc-current-buffer
812 (setq p (car entry))
813 (while (> p (point-min))
814 (goto-char p)
815 (setq blk (nndoc-oe-dbx-decode-block))
816 (setq p (point))
817 (with-current-buffer cur
818 (insert-buffer-substring nndoc-current-buffer p (+ p (nth 2 blk))))
819 (setq p (1+ (nth 3 blk)))))
820 (goto-char begin)
821 (while (re-search-forward "\r$" nil t)
822 (delete-char -1))
823 (when head
824 (goto-char begin)
825 (when (search-forward "\n\n" nil t)
826 (setcar (cddddr entry) (count-lines (point) (point-max)))
827 (delete-region (1- (point)) (point-max))))
830 (defun nndoc-oe-dbx-generate-head (article)
831 (nndoc-oe-dbx-generate-article article 'head))
833 (defun nndoc-mail-in-mail-type-p ()
834 (let (found)
835 (save-excursion
836 (catch 'done
837 (while (re-search-forward "\n\n[-A-Za-z0-9]+:" nil t)
838 (setq found 0)
839 (forward-line)
840 (while (looking-at "[ \t]\\|[-A-Za-z0-9]+:")
841 (if (looking-at "[-A-Za-z0-9]+:")
842 (setq found (1+ found)))
843 (forward-line))
844 (if (and (> found 0) (looking-at "\n"))
845 (throw 'done 9999)))
846 nil))))
848 (defun nndoc-mail-in-mail-article-begin ()
849 (let (point found)
850 (if (catch 'done
851 (while (re-search-forward "\n\n\\([-A-Za-z0-9]+:\\)" nil t)
852 (setq found 0)
853 (setq point (match-beginning 1))
854 (forward-line)
855 (while (looking-at "[ \t]\\|[-A-Za-z0-9]+:")
856 (if (looking-at "[-A-Za-z0-9]+:")
857 (setq found (1+ found)))
858 (forward-line))
859 (if (and (> found 0) (looking-at "\n"))
860 (throw 'done t)))
861 nil)
862 (goto-char point))))
864 (deffoo nndoc-request-accept-article (group &optional server last)
865 nil)
868 ;;; Functions for dissecting the documents
871 (defun nndoc-search (regexp)
872 (prog1
873 (re-search-forward regexp nil t)
874 (beginning-of-line)))
876 (defun nndoc-dissect-buffer ()
877 "Go through the document and partition it into heads/bodies/articles."
878 (let ((i 0)
879 (first t)
880 art-begin head-begin head-end body-begin body-end)
881 (setq nndoc-dissection-alist nil)
882 (with-current-buffer nndoc-current-buffer
883 (goto-char (point-min))
884 ;; Remove blank lines.
885 (while (eq (following-char) ?\n)
886 (delete-char 1))
887 (when nndoc-pre-dissection-function
888 (save-excursion
889 (funcall nndoc-pre-dissection-function)))
890 (if nndoc-dissection-function
891 (funcall nndoc-dissection-function)
892 ;; Find the beginning of the file.
893 (when nndoc-file-begin
894 (nndoc-search nndoc-file-begin))
895 ;; Go through the file.
896 (while (if (and first nndoc-first-article)
897 (nndoc-search nndoc-first-article)
898 (if art-begin
899 (goto-char art-begin)
900 (nndoc-article-begin)))
901 (setq first nil
902 art-begin nil)
903 (cond (nndoc-head-begin-function
904 (funcall nndoc-head-begin-function))
905 (nndoc-head-begin
906 (nndoc-search nndoc-head-begin)))
907 (if (or (eobp)
908 (and nndoc-file-end
909 (looking-at nndoc-file-end)))
910 (goto-char (point-max))
911 (setq head-begin (point))
912 (nndoc-search (or nndoc-head-end "^$"))
913 (setq head-end (point))
914 (if nndoc-body-begin-function
915 (funcall nndoc-body-begin-function)
916 (nndoc-search (or nndoc-body-begin "^\n")))
917 (setq body-begin (point))
918 (or (and nndoc-body-end-function
919 (funcall nndoc-body-end-function))
920 (and nndoc-body-end
921 (nndoc-search nndoc-body-end))
922 (and (nndoc-article-begin)
923 (setq art-begin (point)))
924 (progn
925 (goto-char (point-max))
926 (when nndoc-file-end
927 (and (re-search-backward nndoc-file-end nil t)
928 (beginning-of-line)))))
929 (setq body-end (point))
930 (push (list (incf i) head-begin head-end body-begin body-end
931 (count-lines body-begin body-end))
932 nndoc-dissection-alist)))))
933 (setq nndoc-dissection-alist (nreverse nndoc-dissection-alist))))
935 (defun nndoc-article-begin ()
936 (if nndoc-article-begin-function
937 (funcall nndoc-article-begin-function)
938 (ignore-errors
939 (nndoc-search nndoc-article-begin))))
941 (defun nndoc-unquote-dashes ()
942 "Unquote quoted non-separators in digests."
943 (while (re-search-forward "^- -"nil t)
944 (replace-match "-" t t)))
946 ;; Against compiler warnings.
947 (defvar nndoc-mime-split-ordinal)
949 (defun nndoc-dissect-mime-parts ()
950 "Go through a MIME composite article and partition it into sub-articles.
951 When a MIME entity contains sub-entities, dissection produces one article for
952 the header of this entity, and one article per sub-entity."
953 (setq nndoc-dissection-alist nil
954 nndoc-mime-split-ordinal 0)
955 (with-current-buffer nndoc-current-buffer
956 (nndoc-dissect-mime-parts-sub (point-min) (point-max) nil nil nil)))
958 (defun nndoc-dissect-mime-parts-sub (head-begin body-end article-insert
959 position parent)
960 "Dissect an entity, within a composite MIME message.
961 The complete message or MIME entity extends from HEAD-BEGIN to BODY-END.
962 ARTICLE-INSERT should be added at beginning for generating a full article.
963 The string POSITION holds a dotted decimal representation of the article
964 position in the hierarchical structure, it is nil for the outer entity.
965 PARENT is the message-ID of the parent summary line, or nil for none."
966 (let ((case-fold-search t)
967 (message-id (nnmail-message-id))
968 head-end body-begin summary-insert message-rfc822 multipart-any
969 subject content-type type subtype boundary-regexp)
970 ;; Gracefully handle a missing body.
971 (goto-char head-begin)
972 (if (or (and (eq (char-after) ?\n) (or (forward-char 1) t))
973 (search-forward "\n\n" body-end t))
974 (setq head-end (1- (point))
975 body-begin (point))
976 (setq head-end body-end
977 body-begin body-end))
978 (narrow-to-region head-begin head-end)
979 ;; Save MIME attributes.
980 (goto-char head-begin)
981 (setq content-type (message-fetch-field "Content-Type"))
982 (when content-type
983 (when (string-match
984 "^ *\\([^ \t\n/;]+\\)/\\([^ \t\n/;]+\\)" content-type)
985 (setq type (downcase (match-string 1 content-type))
986 subtype (downcase (match-string 2 content-type))
987 message-rfc822 (and (string= type "message")
988 (string= subtype "rfc822"))
989 multipart-any (string= type "multipart")))
990 (when (string-match ";[ \t\n]*name=\\([^ \t\n;]+\\)" content-type)
991 (setq subject (match-string 1 content-type)))
992 (when (string-match "boundary=\"?\\([^\"\n]*[^\" \t\n]\\)" content-type)
993 (setq boundary-regexp (concat "^--"
994 (regexp-quote
995 (match-string 1 content-type))
996 "\\(--\\)?[ \t]*\n"))))
997 (unless subject
998 (when (or multipart-any (not article-insert))
999 (setq subject (message-fetch-field "Subject"))))
1000 (unless type
1001 (setq type "text"
1002 subtype "plain"))
1003 ;; Prepare the article and summary inserts.
1004 (unless article-insert
1005 (setq article-insert (buffer-string)
1006 head-end head-begin))
1007 ;; Fix MIME-Version
1008 (unless (string-match "MIME-Version:" article-insert)
1009 (setq article-insert
1010 (concat article-insert "MIME-Version: 1.0\n")))
1011 (setq summary-insert article-insert)
1012 ;; - summary Subject.
1013 (setq summary-insert
1014 (let ((line (concat "Subject: <" position
1015 (and position multipart-any ".")
1016 (and multipart-any "*")
1017 (and (or position multipart-any) " ")
1018 (cond ((string= subtype "plain") type)
1019 ((string= subtype "basic") type)
1020 (t subtype))
1022 (and subject " ")
1023 subject
1024 "\n")))
1025 (if (string-match "Subject:.*\n\\([ \t].*\n\\)*" summary-insert)
1026 (replace-match line t t summary-insert)
1027 (concat summary-insert line))))
1028 ;; - summary Message-ID.
1029 (setq summary-insert
1030 (let ((line (concat "Message-ID: " message-id "\n")))
1031 (if (string-match "Message-ID:.*\n\\([ \t].*\n\\)*" summary-insert)
1032 (replace-match line t t summary-insert)
1033 (concat summary-insert line))))
1034 ;; - summary References.
1035 (when parent
1036 (setq summary-insert
1037 (let ((line (concat "References: " parent "\n")))
1038 (if (string-match "References:.*\n\\([ \t].*\n\\)*"
1039 summary-insert)
1040 (replace-match line t t summary-insert)
1041 (concat summary-insert line)))))
1042 ;; Generate dissection information for this entity.
1043 (push (list (incf nndoc-mime-split-ordinal)
1044 head-begin head-end body-begin body-end
1045 (count-lines body-begin body-end)
1046 article-insert summary-insert)
1047 nndoc-dissection-alist)
1048 ;; Recurse for all sub-entities, if any.
1049 (widen)
1050 (cond
1051 (message-rfc822
1052 (save-excursion
1053 (nndoc-dissect-mime-parts-sub body-begin body-end nil
1054 position message-id)))
1055 ((and multipart-any boundary-regexp)
1056 (let ((part-counter 0)
1057 part-begin part-end eof-flag)
1058 (while (string-match "\
1059 ^\\(Lines\\|Content-\\(Type\\|Transfer-Encoding\\|Disposition\\)\\):.*\n\\([ \t].*\n\\)*"
1060 article-insert)
1061 (setq article-insert (replace-match "" t t article-insert)))
1062 (let ((case-fold-search nil))
1063 (goto-char body-begin)
1064 (setq eof-flag (not (re-search-forward boundary-regexp body-end t)))
1065 (while (not eof-flag)
1066 (setq part-begin (point))
1067 (cond ((re-search-forward boundary-regexp body-end t)
1068 (or (not (match-string 1))
1069 (string= (match-string 1) "")
1070 (setq eof-flag t))
1071 (forward-line -1)
1072 (setq part-end (point))
1073 (forward-line 1))
1074 (t (setq part-end body-end
1075 eof-flag t)))
1076 (save-excursion
1077 (nndoc-dissect-mime-parts-sub
1078 part-begin part-end article-insert
1079 (concat position
1080 (and position ".")
1081 (format "%d" (incf part-counter)))
1082 message-id)))))))))
1084 ;;;###autoload
1085 (defun nndoc-add-type (definition &optional position)
1086 "Add document DEFINITION to the list of nndoc document definitions.
1087 If POSITION is nil or `last', the definition will be added
1088 as the last checked definition, if t or `first', add as the
1089 first definition, and if any other symbol, add after that
1090 symbol in the alist."
1091 ;; First remove any old instances.
1092 (gnus-alist-pull (car definition) nndoc-type-alist)
1093 ;; Then enter the new definition in the proper place.
1094 (cond
1095 ((or (null position) (eq position 'last))
1096 (setq nndoc-type-alist (nconc nndoc-type-alist (list definition))))
1097 ((or (eq position t) (eq position 'first))
1098 (push definition nndoc-type-alist))
1100 (let ((list (memq (assq position nndoc-type-alist)
1101 nndoc-type-alist)))
1102 (unless list
1103 (error "No such position: %s" position))
1104 (setcdr list (cons definition (cdr list)))))))
1106 (provide 'nndoc)
1108 ;;; nndoc.el ends here