1 ;;; nnwarchive.el --- interfacing with web archives
2 ;; Copyright (C) 1999, 2000 Free Software Foundation, Inc.
4 ;; Author: Shenghuo Zhu <zsh@cs.rochester.edu>
5 ;; Keywords: news egroups mail-archive
7 ;; This file is part of GNU Emacs.
9 ;; GNU Emacs is free software; you can redistribute it and/or modify
10 ;; it under the terms of the GNU General Public License as published
11 ;; by the Free Software Foundation; either version 2, or (at your
12 ;; option) any later version.
14 ;; GNU Emacs is distributed in the hope that it will be useful, but
15 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
16 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 ;; General Public License for more details.
19 ;; You should have received a copy of the GNU General Public License
20 ;; along with GNU Emacs; see the file COPYING. If not, write to the
21 ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
22 ;; Boston, MA 02111-1307, USA.
26 ;; Note: You need to have `url' (w3 0.46) or greater version
27 ;; installed for this backend to work.
30 ;; 1. To support more web archives.
31 ;; 2. Generalize webmail to other MHonArc archive.
35 (eval-when-compile (require 'cl
))
44 (require 'mail-source
)
51 ;; Report failure to find w3 at load time if appropriate.
58 (nnoo-declare nnwarchive
)
60 (defvar nnwarchive-type-definition
62 (address .
"www.egroups.com")
64 "http://www.egroups.com/login.cgi?&login_email=%s&login_password=%s"
65 nnwarchive-login nnwarchive-passwd
)
67 "http://www.egroups.com/mygroups")
68 (list-dissect . nnwarchive-egroups-list
)
69 (list-groups . nnwarchive-egroups-list-groups
)
71 "http://www.egroups.com/messages/%s/%d" group aux
)
73 "http://www.egroups.com/messages/%s/" group
)
74 (xover-page-size .
13)
75 (xover-dissect . nnwarchive-egroups-xover
)
77 "http://www.egroups.com/message/%s/%d?source=1" group article
)
78 (article-dissect . nnwarchive-egroups-article
)
81 (xover-files . nnwarchive-egroups-xover-files
))
83 (address .
"www.mail-archive.com")
86 "http://www.mail-archive.com/lists.html")
87 (list-dissect . nnwarchive-mail-archive-list
)
88 (list-groups . nnwarchive-mail-archive-list-groups
)
90 "http://www.mail-archive.com/%s/mail%d.html" group aux
)
92 "http://www.mail-archive.com/%s/maillist.html" group
)
94 (xover-dissect . nnwarchive-mail-archive-xover
)
96 "http://www.mail-archive.com/%s/msg%05d.html" group article1
)
97 (article-dissect . nnwarchive-mail-archive-article
)
98 (xover-files . nnwarchive-mail-archive-xover-files
)
100 (article-offset .
1))))
102 (defvar nnwarchive-default-type
'egroups
)
104 (defvoo nnwarchive-directory
(nnheader-concat gnus-directory
"warchive/")
105 "Where nnwarchive will save its files.")
107 (defvoo nnwarchive-type nil
108 "The type of nnwarchive.")
110 (defvoo nnwarchive-address
""
111 "The address of nnwarchive.")
113 (defvoo nnwarchive-login nil
114 "Your login name for the group.")
116 (defvoo nnwarchive-passwd nil
117 "Your password for the group.")
119 (defvoo nnwarchive-groups nil
)
121 (defvoo nnwarchive-headers-cache nil
)
123 (defvoo nnwarchive-authentication nil
)
125 (defvoo nnwarchive-nov-is-evil nil
)
127 (defconst nnwarchive-version
"nnwarchive 1.0")
129 ;;; Internal variables
131 (defvoo nnwarchive-open-url nil
)
132 (defvoo nnwarchive-open-dissect nil
)
134 (defvoo nnwarchive-list-url nil
)
135 (defvoo nnwarchive-list-dissect nil
)
136 (defvoo nnwarchive-list-groups nil
)
138 (defvoo nnwarchive-xover-files nil
)
139 (defvoo nnwarchive-xover-url nil
)
140 (defvoo nnwarchive-xover-last-url nil
)
141 (defvoo nnwarchive-xover-dissect nil
)
142 (defvoo nnwarchive-xover-page-size nil
)
144 (defvoo nnwarchive-article-url nil
)
145 (defvoo nnwarchive-article-dissect nil
)
146 (defvoo nnwarchive-xover-files nil
)
147 (defvoo nnwarchive-article-offset
0)
149 (defvoo nnwarchive-buffer nil
)
151 (defvoo nnwarchive-keep-backlog
300)
152 (defvar nnwarchive-backlog-articles nil
)
153 (defvar nnwarchive-backlog-hashtb nil
)
155 (defvoo nnwarchive-headers nil
)
158 ;;; Interface functions
160 (nnoo-define-basics nnwarchive
)
162 (defun nnwarchive-set-default (type)
163 (let ((defs (cdr (assq type nnwarchive-type-definition
)))
166 (set (intern (concat "nnwarchive-" (symbol-name (car def
))))
169 (defmacro nnwarchive-backlog
(&rest form
)
170 `(let ((gnus-keep-backlog nnwarchive-keep-backlog
)
172 (format " *nnwarchive backlog %s*" nnwarchive-address
))
173 (gnus-backlog-articles nnwarchive-backlog-articles
)
174 (gnus-backlog-hashtb nnwarchive-backlog-hashtb
))
177 (setq nnwarchive-backlog-articles gnus-backlog-articles
178 nnwarchive-backlog-hashtb gnus-backlog-hashtb
))))
179 (put 'nnwarchive-backlog
'lisp-indent-function
0)
180 (put 'nnwarchive-backlog
'edebug-form-spec
'(form body
))
182 (defun nnwarchive-backlog-enter-article (group number buffer
)
184 (gnus-backlog-enter-article group number buffer
)))
186 (defun nnwarchive-get-article (article &optional group server buffer
)
187 (if (numberp article
)
188 (if (nnwarchive-backlog
189 (gnus-backlog-request-article group article
190 (or buffer nntp-server-buffer
)))
194 (set-buffer nnwarchive-buffer
)
195 (goto-char (point-min))
196 (let ((article1 (- article nnwarchive-article-offset
)))
197 (nnwarchive-url nnwarchive-article-url
))
198 (setq contents
(funcall nnwarchive-article-dissect group article
)))
201 (set-buffer (or buffer nntp-server-buffer
))
204 (nnwarchive-backlog-enter-article group article
(current-buffer))
205 (nnheader-report 'nnwarchive
"Fetched article %s" article
)
206 (cons group article
)))))
209 (deffoo nnwarchive-retrieve-headers
(articles &optional group server fetch-old
)
210 (nnwarchive-possibly-change-server group server
)
211 (if (or gnus-nov-is-evil nnwarchive-nov-is-evil
)
213 (with-current-buffer nntp-server-buffer
215 (let ((buf (current-buffer)) b e
)
216 (dolist (art articles
)
217 (nnwarchive-get-article art group server buf
)
218 (setq b
(goto-char (point-min)))
219 (if (search-forward "\n\n" nil t
)
221 (goto-char (point-max)))
223 (with-current-buffer nntp-server-buffer
224 (insert (format "221 %d Article retrieved.\n" art
))
225 (insert-buffer-substring buf b e
)
228 (setq nnwarchive-headers
(cdr (assoc group nnwarchive-headers-cache
)))
230 (set-buffer nnwarchive-buffer
)
232 (funcall nnwarchive-xover-files group articles
))
234 (set-buffer nntp-server-buffer
)
237 (dolist (art articles
)
238 (if (setq header
(assq art nnwarchive-headers
))
239 (nnheader-insert-nov (cdr header
))))))
240 (let ((elem (assoc group nnwarchive-headers-cache
)))
242 (setcdr elem nnwarchive-headers
)
243 (push (cons group nnwarchive-headers
) nnwarchive-headers-cache
)))
246 (deffoo nnwarchive-request-group
(group &optional server dont-check
)
247 (nnwarchive-possibly-change-server nil server
)
248 (when (and (not dont-check
) nnwarchive-list-groups
)
249 (funcall nnwarchive-list-groups
(list group
))
250 (nnwarchive-write-groups))
251 (let ((elem (assoc group nnwarchive-groups
)))
254 (nnheader-report 'nnwarchive
"Group does not exist"))
256 (nnheader-report 'nnwarchive
"Opened group %s" group
)
258 "211 %d %d %d %s\n" (or (cadr elem
) 0) 1 (or (cadr elem
) 0)
259 (prin1-to-string group
))
262 (deffoo nnwarchive-request-article
(article &optional group server buffer
)
263 (nnwarchive-possibly-change-server group server
)
264 (nnwarchive-get-article article group server buffer
))
266 (deffoo nnwarchive-close-server
(&optional server
)
267 (when (and (nnwarchive-server-opened server
)
268 (gnus-buffer-live-p nnwarchive-buffer
))
270 (set-buffer nnwarchive-buffer
)
271 (kill-buffer nnwarchive-buffer
)))
273 (gnus-backlog-shutdown))
274 (nnoo-close-server 'nnwarchive server
))
276 (deffoo nnwarchive-request-list
(&optional server
)
277 (nnwarchive-possibly-change-server nil server
)
279 (set-buffer nnwarchive-buffer
)
281 (if nnwarchive-list-url
282 (nnwarchive-url nnwarchive-list-url
))
283 (if nnwarchive-list-dissect
284 (funcall nnwarchive-list-dissect
))
285 (nnwarchive-write-groups)
286 (nnwarchive-generate-active))
289 (deffoo nnwarchive-open-server
(server &optional defs connectionless
)
290 (nnoo-change-server 'nnwarchive server defs
)
291 (nnwarchive-init server
)
292 (when nnwarchive-authentication
293 (setq nnwarchive-login
296 (format "Login at %s: " server
)
298 (setq nnwarchive-passwd
299 (or nnwarchive-passwd
300 (mail-source-read-passwd
301 (format "Password for %s at %s: "
302 nnwarchive-login server
)))))
303 (unless nnwarchive-groups
304 (nnwarchive-read-groups))
306 (set-buffer nnwarchive-buffer
)
308 (if nnwarchive-open-url
309 (nnwarchive-url nnwarchive-open-url
))
310 (if nnwarchive-open-dissect
311 (funcall nnwarchive-open-dissect
)))
314 (nnoo-define-skeleton nnwarchive
)
316 ;;; Internal functions
318 (defun nnwarchive-possibly-change-server (&optional group server
)
319 (nnwarchive-init server
)
321 (not (nnwarchive-server-opened server
)))
322 (nnwarchive-open-server server
)))
324 (defun nnwarchive-read-groups ()
325 (let ((file (expand-file-name (concat "groups-" nnwarchive-address
)
326 nnwarchive-directory
)))
327 (when (file-exists-p file
)
329 (insert-file-contents file
)
330 (goto-char (point-min))
331 (setq nnwarchive-groups
(read (current-buffer)))))))
333 (defun nnwarchive-write-groups ()
334 (with-temp-file (expand-file-name (concat "groups-" nnwarchive-address
)
335 nnwarchive-directory
)
336 (prin1 nnwarchive-groups
(current-buffer))))
338 (defun nnwarchive-init (server)
339 "Initialize buffers and such."
340 (let ((type (intern server
)) (defs nnwarchive-type-definition
) def
)
343 (setq type nnwarchive-default-type
))
344 ((assq type nnwarchive-type-definition
) t
)
347 (while (setq def
(pop defs
))
348 (when (equal (cdr (assq 'address
(cdr def
))) server
)
350 (setq type
(car def
))))
352 (error "Undefined server %s" server
))))
353 (setq nnwarchive-type type
))
354 (unless (file-exists-p nnwarchive-directory
)
355 (gnus-make-directory nnwarchive-directory
))
356 (unless (gnus-buffer-live-p nnwarchive-buffer
)
357 (setq nnwarchive-buffer
359 (nnheader-set-temp-buffer
360 (format " *nnwarchive %s %s*" nnwarchive-type server
)))))
361 (nnwarchive-set-default nnwarchive-type
))
363 (defun nnwarchive-encode-www-form-urlencoded (pairs)
364 "Return PAIRS encoded for forms."
368 (concat (w3-form-encode-xwfu (car data
)) "="
369 (w3-form-encode-xwfu (cdr data
)))))
372 (defun nnwarchive-fetch-form (url pairs
)
373 (let ((url-request-data (nnwarchive-encode-www-form-urlencoded pairs
))
374 (url-request-method "POST")
375 (url-request-extra-headers
376 '(("Content-type" .
"application/x-www-form-urlencoded"))))
380 (defun nnwarchive-eval (expr)
383 (cons (nnwarchive-eval (car expr
)) (nnwarchive-eval (cdr expr
))))
389 (defun nnwarchive-url (xurl)
390 (mm-with-unibyte-current-buffer
391 (let ((url-confirmation-func 'identity
)
392 (url-cookie-multiple-line nil
))
394 ((eq (car xurl
) 'post
)
396 (nnwarchive-fetch-form (car xurl
) (nnwarchive-eval (cdr xurl
))))
398 (nnweb-insert (apply 'format
(nnwarchive-eval xurl
))))))))
400 (defun nnwarchive-generate-active ()
402 (set-buffer nntp-server-buffer
)
404 (dolist (elem nnwarchive-groups
)
405 (insert (prin1-to-string (car elem
))
406 " " (number-to-string (or (cadr elem
) 0)) " 1 y\n"))))
408 (defun nnwarchive-paged (articles)
409 (let (art narts next
)
410 (while (setq art
(pop articles
))
411 (when (and (>= art
(or next
0))
412 (not (assq art nnwarchive-headers
)))
414 (setq next
(+ art nnwarchive-xover-page-size
))))
419 (defun nnwarchive-egroups-list-groups (groups)
422 (set-buffer nnwarchive-buffer
)
423 (dolist (group groups
)
425 (nnwarchive-url nnwarchive-xover-last-url
)
426 (goto-char (point-min))
427 (when (re-search-forward "of \\([0-9]+\\)[ \t\n\r]*</title>" nil t
)
428 (setq articles
(string-to-number (match-string 1))))
429 (let ((elem (assoc group nnwarchive-groups
)))
431 (setcar (cdr elem
) articles
)
432 (push (list group articles
"") nnwarchive-groups
)))
433 (setq nnwarchive-headers
(cdr (assoc group nnwarchive-headers-cache
)))
434 (nnwarchive-egroups-xover group
)
435 (let ((elem (assoc group nnwarchive-headers-cache
)))
437 (setcdr elem nnwarchive-headers
)
438 (push (cons group nnwarchive-headers
) nnwarchive-headers-cache
)))))))
440 (defun nnwarchive-egroups-list ()
441 (let ((case-fold-search t
)
442 group description elem articles
)
443 (goto-char (point-min))
445 (re-search-forward "href=\"/group/\\([^/\"\> ]+\\)" nil t
)
446 (setq group
(match-string 1)
447 description
(match-string 2))
448 (if (setq elem
(assoc group nnwarchive-groups
))
449 (setcar (cdr elem
) 0)
450 (push (list group articles description
) nnwarchive-groups
))))
453 (defun nnwarchive-egroups-xover (group)
454 (let (article subject from date
)
455 (goto-char (point-min))
456 (while (re-search-forward
457 "<a href=\"/group/\\([^/]+\\)/\\([0-9]+\\)[^>]+>\\([^<]+\\)<"
459 (setq group
(match-string 1)
460 article
(string-to-number (match-string 2))
461 subject
(match-string 3))
463 (unless (assq article nnwarchive-headers
)
464 (if (looking-at "<td[^>]+><font[^>]+>\\([^<]+\\)</font>")
465 (setq from
(match-string 1)))
467 (if (looking-at "<td[^>]+><font[^>]+>\\([^<]+\\)</font>")
468 (setq date
(identity (match-string 1))))
471 (make-full-mail-header
473 (nnweb-decode-entities-string subject
)
474 (nnweb-decode-entities-string from
)
476 (concat "<" group
"%"
477 (number-to-string article
)
480 0 0 "")) nnwarchive-headers
))))
483 (defun nnwarchive-egroups-article (group articles
)
484 (goto-char (point-min))
485 (if (search-forward "<pre>" nil t
)
486 (delete-region (point-min) (point)))
487 (goto-char (point-max))
488 (if (search-backward "</pre>" nil t
)
489 (delete-region (point) (point-max)))
490 (goto-char (point-min))
491 (while (re-search-forward "<a[^>]+>\\([^<]+\\)</a>" nil t
)
492 (replace-match "\\1"))
493 (nnweb-decode-entities)
496 (defun nnwarchive-egroups-xover-files (group articles
)
498 (setq auxs
(nnwarchive-paged (sort articles
'<)))
499 (while (setq aux
(pop auxs
))
500 (goto-char (point-max))
501 (nnwarchive-url nnwarchive-xover-url
))
502 (if nnwarchive-xover-dissect
503 (nnwarchive-egroups-xover group
))))
507 (defun nnwarchive-mail-archive-list-groups (groups)
510 (set-buffer nnwarchive-buffer
)
511 (dolist (group groups
)
513 (nnwarchive-url nnwarchive-xover-last-url
)
514 (goto-char (point-min))
515 (when (re-search-forward "msg\\([0-9]+\\)\\.html" nil t
)
516 (setq articles
(1+ (string-to-number (match-string 1)))))
517 (let ((elem (assoc group nnwarchive-groups
)))
519 (setcar (cdr elem
) articles
)
520 (push (list group articles
"") nnwarchive-groups
)))
521 (setq nnwarchive-headers
(cdr (assoc group nnwarchive-headers-cache
)))
522 (nnwarchive-mail-archive-xover group
)
523 (let ((elem (assoc group nnwarchive-headers-cache
)))
525 (setcdr elem nnwarchive-headers
)
526 (push (cons group nnwarchive-headers
)
527 nnwarchive-headers-cache
)))))))
529 (defun nnwarchive-mail-archive-list ()
530 (let ((case-fold-search t
)
531 group description elem articles
)
532 (goto-char (point-min))
533 (while (re-search-forward "<a href=\"\\([^/]+\\)/\">\\([^>]+\\)<" nil t
)
534 (setq group
(match-string 1)
535 description
(match-string 2))
538 (if (setq elem
(assoc group nnwarchive-groups
))
539 (setcar (cdr elem
) articles
)
540 (push (list group articles description
) nnwarchive-groups
))))
543 (defun nnwarchive-mail-archive-xover (group)
544 (let (article subject from date
)
545 (goto-char (point-min))
546 (while (re-search-forward
547 "<A[^>]*HREF=\"msg\\([0-9]+\\)\\.html[^>]+>\\([^<]+\\)<"
549 (setq article
(1+ (string-to-number (match-string 1)))
550 subject
(match-string 2))
552 (unless (assq article nnwarchive-headers
)
553 (if (looking-at "<UL><LI><EM>From</EM>:\\([^&]+\\)<\\([^&]+\\)>")
555 (setq from
(match-string 1)
556 date
(identity (match-string 2))))
557 (setq from
"" date
""))
560 (make-full-mail-header
562 (nnweb-decode-entities-string subject
)
563 (nnweb-decode-entities-string from
)
565 (format "<%05d%%%s>\n" (1- article
) group
)
567 0 0 "")) nnwarchive-headers
))))
570 (defun nnwarchive-mail-archive-xover-files (group articles
)
571 (unless nnwarchive-headers
573 (nnwarchive-url nnwarchive-xover-last-url
)
574 (goto-char (point-min))
575 (nnwarchive-mail-archive-xover group
))
576 (let ((minart (apply 'min articles
))
577 (min (apply 'min
(mapcar 'car nnwarchive-headers
)))
579 (while (> min minart
)
581 (nnwarchive-url nnwarchive-xover-url
)
582 (nnwarchive-mail-archive-xover group
)
583 (setq min
(apply 'min
(mapcar 'car nnwarchive-headers
))))))
585 (defvar nnwarchive-caesar-translation-table nil
586 "Modified rot13 table. tr/@A-Z[a-z/N-Z[@A-Mn-za-m/.")
588 (defun nnwarchive-make-caesar-translation-table ()
589 "Create modified rot13 table. tr/@A-Z[a-z/N-Z[@A-Mn-za-m/."
591 (table (make-string 256 0))
593 (A (mm-char-int ?A
)))
594 (while (< (incf i
) 256)
597 (substring table
0 (1- A
))
598 (substring table
(+ A
13) (+ A
27))
599 (substring table
(1- A
) (+ A
13))
600 (substring table
(+ A
27) a
)
601 (substring table
(+ a
13) (+ a
26))
602 (substring table a
(+ a
13))
603 (substring table
(+ a
26) 255))))
605 (defun nnwarchive-from-r13 (from-r13)
609 (let ((message-caesar-translation-table
610 (or nnwarchive-caesar-translation-table
611 (setq nnwarchive-caesar-translation-table
612 (nnwarchive-make-caesar-translation-table)))))
613 (message-caesar-region (point-min) (point-max))
616 (defun nnwarchive-mail-archive-article (group article
)
617 (let (p refs url mime e
622 (goto-char (point-min))
623 (when (search-forward "X-Head-End" nil t
)
625 (narrow-to-region (point-min) (point))
626 (nnweb-decode-entities)
627 (goto-char (point-min))
628 (while (search-forward "<!--X-" nil t
)
630 (goto-char (point-min))
631 (while (search-forward " -->" nil t
)
634 (or (mail-fetch-field "from")
636 (mail-fetch-field "from-r13"))))
637 (setq date
(mail-fetch-field "date"))
638 (setq id
(mail-fetch-field "message-id"))
639 (setq subject
(mail-fetch-field "subject"))
640 (goto-char (point-max))
642 (when (search-forward "<ul>" nil t
)
644 (delete-region (point-min) (point))
645 (search-forward "</ul>" nil t
)
647 (narrow-to-region (point-min) (point))
648 (nnweb-remove-markup)
649 (nnweb-decode-entities)
650 (goto-char (point-min))
653 (message-remove-header "from")
654 (goto-char (point-max))
655 (insert "From: " from
"\n"))
657 (message-remove-header "subject")
658 (goto-char (point-max))
659 (insert "Subject: " subject
"\n"))
661 (goto-char (point-max))
662 (insert "X-Message-ID: <" id
">\n"))
664 (message-remove-header "date")
665 (goto-char (point-max))
666 (insert "Date: " date
"\n"))
667 (goto-char (point-max))
671 (when (search-forward "X-Body-of-Message" nil t
)
673 (delete-region p
(point))
674 (search-forward "X-Body-of-Message-End" nil t
)
677 (narrow-to-region p
(point))
678 (goto-char (point-min))
679 (if (> (skip-chars-forward "\040\n\r\t") 0)
680 (delete-region (point-min) (point)))
683 ((looking-at "<PRE>\r?\n?")
684 (delete-region (match-beginning 0) (match-end 0))
686 (when (search-forward "</PRE>" nil t
)
687 (delete-region (match-beginning 0) (match-end 0))
689 (narrow-to-region p
(point))
690 (nnweb-remove-markup)
691 (nnweb-decode-entities)
692 (goto-char (point-max)))))
693 ((looking-at "<P><A HREF=\"\\([^\"]+\\)")
694 (setq url
(match-string 1))
695 (delete-region (match-beginning 0)
696 (progn (forward-line) (point)))
697 ;; I hate to download the url encode it, then immediately
699 ;; FixMe: Find a better solution to attach the URL.
700 ;; Maybe do some hack in external part of mml-generate-mim-1.
703 (format "<URL:http://www.mail-archive.com/%s/%s>"
710 (insert "<#part type=\"text/html\" disposition=inline>")
712 (if (re-search-forward
713 "[\040\n\r\t]*<PRE>\\|[\040\n\r\t]*<P><A HREF=\""
720 (if (> (skip-chars-forward "\040\n\r\t") 0)
721 (delete-region p
(point))))
722 (goto-char (point-max))))
724 (when (search-forward "X-References-End" nil t
)
727 (search-backward "X-References" p t
)
728 (while (re-search-forward "msg\\([0-9]+\\)\\.html" e t
)
729 (push (concat "<" (match-string 1) "%" group
">") refs
)))
730 (delete-region p
(point-max))
731 (goto-char (point-min))
732 (insert (format "Message-ID: <%05d%%%s>\n" (1- article
) group
))
734 (insert "References:")
736 (insert " " (pop refs
)))
739 (unless (looking-at "$")
740 (search-forward "\n\n" nil t
)
742 (narrow-to-region (point) (point-max))
743 (insert "MIME-Version: 1.0\n"
746 (delete-region (point-min) (point-max))))
750 (provide 'nnwarchive
)
752 ;;; nnwarchive.el ends here