1 ;;; nnwarchive.el --- interfacing with web archives
3 ;; Copyright (C) 1999, 2000, 2002, 2003, 2004,
4 ;; 2005, 2006 Free Software Foundation, Inc.
6 ;; Author: Shenghuo Zhu <zsh@cs.rochester.edu>
7 ;; Keywords: news egroups mail-archive
9 ;; This file is part of GNU Emacs.
11 ;; GNU Emacs is free software; you can redistribute it and/or modify
12 ;; it under the terms of the GNU General Public License as published
13 ;; by the Free Software Foundation; either version 2, or (at your
14 ;; option) any later version.
16 ;; GNU Emacs is distributed in the hope that it will be useful, but
17 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
18 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 ;; General Public License for more details.
21 ;; You should have received a copy of the GNU General Public License
22 ;; along with GNU Emacs; see the file COPYING. If not, write to the
23 ;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
24 ;; Boston, MA 02110-1301, USA.
28 ;; Note: You need to have `url' (w3 0.46) or greater version
29 ;; installed for some functions of this backend to work.
32 ;; 1. To support more web archives.
33 ;; 2. Generalize webmail to other MHonArc archive.
37 (eval-when-compile (require 'cl
))
48 (nnoo-declare nnwarchive
)
50 (defvar nnwarchive-type-definition
52 (address .
"www.egroups.com")
54 "http://www.egroups.com/login.cgi?&login_email=%s&login_password=%s"
55 nnwarchive-login nnwarchive-passwd
)
57 "http://www.egroups.com/mygroups")
58 (list-dissect . nnwarchive-egroups-list
)
59 (list-groups . nnwarchive-egroups-list-groups
)
61 "http://www.egroups.com/messages/%s/%d" group aux
)
63 "http://www.egroups.com/messages/%s/" group
)
64 (xover-page-size .
13)
65 (xover-dissect . nnwarchive-egroups-xover
)
67 "http://www.egroups.com/message/%s/%d?source=1" group article
)
68 (article-dissect . nnwarchive-egroups-article
)
71 (xover-files . nnwarchive-egroups-xover-files
))
73 (address .
"www.mail-archive.com")
76 "http://www.mail-archive.com/lists.html")
77 (list-dissect . nnwarchive-mail-archive-list
)
78 (list-groups . nnwarchive-mail-archive-list-groups
)
80 "http://www.mail-archive.com/%s/mail%d.html" group aux
)
82 "http://www.mail-archive.com/%s/maillist.html" group
)
84 (xover-dissect . nnwarchive-mail-archive-xover
)
86 "http://www.mail-archive.com/%s/msg%05d.html" group article1
)
87 (article-dissect . nnwarchive-mail-archive-article
)
88 (xover-files . nnwarchive-mail-archive-xover-files
)
90 (article-offset .
1))))
92 (defvar nnwarchive-default-type
'egroups
)
94 (defvoo nnwarchive-directory
(nnheader-concat gnus-directory
"warchive/")
95 "Where nnwarchive will save its files.")
97 (defvoo nnwarchive-type nil
98 "The type of nnwarchive.")
100 (defvoo nnwarchive-address
""
101 "The address of nnwarchive.")
103 (defvoo nnwarchive-login nil
104 "Your login name for the group.")
106 (defvoo nnwarchive-passwd nil
107 "Your password for the group.")
109 (defvoo nnwarchive-groups nil
)
111 (defvoo nnwarchive-headers-cache nil
)
113 (defvoo nnwarchive-authentication nil
)
115 (defvoo nnwarchive-nov-is-evil nil
)
117 (defconst nnwarchive-version
"nnwarchive 1.0")
119 ;;; Internal variables
121 (defvoo nnwarchive-open-url nil
)
122 (defvoo nnwarchive-open-dissect nil
)
124 (defvoo nnwarchive-list-url nil
)
125 (defvoo nnwarchive-list-dissect nil
)
126 (defvoo nnwarchive-list-groups nil
)
128 (defvoo nnwarchive-xover-files nil
)
129 (defvoo nnwarchive-xover-url nil
)
130 (defvoo nnwarchive-xover-last-url nil
)
131 (defvoo nnwarchive-xover-dissect nil
)
132 (defvoo nnwarchive-xover-page-size nil
)
134 (defvoo nnwarchive-article-url nil
)
135 (defvoo nnwarchive-article-dissect nil
)
136 (defvoo nnwarchive-xover-files nil
)
137 (defvoo nnwarchive-article-offset
0)
139 (defvoo nnwarchive-buffer nil
)
141 (defvoo nnwarchive-keep-backlog
300)
142 (defvar nnwarchive-backlog-articles nil
)
143 (defvar nnwarchive-backlog-hashtb nil
)
145 (defvoo nnwarchive-headers nil
)
148 ;;; Interface functions
150 (nnoo-define-basics nnwarchive
)
152 (defun nnwarchive-set-default (type)
153 (let ((defs (cdr (assq type nnwarchive-type-definition
)))
156 (set (intern (concat "nnwarchive-" (symbol-name (car def
))))
159 (defmacro nnwarchive-backlog
(&rest form
)
160 `(let ((gnus-keep-backlog nnwarchive-keep-backlog
)
162 (format " *nnwarchive backlog %s*" nnwarchive-address
))
163 (gnus-backlog-articles nnwarchive-backlog-articles
)
164 (gnus-backlog-hashtb nnwarchive-backlog-hashtb
))
167 (setq nnwarchive-backlog-articles gnus-backlog-articles
168 nnwarchive-backlog-hashtb gnus-backlog-hashtb
))))
169 (put 'nnwarchive-backlog
'lisp-indent-function
0)
170 (put 'nnwarchive-backlog
'edebug-form-spec
'(form body
))
172 (defun nnwarchive-backlog-enter-article (group number buffer
)
174 (gnus-backlog-enter-article group number buffer
)))
176 (defun nnwarchive-get-article (article &optional group server buffer
)
177 (if (numberp article
)
178 (if (nnwarchive-backlog
179 (gnus-backlog-request-article group article
180 (or buffer nntp-server-buffer
)))
184 (set-buffer nnwarchive-buffer
)
185 (goto-char (point-min))
186 (let ((article1 (- article nnwarchive-article-offset
)))
187 (nnwarchive-url nnwarchive-article-url
))
188 (setq contents
(funcall nnwarchive-article-dissect group article
)))
191 (set-buffer (or buffer nntp-server-buffer
))
194 (nnwarchive-backlog-enter-article group article
(current-buffer))
195 (nnheader-report 'nnwarchive
"Fetched article %s" article
)
196 (cons group article
)))))
199 (deffoo nnwarchive-retrieve-headers
(articles &optional group server fetch-old
)
200 (nnwarchive-possibly-change-server group server
)
201 (if (or gnus-nov-is-evil nnwarchive-nov-is-evil
)
203 (with-current-buffer nntp-server-buffer
205 (let ((buf (current-buffer)) b e
)
206 (dolist (art articles
)
207 (nnwarchive-get-article art group server buf
)
208 (setq b
(goto-char (point-min)))
209 (if (search-forward "\n\n" nil t
)
211 (goto-char (point-max)))
213 (with-current-buffer nntp-server-buffer
214 (insert (format "221 %d Article retrieved.\n" art
))
215 (insert-buffer-substring buf b e
)
218 (setq nnwarchive-headers
(cdr (assoc group nnwarchive-headers-cache
)))
220 (set-buffer nnwarchive-buffer
)
222 (funcall nnwarchive-xover-files group articles
))
224 (set-buffer nntp-server-buffer
)
227 (dolist (art articles
)
228 (if (setq header
(assq art nnwarchive-headers
))
229 (nnheader-insert-nov (cdr header
))))))
230 (let ((elem (assoc group nnwarchive-headers-cache
)))
232 (setcdr elem nnwarchive-headers
)
233 (push (cons group nnwarchive-headers
) nnwarchive-headers-cache
)))
236 (deffoo nnwarchive-request-group
(group &optional server dont-check
)
237 (nnwarchive-possibly-change-server nil server
)
238 (when (and (not dont-check
) nnwarchive-list-groups
)
239 (funcall nnwarchive-list-groups
(list group
))
240 (nnwarchive-write-groups))
241 (let ((elem (assoc group nnwarchive-groups
)))
244 (nnheader-report 'nnwarchive
"Group does not exist"))
246 (nnheader-report 'nnwarchive
"Opened group %s" group
)
248 "211 %d %d %d %s\n" (or (cadr elem
) 0) 1 (or (cadr elem
) 0)
249 (prin1-to-string group
))
252 (deffoo nnwarchive-request-article
(article &optional group server buffer
)
253 (nnwarchive-possibly-change-server group server
)
254 (nnwarchive-get-article article group server buffer
))
256 (deffoo nnwarchive-close-server
(&optional server
)
257 (when (and (nnwarchive-server-opened server
)
258 (gnus-buffer-live-p nnwarchive-buffer
))
260 (set-buffer nnwarchive-buffer
)
261 (kill-buffer nnwarchive-buffer
)))
263 (gnus-backlog-shutdown))
264 (nnoo-close-server 'nnwarchive server
))
266 (deffoo nnwarchive-request-list
(&optional server
)
267 (nnwarchive-possibly-change-server nil server
)
269 (set-buffer nnwarchive-buffer
)
271 (if nnwarchive-list-url
272 (nnwarchive-url nnwarchive-list-url
))
273 (if nnwarchive-list-dissect
274 (funcall nnwarchive-list-dissect
))
275 (nnwarchive-write-groups)
276 (nnwarchive-generate-active))
279 (deffoo nnwarchive-open-server
(server &optional defs connectionless
)
280 (nnoo-change-server 'nnwarchive server defs
)
281 (nnwarchive-init server
)
282 (when nnwarchive-authentication
283 (setq nnwarchive-login
286 (format "Login at %s: " server
)
288 (setq nnwarchive-passwd
289 (or nnwarchive-passwd
291 (format "Password for %s at %s: "
292 nnwarchive-login server
)))))
293 (unless nnwarchive-groups
294 (nnwarchive-read-groups))
296 (set-buffer nnwarchive-buffer
)
298 (if nnwarchive-open-url
299 (nnwarchive-url nnwarchive-open-url
))
300 (if nnwarchive-open-dissect
301 (funcall nnwarchive-open-dissect
)))
304 (nnoo-define-skeleton nnwarchive
)
306 ;;; Internal functions
308 (defun nnwarchive-possibly-change-server (&optional group server
)
309 (nnwarchive-init server
)
311 (not (nnwarchive-server-opened server
)))
312 (nnwarchive-open-server server
)))
314 (defun nnwarchive-read-groups ()
315 (let ((file (expand-file-name (concat "groups-" nnwarchive-address
)
316 nnwarchive-directory
)))
317 (when (file-exists-p file
)
319 (insert-file-contents file
)
320 (goto-char (point-min))
321 (setq nnwarchive-groups
(read (current-buffer)))))))
323 (defun nnwarchive-write-groups ()
324 (with-temp-file (expand-file-name (concat "groups-" nnwarchive-address
)
325 nnwarchive-directory
)
326 (prin1 nnwarchive-groups
(current-buffer))))
328 (defun nnwarchive-init (server)
329 "Initialize buffers and such."
330 (let ((type (intern server
)) (defs nnwarchive-type-definition
) def
)
333 (setq type nnwarchive-default-type
))
334 ((assq type nnwarchive-type-definition
) t
)
337 (while (setq def
(pop defs
))
338 (when (equal (cdr (assq 'address
(cdr def
))) server
)
340 (setq type
(car def
))))
342 (error "Undefined server %s" server
))))
343 (setq nnwarchive-type type
))
344 (unless (file-exists-p nnwarchive-directory
)
345 (gnus-make-directory nnwarchive-directory
))
346 (unless (gnus-buffer-live-p nnwarchive-buffer
)
347 (setq nnwarchive-buffer
349 (nnheader-set-temp-buffer
350 (format " *nnwarchive %s %s*" nnwarchive-type server
)))))
351 (nnwarchive-set-default nnwarchive-type
))
353 (defun nnwarchive-eval (expr)
356 (cons (nnwarchive-eval (car expr
)) (nnwarchive-eval (cdr expr
))))
362 (defun nnwarchive-url (xurl)
363 (mm-with-unibyte-current-buffer
364 (let ((url-confirmation-func 'identity
) ;; Some hacks.
365 (url-cookie-multiple-line nil
))
367 ((eq (car xurl
) 'post
)
369 (mm-url-fetch-form (car xurl
) (nnwarchive-eval (cdr xurl
))))
371 (mm-url-insert (apply 'format
(nnwarchive-eval xurl
))))))))
373 (defun nnwarchive-generate-active ()
375 (set-buffer nntp-server-buffer
)
377 (dolist (elem nnwarchive-groups
)
378 (insert (prin1-to-string (car elem
))
379 " " (number-to-string (or (cadr elem
) 0)) " 1 y\n"))))
381 (defun nnwarchive-paged (articles)
382 (let (art narts next
)
383 (while (setq art
(pop articles
))
384 (when (and (>= art
(or next
0))
385 (not (assq art nnwarchive-headers
)))
387 (setq next
(+ art nnwarchive-xover-page-size
))))
392 (defun nnwarchive-egroups-list-groups (groups)
395 (set-buffer nnwarchive-buffer
)
396 (dolist (group groups
)
398 (nnwarchive-url nnwarchive-xover-last-url
)
399 (goto-char (point-min))
400 (when (re-search-forward "of \\([0-9]+\\)[ \t\n\r]*</title>" nil t
)
401 (setq articles
(string-to-number (match-string 1))))
402 (let ((elem (assoc group nnwarchive-groups
)))
404 (setcar (cdr elem
) articles
)
405 (push (list group articles
"") nnwarchive-groups
)))
406 (setq nnwarchive-headers
(cdr (assoc group nnwarchive-headers-cache
)))
407 (nnwarchive-egroups-xover group
)
408 (let ((elem (assoc group nnwarchive-headers-cache
)))
410 (setcdr elem nnwarchive-headers
)
411 (push (cons group nnwarchive-headers
) nnwarchive-headers-cache
)))))))
413 (defun nnwarchive-egroups-list ()
414 (let ((case-fold-search t
)
415 group description elem articles
)
416 (goto-char (point-min))
418 (re-search-forward "href=\"/group/\\([^/\"\> ]+\\)" nil t
)
419 (setq group
(match-string 1)
420 description
(match-string 2))
421 (if (setq elem
(assoc group nnwarchive-groups
))
422 (setcar (cdr elem
) 0)
423 (push (list group articles description
) nnwarchive-groups
))))
426 (defun nnwarchive-egroups-xover (group)
427 (let (article subject from date
)
428 (goto-char (point-min))
429 (while (re-search-forward
430 "<a href=\"/group/\\([^/]+\\)/\\([0-9]+\\)[^>]+>\\([^<]+\\)<"
432 (setq group
(match-string 1)
433 article
(string-to-number (match-string 2))
434 subject
(match-string 3))
436 (unless (assq article nnwarchive-headers
)
437 (if (looking-at "<td[^>]+><font[^>]+>\\([^<]+\\)</font>")
438 (setq from
(match-string 1)))
440 (if (looking-at "<td[^>]+><font[^>]+>\\([^<]+\\)</font>")
441 (setq date
(identity (match-string 1))))
444 (make-full-mail-header
446 (mm-url-decode-entities-string subject
)
447 (mm-url-decode-entities-string from
)
449 (concat "<" group
"%"
450 (number-to-string article
)
453 0 0 "")) nnwarchive-headers
))))
456 (defun nnwarchive-egroups-article (group articles
)
457 (goto-char (point-min))
458 (if (search-forward "<pre>" nil t
)
459 (delete-region (point-min) (point)))
460 (goto-char (point-max))
461 (if (search-backward "</pre>" nil t
)
462 (delete-region (point) (point-max)))
463 (goto-char (point-min))
464 (while (re-search-forward "<a[^>]+>\\([^<]+\\)</a>" nil t
)
465 (replace-match "\\1"))
466 (mm-url-decode-entities)
469 (defun nnwarchive-egroups-xover-files (group articles
)
471 (setq auxs
(nnwarchive-paged (sort articles
'<)))
472 (while (setq aux
(pop auxs
))
473 (goto-char (point-max))
474 (nnwarchive-url nnwarchive-xover-url
))
475 (if nnwarchive-xover-dissect
476 (nnwarchive-egroups-xover group
))))
480 (defun nnwarchive-mail-archive-list-groups (groups)
483 (set-buffer nnwarchive-buffer
)
484 (dolist (group groups
)
486 (nnwarchive-url nnwarchive-xover-last-url
)
487 (goto-char (point-min))
488 (when (re-search-forward "msg\\([0-9]+\\)\\.html" nil t
)
489 (setq articles
(1+ (string-to-number (match-string 1)))))
490 (let ((elem (assoc group nnwarchive-groups
)))
492 (setcar (cdr elem
) articles
)
493 (push (list group articles
"") nnwarchive-groups
)))
494 (setq nnwarchive-headers
(cdr (assoc group nnwarchive-headers-cache
)))
495 (nnwarchive-mail-archive-xover group
)
496 (let ((elem (assoc group nnwarchive-headers-cache
)))
498 (setcdr elem nnwarchive-headers
)
499 (push (cons group nnwarchive-headers
)
500 nnwarchive-headers-cache
)))))))
502 (defun nnwarchive-mail-archive-list ()
503 (let ((case-fold-search t
)
504 group description elem articles
)
505 (goto-char (point-min))
506 (while (re-search-forward "<a href=\"\\([^/]+\\)/\">\\([^>]+\\)<" nil t
)
507 (setq group
(match-string 1)
508 description
(match-string 2))
511 (if (setq elem
(assoc group nnwarchive-groups
))
512 (setcar (cdr elem
) articles
)
513 (push (list group articles description
) nnwarchive-groups
))))
516 (defun nnwarchive-mail-archive-xover (group)
517 (let (article subject from date
)
518 (goto-char (point-min))
519 (while (re-search-forward
520 "<A[^>]*HREF=\"msg\\([0-9]+\\)\\.html[^>]+>\\([^<]+\\)<"
522 (setq article
(1+ (string-to-number (match-string 1)))
523 subject
(match-string 2))
525 (unless (assq article nnwarchive-headers
)
526 (if (looking-at "<UL><LI><EM>From</EM>: *\\([^<]*[^< ]\\) *<\\([^&]+\\)>")
528 (setq from
(match-string 1)
529 date
(identity (match-string 2))))
530 (setq from
"" date
""))
533 (make-full-mail-header
535 (mm-url-decode-entities-string subject
)
536 (mm-url-decode-entities-string from
)
538 (format "<%05d%%%s>\n" (1- article
) group
)
540 0 0 "")) nnwarchive-headers
))))
543 (defun nnwarchive-mail-archive-xover-files (group articles
)
544 (unless nnwarchive-headers
546 (nnwarchive-url nnwarchive-xover-last-url
)
547 (goto-char (point-min))
548 (nnwarchive-mail-archive-xover group
))
549 (let ((minart (apply 'min articles
))
550 (min (apply 'min
(mapcar 'car nnwarchive-headers
)))
552 (while (> min minart
)
554 (nnwarchive-url nnwarchive-xover-url
)
555 (nnwarchive-mail-archive-xover group
)
556 (setq min
(apply 'min
(mapcar 'car nnwarchive-headers
))))))
558 (defvar nnwarchive-caesar-translation-table nil
559 "Modified rot13 table. tr/@A-Z[a-z/N-Z[@A-Mn-za-m/.")
561 (defun nnwarchive-make-caesar-translation-table ()
562 "Create modified rot13 table. tr/@A-Z[a-z/N-Z[@A-Mn-za-m/."
564 (table (make-string 256 0))
566 (A (mm-char-int ?A
)))
567 (while (< (incf i
) 256)
570 (substring table
0 (1- A
))
571 (substring table
(+ A
13) (+ A
27))
572 (substring table
(1- A
) (+ A
13))
573 (substring table
(+ A
27) a
)
574 (substring table
(+ a
13) (+ a
26))
575 (substring table a
(+ a
13))
576 (substring table
(+ a
26) 255))))
578 (defun nnwarchive-from-r13 (from-r13)
582 (let ((message-caesar-translation-table
583 (or nnwarchive-caesar-translation-table
584 (setq nnwarchive-caesar-translation-table
585 (nnwarchive-make-caesar-translation-table)))))
586 (message-caesar-region (point-min) (point-max))
589 (defun nnwarchive-mail-archive-article (group article
)
590 (let (p refs url mime e
593 (case-fold-search t
))
595 (goto-char (point-min))
596 (when (search-forward "X-Head-End" nil t
)
598 (narrow-to-region (point-min) (point))
599 (mm-url-decode-entities)
600 (goto-char (point-min))
601 (while (search-forward "<!--X-" nil t
)
603 (goto-char (point-min))
604 (while (search-forward " -->" nil t
)
607 (or (mail-fetch-field "from")
609 (mail-fetch-field "from-r13"))))
610 (setq date
(mail-fetch-field "date"))
611 (setq id
(mail-fetch-field "message-id"))
612 (setq subject
(mail-fetch-field "subject"))
613 (goto-char (point-max))
615 (when (search-forward "<ul>" nil t
)
617 (delete-region (point-min) (point))
618 (search-forward "</ul>" nil t
)
620 (narrow-to-region (point-min) (point))
621 (mm-url-remove-markup)
622 (mm-url-decode-entities)
623 (goto-char (point-min))
626 (message-remove-header "from")
627 (goto-char (point-max))
628 (insert "From: " from
"\n"))
630 (message-remove-header "subject")
631 (goto-char (point-max))
632 (insert "Subject: " subject
"\n"))
634 (goto-char (point-max))
635 (insert "X-Message-ID: <" id
">\n"))
637 (message-remove-header "date")
638 (goto-char (point-max))
639 (insert "Date: " date
"\n"))
640 (goto-char (point-max))
644 (when (search-forward "X-Body-of-Message" nil t
)
646 (delete-region p
(point))
647 (search-forward "X-Body-of-Message-End" nil t
)
650 (narrow-to-region p
(point))
651 (goto-char (point-min))
652 (if (> (skip-chars-forward "\040\n\r\t") 0)
653 (delete-region (point-min) (point)))
656 ((looking-at "<PRE>\r?\n?")
657 (delete-region (match-beginning 0) (match-end 0))
659 (when (search-forward "</PRE>" nil t
)
660 (delete-region (match-beginning 0) (match-end 0))
662 (narrow-to-region p
(point))
663 (mm-url-remove-markup)
664 (mm-url-decode-entities)
665 (goto-char (point-max)))))
666 ((looking-at "<P><A HREF=\"\\([^\"]+\\)")
667 (setq url
(match-string 1))
668 (delete-region (match-beginning 0)
669 (progn (forward-line) (point)))
670 ;; I hate to download the url encode it, then immediately
675 (string-match "\\.[^\\.]+$" url
)
676 (mailcap-extension-to-mime
677 (match-string 0 url
)))
678 "application/octet-stream")
679 (format " url=\"http://www.mail-archive.com/%s/%s\""
686 (insert "<#part type=\"text/html\" disposition=inline>")
688 (if (re-search-forward
689 "[\040\n\r\t]*<PRE>\\|[\040\n\r\t]*<P><A HREF=\""
696 (if (> (skip-chars-forward "\040\n\r\t") 0)
697 (delete-region p
(point))))
698 (goto-char (point-max))))
700 (when (search-forward "X-References-End" nil t
)
703 (search-backward "X-References" p t
)
704 (while (re-search-forward "msg\\([0-9]+\\)\\.html" e t
)
705 (push (concat "<" (match-string 1) "%" group
">") refs
)))
706 (delete-region p
(point-max))
707 (goto-char (point-min))
708 (insert (format "Message-ID: <%05d%%%s>\n" (1- article
) group
))
710 (insert "References:")
712 (insert " " (pop refs
)))
715 (unless (looking-at "$")
716 (search-forward "\n\n" nil t
)
718 (narrow-to-region (point) (point-max))
719 (insert "MIME-Version: 1.0\n"
722 (delete-region (point-min) (point-max))))
726 (provide 'nnwarchive
)
728 ;;; arch-tag: 1ab7a15c-777a-40e0-95c0-0c41b3963578
729 ;;; nnwarchive.el ends here