1 ;;; nnwarchive.el --- interfacing with web archives
3 ;; Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004,
4 ;; 2005, 2006, 2007, 2008, 2009 Free Software Foundation, Inc.
6 ;; Author: Shenghuo Zhu <zsh@cs.rochester.edu>
7 ;; Keywords: news egroups mail-archive
9 ;; This file is part of GNU Emacs.
11 ;; GNU Emacs is free software: you can redistribute it and/or modify
12 ;; it under the terms of the GNU General Public License as published by
13 ;; the Free Software Foundation, either version 3 of the License, or
14 ;; (at your option) any later version.
16 ;; GNU Emacs is distributed in the hope that it will be useful,
17 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
18 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 ;; GNU General Public License for more details.
21 ;; You should have received a copy of the GNU General Public License
22 ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
26 ;; Note: You need to have `url' (w3 0.46) or greater version
27 ;; installed for some functions of this backend to work.
30 ;; 1. To support more web archives.
31 ;; 2. Generalize webmail to other MHonArc archive.
35 (eval-when-compile (require 'cl
))
46 (nnoo-declare nnwarchive
)
48 (defvar nnwarchive-type-definition
50 (address .
"www.egroups.com")
52 "http://www.egroups.com/login.cgi?&login_email=%s&login_password=%s"
53 nnwarchive-login nnwarchive-passwd
)
55 "http://www.egroups.com/mygroups")
56 (list-dissect . nnwarchive-egroups-list
)
57 (list-groups . nnwarchive-egroups-list-groups
)
59 "http://www.egroups.com/messages/%s/%d" group aux
)
61 "http://www.egroups.com/messages/%s/" group
)
62 (xover-page-size .
13)
63 (xover-dissect . nnwarchive-egroups-xover
)
65 "http://www.egroups.com/message/%s/%d?source=1" group article
)
66 (article-dissect . nnwarchive-egroups-article
)
69 (xover-files . nnwarchive-egroups-xover-files
))
71 (address .
"www.mail-archive.com")
74 "http://www.mail-archive.com/lists.html")
75 (list-dissect . nnwarchive-mail-archive-list
)
76 (list-groups . nnwarchive-mail-archive-list-groups
)
78 "http://www.mail-archive.com/%s/mail%d.html" group aux
)
80 "http://www.mail-archive.com/%s/maillist.html" group
)
82 (xover-dissect . nnwarchive-mail-archive-xover
)
84 "http://www.mail-archive.com/%s/msg%05d.html" group article1
)
85 (article-dissect . nnwarchive-mail-archive-article
)
86 (xover-files . nnwarchive-mail-archive-xover-files
)
88 (article-offset .
1))))
90 (defvar nnwarchive-default-type
'egroups
)
92 (defvoo nnwarchive-directory
(nnheader-concat gnus-directory
"warchive/")
93 "Where nnwarchive will save its files.")
95 (defvoo nnwarchive-type nil
96 "The type of nnwarchive.")
98 (defvoo nnwarchive-address
""
99 "The address of nnwarchive.")
101 (defvoo nnwarchive-login nil
102 "Your login name for the group.")
104 (defvoo nnwarchive-passwd nil
105 "Your password for the group.")
107 (defvoo nnwarchive-groups nil
)
109 (defvoo nnwarchive-headers-cache nil
)
111 (defvoo nnwarchive-authentication nil
)
113 (defvoo nnwarchive-nov-is-evil nil
)
115 (defconst nnwarchive-version
"nnwarchive 1.0")
117 ;;; Internal variables
119 (defvoo nnwarchive-open-url nil
)
120 (defvoo nnwarchive-open-dissect nil
)
122 (defvoo nnwarchive-list-url nil
)
123 (defvoo nnwarchive-list-dissect nil
)
124 (defvoo nnwarchive-list-groups nil
)
126 (defvoo nnwarchive-xover-files nil
)
127 (defvoo nnwarchive-xover-url nil
)
128 (defvoo nnwarchive-xover-last-url nil
)
129 (defvoo nnwarchive-xover-dissect nil
)
130 (defvoo nnwarchive-xover-page-size nil
)
132 (defvoo nnwarchive-article-url nil
)
133 (defvoo nnwarchive-article-dissect nil
)
134 (defvoo nnwarchive-xover-files nil
)
135 (defvoo nnwarchive-article-offset
0)
137 (defvoo nnwarchive-buffer nil
)
139 (defvoo nnwarchive-keep-backlog
300)
140 (defvar nnwarchive-backlog-articles nil
)
141 (defvar nnwarchive-backlog-hashtb nil
)
143 (defvoo nnwarchive-headers nil
)
146 ;;; Interface functions
148 (nnoo-define-basics nnwarchive
)
150 (defun nnwarchive-set-default (type)
151 (let ((defs (cdr (assq type nnwarchive-type-definition
)))
154 (set (intern (concat "nnwarchive-" (symbol-name (car def
))))
157 (defmacro nnwarchive-backlog
(&rest form
)
158 `(let ((gnus-keep-backlog nnwarchive-keep-backlog
)
160 (format " *nnwarchive backlog %s*" nnwarchive-address
))
161 (gnus-backlog-articles nnwarchive-backlog-articles
)
162 (gnus-backlog-hashtb nnwarchive-backlog-hashtb
))
165 (setq nnwarchive-backlog-articles gnus-backlog-articles
166 nnwarchive-backlog-hashtb gnus-backlog-hashtb
))))
167 (put 'nnwarchive-backlog
'lisp-indent-function
0)
168 (put 'nnwarchive-backlog
'edebug-form-spec
'(form body
))
170 (defun nnwarchive-backlog-enter-article (group number buffer
)
172 (gnus-backlog-enter-article group number buffer
)))
174 (defun nnwarchive-get-article (article &optional group server buffer
)
175 (if (numberp article
)
176 (if (nnwarchive-backlog
177 (gnus-backlog-request-article group article
178 (or buffer nntp-server-buffer
)))
182 (set-buffer nnwarchive-buffer
)
183 (goto-char (point-min))
184 (let ((article1 (- article nnwarchive-article-offset
)))
185 (nnwarchive-url nnwarchive-article-url
))
186 (setq contents
(funcall nnwarchive-article-dissect group article
)))
189 (set-buffer (or buffer nntp-server-buffer
))
192 (nnwarchive-backlog-enter-article group article
(current-buffer))
193 (nnheader-report 'nnwarchive
"Fetched article %s" article
)
194 (cons group article
)))))
197 (deffoo nnwarchive-retrieve-headers
(articles &optional group server fetch-old
)
198 (nnwarchive-possibly-change-server group server
)
199 (if (or gnus-nov-is-evil nnwarchive-nov-is-evil
)
201 (with-current-buffer nntp-server-buffer
203 (let ((buf (current-buffer)) b e
)
204 (dolist (art articles
)
205 (nnwarchive-get-article art group server buf
)
206 (setq b
(goto-char (point-min)))
207 (if (search-forward "\n\n" nil t
)
209 (goto-char (point-max)))
211 (with-current-buffer nntp-server-buffer
212 (insert (format "221 %d Article retrieved.\n" art
))
213 (insert-buffer-substring buf b e
)
216 (setq nnwarchive-headers
(cdr (assoc group nnwarchive-headers-cache
)))
218 (set-buffer nnwarchive-buffer
)
220 (funcall nnwarchive-xover-files group articles
))
222 (set-buffer nntp-server-buffer
)
225 (dolist (art articles
)
226 (if (setq header
(assq art nnwarchive-headers
))
227 (nnheader-insert-nov (cdr header
))))))
228 (let ((elem (assoc group nnwarchive-headers-cache
)))
230 (setcdr elem nnwarchive-headers
)
231 (push (cons group nnwarchive-headers
) nnwarchive-headers-cache
)))
234 (deffoo nnwarchive-request-group
(group &optional server dont-check
)
235 (nnwarchive-possibly-change-server nil server
)
236 (when (and (not dont-check
) nnwarchive-list-groups
)
237 (funcall nnwarchive-list-groups
(list group
))
238 (nnwarchive-write-groups))
239 (let ((elem (assoc group nnwarchive-groups
)))
242 (nnheader-report 'nnwarchive
"Group does not exist"))
244 (nnheader-report 'nnwarchive
"Opened group %s" group
)
246 "211 %d %d %d %s\n" (or (cadr elem
) 0) 1 (or (cadr elem
) 0)
247 (prin1-to-string group
))
250 (deffoo nnwarchive-request-article
(article &optional group server buffer
)
251 (nnwarchive-possibly-change-server group server
)
252 (nnwarchive-get-article article group server buffer
))
254 (deffoo nnwarchive-close-server
(&optional server
)
255 (when (and (nnwarchive-server-opened server
)
256 (gnus-buffer-live-p nnwarchive-buffer
))
258 (set-buffer nnwarchive-buffer
)
259 (kill-buffer nnwarchive-buffer
)))
261 (gnus-backlog-shutdown))
262 (nnoo-close-server 'nnwarchive server
))
264 (deffoo nnwarchive-request-list
(&optional server
)
265 (nnwarchive-possibly-change-server nil server
)
267 (set-buffer nnwarchive-buffer
)
269 (if nnwarchive-list-url
270 (nnwarchive-url nnwarchive-list-url
))
271 (if nnwarchive-list-dissect
272 (funcall nnwarchive-list-dissect
))
273 (nnwarchive-write-groups)
274 (nnwarchive-generate-active))
277 (deffoo nnwarchive-open-server
(server &optional defs connectionless
)
278 (nnoo-change-server 'nnwarchive server defs
)
279 (nnwarchive-init server
)
280 (when nnwarchive-authentication
281 (setq nnwarchive-login
284 (format "Login at %s: " server
)
286 (setq nnwarchive-passwd
287 (or nnwarchive-passwd
289 (format "Password for %s at %s: "
290 nnwarchive-login server
)))))
291 (unless nnwarchive-groups
292 (nnwarchive-read-groups))
294 (set-buffer nnwarchive-buffer
)
296 (if nnwarchive-open-url
297 (nnwarchive-url nnwarchive-open-url
))
298 (if nnwarchive-open-dissect
299 (funcall nnwarchive-open-dissect
)))
302 (nnoo-define-skeleton nnwarchive
)
304 ;;; Internal functions
306 (defun nnwarchive-possibly-change-server (&optional group server
)
307 (nnwarchive-init server
)
309 (not (nnwarchive-server-opened server
)))
310 (nnwarchive-open-server server
)))
312 (defun nnwarchive-read-groups ()
313 (let ((file (expand-file-name (concat "groups-" nnwarchive-address
)
314 nnwarchive-directory
)))
315 (when (file-exists-p file
)
317 (insert-file-contents file
)
318 (goto-char (point-min))
319 (setq nnwarchive-groups
(read (current-buffer)))))))
321 (defun nnwarchive-write-groups ()
322 (with-temp-file (expand-file-name (concat "groups-" nnwarchive-address
)
323 nnwarchive-directory
)
324 (prin1 nnwarchive-groups
(current-buffer))))
326 (defun nnwarchive-init (server)
327 "Initialize buffers and such."
328 (let ((type (intern server
)) (defs nnwarchive-type-definition
) def
)
331 (setq type nnwarchive-default-type
))
332 ((assq type nnwarchive-type-definition
) t
)
335 (while (setq def
(pop defs
))
336 (when (equal (cdr (assq 'address
(cdr def
))) server
)
338 (setq type
(car def
))))
340 (error "Undefined server %s" server
))))
341 (setq nnwarchive-type type
))
342 (unless (file-exists-p nnwarchive-directory
)
343 (gnus-make-directory nnwarchive-directory
))
344 (unless (gnus-buffer-live-p nnwarchive-buffer
)
345 (setq nnwarchive-buffer
347 (nnheader-set-temp-buffer
348 (format " *nnwarchive %s %s*" nnwarchive-type server
)))))
349 (nnwarchive-set-default nnwarchive-type
))
351 (defun nnwarchive-eval (expr)
354 (cons (nnwarchive-eval (car expr
)) (nnwarchive-eval (cdr expr
))))
360 (defun nnwarchive-url (xurl)
361 (mm-with-unibyte-current-buffer
362 (let ((url-confirmation-func 'identity
) ;; Some hacks.
363 (url-cookie-multiple-line nil
))
365 ((eq (car xurl
) 'post
)
367 (mm-url-fetch-form (car xurl
) (nnwarchive-eval (cdr xurl
))))
369 (mm-url-insert (apply 'format
(nnwarchive-eval xurl
))))))))
371 (defun nnwarchive-generate-active ()
373 (set-buffer nntp-server-buffer
)
375 (dolist (elem nnwarchive-groups
)
376 (insert (prin1-to-string (car elem
))
377 " " (number-to-string (or (cadr elem
) 0)) " 1 y\n"))))
379 (defun nnwarchive-paged (articles)
380 (let (art narts next
)
381 (while (setq art
(pop articles
))
382 (when (and (>= art
(or next
0))
383 (not (assq art nnwarchive-headers
)))
385 (setq next
(+ art nnwarchive-xover-page-size
))))
390 (defun nnwarchive-egroups-list-groups (groups)
393 (set-buffer nnwarchive-buffer
)
394 (dolist (group groups
)
396 (nnwarchive-url nnwarchive-xover-last-url
)
397 (goto-char (point-min))
398 (when (re-search-forward "of \\([0-9]+\\)[ \t\n\r]*</title>" nil t
)
399 (setq articles
(string-to-number (match-string 1))))
400 (let ((elem (assoc group nnwarchive-groups
)))
402 (setcar (cdr elem
) articles
)
403 (push (list group articles
"") nnwarchive-groups
)))
404 (setq nnwarchive-headers
(cdr (assoc group nnwarchive-headers-cache
)))
405 (nnwarchive-egroups-xover group
)
406 (let ((elem (assoc group nnwarchive-headers-cache
)))
408 (setcdr elem nnwarchive-headers
)
409 (push (cons group nnwarchive-headers
) nnwarchive-headers-cache
)))))))
411 (defun nnwarchive-egroups-list ()
412 (let ((case-fold-search t
)
413 group description elem articles
)
414 (goto-char (point-min))
416 (re-search-forward "href=\"/group/\\([^/\"\> ]+\\)" nil t
)
417 (setq group
(match-string 1)
418 description
(match-string 2))
419 (if (setq elem
(assoc group nnwarchive-groups
))
420 (setcar (cdr elem
) 0)
421 (push (list group articles description
) nnwarchive-groups
))))
424 (defun nnwarchive-egroups-xover (group)
425 (let (article subject from date
)
426 (goto-char (point-min))
427 (while (re-search-forward
428 "<a href=\"/group/\\([^/]+\\)/\\([0-9]+\\)[^>]+>\\([^<]+\\)<"
430 (setq group
(match-string 1)
431 article
(string-to-number (match-string 2))
432 subject
(match-string 3))
434 (unless (assq article nnwarchive-headers
)
435 (if (looking-at "<td[^>]+><font[^>]+>\\([^<]+\\)</font>")
436 (setq from
(match-string 1)))
438 (if (looking-at "<td[^>]+><font[^>]+>\\([^<]+\\)</font>")
439 (setq date
(identity (match-string 1))))
442 (make-full-mail-header
444 (mm-url-decode-entities-string subject
)
445 (mm-url-decode-entities-string from
)
447 (concat "<" group
"%"
448 (number-to-string article
)
451 0 0 "")) nnwarchive-headers
))))
454 (defun nnwarchive-egroups-article (group articles
)
455 (goto-char (point-min))
456 (if (search-forward "<pre>" nil t
)
457 (delete-region (point-min) (point)))
458 (goto-char (point-max))
459 (if (search-backward "</pre>" nil t
)
460 (delete-region (point) (point-max)))
461 (goto-char (point-min))
462 (while (re-search-forward "<a[^>]+>\\([^<]+\\)</a>" nil t
)
463 (replace-match "\\1"))
464 (mm-url-decode-entities)
467 (defun nnwarchive-egroups-xover-files (group articles
)
469 (setq auxs
(nnwarchive-paged (sort articles
'<)))
470 (while (setq aux
(pop auxs
))
471 (goto-char (point-max))
472 (nnwarchive-url nnwarchive-xover-url
))
473 (if nnwarchive-xover-dissect
474 (nnwarchive-egroups-xover group
))))
478 (defun nnwarchive-mail-archive-list-groups (groups)
481 (set-buffer nnwarchive-buffer
)
482 (dolist (group groups
)
484 (nnwarchive-url nnwarchive-xover-last-url
)
485 (goto-char (point-min))
486 (when (re-search-forward "msg\\([0-9]+\\)\\.html" nil t
)
487 (setq articles
(1+ (string-to-number (match-string 1)))))
488 (let ((elem (assoc group nnwarchive-groups
)))
490 (setcar (cdr elem
) articles
)
491 (push (list group articles
"") nnwarchive-groups
)))
492 (setq nnwarchive-headers
(cdr (assoc group nnwarchive-headers-cache
)))
493 (nnwarchive-mail-archive-xover group
)
494 (let ((elem (assoc group nnwarchive-headers-cache
)))
496 (setcdr elem nnwarchive-headers
)
497 (push (cons group nnwarchive-headers
)
498 nnwarchive-headers-cache
)))))))
500 (defun nnwarchive-mail-archive-list ()
501 (let ((case-fold-search t
)
502 group description elem articles
)
503 (goto-char (point-min))
504 (while (re-search-forward "<a href=\"\\([^/]+\\)/\">\\([^>]+\\)<" nil t
)
505 (setq group
(match-string 1)
506 description
(match-string 2))
509 (if (setq elem
(assoc group nnwarchive-groups
))
510 (setcar (cdr elem
) articles
)
511 (push (list group articles description
) nnwarchive-groups
))))
514 (defun nnwarchive-mail-archive-xover (group)
515 (let (article subject from date
)
516 (goto-char (point-min))
517 (while (re-search-forward
518 "<A[^>]*HREF=\"msg\\([0-9]+\\)\\.html[^>]+>\\([^<]+\\)<"
520 (setq article
(1+ (string-to-number (match-string 1)))
521 subject
(match-string 2))
523 (unless (assq article nnwarchive-headers
)
524 (if (looking-at "<UL><LI><EM>From</EM>: *\\([^<]*[^< ]\\) *<\\([^&]+\\)>")
526 (setq from
(match-string 1)
527 date
(identity (match-string 2))))
528 (setq from
"" date
""))
531 (make-full-mail-header
533 (mm-url-decode-entities-string subject
)
534 (mm-url-decode-entities-string from
)
536 (format "<%05d%%%s>\n" (1- article
) group
)
538 0 0 "")) nnwarchive-headers
))))
541 (defun nnwarchive-mail-archive-xover-files (group articles
)
542 (unless nnwarchive-headers
544 (nnwarchive-url nnwarchive-xover-last-url
)
545 (goto-char (point-min))
546 (nnwarchive-mail-archive-xover group
))
547 (let ((minart (apply 'min articles
))
548 (min (apply 'min
(mapcar 'car nnwarchive-headers
)))
550 (while (> min minart
)
552 (nnwarchive-url nnwarchive-xover-url
)
553 (nnwarchive-mail-archive-xover group
)
554 (setq min
(apply 'min
(mapcar 'car nnwarchive-headers
))))))
556 (defvar nnwarchive-caesar-translation-table nil
557 "Modified rot13 table. tr/@A-Z[a-z/N-Z[@A-Mn-za-m/.")
559 (defun nnwarchive-make-caesar-translation-table ()
560 "Create modified rot13 table. tr/@A-Z[a-z/N-Z[@A-Mn-za-m/."
562 (table (make-string 256 0))
564 (A (mm-char-int ?A
)))
565 (while (< (incf i
) 256)
568 (substring table
0 (1- A
))
569 (substring table
(+ A
13) (+ A
27))
570 (substring table
(1- A
) (+ A
13))
571 (substring table
(+ A
27) a
)
572 (substring table
(+ a
13) (+ a
26))
573 (substring table a
(+ a
13))
574 (substring table
(+ a
26) 255))))
576 (defun nnwarchive-from-r13 (from-r13)
580 (let ((message-caesar-translation-table
581 (or nnwarchive-caesar-translation-table
582 (setq nnwarchive-caesar-translation-table
583 (nnwarchive-make-caesar-translation-table)))))
584 (message-caesar-region (point-min) (point-max))
587 (defun nnwarchive-mail-archive-article (group article
)
588 (let (p refs url mime e
591 (case-fold-search t
))
593 (goto-char (point-min))
594 (when (search-forward "X-Head-End" nil t
)
596 (narrow-to-region (point-min) (point))
597 (mm-url-decode-entities)
598 (goto-char (point-min))
599 (while (search-forward "<!--X-" nil t
)
601 (goto-char (point-min))
602 (while (search-forward " -->" nil t
)
605 (or (mail-fetch-field "from")
607 (mail-fetch-field "from-r13"))))
608 (setq date
(mail-fetch-field "date"))
609 (setq id
(mail-fetch-field "message-id"))
610 (setq subject
(mail-fetch-field "subject"))
611 (goto-char (point-max))
613 (when (search-forward "<ul>" nil t
)
615 (delete-region (point-min) (point))
616 (search-forward "</ul>" nil t
)
618 (narrow-to-region (point-min) (point))
619 (mm-url-remove-markup)
620 (mm-url-decode-entities)
621 (goto-char (point-min))
624 (message-remove-header "from")
625 (goto-char (point-max))
626 (insert "From: " from
"\n"))
628 (message-remove-header "subject")
629 (goto-char (point-max))
630 (insert "Subject: " subject
"\n"))
632 (goto-char (point-max))
633 (insert "X-Message-ID: <" id
">\n"))
635 (message-remove-header "date")
636 (goto-char (point-max))
637 (insert "Date: " date
"\n"))
638 (goto-char (point-max))
642 (when (search-forward "X-Body-of-Message" nil t
)
644 (delete-region p
(point))
645 (search-forward "X-Body-of-Message-End" nil t
)
648 (narrow-to-region p
(point))
649 (goto-char (point-min))
650 (if (> (skip-chars-forward "\040\n\r\t") 0)
651 (delete-region (point-min) (point)))
654 ((looking-at "<PRE>\r?\n?")
655 (delete-region (match-beginning 0) (match-end 0))
657 (when (search-forward "</PRE>" nil t
)
658 (delete-region (match-beginning 0) (match-end 0))
660 (narrow-to-region p
(point))
661 (mm-url-remove-markup)
662 (mm-url-decode-entities)
663 (goto-char (point-max)))))
664 ((looking-at "<P><A HREF=\"\\([^\"]+\\)")
665 (setq url
(match-string 1))
666 (delete-region (match-beginning 0)
667 (progn (forward-line) (point)))
668 ;; I hate to download the url encode it, then immediately
673 (string-match "\\.[^\\.]+$" url
)
674 (mailcap-extension-to-mime
675 (match-string 0 url
)))
676 "application/octet-stream")
677 (format " url=\"http://www.mail-archive.com/%s/%s\""
684 (insert "<#part type=\"text/html\" disposition=inline>")
686 (if (re-search-forward
687 "[\040\n\r\t]*<PRE>\\|[\040\n\r\t]*<P><A HREF=\""
694 (if (> (skip-chars-forward "\040\n\r\t") 0)
695 (delete-region p
(point))))
696 (goto-char (point-max))))
698 (when (search-forward "X-References-End" nil t
)
701 (search-backward "X-References" p t
)
702 (while (re-search-forward "msg\\([0-9]+\\)\\.html" e t
)
703 (push (concat "<" (match-string 1) "%" group
">") refs
)))
704 (delete-region p
(point-max))
705 (goto-char (point-min))
706 (insert (format "Message-ID: <%05d%%%s>\n" (1- article
) group
))
708 (insert "References:")
710 (insert " " (pop refs
)))
713 (unless (looking-at "$")
714 (search-forward "\n\n" nil t
)
716 (narrow-to-region (point) (point-max))
717 (insert "MIME-Version: 1.0\n"
720 (delete-region (point-min) (point-max))))
724 (provide 'nnwarchive
)
726 ;; arch-tag: 1ab7a15c-777a-40e0-95c0-0c41b3963578
727 ;;; nnwarchive.el ends here