1 ;;; nnweb.el --- retrieving articles via web search engines
2 ;; Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003
3 ;; Free Software Foundation, Inc.
5 ;; Author: Lars Magne Ingebrigtsen <larsi@gnus.org>
8 ;; This file is part of GNU Emacs.
10 ;; GNU Emacs is free software; you can redistribute it and/or modify
11 ;; it under the terms of the GNU General Public License as published by
12 ;; the Free Software Foundation; either version 2, or (at your option)
15 ;; GNU Emacs is distributed in the hope that it will be useful,
16 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
17 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 ;; GNU General Public License for more details.
20 ;; You should have received a copy of the GNU General Public License
21 ;; along with GNU Emacs; see the file COPYING. If not, write to the
22 ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
23 ;; Boston, MA 02111-1307, USA.
27 ;; Note: You need to have `w3' installed for some functions to work.
31 (eval-when-compile (require 'cl
))
43 (autoload 'w3-parse-buffer
"w3-parse")
;; Directory in which nnweb keeps its on-disk state: the "active" file
;; written by `nnweb-write-active' and the per-group ".overview" files
;; named by `nnweb-overview-file'.
(defvoo nnweb-directory (nnheader-concat gnus-directory "nnweb/")
  "Where nnweb will save its files.")
;; Selects which entry of `nnweb-type-definition' is consulted by
;; `nnweb-definition'.
(defvoo nnweb-type 'google
  "What search engine type is being used.
Valid types include `google', `dejanews', and `gmane'.")
;; Alist keyed by search-engine type (the value of `nnweb-type').  Each
;; entry holds (KEY . VALUE) properties -- `article', `id', `reference',
;; `map', `search', `address', `identifier' -- that `nnweb-definition'
;; looks up with `assq'.
;; NOTE(review): the opening of the quoted alist and the heads of the
;; first and last entries are not present in this copy (the embedded
;; listing numbers skip 55-56, 64 and 71); restore them from upstream.
54 (defvar nnweb-type-definition
57 (id .
"http://groups.google.com/groups?selm=%s&output=gplain")
58 (reference . identity
)
59 (map . nnweb-google-create-mapping
)
60 (search . nnweb-google-search
)
61 (address .
"http://groups.google.com/groups")
62 (identifier . nnweb-google-identity
))
63 (dejanews ;; alias of google
65 (id .
"http://groups.google.com/groups?selm=%s&output=gplain")
66 (reference . identity
)
67 (map . nnweb-google-create-mapping
)
68 (search . nnweb-google-search
)
69 (address .
"http://groups.google.com/groups")
70 (identifier . nnweb-google-identity
))
72 (article . nnweb-gmane-wash-article
)
73 (id .
"http://gmane.org/view.php?group=%s")
74 (reference . identity
)
75 (map . nnweb-gmane-create-mapping
)
76 (search . nnweb-gmane-search
)
77 (address .
"http://gmane.org/")
78 (identifier . nnweb-gmane-identity
)))
79 "Type-definition alist.")
;; Passed as the sole argument to the current type's `search' function
;; by the `map' functions, so it is not specific to Google.
(defvoo nnweb-search nil
  "Search string to feed to the search engine selected by `nnweb-type'.")
;; Compared against the running hit counter when deciding whether to
;; follow the "Next" link of a Google result page.
(defvoo nnweb-max-hits 999
  "Maximum number of hits to display.")
;; When non-nil, `nnweb-request-scan' resets the URL hash table before
;; searching and skips writing the overview to disk.
(defvoo nnweb-ephemeral-p nil
  "Whether this nnweb server is ephemeral.")
90 ;;; Internal variables
;; Alist of known articles; `assq' on an article number yields an entry
;; whose second element is the article's mail header.
(defvoo nnweb-articles nil)

;; Work buffer made current by the `map' functions before searching.
(defvoo nnweb-buffer nil)

;; Alist of groups; each entry starts with the group name followed by
;; its active range, and is persisted by `nnweb-write-active'.
(defvoo nnweb-group-alist nil)

;; Name of the currently selected group.
(defvoo nnweb-group nil)

;; Hash table of already-seen articles, keyed by `nnweb-identifier'.
(defvoo nnweb-hashtb nil)
98 ;;; Interface functions
;; Invokes `nnoo-define-basics' for the `nnweb' backend.
;; NOTE(review): presumably this generates the generic server functions
;; (e.g. `nnweb-server-opened' and `nnweb-open-server', which are called
;; below but never defined in the visible listing) -- confirm in nnoo.el.
100 (nnoo-define-basics nnweb
)
;; Backend entry point.  With `nntp-server-buffer' current, pops each
;; article number off ARTICLES, looks it up in `nnweb-articles' with
;; `assq', and passes the header found (the second element of the entry)
;; to `nnheader-insert-nov'.  Numbers without an entry are skipped.
;; GROUP and SERVER are only handed to `nnweb-possibly-change-server';
;; FETCH-OLD is not used in the visible code.
;; NOTE(review): the embedded listing numbers skip 104, 106 and 112 and
;; the parentheses do not balance, so some forms (including whatever
;; the function returns) are missing -- restore them from upstream.
102 (deffoo nnweb-retrieve-headers
(articles &optional group server fetch-old
)
103 (nnweb-possibly-change-server group server
)
105 (set-buffer nntp-server-buffer
)
107 (let (article header
)
108 (mm-with-unibyte-current-buffer
109 (while (setq article
(pop articles
))
110 (when (setq header
(cadr (assq article nnweb-articles
)))
111 (nnheader-insert-nov header
))))
(deffoo nnweb-request-scan (&optional group server)
  "Run the search for GROUP on SERVER and rebuild the article map.
The current type's `map' function does the actual work.  For
non-ephemeral servers the result is then saved to disk."
  (nnweb-possibly-change-server group server)
  ;; An ephemeral server starts every scan with an empty table of
  ;; already-seen articles.
  (when nnweb-ephemeral-p
    (setq nnweb-hashtb (gnus-make-hashtable 4095)))
  (funcall (nnweb-definition 'map))
  (unless nnweb-ephemeral-p
    ;; `nnweb-write-active' creates `nnweb-directory' if necessary, so it
    ;; must run before the overview file is written into that directory.
    (nnweb-write-active)
    (nnweb-write-overview group)))
;; Backend entry point: select GROUP on SERVER and report its status.
;; What the visible code shows:
;;  - when GROUP differs from `nnweb-group' and the server is not
;;    ephemeral, `nnweb-group' is set and `nnweb-type' / `nnweb-search'
;;    are reloaded from elements 2 and 3 of GROUP's entry in
;;    `nnweb-group-alist', after which the overview is read;
;;  - when `nnweb-articles' is empty, "No matching articles" is reported;
;;  - otherwise a "211 ..." status line is formatted from the number of
;;    articles and the active range, which for an ephemeral server is
;;    the first and last article numbers in `nnweb-articles' and
;;    otherwise the second element of GROUP's alist entry.
;; NOTE(review): the embedded listing numbers skip 125, 129, 131, 134,
;; 136, 139 and 145, so the enclosing conditional forms and any use of
;; DONT-CHECK are missing -- restore them from upstream.
123 (deffoo nnweb-request-group
(group &optional server dont-check
)
124 (nnweb-possibly-change-server nil server
)
126 (not (equal group nnweb-group
))
127 (not nnweb-ephemeral-p
))
128 (setq nnweb-group group
130 (let ((info (assoc group nnweb-group-alist
)))
132 (setq nnweb-type
(nth 2 info
))
133 (setq nnweb-search
(nth 3 info
))
135 (nnweb-read-overview group
)))))
137 ((not nnweb-articles
)
138 (nnheader-report 'nnweb
"No matching articles"))
140 (let ((active (if nnweb-ephemeral-p
141 (cons (caar nnweb-articles
)
142 (caar (last nnweb-articles
)))
143 (cadr (assoc group nnweb-group-alist
)))))
144 (nnheader-report 'nnweb
"Opened group %s" group
)
146 "211 %d %d %d %s\n" (length nnweb-articles
)
147 (car active
) (cdr active
) group
)))))
(deffoo nnweb-close-group (group &optional server)
  "Close GROUP on SERVER, discarding the nnweb work buffer.
Always returns t."
  (nnweb-possibly-change-server group server)
  (when (gnus-buffer-live-p nnweb-buffer)
    ;; Clear the modified flag so killing the buffer never prompts.
    ;; `with-current-buffer' leaves the caller's current buffer alone,
    ;; which a bare `set-buffer' followed by `kill-buffer' would not.
    (with-current-buffer nnweb-buffer
      (set-buffer-modified-p nil))
    (kill-buffer nnweb-buffer))
  t)
;; Backend entry point: fetch ARTICLE into BUFFER, or into
;; `nntp-server-buffer' when BUFFER is nil.
;; What the visible code shows:
;;  - ARTICLE is looked up in `nnweb-articles' with `assq'; if a header
;;    is found, the URL in its xref field is fetched with `mm-url-insert';
;;  - if ARTICLE is a string and the current type defines `id', the text
;;    between "<" and ">" is substituted into that URL template, and the
;;    type's `reference' function (if any) is called with ARTICLE;
;;  - unless `nnheader-callback-function' is set, the type's `article'
;;    function is then called;
;;  - the last visible form builds (GROUP . ARTICLE), with a nil cdr
;;    when ARTICLE is not a number.
;; NOTE(review): the embedded listing numbers skip 160, 164, 170, 176
;; and 178, so the enclosing conditionals, the binding of `art' and the
;; second fetch are missing -- restore them from upstream.
158 (deffoo nnweb-request-article
(article &optional group server buffer
)
159 (nnweb-possibly-change-server group server
)
161 (set-buffer (or buffer nntp-server-buffer
))
162 (let* ((header (cadr (assq article nnweb-articles
)))
163 (url (and header
(mail-header-xref header
))))
165 (mm-with-unibyte-current-buffer
166 (mm-url-insert url
)))
167 (and (stringp article
)
168 (nnweb-definition 'id t
)
169 (let ((fetch (nnweb-definition 'id
))
171 (when (string-match "^<\\(.*\\)>$" article
)
172 (setq art
(match-string 1 article
)))
173 (when (and fetch art
)
174 (setq url
(format fetch art
))
175 (mm-with-unibyte-current-buffer
177 (if (nnweb-definition 'reference t
)
179 (funcall (nnweb-definition
180 'reference
) article
)))))))
181 (unless nnheader-callback-function
182 (funcall (nnweb-definition 'article
)))
183 (nnheader-report 'nnweb
"Fetched article %s" article
)
184 (cons group
(and (numberp article
) article
))))))
;; Backend entry point: if SERVER is open and `nnweb-buffer' is still
;; live, clear that buffer's modified flag and kill it; then delegate to
;; `nnoo-close-server'.
;; NOTE(review): listing number 189 is missing and the parentheses do
;; not balance, so a form that wrapped the buffer operations has been
;; lost -- restore it from upstream.
186 (deffoo nnweb-close-server
(&optional server
)
187 (when (and (nnweb-server-opened server
)
188 (gnus-buffer-live-p nnweb-buffer
))
190 (set-buffer nnweb-buffer
)
191 (set-buffer-modified-p nil
)
192 (kill-buffer nnweb-buffer
)))
193 (nnoo-close-server 'nnweb server
))
;; Backend entry point: make `nntp-server-buffer' current and hand
;; `nnweb-group-alist' to `nnmail-generate-active'.
;; NOTE(review): listing numbers 197 and 200 are missing and the form is
;; never closed, so the wrapper form and the return value are not
;; visible -- restore them from upstream.
195 (deffoo nnweb-request-list
(&optional server
)
196 (nnweb-possibly-change-server nil server
)
198 (set-buffer nntp-server-buffer
)
199 (nnmail-generate-active nnweb-group-alist
)
(deffoo nnweb-request-update-info (group info &optional server)
  "Make GROUP on SERVER current; INFO itself is left untouched.
Returns whatever `nnweb-possibly-change-server' returns."
  (nnweb-possibly-change-server group server))
;; Backend predicate taking no arguments.
;; NOTE(review): the body and the closing parenthesis (listing number
;; 206) are missing from this copy, so the value it returns cannot be
;; told from here -- restore it from upstream.
205 (deffoo nnweb-asynchronous-p
()
;; Backend entry point: (re)create GROUP.  Any existing GROUP is first
;; removed through `nnweb-request-delete-group'; then a fresh entry
;; (GROUP (1 . 0) ARGS...) is pushed onto `nnweb-group-alist'.
;; NOTE(review): the tail of the function (listing numbers 212-213) is
;; missing and the form is never closed; presumably it saved the active
;; file and returned non-nil -- confirm against upstream.
208 (deffoo nnweb-request-create-group
(group &optional server args
)
209 (nnweb-possibly-change-server nil server
)
210 (nnweb-request-delete-group group
)
211 (push `(,group
,(cons 1 0) ,@args
) nnweb-group-alist
)
;; Backend entry point: remove GROUP from `nnweb-group-alist' (through
;; `gnus-pull') and delete its overview file.  FORCE is not used in the
;; visible code.
;; NOTE(review): listing numbers 218 and 220 are missing and the form is
;; never closed, so one intermediate step and the return value are not
;; visible -- restore them from upstream.
215 (deffoo nnweb-request-delete-group
(group &optional force server
)
216 (nnweb-possibly-change-server group server
)
217 (gnus-pull group nnweb-group-alist t
)
219 (gnus-delete-file (nnweb-overview-file group
))
;; Invokes `nnoo-define-skeleton' for the `nnweb' backend.
;; NOTE(review): presumably this supplies default definitions for the
;; backend functions this file does not define itself -- confirm in
;; nnoo.el.
222 (nnoo-define-skeleton nnweb
)
224 ;;; Internal functions
;; Loads GROUP's overview file, if it exists, into a temporary unibyte
;; buffer and parses it with `nnheader-parse-nov'.  For a parsed HEADER
;; the visible code builds an entry (NUMBER HEADER XREF) and registers
;; HEADER in the hash table together with the first element of
;; `nnweb-articles'.
;; NOTE(review): the embedded listing numbers skip 232-233, 235 and 238,
;; so the loop over the file, the binding of `header' and the target of
;; the `push' are missing -- restore them from upstream.
226 (defun nnweb-read-overview (group)
227 "Read the overview of GROUP and build the map."
228 (when (file-exists-p (nnweb-overview-file group
))
229 (mm-with-unibyte-buffer
230 (nnheader-insert-file-contents (nnweb-overview-file group
))
231 (goto-char (point-min))
234 (setq header
(nnheader-parse-nov))
236 (push (list (mail-header-number header
)
237 header
(mail-header-xref header
))
239 (nnweb-set-hashtb header
(car nnweb-articles
)))))))
;; Writes GROUP's overview file with `with-temp-file', passing the
;; header (second element) of an entry popped from a local copy of the
;; `nnweb-articles' list pointer to `nnheader-insert-nov'.
;; NOTE(review): listing number 245 is missing and there is one closing
;; parenthesis too many; presumably a loop over all articles wrapped the
;; insert -- restore it from upstream.
241 (defun nnweb-write-overview (group)
242 "Write the overview file for GROUP."
243 (with-temp-file (nnweb-overview-file group
)
244 (let ((articles nnweb-articles
))
246 (nnheader-insert-nov (cadr (pop articles
)))))))
;; Calls `gnus-sethash' with the identifier derived (through
;; `nnweb-identifier') from HEADER's xref URL as the key.
;; NOTE(review): the remaining arguments of `gnus-sethash' and the
;; closing parentheses (listing number 250) are missing; presumably they
;; are DATA and `nnweb-hashtb' -- confirm against upstream.
248 (defun nnweb-set-hashtb (header data
)
249 (gnus-sethash (nnweb-identifier (mail-header-xref header
))
(defun nnweb-get-hashtb (url)
  "Return what `nnweb-hashtb' holds under the identifier of URL."
  (let ((key (nnweb-identifier url)))
    (gnus-gethash key nnweb-hashtb)))
(defun nnweb-identifier (ident)
  "Return the result of the current type's `identifier' function on IDENT."
  (let ((identify (nnweb-definition 'identifier)))
    (funcall identify ident)))
(defun nnweb-overview-file (group)
  "Return the name of the overview file of GROUP."
  ;; The file lives in `nnweb-directory' and is named GROUP.overview.
  (let ((suffix ".overview"))
    (nnheader-concat nnweb-directory group suffix)))
(defun nnweb-write-active ()
  "Save the active file."
  (gnus-make-directory nnweb-directory)
  (let ((active-file (nnheader-concat nnweb-directory "active")))
    (with-temp-file active-file
      ;; The file consists of a single `setq' form that restores
      ;; `nnweb-group-alist' when the file is loaded again.
      (prin1 (list 'setq 'nnweb-group-alist
                   (list 'quote nnweb-group-alist))
             (current-buffer)))))
(defun nnweb-read-active ()
  "Read the active file."
  (let ((active-file (nnheader-concat nnweb-directory "active")))
    ;; The three t's are `load's NOERROR, NOMESSAGE and NOSUFFIX: a
    ;; missing file is not an error, nothing is echoed, and the exact
    ;; file name is used.
    (load active-file t t t)))
;; Looks TYPE up (with `assq') in the entry that `nnweb-type-definition'
;; holds for the current `nnweb-type' and binds the associated value to
;; DEF.
;; NOTE(review): listing numbers 275-276 and 278 are missing and the
;; form is never closed; presumably the error is raised only when DEF is
;; nil and NOERROR is nil, and DEF is returned -- confirm against
;; upstream.
272 (defun nnweb-definition (type &optional noerror
)
273 "Return the definition of TYPE."
274 (let ((def (cdr (assq type
(assq nnweb-type nnweb-type-definition
)))))
277 (error "Undefined definition %s" type
))
;; Common preamble of the backend entry points.  What the visible code
;; shows: SERVER is opened if `nnweb-server-opened' says it is not;
;; `nnweb-group-alist' and `nnweb-hashtb' are checked or initialised
;; (the latter as a 4095-slot hash table); and for a non-ephemeral
;; server whose current group equals GROUP, `nnweb-request-group' is
;; called again with DONT-CHECK set.
;; NOTE(review): the embedded listing numbers skip 281-282, 286-287 and
;; 289, so the guarding conditions are incomplete and the parentheses do
;; not balance -- restore them from upstream.
280 (defun nnweb-possibly-change-server (&optional group server
)
283 (unless (nnweb-server-opened server
)
284 (nnweb-open-server server
)))
285 (unless nnweb-group-alist
288 (setq nnweb-hashtb
(gnus-make-hashtable 4095)))
290 (when (and (not nnweb-ephemeral-p
)
291 (equal group nnweb-group
))
292 (nnweb-request-group group nil t
))))
;; When `nnweb-buffer' is not a live buffer, a temporary buffer named
;; " *nnweb TYPE SEARCH SERVER*" is set up with
;; `nnheader-set-temp-buffer'.
;; NOTE(review): listing numbers 297-299 are missing, so how the new
;; buffer is stored (presumably in `nnweb-buffer') is not visible and
;; the parentheses do not balance -- restore them from upstream.
294 (defun nnweb-init (server)
295 "Initialize buffers and such."
296 (unless (gnus-buffer-live-p nnweb-buffer
)
300 (nnheader-set-temp-buffer
301 (format " *nnweb %s %s %s*"
302 nnweb-type nnweb-search server
))
303 (current-buffer))))))
306 ;;; Deja bought by google.com
;; Cleans up a fetched Google Groups page in the current buffer, with
;; case-insensitive searching.  What the visible code shows: the text
;; before the first "<pre>" line is narrowed to, everything before the
;; second-to-last "<table " in it is deleted, "<br>" is turned into
;; newlines, markup is stripped and whitespace-only lines are searched
;; for; afterwards the text after "</pre>" is deleted and markup is
;; stripped again.
;; NOTE(review): the embedded listing numbers skip 317, 319, 326,
;; 328-329 and everything after 333, so the branches of both `if's, the
;; body of the last `while', the use of `url' and the end of the
;; function are missing -- restore them from upstream.
309 (defun nnweb-google-wash-article ()
310 (let ((case-fold-search t
) url
)
311 (goto-char (point-min))
312 (re-search-forward "^<pre>" nil t
)
313 (narrow-to-region (point-min) (point))
314 (search-backward "<table " nil t
2)
315 (delete-region (point-min) (point))
316 (if (re-search-forward "Search Result [0-9]+" nil t
)
318 (if (re-search-forward "View complete thread ([0-9]+ articles?)" nil t
)
320 (goto-char (point-min))
321 (while (search-forward "<br>" nil t
)
322 (replace-match "\n"))
323 (mm-url-remove-markup)
324 (goto-char (point-min))
325 (while (re-search-forward "^[ \t]*\n" nil t
)
327 (goto-char (point-max))
330 (narrow-to-region (point) (point-max))
331 (search-forward "</pre>" nil t
)
332 (delete-region (point) (point-max))
333 (mm-url-remove-markup)
;; Parses one Google Groups result page in the current buffer.  What the
;; visible code shows: for every "a href=/groups?...selm=..." link the
;; Message-ID is taken from the `selm' parameter, the link text becomes
;; the subject, an optional following link becomes the newsgroup name,
;; and a "DAY MONTH YEAR by AUTHOR" pattern supplies From and Date (the
;; time of day is fixed at 00:00:00).  Articles whose URL is not yet in
;; the hash table get a full mail header and are registered with
;; `nnweb-set-hashtb'.  An explicit MESSAGE-ID argument takes precedence
;; over the one parsed from the link.
;; NOTE(review): many listing numbers are missing here (337-338,
;; 341-342, 345, 351, 359-360, 371, 373, 378-380, 382-384, 388, 390-391,
;; 393), so the `let' header, the construction of `url', the counter
;; updates, the `map' accumulation and the return value are not visible
;; -- restore them from upstream.
336 (defun nnweb-google-parse-1 (&optional Message-ID
)
339 (active (cadr (assoc nnweb-group nnweb-group-alist
)))
340 Subject Score Date Newsgroups From
343 (push (list nnweb-group
(setq active
(cons 1 0))
344 nnweb-type nnweb-search
)
346 ;; Go through all the article hits on this page.
347 (goto-char (point-min))
348 (while (re-search-forward
349 "a href=/groups\\(\\?[^ \">]*selm=\\([^ &\">]+\\)\\)" nil t
)
350 (setq mid
(match-string 2)
352 "http://groups.google.com/groups?selm=%s&output=gplain" mid
))
353 (narrow-to-region (search-forward ">" nil t
)
354 (search-forward "</a>" nil t
))
355 (mm-url-remove-markup)
356 (mm-url-decode-entities)
357 (setq Subject
(buffer-string))
358 (goto-char (point-max))
361 (when (looking-at "<br><font[^>]+>")
362 (goto-char (match-end 0)))
363 (if (not (looking-at "<a[^>]+>"))
364 (skip-chars-forward " \t")
365 (narrow-to-region (point)
366 (search-forward "</a>" nil t
))
367 (mm-url-remove-markup)
368 (mm-url-decode-entities)
369 (setq Newsgroups
(buffer-string))
370 (goto-char (point-max))
372 (skip-chars-forward "- \t"))
374 "\\([0-9]+\\)[/ ]\\([A-Za-z]+\\)[/ ]\\([0-9]+\\)[ \t]*by[ \t]*\\([^<]*\\) - <a")
375 (setq From
(match-string 4)
376 Date
(format "%s %s 00:00:00 %s"
377 (match-string 2) (match-string 1)
381 (unless (nnweb-get-hashtb url
)
385 (make-full-mail-header
386 (cdr active
) (if Newsgroups
387 (concat "(" Newsgroups
") " Subject
)
389 From Date
(or Message-ID mid
)
392 (nnweb-set-hashtb (cadar map
) (car map
))))
;; Parses the current buffer with `nnweb-google-parse-1', passing ID as
;; the Message-ID, appends the resulting map to `nnweb-articles' with
;; `nconc', and -- if the first map entry has a header -- fetches the
;; URL in that header's xref field with `mm-url-insert'.
;; NOTE(review): listing numbers 397 and 401-403 are missing, so the
;; form around the `nconc' and the end of the function are not visible
;; and the parentheses do not balance -- restore them from upstream.
395 (defun nnweb-google-reference (id)
396 (let ((map (nnweb-google-parse-1 id
)) header
)
398 (nconc nnweb-articles map
))
399 (when (setq header
(cadar map
))
400 (mm-with-unibyte-current-buffer
401 (mm-url-insert (mail-header-xref header
)))
;; The `map' function for the google and dejanews types.  What the
;; visible code shows: with `nnweb-buffer' current, the type's `search'
;; function is run on `nnweb-search'; each result page is parsed with
;; `nnweb-google-parse-1' and appended to `nnweb-articles'; paging
;; follows the "Next" link (prefixed with "http://groups.google.com")
;; and is checked against `nnweb-max-hits'; finally `nnweb-articles' is
;; sorted by article number with `car-less-than-car'.
;; NOTE(review): listing numbers 406, 408, 410-413, 417, 421, 424-425
;; and 428 are missing, so the local bindings (`i', `more'), the loop
;; form and the branches of the `if' are not visible -- restore them
;; from upstream.
404 (defun nnweb-google-create-mapping ()
405 "Perform the search and create a number-to-url alist."
407 (set-buffer nnweb-buffer
)
409 (when (funcall (nnweb-definition 'search
) nnweb-search
)
414 (nconc nnweb-articles
(nnweb-google-parse-1)))
415 ;; Check if there are more articles to fetch
416 (goto-char (point-min))
418 (if (or (not (re-search-forward
419 "<td nowrap><a href=\\([^>]+\\).*<span class=b>Next</span>" nil t
))
420 (>= i nnweb-max-hits
))
422 ;; Yup, there are more articles
423 (setq more
(concat "http://groups.google.com" (match-string 1)))
426 (mm-url-insert more
))))
427 ;; Return the articles in the right order.
429 (sort nnweb-articles
'car-less-than-car
))))))
;; The `search' function for the google and dejanews types.  The visible
;; code combines the type's `address' with parameters encoded by
;; `mm-url-encode-www-form-urlencoded'; one of them is "sites"="groups".
;; NOTE(review): listing numbers 432-433, 435, 437-442 and 444 are
;; missing, so the fetch call, the other query parameters (including
;; where SEARCH goes) and the return value are not visible -- restore
;; them from upstream.
431 (defun nnweb-google-search (search)
434 (nnweb-definition 'address
)
436 (mm-url-encode-www-form-urlencoded
443 ("sites" .
"groups")))))
;; The `identifier' function for the google and dejanews types: tests
;; URL for a "selm=" query parameter, capturing its value as group 1.
;; NOTE(review): both branches of the `if' (listing numbers 449-450) are
;; missing; presumably the captured value is returned when present and
;; URL itself otherwise -- confirm against upstream.
446 (defun nnweb-google-identity (url)
447 "Return an unique identifier based on URL."
448 (if (string-match "selm=\\([^ &>]+\\)" url
)
;; The `map' function for the gmane type.  What the visible code shows:
;; with `nnweb-buffer' current, the type's `search' function is run on
;; `nnweb-search'; everything up to "Search Results</h1><ul>" is
;; deleted; each remaining href link yields a URL (prefixed with
;; "http://gmane.org/") and a subject; URLs not yet in the hash table
;; get a mail header whose subject is "(GROUP) SUBJECT" and are
;; registered with `nnweb-set-hashtb'; finally the new map is appended
;; to `nnweb-articles' and the result is sorted by article number.
;; NOTE(review): listing numbers 457, 459, 461-462, 464-466, 477-479 and
;; 482-483 are missing, so the local bindings (`map', `url', `subject',
;; `group'), the default for `active' and the remaining header fields
;; are not visible -- restore them from upstream.
455 (defun nnweb-gmane-create-mapping ()
456 "Perform the search and create a number-to-url alist."
458 (set-buffer nnweb-buffer
)
460 (when (funcall (nnweb-definition 'search
) nnweb-search
)
463 (active (or (cadr (assoc nnweb-group nnweb-group-alist
))
467 ;; Remove stuff from the beginning of results
468 (goto-char (point-min))
469 (search-forward "Search Results</h1><ul>" nil t
)
470 (delete-region (point-min) (point))
471 (goto-char (point-min))
472 ;; Iterate over the actual hits
473 (while (re-search-forward ".*href=\"\\([^\"]+\\)\">\\(.*\\)" nil t
)
474 (setq url
(concat "http://gmane.org/" (match-string 1)))
475 (setq subject
(match-string 2))
476 (unless (nnweb-get-hashtb url
)
480 (make-full-mail-header
481 (cdr active
) (concat "(" group
") " subject
) nil nil
484 (nnweb-set-hashtb (cadar map
) (car map
))))
485 ;; Return the articles in the right order.
487 (sort (nconc nnweb-articles map
) 'car-less-than-car
))))))
;; The `article' function for the gmane type.  With case-insensitive
;; matching, deletes everything up to and including the
;; "<!--X-Head-of-Message-->" marker, rewrites leading
;; "<li><em>NAME</em>...</li>" lines, and strips the remaining markup.
;; NOTE(review): the replacement text refers to group 2 ("\\2") but the
;; regexp defines only one group, so the part after "</em>" is not
;; captured -- verify the intended pattern.
;; NOTE(review): listing number 497 is missing and the function is never
;; closed; the step that advances past each rewritten line is not
;; visible -- restore it from upstream.
489 (defun nnweb-gmane-wash-article ()
490 (let ((case-fold-search t
))
491 (goto-char (point-min))
492 (re-search-forward "<!--X-Head-of-Message-->" nil t
)
493 (delete-region (point-min) (point))
494 (goto-char (point-min))
495 (while (looking-at "^<li><em>\\([^ ]+\\)</em>.*</li>")
496 (replace-match "\\1\\2" t
)
498 (mm-url-remove-markup)))
;; The `search' function for the gmane type.  The visible code combines
;; the type's `address' with SEARCH encoded as the "query" form
;; parameter, then clears `buffer-file-name'.
;; NOTE(review): listing numbers 501-502, 504 and 508-509 are missing,
;; so the fetch call and the return value are not visible and the
;; parentheses do not balance -- restore them from upstream.
500 (defun nnweb-gmane-search (search)
503 (nnweb-definition 'address
)
505 (mm-url-encode-www-form-urlencoded
506 `(("query" .
,search
)))))
507 (setq buffer-file-name nil
)
;; The `identifier' function for the gmane type: tests URL for a
;; "group=" parameter, capturing everything after it as group 1.
;; NOTE(review): both branches of the `if' (listing numbers 514-515) are
;; missing; presumably the captured text is returned when present and
;; URL itself otherwise -- confirm against upstream.
511 (defun nnweb-gmane-identity (url)
512 "Return a unique identifier based on URL."
513 (if (string-match "group=\\(.+\\)" url
)
518 ;;; General web/w3 interface utility functions
;; Serialises a parse tree back into HTML text in the current buffer.
;; What the visible code shows: one branch inserts PARSE itself (via
;; `nnheader-string-as-multibyte'); the other inserts "<TAG ", then
;; attribute text built from the symbol name of each parameter's car
;; followed by "=", then recurses over the third element of PARSE, and
;; finally inserts "</TAG>" and a newline.
;; NOTE(review): listing numbers 523, 526-527, 529 and 531-535 are
;; missing, so the test choosing between the two branches, the loop over
;; the attributes and the value quoting are not visible -- restore them
;; from upstream.
521 (defun nnweb-insert-html (parse)
522 "Insert HTML based on a w3 parse tree."
524 (insert (nnheader-string-as-multibyte parse
))
525 (insert "<" (symbol-name (car parse
)) " ")
528 (concat (symbol-name (car param
)) "="
530 (if (consp (cdr param
))
536 (mapcar 'nnweb-insert-html
(nth 2 parse
))
537 (insert "</" (symbol-name (car parse
)) ">\n")))
(defun nnweb-parse-find (type parse &optional maxdepth)
  "Find the element of TYPE in PARSE.
Return the first sub-list of PARSE, in depth-first order, whose car is
`eq' to TYPE, or nil when there is none.  If MAXDEPTH is non-nil, it is
the number of nesting levels to examine, PARSE itself being the first;
a MAXDEPTH of zero therefore finds nothing."
  ;; `nnweb-parse-find-1' reports a hit by throwing to `found', so the
  ;; catch has to be established here.
  (catch 'found
    (nnweb-parse-find-1 type parse maxdepth)
    nil))

(defun nnweb-parse-find-1 (type contents maxdepth)
  "Search CONTENTS for a list headed by TYPE and throw it to `found'.
MAXDEPTH is the number of levels still to be examined, or nil for no
limit.  Return nil when nothing is found."
  (when (and (or (null maxdepth)
                 (not (zerop maxdepth)))
             (consp contents))
    (when (eq (car contents) type)
      (throw 'found contents))
    ;; Dotted pairs such as (href . "url") have nothing to descend into.
    (when (listp (cdr contents))
      (let ((remaining (and maxdepth (1- maxdepth))))
        (dolist (element contents)
          (when (consp element)
            (nnweb-parse-find-1 type element remaining)))))))
(defun nnweb-parse-find-all (type parse)
  "Find all elements of TYPE in PARSE.
Return a list, in depth-first order, of every sub-list of PARSE whose
car is `eq' to TYPE.  A matching element is not searched further."
  (nnweb-parse-find-all-1 type parse))

(defun nnweb-parse-find-all-1 (type contents)
  "Return the list of elements headed by TYPE found in CONTENTS."
  (let (result)
    (when (consp contents)
      (if (eq (car contents) type)
          (push contents result)
        ;; Dotted pairs such as (href . "url") have nothing to descend into.
        (when (listp (cdr contents))
          (dolist (element contents)
            (when (consp element)
              ;; `nconc' returns its argument unchanged only when RESULT
              ;; is non-nil, so the value must be stored back.
              (setq result
                    (nconc result
                           (nnweb-parse-find-all-1 type element))))))))
    result))
;; Accumulator shared between `nnweb-text' and `nnweb-text-1'.  It is
;; declared special so that the `let' in `nnweb-text' binds it
;; dynamically whatever binding mode the file is loaded with.
(defvar nnweb-text)

(defun nnweb-text (parse)
  "Return a list of text contents in PARSE.
The strings are returned in the order in which they occur in PARSE."
  (let ((nnweb-text nil))
    (nnweb-text-1 parse)
    ;; `nnweb-text-1' pushes, so the strings come out back to front.
    (nreverse nnweb-text)))

(defun nnweb-text-1 (contents)
  "Push every string found in CONTENTS onto the variable `nnweb-text'.
Nested lists are searched recursively; dotted pairs are skipped."
  (dolist (element contents)
    (cond ((stringp element)
           (push element nnweb-text))
          ((and (consp element)
                (listp (cdr element)))
           (nnweb-text-1 element)))))
590 ;;; arch-tag: f59307eb-c90f-479f-b7d2-dbd8bf51b697
591 ;;; nnweb.el ends here