1 ;;; nnweb.el --- retrieving articles via web search engines
2 ;; Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001
3 ;; Free Software Foundation, Inc.
5 ;; Author: Lars Magne Ingebrigtsen <larsi@gnus.org>
8 ;; This file is part of GNU Emacs.
10 ;; GNU Emacs is free software; you can redistribute it and/or modify
11 ;; it under the terms of the GNU General Public License as published by
12 ;; the Free Software Foundation; either version 2, or (at your option)
15 ;; GNU Emacs is distributed in the hope that it will be useful,
16 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
17 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 ;; GNU General Public License for more details.
20 ;; You should have received a copy of the GNU General Public License
21 ;; along with GNU Emacs; see the file COPYING. If not, write to the
22 ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
23 ;; Boston, MA 02111-1307, USA.
27 ;; Note: You need to have `url' and `w3' installed for this
32 (eval-when-compile (require 'cl
))
46 ;; Report failure to find w3 at load time if appropriate.
47 (unless noninteractive
51 (require 'w3-forms
))))
55 (defvoo nnweb-directory
(nnheader-concat gnus-directory
"nnweb/")
56 "Where nnweb will save its files.")
58 (defvoo nnweb-type
'google
59 "What search engine type is being used.
60 Valid types include `google', `dejanews', `dejanewsold', `reference',
63 (defvar nnweb-type-definition
66 ;;(article . nnweb-google-wash-article)
67 ;;(id . "http://groups.google.com/groups?as_umsgid=%s")
69 (id .
"http://groups.google.com/groups?selm=%s&output=gplain")
70 ;;(reference . nnweb-google-reference)
71 (reference . identity
)
72 (map . nnweb-google-create-mapping
)
73 (search . nnweb-google-search
)
74 (address .
"http://groups.google.com/groups")
75 (identifier . nnweb-google-identity
))
76 (dejanews ;; alias of google
77 ;;(article . nnweb-google-wash-article)
78 ;;(id . "http://groups.google.com/groups?as_umsgid=%s")
80 (id .
"http://groups.google.com/groups?selm=%s&output=gplain")
81 ;;(reference . nnweb-google-reference)
82 (reference . identity
)
83 (map . nnweb-google-create-mapping
)
84 (search . nnweb-google-search
)
85 (address .
"http://groups.google.com/groups")
86 (identifier . nnweb-google-identity
))
88 ;;; (article . ignore)
89 ;;; (id . "http://search.dejanews.com/msgid.xp?MID=%s&fmt=text")
90 ;;; (map . nnweb-dejanews-create-mapping)
91 ;;; (search . nnweb-dejanews-search)
92 ;;; (address . "http://www.deja.com/=dnc/qs.xp")
93 ;;; (identifier . nnweb-dejanews-identity))
95 ;;; (article . ignore)
96 ;;; (map . nnweb-dejanews-create-mapping)
97 ;;; (search . nnweb-dejanewsold-search)
98 ;;; (address . "http://www.deja.com/dnquery.xp")
99 ;;; (identifier . nnweb-dejanews-identity))
101 (article . nnweb-reference-wash-article
)
102 (map . nnweb-reference-create-mapping
)
103 (search . nnweb-reference-search
)
104 (address .
"http://www.reference.com/cgi-bin/pn/go")
105 (identifier . identity
))
107 (article . nnweb-altavista-wash-article
)
108 (map . nnweb-altavista-create-mapping
)
109 (search . nnweb-altavista-search
)
110 (address .
"http://www.altavista.digital.com/cgi-bin/query")
111 (id .
"/cgi-bin/news?id@%s")
112 (identifier . identity
)))
113 "Type-definition alist.")
115 (defvoo nnweb-search nil
116 "Search string to feed to DejaNews.")
118 (defvoo nnweb-max-hits
999
119 "Maximum number of hits to display.")
121 (defvoo nnweb-ephemeral-p nil
122 "Whether this nnweb server is ephemeral.")
124 ;;; Internal variables
126 (defvoo nnweb-articles nil
)
127 (defvoo nnweb-buffer nil
)
128 (defvoo nnweb-group-alist nil
)
129 (defvoo nnweb-group nil
)
130 (defvoo nnweb-hashtb nil
)
132 ;;; Interface functions
134 (nnoo-define-basics nnweb
)
136 (deffoo nnweb-retrieve-headers
(articles &optional group server fetch-old
)
137 (nnweb-possibly-change-server group server
)
139 (set-buffer nntp-server-buffer
)
141 (let (article header
)
142 (mm-with-unibyte-current-buffer
143 (while (setq article
(pop articles
))
144 (when (setq header
(cadr (assq article nnweb-articles
)))
145 (nnheader-insert-nov header
))))
148 (deffoo nnweb-request-scan
(&optional group server
)
149 (nnweb-possibly-change-server group server
)
150 (if nnweb-ephemeral-p
151 (setq nnweb-hashtb
(gnus-make-hashtable 4095)))
152 (funcall (nnweb-definition 'map
))
153 (unless nnweb-ephemeral-p
155 (nnweb-write-overview group
)))
157 (deffoo nnweb-request-group
(group &optional server dont-check
)
158 (nnweb-possibly-change-server nil server
)
160 (not (equal group nnweb-group
))
161 (not nnweb-ephemeral-p
))
162 (setq nnweb-group group
164 (let ((info (assoc group nnweb-group-alist
)))
166 (setq nnweb-type
(nth 2 info
))
167 (setq nnweb-search
(nth 3 info
))
169 (nnweb-read-overview group
)))))
171 ((not nnweb-articles
)
172 (nnheader-report 'nnweb
"No matching articles"))
174 (let ((active (if nnweb-ephemeral-p
175 (cons (caar nnweb-articles
)
176 (caar (last nnweb-articles
)))
177 (cadr (assoc group nnweb-group-alist
)))))
178 (nnheader-report 'nnweb
"Opened group %s" group
)
180 "211 %d %d %d %s\n" (length nnweb-articles
)
181 (car active
) (cdr active
) group
)))))
183 (deffoo nnweb-close-group
(group &optional server
)
184 (nnweb-possibly-change-server group server
)
185 (when (gnus-buffer-live-p nnweb-buffer
)
187 (set-buffer nnweb-buffer
)
188 (set-buffer-modified-p nil
)
189 (kill-buffer nnweb-buffer
)))
192 (deffoo nnweb-request-article
(article &optional group server buffer
)
193 (nnweb-possibly-change-server group server
)
195 (set-buffer (or buffer nntp-server-buffer
))
196 (let* ((header (cadr (assq article nnweb-articles
)))
197 (url (and header
(mail-header-xref header
))))
199 (mm-with-unibyte-current-buffer
200 (nnweb-fetch-url url
)))
201 (and (stringp article
)
202 (nnweb-definition 'id t
)
203 (let ((fetch (nnweb-definition 'id
))
205 (when (string-match "^<\\(.*\\)>$" article
)
206 (setq art
(match-string 1 article
)))
207 (when (and fetch art
)
208 (setq url
(format fetch art
))
209 (mm-with-unibyte-current-buffer
210 (nnweb-fetch-url url
))
211 (if (nnweb-definition 'reference t
)
213 (funcall (nnweb-definition
214 'reference
) article
)))))))
215 (unless nnheader-callback-function
216 (funcall (nnweb-definition 'article
)))
217 (nnheader-report 'nnweb
"Fetched article %s" article
)
218 (cons group
(and (numberp article
) article
))))))
220 (deffoo nnweb-close-server
(&optional server
)
221 (when (and (nnweb-server-opened server
)
222 (gnus-buffer-live-p nnweb-buffer
))
224 (set-buffer nnweb-buffer
)
225 (set-buffer-modified-p nil
)
226 (kill-buffer nnweb-buffer
)))
227 (nnoo-close-server 'nnweb server
))
229 (deffoo nnweb-request-list
(&optional server
)
230 (nnweb-possibly-change-server nil server
)
232 (set-buffer nntp-server-buffer
)
233 (nnmail-generate-active nnweb-group-alist
)
236 (deffoo nnweb-request-update-info
(group info
&optional server
)
237 (nnweb-possibly-change-server group server
))
239 (deffoo nnweb-asynchronous-p
()
242 (deffoo nnweb-request-create-group
(group &optional server args
)
243 (nnweb-possibly-change-server nil server
)
244 (nnweb-request-delete-group group
)
245 (push `(,group
,(cons 1 0) ,@args
) nnweb-group-alist
)
249 (deffoo nnweb-request-delete-group
(group &optional force server
)
250 (nnweb-possibly-change-server group server
)
251 (gnus-pull group nnweb-group-alist t
)
253 (gnus-delete-file (nnweb-overview-file group
))
256 (nnoo-define-skeleton nnweb
)
258 ;;; Internal functions
260 (defun nnweb-read-overview (group)
261 "Read the overview of GROUP and build the map."
262 (when (file-exists-p (nnweb-overview-file group
))
263 (mm-with-unibyte-buffer
264 (nnheader-insert-file-contents (nnweb-overview-file group
))
265 (goto-char (point-min))
268 (setq header
(nnheader-parse-nov))
270 (push (list (mail-header-number header
)
271 header
(mail-header-xref header
))
273 (nnweb-set-hashtb header
(car nnweb-articles
)))))))
275 (defun nnweb-write-overview (group)
276 "Write the overview file for GROUP."
277 (with-temp-file (nnweb-overview-file group
)
278 (let ((articles nnweb-articles
))
280 (nnheader-insert-nov (cadr (pop articles
)))))))
282 (defun nnweb-set-hashtb (header data
)
283 (gnus-sethash (nnweb-identifier (mail-header-xref header
))
286 (defun nnweb-get-hashtb (url)
287 (gnus-gethash (nnweb-identifier url
) nnweb-hashtb
))
289 (defun nnweb-identifier (ident)
290 (funcall (nnweb-definition 'identifier
) ident
))
292 (defun nnweb-overview-file (group)
293 "Return the name of the overview file of GROUP."
294 (nnheader-concat nnweb-directory group
".overview"))
296 (defun nnweb-write-active ()
297 "Save the active file."
298 (gnus-make-directory nnweb-directory
)
299 (with-temp-file (nnheader-concat nnweb-directory
"active")
300 (prin1 `(setq nnweb-group-alist
',nnweb-group-alist
) (current-buffer))))
302 (defun nnweb-read-active ()
303 "Read the active file."
304 (load (nnheader-concat nnweb-directory
"active") t t t
))
306 (defun nnweb-definition (type &optional noerror
)
307 "Return the definition of TYPE."
308 (let ((def (cdr (assq type
(assq nnweb-type nnweb-type-definition
)))))
311 (error "Undefined definition %s" type
))
314 (defun nnweb-possibly-change-server (&optional group server
)
317 (unless (nnweb-server-opened server
)
318 (nnweb-open-server server
)))
319 (unless nnweb-group-alist
322 (setq nnweb-hashtb
(gnus-make-hashtable 4095)))
324 (when (and (not nnweb-ephemeral-p
)
325 (equal group nnweb-group
))
326 (nnweb-request-group group nil t
))))
328 (defun nnweb-init (server)
329 "Initialize buffers and such."
330 (unless (gnus-buffer-live-p nnweb-buffer
)
334 (nnheader-set-temp-buffer
335 (format " *nnweb %s %s %s*"
336 nnweb-type nnweb-search server
))
337 (current-buffer))))))
339 (defun nnweb-fetch-url (url)
342 (if (not nnheader-callback-function
)
345 (mm-enable-multibyte)
346 (let ((coding-system-for-read 'binary
)
347 (coding-system-for-write 'binary
)
348 (default-process-coding-system 'binary
))
350 (setq buf
(buffer-string)))
354 (nnweb-url-retrieve-asynch
355 url
'nnweb-callback
(current-buffer) nnheader-callback-function
)
358 (defun nnweb-callback (buffer callback
)
359 (when (gnus-buffer-live-p url-working-buffer
)
361 (set-buffer url-working-buffer
)
362 (funcall (nnweb-definition 'article
))
363 (nnweb-decode-entities)
365 (goto-char (point-max))
366 (insert-buffer-substring url-working-buffer
))
368 (gnus-kill-buffer url-working-buffer
)))
370 (defun nnweb-url-retrieve-asynch (url callback
&rest data
)
371 (let ((url-request-method "GET")
372 (old-asynch url-be-asynchronous
)
373 (url-request-data nil
)
374 (url-request-extra-headers nil
)
375 (url-working-buffer (generate-new-buffer-name " *nnweb*")))
376 (setq-default url-be-asynchronous t
)
378 (set-buffer (get-buffer-create url-working-buffer
))
379 (setq url-current-callback-data data
380 url-be-asynchronous t
381 url-current-callback-func callback
)
382 (url-retrieve url nil
))
383 (setq-default url-be-asynchronous old-asynch
)))
385 (if (fboundp 'url-retrieve-synchronously
)
386 (defun nnweb-url-retrieve-asynch (url callback
&rest data
)
387 (url-retrieve url callback data
)))
390 ;;; DejaNews functions.
393 (defun nnweb-dejanews-create-mapping ()
394 "Perform the search and create a number-to-url alist."
396 (set-buffer nnweb-buffer
)
398 (when (funcall (nnweb-definition 'search
) nnweb-search
)
402 (active (or (cadr (assoc nnweb-group nnweb-group-alist
))
405 map url parse a table group text
)
407 ;; Go through all the article hits on this page.
408 (goto-char (point-min))
409 (setq parse
(w3-parse-buffer (current-buffer))
410 table
(nth 1 (nnweb-parse-find-all 'table parse
)))
411 (dolist (row (nth 2 (car (nth 2 table
))))
412 (setq a
(nnweb-parse-find 'a row
)
413 url
(cdr (assq 'href
(nth 1 a
)))
414 text
(nreverse (nnweb-text row
)))
416 (setq subject
(nth 4 text
)
420 (if (string-match "\\([0-9]+\\)/\\([0-9]+\\)/\\([0-9]+\\)" date
)
421 (setq date
(format "%s %s 00:00:00 %s"
422 (car (rassq (string-to-number
423 (match-string 2 date
))
425 (match-string 3 date
)
426 (match-string 1 date
)))
427 (setq date
"Jan 1 00:00:00 0000"))
429 (setq url
(concat url
"&fmt=text"))
430 (when (string-match "&context=[^&]+" url
)
431 (setq url
(replace-match "" t t url
)))
432 (unless (nnweb-get-hashtb url
)
436 (make-full-mail-header
437 (cdr active
) (concat subject
" (" group
")") from date
438 (concat "<" (nnweb-identifier url
) "@dejanews>")
441 (nnweb-set-hashtb (cadar map
) (car map
)))))
442 ;; See whether there is a "Get next 20 hits" button here.
443 (goto-char (point-min))
444 (if (or (not (re-search-forward
445 "HREF=\"\\([^\"]+\\)\"[<>b]+Next result" nil t
))
446 (>= i nnweb-max-hits
))
449 (setq more
(match-string 1))
451 (url-insert-file-contents more
)))
452 ;; Return the articles in the right order.
454 (sort (nconc nnweb-articles map
) 'car-less-than-car
))))))
456 (defun nnweb-dejanews-search (search)
459 (nnweb-definition 'address
)
461 (nnweb-encode-www-form-urlencoded
463 ("svcclass" .
"dnyr")
465 ("defaultOp" .
"AND")
467 ("OP" .
"dnquery.xp")
471 ("format" .
"verbose2")
472 ("showsort" .
"date")
474 ("ageweight" .
"1")))))
477 (defun nnweb-dejanewsold-search (search)
479 (nnweb-definition 'address
)
480 `(("query" .
,search
)
481 ("defaultOp" .
"AND")
482 ("svcclass" .
"dnold")
484 ("format" .
"verbose2")
486 ("showsort" .
"date")
488 ("ageweight" .
"1")))
491 (defun nnweb-dejanews-identity (url)
492 "Return an unique identifier based on URL."
493 (if (string-match "AN=\\([0-9]+\\)" url
)
501 (defun nnweb-reference-create-mapping ()
502 "Perform the search and create a number-to-url alist."
504 (set-buffer nnweb-buffer
)
506 (when (funcall (nnweb-definition 'search
) nnweb-search
)
510 (active (or (cadr (assoc nnweb-group nnweb-group-alist
))
512 Subject Score Date Newsgroups From Message-ID
515 ;; Go through all the article hits on this page.
516 (goto-char (point-min))
517 (search-forward "</pre><hr>" nil t
)
518 (delete-region (point-min) (point))
519 (goto-char (point-min))
520 (while (re-search-forward "^ +[0-9]+\\." nil t
)
523 (if (re-search-forward "^$" nil t
)
526 (goto-char (point-min))
527 (when (looking-at ".*href=\"\\([^\"]+\\)\"")
528 (setq url
(match-string 1)))
529 (nnweb-remove-markup)
530 (goto-char (point-min))
531 (while (search-forward "\t" nil t
)
533 (goto-char (point-min))
534 (while (re-search-forward "^\\([^:]+\\): \\(.*\\)$" nil t
)
535 (set (intern (match-string 1)) (match-string 2)))
537 (search-forward "</pre>" nil t
)
539 (unless (nnweb-get-hashtb url
)
543 (make-full-mail-header
544 (cdr active
) (concat "(" Newsgroups
") " Subject
) From Date
546 nil
0 (string-to-int Score
) url
))
548 (nnweb-set-hashtb (cadar map
) (car map
))))
550 ;; Return the articles in the right order.
552 (sort (nconc nnweb-articles map
) 'car-less-than-car
))))))
554 (defun nnweb-reference-wash-article ()
555 (let ((case-fold-search t
))
556 (goto-char (point-min))
557 (re-search-forward "^</center><hr>" nil t
)
558 (delete-region (point-min) (point))
559 (search-forward "<pre>" nil t
)
561 (let ((body (point-marker)))
562 (search-forward "</pre>" nil t
)
563 (delete-region (point) (point-max))
564 (nnweb-remove-markup)
565 (goto-char (point-min))
566 (while (looking-at " *$")
568 (narrow-to-region (point-min) body
)
569 (while (and (re-search-forward "^$" nil t
)
572 (goto-char (point-min))
573 (while (looking-at "\\(^[^ ]+:\\) *")
574 (replace-match "\\1 " t
)
576 (goto-char (point-min))
577 (when (re-search-forward "^References:" nil t
)
579 (point) (if (re-search-forward "^$\\|^[^:]+:" nil t
)
582 (goto-char (point-min))
584 (unless (looking-at "References")
587 (goto-char (point-min))
588 (while (search-forward "," nil t
)
589 (replace-match " " t t
)))
591 (nnweb-decode-entities)
592 (set-marker body nil
))))
594 (defun nnweb-reference-search (search)
595 (url-insert-file-contents
597 (nnweb-definition 'address
)
599 (nnweb-encode-www-form-urlencoded
600 `(("search" .
"advanced")
601 ("querytext" .
,search
)
606 ("organization" .
"")
609 ("choice" .
"Search")
610 ("startmonth" .
"Jul")
612 ("startyear" .
"1996")
617 ("verbosity" .
"Verbose")
618 ("ranking" .
"Relevance")
622 (setq buffer-file-name nil
)
629 (defun nnweb-altavista-create-mapping ()
630 "Perform the search and create a number-to-url alist."
632 (set-buffer nnweb-buffer
)
635 (when (funcall (nnweb-definition 'search
) nnweb-search part
)
639 (active (or (cadr (assoc nnweb-group nnweb-group-alist
))
641 subject date from id group
644 ;; Go through all the article hits on this page.
645 (goto-char (point-min))
646 (search-forward "<dt>" nil t
)
647 (delete-region (point-min) (match-beginning 0))
648 (goto-char (point-min))
649 (while (search-forward "<dt>" nil t
)
650 (replace-match "\n<blubb>"))
651 (nnweb-decode-entities)
652 (goto-char (point-min))
653 (while (re-search-forward "<blubb>.*href=\"\\([^\"]+\\)\"><strong>\\([^>]*\\)</strong></a><dd>\\([^-]+\\)- <b>\\([^<]+\\)<.*href=\"news:\\([^\"]+\\)\">.*\">\\(.+\\)</a><P>"
655 (setq url
(match-string 1)
656 subject
(match-string 2)
657 date
(match-string 3)
658 group
(match-string 4)
659 id
(concat "<" (match-string 5) ">")
660 from
(match-string 6))
662 (unless (nnweb-get-hashtb url
)
666 (make-full-mail-header
667 (cdr active
) (concat "(" group
") " subject
) from date
670 (nnweb-set-hashtb (cadar map
) (car map
))))
671 ;; See if we want more.
672 (when (or (not nnweb-articles
)
673 (>= i nnweb-max-hits
)
674 (not (funcall (nnweb-definition 'search
)
675 nnweb-search
(incf part
))))
677 ;; Return the articles in the right order.
679 (sort (nconc nnweb-articles map
) 'car-less-than-car
)))))))
681 (defun nnweb-altavista-wash-article ()
682 (goto-char (point-min))
683 (let ((case-fold-search t
))
684 (when (re-search-forward "^<strong>" nil t
)
685 (delete-region (point-min) (match-beginning 0)))
686 (goto-char (point-min))
687 (while (looking-at "<strong>\\([^ ]+\\) +</strong> +\\(.*\\)$")
688 (replace-match "\\1: \\2" t
)
690 (when (re-search-backward "^References:" nil t
)
691 (narrow-to-region (point) (progn (forward-line 1) (point)))
692 (goto-char (point-min))
693 (while (re-search-forward "<A.*\\?id@\\([^\"]+\\)\">[0-9]+</A>" nil t
)
694 (replace-match "<\\1> " t
)))
696 (nnweb-remove-markup)
697 (nnweb-decode-entities)))
699 (defun nnweb-altavista-search (search &optional part
)
700 (url-insert-file-contents
702 (nnweb-definition 'address
)
704 (nnweb-encode-www-form-urlencoded
707 ,@(when part
`(("stq" .
,(int-to-string (* part
30)))))
713 (setq buffer-file-name nil
)
717 ;;; Deja bought by google.com
720 (defun nnweb-google-wash-article ()
721 (let ((case-fold-search t
) url
)
722 (goto-char (point-min))
723 (re-search-forward "^<pre>" nil t
)
724 (narrow-to-region (point-min) (point))
725 (search-backward "<table " nil t
2)
726 (delete-region (point-min) (point))
727 (if (re-search-forward "Search Result [0-9]+" nil t
)
729 (if (re-search-forward "View complete thread ([0-9]+ articles?)" nil t
)
731 (goto-char (point-min))
732 (while (search-forward "<br>" nil t
)
733 (replace-match "\n"))
734 (nnweb-remove-markup)
735 (goto-char (point-min))
736 (while (re-search-forward "^[ \t]*\n" nil t
)
738 (goto-char (point-max))
741 (narrow-to-region (point) (point-max))
742 (search-forward "</pre>" nil t
)
743 (delete-region (point) (point-max))
744 (nnweb-remove-markup)
747 (defun nnweb-google-parse-1 (&optional Message-ID
)
750 (active (cadr (assoc nnweb-group nnweb-group-alist
)))
751 Subject Score Date Newsgroups From
754 (push (list nnweb-group
(setq active
(cons 1 0))
755 nnweb-type nnweb-search
)
757 ;; Go through all the article hits on this page.
758 (goto-char (point-min))
759 (while (re-search-forward
760 "a href=/groups\\(\\?[^ \">]*selm=\\([^ &\">]+\\)\\)" nil t
)
761 (setq mid
(match-string 2)
763 "http://groups.google.com/groups?selm=%s&output=gplain" mid
))
764 (narrow-to-region (search-forward ">" nil t
)
765 (search-forward "</a>" nil t
))
766 (nnweb-remove-markup)
767 (nnweb-decode-entities)
768 (setq Subject
(buffer-string))
769 (goto-char (point-max))
772 (when (looking-at "<br><font[^>]+>")
773 (goto-char (match-end 0)))
774 (if (not (looking-at "<a[^>]+>"))
775 (skip-chars-forward " \t")
776 (narrow-to-region (point)
777 (search-forward "</a>" nil t
))
778 (nnweb-remove-markup)
779 (nnweb-decode-entities)
780 (setq Newsgroups
(buffer-string))
781 (goto-char (point-max))
783 (skip-chars-forward "- \t"))
785 "\\([0-9]+[/ ][A-Za-z]+[/ ][0-9]+\\)[ \t]*by[ \t]*\\([^<]*\\) - <a")
786 (setq From
(match-string 2)
787 Date
(match-string 1)))
790 (unless (nnweb-get-hashtb url
)
794 (make-full-mail-header
795 (cdr active
) (if Newsgroups
796 (concat "(" Newsgroups
") " Subject
)
798 From Date
(or Message-ID mid
)
801 (nnweb-set-hashtb (cadar map
) (car map
))))
804 (defun nnweb-google-reference (id)
805 (let ((map (nnweb-google-parse-1 id
)) header
)
807 (nconc nnweb-articles map
))
808 (when (setq header
(cadar map
))
809 (mm-with-unibyte-current-buffer
810 (nnweb-fetch-url (mail-header-xref header
)))
813 (defun nnweb-google-create-mapping ()
814 "Perform the search and create a number-to-url alist."
816 (set-buffer nnweb-buffer
)
818 (when (funcall (nnweb-definition 'search
) nnweb-search
)
822 (nconc nnweb-articles
(nnweb-google-parse-1)))
823 ;; FIXME: There is more.
825 ;; Return the articles in the right order.
827 (sort nnweb-articles
'car-less-than-car
))))))
829 (defun nnweb-google-search (search)
832 (nnweb-definition 'address
)
834 (nnweb-encode-www-form-urlencoded
841 ("sites" .
"groups")))))
844 (defun nnweb-google-identity (url)
845 "Return an unique identifier based on URL."
846 (if (string-match "selm=\\([^ &>]+\\)" url
)
851 ;;; General web/w3 interface utility functions
854 (defun nnweb-insert-html (parse)
855 "Insert HTML based on a w3 parse tree."
857 (insert (nnheader-string-as-multibyte parse
))
858 (insert "<" (symbol-name (car parse
)) " ")
861 (concat (symbol-name (car param
)) "="
863 (if (consp (cdr param
))
869 (mapcar 'nnweb-insert-html
(nth 2 parse
))
870 (insert "</" (symbol-name (car parse
)) ">\n")))
872 (defun nnweb-encode-www-form-urlencoded (pairs)
873 "Return PAIRS encoded for forms."
877 (concat (w3-form-encode-xwfu (car data
)) "="
878 (w3-form-encode-xwfu (cdr data
)))))
881 (defun nnweb-fetch-form (url pairs
)
882 "Fetch a form from URL with PAIRS as the data using the POST method."
883 (let ((url-request-data (nnweb-encode-www-form-urlencoded pairs
))
884 (url-request-method "POST")
885 (url-request-extra-headers
886 '(("Content-type" .
"application/x-www-form-urlencoded"))))
887 (url-insert-file-contents url
)
888 (setq buffer-file-name nil
))
891 (defun nnweb-decode-entities ()
892 "Decode all HTML entities."
893 (goto-char (point-min))
894 (while (re-search-forward "&\\(#[0-9]+\\|[a-z]+\\);" nil t
)
895 (let ((elem (if (eq (aref (match-string 1) 0) ?\
#)
897 (string-to-number (substring
898 (match-string 1) 1))))
899 (if (mm-char-or-char-int-p c
) c
32))
900 (or (cdr (assq (intern (match-string 1))
903 (unless (stringp elem
)
904 (setq elem
(char-to-string elem
)))
905 (replace-match elem t t
))))
907 (defun nnweb-decode-entities-string (string)
910 (nnweb-decode-entities)
911 (buffer-substring (point-min) (point-max))))
913 (defun nnweb-remove-markup ()
914 "Remove all HTML markup, leaving just plain text."
915 (goto-char (point-min))
916 (while (search-forward "<!--" nil t
)
917 (delete-region (match-beginning 0)
918 (or (search-forward "-->" nil t
)
920 (goto-char (point-min))
921 (while (re-search-forward "<[^>]+>" nil t
)
922 (replace-match "" t t
)))
924 (defun nnweb-insert (url &optional follow-refresh
)
925 "Insert the contents from an URL in the current buffer.
926 If FOLLOW-REFRESH is non-nil, redirect refresh url in META."
927 (let ((name buffer-file-name
))
930 (narrow-to-region (point) (point))
931 (url-insert-file-contents url
)
932 (goto-char (point-min))
933 (when (re-search-forward
934 "<meta[ \t\r\n]*http-equiv=\"Refresh\"[^>]*URL=\\([^\"]+\\)\"" nil t
)
935 (let ((url (match-string 1)))
936 (delete-region (point-min) (point-max))
937 (nnweb-insert url t
))))
938 (url-insert-file-contents url
))
939 (setq buffer-file-name name
)))
941 (defun nnweb-parse-find (type parse
&optional maxdepth
)
942 "Find the element of TYPE in PARSE."
944 (nnweb-parse-find-1 type parse maxdepth
)))
946 (defun nnweb-parse-find-1 (type contents maxdepth
)
947 (when (or (null maxdepth
)
948 (not (zerop maxdepth
)))
949 (when (consp contents
)
950 (when (eq (car contents
) type
)
951 (throw 'found contents
))
952 (when (listp (cdr contents
))
953 (dolist (element contents
)
954 (when (consp element
)
955 (nnweb-parse-find-1 type element
956 (and maxdepth
(1- maxdepth
)))))))))
958 (defun nnweb-parse-find-all (type parse
)
959 "Find all elements of TYPE in PARSE."
961 (nnweb-parse-find-all-1 type parse
)))
963 (defun nnweb-parse-find-all-1 (type contents
)
965 (when (consp contents
)
966 (if (eq (car contents
) type
)
967 (push contents result
)
968 (when (listp (cdr contents
))
969 (dolist (element contents
)
970 (when (consp element
)
972 (nconc result
(nnweb-parse-find-all-1 type element
))))))))
976 (defun nnweb-text (parse)
977 "Return a list of text contents in PARSE."
978 (let ((nnweb-text nil
))
980 (nreverse nnweb-text
)))
982 (defun nnweb-text-1 (contents)
983 (dolist (element contents
)
984 (if (stringp element
)
985 (push element nnweb-text
)
986 (when (and (consp element
)
987 (listp (cdr element
)))
988 (nnweb-text-1 element
)))))
990 (defun nnweb-replace-in-string (string match newtext
)
991 (while (string-match match string
)
992 (setq string
(replace-match newtext t t string
)))
997 ;;; nnweb.el ends here