1 ;;; url-queue.el --- Fetching web pages in parallel
3 ;; Copyright (C) 2011-2013 Free Software Foundation, Inc.
5 ;; Author: Lars Magne Ingebrigtsen <larsi@gnus.org>
8 ;; This file is part of GNU Emacs.
10 ;; GNU Emacs is free software: you can redistribute it and/or modify
11 ;; it under the terms of the GNU General Public License as published by
12 ;; the Free Software Foundation, either version 3 of the License, or
13 ;; (at your option) any later version.
15 ;; GNU Emacs is distributed in the hope that it will be useful,
16 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
17 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 ;; GNU General Public License for more details.
20 ;; You should have received a copy of the GNU General Public License
21 ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
25 ;; The point of this package is to allow fetching web pages in
26 ;; parallel -- but control the level of parallelism to avoid DoS-ing
27 ;; web servers and Emacs.
31 (eval-when-compile (require 'cl-lib
))
;; Upper bound that `url-queue-setup-runners' and `url-queue-run-queue'
;; compare their `running' value against before starting more work.
;; NOTE(review): the remaining defcustom keywords are not visible in
;; this listing.
35 (defcustom url-queue-parallel-processes
6
36 "The number of concurrent processes."
;; Lifetime limit for a started job, in seconds (default 5).
;; NOTE(review): presumably this is the threshold used by
;; `url-queue-prune-old-entries'; the comparison line and the remaining
;; defcustom keywords are not visible in this listing -- confirm.
41 (defcustom url-queue-timeout
5
42 "How long to let a job live once it's started (in seconds)."
47 ;;; Internal variables.
(defvar url-queue nil
  "List of `url-queue' job structures that are waiting or in progress.
Jobs are taken off this list when their retrieval calls back, when
they are flushed for a failing host, or when they are killed for
exceeding the timeout.")
;; Job record for one queued retrieval.  Slots, as used in this file:
;;   url           - address handed to `url-retrieve'.
;;   callback      - function applied when the job finishes or is killed.
;;   cbargs        - extra arguments appended after the status argument.
;;   silentp       - passed through to `url-retrieve'.
;;   buffer        - set from the `url-retrieve' call when the job starts.
;;   start-time    - `float-time' stamp set when the job is started.
;;   pre-triggered - set to t once a runner has been scheduled for the job.
;; NOTE(review): an `inhibit-cookiesp' slot is used elsewhere in the
;; file but its declaration is not visible in this listing.
51 (cl-defstruct url-queue
52 url callback cbargs silentp
53 buffer start-time pre-triggered
;; Build a `url-queue' job from the arguments and then call
;; `url-queue-setup-runners' so that a runner gets scheduled.
;; NOTE(review): the form that adds the new job to `url-queue' and
;; several `make-url-queue' keyword lines are not visible in this
;; listing -- confirm against the complete file.
57 (defun url-queue-retrieve (url callback
&optional cbargs silent inhibit-cookies
)
58 "Retrieve URL asynchronously and call CALLBACK with CBARGS when finished.
59 This is like `url-retrieve' (which see for details of the arguments),
60 but with limits on the degree of parallelism. The variable
61 `url-queue-parallel-processes' sets the number of concurrent processes.
62 The variable `url-queue-timeout' sets a timeout."
65 (list (make-url-queue :url url
69 :inhibit-cookiesp inhibit-cookies
))))
70 (url-queue-setup-runners))
72 ;; To ensure asynchronous behaviour, we start the required number of queue
73 ;; runners from `run-with-idle-timer'. So we're basically going
74 ;; through the queue in two ways: 1) synchronously when a program
75 ;; calls `url-queue-retrieve' (which will then start the required
76 ;; number of queue runners), and 2) at the exit of each job, which
77 ;; will then not start any further threads, but just reuse its own slot.
;; Scan `url-queue' and, if `running' is below
;; `url-queue-parallel-processes', flag one waiting entry as
;; pre-triggered and schedule `url-queue-run-queue' on a one-shot idle
;; timer (0.01 seconds).
;; NOTE(review): the forms that bind and update `running' and `waiting'
;; are not visible in this listing -- confirm against the complete file.
80 (defun url-queue-setup-runners ()
83 (dolist (entry url-queue
)
;; Entries that already have a start time or are pre-triggered are
;; handled separately from entries that are still waiting.
85 ((or (url-queue-start-time entry
)
86 (url-queue-pre-triggered entry
))
89 (setq waiting entry
))))
91 (< running url-queue-parallel-processes
))
;; Mark the entry first so that a later scan does not schedule a
;; second runner for it.
92 (setf (url-queue-pre-triggered waiting
) t
)
93 (run-with-idle-timer 0.01 nil
'url-queue-run-queue
))))
;; Prune timed-out jobs, then scan `url-queue'; if `running' is below
;; `url-queue-parallel-processes', stamp one waiting entry with the
;; current time and start its retrieval.
;; NOTE(review): the forms that bind and update `running' and `waiting'
;; are not visible in this listing -- confirm against the complete file.
95 (defun url-queue-run-queue ()
96 (url-queue-prune-old-entries)
99 (dolist (entry url-queue
)
;; An entry with a start time has already been started.
101 ((url-queue-start-time entry
)
104 (setq waiting entry
))))
106 (< running url-queue-parallel-processes
))
;; The start time is what `url-queue-prune-old-entries' later reads
;; to decide how long the job has been running.
107 (setf (url-queue-start-time waiting
) (float-time))
108 (url-queue-start-retrieve waiting
))))
(defun url-queue-callback-function (status job)
  "Finish JOB once its retrieval has called back with STATUS.
JOB is first taken off `url-queue'.  When STATUS describes a
`connection-failed' error, all other queued jobs for the same host
are removed as well.  The queue is then run again, and finally the
callback stored in JOB is applied to STATUS followed by JOB's
callback arguments."
  (setq url-queue (delq job url-queue))
  (when (eq (car status) :error)
    (when (eq (nth 1 (nth 1 status)) 'connection-failed)
      ;; A failed connection is likely to fail again for every other
      ;; job aimed at the same host, so drop those jobs too.  This
      ;; matters most when the real problem is DNS resolution, which
      ;; runs synchronously and totally halts Emacs.
      (url-queue-remove-jobs-from-host
       (plist-get (nthcdr 3 (nth 1 status)) :host))))
  (url-queue-run-queue)
  (apply (url-queue-callback job) status (url-queue-cbargs job)))
;; Kill queued jobs and remove them from `url-queue', based on the host
;; part of each job's parsed URL.
;; NOTE(review): presumably the host is compared against HOST and the
;; matches are collected before being killed; those lines are not
;; visible in this listing -- confirm against the complete file.
123 (defun url-queue-remove-jobs-from-host (host)
125 (dolist (job url-queue
)
;; Parse the job's URL to extract its host component.
126 (when (equal (url-host (url-generic-parse-url (url-queue-url job
)))
130 (url-queue-kill-job job
)
131 (setq url-queue
(delq job url-queue
)))))
;; Start the retrieval for JOB with `url-retrieve' and store the result
;; in JOB's buffer slot.  `url-queue-callback-function' is registered
;; as the callback, with JOB as its only extra argument.
;; NOTE(review): a line between the `setf' and the `url-retrieve' call
;; is not visible in this listing, so the stored value may be wrapped
;; by another form -- confirm against the complete file.
133 (defun url-queue-start-retrieve (job)
134 (setf (url-queue-buffer job
)
136 (url-retrieve (url-queue-url job
)
137 #'url-queue-callback-function
(list job
)
;; The job's silent and cookie-inhibit settings are passed straight
;; through to `url-retrieve'.
138 (url-queue-silentp job
)
139 (url-queue-inhibit-cookiesp job
)))))
;; Collect started jobs whose age (current `float-time' minus their
;; start time) exceeds a limit, then kill each one and remove it from
;; `url-queue'.
;; NOTE(review): the binding of `dead-jobs' and the right-hand side of
;; the age comparison (presumably `url-queue-timeout') are not visible
;; in this listing -- confirm against the complete file.
141 (defun url-queue-prune-old-entries ()
143 (dolist (job url-queue
)
144 ;; Kill jobs that have lasted longer than the timeout.
145 (when (and (url-queue-start-time job
)
146 (> (- (float-time) (url-queue-start-time job
))
148 (push job dead-jobs
)))
;; Removal is done in a second pass, after the scan of `url-queue'
;; above has finished.
149 (dolist (job dead-jobs
)
150 (url-queue-kill-job job
)
151 (setq url-queue
(delq job url-queue
)))))
;; Stop JOB: delete every process attached to its buffer, then apply
;; JOB's callback to an `:error' status carrying `url-queue-timeout'
;; and the message "Queue timeout exceeded", followed by JOB's
;; callback arguments.
;; NOTE(review): the binding of `process' and the forms that wrap
;; `delete-process' and the buffer selection are not visible in this
;; listing; presumably the callback runs with the selected buffer
;; current -- confirm against the complete file.
153 (defun url-queue-kill-job (job)
154 (when (bufferp (url-queue-buffer job
))
;; Repeat for as long as the buffer still has a process attached.
156 (while (setq process
(get-buffer-process (url-queue-buffer job
)))
;; Replace the sentinel with `ignore' before deleting the process.
157 (set-process-sentinel process
'ignore
)
159 (delete-process process
)))))
160 ;; Call the callback with an error message to ensure that the caller
161 ;; is notified that the job has failed.
163 (if (and (bufferp (url-queue-buffer job
))
164 (buffer-live-p (url-queue-buffer job
)))
165 ;; Use the (partially filled) process buffer if it exists.
166 (url-queue-buffer job
)
167 ;; If not, just create a new buffer, which will probably be
168 ;; killed again by the caller.
169 (generate-new-buffer " *temp*"))
170 (apply (url-queue-callback job
)
171 (cons (list :error
(list 'error
'url-queue-timeout
172 "Queue timeout exceeded"))
173 (url-queue-cbargs job
)))))
177 ;;; url-queue.el ends here