;; Merge branch 'master' into comment-cache
;; [emacs.git] / lisp / url / url-queue.el
;; blob dd1699bd082c56cbe48b70058629835af84b4774
1 ;;; url-queue.el --- Fetching web pages in parallel -*- lexical-binding: t -*-
3 ;; Copyright (C) 2011-2017 Free Software Foundation, Inc.
5 ;; Author: Lars Magne Ingebrigtsen <larsi@gnus.org>
6 ;; Keywords: comm
8 ;; This file is part of GNU Emacs.
10 ;; GNU Emacs is free software: you can redistribute it and/or modify
11 ;; it under the terms of the GNU General Public License as published by
12 ;; the Free Software Foundation, either version 3 of the License, or
13 ;; (at your option) any later version.
15 ;; GNU Emacs is distributed in the hope that it will be useful,
16 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
17 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 ;; GNU General Public License for more details.
20 ;; You should have received a copy of the GNU General Public License
21 ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
23 ;;; Commentary:
25 ;; The point of this package is to allow fetching web pages in
26 ;; parallel -- but control the level of parallelism to avoid DoS-ing
27 ;; web servers and Emacs.
29 ;;; Code:
31 (eval-when-compile (require 'cl-lib))
32 (require 'browse-url)
33 (require 'url-parse)
(defcustom url-queue-parallel-processes 6
  "The maximum number of concurrent `url-queue-retrieve' fetches."
  :version "24.1"
  :type 'integer
  :group 'url)
(defcustom url-queue-timeout 5
  "How long to let a job live once it's started (in seconds).
Jobs that have been running longer than this are killed, and their
callbacks are called with a `url-queue-timeout' error status."
  :version "24.1"
  :type 'integer
  :group 'url)
;;; Internal variables.

(defvar url-queue nil
  "FIFO list of outstanding `url-queue' job structs.")

(defvar url-queue-progress-timer nil
  "Idle timer that re-runs the queue to ensure progress on slow servers.")
(cl-defstruct url-queue
  url            ; URL string to fetch
  callback       ; function applied to (STATUS . CBARGS) when the job ends
  cbargs         ; extra arguments passed to CALLBACK after STATUS
  silentp        ; SILENT flag forwarded to `url-retrieve'
  buffer         ; buffer returned by `url-retrieve' once fetching starts
  start-time     ; `float-time' when the fetch was started, nil if waiting
  pre-triggered  ; non-nil if a queue runner has already been scheduled
  inhibit-cookiesp) ; INHIBIT-COOKIES flag forwarded to `url-retrieve'
;;;###autoload
(defun url-queue-retrieve (url callback &optional cbargs silent inhibit-cookies)
  "Retrieve URL asynchronously and call CALLBACK with CBARGS when finished.
This is like `url-retrieve' (which see for details of the arguments),
but with limits on the degree of parallelism.  The variable
`url-queue-parallel-processes' sets the number of concurrent processes.
The variable `url-queue-timeout' sets a timeout."
  ;; Append (rather than push) so jobs start in FIFO order.
  (let ((job (make-url-queue :url url
                             :callback callback
                             :cbargs cbargs
                             :silentp silent
                             :inhibit-cookiesp inhibit-cookies)))
    (setq url-queue (append url-queue (list job))))
  (url-queue-setup-runners))
73 ;; To ensure asynch behavior, we start the required number of queue
74 ;; runners from `run-with-idle-timer'. So we're basically going
75 ;; through the queue in two ways: 1) synchronously when a program
76 ;; calls `url-queue-retrieve' (which will then start the required
77 ;; number of queue runners), and 2) at the exit of each job, which
78 ;; will then not start any further threads, but just reuse the
79 ;; previous "slot".
(defun url-queue-setup-runners ()
  "Schedule a fetch for the first waiting job if a parallel slot is free.
Also ensure the watchdog timer that guarantees progress is running."
  (let ((active 0)
        (next-job nil))
    (dolist (job url-queue)
      (if (or (url-queue-start-time job)
              (url-queue-pre-triggered job))
          (cl-incf active)
        (unless next-job
          (setq next-job job))))
    (when (and next-job
               (< active url-queue-parallel-processes))
      (setf (url-queue-pre-triggered next-job) t)
      ;; We start fetching from this idle timer...
      (run-with-idle-timer 0.01 nil #'url-queue-run-queue)
      ;; And then we set up a separate timer to ensure progress when a
      ;; web server is unresponsive.
      (unless url-queue-progress-timer
        (setq url-queue-progress-timer
              (run-with-idle-timer 1 1 #'url-queue-check-progress))))))
(defun url-queue-run-queue ()
  "Expire timed-out jobs, then start the first waiting job if a slot is free."
  (url-queue-prune-old-entries)
  (let ((active 0)
        (next-job nil))
    (dolist (job url-queue)
      (if (url-queue-start-time job)
          (cl-incf active)
        (unless next-job
          (setq next-job job))))
    (when (and next-job
               (< active url-queue-parallel-processes))
      (setf (url-queue-start-time next-job) (float-time))
      (url-queue-start-retrieve next-job))))
(defun url-queue-check-progress ()
  "Watchdog: keep the queue moving, or cancel the timer once it is empty."
  (when url-queue-progress-timer
    (cond (url-queue
           (url-queue-run-queue))
          (t
           (cancel-timer url-queue-progress-timer)
           (setq url-queue-progress-timer nil)))))
(defun url-queue-callback-function (status job)
  "Handle completion of JOB; STATUS is the plist from `url-retrieve'.
Remove JOB from the queue, flush same-host jobs on connection failure,
start the next queued job, then run JOB's own callback."
  (setq url-queue (delq job url-queue))
  (let ((err (and (eq (car status) :error)
                  (cadr status))))
    (when (eq (cadr err) 'connection-failed)
      ;; If we get a connection error, then flush all other jobs from
      ;; the host from the queue.  This particularly makes sense if the
      ;; error really is a DNS resolver issue, which happens
      ;; synchronously and totally halts Emacs.
      (url-queue-remove-jobs-from-host
       (plist-get (nthcdr 3 err) :host))))
  (url-queue-run-queue)
  (apply (url-queue-callback job) status (url-queue-cbargs job)))
(defun url-queue-remove-jobs-from-host (host)
  "Kill and drop every queued job whose URL refers to HOST."
  (let ((doomed nil))
    ;; Collect first: killing a job runs its callback, so don't mutate
    ;; `url-queue' while iterating over it.
    (dolist (job url-queue)
      (when (equal host
                   (url-host (url-generic-parse-url (url-queue-url job))))
        (push job doomed)))
    (dolist (job doomed)
      (url-queue-kill-job job)
      (setq url-queue (delq job url-queue)))))
(defun url-queue-start-retrieve (job)
  "Hand JOB over to `url-retrieve' and record the resulting buffer.
If starting the fetch signals an error, the buffer slot is left nil;
the watchdog will eventually time the job out."
  (let ((url (url-queue-url job))
        (url-request-noninteractive t))
    (setf (url-queue-buffer job)
          (ignore-errors
            (url-retrieve url
                          #'url-queue-callback-function (list job)
                          (url-queue-silentp job)
                          (url-queue-inhibit-cookiesp job))))))
(defun url-queue-prune-old-entries ()
  "Kill and remove jobs that have run longer than `url-queue-timeout'."
  (let ((expired nil))
    ;; Collect first: killing a job runs its callback, which may itself
    ;; touch `url-queue'.
    (dolist (job url-queue)
      (let ((started (url-queue-start-time job)))
        (when (and started
                   (> (- (float-time) started) url-queue-timeout))
          (push job expired))))
    (dolist (job expired)
      (url-queue-kill-job job)
      (setq url-queue (delq job url-queue)))))
(defun url-queue-kill-job (job)
  "Forcibly stop JOB and notify its caller of the failure.
Any process attached to JOB's buffer is deleted with its sentinel
disabled, then JOB's callback is applied to a `url-queue-timeout'
error status plus JOB's cbargs."
  (let ((buffer (url-queue-buffer job)))
    (when (bufferp buffer)
      (let ((process (get-buffer-process buffer)))
        (while process
          (set-process-sentinel process #'ignore)
          (ignore-errors
            (delete-process process))
          (setq process (get-buffer-process buffer)))))
    ;; Call the callback with an error message to ensure that the caller
    ;; is notified that the job has failed.
    (with-current-buffer
        (if (and (bufferp buffer)
                 (buffer-live-p buffer))
            ;; Use the (partially filled) process buffer if it exists.
            buffer
          ;; If not, just create a new buffer, which will probably be
          ;; killed again by the caller.
          (generate-new-buffer " *temp*"))
      (apply (url-queue-callback job)
             (list :error (list 'error 'url-queue-timeout
                                "Queue timeout exceeded"))
             (url-queue-cbargs job)))))
190 (provide 'url-queue)
192 ;;; url-queue.el ends here