Use the ZIP library instead of run-shell-command for the zip:// protocol.
[closure-html.git] / src / net / http.lisp
blobcd7d6e473259d315b21eb2e81f747a87478a05df
1 ;;; -*- Mode: Lisp; Syntax: Common-Lisp; Package: NETLIB; -*-
2 ;;; ---------------------------------------------------------------------------
3 ;;; Title: HTTP and Stuff
4 ;;; Created: 1997-09-25
5 ;;; Author: Gilbert Baumann <gilbert@base-engineering.com>
6 ;;; License: MIT style (see below)
7 ;;; ---------------------------------------------------------------------------
8 ;;; (c) copyright 1997-2001 by Gilbert Baumann
10 ;;; Permission is hereby granted, free of charge, to any person obtaining
11 ;;; a copy of this software and associated documentation files (the
12 ;;; "Software"), to deal in the Software without restriction, including
13 ;;; without limitation the rights to use, copy, modify, merge, publish,
14 ;;; distribute, sublicense, and/or sell copies of the Software, and to
15 ;;; permit persons to whom the Software is furnished to do so, subject to
16 ;;; the following conditions:
17 ;;;
18 ;;; The above copyright notice and this permission notice shall be
19 ;;; included in all copies or substantial portions of the Software.
20 ;;;
21 ;;; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
22 ;;; EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
23 ;;; MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
24 ;;; IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
25 ;;; CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
26 ;;; TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
27 ;;; SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
29 (in-package :NETLIB)
31 ;;; TODO
33 ;; . I would a higher level interface to making POST request which
34 ;; abstracts from the correct method needed to encode the data and
35 ;; stuff.
37 ;; . Also: there is some interface needed with abstracts above the
38 ;; POST/GET methods. Maybe we would just handle that adding gaining
39 ;; the posted values from the url and gaining the exact encoding
40 ;; method used for such a POST from some url parameter.
42 ;; That is we something like a REQUEST object which denotes a
43 ;; complete request to be made.
45 ;; Also take a peek into HTTP/1.1 and see what is needed for e.g.
46 ;; PUT and stuff.
48 ;; For extra confusion there is both MAKE-HTTP-REQUEST and
49 ;; HTTP-MAKE-REQUEST.
51 ;;; TODO
53 ;; Configuration of access methods must be more flexible. There must
54 ;; be a mapping of an URL to some access method.
56 ;;; Options
58 (defparameter *use-http-proxy-p* nil
59 "Whether to use the HTTP proxy as defined by *HTTP-PROXY-HOST* and *HTTP-PROXY-PORT*.")
61 (defparameter *http-proxy-host* nil
62 "Specifies the HTTP proxy host; see also *USE-HTTP-PROXY-P* and *HTTP-PROXY-PORT*.")
64 (defparameter *http-proxy-port* nil
65 "Specifies the HTTP proxy port; see also *USE-HTTP-PROXY-P* and *HTTP-PROXY-HOST*.")
67 #+NIL
68 (defparameter *http-cache-dir* "test-cache/*"
69 "A directory, where the HTTP document cache resides.")
71 (defparameter *always-use-cache-p* nil)
73 (defparameter *send-host-field-never-the-less-p* t
74 "Insert the 'Host:' request header field when talking directly with a HTTP server?
76 Albeit HTTP/1.0 states that the \"Host:\" request header field should
77 only been sent, when talking with a proxy and *not* when talking
78 with a server directly, some servers seem to need it never the
79 less. Example: 'http://validator.w3.org/images/vh40.gif'. Sigh!
80 So finally make it a user tweakable option.")
82 (defparameter *trace-http-p* nil
83 "Trace all HTTP traffic; see also *HTTP-TRACE-OUTPUT*")
85 (defparameter *http-trace-output* t
86 "Output stream to send http traces to; see also *TRACE-HTTP-P*")
88 (defparameter *trust-expires-p* t
89 "Whether to trust servers 'Expires' header field [and your clock].")
91 (defvar *referer* nil)
93 (defvar *http-cache* nil)
95 ;;;
97 (defparameter *http-proxy-host* "www-cache.rz.uni-karlsruhe.de")
98 (defparameter *http-proxy-port* 3128)
100 (defparameter *user-agent* "Lynx/2.7.1ac-0.98 libwww-FM/2.14")
101 (defparameter *user-agent* "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)")
102 (defparameter *user-agent* "Closure/200507")
106 (defparameter *options/connection-timeout* 30)
107 (defvar *trace-http-p* nil)
110 (defun http-cache ()
111 (or *http-cache*
112 (let ((dir (merge-pathnames
113 (make-pathname
114 :directory '(:relative ".closure" "test-cache")
115 :name nil
116 :type nil
117 :defaults (user-homedir-pathname))
118 (user-homedir-pathname))))
119 (ensure-directories-exist dir)
120 (setf *http-cache* (uncommit-cache dir)))))
122 ;;; ---- HTTP dates ---------------------------------------------------------------------------
124 ;; HTTP-date = rfc1123-date | rfc850-date | asctime-date
125 ;; rfc1123-date = wkday "," SP date1 SP time SP "GMT"
126 ;; rfc850-date = weekday "," SP date2 SP time SP "GMT"
127 ;; asctime-date = wkday SP date3 SP time SP 4DIGIT
129 ;; date1 = 2DIGIT SP month SP 4DIGIT
130 ;; date2 = 2DIGIT "-" month "-" 2DIGIT
131 ;; date3 = month SP ( 2DIGIT | ( SP 1DIGIT ))
132 ;; time = 2DIGIT ":" 2DIGIT ":" 2DIGIT
133 ;; wkday = "Mon" | "Tue" | "Wed" | "Thu" | "Fri" | "Sat" | "Sun"
134 ;; weekday = "Monday" | "Tuesday" | "Wednesday" | "Thursday" | "Friday" | "Saturday" | "Sunday"
135 ;; month = "Jan" | "Feb" | "Mar" | "Apr" | "May" | "Jun" | "Jul" | "Aug" | "Sep" | "Oct" | "Nov" | "Dec"
137 (defun maybe-parse-http-date (string)
138 (and string
139 (let ((toks (split-by-member '(#\space #\tab #\newline #\return #\- #\,) string
140 :nuke-empty-p t))
141 day month year time)
142 (cond ((and (setq day (maybe-parse-day (elt toks 1)))
143 (setq month (maybe-parse-month (elt toks 2)))
144 (setq year (maybe-parse-year (elt toks 3)))
145 (setq time (maybe-parse-time (elt toks 4))))
146 (ignore-errors (encode-universal-time (third time) (second time) (first time)
147 day month year 0)) )
148 ((and (>= (length toks) 5)
149 (setq month (maybe-parse-month (elt toks 1)))
150 (setq day (maybe-parse-day (elt toks 2)))
151 (setq time (maybe-parse-time (elt toks 3)))
152 (setq year (maybe-parse-year (car (last toks)))))
153 (ignore-errors (encode-universal-time (third time) (second time) (first time)
154 day month year 0)) )
156 nil)))))
158 (defun maybe-parse-day (string)
159 (and (<= 1 (length string) 2)
160 (let ((r (maybe-parse-integer string)))
161 (and (<= 1 r 31) r))))
163 (defun maybe-parse-year (string)
164 (cond ((= (length string) 4)
165 (maybe-parse-integer string) )
166 ((= (length string) 2)
167 (maybe-parse-integer string))))
169 (defun maybe-parse-month (string)
170 (let ((r (position string '#("Jan" "Feb" "Mar" "Apr" "May" "Jun"
171 "Jul" "Aug" "Sep" "Oct" "Nov" "Dec")
172 :test #'string-equal)))
173 (and r (+ r 1))))
175 (defun maybe-parse-time (string)
176 (let ((s (split-by #\: string))
177 hour minute second)
178 (cond ((and (= 3 (length s))
179 (setq hour (maybe-parse-integer (elt s 0)))
180 (setq minute (maybe-parse-integer (elt s 1)))
181 (setq second (maybe-parse-integer (elt s 2)))
182 (<= 0 hour 23)
183 (<= 0 minute 59)
184 (<= 0 second 59))
185 (list hour minute second)) )))
187 (defun unparse-http-date (ut)
188 ;; nach rfc1123
189 (multiple-value-bind (second minute hour date month year day) (decode-universal-time ut 0)
190 (format nil
191 "~[Mon~;Tue~;Wed~;Thu~;Fri~;Sat~;Sun~], ~
192 ~2,'0D ~[Jan~;Feb~;Mar~;Apr~;May~;Jun~;Jul~;Aug~;Sep~;Oct~;Nov~;Dec~;~] ~4,'0D ~
193 ~2,'0D:~2,'0D:~2,'0D GMT"
194 day date (1- month) year hour minute second)))
197 ;;; ---- basic http protocol ------------------------------------------------------------------
199 (defun http-send-request-line (sink method uri http-version)
200 (g/write-string method sink)
201 (g/write-char #\space sink)
202 (g/write-string uri sink)
203 (g/write-char #\space sink)
204 (g/write-string http-version sink)
205 (g/write-byte 13 sink)
206 (g/write-byte 10 sink))
208 (defun http-send-request-header (sink header)
209 (dolist (k header)
210 (g/write-string (car k) sink)
211 (g/write-char #\: sink)
212 (g/write-char #\space sink)
213 (g/write-string (cdr k) sink)
214 (g/write-byte 13 sink)
215 (g/write-byte 10 sink)))
217 (defun http-send-request (sink method uri http-version header)
218 (http-send-request-line sink method uri http-version)
219 (http-send-request-header sink (cons (cons "User-Agent" *user-agent*) header))
220 (g/write-byte 13 sink)
221 (g/write-byte 10 sink))
223 (defun read-response-line (input)
224 (finish-output (slot-value input 'glisp:cl-stream))
225 (let ((res (g/read-line* input)))
226 (when *trace-http-p*
227 (ignore-errors
228 (format *http-trace-output* "~%;; <-- ~A." res)
229 (finish-output *http-trace-output*)))
230 (parse-response-line res)))
232 (defun parse-response-line (string)
233 (let* ((p0 (position #\space string))
234 (p1 (position #\space string :start (+ p0 1))))
235 (unless (and p0 p1 (= (- p1 p0) 4))
236 (error "HTTP repsonse line '~A' has illegal syntax." string))
237 (values (string-upcase (subseq string 0 p0))
238 (parse-integer string :junk-allowed nil :start (+ 1 p0) :end p1)
239 (subseq string (+ 1 p1)))))
241 (defun make-http-request (io method uri http-version header &optional data)
242 ;; -> version code explanation header
243 (http-send-request io method uri http-version header)
244 (when (and *trace-http-p*)
245 (ignore-errors
246 (format *http-trace-output* "~&;; --> ")
247 (loop for ch across
248 (with-output-to-string (sink)
249 (let ((q (make-instance 'glisp:cl-char-stream :cl-stream sink)))
250 (http-send-request q method uri http-version header)))
252 (cond ((char= ch #\newline)
253 (terpri *http-trace-output*)
254 (princ ";; --> " *http-trace-output*))
256 (princ ch *http-trace-output*))))))
257 (when (equal method "POST") ;zzz
258 (cond ((stringp data)
259 (dotimes (i (length data))
260 (g/write-byte (char-code (char data i)) io)))
262 (dotimes (i (length data))
263 (g/write-byte (aref data i) io)))))
264 (multiple-value-bind (version code explanation) (read-response-line io)
265 (values version code explanation
266 (read-response-header io))))
268 (defun read-response-header (input)
269 (let ((res nil))
270 (do ((line (g/read-line* input) (g/read-line* input)))
271 ((= (length line) 0)
272 (nreverse res))
273 (when *trace-http-p*
274 (ignore-errors
275 (format *http-trace-output* "~&;; <-- ~A" line)
276 (finish-output *http-trace-output*)))
277 (let (p)
278 (cond ((or (char= (char line 0) #\space)
279 (char= (char line 0) #\tab))
280 (if (null res)
281 (error "Badly formed response header.")
282 (setf (cdar res)
283 (concatenate 'string (cdar res) " " (string-trim '(#\space #\tab) line)))))
284 ((setq p (search ": " line))
285 (push (cons (subseq line 0 p) (subseq line (+ p 2))) res))
287 (warn "Badly formed response header line: '~A' -- ignored." line)) )))))
289 (defun unparse-url-for-http (url)
290 (let ((url (url::copy-url url)))
291 (setf (url:url-protocol url) nil
292 (url:url-host url) nil
293 (url:url-port url) nil
294 (url:url-user url) nil
295 (url:url-password url) nil
296 (url:url-parameters url) nil
297 (url:url-anchor url) nil
299 (url:unparse-url url)))
301 (defun unparse-url-for-http/proxy (url)
302 (let ((url (url::copy-url url)))
303 (setf (url:url-user url) nil
304 (url:url-password url) nil
305 (url:url-anchor url) nil)
306 (url:unparse-url url)))
308 (defun open-socket-for-http (url)
309 "This is the basic switch to decide whether to use a proxy and which proxy to use."
310 ;; -> io proxyp
311 (let* ((host (or (url:url-host url) "localhost"))
312 (https-p (string= (url:url-protocol url) "https"))
313 ;; ### HTTPS support doesn't exist
314 (port (or (url:url-port url)
315 (if https-p
317 80)))
318 (proxyp (and *use-http-proxy-p*
319 (= port 80)
320 (not (url:url-port url))
321 (not (string-equal host "localhost")))))
322 (values
323 (cl-byte-stream->gstream
324 (if proxyp
325 (trivial-sockets:open-stream *http-proxy-host*
326 *http-proxy-port*
327 :element-type '(unsigned-byte 8))
328 (trivial-sockets:open-stream host
329 port
330 :element-type '(unsigned-byte 8))))
331 proxyp)))
333 (defun http-make-request (method url header post-data)
334 "Makes a single HTTP request for the URL url;
335 Returns: io protocol-version response-code response-message response-header."
336 ;; eval hack
337 #+NIL
338 (cond ((string-equal (url:url-host url) "images.cjb.net")
339 (error "No data from images.cjb.net!")))
340 (when *trace-http-p*
341 (ignore-errors
342 (format *http-trace-output* "~&;; Making ~S request for ~S ..." method url)
343 (finish-output *http-trace-output*)))
344 (let ((host (or (url:url-host url) "localhost")))
345 (multiple-value-bind (io proxyp) (open-socket-for-http url)
346 (let ((method-string (ecase method (:GET "GET") (:POST "POST")))
347 (url-for-server (if proxyp
348 (unparse-url-for-http/proxy url)
349 (unparse-url-for-http url)))
350 (header (append (if (and (or *send-host-field-never-the-less-p*
351 proxyp)
352 (not (member :host header :test #'string-equal :key #'car)))
353 (if (and (numberp (url:url-port url)) (not (= (url:url-port url) 80)))
354 (list (cons "Host" (format nil "~A:~A" host (url:url-port url))))
355 (list (cons "Host" host)))
356 nil)
357 (if *referer*
358 (list (cons "Referer" (if (url:url-p *referer*)
359 (url:unparse-url *referer*)
360 *referer*)))
361 nil)
362 (if (eq method :post)
363 (list (cons "Content-Length" (format nil "~D" (length post-data))))
364 nil)
365 header)))
366 (multiple-value-bind (protocol-version response-code response-message response-header)
367 (make-http-request io method-string url-for-server "HTTP/1.0" header post-data)
368 (values io protocol-version response-code response-message response-header))))))
370 (defun get-header-field (header field)
371 (check-type field (member :LOCATION :LAST-MODIFIED
372 :CONTENT-TYPE :EXPIRES
373 :PRAGMA :DATE
374 :CONTENT-LENGTH))
375 (cdr (assoc field header :test #'string-equal)) )
377 ;;; ---- HTTP Cache ---------------------------------------------------------------------------
381 ;; get-hce cache url
382 ;; put-hce cache hce
383 ;; invent-cache-filename cache
384 ;; uncommit-cache directory -> cache
385 ;; commit-cache cache
387 ;; o Timeout handhaben
388 ;; o hook fuer 'nk von mk gelesen (r bps)'
389 ;; o expire routine fuer den cache schreiben.
390 ;; o robustere fehler behandung
391 ;; o dem 'benuzter' eine moeglichkeit geben zusaetzliche header felder zu uebergeben
392 ;; o schon mal ueber cookies nachdenken
393 ;; o so aus Spass schon mal eine CLM-Routine schreiben, um sich den Cache an zu schauen.
395 ;; funktioniert bis jetzt aber gut ...
398 (defstruct http-cache
399 lock
400 entries
401 serial
402 directory)
404 (defstruct (http-cache-entry (:conc-name "HCE-"))
406 last-modified ;NIL or UT server time
407 expires ;NIL or UT client time
408 ; we don't make use of this field yet
409 last-visit ;NIL or UT client time
410 filename ;file name in cache
411 filetype ;file type in cache
412 original-header) ;original document header
414 (defun uncommit-cache (directory)
415 (let ((fn (merge-pathnames "index" directory)))
416 (cond ((probe-file fn)
417 (with-open-file (stream fn :direction :input)
418 (let ((*package* (symbol-package 'http-cache-entry)))
419 (let ((res (make-http-cache :lock (bordeaux-threads:make-lock "HTTP cache lock")
420 :directory directory
421 :entries (make-hash-table :test #'equal))))
422 (setf (http-cache-serial res) (read stream))
423 (do ((x (read stream nil nil) (read stream nil nil)))
424 ((null x))
425 (assert (http-cache-entry-p x))
426 (put-hce res x))
427 res))))
429 (make-http-cache :lock (bordeaux-threads:make-lock "HTTP cache lock")
430 :directory directory
431 :entries (make-hash-table :test #'equal)
432 :serial 0)) )))
434 (defun commit-cache (&optional (cache (http-cache)))
435 (bordeaux-threads:with-recursive-lock-held ((http-cache-lock cache))
436 (with-open-file (sink (merge-pathnames "index" (http-cache-directory cache))
437 :direction :output :if-exists :new-version)
438 (let ((*print-pretty* nil)
439 (*print-readably* t)
440 (*package* (symbol-package 'http-cache-entry)))
441 (print (http-cache-serial cache) sink)
442 (maphash (lambda (key value)
443 (declare (ignore key))
444 (print value sink))
445 (http-cache-entries cache)) ))) )
447 (defun invent-cache-filename (cache)
448 (bordeaux-threads:with-recursive-lock-held ((http-cache-lock cache))
449 (format nil "~5,'0D" (incf (http-cache-serial cache)))))
451 (defun get-hce (cache url)
452 (bordeaux-threads:with-recursive-lock-held ((http-cache-lock cache))
453 (gethash url (http-cache-entries cache))))
455 (defun put-hce (cache hce)
456 (bordeaux-threads:with-recursive-lock-held ((http-cache-lock cache))
457 ;; if there was already an entry for that URL with under a different filename,
458 ;; delete the old file
459 (let ((old-ce (gethash (hce-url hce) (http-cache-entries cache))))
460 (cond ((and old-ce (not (and (string-equal (hce-filename old-ce) (hce-filename hce))
461 (string-equal (hce-filetype old-ce) (hce-filetype hce)))))
462 (ignore-errors (delete-file (hce-pathname cache old-ce))))))
463 (setf (gethash (hce-url hce) (http-cache-entries cache)) hce)))
465 (defun hce-pathname (cache hce)
466 (merge-pathnames (make-pathname :name (hce-filename hce)
467 :type (hce-filetype hce))
468 (http-cache-directory cache)))
470 ;;; ---------------------------------------------------------------------------
472 (defun http-open-document (url
473 &rest options
474 &key (yet-urls nil)
475 (method :get)
476 (post-data )
477 (post-header ))
478 ;; -> io ; header
479 (cond
480 ;; url lacks a path?
481 ((null (url:url-path url))
482 (let ((new-url (url:copy-url url)))
483 (setf (url:url-path new-url) (list :ABSOLUTE ""))
484 (multiple-value-bind (input header)
485 (apply #'http-open-document new-url options)
486 (values input
487 (append header
488 (list (cons "Location" (url:unparse-url new-url))))))))
490 ;; circular 302 chain?
491 ((member url yet-urls :test #'url:url-equal-p)
492 (error "Circular 301/302 chain: ~S." yet-urls))
495 (let ((ce (and (eq method :get)
496 (get-hce (http-cache) (url:unparse-url url))))
497 (hd nil)
498 (cache-p (and (eq method :get)
499 (null (url:url-query url))
500 (null (url:url-password url)))))
501 (when (and *trust-expires-p*
503 (integerp (hce-expires ce))
504 (> (hce-expires ce) (get-universal-time)))
505 (format *http-trace-output* "~&;; Serving ~S from cache." url)
506 (return-from http-open-document
507 (values (cl-byte-stream->gstream (open (hce-pathname (http-cache) ce)
508 :direction :input
509 :element-type '(unsigned-byte 8)))
510 (hce-original-header ce))))
511 (when (and ce (hce-last-modified ce))
512 (push (cons "If-Modified-Since"
513 (unparse-http-date (1+ (hce-last-modified ce)))) hd))
514 (when (and ce *always-use-cache-p*)
515 (format *http-trace-output* "~&;; Unkosherly serving ~S from cache." url)
516 (return-from http-open-document
517 (values (cl-byte-stream->gstream (open (hce-pathname (http-cache) ce)
518 :direction :input
519 :element-type '(unsigned-byte 8)))
520 (hce-original-header ce))))
521 (multiple-value-bind (io protocol-version
522 response-code response-message
523 response-header)
524 (http-make-request method url (append hd post-header) post-data)
525 (case response-code
526 ((200 204)
527 ;; everything fine
528 (let* ((her-date (maybe-parse-http-date (get-header-field response-header :date)))
529 (her-expires (maybe-parse-http-date (get-header-field response-header :expires)))
530 (really-cache-p (and cache-p
532 (not (null (get-header-field response-header :last-modified)))
533 ;; xxx when exactly to cache?
534 (and her-date her-expires
535 (> her-expires her-date))))))
536 ;; the logic below is incomplete; it should also be looking at Cache-control: headers
537 ;; emarsden2005-07-19
538 #+(and)
539 (when cache-p
540 (unless really-cache-p
541 (warn "~A will not be cached; cache-p = ~S; header: ~S"
542 url cache-p response-header)))
543 (values
544 (make-instance 'http-stream
545 :header response-header
546 :url (url:unparse-url url)
547 :input io
548 :buffer (make-array 1024
549 :element-type '(unsigned-byte 8)
550 :adjustable t
551 :fill-pointer 0)
552 :cache-p really-cache-p
553 :my-expires (and her-date her-expires
554 (+ (get-universal-time)
555 (- her-expires her-date))) )
556 response-header)))
558 ((301 302 303)
559 ;; moved permanently; moved temporary; see other
561 ;; the Location field may be either a complete URI, or just a path
562 (let* ((new-location (or (url:parse-url
563 (get-header-field response-header :location))
564 (error "301/302 Response from ~A lacks a 'Location' field."
565 (url:url-host url))))
566 (new-url (if (url:url-host new-location) new-location
567 (url:merge-url new-location url))))
568 (multiple-value-bind (input header)
569 (apply #'http-open-document new-url :yet-urls (cons url yet-urls) options)
570 (values input `(,@header ("Location" . ,(unparse-url new-url)))))))
572 (304
573 ;; not modified
574 (values (cl-byte-stream->gstream (open (hce-pathname (http-cache) ce)
575 :direction :input
576 :element-type '(unsigned-byte 8)))
577 (hce-original-header ce)))
579 (otherwise
580 (warn "Response ~D (~A) from ~A not understood."
581 response-code response-message (url:url-host url))
582 (values io response-header)) )) )) ))
584 (defclass http-stream (use-byte-for-char-stream-flavour gstream)
585 ((header :initarg :header)
586 (url :initarg :url)
587 (input :initarg :input)
588 (buffer :initarg :buffer)
589 (eof-seen-p :initform nil)
590 (cache-p :initarg :cache-p)
591 (my-expires :initarg :my-expires :initform nil) ;servers expires in my date
594 (defmethod g/read-byte ((stream http-stream) &optional (eof-error-p t) eof-value)
595 (with-slots (input eof-seen-p buffer cache-p) stream
596 (let ((ch (g/read-byte input nil :eof)))
597 (cond ((eq ch :eof)
598 (setf eof-seen-p t)
599 (when eof-error-p
600 (error "EOF seen on ~S." stream))
601 eof-value)
603 (when cache-p
604 (vector-push-extend ch buffer 1024))
605 ch) ))))
607 (defmethod g/unread-byte (byte (stream http-stream))
608 (with-slots (input buffer cache-p) stream
609 (when cache-p
610 (decf (fill-pointer buffer)))
611 (g/unread-byte byte input)))
613 (defmethod g/read-byte-sequence (sequence (stream http-stream)
614 &key (start 0) (end (length sequence)))
615 (with-slots (input buffer eof-seen-p cache-p) stream
616 (let ((n (g/read-byte-sequence sequence input :start start :end end)))
617 (when (= start n)
618 (setf eof-seen-p t))
619 (when cache-p
620 (do ((i start (+ i 1)))
621 ((>= i n))
622 (vector-push-extend (elt sequence i) buffer 1024)))
623 n)))
625 (defmethod g/write-byte (byte (stream http-stream))
626 (error "No ~S on ~S." 'g/write-byte stream))
628 (defmethod g/finish-output ((stream http-stream))
629 nil)
631 (defmethod g/close ((stream http-stream) &key abort)
632 (with-slots (cache-p eof-seen-p header url buffer input my-expires) stream
633 (g/close input :abort abort)
634 (when (and (not abort) cache-p eof-seen-p)
635 (let ((filename (invent-cache-filename (http-cache)))
636 (filetype nil))
638 (when (get-header-field header :content-type)
639 (multiple-value-bind (type subtype parameters)
640 (parse-mime-content-type (get-header-field header :content-type))
641 (declare (ignore parameters))
642 (let ((mt (find-mime-type (format nil "~A/~A" type subtype))))
643 (when mt
644 (setf filetype (car (mime-type-extensions mt)))))))
646 (let ((ce (make-http-cache-entry
647 :url url
648 :last-modified (and (get-header-field header :last-modified)
649 (maybe-parse-http-date
650 (get-header-field header :last-modified)))
651 :expires my-expires
652 :last-visit (get-universal-time)
653 :filename filename
654 :filetype filetype
655 :original-header header)))
656 (cond ((probe-file (hce-pathname (http-cache) ce))
657 (warn "File already exists: '~A'."
658 (namestring (hce-pathname (http-cache) ce)))
659 (ignore-errors (delete-file (hce-pathname (http-cache) ce)))))
660 (with-open-file (sink (hce-pathname (http-cache) ce)
661 :direction :output
662 :element-type '(unsigned-byte 8))
663 (g/write-byte-sequence buffer (cl-byte-stream->gstream sink)
664 :start 0
665 :end (fill-pointer buffer)))
666 (put-hce (http-cache) ce))))))
668 ;;; -------------------------------------------------------------------------------------------
669 ;;; File protocol
672 ;; Ich will jetzt folgenes:
673 ;; a. file:/foo/bar/baz und file:/foo/bar/baz/ sollen zunaechst gleich behandelt werden.
674 ;; b. wenn der name ein directory bezeichnet:
675 ;; b1. es ex. eine datei index.html -> diese liefern
676 ;; b2. sonst ein standard UNIX Listing decorieren
677 ;; c. wenn der name eine datei ist -> kein problem wie immer
678 ;; d. sonst fehler
680 ;; Alles unter UNIX:
682 ;; CLISP:
683 ;; [COMMON-LISP-USER]> (setq i (open "." :direction :input))
685 ;; *** - no file name given: #P"/usr/gilbert/closure/src/."
686 ;; 1. Break[COMMON-LISP-USER]>
687 ;; [COMMON-LISP-USER]> (setq i (open "/usr/gilbert" :direction :input))
689 ;; *** - NAMESTRING: "/usr/gilbert" names a directory, not a file
690 ;; 1. Break[COMMON-LISP-USER]>
692 ;; ACL 4.3
693 ;; USER(1): (setq i (open "." :direction :input))
694 ;; Error: File #p"./" is a DIRECTORY type file and cannot reasonably be opened as
695 ;; a Lisp stream.
696 ;; [condition type: FILE-ERROR]
698 ;; CMUCL 18a+
699 ;; * (setq i (open "." :direction :input))
700 ;; #<Stream for file ".">
701 ;; * (read-char i)
702 ;; Error in function COMMON-LISP::FD-STREAM-READ-N-BYTES:
703 ;; Error reading #<Stream for file ".">: Is a directory
705 ;; GCL
706 ;; >(setq i (open "." :direction :input))
707 ;; #<input stream ".">
708 ;; >(read-char i)
709 ;; #\\377 ;; Benutzen die stdio oder was?!
711 (defun really-probe-file (filename)
712 ;; Probe that a file named 'filename' is really readable. The only
713 ;; reliable way to achieve that is to actual attempt to open the
714 ;; file; But beware: Under some UNIX implementations a directory
715 ;; is actually readable like any other file.
716 ;; Under UNIX it would be probably better to spawn a 'ls -l'.
717 (and (ignore-errors (probe-file filename))
718 (let (stream)
719 (unwind-protect
720 (progn
721 (setf stream (ignore-errors (open filename :direction :input
722 :element-type '(unsigned-byte 8)
723 :if-does-not-exist nil)))
724 (and stream
725 (ignore-errors (read-byte stream nil :eof))))
726 (when stream
727 (close stream)) )) ))
729 #+CMU
730 (defun really-probe-file (filename)
731 ;; Probe that a file named 'filename' is really readable. The only
732 ;; reliable way to achieve that is to actual attempt to open the
733 ;; file; But beware: Under some UNIX implementations a directory
734 ;; is actually readable like any other file.
735 ;; Under UNIX it would be probably better to spawn a 'ls -l'.
736 (and (ignore-errors (probe-file filename))
737 (not (is-directory-p filename))))
739 #+CLISP
740 (defun is-directory-p (pathname)
741 (ignore-errors (lisp:probe-directory pathname)))
743 #+CMU
744 (defun is-directory-p (pathname)
745 (multiple-value-bind (success? dev ino mode) (unix:unix-stat (namestring pathname))
746 (declare (ignore dev ino))
747 (and success? (logbitp 14 mode))))
749 #+CMU
750 ;; #define S_IFMT 00170000
751 ;; #define S_IFSOCK 0140000
752 ;; #define S_IFLNK 0120000
753 ;; #define S_IFREG 0100000
754 ;; #define S_IFBLK 0060000
755 ;; #define S_IFDIR 0040000
756 ;; #define S_IFCHR 0020000
757 ;; #define S_IFIFO 0010000
758 (defun is-regualar-file-p (pathname)
759 (multiple-value-bind (success? dev ino mode) (unix:unix-stat (namestring pathname))
760 (declare (ignore dev ino))
761 (and success? (= (logand mode #o170000) #o100000))))
763 #+ALLEGRO
764 (defun is-directory-p (pathname)
765 (let ((pn (mungle-pathname-to-directory pathname)))
766 (probe-file (merge-pathnames pn (make-pathname :name ".")))))
768 #+sbcl
769 (defun is-directory-p (pathname)
770 (multiple-value-bind (success? dev ino mode)
771 (sb-unix:unix-stat (namestring pathname))
772 (declare (ignore dev ino))
773 (and success? (logbitp 14 mode))))
775 #-(OR CLISP CMU ALLEGRO SBCL)
776 (progn
777 #.(warn "Define IS-DIRECTORY-P for your flavour of LISP.")
778 (defun is-directory-p (pathname)
779 (eql 0 (glisp:run-unix-shell-command (format nil "test -d ~A" (namestring pathname))))))
781 ;; Wuenschenwert waere natuerlich auch, wenn wir es vermeiden wuerden
782 ;; von Geraeten und Sockets und der gleichen mehr zu lesen.
784 (defun mungle-pathname-to-directory (pathname)
785 (let ((dir (or (pathname-directory pathname)
786 (list :relative)))
787 (name (pathname-name pathname))
788 (type (pathname-type pathname)))
789 (cond ((and name type)
790 (make-pathname :name nil
791 :type nil
792 :directory (append dir (list (concatenate 'string name "." type)))
793 :host (pathname-host pathname)
794 :device (pathname-device pathname)
795 :version (pathname-version pathname)))
796 (name
797 (make-pathname :name nil
798 :type nil
799 :directory (append dir (list (concatenate 'string name)))
800 :host (pathname-host pathname)
801 :device (pathname-device pathname)
802 :version (pathname-version pathname)))
803 (type
804 (make-pathname :name nil
805 :type nil
806 :directory (append dir (list (concatenate 'string "." type)))
807 :host (pathname-host pathname)
808 :device (pathname-device pathname)
809 :version (pathname-version pathname)))
811 pathname))))
813 (defun url-with-index.html-appended (url)
814 (setf url (url-with-slash-appened url))
815 (let ((new (url:copy-url url)))
816 (setf (url:url-path new) (append (butlast (url:url-path url))
817 (list "index.html")))
818 new))
820 (defun url-with-slash-appened (url)
821 (cond ((string-equal "" (car (last (url:url-path url))))
822 url)
824 (let ((new (url:copy-url url)))
825 (setf (url:url-path new) (append (url:url-path url)
826 (list "")))
827 new))))
829 (defun open-file-document (url)
830 ;; this optimize declaration is to prevent an ACL compiler bug:
831 #+EXCL (declare (optimize (speed 0) (safety 3)))
832 (labels ((try (url)
833 (let ((pathname (url:url-pathname url)))
834 (cond ((string-equal "" (car (last (url:url-path url))))
835 (open-directory-as-file-document pathname))
836 ((really-probe-file pathname)
837 (multiple-value-bind (input header)
838 (open-file-document-2 pathname)
839 (values input
840 (append header
841 (list (cons "Location"
842 (url:unparse-url url :readably-p t)))))))
844 nil)))))
845 (multiple-value-or
846 (try url)
847 (multiple-value-or
848 (try (url-with-index.html-appended url))
849 (multiple-value-or
850 (try (url-with-slash-appened url))
851 (make-file-not-found-stream (url:url-pathname url)))))))
853 (defun pathname->url (pathname)
854 (let ((dir (pathname-directory pathname))
855 (name (pathname-name pathname))
856 (type (pathname-type pathname)))
857 (concatenate 'string
858 (ecase (car dir)
859 (:absolute
860 (format nil "file:/~{~A/~}" (cdr dir)))
861 (:relative
862 (format nil "file:~{~A/~}" (cdr dir))))
863 (cond ((and name type) (format nil "~A.~A" name type))
864 (name (format nil "~A" name))
865 (type (format nil ".~A" type))
866 (t "")))))
868 (defun open-directory-as-file-document (pathname)
869 (values
870 (cl-char-stream->gstream
871 (make-string-input-stream
872 (with-output-to-string (sink)
873 (format sink "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0 Transitional//EN\">~%")
874 (format sink "<LINK REL=STYLESHEET HREF='file://closure/resources/css/directory.css' TYPE='text/css'>")
875 (format sink "<TABLE width='100%'>~%")
876 (format sink "<COLGROUP><COL width='0*'><COL width='0*'><COL width='1*'></COLGROUP>")
877 (format sink "<TR><TH><B>Date</B><TH><B>Size</B><TH><B>Filename</B></TH>~%")
878 (format sink "<TR><TD colspan=3><HR>")
879 (let ((file-names (directory (merge-pathnames (make-pathname :name :wild :type :wild)
880 pathname))))
881 (setf file-names (sort file-names #'string< :key #'namestring))
882 (dolist (k file-names)
883 (let ((url (pathname->url k))
884 (nam (namestring k))
885 (size (and #+CMU (is-regualar-file-p k)
886 (ignore-errors (with-open-file (in k :direction :input)
887 (file-length in)))))
888 (date (ignore-errors (unparse-http-date (file-write-date k)))))
889 (format sink "<TR><TD class=date>~A<TD right class=size>~A<TD class=name><A href='~A'>~A</a>~%"
890 (substitute (code-char 160) #\space date)
891 size url nam))))
892 (format sink "</TABLE>~%"))))
893 (list (cons "Content-Type" "text/html")) ))
895 (defun make-file-not-found-stream (pathname)
896 (setq pn pathname)
897 (error "File not found ~S" pathname))
899 (defun open-file-document-2 (pathname)
900 ;; -> io ; header
901 (let ((input (open pathname :direction :input :element-type '(unsigned-byte 8))))
902 (let ((len (ignore-errors (file-length input)))
903 (tm (ignore-errors (file-write-date input)))
904 (mt (and (stringp (pathname-type pathname))
905 (find-mime-type-from-extension (pathname-type pathname)))))
906 (values (cl-byte-stream->gstream input)
907 (append (if len (list (cons "Content-Length" (format nil "~D" len))) nil)
908 (list (cons "Content-Type" (if mt
909 (mime-type-name mt)
910 "text/plain")))
911 (if tm (list (cons "Last-Modified" (unparse-http-date tm))) nil))) )))
913 ;;; -------------------------------------------------------------------------------------------
914 ;;; zip protocol
917 ;; Reading on-line documentation thru' an HTML browser has various drawbacks.
918 ;; Since most files are smaller than a reasonable file system block size, all
919 ;; these little files consume more disk space than necessary. The solution is
920 ;; to put all the files into a .zip file and browse them from therein.
922 ;; zip files are "seekable", extracting a random archive element is
923 ;; in O(1) wrt to the number of archive components. With .tar.gz
924 ;; files it would involve decompression of the whole archive first,
925 ;; which is O(n).
927 ;; When specifying a document within a zip file, the `zip:' protocol is
928 ;; used. A zip url consists of two parts: the specification of the zip
929 ;; archive and the specification of the document within that archive. Syntax
930 ;; is as follows:
933 ;; zip:/<filename of zip archive>/<filename of document within archive>
935 ;; e.g. #u"zip:documents/glspec.zip/index.html"
938 ;; If no filename compoment is given the default is 'index.html' as
939 ;; always.
941 ;; <FUTURE>
943 ;; However this syntax is a kludge, since it involves an empty path component
944 ;; as delimiter between the archive name and the document name. It would be
945 ;; much more orthogonal, if the specification of the archive file itself could
946 ;; be any url. Of course this would nested recursive urls. My suggestion for this is
947 ;; using the host component and dropping in an url instead of a real host
948 ;; name. You probably need parenthesis to be able to nest unambiguously.
950 ;; So #u"zip:documents/glspec.zip//index.html"
951 ;; could become #u"zip://[file:documents/glspec.zip]/index.html"
953 ;; These urls could get recursive then, so it would be possible to say
954 ;; something like:
955 ;; #u"zip://[zip://[file:/cdrom/dist/html-docu.zip]/opengl/glspec.zip]/index.html"
957 ;; I'll eventually implement the above behaviour.
959 ;; </FUTURE>
961 ;; Back to what is actually implemented. To read a document from within a zip
962 ;; archive, we simply use the ZIP library. So you must have it installed
963 ;; for a working zip protocol.
965 ;; TODO
966 ;; - detect non-existing archives and non-existing archive documents.
967 ;; - when no archive file name is given, attempt to format the zip file
968 ;; directory as HTML, to be able to inspect the zip file.
970 (defun open-zip-document (url)
971 (multiple-value-bind (zip-archive-pathname archive-component-file-name)
972 (split-zip-url url)
973 (cond
974 ((null zip-archive-pathname)
975 (error "Bad zip url: ~S" url))
977 (values
978 (cl-byte-stream->gstream
979 (flexi-streams:make-in-memory-input-stream
980 (zip:with-zipfile (zip zip-archive-pathname)
981 (zip:zipfile-entry-contents
982 (zip:get-zipfile-entry archive-component-file-name zip)))))
983 (list (cons "Content-Type"
984 (let ((mt (find-mime-type-from-extension
985 (url-extension url))))
986 (if mt
987 (mime-type-name mt)
988 "text/plain")))))))))
990 (defun split-zip-url (url)
991 ;; -> zip-archive-pathname ; archive-component-file-name
992 ;; or NIL
993 (let* ((path (url:url-path url))
994 (len (length path))
996 (dotimes (i (length path))
997 (setq n (url::copy-url url))
998 (setf (url:url-path n) (subseq path 0 (- len i)))
999 (when (ignore-errors (probe-file (url:url-pathname n)))
1000 (let ((component-filename (format nil "~{~A~#[~:;/~]~}" (subseq path (- len i)))))
1001 (when (or (string-equal component-filename "")
1002 (char= (char component-filename (1- (length component-filename))) #\/))
1003 (setf component-filename (concatenate 'string component-filename "index.html")))
1004 (return (values (url:url-pathname n) component-filename)) )))))
1006 ;;; ===========================================================================================
1007 ;;; Main Entry
1010 (defun open-document-2 (url)
1011 ;; returns I/O and header
1012 (cond ((string-equal (url:url-protocol url) "file")
1013 (open-file-document url))
1014 ((string-equal (url:url-protocol url) "http")
1015 (http-open-document url :method :get))
1016 ((string-equal (url:url-protocol url) "https")
1017 (http-open-document url :method :get))
1018 ((string-equal (url:url-protocol url) "zip")
1019 (open-zip-document url))
1020 ((string-equal (url:url-protocol url) "ftp")
1021 (open-ftp-document url))
1023 (error "Unknown URL scheme in ~S" url))))
1025 ;;; ===========================================================================================
1026 ;;; fake of ancient API
1028 (defun open-document (url &optional reload-p binary-p any-mine-type?)
1029 "Opens an http document and returns two values: stream and mime-type"
1030 (multiple-value-bind (io header) (open-document-2 url)
1031 (values io
1032 (multiple-value-bind (type subtype parameters)
1033 (parse-mime-content-type (get-header-field header :content-type))
1034 (find-mime-type (format nil "~A/~A" type subtype))))))
1036 (defmacro with-open-document (((input mime-type) url
1037 &optional (reload-p nil) (binary-p nil) (cache-p t) (any-p nil))
1038 &rest body)
1039 `(with-open-document-fn ,url ,reload-p ,binary-p ,cache-p ,any-p
1040 #'(lambda (,input ,mime-type)
1041 ,@body)))
1043 (defun with-open-document-fn (url reload-p binary-p cache-p any-p cont)
1044 (multiple-value-bind (input mime) (open-document url reload-p binary-p any-p)
1045 (unwind-protect
1046 (funcall cont input mime)
1047 (g/close input))))
1051 (defun dump-cache-toc (&optional (cache (http-cache)))
1052 (maphash (lambda (key value)
1054 (print (list (hce-url value)
1055 (hce-filename value)
1056 (hce-filetype value))))
1057 (http-cache-entries cache)))
1061 (defclass proxy-http-connection-mixin () ())
1062 (defclass direct-http-connection-mixin () ())
1063 (defclass ssl-http-connection-mixin () ())
1064 (defclass tcp-http-connection-mixin () ())
1066 (defmethod format-url ((connection proxy-http-connection-mixin) url)
1067 (unparse-url-for-http/proxy url))
1069 (defmethod format-url ((connection direct-http-connection-mixin) url)
1070 (unparse-url-for-http url))
1072 (defmethod make-client-socket ((connection ssl-http-connection-mixin) host port)
1073 (g/open-inet-socket-ssl host port))
1075 (defmethod make-client-socket ((connection http-connection) host port)
1076 (g/open-inet-socket host port))
1081 (defclass uri () ())
1082 (defclass pathname-uri-mixin () ())
1083 (defclass an-http-url (uri pathname-uri-mixin) ())
1084 (defclass http-url (an-http-url) ())
1085 (defclass https-url (an-http-url) ())
1086 (defclass file-url (uri pathname-uri-mixin) ())
1087 (defclass zip-url (uri pathname-uri-mixin) ())
1088 (defclass ftp-url (uri pathname-uri-mixin) ())
1090 (defmethod open-document ((uri an-http-url)
1091 &key direction)
1094 (defclass file-document () ())
1095 (defclass directory-document () ())
1100 ;;;; ===========================================================================================
1102 (defclass ftp-access-method () ())
1104 (defmethod am/open-document ((self ftp-access-method) uri &rest options)
1105 (open-ftp-document uri))
1107 (defconstant +ftp-access-methd+ (make-instance 'ftp-access-method))
1111 (defun map-uri-to-access-method (uri)
1112 (cond ((string-equal (url:url-protocol url) "file")
1113 +file-access-method+)
1114 ((string-equal (url:url-protocol url) "ftp")
1115 +ftp-access-method+)
1116 ((string-equal (url:url-protocol url) "http")
1117 +http-access-method+)
1118 ((string-equal (url:url-protocol url) "https")
1119 +https-access-method+)
1120 ((string-equal (url:url-protocol url) "zip")
1121 +zip-access-method+) ))
1123 (define-access-method #'(lambda (url)
1124 (string-equal (url:url-protocol url) "file"))
1125 +file-access-method+)
1131 (defclass http-access-method () ())
1133 (defclass http-proxy-access-method (http-access-method)
1134 ((host :reader host-of :initarg :host)
1135 (port :reader port-of :initarg :port)))
1137 (defclass http-direct-access-method (http-access-method)