Allow non-simple strings in MAKE-ROD-XSTREAM, for the benefit of Drakma.
[closure-common.git] / xstream.lisp
blob21150858e63ab8108d66c667f2b830acb6e024c5
1 ;;; -*- Mode: Lisp; Syntax: Common-Lisp; readtable: runes; Encoding: utf-8; -*-
2 ;;; ---------------------------------------------------------------------------
3 ;;; Title: Fast streams
4 ;;; Created: 1999-07-17
5 ;;; Author: Gilbert Baumann <unk6@rz.uni-karlsruhe.de>
6 ;;; License: Lisp-LGPL (See file COPYING for details).
7 ;;; ---------------------------------------------------------------------------
8 ;;; (c) copyright 1999 by Gilbert Baumann
10 ;;; This library is free software; you can redistribute it and/or
11 ;;; modify it under the terms of the GNU Library General Public
12 ;;; License as published by the Free Software Foundation; either
13 ;;; version 2 of the License, or (at your option) any later version.
14 ;;;
15 ;;; This library is distributed in the hope that it will be useful,
16 ;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
17 ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 ;;; Library General Public License for more details.
19 ;;;
20 ;;; You should have received a copy of the GNU Library General Public
21 ;;; License along with this library; if not, write to the
22 ;;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
23 ;;; Boston, MA 02111-1307 USA.
25 (in-package :runes)
27 ;;; API
28 ;;
29 ;; MAKE-XSTREAM cl-stream &key name! speed initial-speed initial-encoding
30 ;; [function]
31 ;; MAKE-ROD-XSTREAM rod &key name [function]
32 ;; CLOSE-XSTREAM xstream [function]
33 ;; XSTREAM-P object [function]
35 ;; READ-RUNE xstream [macro]
36 ;; PEEK-RUNE xstream [macro]
37 ;; FREAD-RUNE xstream [function]
38 ;; FPEEK-RUNE xstream [function]
39 ;; CONSUME-RUNE xstream [macro]
40 ;; UNREAD-RUNE rune xstream [function]
42 ;; XSTREAM-NAME xstream [accessor]
43 ;; XSTREAM-POSITION xstream [function]
44 ;; XSTREAM-LINE-NUMBER xstream [function]
45 ;; XSTREAM-COLUMN-NUMBER xstream [function]
46 ;; XSTREAM-PLIST xstream [accessor]
47 ;; XSTREAM-ENCODING xstream [accessor] <-- be careful here. [*]
48 ;; SET-TO-FULL-SPEED xstream [function]
50 ;; [*] switching the encoding on the fly is only possible when the
51 ;; stream's buffer is empty; therefore to be able to switch the
52 ;; encoding, while some runes are already read, set the stream's speed
53 ;; to 1 initially (via the initial-speed argument for MAKE-XSTREAM)
54 ;; and later set it to full speed. (The encoding of the runes
55 ;; sequence, you fetch off with READ-RUNE is always UTF-16 though).
56 ;; After switching the encoding, SET-TO-FULL-SPEED can be used to bump the
57 ;; speed up to a full buffer length.
59 ;; An encoding is simply something, which provides the DECODE-SEQUENCE
60 ;; method.
62 ;;; Controller protocol
64 ;; READ-OCTECTS sequence os-stream start end -> first-non-written
65 ;; XSTREAM/CLOSE os-stream
68 (eval-when (:compile-toplevel :load-toplevel :execute)
69 (defparameter *fast* '(optimize (speed 3) (safety 0))))
71 ;; Let us first define fast fixnum arithmetric get rid of type
72 ;; checks. (After all we know what we do here).
74 (defmacro fx-op (op &rest xs)
75 `(the fixnum (,op ,@(mapcar (lambda (x) `(the fixnum ,x)) xs))))
76 (defmacro fx-pred (op &rest xs)
77 `(,op ,@(mapcar (lambda (x) `(the fixnum ,x)) xs)))
79 (defmacro %+ (&rest xs) `(fx-op + ,@xs))
80 (defmacro %= (&rest xs) `(fx-pred = ,@xs))
82 (deftype buffer-index ()
83 `(unsigned-byte ,(integer-length array-total-size-limit)))
85 (deftype buffer-byte ()
86 `(unsigned-byte 16))
88 (deftype octet ()
89 `(unsigned-byte 8))
91 ;; The usage of a special marker for EOF is experimental and
92 ;; considered unhygenic.
94 (defconstant +end+ #xFFFF
95 "Special marker inserted into stream buffers to indicate end of buffered data.")
97 (defvar +null-buffer+ (make-array 0 :element-type 'buffer-byte))
98 (defvar +null-octet-buffer+ (make-array 0 :element-type 'octet))
100 (defstruct (xstream
101 (:constructor make-xstream/low)
102 (:copier nil)
103 (:print-function print-xstream))
105 ;;; Read buffer
107 ;; the buffer itself
108 (buffer +null-buffer+
109 :type (simple-array buffer-byte (*)))
110 ;; points to the next element of `buffer' containing the next rune
111 ;; about to be read.
112 (read-ptr 0 :type buffer-index)
113 ;; points to the first element of `buffer' not containing a rune to
114 ;; be read.
115 (fill-ptr 0 :type buffer-index)
117 ;;; OS buffer
119 ;; a scratch pad for READ-SEQUENCE
120 (os-buffer +null-octet-buffer+
121 :type (simple-array octet (*)))
123 ;; `os-left-start', `os-left-end' designate a region of os-buffer,
124 ;; which still contains some undecoded data. This is needed because
125 ;; of the DECODE-SEQUENCE protocol
126 (os-left-start 0 :type buffer-index)
127 (os-left-end 0 :type buffer-index)
129 ;; How much to read each time
130 (speed 0 :type buffer-index)
131 (full-speed 0 :type buffer-index)
133 ;; Some stream object obeying to a certain protcol
134 os-stream
136 ;; The external format
137 ;; (some object offering the ENCODING protocol)
138 (encoding :utf-8)
140 ;;A STREAM-NAME object
141 (name nil)
143 ;; a plist a struct keeps the hack away
144 (plist nil)
146 ;; Stream Position
147 (line-number 1 :type integer) ;current line number
148 (line-start 0 :type integer) ;stream position the current line starts at
149 (buffer-start 0 :type integer) ;stream position the current buffer starts at
151 ;; There is no need to maintain a column counter for each character
152 ;; read, since we can easily compute it from `line-start' and
153 ;; `buffer-start'.
156 (defun print-xstream (self sink depth)
157 (declare (ignore depth))
158 (format sink "#<~S ~S>" (type-of self) (xstream-name self)))
160 (defmacro read-rune (input)
161 "Read a single rune off the xstream `input'. In case of end of file :EOF
162 is returned."
163 `((lambda (input)
164 (declare (type xstream input)
165 #.*fast*)
166 (let ((rp (xstream-read-ptr input)))
167 (declare (type buffer-index rp))
168 (let ((ch (aref (the (simple-array buffer-byte (*)) (xstream-buffer input))
169 rp)))
170 (declare (type buffer-byte ch))
171 (setf (xstream-read-ptr input) (%+ rp 1))
172 (cond ((%= ch +end+)
173 (the (or (member :eof) rune)
174 (xstream-underflow input)))
175 ((%= ch #x000A) ;line break
176 (account-for-line-break input)
177 (code-rune ch))
179 (code-rune ch))))))
180 ,input))
182 (defmacro peek-rune (input)
183 "Peek a single rune off the xstream `input'. In case of end of file :EOF
184 is returned."
185 `((lambda (input)
186 (declare (type xstream input)
187 #.*fast*)
188 (let ((rp (xstream-read-ptr input)))
189 (declare (type buffer-index rp))
190 (let ((ch (aref (the (simple-array buffer-byte (*)) (xstream-buffer input))
191 rp)))
192 (declare (type buffer-byte ch))
193 (cond ((%= ch +end+)
194 (prog1
195 (the (or (member :eof) rune) (xstream-underflow input))
196 (setf (xstream-read-ptr input) 0)))
198 (code-rune ch))))))
199 ,input))
201 (defmacro consume-rune (input)
202 "Like READ-RUNE, but does not actually return the read rune."
203 `((lambda (input)
204 (declare (type xstream input)
205 #.*fast*)
206 (let ((rp (xstream-read-ptr input)))
207 (declare (type buffer-index rp))
208 (let ((ch (aref (the (simple-array buffer-byte (*)) (xstream-buffer input))
209 rp)))
210 (declare (type buffer-byte ch))
211 (setf (xstream-read-ptr input) (%+ rp 1))
212 (when (%= ch +end+)
213 (xstream-underflow input))
214 (when (%= ch #x000A) ;line break
215 (account-for-line-break input) )))
216 nil)
217 ,input))
219 (definline unread-rune (rune input)
220 "Unread the last recently read rune; if there wasn't such a rune, you
221 deserve to lose."
222 (declare (ignore rune))
223 (decf (xstream-read-ptr input))
224 (when (rune= (peek-rune input) #/u+000A) ;was it a line break?
225 (unaccount-for-line-break input)))
227 (defun fread-rune (input)
228 (read-rune input))
230 (defun fpeek-rune (input)
231 (peek-rune input))
233 ;;; Line counting
235 (defun account-for-line-break (input)
236 (declare (type xstream input))
237 (incf (xstream-line-number input))
238 (setf (xstream-line-start input)
239 (+ (xstream-buffer-start input) (xstream-read-ptr input))))
241 (defun unaccount-for-line-break (input)
242 ;; incomplete!
243 ;; We better use a traditional lookahead technique or forbid unread-rune.
244 (decf (xstream-line-number input)))
246 ;; User API:
248 (defun xstream-position (input)
249 (+ (xstream-buffer-start input) (xstream-read-ptr input)))
251 ;; xstream-line-number is structure accessor
253 (defun xstream-column-number (input)
254 (+ (- (xstream-position input)
255 (xstream-line-start input))
258 ;;; Underflow
260 (defconstant +default-buffer-size+ 100)
262 (defmethod xstream-underflow ((input xstream))
263 (declare (type xstream input))
264 ;; we are about to fill new data into the buffer, so we need to
265 ;; adjust buffer-start.
266 (incf (xstream-buffer-start input)
267 (- (xstream-fill-ptr input) 0))
268 (let (n m)
269 ;; when there is something left in the os-buffer, we move it to
270 ;; the start of the buffer.
271 (setf m (- (xstream-os-left-end input) (xstream-os-left-start input)))
272 (unless (zerop m)
273 (replace (xstream-os-buffer input) (xstream-os-buffer input)
274 :start1 0 :end1 m
275 :start2 (xstream-os-left-start input)
276 :end2 (xstream-os-left-end input))
277 ;; then we take care that the buffer is large enough to carry at
278 ;; least 100 bytes (a random number)
280 ;; David: My understanding is that any number of octets large enough
281 ;; to record the longest UTF-8 sequence or UTF-16 sequence is okay,
282 ;; so 100 is plenty for this purpose.
283 (unless (>= (length (xstream-os-buffer input))
284 +default-buffer-size+)
285 (error "You lost")))
286 (setf n
287 (read-octets (xstream-os-buffer input) (xstream-os-stream input)
288 m (min (1- (length (xstream-os-buffer input)))
289 (+ m (xstream-speed input)))))
290 (cond ((%= n 0)
291 (setf (xstream-read-ptr input) 0
292 (xstream-fill-ptr input) n)
293 (setf (aref (xstream-buffer input) (xstream-fill-ptr input)) +end+)
294 :eof)
296 (multiple-value-bind (fnw fnr)
297 (runes-encoding:decode-sequence
298 (xstream-encoding input)
299 (xstream-os-buffer input) 0 n
300 (xstream-buffer input) 0 (1- (length (xstream-buffer input)))
301 (= n m))
302 (setf (xstream-os-left-start input) fnr
303 (xstream-os-left-end input) n
304 (xstream-read-ptr input) 0
305 (xstream-fill-ptr input) fnw)
306 (setf (aref (xstream-buffer input) (xstream-fill-ptr input)) +end+)
307 (read-rune input))))))
309 ;;; constructor
311 (defun make-xstream (os-stream &key name
312 (speed 8192)
313 (initial-speed 1)
314 (initial-encoding :guess))
315 ;; XXX if initial-speed isn't 1, encoding will me munged up
316 (assert (eql initial-speed 1))
317 (multiple-value-bind (encoding preread)
318 (if (eq initial-encoding :guess)
319 (figure-encoding os-stream)
320 (values initial-encoding nil))
321 (let* ((bufsize (max speed +default-buffer-size+))
322 (osbuf (make-array bufsize :element-type '(unsigned-byte 8))))
323 (replace osbuf preread)
324 (make-xstream/low
325 :buffer (let ((r (make-array bufsize :element-type 'buffer-byte)))
326 (setf (elt r 0) #xFFFF)
328 :read-ptr 0
329 :fill-ptr 0
330 :os-buffer osbuf
331 :speed initial-speed
332 :full-speed speed
333 :os-stream os-stream
334 :os-left-start 0
335 :os-left-end (length preread)
336 :encoding encoding
337 :name name))))
339 (defun make-rod-xstream (string &key name)
340 (unless (typep string 'simple-array)
341 (setf string (coerce string 'simple-string)))
342 ;; XXX encoding is mis-handled by this kind of stream
343 (let ((n (length string)))
344 (let ((buffer (make-array (1+ n) :element-type 'buffer-byte)))
345 (declare (type (simple-array buffer-byte (*)) buffer))
346 ;; copy the rod
347 (do ((i (1- n) (- i 1)))
348 ((< i 0))
349 (declare (type fixnum i))
350 (setf (aref buffer i) (rune-code (%rune string i))))
351 (setf (aref buffer n) +end+)
353 (make-xstream/low :buffer buffer
354 :read-ptr 0
355 :fill-ptr n
356 ;; :os-buffer nil
357 :speed 1
358 :os-stream nil
359 :name name))))
361 (defmethod figure-encoding ((stream null))
362 (values :utf-8 nil))
364 (defmethod figure-encoding ((stream stream))
365 (let ((c0 (read-byte stream nil :eof)))
366 (cond ((eq c0 :eof)
367 (values :utf-8 nil))
369 (let ((c1 (read-byte stream nil :eof)))
370 (cond ((eq c1 :eof)
371 (values :utf-8 (list c0)))
373 (cond ((and (= c0 #xFE) (= c1 #xFF)) (values :utf-16-big-endian nil))
374 ((and (= c0 #xFF) (= c1 #xFE)) (values :utf-16-little-endian nil))
376 (values :utf-8 (list c0 c1)))))))))))
378 ;;; misc
380 (defun close-xstream (input)
381 (xstream/close (xstream-os-stream input)))
383 (defun set-to-full-speed (xstream)
384 (setf (xstream-speed xstream) (xstream-full-speed xstream)))
386 ;;; controller implementations
388 (defmethod read-octets (sequence (stream stream) start end)
389 (#+CLISP ext:read-byte-sequence
390 #-CLISP read-sequence
391 sequence stream :start start :end end))
393 #+cmu
394 (defmethod read-octets :around (sequence (stream stream) start end)
395 ;; CMUCL <= 19a on non-SunOS accidentally triggers EFAULT in read(2)
396 ;; if SEQUENCE has been write protected by GC. Workaround: Touch all pages
397 ;; in SEQUENCE and make sure no GC happens between that and the read(2).
398 (ext::without-gcing
399 (loop for i from start below end
400 do (setf (elt sequence i) (elt sequence i)))
401 (call-next-method)))
403 (defmethod read-octets (sequence (stream null) start end)
404 (declare (ignore sequence start end))
407 (defmethod xstream/close ((stream stream))
408 (close stream))
410 (defmethod xstream/close ((stream null))
411 nil)