1 ;;; -*- Mode: Lisp; Syntax: Common-Lisp; readtable: runes; Encoding: utf-8; -*-
2 ;;; ---------------------------------------------------------------------------
3 ;;; Title: Fast streams
4 ;;; Created: 1999-07-17
5 ;;; Author: Gilbert Baumann <unk6@rz.uni-karlsruhe.de>
6 ;;; License: Lisp-LGPL (See file COPYING for details).
7 ;;; ---------------------------------------------------------------------------
8 ;;; (c) copyright 1999 by Gilbert Baumann
10 ;;; This library is free software; you can redistribute it and/or
11 ;;; modify it under the terms of the GNU Library General Public
12 ;;; License as published by the Free Software Foundation; either
13 ;;; version 2 of the License, or (at your option) any later version.
15 ;;; This library is distributed in the hope that it will be useful,
16 ;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
17 ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 ;;; Library General Public License for more details.
20 ;;; You should have received a copy of the GNU Library General Public
21 ;;; License along with this library; if not, write to the
22 ;;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
23 ;;; Boston, MA 02111-1307 USA.
29 ;; MAKE-XSTREAM cl-stream &key name! speed initial-speed initial-encoding
31 ;; MAKE-ROD-XSTREAM rod &key name [function]
32 ;; CLOSE-XSTREAM xstream [function]
33 ;; XSTREAM-P object [function]
35 ;; READ-RUNE xstream [macro]
36 ;; PEEK-RUNE xstream [macro]
37 ;; FREAD-RUNE xstream [function]
38 ;; FPEEK-RUNE xstream [function]
39 ;; CONSUME-RUNE xstream [macro]
40 ;; UNREAD-RUNE rune xstream [function]
42 ;; XSTREAM-NAME xstream [accessor]
43 ;; XSTREAM-POSITION xstream [function]
44 ;; XSTREAM-LINE-NUMBER xstream [function]
45 ;; XSTREAM-COLUMN-NUMBER xstream [function]
46 ;; XSTREAM-PLIST xstream [accessor]
47 ;; XSTREAM-ENCODING xstream [accessor] <-- be careful here. [*]
48 ;; SET-TO-FULL-SPEED xstream [function]
50 ;; [*] switching the encoding on the fly is only possible when the
51 ;; stream's buffer is empty; therefore to be able to switch the
52 ;; encoding, while some runes are already read, set the stream's speed
53 ;; to 1 initially (via the initial-speed argument for MAKE-XSTREAM)
54 ;; and later set it to full speed. (The encoding of the runes
55 ;; sequence, you fetch off with READ-RUNE is always UTF-16 though).
56 ;; After switching the encoding, SET-TO-FULL-SPEED can be used to bump the
57 ;; speed up to a full buffer length.
59 ;; An encoding is simply something, which provides the DECODE-SEQUENCE
62 ;;; Controller protocol
64 ;; READ-OCTECTS sequence os-stream start end -> first-non-written
65 ;; XSTREAM/CLOSE os-stream
68 (eval-when (:compile-toplevel
:load-toplevel
:execute
)
69 (defparameter *fast
* '(optimize (speed 3) (safety 0))))
71 ;; Let us first define fast fixnum arithmetric get rid of type
72 ;; checks. (After all we know what we do here).
74 (defmacro fx-op
(op &rest xs
)
75 `(the fixnum
(,op
,@(mapcar (lambda (x) `(the fixnum
,x
)) xs
))))
76 (defmacro fx-pred
(op &rest xs
)
77 `(,op
,@(mapcar (lambda (x) `(the fixnum
,x
)) xs
)))
79 (defmacro %
+ (&rest xs
) `(fx-op + ,@xs
))
80 (defmacro %
= (&rest xs
) `(fx-pred = ,@xs
))
82 (deftype buffer-index
()
83 `(unsigned-byte ,(integer-length array-total-size-limit
)))
85 (deftype buffer-byte
()
94 ;; The usage of a special marker for EOF is experimental and
95 ;; considered unhygenic.
97 (defconstant +end
+ #xFFFF
98 "Special marker inserted into stream buffers to indicate end of buffered data.")
100 (defvar +null-buffer
+ (make-array 0 :element-type
'buffer-byte
))
101 (defvar +null-octet-buffer
+ (make-array 0 :element-type
'octet
))
104 (:constructor make-xstream
/low
)
106 (:print-function print-xstream
))
111 (buffer +null-buffer
+
112 :type
(simple-array buffer-byte
(*)))
113 ;; points to the next element of `buffer' containing the next rune
115 (read-ptr 0 :type buffer-index
)
116 ;; points to the first element of `buffer' not containing a rune to
118 (fill-ptr 0 :type buffer-index
)
122 ;; a scratch pad for READ-SEQUENCE
123 (os-buffer +null-octet-buffer
+
124 :type
(simple-array octet
(*)))
126 ;; `os-left-start', `os-left-end' designate a region of os-buffer,
127 ;; which still contains some undecoded data. This is needed because
128 ;; of the DECODE-SEQUENCE protocol
129 (os-left-start 0 :type buffer-index
)
130 (os-left-end 0 :type buffer-index
)
132 ;; How much to read each time
133 (speed 0 :type buffer-index
)
134 (full-speed 0 :type buffer-index
)
136 ;; Some stream object obeying to a certain protcol
139 ;; The external format
140 ;; (some object offering the ENCODING protocol)
143 ;;A STREAM-NAME object
146 ;; a plist a struct keeps the hack away
150 (line-number 1 :type integer
) ;current line number
151 (line-start 0 :type integer
) ;stream position the current line starts at
152 (buffer-start 0 :type integer
) ;stream position the current buffer starts at
154 ;; There is no need to maintain a column counter for each character
155 ;; read, since we can easily compute it from `line-start' and
159 (defun print-xstream (self sink depth
)
160 (declare (ignore depth
))
161 (format sink
"#<~S ~S>" (type-of self
) (xstream-name self
)))
163 (defmacro read-rune
(input)
164 "Read a single rune off the xstream `input'. In case of end of file :EOF
167 (declare (type xstream input
)
169 (let ((rp (xstream-read-ptr input
)))
170 (declare (type buffer-index rp
))
171 (let ((ch (aref (the (simple-array buffer-byte
(*)) (xstream-buffer input
))
173 (declare (type buffer-byte ch
))
174 (setf (xstream-read-ptr input
) (%
+ rp
1))
176 (the (or (member :eof
) rune
)
177 (xstream-underflow input
)))
178 ((%
= ch
#x000A
) ;line break
179 (account-for-line-break input
)
185 (defmacro peek-rune
(input)
186 "Peek a single rune off the xstream `input'. In case of end of file :EOF
189 (declare (type xstream input
)
191 (let ((rp (xstream-read-ptr input
)))
192 (declare (type buffer-index rp
))
193 (let ((ch (aref (the (simple-array buffer-byte
(*)) (xstream-buffer input
))
195 (declare (type buffer-byte ch
))
198 (the (or (member :eof
) rune
) (xstream-underflow input
))
199 (setf (xstream-read-ptr input
) 0)))
204 (defmacro consume-rune
(input)
205 "Like READ-RUNE, but does not actually return the read rune."
207 (declare (type xstream input
)
209 (let ((rp (xstream-read-ptr input
)))
210 (declare (type buffer-index rp
))
211 (let ((ch (aref (the (simple-array buffer-byte
(*)) (xstream-buffer input
))
213 (declare (type buffer-byte ch
))
214 (setf (xstream-read-ptr input
) (%
+ rp
1))
216 (xstream-underflow input
))
217 (when (%
= ch
#x000A
) ;line break
218 (account-for-line-break input
) )))
222 (definline unread-rune
(rune input
)
223 "Unread the last recently read rune; if there wasn't such a rune, you
225 (declare (ignore rune
))
226 (decf (xstream-read-ptr input
))
227 (when (rune= (peek-rune input
) #/u
+000A
) ;was it a line break?
228 (unaccount-for-line-break input
)))
230 (defun fread-rune (input)
233 (defun fpeek-rune (input)
238 (defun account-for-line-break (input)
239 (declare (type xstream input
))
240 (incf (xstream-line-number input
))
241 (setf (xstream-line-start input
)
242 (+ (xstream-buffer-start input
) (xstream-read-ptr input
))))
244 (defun unaccount-for-line-break (input)
246 ;; We better use a traditional lookahead technique or forbid unread-rune.
247 (decf (xstream-line-number input
)))
251 (defun xstream-position (input)
252 (+ (xstream-buffer-start input
) (xstream-read-ptr input
)))
254 ;; xstream-line-number is structure accessor
256 (defun xstream-column-number (input)
257 (+ (- (xstream-position input
)
258 (xstream-line-start input
))
263 (defconstant +default-buffer-size
+ 100)
265 (defmethod xstream-underflow ((input xstream
))
266 (declare (type xstream input
))
267 ;; we are about to fill new data into the buffer, so we need to
268 ;; adjust buffer-start.
269 (incf (xstream-buffer-start input
)
270 (- (xstream-fill-ptr input
) 0))
272 ;; when there is something left in the os-buffer, we move it to
273 ;; the start of the buffer.
274 (setf m
(- (xstream-os-left-end input
) (xstream-os-left-start input
)))
276 (replace (xstream-os-buffer input
) (xstream-os-buffer input
)
278 :start2
(xstream-os-left-start input
)
279 :end2
(xstream-os-left-end input
))
280 ;; then we take care that the buffer is large enough to carry at
281 ;; least 100 bytes (a random number)
283 ;; David: My understanding is that any number of octets large enough
284 ;; to record the longest UTF-8 sequence or UTF-16 sequence is okay,
285 ;; so 100 is plenty for this purpose.
286 (unless (>= (length (xstream-os-buffer input
))
287 +default-buffer-size
+)
290 (read-octets (xstream-os-buffer input
) (xstream-os-stream input
)
291 m
(min (1- (length (xstream-os-buffer input
)))
292 (+ m
(xstream-speed input
)))))
294 (setf (xstream-read-ptr input
) 0
295 (xstream-fill-ptr input
) n
)
296 (setf (aref (xstream-buffer input
) (xstream-fill-ptr input
)) +end
+)
299 (multiple-value-bind (fnw fnr
)
300 (runes-encoding:decode-sequence
301 (xstream-encoding input
)
302 (xstream-os-buffer input
) 0 n
303 (xstream-buffer input
) 0 (1- (length (xstream-buffer input
)))
305 (setf (xstream-os-left-start input
) fnr
306 (xstream-os-left-end input
) n
307 (xstream-read-ptr input
) 0
308 (xstream-fill-ptr input
) fnw
)
309 (setf (aref (xstream-buffer input
) (xstream-fill-ptr input
)) +end
+)
310 (read-rune input
))))))
314 (defun make-xstream (os-stream &key name
317 (initial-encoding :guess
))
318 ;; XXX if initial-speed isn't 1, encoding will me munged up
319 (assert (eql initial-speed
1))
320 (multiple-value-bind (encoding preread
)
321 (if (eq initial-encoding
:guess
)
322 (figure-encoding os-stream
)
323 (values initial-encoding nil
))
324 (let* ((bufsize (max speed
+default-buffer-size
+))
325 (osbuf (make-array bufsize
:element-type
'(unsigned-byte 8))))
326 (replace osbuf preread
)
328 :buffer
(let ((r (make-array bufsize
:element-type
'buffer-byte
)))
329 (setf (elt r
0) #xFFFF
)
338 :os-left-end
(length preread
)
342 (defun make-rod-xstream (string &key name
)
343 (unless (typep string
'simple-array
)
344 (setf string
(coerce string
'simple-string
)))
345 (let ((n (length string
)))
346 (let ((buffer (make-array (1+ n
) :element-type
'buffer-byte
)))
347 (declare (type (simple-array buffer-byte
(*)) buffer
))
349 (do ((i (1- n
) (- i
1)))
351 (declare (type fixnum i
))
352 (setf (aref buffer i
) (rune-code (%rune string i
))))
353 (setf (aref buffer n
) +end
+)
355 (make-xstream/low
:buffer buffer
363 (defmethod figure-encoding ((stream null
))
366 (defmethod figure-encoding ((stream stream
))
367 (let ((c0 (read-byte stream nil
:eof
)))
371 (let ((c1 (read-byte stream nil
:eof
)))
373 (values :utf-8
(list c0
)))
375 (cond ((and (= c0
#xFE
) (= c1
#xFF
)) (values :utf-16-big-endian nil
))
376 ((and (= c0
#xFF
) (= c1
#xFE
)) (values :utf-16-little-endian nil
))
377 ((and (= c0
#xEF
) (= c1
#xBB
))
378 (let ((c2 (read-byte stream nil
:eof
)))
381 (values :utf-8
(list c0 c1 c2
)))))
383 (values :utf-8
(list c0 c1
)))))))))))
387 (defun close-xstream (input)
388 (xstream/close
(xstream-os-stream input
)))
390 (defun set-to-full-speed (xstream)
391 (setf (xstream-speed xstream
) (xstream-full-speed xstream
)))
393 ;;; controller implementations
395 (defmethod read-octets (sequence (stream stream
) start end
)
396 (#+CLISP ext
:read-byte-sequence
397 #-CLISP read-sequence
398 sequence stream
:start start
:end end
))
401 (defmethod read-octets :around
(sequence (stream stream
) start end
)
402 ;; CMUCL <= 19a on non-SunOS accidentally triggers EFAULT in read(2)
403 ;; if SEQUENCE has been write protected by GC. Workaround: Touch all pages
404 ;; in SEQUENCE and make sure no GC happens between that and the read(2).
406 (loop for i from start below end
407 do
(setf (elt sequence i
) (elt sequence i
)))
410 (defmethod read-octets (sequence (stream null
) start end
)
411 (declare (ignore sequence start end
))
414 (defmethod xstream/close
((stream stream
))
417 (defmethod xstream/close
((stream null
))