1 ;;; -*- Mode: Lisp; Syntax: Common-Lisp; readtable: runes; Encoding: utf-8; -*-
2 ;;; ---------------------------------------------------------------------------
3 ;;; Title: Fast streams
4 ;;; Created: 1999-07-17
5 ;;; Author: Gilbert Baumann <unk6@rz.uni-karlsruhe.de>
6 ;;; License: Lisp-LGPL (See file COPYING for details).
7 ;;; ---------------------------------------------------------------------------
8 ;;; © copyright 1999 by Gilbert Baumann
10 ;;; This library is free software; you can redistribute it and/or
11 ;;; modify it under the terms of the GNU Library General Public
12 ;;; License as published by the Free Software Foundation; either
13 ;;; version 2 of the License, or (at your option) any later version.
15 ;;; This library is distributed in the hope that it will be useful,
16 ;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
17 ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 ;;; Library General Public License for more details.
20 ;;; You should have received a copy of the GNU Library General Public
21 ;;; License along with this library; if not, write to the
22 ;;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
23 ;;; Boston, MA 02111-1307 USA.
29 ;; MAKE-XSTREAM cl-stream &key name! speed initial-speed initial-encoding
31 ;; MAKE-ROD-XSTREAM rod &key name [function]
32 ;; CLOSE-XSTREAM xstream [function]
33 ;; XSTREAM-P object [function]
35 ;; READ-RUNE xstream [macro]
36 ;; PEEK-RUNE xstream [macro]
37 ;; FREAD-RUNE xstream [function]
38 ;; FPEEK-RUNE xstream [function]
39 ;; CONSUME-RUNE xstream [macro]
40 ;; UNREAD-RUNE rune xstream [function]
42 ;; XSTREAM-NAME xstream [accessor]
43 ;; XSTREAM-POSITION xstream [function]
44 ;; XSTREAM-LINE-NUMBER xstream [function]
45 ;; XSTREAM-COLUMN-NUMBER xstream [function]
46 ;; XSTREAM-PLIST xstream [accessor]
47 ;; XSTREAM-ENCODING xstream [accessor] <-- be careful here. [*]
48 ;; SET-TO-FULL-SPEED xstream [function]
50 ;; [*] switching the encoding on the fly is only possible when the
51 ;; stream's buffer is empty; therefore to be able to switch the
52 ;; encoding, while some runes are already read, set the stream's speed
53 ;; to 1 initially (via the initial-speed argument for MAKE-XSTREAM)
54 ;; and later set it to full speed. (The encoding of the runes
55 ;; sequence, you fetch off with READ-RUNE is always UTF-16 though).
56 ;; After switching the encoding, SET-TO-FULL-SPEED can be used to bump the
57 ;; speed up to a full buffer length.
59 ;; An encoding is simply something, which provides the DECODE-SEQUENCE
62 ;;; Controller protocol
64 ;; READ-OCTECTS sequence os-stream start end -> first-non-written
65 ;; XSTREAM/CLOSE os-stream
68 (eval-when (:compile-toplevel
:load-toplevel
:execute
)
69 (defparameter *fast
* '(optimize (speed 3) (safety 0))))
71 ;; Let us first define fast fixnum arithmetric get rid of type
72 ;; checks. (After all we know what we do here).
74 (defmacro fx-op
(op &rest xs
)
75 `(the fixnum
(,op
,@(mapcar (lambda (x) `(the fixnum
,x
)) xs
))))
76 (defmacro fx-pred
(op &rest xs
)
77 `(,op
,@(mapcar (lambda (x) `(the fixnum
,x
)) xs
)))
79 (defmacro %
+ (&rest xs
) `(fx-op + ,@xs
))
80 (defmacro %
= (&rest xs
) `(fx-pred = ,@xs
))
82 (deftype buffer-index
()
83 `(unsigned-byte ,(integer-length array-total-size-limit
)))
85 (deftype buffer-byte
()
91 ;; The usage of a special marker for EOF is experimental and
92 ;; considered unhygenic.
94 (defconstant +end
+ #xFFFF
95 "Special marker inserted into stream buffers to indicate end of buffered data.")
97 (defvar +null-buffer
+ (make-array 0 :element-type
'buffer-byte
))
98 (defvar +null-octet-buffer
+ (make-array 0 :element-type
'octet
))
101 (:constructor make-xstream
/low
)
103 (:print-function print-xstream
))
108 (buffer +null-buffer
+
109 :type
(simple-array buffer-byte
(*)))
110 ;; points to the next element of `buffer' containing the next rune
112 (read-ptr 0 :type buffer-index
)
113 ;; points to the first element of `buffer' not containing a rune to
115 (fill-ptr 0 :type buffer-index
)
119 ;; a scratch pad for READ-SEQUENCE
120 (os-buffer +null-octet-buffer
+
121 :type
(simple-array octet
(*)))
123 ;; `os-left-start', `os-left-end' designate a region of os-buffer,
124 ;; which still contains some undecoded data. This is needed because
125 ;; of the DECODE-SEQUENCE protocol
126 (os-left-start 0 :type buffer-index
)
127 (os-left-end 0 :type buffer-index
)
129 ;; How much to read each time
130 (speed 0 :type buffer-index
)
132 ;; Some stream object obeying to a certain protcol
135 ;; The external format
136 ;; (some object offering the ENCODING protocol)
139 ;;A STREAM-NAME object
142 ;; a plist a struct keeps the hack away
146 (line-number 1 :type integer
) ;current line number
147 (line-start 0 :type integer
) ;stream position the current line starts at
148 (buffer-start 0 :type integer
) ;stream position the current buffer starts at
150 ;; There is no need to maintain a column counter for each character
151 ;; read, since we can easily compute it from `line-start' and
155 (defun print-xstream (self sink depth
)
156 (declare (ignore depth
))
157 (format sink
"#<~S ~S>" (type-of self
) (xstream-name self
)))
159 (defmacro read-rune
(input)
160 "Read a single rune off the xstream `input'. In case of end of file :EOF
163 (declare (type xstream input
)
165 (let ((rp (xstream-read-ptr input
)))
166 (declare (type buffer-index rp
))
167 (let ((ch (aref (the (simple-array buffer-byte
(*)) (xstream-buffer input
))
169 (declare (type buffer-byte ch
))
170 (setf (xstream-read-ptr input
) (%
+ rp
1))
172 (the (or (member :eof
) rune
)
173 (xstream-underflow input
)))
174 ((%
= ch
#x000A
) ;line break
175 (account-for-line-break input
)
181 (defmacro peek-rune
(input)
182 "Peek a single rune off the xstream `input'. In case of end of file :EOF
185 (declare (type xstream input
)
187 (let ((rp (xstream-read-ptr input
)))
188 (declare (type buffer-index rp
))
189 (let ((ch (aref (the (simple-array buffer-byte
(*)) (xstream-buffer input
))
191 (declare (type buffer-byte ch
))
194 (the (or (member :eof
) rune
) (xstream-underflow input
))
195 (setf (xstream-read-ptr input
) 0)))
200 (defmacro consume-rune
(input)
201 "Like READ-RUNE, but does not actually return the read rune."
203 (declare (type xstream input
)
205 (let ((rp (xstream-read-ptr input
)))
206 (declare (type buffer-index rp
))
207 (let ((ch (aref (the (simple-array buffer-byte
(*)) (xstream-buffer input
))
209 (declare (type buffer-byte ch
))
210 (setf (xstream-read-ptr input
) (%
+ rp
1))
212 (xstream-underflow input
))
213 (when (%
= ch
#x000A
) ;line break
214 (account-for-line-break input
) )))
218 (definline unread-rune
(rune input
)
219 "Unread the last recently read rune; if there wasn't such a rune, you
221 (declare (ignore rune
))
222 (decf (xstream-read-ptr input
))
223 (when (rune= (peek-rune input
) #/u
+000A
) ;was it a line break?
224 (unaccount-for-line-break input
)))
226 (defun fread-rune (input)
229 (defun fpeek-rune (input)
234 (defun account-for-line-break (input)
235 (declare (type xstream input
))
236 (incf (xstream-line-number input
))
237 (setf (xstream-line-start input
)
238 (+ (xstream-buffer-start input
) (xstream-read-ptr input
))))
240 (defun unaccount-for-line-break (input)
242 ;; We better use a traditional lookahead technique or forbid unread-rune.
243 (decf (xstream-line-number input
)))
247 (defun xstream-position (input)
248 (+ (xstream-buffer-start input
) (xstream-read-ptr input
)))
250 ;; xstream-line-number is structure accessor
252 (defun xstream-column-number (input)
253 (+ (- (xstream-position input
)
254 (xstream-line-start input
))
259 ;;(defun read-runes (sequence input))
261 (defun xstream-underflow (input)
262 (declare (type xstream input
))
263 ;; we are about to fill new data into the buffer, so we need to
264 ;; adjust buffer-start.
265 (incf (xstream-buffer-start input
)
266 (- (xstream-fill-ptr input
) 0))
268 ;; when there is something left in the os-buffer, we move it to
269 ;; the start of the buffer.
270 (setf m
(- (xstream-os-left-end input
) (xstream-os-left-start input
)))
272 (replace (xstream-os-buffer input
) (xstream-os-buffer input
)
274 :start2
(xstream-os-left-start input
)
275 :end2
(xstream-os-left-end input
))
276 ;; then we take care that the buffer is large enough to carry at
277 ;; least 100 bytes (a random number)
279 ;; david: was heisst da random? ich nehme an, dass 100 einfach
280 ;; ausreichend sein soll, um die laengste utf-8 bytesequenz oder die
281 ;; beiden utf-16 surrogates zu halten? dann ist 100 ja wohl dicke
282 ;; ausreichend und koennte in make-xstream ordentlich geprueft werden.
283 ;; oder was geht hier vor?
284 (unless (>= (length (xstream-os-buffer input
)) 100)
287 (read-octets (xstream-os-buffer input
) (xstream-os-stream input
)
288 m
(min (1- (length (xstream-os-buffer input
)))
289 (+ m
(xstream-speed input
)))))
291 (setf (xstream-read-ptr input
) 0
292 (xstream-fill-ptr input
) n
)
293 (setf (aref (xstream-buffer input
) (xstream-fill-ptr input
)) +end
+)
296 (multiple-value-bind (fnw fnr
)
297 (runes-encoding:decode-sequence
298 (xstream-encoding input
)
299 (xstream-os-buffer input
) 0 n
300 (xstream-buffer input
) 0 (1- (length (xstream-buffer input
)))
302 (setf (xstream-os-left-start input
) fnr
303 (xstream-os-left-end input
) n
304 (xstream-read-ptr input
) 0
305 (xstream-fill-ptr input
) fnw
)
306 (setf (aref (xstream-buffer input
) (xstream-fill-ptr input
)) +end
+)
307 (read-rune input
))))))
311 (defun make-xstream (os-stream &key name
314 (initial-encoding :guess
))
315 ;; XXX if initial-speed isn't 1, encoding will me munged up
316 (assert (eql initial-speed
1))
317 (multiple-value-bind (encoding preread
)
318 (if (eq initial-encoding
:guess
)
319 (figure-encoding os-stream
)
320 (values initial-encoding nil
))
321 (let ((osbuf (make-array speed
:element-type
'(unsigned-byte 8))))
322 (replace osbuf preread
)
324 :buffer
(let ((r (make-array speed
:element-type
'buffer-byte
)))
325 (setf (elt r
0) #xFFFF
)
333 :os-left-end
(length preread
)
337 (defun make-rod-xstream (string &key name
)
338 ;; XXX encoding is mis-handled by this kind of stream
339 (let ((n (length string
)))
340 (let ((buffer (make-array (1+ n
) :element-type
'buffer-byte
)))
341 (declare (type (simple-array buffer-byte
(*)) buffer
))
343 (do ((i (1- n
) (- i
1)))
345 (declare (type fixnum i
))
346 (setf (aref buffer i
) (rune-code (%rune string i
))))
347 (setf (aref buffer n
) +end
+)
349 (make-xstream/low
:buffer buffer
357 (defmethod figure-encoding ((stream null
))
360 (defmethod figure-encoding ((stream stream
))
361 (let ((c0 (read-byte stream nil
:eof
)))
365 (let ((c1 (read-byte stream nil
:eof
)))
367 (values :utf-8
(list c0
)))
369 (cond ((and (= c0
#xFE
) (= c1
#xFF
)) (values :utf-16-big-endian nil
))
370 ((and (= c0
#xFF
) (= c1
#xFE
)) (values :utf-16-little-endian nil
))
372 (values :utf-8
(list c0 c1
)))))))))))
376 (defun close-xstream (input)
377 (xstream/close
(xstream-os-stream input
)))
379 (defun set-to-full-speed (xstream)
380 (setf (xstream-speed xstream
) (length (xstream-os-buffer xstream
))))
382 ;;; controller implementations
384 (defmethod read-octets (sequence (stream stream
) start end
)
385 (#+CLISP ext
:read-byte-sequence
386 #-CLISP read-sequence
387 sequence stream
:start start
:end end
))
390 (defmethod read-octets :around
(sequence (stream stream
) start end
)
391 ;; CMUCL <= 19a on non-SunOS accidentally triggers EFAULT in read(2)
392 ;; if SEQUENCE has been write protected by GC. Workaround: Touch all pages
393 ;; in SEQUENCE and make sure no GC happens between that and the read(2).
395 (loop for i from start below end
396 do
(setf (elt sequence i
) (elt sequence i
)))
399 (defmethod read-octets (sequence (stream null
) start end
)
400 (declare (ignore sequence start end
))
403 (defmethod xstream/close
((stream stream
))
406 (defmethod xstream/close
((stream null
))