need to :use trivial-gray-streams to fix the previous ci
[cxml/s11.git] / runes / xstream.lisp
blobae9bef73951880635cceb015fb5169f7fff5c14f
1 ;;; -*- Mode: Lisp; Syntax: Common-Lisp; readtable: runes; Encoding: utf-8; -*-
2 ;;; ---------------------------------------------------------------------------
3 ;;; Title: Fast streams
4 ;;; Created: 1999-07-17
5 ;;; Author: Gilbert Baumann <unk6@rz.uni-karlsruhe.de>
6 ;;; License: Lisp-LGPL (See file COPYING for details).
7 ;;; ---------------------------------------------------------------------------
8 ;;; (c) copyright 1999 by Gilbert Baumann
10 ;;; This library is free software; you can redistribute it and/or
11 ;;; modify it under the terms of the GNU Library General Public
12 ;;; License as published by the Free Software Foundation; either
13 ;;; version 2 of the License, or (at your option) any later version.
14 ;;;
15 ;;; This library is distributed in the hope that it will be useful,
16 ;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
17 ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 ;;; Library General Public License for more details.
19 ;;;
20 ;;; You should have received a copy of the GNU Library General Public
21 ;;; License along with this library; if not, write to the
22 ;;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
23 ;;; Boston, MA 02111-1307 USA.
25 (in-package :runes)
27 ;;; API
28 ;;
29 ;; MAKE-XSTREAM cl-stream &key name! speed initial-speed initial-encoding
30 ;; [function]
31 ;; MAKE-ROD-XSTREAM rod &key name [function]
32 ;; CLOSE-XSTREAM xstream [function]
33 ;; XSTREAM-P object [function]
35 ;; READ-RUNE xstream [macro]
36 ;; PEEK-RUNE xstream [macro]
37 ;; FREAD-RUNE xstream [function]
38 ;; FPEEK-RUNE xstream [function]
39 ;; CONSUME-RUNE xstream [macro]
40 ;; UNREAD-RUNE rune xstream [function]
42 ;; XSTREAM-NAME xstream [accessor]
43 ;; XSTREAM-POSITION xstream [function]
44 ;; XSTREAM-LINE-NUMBER xstream [function]
45 ;; XSTREAM-COLUMN-NUMBER xstream [function]
46 ;; XSTREAM-PLIST xstream [accessor]
47 ;; XSTREAM-ENCODING xstream [accessor] <-- be careful here. [*]
48 ;; SET-TO-FULL-SPEED xstream [function]
50 ;; [*] switching the encoding on the fly is only possible when the
51 ;; stream's buffer is empty; therefore to be able to switch the
52 ;; encoding, while some runes are already read, set the stream's speed
53 ;; to 1 initially (via the initial-speed argument for MAKE-XSTREAM)
54 ;; and later set it to full speed. (The encoding of the runes
55 ;; sequence, you fetch off with READ-RUNE is always UTF-16 though).
56 ;; After switching the encoding, SET-TO-FULL-SPEED can be used to bump the
57 ;; speed up to a full buffer length.
59 ;; An encoding is simply something, which provides the DECODE-SEQUENCE
60 ;; method.
62 ;;; Controller protocol
64 ;; READ-OCTECTS sequence os-stream start end -> first-non-written
65 ;; XSTREAM/CLOSE os-stream
68 (eval-when (:compile-toplevel :load-toplevel :execute)
69 (defparameter *fast* '(optimize (speed 3) (safety 0))))
71 ;; Let us first define fast fixnum arithmetric get rid of type
72 ;; checks. (After all we know what we do here).
74 (defmacro fx-op (op &rest xs)
75 `(the fixnum (,op ,@(mapcar (lambda (x) `(the fixnum ,x)) xs))))
76 (defmacro fx-pred (op &rest xs)
77 `(,op ,@(mapcar (lambda (x) `(the fixnum ,x)) xs)))
79 (defmacro %+ (&rest xs) `(fx-op + ,@xs))
80 (defmacro %= (&rest xs) `(fx-pred = ,@xs))
82 (deftype buffer-index ()
83 `(unsigned-byte ,(integer-length array-total-size-limit)))
85 (deftype buffer-byte ()
86 `(unsigned-byte 16))
88 (deftype octet ()
89 `(unsigned-byte 8))
91 ;; The usage of a special marker for EOF is experimental and
92 ;; considered unhygenic.
94 (defconstant +end+ #xFFFF
95 "Special marker inserted into stream buffers to indicate end of buffered data.")
97 (defvar +null-buffer+ (make-array 0 :element-type 'buffer-byte))
98 (defvar +null-octet-buffer+ (make-array 0 :element-type 'octet))
100 (defstruct (xstream
101 (:constructor make-xstream/low)
102 (:copier nil)
103 (:print-function print-xstream))
105 ;;; Read buffer
107 ;; the buffer itself
108 (buffer +null-buffer+
109 :type (simple-array buffer-byte (*)))
110 ;; points to the next element of `buffer' containing the next rune
111 ;; about to be read.
112 (read-ptr 0 :type buffer-index)
113 ;; points to the first element of `buffer' not containing a rune to
114 ;; be read.
115 (fill-ptr 0 :type buffer-index)
117 ;;; OS buffer
119 ;; a scratch pad for READ-SEQUENCE
120 (os-buffer +null-octet-buffer+
121 :type (simple-array octet (*)))
123 ;; `os-left-start', `os-left-end' designate a region of os-buffer,
124 ;; which still contains some undecoded data. This is needed because
125 ;; of the DECODE-SEQUENCE protocol
126 (os-left-start 0 :type buffer-index)
127 (os-left-end 0 :type buffer-index)
129 ;; How much to read each time
130 (speed 0 :type buffer-index)
132 ;; Some stream object obeying to a certain protcol
133 os-stream
135 ;; The external format
136 ;; (some object offering the ENCODING protocol)
137 (encoding :utf-8)
139 ;;A STREAM-NAME object
140 (name nil)
142 ;; a plist a struct keeps the hack away
143 (plist nil)
145 ;; Stream Position
146 (line-number 1 :type integer) ;current line number
147 (line-start 0 :type integer) ;stream position the current line starts at
148 (buffer-start 0 :type integer) ;stream position the current buffer starts at
150 ;; There is no need to maintain a column counter for each character
151 ;; read, since we can easily compute it from `line-start' and
152 ;; `buffer-start'.
155 (defun print-xstream (self sink depth)
156 (declare (ignore depth))
157 (format sink "#<~S ~S>" (type-of self) (xstream-name self)))
159 (defmacro read-rune (input)
160 "Read a single rune off the xstream `input'. In case of end of file :EOF
161 is returned."
162 `((lambda (input)
163 (declare (type xstream input)
164 #.*fast*)
165 (let ((rp (xstream-read-ptr input)))
166 (declare (type buffer-index rp))
167 (let ((ch (aref (the (simple-array buffer-byte (*)) (xstream-buffer input))
168 rp)))
169 (declare (type buffer-byte ch))
170 (setf (xstream-read-ptr input) (%+ rp 1))
171 (cond ((%= ch +end+)
172 (the (or (member :eof) rune)
173 (xstream-underflow input)))
174 ((%= ch #x000A) ;line break
175 (account-for-line-break input)
176 (code-rune ch))
178 (code-rune ch))))))
179 ,input))
181 (defmacro peek-rune (input)
182 "Peek a single rune off the xstream `input'. In case of end of file :EOF
183 is returned."
184 `((lambda (input)
185 (declare (type xstream input)
186 #.*fast*)
187 (let ((rp (xstream-read-ptr input)))
188 (declare (type buffer-index rp))
189 (let ((ch (aref (the (simple-array buffer-byte (*)) (xstream-buffer input))
190 rp)))
191 (declare (type buffer-byte ch))
192 (cond ((%= ch +end+)
193 (prog1
194 (the (or (member :eof) rune) (xstream-underflow input))
195 (setf (xstream-read-ptr input) 0)))
197 (code-rune ch))))))
198 ,input))
200 (defmacro consume-rune (input)
201 "Like READ-RUNE, but does not actually return the read rune."
202 `((lambda (input)
203 (declare (type xstream input)
204 #.*fast*)
205 (let ((rp (xstream-read-ptr input)))
206 (declare (type buffer-index rp))
207 (let ((ch (aref (the (simple-array buffer-byte (*)) (xstream-buffer input))
208 rp)))
209 (declare (type buffer-byte ch))
210 (setf (xstream-read-ptr input) (%+ rp 1))
211 (when (%= ch +end+)
212 (xstream-underflow input))
213 (when (%= ch #x000A) ;line break
214 (account-for-line-break input) )))
215 nil)
216 ,input))
218 (definline unread-rune (rune input)
219 "Unread the last recently read rune; if there wasn't such a rune, you
220 deserve to lose."
221 (declare (ignore rune))
222 (decf (xstream-read-ptr input))
223 (when (rune= (peek-rune input) #/u+000A) ;was it a line break?
224 (unaccount-for-line-break input)))
226 (defun fread-rune (input)
227 (read-rune input))
229 (defun fpeek-rune (input)
230 (peek-rune input))
232 ;;; Line counting
234 (defun account-for-line-break (input)
235 (declare (type xstream input))
236 (incf (xstream-line-number input))
237 (setf (xstream-line-start input)
238 (+ (xstream-buffer-start input) (xstream-read-ptr input))))
240 (defun unaccount-for-line-break (input)
241 ;; incomplete!
242 ;; We better use a traditional lookahead technique or forbid unread-rune.
243 (decf (xstream-line-number input)))
245 ;; User API:
247 (defun xstream-position (input)
248 (+ (xstream-buffer-start input) (xstream-read-ptr input)))
250 ;; xstream-line-number is structure accessor
252 (defun xstream-column-number (input)
253 (+ (- (xstream-position input)
254 (xstream-line-start input))
257 ;;; Underflow
259 (defmethod xstream-underflow ((input xstream))
260 (declare (type xstream input))
261 ;; we are about to fill new data into the buffer, so we need to
262 ;; adjust buffer-start.
263 (incf (xstream-buffer-start input)
264 (- (xstream-fill-ptr input) 0))
265 (let (n m)
266 ;; when there is something left in the os-buffer, we move it to
267 ;; the start of the buffer.
268 (setf m (- (xstream-os-left-end input) (xstream-os-left-start input)))
269 (unless (zerop m)
270 (replace (xstream-os-buffer input) (xstream-os-buffer input)
271 :start1 0 :end1 m
272 :start2 (xstream-os-left-start input)
273 :end2 (xstream-os-left-end input))
274 ;; then we take care that the buffer is large enough to carry at
275 ;; least 100 bytes (a random number)
277 ;; david: was heisst da random? ich nehme an, dass 100 einfach
278 ;; ausreichend sein soll, um die laengste utf-8 bytesequenz oder die
279 ;; beiden utf-16 surrogates zu halten? dann ist 100 ja wohl dicke
280 ;; ausreichend und koennte in make-xstream ordentlich geprueft werden.
281 ;; oder was geht hier vor?
282 (unless (>= (length (xstream-os-buffer input)) 100)
283 (error "You lost")))
284 (setf n
285 (read-octets (xstream-os-buffer input) (xstream-os-stream input)
286 m (min (1- (length (xstream-os-buffer input)))
287 (+ m (xstream-speed input)))))
288 (cond ((%= n 0)
289 (setf (xstream-read-ptr input) 0
290 (xstream-fill-ptr input) n)
291 (setf (aref (xstream-buffer input) (xstream-fill-ptr input)) +end+)
292 :eof)
294 (multiple-value-bind (fnw fnr)
295 (runes-encoding:decode-sequence
296 (xstream-encoding input)
297 (xstream-os-buffer input) 0 n
298 (xstream-buffer input) 0 (1- (length (xstream-buffer input)))
299 (= n m))
300 (setf (xstream-os-left-start input) fnr
301 (xstream-os-left-end input) n
302 (xstream-read-ptr input) 0
303 (xstream-fill-ptr input) fnw)
304 (setf (aref (xstream-buffer input) (xstream-fill-ptr input)) +end+)
305 (read-rune input))))))
307 ;;; constructor
309 (defun make-xstream (os-stream &key name
310 (speed 8192)
311 (initial-speed 1)
312 (initial-encoding :guess))
313 ;; XXX if initial-speed isn't 1, encoding will me munged up
314 (assert (eql initial-speed 1))
315 (multiple-value-bind (encoding preread)
316 (if (eq initial-encoding :guess)
317 (figure-encoding os-stream)
318 (values initial-encoding nil))
319 (let ((osbuf (make-array speed :element-type '(unsigned-byte 8))))
320 (replace osbuf preread)
321 (make-xstream/low
322 :buffer (let ((r (make-array speed :element-type 'buffer-byte)))
323 (setf (elt r 0) #xFFFF)
325 :read-ptr 0
326 :fill-ptr 0
327 :os-buffer osbuf
328 :speed initial-speed
329 :os-stream os-stream
330 :os-left-start 0
331 :os-left-end (length preread)
332 :encoding encoding
333 :name name))))
335 (defun make-rod-xstream (string &key name)
336 ;; XXX encoding is mis-handled by this kind of stream
337 (let ((n (length string)))
338 (let ((buffer (make-array (1+ n) :element-type 'buffer-byte)))
339 (declare (type (simple-array buffer-byte (*)) buffer))
340 ;; copy the rod
341 (do ((i (1- n) (- i 1)))
342 ((< i 0))
343 (declare (type fixnum i))
344 (setf (aref buffer i) (rune-code (%rune string i))))
345 (setf (aref buffer n) +end+)
347 (make-xstream/low :buffer buffer
348 :read-ptr 0
349 :fill-ptr n
350 ;; :os-buffer nil
351 :speed 1
352 :os-stream nil
353 :name name))))
355 (defmethod figure-encoding ((stream null))
356 (values :utf-8 nil))
358 (defmethod figure-encoding ((stream stream))
359 (let ((c0 (read-byte stream nil :eof)))
360 (cond ((eq c0 :eof)
361 (values :utf-8 nil))
363 (let ((c1 (read-byte stream nil :eof)))
364 (cond ((eq c1 :eof)
365 (values :utf-8 (list c0)))
367 (cond ((and (= c0 #xFE) (= c1 #xFF)) (values :utf-16-big-endian nil))
368 ((and (= c0 #xFF) (= c1 #xFE)) (values :utf-16-little-endian nil))
370 (values :utf-8 (list c0 c1)))))))))))
372 ;;; misc
374 (defun close-xstream (input)
375 (xstream/close (xstream-os-stream input)))
377 (defun set-to-full-speed (xstream)
378 (setf (xstream-speed xstream) (length (xstream-os-buffer xstream))))
380 ;;; controller implementations
382 (defmethod read-octets (sequence (stream stream) start end)
383 (#+CLISP ext:read-byte-sequence
384 #-CLISP read-sequence
385 sequence stream :start start :end end))
387 #+cmu
388 (defmethod read-octets :around (sequence (stream stream) start end)
389 ;; CMUCL <= 19a on non-SunOS accidentally triggers EFAULT in read(2)
390 ;; if SEQUENCE has been write protected by GC. Workaround: Touch all pages
391 ;; in SEQUENCE and make sure no GC happens between that and the read(2).
392 (ext::without-gcing
393 (loop for i from start below end
394 do (setf (elt sequence i) (elt sequence i)))
395 (call-next-method)))
397 (defmethod read-octets (sequence (stream null) start end)
398 (declare (ignore sequence start end))
401 (defmethod xstream/close ((stream stream))
402 (close stream))
404 (defmethod xstream/close ((stream null))
405 nil)