Fix asd for cmucl with unicode
[closure-common.git] / xstream.lisp
blob61600421aec542567a7e6bbc82beec03e7120b26
1 ;;; -*- Mode: Lisp; Syntax: Common-Lisp; readtable: runes; Encoding: utf-8; -*-
2 ;;; ---------------------------------------------------------------------------
3 ;;; Title: Fast streams
4 ;;; Created: 1999-07-17
5 ;;; Author: Gilbert Baumann <unk6@rz.uni-karlsruhe.de>
6 ;;; License: Lisp-LGPL (See file COPYING for details).
7 ;;; ---------------------------------------------------------------------------
8 ;;; (c) copyright 1999 by Gilbert Baumann
10 ;;; This library is free software; you can redistribute it and/or
11 ;;; modify it under the terms of the GNU Library General Public
12 ;;; License as published by the Free Software Foundation; either
13 ;;; version 2 of the License, or (at your option) any later version.
14 ;;;
15 ;;; This library is distributed in the hope that it will be useful,
16 ;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
17 ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 ;;; Library General Public License for more details.
19 ;;;
20 ;;; You should have received a copy of the GNU Library General Public
21 ;;; License along with this library; if not, write to the
22 ;;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
23 ;;; Boston, MA 02111-1307 USA.
25 (in-package :runes)
27 ;;; API
28 ;;
29 ;; MAKE-XSTREAM cl-stream &key name! speed initial-speed initial-encoding
30 ;; [function]
31 ;; MAKE-ROD-XSTREAM rod &key name [function]
32 ;; CLOSE-XSTREAM xstream [function]
33 ;; XSTREAM-P object [function]
35 ;; READ-RUNE xstream [macro]
36 ;; PEEK-RUNE xstream [macro]
37 ;; FREAD-RUNE xstream [function]
38 ;; FPEEK-RUNE xstream [function]
39 ;; CONSUME-RUNE xstream [macro]
40 ;; UNREAD-RUNE rune xstream [function]
42 ;; XSTREAM-NAME xstream [accessor]
43 ;; XSTREAM-POSITION xstream [function]
44 ;; XSTREAM-LINE-NUMBER xstream [function]
45 ;; XSTREAM-COLUMN-NUMBER xstream [function]
46 ;; XSTREAM-PLIST xstream [accessor]
47 ;; XSTREAM-ENCODING xstream [accessor] <-- be careful here. [*]
48 ;; SET-TO-FULL-SPEED xstream [function]
50 ;; [*] switching the encoding on the fly is only possible when the
51 ;; stream's buffer is empty; therefore to be able to switch the
52 ;; encoding, while some runes are already read, set the stream's speed
53 ;; to 1 initially (via the initial-speed argument for MAKE-XSTREAM)
54 ;; and later set it to full speed. (The encoding of the runes
55 ;; sequence, you fetch off with READ-RUNE is always UTF-16 though).
56 ;; After switching the encoding, SET-TO-FULL-SPEED can be used to bump the
57 ;; speed up to a full buffer length.
59 ;; An encoding is simply something, which provides the DECODE-SEQUENCE
60 ;; method.
62 ;;; Controller protocol
64 ;; READ-OCTECTS sequence os-stream start end -> first-non-written
65 ;; XSTREAM/CLOSE os-stream
68 (eval-when (:compile-toplevel :load-toplevel :execute)
69 (defparameter *fast* '(optimize (speed 3) (safety 0))))
71 ;; Let us first define fast fixnum arithmetric get rid of type
72 ;; checks. (After all we know what we do here).
74 (defmacro fx-op (op &rest xs)
75 `(the fixnum (,op ,@(mapcar (lambda (x) `(the fixnum ,x)) xs))))
76 (defmacro fx-pred (op &rest xs)
77 `(,op ,@(mapcar (lambda (x) `(the fixnum ,x)) xs)))
79 (defmacro %+ (&rest xs) `(fx-op + ,@xs))
80 (defmacro %= (&rest xs) `(fx-pred = ,@xs))
82 (deftype buffer-index ()
83 `(unsigned-byte ,(integer-length array-total-size-limit)))
85 (deftype buffer-byte ()
86 #+rune-is-utf-16
87 `(unsigned-byte 16)
88 #-rune-is-utf-16
89 `(unsigned-byte 32))
91 (deftype octet ()
92 `(unsigned-byte 8))
94 ;; The usage of a special marker for EOF is experimental and
95 ;; considered unhygenic.
97 (defconstant +end+ #xFFFF
98 "Special marker inserted into stream buffers to indicate end of buffered data.")
100 (defvar +null-buffer+ (make-array 0 :element-type 'buffer-byte))
101 (defvar +null-octet-buffer+ (make-array 0 :element-type 'octet))
103 (defstruct (xstream
104 (:constructor make-xstream/low)
105 (:copier nil)
106 (:print-function print-xstream))
108 ;;; Read buffer
110 ;; the buffer itself
111 (buffer +null-buffer+
112 :type (simple-array buffer-byte (*)))
113 ;; points to the next element of `buffer' containing the next rune
114 ;; about to be read.
115 (read-ptr 0 :type buffer-index)
116 ;; points to the first element of `buffer' not containing a rune to
117 ;; be read.
118 (fill-ptr 0 :type buffer-index)
120 ;;; OS buffer
122 ;; a scratch pad for READ-SEQUENCE
123 (os-buffer +null-octet-buffer+
124 :type (simple-array octet (*)))
126 ;; `os-left-start', `os-left-end' designate a region of os-buffer,
127 ;; which still contains some undecoded data. This is needed because
128 ;; of the DECODE-SEQUENCE protocol
129 (os-left-start 0 :type buffer-index)
130 (os-left-end 0 :type buffer-index)
132 ;; How much to read each time
133 (speed 0 :type buffer-index)
134 (full-speed 0 :type buffer-index)
136 ;; Some stream object obeying to a certain protcol
137 os-stream
139 ;; The external format
140 ;; (some object offering the ENCODING protocol)
141 (encoding :utf-8)
143 ;;A STREAM-NAME object
144 (name nil)
146 ;; a plist a struct keeps the hack away
147 (plist nil)
149 ;; Stream Position
150 (line-number 1 :type integer) ;current line number
151 (line-start 0 :type integer) ;stream position the current line starts at
152 (buffer-start 0 :type integer) ;stream position the current buffer starts at
154 ;; There is no need to maintain a column counter for each character
155 ;; read, since we can easily compute it from `line-start' and
156 ;; `buffer-start'.
159 (defun print-xstream (self sink depth)
160 (declare (ignore depth))
161 (format sink "#<~S ~S>" (type-of self) (xstream-name self)))
163 (defmacro read-rune (input)
164 "Read a single rune off the xstream `input'. In case of end of file :EOF
165 is returned."
166 `((lambda (input)
167 (declare (type xstream input)
168 #.*fast*)
169 (let ((rp (xstream-read-ptr input)))
170 (declare (type buffer-index rp))
171 (let ((ch (aref (the (simple-array buffer-byte (*)) (xstream-buffer input))
172 rp)))
173 (declare (type buffer-byte ch))
174 (setf (xstream-read-ptr input) (%+ rp 1))
175 (cond ((%= ch +end+)
176 (the (or (member :eof) rune)
177 (xstream-underflow input)))
178 ((%= ch #x000A) ;line break
179 (account-for-line-break input)
180 (code-rune ch))
182 (code-rune ch))))))
183 ,input))
185 (defmacro peek-rune (input)
186 "Peek a single rune off the xstream `input'. In case of end of file :EOF
187 is returned."
188 `((lambda (input)
189 (declare (type xstream input)
190 #.*fast*)
191 (let ((rp (xstream-read-ptr input)))
192 (declare (type buffer-index rp))
193 (let ((ch (aref (the (simple-array buffer-byte (*)) (xstream-buffer input))
194 rp)))
195 (declare (type buffer-byte ch))
196 (cond ((%= ch +end+)
197 (prog1
198 (the (or (member :eof) rune) (xstream-underflow input))
199 (setf (xstream-read-ptr input) 0)))
201 (code-rune ch))))))
202 ,input))
204 (defmacro consume-rune (input)
205 "Like READ-RUNE, but does not actually return the read rune."
206 `((lambda (input)
207 (declare (type xstream input)
208 #.*fast*)
209 (let ((rp (xstream-read-ptr input)))
210 (declare (type buffer-index rp))
211 (let ((ch (aref (the (simple-array buffer-byte (*)) (xstream-buffer input))
212 rp)))
213 (declare (type buffer-byte ch))
214 (setf (xstream-read-ptr input) (%+ rp 1))
215 (when (%= ch +end+)
216 (xstream-underflow input))
217 (when (%= ch #x000A) ;line break
218 (account-for-line-break input) )))
219 nil)
220 ,input))
222 (definline unread-rune (rune input)
223 "Unread the last recently read rune; if there wasn't such a rune, you
224 deserve to lose."
225 (declare (ignore rune))
226 (decf (xstream-read-ptr input))
227 (when (rune= (peek-rune input) #/u+000A) ;was it a line break?
228 (unaccount-for-line-break input)))
230 (defun fread-rune (input)
231 (read-rune input))
233 (defun fpeek-rune (input)
234 (peek-rune input))
236 ;;; Line counting
238 (defun account-for-line-break (input)
239 (declare (type xstream input))
240 (incf (xstream-line-number input))
241 (setf (xstream-line-start input)
242 (+ (xstream-buffer-start input) (xstream-read-ptr input))))
244 (defun unaccount-for-line-break (input)
245 ;; incomplete!
246 ;; We better use a traditional lookahead technique or forbid unread-rune.
247 (decf (xstream-line-number input)))
249 ;; User API:
251 (defun xstream-position (input)
252 (+ (xstream-buffer-start input) (xstream-read-ptr input)))
254 ;; xstream-line-number is structure accessor
256 (defun xstream-column-number (input)
257 (+ (- (xstream-position input)
258 (xstream-line-start input))
261 ;;; Underflow
263 (defconstant +default-buffer-size+ 100)
265 (defmethod xstream-underflow ((input xstream))
266 (declare (type xstream input))
267 ;; we are about to fill new data into the buffer, so we need to
268 ;; adjust buffer-start.
269 (incf (xstream-buffer-start input)
270 (- (xstream-fill-ptr input) 0))
271 (let (n m)
272 ;; when there is something left in the os-buffer, we move it to
273 ;; the start of the buffer.
274 (setf m (- (xstream-os-left-end input) (xstream-os-left-start input)))
275 (unless (zerop m)
276 (replace (xstream-os-buffer input) (xstream-os-buffer input)
277 :start1 0 :end1 m
278 :start2 (xstream-os-left-start input)
279 :end2 (xstream-os-left-end input))
280 ;; then we take care that the buffer is large enough to carry at
281 ;; least 100 bytes (a random number)
283 ;; David: My understanding is that any number of octets large enough
284 ;; to record the longest UTF-8 sequence or UTF-16 sequence is okay,
285 ;; so 100 is plenty for this purpose.
286 (unless (>= (length (xstream-os-buffer input))
287 +default-buffer-size+)
288 (error "You lost")))
289 (setf n
290 (read-octets (xstream-os-buffer input) (xstream-os-stream input)
291 m (min (1- (length (xstream-os-buffer input)))
292 (+ m (xstream-speed input)))))
293 (cond ((%= n 0)
294 (setf (xstream-read-ptr input) 0
295 (xstream-fill-ptr input) n)
296 (setf (aref (xstream-buffer input) (xstream-fill-ptr input)) +end+)
297 :eof)
299 (multiple-value-bind (fnw fnr)
300 (runes-encoding:decode-sequence
301 (xstream-encoding input)
302 (xstream-os-buffer input) 0 n
303 (xstream-buffer input) 0 (1- (length (xstream-buffer input)))
304 (= n m))
305 (setf (xstream-os-left-start input) fnr
306 (xstream-os-left-end input) n
307 (xstream-read-ptr input) 0
308 (xstream-fill-ptr input) fnw)
309 (setf (aref (xstream-buffer input) (xstream-fill-ptr input)) +end+)
310 (read-rune input))))))
312 ;;; constructor
314 (defun make-xstream (os-stream &key name
315 (speed 8192)
316 (initial-speed 1)
317 (initial-encoding :guess))
318 ;; XXX if initial-speed isn't 1, encoding will me munged up
319 (assert (eql initial-speed 1))
320 (multiple-value-bind (encoding preread)
321 (if (eq initial-encoding :guess)
322 (figure-encoding os-stream)
323 (values initial-encoding nil))
324 (let* ((bufsize (max speed +default-buffer-size+))
325 (osbuf (make-array bufsize :element-type '(unsigned-byte 8))))
326 (replace osbuf preread)
327 (make-xstream/low
328 :buffer (let ((r (make-array bufsize :element-type 'buffer-byte)))
329 (setf (elt r 0) #xFFFF)
331 :read-ptr 0
332 :fill-ptr 0
333 :os-buffer osbuf
334 :speed initial-speed
335 :full-speed speed
336 :os-stream os-stream
337 :os-left-start 0
338 :os-left-end (length preread)
339 :encoding encoding
340 :name name))))
342 (defun make-rod-xstream (string &key name)
343 (unless (typep string 'simple-array)
344 (setf string (coerce string 'simple-string)))
345 (let ((n (length string)))
346 (let ((buffer (make-array (1+ n) :element-type 'buffer-byte)))
347 (declare (type (simple-array buffer-byte (*)) buffer))
348 ;; copy the rod
349 (do ((i (1- n) (- i 1)))
350 ((< i 0))
351 (declare (type fixnum i))
352 (setf (aref buffer i) (rune-code (%rune string i))))
353 (setf (aref buffer n) +end+)
355 (make-xstream/low :buffer buffer
356 :read-ptr 0
357 :fill-ptr n
358 ;; :os-buffer nil
359 :speed 1
360 :os-stream nil
361 :name name))))
363 (defmethod figure-encoding ((stream null))
364 (values :utf-8 nil))
366 (defmethod figure-encoding ((stream stream))
367 (let ((c0 (read-byte stream nil :eof)))
368 (cond ((eq c0 :eof)
369 (values :utf-8 nil))
371 (let ((c1 (read-byte stream nil :eof)))
372 (cond ((eq c1 :eof)
373 (values :utf-8 (list c0)))
375 (cond ((and (= c0 #xFE) (= c1 #xFF)) (values :utf-16-big-endian nil))
376 ((and (= c0 #xFF) (= c1 #xFE)) (values :utf-16-little-endian nil))
377 ((and (= c0 #xEF) (= c1 #xBB))
378 (let ((c2 (read-byte stream nil :eof)))
379 (if (= c2 #xBF)
380 (values :utf-8 nil)
381 (values :utf-8 (list c0 c1 c2)))))
383 (values :utf-8 (list c0 c1)))))))))))
385 ;;; misc
387 (defun close-xstream (input)
388 (xstream/close (xstream-os-stream input)))
390 (defun set-to-full-speed (xstream)
391 (setf (xstream-speed xstream) (xstream-full-speed xstream)))
393 ;;; controller implementations
395 (defmethod read-octets (sequence (stream stream) start end)
396 (#+CLISP ext:read-byte-sequence
397 #-CLISP read-sequence
398 sequence stream :start start :end end))
400 #+cmu
401 (defmethod read-octets :around (sequence (stream stream) start end)
402 ;; CMUCL <= 19a on non-SunOS accidentally triggers EFAULT in read(2)
403 ;; if SEQUENCE has been write protected by GC. Workaround: Touch all pages
404 ;; in SEQUENCE and make sure no GC happens between that and the read(2).
405 (ext::without-gcing
406 (loop for i from start below end
407 do (setf (elt sequence i) (elt sequence i)))
408 (call-next-method)))
410 (defmethod read-octets (sequence (stream null) start end)
411 (declare (ignore sequence start end))
414 (defmethod xstream/close ((stream stream))
415 (close stream))
417 (defmethod xstream/close ((stream null))
418 nil)