1 ;;; (c) 2005 David Lichteblau <david@lichteblau.com>
2 ;;; License: Lisp-LGPL (See file COPYING for details).
4 ;;; ystream (for lack of a better name): a rune output "stream"
8 (defconstant +ystream-bufsize
+ 1024)
10 (defun make-ub8-array (n)
11 (make-array n
:element-type
'(unsigned-byte 8)))
13 (defun make-ub16-array (n)
14 (make-array n
:element-type
'(unsigned-byte 16)))
16 (defun make-buffer (&key
(element-type '(unsigned-byte 8)))
18 :element-type element-type
22 (defmacro while
(test &body body
)
23 `(until (not ,test
) ,@body
))
25 (defmacro until
(test &body body
)
26 `(do () (,test
) ,@body
))
30 ;;; | +- octet-vector-ystream
31 ;;; | \- %stream-ystream
32 ;;; | +- octet-stream-ystream
33 ;;; | \- character-stream-ystream/utf8
34 ;;; | \- string-ystream/utf8
36 ;;; \-- character-stream-ystream
39 (column 0 :type integer
)
40 (in-ptr 0 :type fixnum
)
41 (in-buffer (make-rod +ystream-bufsize
+) :type simple-rod
))
43 (defstruct (utf8-ystream
45 (:conc-name
"YSTREAM-"))
46 (out-buffer (make-ub8-array (* 6 +ystream-bufsize
+))
47 :type
(simple-array (unsigned-byte 8) (*))))
49 (defstruct (%stream-ystream
(:include utf8-ystream
) (:conc-name
"YSTREAM-"))
52 (definline write-rune
(rune ystream
)
53 (let ((in (ystream-in-buffer ystream
)))
54 (when (eql (ystream-in-ptr ystream
) (length in
))
55 (flush-ystream ystream
)
56 (setf in
(ystream-in-buffer ystream
)))
57 (setf (elt in
(ystream-in-ptr ystream
)) rune
)
58 (incf (ystream-in-ptr ystream
))
59 (setf (ystream-column ystream
)
60 (if (eql rune
#/U
+0010) 0 (1+ (ystream-column ystream
))))
63 (defmethod close-ystream :before
((ystream ystream
))
64 (flush-ystream ystream
))
67 ;;;; UTF8-YSTREAM (abstract)
69 (defmethod close-ystream ((ystream %stream-ystream
))
70 (ystream-os-stream ystream
))
72 (defgeneric ystream-device-write
(ystream buf nbytes
))
74 (defmethod flush-ystream ((ystream utf8-ystream
))
75 (let ((ptr (ystream-in-ptr ystream
)))
77 (let* ((in (ystream-in-buffer ystream
))
78 (out (ystream-out-buffer ystream
))
79 (surrogatep (<= #xD800
(rune-code (elt in
(1- ptr
))) #xDBFF
))
84 (setf n
(runes-to-utf8 out in ptr
))
85 (ystream-device-write ystream out n
)
88 (setf (elt in
0) (elt in
(1- ptr
)))
89 (setf (ystream-in-ptr ystream
) 1))
91 (setf (ystream-in-ptr ystream
) 0))))))))
93 (defun write-rod (rod sink
)
94 (loop for rune across rod do
(write-rune rune sink
)))
96 (defun fast-push (new-element vector
)
97 (vector-push-extend new-element vector
(max 1 (array-dimension vector
0))))
99 (macrolet ((define-utf8-writer (name (byte &rest aux
) result
&body body
)
100 `(defun ,name
(out in n
)
101 (let ((high-surrogate nil
)
108 ((<= #x00000000 r
#x0000007F
)
110 ((<= #x00000080 r
#x000007FF
)
111 (write0 (logior #b11000000
(ldb (byte 5 6) r
)))
112 (write0 (logior #b10000000
(ldb (byte 6 0) r
))))
113 ((<= #x00000800 r
#x0000FFFF
)
114 (write0 (logior #b11100000
(ldb (byte 4 12) r
)))
115 (write0 (logior #b10000000
(ldb (byte 6 6) r
)))
116 (write0 (logior #b10000000
(ldb (byte 6 0) r
))))
117 ((<= #x00010000 r
#x001FFFFF
)
118 (write0 (logior #b11110000
(ldb (byte 3 18) r
)))
119 (write0 (logior #b10000000
(ldb (byte 6 12) r
)))
120 (write0 (logior #b10000000
(ldb (byte 6 6) r
)))
121 (write0 (logior #b10000000
(ldb (byte 6 0) r
))))
122 ((<= #x00200000 r
#x03FFFFFF
)
123 (write0 (logior #b11111000
(ldb (byte 2 24) r
)))
124 (write0 (logior #b10000000
(ldb (byte 6 18) r
)))
125 (write0 (logior #b10000000
(ldb (byte 6 12) r
)))
126 (write0 (logior #b10000000
(ldb (byte 6 6) r
)))
127 (write0 (logior #b10000000
(ldb (byte 6 0) r
))))
128 ((<= #x04000000 r
#x7FFFFFFF
)
129 (write0 (logior #b11111100
(ldb (byte 1 30) r
)))
130 (write0 (logior #b10000000
(ldb (byte 6 24) r
)))
131 (write0 (logior #b10000000
(ldb (byte 6 18) r
)))
132 (write0 (logior #b10000000
(ldb (byte 6 12) r
)))
133 (write0 (logior #b10000000
(ldb (byte 6 6) r
)))
134 (write0 (logior #b10000000
(ldb (byte 6 0) r
))))))
137 ((<= #xD800 r
#xDBFF
)
138 (setf high-surrogate r
))
139 ((<= #xDC00 r
#xDFFF
)
140 (let ((q (logior (ash (- high-surrogate
#xD7C0
) 10)
143 (setf high-surrogate nil
))
147 (write2 (rune-code (elt in j
)))))
149 (define-utf8-writer runes-to-utf8
(x (i 0))
153 (define-utf8-writer runes-to-utf8
/adjustable-string
(x)
155 (fast-push (code-char x
) out
)))
160 (defstruct (rod-ystream (:include ystream
)))
162 (defmethod flush-ystream ((ystream rod-ystream
))
163 (let* ((old (ystream-in-buffer ystream
))
164 (new (make-rod (* 2 (length old
)))))
166 (setf (ystream-in-buffer ystream
) new
)))
168 (defmethod close-ystream ((ystream rod-ystream
))
169 (subseq (ystream-in-buffer ystream
) 0 (ystream-in-ptr ystream
)))
172 ;;;; CHARACTER-STREAM-YSTREAM
176 (defstruct (character-stream-ystream
177 (:constructor make-character-stream-ystream
(target-stream))
179 (:conc-name
"YSTREAM-"))
182 (defmethod flush-ystream ((ystream character-stream-ystream
))
183 (write-string (ystream-in-buffer ystream
)
184 (ystream-target-stream ystream
)
185 :end
(ystream-in-ptr ystream
))
186 (setf (ystream-in-ptr ystream
) 0))
188 (defmethod close-ystream ((ystream character-stream-ystream
))
189 (ystream-target-stream ystream
)))
192 ;;;; OCTET-VECTOR-YSTREAM
194 (defstruct (octet-vector-ystream
195 (:include utf8-ystream
)
196 (:conc-name
"YSTREAM-"))
197 (result (make-buffer)))
199 (defmethod ystream-device-write ((ystream octet-vector-ystream
) buf nbytes
)
200 (let* ((result (ystream-result ystream
))
201 (start (length result
))
202 (size (array-dimension result
0)))
203 (while (> (+ start nbytes
) size
)
204 (setf size
(* 2 size
)))
205 (adjust-array result size
:fill-pointer
(+ start nbytes
))
206 (replace result buf
:start1 start
:end2 nbytes
)))
208 (defmethod close-ystream ((ystream octet-vector-ystream
))
209 (ystream-result ystream
))
212 ;;;; OCTET-STREAM-YSTREAM
214 (defstruct (octet-stream-ystream
215 (:include %stream-ystream
)
216 (:constructor make-octet-stream-ystream
(os-stream))
217 (:conc-name
"YSTREAM-")))
219 (defmethod ystream-device-write ((ystream octet-stream-ystream
) buf nbytes
)
220 (write-sequence buf
(ystream-os-stream ystream
) :end nbytes
))
223 ;;;; CHARACTER-STREAM-YSTREAM/UTF8
227 (defstruct (character-stream-ystream/utf8
228 (:constructor make-character-stream-ystream
/utf8
(os-stream))
229 (:include %stream-ystream
)
230 (:conc-name
"YSTREAM-")))
232 (defmethod ystream-device-write
233 ((ystream character-stream-ystream
/utf8
) buf nbytes
)
234 (declare (type (simple-array (unsigned-byte 8) (*)) buf
))
235 (let ((out (ystream-os-stream ystream
)))
237 (write-char (code-char (elt buf x
)) out
)))))
240 ;;;; STRING-YSTREAM/UTF8
244 (defstruct (string-ystream/utf8
245 (:include character-stream-ystream
/utf8
246 (os-stream (make-string-output-stream)))
247 (:conc-name
"YSTREAM-")))
249 (defmethod close-ystream ((ystream string-ystream
/utf8
))
250 (get-output-stream-string (ystream-os-stream ystream
))))
253 ;;;; helper functions
255 (defun rod-to-utf8-string (rod)
256 (let ((out (make-buffer :element-type
'character
)))
257 (runes-to-utf8/adjustable-string out rod
(length rod
))
260 (defun utf8-string-to-rod (str)
261 (let* ((bytes (map '(vector (unsigned-byte 8)) #'char-code str
))
262 (buffer (make-array (length bytes
) :element-type
'(unsigned-byte 16)))
263 (n (runes-encoding:decode-sequence
264 :utf-8 bytes
0 (length bytes
) buffer
0 0 nil
))
265 (result (make-array n
:element-type
'rune
)))
266 (map-into result
#'code-rune buffer
)
269 (defclass octet-input-stream
270 (trivial-gray-stream-mixin fundamental-binary-input-stream
)
271 ((octets :initarg
:octets
)
274 (defmethod close ((stream octet-input-stream
) &key abort
)
275 (declare (ignore abort
))
276 (open-stream-p stream
))
278 (defmethod stream-read-byte ((stream octet-input-stream
))
279 (with-slots (octets pos
) stream
280 (if (>= pos
(length octets
))
286 (defmethod stream-read-sequence
287 ((stream octet-input-stream
) sequence start end
&key
&allow-other-keys
)
288 (with-slots (octets pos
) stream
289 (let* ((length (min (- end start
) (- (length octets
) pos
)))
290 (end1 (+ start length
))
291 (end2 (+ pos length
)))
292 (replace sequence octets
:start1 start
:end1 end1
:start2 pos
:end2 end2
)
296 (defun make-octet-input-stream (octets)
297 (make-instance 'octet-input-stream
:octets octets
))