Fix asd for cmucl with unicode
[closure-common.git] / ystream.lisp
blob6305055d6c0382151b0d051967fc06f3401127c1
1 ;;; (c) 2005 David Lichteblau <david@lichteblau.com>
2 ;;; License: Lisp-LGPL (See file COPYING for details).
3 ;;;
4 ;;; ystream (for lack of a better name): a rune output "stream"
6 (in-package :runes)
8 (defconstant +ystream-bufsize+ 1024)
10 (defun make-ub8-array (n)
11 (make-array n :element-type '(unsigned-byte 8)))
13 (defun make-ub16-array (n)
14 (make-array n :element-type '(unsigned-byte 16)))
16 (defun make-buffer (&key (element-type '(unsigned-byte 8)))
17 (make-array 1
18 :element-type element-type
19 :adjustable t
20 :fill-pointer 0))
22 (defmacro while (test &body body)
23 `(until (not ,test) ,@body))
25 (defmacro until (test &body body)
26 `(do () (,test) ,@body))
28 (defun find-output-encoding (name)
29 (when (stringp name)
30 (setf name (find-symbol (string-upcase name) :keyword)))
31 (cond
32 ((null name)
33 (warn "Unknown encoding ~A, falling back to UTF-8" name)
34 :utf-8)
35 ((find name '(:utf-8 :utf_8 :utf8))
36 :utf-8)
37 #-rune-is-character
39 (warn "Unknown encoding ~A, falling back to UTF-8" name)
40 :utf-8)
41 #+rune-is-character
43 (handler-case
44 (babel-encodings:get-character-encoding name)
45 (error ()
46 (warn "Unknown encoding ~A, falling back to UTF-8" name)
47 :utf-8)))))
49 ;;; ystream
50 ;;; +- encoding-ystream
51 ;;; | +- octet-vector-ystream
52 ;;; | \- %stream-ystream
53 ;;; | +- octet-stream-ystream
54 ;;; | \- character-stream-ystream/utf8
55 ;;; | \- string-ystream/utf8
56 ;;; +- rod-ystream
57 ;;; \-- character-stream-ystream
59 (defstruct ystream
60 #+rune-is-character (encoding)
61 (column 0 :type integer)
62 (in-ptr 0 :type fixnum)
63 (in-buffer (make-rod +ystream-bufsize+) :type simple-rod))
65 (defun ystream-unicode-p (ystream)
66 (let ((enc (ystream-encoding ystream)))
67 (or (eq enc :utf-8)
68 (eq (babel-encodings:enc-name enc) :utf-16))))
70 (defstruct (encoding-ystream
71 (:include ystream)
72 (:conc-name "YSTREAM-"))
73 (out-buffer (make-ub8-array (* 6 +ystream-bufsize+))
74 :type (simple-array (unsigned-byte 8) (*))))
76 (defstruct (%stream-ystream
77 (:include encoding-ystream)
78 (:conc-name "YSTREAM-"))
79 (os-stream nil))
81 ;; writes a rune to the buffer. If the rune is not encodable, an error
82 ;; might be signalled later during flush-ystream.
83 (definline ystream-write-rune (rune ystream)
84 (let ((in (ystream-in-buffer ystream)))
85 (when (eql (ystream-in-ptr ystream) (length in))
86 (flush-ystream ystream)
87 (setf in (ystream-in-buffer ystream)))
88 (setf (elt in (ystream-in-ptr ystream)) rune)
89 (incf (ystream-in-ptr ystream))
90 (setf (ystream-column ystream)
91 (if (eql rune #/U+000A) 0 (1+ (ystream-column ystream))))
92 rune))
94 ;; Writes a rod to the buffer. If a rune in the rod not encodable, an error
95 ;; might be signalled later during flush-ystream.
96 (defun ystream-write-rod (rod ystream)
98 ;; OPTIMIZE ME
100 (loop for rune across rod do (ystream-write-rune rune ystream)))
102 (defun ystream-write-escapable-rune (rune ystream)
104 ;; OPTIMIZE ME
106 (let ((tmp (make-rod 1)))
107 (setf (elt tmp 0) rune)
108 (ystream-write-escapable-rod tmp ystream)))
110 #-rune-is-character
112 ;; on non-unicode lisps, we only support UTF-8 anyway, so this is like
113 ;; ystream-write-rod, which will never signal an error in this configuration.
114 (defun ystream-write-escapable-rod (rod ystream)
115 (ystream-write-rod rod ystream))
117 ;; Writes a rod to the buffer. If a rune in the rod not encodable, it is
118 ;; replaced by a character reference.
120 #+rune-is-character
121 (defun ystream-write-escapable-rod (rod ystream)
123 ;; OPTIMIZE ME
125 (if (ystream-unicode-p ystream)
126 (ystream-write-rod rod ystream)
127 (loop
128 with encoding = (ystream-encoding ystream)
129 for rune across rod
131 (if (encodablep rune encoding)
132 (ystream-write-rune rune ystream)
133 (let ((cr (string-rod (format nil "&#~D;" (rune-code rune)))))
134 (ystream-write-rod cr ystream))))))
136 #+rune-is-character
137 (defun encodablep (character encoding)
138 (handler-case
139 (babel:string-to-octets (string character) :encoding encoding)
140 (babel-encodings:character-encoding-error ()
141 nil)))
143 (defmethod close-ystream :before ((ystream ystream))
144 (flush-ystream ystream))
147 ;;;; ENCODING-YSTREAM (abstract)
149 (defmethod close-ystream ((ystream %stream-ystream))
150 (ystream-os-stream ystream))
152 (defgeneric ystream-device-write (ystream buf nbytes))
154 #-rune-is-character
155 (defun encode-runes (out in ptr encoding)
156 (runes-to-utf8 out in ptr))
158 #+rune-is-character
159 (defun encode-runes (out in ptr encoding)
160 (case encoding
161 (:utf-8
162 (runes-to-utf8 out in ptr))
164 ;; by lucky coincidence, babel::unicode-string is the same as simple-rod
165 #+nil (coerce string 'babel::unicode-string)
166 (let* ((babel::*suppress-character-coding-errors* nil)
167 (mapping (babel::lookup-mapping babel::*string-vector-mappings*
168 encoding)))
169 (funcall (babel::encoder mapping) in 0 ptr out 0)
170 (funcall (babel::octet-counter mapping) in 0 ptr -1)))))
172 (defmethod flush-ystream ((ystream encoding-ystream))
173 (let ((ptr (ystream-in-ptr ystream)))
174 (when (plusp ptr)
175 (let* ((in (ystream-in-buffer ystream))
176 (out (ystream-out-buffer ystream))
177 #+rune-is-utf-16
178 (surrogatep (<= #xD800 (rune-code (elt in (1- ptr))) #xDBFF))
180 #+rune-is-utf-16
181 (when surrogatep
182 (decf ptr))
183 (when (plusp ptr)
184 (setf n (encode-runes out in ptr (ystream-encoding ystream)))
185 (ystream-device-write ystream out n)
186 (cond
187 #+rune-is-utf-16
188 (surrogatep
189 (setf (elt in 0) (elt in (1- ptr)))
190 (setf (ystream-in-ptr ystream) 1))
192 (setf (ystream-in-ptr ystream) 0))))))))
194 (defun fast-push (new-element vector)
195 (vector-push-extend new-element vector (max 1 (array-dimension vector 0))))
197 (macrolet ((define-utf8-writer (name (byte &rest aux) result &body body)
198 `(defun ,name (out in n)
199 (let (#+rune-is-utf-16 (high-surrogate nil)
200 ,@aux)
201 (labels
202 ((write0 (,byte)
203 ,@body)
204 (write1 (r)
205 (cond
206 ((<= #x00000000 r #x0000007F)
207 (write0 r))
208 ((<= #x00000080 r #x000007FF)
209 (write0 (logior #b11000000 (ldb (byte 5 6) r)))
210 (write0 (logior #b10000000 (ldb (byte 6 0) r))))
211 ((<= #x00000800 r #x0000FFFF)
212 (write0 (logior #b11100000 (ldb (byte 4 12) r)))
213 (write0 (logior #b10000000 (ldb (byte 6 6) r)))
214 (write0 (logior #b10000000 (ldb (byte 6 0) r))))
215 ((<= #x00010000 r #x001FFFFF)
216 (write0 (logior #b11110000 (ldb (byte 3 18) r)))
217 (write0 (logior #b10000000 (ldb (byte 6 12) r)))
218 (write0 (logior #b10000000 (ldb (byte 6 6) r)))
219 (write0 (logior #b10000000 (ldb (byte 6 0) r))))
220 ((<= #x00200000 r #x03FFFFFF)
221 (write0 (logior #b11111000 (ldb (byte 2 24) r)))
222 (write0 (logior #b10000000 (ldb (byte 6 18) r)))
223 (write0 (logior #b10000000 (ldb (byte 6 12) r)))
224 (write0 (logior #b10000000 (ldb (byte 6 6) r)))
225 (write0 (logior #b10000000 (ldb (byte 6 0) r))))
226 ((<= #x04000000 r #x7FFFFFFF)
227 (write0 (logior #b11111100 (ldb (byte 1 30) r)))
228 (write0 (logior #b10000000 (ldb (byte 6 24) r)))
229 (write0 (logior #b10000000 (ldb (byte 6 18) r)))
230 (write0 (logior #b10000000 (ldb (byte 6 12) r)))
231 (write0 (logior #b10000000 (ldb (byte 6 6) r)))
232 (write0 (logior #b10000000 (ldb (byte 6 0) r))))))
233 (write2 (r)
234 (cond
235 #+rune-is-utf-16
236 ((<= #xD800 r #xDBFF)
237 (setf high-surrogate r))
238 #+rune-is-utf-16
239 ((<= #xDC00 r #xDFFF)
240 (let ((q (logior (ash (- high-surrogate #xD7C0) 10)
241 (- r #xDC00))))
242 (write1 q))
243 (setf high-surrogate nil))
244 #-rune-is-utf-16
245 ((<= #xD800 r #xDFFF)
246 (error
247 "surrogates not allowed in this configuration"))
249 (write1 r)))))
250 (dotimes (j n)
251 (write2 (rune-code (elt in j)))))
252 ,result))))
253 (define-utf8-writer runes-to-utf8 (x (i 0))
255 (setf (elt out i) x)
256 (incf i))
257 (define-utf8-writer runes-to-utf8/adjustable-string (x)
259 (fast-push (code-char x) out)))
262 ;;;; ROD-YSTREAM
264 (defstruct (rod-ystream (:include ystream)))
266 (defmethod flush-ystream ((ystream rod-ystream))
267 (let* ((old (ystream-in-buffer ystream))
268 (new (make-rod (* 2 (length old)))))
269 (replace new old)
270 (setf (ystream-in-buffer ystream) new)))
272 (defmethod close-ystream ((ystream rod-ystream))
273 (subseq (ystream-in-buffer ystream) 0 (ystream-in-ptr ystream)))
276 ;;;; CHARACTER-STREAM-YSTREAM
278 #+rune-is-character
279 (progn
280 (defstruct (character-stream-ystream
281 (:constructor make-character-stream-ystream (target-stream))
282 (:include ystream)
283 (:conc-name "YSTREAM-"))
284 (target-stream nil))
286 (defmethod flush-ystream ((ystream character-stream-ystream))
287 (write-string (ystream-in-buffer ystream)
288 (ystream-target-stream ystream)
289 :end (ystream-in-ptr ystream))
290 (setf (ystream-in-ptr ystream) 0))
292 (defmethod close-ystream ((ystream character-stream-ystream))
293 (ystream-target-stream ystream)))
296 ;;;; OCTET-VECTOR-YSTREAM
298 (defstruct (octet-vector-ystream
299 (:include encoding-ystream)
300 (:conc-name "YSTREAM-"))
301 (result (make-buffer)))
303 (defmethod ystream-device-write ((ystream octet-vector-ystream) buf nbytes)
304 (let* ((result (ystream-result ystream))
305 (start (length result))
306 (size (array-dimension result 0)))
307 (while (> (+ start nbytes) size)
308 (setf size (* 2 size)))
309 (adjust-array result size :fill-pointer (+ start nbytes))
310 (replace result buf :start1 start :end2 nbytes)))
312 (defmethod close-ystream ((ystream octet-vector-ystream))
313 (ystream-result ystream))
316 ;;;; OCTET-STREAM-YSTREAM
318 (defstruct (octet-stream-ystream
319 (:include %stream-ystream)
320 (:constructor make-octet-stream-ystream (os-stream))
321 (:conc-name "YSTREAM-")))
323 (defmethod ystream-device-write ((ystream octet-stream-ystream) buf nbytes)
324 (write-sequence buf (ystream-os-stream ystream) :end nbytes))
327 ;;;; CHARACTER-STREAM-YSTREAM/UTF8
329 ;; #+rune-is-integer
330 (progn
331 (defstruct (character-stream-ystream/utf8
332 (:constructor make-character-stream-ystream/utf8 (os-stream))
333 (:include %stream-ystream)
334 (:conc-name "YSTREAM-")))
336 (defmethod ystream-device-write
337 ((ystream character-stream-ystream/utf8) buf nbytes)
338 (declare (type (simple-array (unsigned-byte 8) (*)) buf))
339 (let ((out (ystream-os-stream ystream)))
340 (dotimes (x nbytes)
341 (write-char (code-char (elt buf x)) out)))))
344 ;;;; STRING-YSTREAM/UTF8
346 ;; #+rune-is-integer
347 (progn
348 (defstruct (string-ystream/utf8
349 (:include character-stream-ystream/utf8
350 (os-stream (make-string-output-stream)))
351 (:conc-name "YSTREAM-")))
353 (defmethod close-ystream ((ystream string-ystream/utf8))
354 (get-output-stream-string (ystream-os-stream ystream))))
357 ;;;; helper functions
359 (defun rod-to-utf8-string (rod)
360 (let ((out (make-buffer :element-type 'character)))
361 (runes-to-utf8/adjustable-string out rod (length rod))
362 out))
364 (defun utf8-string-to-rod (str)
365 (let* ((bytes (map '(vector (unsigned-byte 8)) #'char-code str))
366 (buffer (make-array (length bytes) :element-type 'buffer-byte))
367 (n (runes-encoding:decode-sequence
368 :utf-8 bytes 0 (length bytes) buffer 0 0 nil))
369 (result (make-array n :element-type 'rune)))
370 (map-into result #'code-rune buffer)
371 result))
373 (defclass octet-input-stream
374 (trivial-gray-stream-mixin fundamental-binary-input-stream)
375 ((octets :initarg :octets)
376 (pos :initform 0)))
378 (defmethod close ((stream octet-input-stream) &key abort)
379 (declare (ignore abort))
380 (open-stream-p stream))
382 (defmethod stream-read-byte ((stream octet-input-stream))
383 (with-slots (octets pos) stream
384 (if (>= pos (length octets))
385 :eof
386 (prog1
387 (elt octets pos)
388 (incf pos)))))
390 (defmethod stream-read-sequence
391 ((stream octet-input-stream) sequence start end &key &allow-other-keys)
392 (with-slots (octets pos) stream
393 (let* ((length (min (- end start) (- (length octets) pos)))
394 (end1 (+ start length))
395 (end2 (+ pos length)))
396 (replace sequence octets :start1 start :end1 end1 :start2 pos :end2 end2)
397 (setf pos end2)
398 end1)))
400 (defun make-octet-input-stream (octets)
401 (make-instance 'octet-input-stream :octets octets))