1 ;;; (c) 2005 David Lichteblau <david@lichteblau.com>
2 ;;; License: Lisp-LGPL (See file COPYING for details).
4 ;;; ystream (for lack of a better name): a rune output "stream"
8 (defconstant +ystream-bufsize
+ 1024)
10 (defun make-ub8-array (n)
11 (make-array n
:element-type
'(unsigned-byte 8)))
13 (defun make-ub16-array (n)
14 (make-array n
:element-type
'(unsigned-byte 16)))
16 (defun make-buffer (&key
(element-type '(unsigned-byte 8)))
18 :element-type element-type
22 (defmacro while
(test &body body
)
23 `(until (not ,test
) ,@body
))
25 (defmacro until
(test &body body
)
26 `(do () (,test
) ,@body
))
28 (defun find-output-encoding (name)
30 (setf name
(find-symbol (string-upcase name
) :keyword
)))
33 (warn "Unknown encoding ~A, falling back to UTF-8" name
)
35 ((find name
'(:utf-8
:utf_8
:utf8
))
39 (warn "Unknown encoding ~A, falling back to UTF-8" name
)
44 (babel-encodings:get-character-encoding name
)
46 (warn "Unknown encoding ~A, falling back to UTF-8" name
)
50 ;;; +- encoding-ystream
51 ;;; | +- octet-vector-ystream
52 ;;; | \- %stream-ystream
53 ;;; | +- octet-stream-ystream
54 ;;; | \- character-stream-ystream/utf8
55 ;;; | \- string-ystream/utf8
57 ;;; \-- character-stream-ystream
60 #+rune-is-character
(encoding)
61 (column 0 :type integer
)
62 (in-ptr 0 :type fixnum
)
63 (in-buffer (make-rod +ystream-bufsize
+) :type simple-rod
))
65 (defun ystream-unicode-p (ystream)
66 (let ((enc (ystream-encoding ystream
)))
68 (eq (babel-encodings:enc-name enc
) :utf-16
))))
70 (defstruct (encoding-ystream
72 (:conc-name
"YSTREAM-"))
73 (out-buffer (make-ub8-array (* 6 +ystream-bufsize
+))
74 :type
(simple-array (unsigned-byte 8) (*))))
76 (defstruct (%stream-ystream
77 (:include encoding-ystream
)
78 (:conc-name
"YSTREAM-"))
81 ;; writes a rune to the buffer. If the rune is not encodable, an error
82 ;; might be signalled later during flush-ystream.
83 (definline ystream-write-rune
(rune ystream
)
84 (let ((in (ystream-in-buffer ystream
)))
85 (when (eql (ystream-in-ptr ystream
) (length in
))
86 (flush-ystream ystream
)
87 (setf in
(ystream-in-buffer ystream
)))
88 (setf (elt in
(ystream-in-ptr ystream
)) rune
)
89 (incf (ystream-in-ptr ystream
))
90 (setf (ystream-column ystream
)
91 (if (eql rune
#/U
+000A
) 0 (1+ (ystream-column ystream
))))
94 ;; Writes a rod to the buffer. If a rune in the rod not encodable, an error
95 ;; might be signalled later during flush-ystream.
96 (defun ystream-write-rod (rod ystream
)
100 (loop for rune across rod do
(ystream-write-rune rune ystream
)))
102 (defun ystream-write-escapable-rune (rune ystream
)
106 (let ((tmp (make-rod 1)))
107 (setf (elt tmp
0) rune
)
108 (ystream-write-escapable-rod tmp ystream
)))
112 ;; on non-unicode lisps, we only support UTF-8 anyway, so this is like
113 ;; ystream-write-rod, which will never signal an error in this configuration.
114 (defun ystream-write-escapable-rod (rod ystream
)
115 (ystream-write-rod rod ystream
))
117 ;; Writes a rod to the buffer. If a rune in the rod not encodable, it is
118 ;; replaced by a character reference.
121 (defun ystream-write-escapable-rod (rod ystream
)
125 (if (ystream-unicode-p ystream
)
126 (ystream-write-rod rod ystream
)
128 with encoding
= (ystream-encoding ystream
)
131 (if (encodablep rune encoding
)
132 (ystream-write-rune rune ystream
)
133 (let ((cr (string-rod (format nil
"&#~D;" (rune-code rune
)))))
134 (ystream-write-rod cr ystream
))))))
137 (defun encodablep (character encoding
)
139 (babel:string-to-octets
(string character
) :encoding encoding
)
140 (babel-encodings:character-encoding-error
()
143 (defmethod close-ystream :before
((ystream ystream
))
144 (flush-ystream ystream
))
147 ;;;; ENCODING-YSTREAM (abstract)
149 (defmethod close-ystream ((ystream %stream-ystream
))
150 (ystream-os-stream ystream
))
152 (defgeneric ystream-device-write
(ystream buf nbytes
))
155 (defun encode-runes (out in ptr encoding
)
156 (runes-to-utf8 out in ptr
))
159 (defun encode-runes (out in ptr encoding
)
162 (runes-to-utf8 out in ptr
))
164 ;; by lucky coincidence, babel::unicode-string is the same as simple-rod
165 #+nil
(coerce string
'babel
::unicode-string
)
166 (let* ((babel::*suppress-character-coding-errors
* nil
)
167 (mapping (babel::lookup-mapping babel
::*string-vector-mappings
*
169 (funcall (babel::encoder mapping
) in
0 ptr out
0)
170 (funcall (babel::octet-counter mapping
) in
0 ptr -
1)))))
172 (defmethod flush-ystream ((ystream encoding-ystream
))
173 (let ((ptr (ystream-in-ptr ystream
)))
175 (let* ((in (ystream-in-buffer ystream
))
176 (out (ystream-out-buffer ystream
))
178 (surrogatep (<= #xD800
(rune-code (elt in
(1- ptr
))) #xDBFF
))
184 (setf n
(encode-runes out in ptr
(ystream-encoding ystream
)))
185 (ystream-device-write ystream out n
)
189 (setf (elt in
0) (elt in
(1- ptr
)))
190 (setf (ystream-in-ptr ystream
) 1))
192 (setf (ystream-in-ptr ystream
) 0))))))))
194 (defun fast-push (new-element vector
)
195 (vector-push-extend new-element vector
(max 1 (array-dimension vector
0))))
197 (macrolet ((define-utf8-writer (name (byte &rest aux
) result
&body body
)
198 `(defun ,name
(out in n
)
199 (let (#+rune-is-utf-16
(high-surrogate nil
)
206 ((<= #x00000000 r
#x0000007F
)
208 ((<= #x00000080 r
#x000007FF
)
209 (write0 (logior #b11000000
(ldb (byte 5 6) r
)))
210 (write0 (logior #b10000000
(ldb (byte 6 0) r
))))
211 ((<= #x00000800 r
#x0000FFFF
)
212 (write0 (logior #b11100000
(ldb (byte 4 12) r
)))
213 (write0 (logior #b10000000
(ldb (byte 6 6) r
)))
214 (write0 (logior #b10000000
(ldb (byte 6 0) r
))))
215 ((<= #x00010000 r
#x001FFFFF
)
216 (write0 (logior #b11110000
(ldb (byte 3 18) r
)))
217 (write0 (logior #b10000000
(ldb (byte 6 12) r
)))
218 (write0 (logior #b10000000
(ldb (byte 6 6) r
)))
219 (write0 (logior #b10000000
(ldb (byte 6 0) r
))))
220 ((<= #x00200000 r
#x03FFFFFF
)
221 (write0 (logior #b11111000
(ldb (byte 2 24) r
)))
222 (write0 (logior #b10000000
(ldb (byte 6 18) r
)))
223 (write0 (logior #b10000000
(ldb (byte 6 12) r
)))
224 (write0 (logior #b10000000
(ldb (byte 6 6) r
)))
225 (write0 (logior #b10000000
(ldb (byte 6 0) r
))))
226 ((<= #x04000000 r
#x7FFFFFFF
)
227 (write0 (logior #b11111100
(ldb (byte 1 30) r
)))
228 (write0 (logior #b10000000
(ldb (byte 6 24) r
)))
229 (write0 (logior #b10000000
(ldb (byte 6 18) r
)))
230 (write0 (logior #b10000000
(ldb (byte 6 12) r
)))
231 (write0 (logior #b10000000
(ldb (byte 6 6) r
)))
232 (write0 (logior #b10000000
(ldb (byte 6 0) r
))))))
236 ((<= #xD800 r
#xDBFF
)
237 (setf high-surrogate r
))
239 ((<= #xDC00 r
#xDFFF
)
240 (let ((q (logior (ash (- high-surrogate
#xD7C0
) 10)
243 (setf high-surrogate nil
))
245 ((<= #xD800 r
#xDFFF
)
247 "surrogates not allowed in this configuration"))
251 (write2 (rune-code (elt in j
)))))
253 (define-utf8-writer runes-to-utf8
(x (i 0))
257 (define-utf8-writer runes-to-utf8
/adjustable-string
(x)
259 (fast-push (code-char x
) out
)))
264 (defstruct (rod-ystream (:include ystream
)))
266 (defmethod flush-ystream ((ystream rod-ystream
))
267 (let* ((old (ystream-in-buffer ystream
))
268 (new (make-rod (* 2 (length old
)))))
270 (setf (ystream-in-buffer ystream
) new
)))
272 (defmethod close-ystream ((ystream rod-ystream
))
273 (subseq (ystream-in-buffer ystream
) 0 (ystream-in-ptr ystream
)))
276 ;;;; CHARACTER-STREAM-YSTREAM
280 (defstruct (character-stream-ystream
281 (:constructor make-character-stream-ystream
(target-stream))
283 (:conc-name
"YSTREAM-"))
286 (defmethod flush-ystream ((ystream character-stream-ystream
))
287 (write-string (ystream-in-buffer ystream
)
288 (ystream-target-stream ystream
)
289 :end
(ystream-in-ptr ystream
))
290 (setf (ystream-in-ptr ystream
) 0))
292 (defmethod close-ystream ((ystream character-stream-ystream
))
293 (ystream-target-stream ystream
)))
296 ;;;; OCTET-VECTOR-YSTREAM
298 (defstruct (octet-vector-ystream
299 (:include encoding-ystream
)
300 (:conc-name
"YSTREAM-"))
301 (result (make-buffer)))
303 (defmethod ystream-device-write ((ystream octet-vector-ystream
) buf nbytes
)
304 (let* ((result (ystream-result ystream
))
305 (start (length result
))
306 (size (array-dimension result
0)))
307 (while (> (+ start nbytes
) size
)
308 (setf size
(* 2 size
)))
309 (adjust-array result size
:fill-pointer
(+ start nbytes
))
310 (replace result buf
:start1 start
:end2 nbytes
)))
312 (defmethod close-ystream ((ystream octet-vector-ystream
))
313 (ystream-result ystream
))
316 ;;;; OCTET-STREAM-YSTREAM
318 (defstruct (octet-stream-ystream
319 (:include %stream-ystream
)
320 (:constructor make-octet-stream-ystream
(os-stream))
321 (:conc-name
"YSTREAM-")))
323 (defmethod ystream-device-write ((ystream octet-stream-ystream
) buf nbytes
)
324 (write-sequence buf
(ystream-os-stream ystream
) :end nbytes
))
327 ;;;; CHARACTER-STREAM-YSTREAM/UTF8
331 (defstruct (character-stream-ystream/utf8
332 (:constructor make-character-stream-ystream
/utf8
(os-stream))
333 (:include %stream-ystream
)
334 (:conc-name
"YSTREAM-")))
336 (defmethod ystream-device-write
337 ((ystream character-stream-ystream
/utf8
) buf nbytes
)
338 (declare (type (simple-array (unsigned-byte 8) (*)) buf
))
339 (let ((out (ystream-os-stream ystream
)))
341 (write-char (code-char (elt buf x
)) out
)))))
344 ;;;; STRING-YSTREAM/UTF8
348 (defstruct (string-ystream/utf8
349 (:include character-stream-ystream
/utf8
350 (os-stream (make-string-output-stream)))
351 (:conc-name
"YSTREAM-")))
353 (defmethod close-ystream ((ystream string-ystream
/utf8
))
354 (get-output-stream-string (ystream-os-stream ystream
))))
357 ;;;; helper functions
359 (defun rod-to-utf8-string (rod)
360 (let ((out (make-buffer :element-type
'character
)))
361 (runes-to-utf8/adjustable-string out rod
(length rod
))
364 (defun utf8-string-to-rod (str)
365 (let* ((bytes (map '(vector (unsigned-byte 8)) #'char-code str
))
366 (buffer (make-array (length bytes
) :element-type
'buffer-byte
))
367 (n (runes-encoding:decode-sequence
368 :utf-8 bytes
0 (length bytes
) buffer
0 0 nil
))
369 (result (make-array n
:element-type
'rune
)))
370 (map-into result
#'code-rune buffer
)
373 (defclass octet-input-stream
374 (trivial-gray-stream-mixin fundamental-binary-input-stream
)
375 ((octets :initarg
:octets
)
378 (defmethod close ((stream octet-input-stream
) &key abort
)
379 (declare (ignore abort
))
380 (open-stream-p stream
))
382 (defmethod stream-read-byte ((stream octet-input-stream
))
383 (with-slots (octets pos
) stream
384 (if (>= pos
(length octets
))
390 (defmethod stream-read-sequence
391 ((stream octet-input-stream
) sequence start end
&key
&allow-other-keys
)
392 (with-slots (octets pos
) stream
393 (let* ((length (min (- end start
) (- (length octets
) pos
)))
394 (end1 (+ start length
))
395 (end2 (+ pos length
)))
396 (replace sequence octets
:start1 start
:end1 end1
:start2 pos
:end2 end2
)
400 (defun make-octet-input-stream (octets)
401 (make-instance 'octet-input-stream
:octets octets
))