Fix transform-%with-array-data/mumble
[sbcl.git] / src / code / external-formats / enc-utf.lisp
blobcc7f0ee9acf90650addb3b65ed946a398baf8329
1 ;;;; Unicode Transformation Format (UTF) encodings
2 ;;;;
3 ;;;; In our interpretation, these are distinct from UCS encodings: the
4 ;;;; UCS encodings are a direct encoding of the code point, in 16- and
5 ;;;; 32-bit variants; by contrast, the UTF encodings handle Unicode
6 ;;;; surrogate code points specially.
8 ;;;; This software is part of the SBCL system. See the README file for
9 ;;;; more information.
10 ;;;;
11 ;;;; This software is derived from the CMU CL system, which was
12 ;;;; written at Carnegie Mellon University and released into the
13 ;;;; public domain. The software is in the public domain and is
14 ;;;; provided with absolutely no warranty. See the COPYING and CREDITS
15 ;;;; files for more information.
17 (in-package "SB-IMPL")
20 (declaim (inline utf-noncharacter-code-p))
21 (defun utf-noncharacter-code-p (code)
22 (or (<= #xd800 code #xdfff)
23 (<= #xfdd0 code #xfdef)
24 (= (logand code #xfffe) #xfffe)))
26 ;;; Conversion to UTF-16{LE,BE}
27 (declaim (inline char->utf-16le))
28 (defun char->utf-16le (char dest string pos replacement)
29 (declare (optimize speed #.*safety-0*)
30 (type (array (unsigned-byte 8) (*)) dest))
31 (let ((code (char-code char)))
32 (if (utf-noncharacter-code-p code)
33 (let ((replacement (encoding-error :utf-16le replacement string pos)))
34 (declare (type (simple-array (unsigned-byte 8) (*)) replacement))
35 (dotimes (i (length replacement))
36 (vector-push-extend (aref replacement i) dest)))
37 (flet ((add-byte (b)
38 (declare (type (unsigned-byte 8) b))
39 (vector-push-extend b dest)))
40 (declare (inline add-byte))
41 (cond
42 ((< code #x10000)
43 (add-byte (ldb (byte 8 0) code))
44 (add-byte (ldb (byte 8 8) code)))
46 (let* ((codeoid (- code #x10000))
47 (high (dpb (ldb (byte 10 10) codeoid) (byte 10 0) #xd800))
48 (low (dpb (ldb (byte 10 0) codeoid) (byte 10 0) #xdc00)))
49 (add-byte (ldb (byte 8 0) high))
50 (add-byte (ldb (byte 8 8) high))
51 (add-byte (ldb (byte 8 0) low))
52 (add-byte (ldb (byte 8 8) low)))))))))
54 (declaim (inline char->utf-16be))
55 (defun char->utf-16be (char dest string pos replacement)
56 (declare (optimize speed #.*safety-0*)
57 (type (array (unsigned-byte 8) (*)) dest))
58 (let ((code (char-code char)))
59 (if (utf-noncharacter-code-p code)
60 (let ((replacement (encoding-error :utf-16be replacement string pos)))
61 (declare (type (simple-array (unsigned-byte 8) (*)) replacement))
62 (dotimes (i (length replacement))
63 (vector-push-extend (aref replacement i) dest)))
64 (flet ((add-byte (b)
65 (declare (type (unsigned-byte 8) b))
66 (vector-push-extend b dest)))
67 (declare (inline add-byte))
68 (cond
69 ((< code #x10000)
70 (add-byte (ldb (byte 8 8) code))
71 (add-byte (ldb (byte 8 0) code)))
73 (let* ((codeoid (- code #x10000))
74 (high (dpb (ldb (byte 10 10) codeoid) (byte 10 0) #xd800))
75 (low (dpb (ldb (byte 10 0) codeoid) (byte 10 0) #xdc00)))
76 (add-byte (ldb (byte 8 8) high))
77 (add-byte (ldb (byte 8 0) high))
78 (add-byte (ldb (byte 8 8) low))
79 (add-byte (ldb (byte 8 0) low)))))))))
81 (defun string->utf-16le (string sstart send additional-space replacement)
82 (declare (optimize speed #.*safety-0*)
83 (type simple-string string)
84 (type array-range sstart send additional-space))
85 (let ((array (make-array (* 2 (+ additional-space (- send sstart)))
86 :element-type '(unsigned-byte 8)
87 :fill-pointer 0 :adjustable t)))
88 (loop for i from sstart below send
89 do (char->utf-16le (char string i) array string i replacement))
90 (dotimes (i (* 2 additional-space))
91 (vector-push-extend 0 array))
92 (coerce array '(simple-array (unsigned-byte 8) (*)))))
94 (defun string->utf-16be (string sstart send additional-space replacement)
95 (declare (optimize speed #.*safety-0*)
96 (type simple-string string)
97 (type array-range sstart send additional-space))
98 (let ((array (make-array (* 2 (+ additional-space (- send sstart)))
99 :element-type '(unsigned-byte 8)
100 :fill-pointer 0 :adjustable t)))
101 (loop for i from sstart below send
102 do (char->utf-16be (char string i) array string i replacement))
103 (dotimes (i (* 2 additional-space))
104 (vector-push-extend 0 array))
105 (coerce array '(simple-array (unsigned-byte 8) (*)))))
107 ;; Conversion from UTF-16{LE,BE}
108 (defmacro define-bytes-per-utf16-character (accessor type)
109 (declare (ignore type))
110 (let ((name-le (make-od-name 'bytes-per-utf-16le-character accessor))
111 (name-be (make-od-name 'bytes-per-utf-16be-character accessor)))
112 `(progn
113 (defun ,name-le (array pos end replacement)
114 (let ((remaining (- end pos)))
115 (when (< remaining 2)
116 (return-from ,name-le (values remaining (decoding-error array pos end :utf-16le replacement 'octet-decoding-error pos))))
117 (let ((low (dpb (,accessor array (+ pos 1)) (byte 8 8) (,accessor array pos))))
118 (if (<= #xd800 low #xdbff)
119 (if (< remaining 4)
120 (values remaining (decoding-error array pos end :utf-16le replacement 'octet-decoding-error pos))
121 (let ((high (dpb (,accessor array (+ pos 3)) (byte 8 8) (,accessor array (+ pos 2)))))
122 (if (<= #xdc00 high #xdfff)
123 (let ((code (dpb (ldb (byte 10 0) low) (byte 10 10) (ldb (byte 10 0) high))))
124 (if (= (logand code #xfffe) #xfffe)
125 (values 4 (decoding-error array pos (+ pos 4) :utf-16le replacement 'octet-decoding-error pos))
126 (values 4 nil)))
127 (values 2 (decoding-error array pos (+ pos 2) :utf-16le replacement 'octet-decoding-error pos)))))
128 (if (or (<= #xdc00 low #xdfff)
129 (<= #xfdd0 low #xfdef)
130 (= (logand low #xfffe) #xfffe))
131 (values 2 (decoding-error array pos (+ pos 2) :utf-16le replacement 'octet-decoding-error pos))
132 (values 2 nil))))))
133 (defun ,name-be (array pos end replacement)
134 (let ((remaining (- end pos)))
135 (when (< remaining 2)
136 (return-from ,name-be (values remaining (decoding-error array pos end :utf-16le replacement 'octet-decoding-error pos))))
137 (let ((low (dpb (,accessor array pos) (byte 8 8) (,accessor array (+ pos 1)))))
138 (if (<= #xd800 low #xdbff)
139 (if (< remaining 4)
140 (values remaining (decoding-error array pos end :utf-16le replacement 'octet-decoding-error pos))
141 (let ((high (dpb (,accessor array (+ pos 2)) (byte 8 8) (,accessor array (+ pos 3)))))
142 (if (<= #xdc00 high #xdfff)
143 (let ((code (dpb (ldb (byte 10 0) low) (byte 10 10) (ldb (byte 10 0) high))))
144 (if (= (logand code #xfffe) #xfffe)
145 (values 4 (decoding-error array pos (+ pos 4) :utf-16le replacement 'octet-decoding-error pos))
146 (values 4 nil)))
147 (values 2 (decoding-error array pos (+ pos 2) :utf-16le replacement 'octet-decoding-error pos)))))
148 (if (or (<= #xdc00 low #xdfff)
149 (<= #xfdd0 low #xfdef)
150 (= (logand low #xfffe) #xfffe))
151 (values 2 (decoding-error array pos (+ pos 2) :utf-16le replacement 'octet-decoding-error pos))
152 (values 2 nil)))))))))
153 (instantiate-octets-definition define-bytes-per-utf16-character)
155 (defmacro define-simple-get-utf16-character (accessor type)
156 (let ((name-le (make-od-name 'simple-get-utf-16le-char accessor))
157 (name-be (make-od-name 'simple-get-utf-16be-char accessor)))
158 `(progn
159 (defun ,name-le (array pos bytes)
160 (declare (optimize speed #.*safety-0*)
161 (type ,type array)
162 (type array-range pos)
163 (type (integer 1 4) bytes)
164 (ignore bytes))
165 ;; Optimization for SYSTEM-AREA-POINTER: use SAP-REF-16LE that
166 ;; reads two bytes at once on some architectures.
167 (let ((code ,(if (and (eq accessor 'sap-ref-8)
168 (eq type 'system-area-pointer))
169 '(sap-ref-16le array pos)
170 `(flet ((cref (x)
171 (,accessor array (the array-range (+ pos x)))))
172 (declare (inline cref))
173 (dpb (cref 1) (byte 8 8) (cref 0))))))
174 (if (<= #xd800 code #xdbff)
175 (let ((next ,(if (and (eq accessor 'sap-ref-8)
176 (eq type 'system-area-pointer))
177 '(sap-ref-16le array (+ pos 2))
178 `(flet ((cref (x)
179 (,accessor array (the array-range (+ pos x)))))
180 (declare (inline cref))
181 (dpb (cref 3) (byte 8 8) (cref 2))))))
182 (code-char (+ #x10000 (dpb (ldb (byte 10 0) code) (byte 10 10) (ldb (byte 10 0) next)))))
183 (code-char code))))
184 (defun ,name-be (array pos bytes)
185 (declare (optimize speed #.*safety-0*)
186 (type ,type array)
187 (type array-range pos)
188 (type (integer 1 4) bytes)
189 (ignore bytes))
190 ;; Use SAP-REF-16BE even if it is not optimized
191 (let ((code ,(if (and (eq accessor 'sap-ref-8)
192 (eq type 'system-area-pointer))
193 '(sap-ref-16be array pos)
194 `(flet ((cref (x)
195 (,accessor array (the array-range (+ pos x)))))
196 (declare (inline cref))
197 (dpb (cref 0) (byte 8 8) (cref 1))))))
198 (if (<= #xd800 code #xdbff)
199 (let ((next ,(if (and (eq accessor 'sap-ref-8)
200 (eq type 'system-area-pointer))
201 '(sap-ref-16be array (+ pos 2))
202 `(flet ((cref (x)
203 (,accessor array (the array-range (+ pos x)))))
204 (declare (inline cref))
205 (dpb (cref 2) (byte 8 8) (cref 3))))))
206 (code-char (+ #x10000 (dpb (ldb (byte 10 0) code) (byte 10 10) (ldb (byte 10 0) next)))))
207 (code-char code)))))))
209 (instantiate-octets-definition define-simple-get-utf16-character)
211 (defmacro define-utf-16->string (accessor type)
212 (let ((name-le (make-od-name 'utf-16le->string accessor))
213 (name-be (make-od-name 'utf-16be->string accessor)))
214 `(progn
215 (defun ,name-le (array astart aend replacement)
216 (declare (optimize speed #.*safety-0*)
217 (type ,type array)
218 (type array-range astart aend))
219 (let ((string (make-array 0 :adjustable t :fill-pointer 0 :element-type 'character)))
220 (loop with pos = astart
221 while (< pos aend)
222 do (multiple-value-bind (bytes invalid)
223 (,(make-od-name 'bytes-per-utf-16le-character accessor) array pos aend replacement)
224 (declare (type (or null string) invalid))
225 (cond
226 ((null invalid)
227 (vector-push-extend
228 (,(make-od-name 'simple-get-utf-16le-char accessor)
229 array pos bytes)
230 string))
231 (t (dotimes (i (length invalid))
232 (vector-push-extend (char invalid i) string))))
233 (incf pos bytes)))
234 (copy-seq string)))
235 (defun ,name-be (array astart aend replacement)
236 (declare (optimize speed #.*safety-0*)
237 (type ,type array)
238 (type array-range astart aend))
239 (let ((string (make-array 0 :adjustable t :fill-pointer 0 :element-type 'character)))
240 (loop with pos = astart
241 while (< pos aend)
242 do (multiple-value-bind (bytes invalid)
243 (,(make-od-name 'bytes-per-utf-16be-character accessor) array pos aend replacement)
244 (declare (type (or null string) invalid))
245 (cond
246 ((null invalid)
247 (vector-push-extend
248 (,(make-od-name 'simple-get-utf-16be-char accessor)
249 array pos bytes)
250 string))
251 (t (dotimes (i (length invalid))
252 (vector-push-extend (char invalid i) string))))
253 (incf pos bytes)))
254 (copy-seq string))))))
256 (instantiate-octets-definition define-utf-16->string)
258 (define-external-format/variable-width (:utf-16le :utf16le) t
259 (code-char #xfffd)
260 (let ((bits (char-code |ch|)))
261 (if (< bits #x10000) 2 4))
262 (cond
263 ((< bits #x10000)
264 (if (utf-noncharacter-code-p bits)
265 (external-format-encoding-error stream bits)
266 (setf (sap-ref-16le sap tail) bits)))
267 (t (if (= (logand bits #xfffe) #xfffe)
268 (external-format-encoding-error stream bits)
269 (let* ((new-bits (- bits #x10000))
270 (high (ldb (byte 10 10) new-bits))
271 (low (ldb (byte 10 0) new-bits)))
272 (setf (sap-ref-16le sap tail) (dpb high (byte 10 0) #xd800))
273 (setf (sap-ref-16le sap (+ tail 2)) (dpb low (byte 10 0) #xdc00))))))
274 (2 (if (<= #xd800 (sap-ref-16le sap head) #xdbff) 4 2))
275 (let ((bits (sap-ref-16le sap head)))
276 (cond
277 ((or (<= #xdc00 bits #xdfff)
278 (<= #xfdd0 bits #xfdef)
279 (= (logand bits #xfffe) #xfffe))
280 (return-from decode-break-reason 2))
281 ((<= #xd800 bits #xdbff)
282 (let ((next (sap-ref-16le sap (+ head 2))))
283 (unless (<= #xdc00 next #xdfff)
284 (return-from decode-break-reason 2))
285 (let ((code (dpb (ldb (byte 10 0) bits) (byte 10 10) (ldb (byte 10 0) next))))
286 (if (= (logand code #xfffe) #xfffe)
287 (return-from decode-break-reason 4)
288 (code-char (+ #x10000 code))))))
289 (t (code-char bits))))
290 utf-16le->string-aref
291 string->utf-16le
292 :char-encodable-p (not (utf-noncharacter-code-p (char-code |ch|))))
294 (define-external-format/variable-width (:utf-16be :utf16be) t
295 (code-char #xfffd)
296 (let ((bits (char-code |ch|)))
297 (if (< bits #x10000) 2 4))
298 (cond
299 ((< bits #x10000)
300 (if (utf-noncharacter-code-p bits)
301 (external-format-encoding-error stream bits)
302 (setf (sap-ref-16be sap tail) bits)))
303 (t (if (= (logand bits #xfffe) #xfffe)
304 (external-format-encoding-error stream bits)
305 (let* ((new-bits (- bits #x10000))
306 (high (ldb (byte 10 10) new-bits))
307 (low (ldb (byte 10 0) new-bits)))
308 (setf (sap-ref-16be sap tail) (dpb high (byte 10 0) #xd800))
309 (setf (sap-ref-16be sap (+ tail 2)) (dpb low (byte 10 0) #xdc00))))))
310 (2 (if (<= #xd800 (sap-ref-16be sap head) #xdbff) 4 2))
311 (let ((bits (sap-ref-16be sap head)))
312 (cond
313 ((or (<= #xdc00 bits #xdfff)
314 (<= #xfdd0 bits #xfdef)
315 (= (logand bits #xfffe) #xfffe))
316 (return-from decode-break-reason 2))
317 ((<= #xd800 bits #xdbff)
318 (let ((next (sap-ref-16be sap (+ head 2))))
319 (unless (<= #xdc00 next #xdfff)
320 (return-from decode-break-reason 2))
321 (let ((code (dpb (ldb (byte 10 0) bits) (byte 10 10) (ldb (byte 10 0) next))))
322 (if (= (logand code #xfffe) #xfffe)
323 (return-from decode-break-reason 4)
324 (code-char (+ #x10000 code))))))
325 (t (code-char bits))))
326 utf-16be->string-aref
327 string->utf-16be
328 :char-encodable-p (not (utf-noncharacter-code-p (char-code |ch|))))
330 (declaim (inline char->utf-32le))
331 (defun char->utf-32le (char dest string pos replacement)
332 (declare (optimize speed #.*safety-0*)
333 (type (array (unsigned-byte 8) (*)) dest))
334 (let ((code (char-code char)))
335 (if (utf-noncharacter-code-p code)
336 (let ((replacement (encoding-error :utf-32le replacement string pos)))
337 (declare (type (simple-array (unsigned-byte 8) (*)) replacement))
338 (dotimes (i (length replacement))
339 (vector-push-extend (aref replacement i) dest)))
340 (flet ((add-byte (b)
341 (declare (type (unsigned-byte 8) b))
342 (vector-push-extend b dest)))
343 (declare (inline add-byte))
344 (add-byte (ldb (byte 8 0) code))
345 (add-byte (ldb (byte 8 8) code))
346 (add-byte (ldb (byte 8 16) code))
347 (add-byte (ldb (byte 8 24) code))))))
349 (declaim (inline char->utf-32be))
350 (defun char->utf-32be (char dest string pos replacement)
351 (declare (optimize speed #.*safety-0*)
352 (type (array (unsigned-byte 8) (*)) dest))
353 (let ((code (char-code char)))
354 (if (utf-noncharacter-code-p code)
355 (let ((replacement (encoding-error :utf-32be replacement string pos)))
356 (declare (type (simple-array (unsigned-byte 8) (*)) replacement))
357 (dotimes (i (length replacement))
358 (vector-push-extend (aref replacement i) dest)))
359 (flet ((add-byte (b)
360 (declare (type (unsigned-byte 8) b))
361 (vector-push-extend b dest)))
362 (declare (inline add-byte))
363 (add-byte (ldb (byte 8 24) code))
364 (add-byte (ldb (byte 8 16) code))
365 (add-byte (ldb (byte 8 8) code))
366 (add-byte (ldb (byte 8 0) code))))))
368 (defun string->utf-32le (string sstart send additional-space replacement)
369 (declare (optimize speed #.*safety-0*)
370 (type simple-string string)
371 (type array-range sstart send additional-space))
372 (let ((array (make-array (* 4 (+ additional-space (- send sstart)))
373 :element-type '(unsigned-byte 8)
374 :fill-pointer 0 :adjustable t)))
375 (loop for i from sstart below send
376 do (char->utf-32le (char string i) array string i replacement))
377 (dotimes (i (* 4 additional-space))
378 (vector-push-extend 0 array))
379 (coerce array '(simple-array (unsigned-byte 8) (*)))))
381 (defun string->utf-32be (string sstart send additional-space replacement)
382 (declare (optimize speed #.*safety-0*)
383 (type simple-string string)
384 (type array-range sstart send additional-space))
385 (let ((array (make-array (* 4 (+ additional-space (- send sstart)))
386 :element-type '(unsigned-byte 8)
387 :fill-pointer 0 :adjustable t)))
388 (loop for i from sstart below send
389 do (char->utf-32be (char string i) array string i replacement))
390 (dotimes (i (* 4 additional-space))
391 (vector-push-extend 0 array))
392 (coerce array '(simple-array (unsigned-byte 8) (*)))))
394 ;; Conversion from UTF-32{LE,BE}
395 (defmacro define-bytes-per-utf32-character (accessor type)
396 (declare (ignore type))
397 (let ((name-le (make-od-name 'bytes-per-utf-32le-character accessor))
398 (name-be (make-od-name 'bytes-per-utf-32be-character accessor)))
399 `(progn
400 (defun ,name-le (array pos end)
401 (declare (ignore array pos end))
402 (values 4 nil))
403 (defun ,name-be (array pos end)
404 (declare (ignore array pos end))
405 (values 4 nil)))))
406 (instantiate-octets-definition define-bytes-per-utf32-character)
408 (defmacro define-simple-get-utf32-character (accessor type)
409 (let ((name-le (make-od-name 'simple-get-utf-32le-char accessor))
410 (name-be (make-od-name 'simple-get-utf-32be-char accessor)))
411 `(progn
412 (defun ,name-le (array pos bytes replacement)
413 (declare (optimize speed #.*safety-0*)
414 (type ,type array)
415 (type array-range pos)
416 (type (integer 1 4) bytes))
417 ;; Optimization for SYSTEM-AREA-POINTER: use SAP-REF-32LE that
418 ;; reads four bytes at once on some architectures.
419 (let ((code ,(if (and (eq accessor 'sap-ref-8)
420 (eq type 'system-area-pointer))
421 '(sap-ref-32le array pos)
422 `(flet ((cref (x)
423 (,accessor array (the array-range (+ pos x)))))
424 (declare (inline cref))
425 (dpb (cref 3) (byte 8 24)
426 (dpb (cref 2) (byte 8 16)
427 (dpb (cref 1) (byte 8 8) (cref 0))))))))
428 (if (and (< code char-code-limit)
429 (not (utf-noncharacter-code-p code)))
430 (code-char code)
431 (decoding-error array pos (+ pos bytes) :utf-32le replacement
432 'octet-decoding-error pos))))
433 (defun ,name-be (array pos bytes replacement)
434 (declare (optimize speed #.*safety-0*)
435 (type ,type array)
436 (type array-range pos)
437 (type (integer 1 4) bytes))
438 ;; Use SAP-REF-32BE even if it is not optimized
439 (let ((code ,(if (and (eq accessor 'sap-ref-8)
440 (eq type 'system-area-pointer))
441 '(sap-ref-32be array pos)
442 `(flet ((cref (x)
443 (,accessor array (the array-range (+ pos x)))))
444 (declare (inline cref))
445 (dpb (cref 0) (byte 8 24)
446 (dpb (cref 1) (byte 8 16)
447 (dpb (cref 2) (byte 8 8) (cref 3))))))))
448 (if (and (< code char-code-limit)
449 (not (utf-noncharacter-code-p code)))
450 (code-char code)
451 (decoding-error array pos (+ pos bytes) :utf-32be replacement
452 'octet-decoding-error pos)))))))
454 (declaim (muffle-conditions compiler-note))
455 (instantiate-octets-definition define-simple-get-utf32-character)
457 (defmacro define-utf-32->string (accessor type)
458 (let ((name-le (make-od-name 'utf-32le->string accessor))
459 (name-be (make-od-name 'utf-32be->string accessor)))
460 `(progn
461 (defun ,name-le (array astart aend replacement)
462 (declare (optimize speed #.*safety-0*)
463 (type ,type array)
464 (type array-range astart aend))
465 (let ((string (make-array 0 :adjustable t :fill-pointer 0 :element-type 'character)))
466 (loop with pos = astart
467 while (< pos aend)
468 do (multiple-value-bind (bytes invalid)
469 (,(make-od-name 'bytes-per-utf-32le-character accessor) array pos aend)
470 (declare (type (or null string) invalid))
471 (aver (null invalid))
472 (let ((thing
473 (if (<= (+ pos bytes) aend)
474 (,(make-od-name 'simple-get-utf-32le-char accessor) array pos bytes replacement)
475 (decoding-error array pos aend :utf-32le replacement
476 'octet-decoding-error pos))))
477 (typecase thing
478 (character (vector-push-extend thing string))
479 (string (dotimes (i (length thing))
480 (vector-push-extend (char thing i) string)))))
481 (incf pos bytes)))
482 (copy-seq string)))
483 (defun ,name-be (array astart aend replacement)
484 (declare (optimize speed #.*safety-0*)
485 (type ,type array)
486 (type array-range astart aend))
487 (let ((string (make-array 0 :adjustable t :fill-pointer 0 :element-type 'character)))
488 (loop with pos = astart
489 while (< pos aend)
490 do (multiple-value-bind (bytes invalid)
491 (,(make-od-name 'bytes-per-utf-32be-character accessor) array pos aend)
492 (declare (type (or null string) invalid))
493 (aver (null invalid))
494 (let ((thing
495 (if (<= (+ pos bytes) aend)
496 (,(make-od-name 'simple-get-utf-32be-char accessor) array pos bytes replacement)
497 (decoding-error array pos aend :utf-32be replacement
498 'octet-decoding-error pos))))
499 (typecase thing
500 (character (vector-push-extend thing string))
501 (string (dotimes (i (length thing))
502 (vector-push-extend (char thing i) string)))))
503 (incf pos bytes)))
504 (copy-seq string))))))
506 (instantiate-octets-definition define-utf-32->string)
508 (define-external-format/variable-width (:utf-32le :utf32le) t
509 (code-char #xfffd)
511 (if (utf-noncharacter-code-p bits)
512 (external-format-encoding-error stream bits)
513 (setf (sap-ref-32le sap tail) bits))
515 (let ((code (sap-ref-32le sap head)))
516 (if (and (< code char-code-limit)
517 (not (utf-noncharacter-code-p code)))
518 (code-char code)
519 (return-from decode-break-reason 4)))
520 utf-32le->string-aref
521 string->utf-32le
522 :char-encodable-p (not (utf-noncharacter-code-p (char-code |ch|))))
524 (define-external-format/variable-width (:utf-32be :utf32be) t
525 (code-char #xfffd)
527 (if (utf-noncharacter-code-p bits)
528 (external-format-encoding-error stream bits)
529 (setf (sap-ref-32be sap tail) bits))
531 (let ((code (sap-ref-32be sap head)))
532 (if (and (< code char-code-limit)
533 (not (utf-noncharacter-code-p code)))
534 (code-char code)
535 (return-from decode-break-reason 4)))
536 utf-32be->string-aref
537 string->utf-32be
538 :char-encodable-p (not (utf-noncharacter-code-p (char-code |ch|))))