3 (define-condition encode-error
(error)
4 ((seq :reader encode-error-seq
:initarg
:seq
:initform
(error ":seq required") :type sequence
)
5 (pos :reader encode-error-pos
:initarg
:pos
:initform
(error ":pos required") :type
'unsigned-byte
))
6 (:report
(lambda (condition stream
)
7 (format stream
"UTF-8 encode error at position ~D" (encode-error-pos condition
)))))
9 (defgeneric encode-sequence
(seq &key start end byte-order-mark
))
11 (defmethod encode-sequence ((seq vector
) &key
(start 0) end byte-order-mark
)
13 (setf end
(length seq
)))
14 (assert (<= 0 start end
(length seq
)))
16 (let ((octet-vector (make-array (+ (* 4 (- end start
)) (if byte-order-mark
19 :element-type
'(unsigned-byte 8)))
22 (setf (aref octet-vector
0) #xEF
)
23 (setf (aref octet-vector
1) #xBB
)
24 (setf (aref octet-vector
2) #xBF
)
26 (loop for pos from start to
(1- end
)
27 for code-point
= (aref seq pos
)
28 do
(cond ((<= #x0000 code-point
#x007F
)
29 (setf (aref octet-vector octet-pos
) code-point
)
32 ((<= #x0080 code-point
#x07FF
)
33 (setf (aref octet-vector octet-pos
) (logior #b11000000
(ldb (byte 5 6) code-point
)))
34 (setf (aref octet-vector
(1+ octet-pos
)) (logior #b10000000
(ldb (byte 6 0) code-point
)))
37 ((or (<= #x0800 code-point
#xD7FF
)
38 (<= #xE000 code-point
#xFFFF
))
39 (setf (aref octet-vector octet-pos
) (logior #b11100000
(ldb (byte 4 12) code-point
)))
40 (setf (aref octet-vector
(1+ octet-pos
)) (logior #b10000000
(ldb (byte 6 6) code-point
)))
41 (setf (aref octet-vector
(+ octet-pos
2)) (logior #b10000000
(ldb (byte 6 0) code-point
)))
44 ((<= #x10000 code-point
#x10FFFF
)
45 (setf (aref octet-vector octet-pos
) (logior #b11110000
(ldb (byte 3 18) code-point
)))
46 (setf (aref octet-vector
(1+ octet-pos
)) (logior #b10000000
(ldb (byte 6 12) code-point
)))
47 (setf (aref octet-vector
(+ octet-pos
2)) (logior #b10000000
(ldb (byte 6 6) code-point
)))
48 (setf (aref octet-vector
(+ octet-pos
3)) (logior #b10000000
(ldb (byte 6 0) code-point
)))
51 (t (error 'encode-error
:seq seq
:pos pos
))))
52 (adjust-array octet-vector octet-pos
)))
54 (defmethod encode-sequence ((seq list
) &key
(start 0) end byte-order-mark
)
56 (setf end
(length seq
)))
57 (assert (<= 0 start end
(length seq
)))
59 (let ((octet-list (if byte-order-mark
60 (nreverse (list #xEF
#xBB
#xBF
))
62 (loop for pos from start to
(1- end
)
63 for code-point in
(nthcdr start seq
)
64 do
(cond ((<= #x0000 code-point
#x007F
)
65 (push code-point octet-list
))
67 ((<= #x0080 code-point
#x07FF
)
68 (push (logior #b11000000
(ldb (byte 5 6) code-point
)) octet-list
)
69 (push (logior #b10000000
(ldb (byte 6 0) code-point
)) octet-list
))
71 ((or (<= #x0800 code-point
#xD7FF
)
72 (<= #xE000 code-point
#xFFFF
))
73 (push (logior #b11100000
(ldb (byte 4 12) code-point
)) octet-list
)
74 (push (logior #b10000000
(ldb (byte 6 6) code-point
)) octet-list
)
75 (push (logior #b10000000
(ldb (byte 6 0) code-point
)) octet-list
))
77 ((<= #x10000 code-point
#x10FFFF
)
78 (push (logior #b11110000
(ldb (byte 3 18) code-point
)) octet-list
)
79 (push (logior #b10000000
(ldb (byte 6 12) code-point
)) octet-list
)
80 (push (logior #b10000000
(ldb (byte 6 6) code-point
)) octet-list
)
81 (push (logior #b10000000
(ldb (byte 6 0) code-point
)) octet-list
))
83 (t (error 'encode-error
:seq seq
:pos pos
))))
84 (nreverse octet-list
)))