1 ;;;; -*- Mode: Lisp; indent-tabs-mode: nil -*-
3 ;;; --- Strings used for decoding Unix pathnames: invalid UTF8 octets
4 ;;; are #\Null-escaped.
6 ;;; TODO: add 8bit-chars versions of SSTRING-TO-CSTRING,
7 ;;; COUNT-SSTRING-OCTETS and CSTRING-TO-SSTRING
9 (in-package :iolib.syscalls
)
11 (eval-when (:compile-toplevel
:load-toplevel
:execute
)
13 ((<= char-code-limit
#x100
) :8bit-chars
)
14 ((<= #x101 char-code-limit
#x10000
) :16bit-chars
)
15 ((> char-code-limit
#x10000
) :21bit-chars
))
18 (eval-when (:compile-toplevel
:load-toplevel
:execute
)
19 (defconstant +cstring-path-max
+ 65535))
21 (defun sstring-to-cstring (sstring c-ptr
)
22 (declare (optimize (speed 3) (safety 0) (debug 0)))
24 (flet ((output-octet (octet)
25 (setf (cffi:mem-aref c-ptr
:unsigned-char index
) octet
)
27 (declare (inline output-octet
))
28 (loop :with len
:= (length sstring
)
29 :with end-offset
:= (1- len
)
31 :for code
:= (char-code (char sstring i
)) :do
36 (output-octet (char-code (char sstring
(incf i
))))))
40 (output-octet (logior #xC0
(ldb (byte 5 6) code
)))
41 (output-octet (logior #x80
(ldb (byte 6 0) code
))))
43 (output-octet (logior #xE0
(ldb (byte 4 12) code
)))
44 (output-octet (logior #x80
(ldb (byte 6 6) code
)))
45 (output-octet (logior #x80
(ldb (byte 6 0) code
))))
48 (output-octet (logior #xF0
(ldb (byte 3 18) code
)))
49 (output-octet (logior #x80
(ldb (byte 6 12) code
)))
50 (output-octet (logior #x80
(ldb (byte 6 6) code
)))
51 (output-octet (logior #x80
(ldb (byte 6 0) code
)))))
52 :finally
(output-octet 0))
53 (values c-ptr index
))))
55 (defun count-sstring-octets (sstring)
56 (declare (optimize (speed 3) (safety 0) (debug 0)))
57 (loop :with len
:= (length sstring
)
58 :with end-offset
:= (1- len
)
60 :for code
:= (char-code (char sstring i
))
63 (when (< i end-offset
) (incf i
))
69 ((< code
#x110000
) 4))))
71 (defun cstring-alloc (sstring)
72 "Allocate a null-terminated foreign buffer containing SSTRING."
73 (let* ((length (count-sstring-octets sstring
))
74 (ptr (foreign-alloc :char
:count
(1+ length
))))
75 (sstring-to-cstring sstring ptr
)))
77 (defmacro with-sstring-to-cstring
((var sstring
&optional size-var
) &body body
)
78 `(multiple-value-bind (,var
,@(when size-var
(list size-var
)))
79 (cstring-alloc ,sstring
)
82 (foreign-free ,var
))))
84 (deftype cstr-offset
()
85 `(integer 0 ,(1+ +cstring-path-max
+)))
87 (declaim (inline utf8-extra-bytes
))
88 (defun utf8-extra-bytes (code)
89 (declare (type (unsigned-byte 8) code
)
90 (optimize (speed 3) (safety 0) (debug 0)))
91 (declare (ignorable code
))
94 (let ((vec (load-time-value
98 #(0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
99 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
100 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
101 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
102 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
103 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
104 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
105 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0)
108 #(0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
109 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
110 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
111 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
112 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
113 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
114 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
115 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 0 0 0 0 0 0 0 0 0 0 0)
116 '(simple-array (unsigned-byte 8) (256))))))
117 (aref (the (simple-array (unsigned-byte 8) (256)) vec
) code
)))
119 (declaim (inline offsets-from-utf8
))
120 (defun offsets-from-utf8 (extra-bytes)
121 (declare (type (mod 4) extra-bytes
)
122 (optimize (speed 3) (safety 0) (debug 0)))
123 (let ((vec (load-time-value
124 (coerce #(#x00000000
#x00003080
#x000E2080
#x03C82080
)
125 '(simple-array (unsigned-byte 26) (4))))))
126 (aref (the (simple-array (unsigned-byte 26) (4)) vec
) extra-bytes
)))
128 (declaim (inline legal-utf8-cstring
))
129 (defun legal-utf8-cstring (ptr start len
)
130 (declare (type cstr-offset start len
)
131 (optimize (speed 3) (safety 0) (debug 0)))
132 (let ((end (+ start len
))
133 (srchr (mem-aref ptr
:unsigned-char start
))
136 (when (>= srchr
#xF4
) (return* nil
))
138 (mem-aref ptr
:unsigned-char
(decf (the (unsigned-byte 17) end
)))))
139 (declare (inline getch
))
140 (when (= len
4) (setf c
(getch)) (unless (<= #x80 c
#xBF
) (return* nil
)))
141 (when (>= len
3) (setf c
(getch)) (unless (<= #x80 c
#xBF
) (return* nil
)))
142 (when (>= len
2) (setf c
(getch)) (unless (<= #x00 c
#xBF
) (return* nil
))
144 (#xE0
(when (< c
#xA0
) (return* nil
)))
145 (#xED
(when (> c
#x9F
) (return* nil
)))
146 (#xF0
(when (< c
#x90
) (return* nil
)))
148 (#xF4
(when (> c
#x8F
) (return* nil
)))
149 (t (when (< c
#x80
) (return* nil
)))))
150 (when (>= len
1) (when (<= #x80 srchr
#xC1
) (return* nil
)))
151 (when (> srchr
#xF4
) (return* nil
))
154 (defun cstring-to-sstring (c-ptr &optional
(c-len (1+ +cstring-path-max
+)))
155 (declare (type cstr-offset c-len
)
156 (optimize (speed 3) (safety 0) (debug 0)))
157 (let ((index 0) (sindex 0)
158 (sstring (make-string (* 2 c-len
))))
159 (declare (type cstr-offset index sindex
))
160 (flet ((input-char ()
161 (prog1 (mem-aref c-ptr
:unsigned-char index
)
164 (setf (char sstring sindex
) char
)
167 (setf (char sstring sindex
) (code-char code
))
169 (declare (inline input-char output-char output-code
))
170 (loop :for byte0
:= (mem-aref c-ptr
:unsigned-char index
)
171 :until
(or (>= index c-len
) (zerop byte0
)) :do
172 (block decode-one-char
174 (extra-bytes (min (utf8-extra-bytes byte0
)))
175 (legalp (and (legal-utf8-cstring c-ptr index
(1+ extra-bytes
))
176 (< extra-bytes
(- c-len index
)))))
177 (declare (type (mod 4) extra-bytes
)
178 (type (unsigned-byte 27) code
))
179 (labels ((finish-seq (extra-bytes)
182 (decf code
(the (unsigned-byte 26) (offsets-from-utf8 extra-bytes
)))
185 (output-char #\Null
) (output-code code
))))
187 (unless legalp
(finish-seq 0) (return-from decode-one-char
))))
188 (when (>= extra-bytes
3) (setf code
(ash (+ code
(input-char)) 6)) (legalchk))
189 (when (>= extra-bytes
2) (setf code
(ash (+ code
(input-char)) 6)) (legalchk))
190 (when (>= extra-bytes
1) (setf code
(ash (+ code
(input-char)) 6)) (legalchk))
191 (when (>= extra-bytes
0) (setf code
(ash (+ code
(input-char)) 0)) (legalchk))
192 (finish-seq extra-bytes
))))))
193 (shrink-vector sstring sindex
)))
195 (defmacro with-cstring-to-sstring
((var size
&optional size-var
) &body body
)
196 `(with-foreign-pointer (,var
,size
,size-var
)
198 (cstring-to-sstring ,var
,size-var
))))
201 ;;; Automatic Conversion of Foreign Strings to sstrings
202 ;;; Initially copied from cffi/src/string.lisp
204 (define-foreign-type cstring-type
()
205 (;; Should we free after translating from foreign?
206 (free-from-foreign :initarg
:free-from-foreign
207 :reader cstring-free-from-foreign-p
208 :initform nil
:type boolean
)
209 ;; Should we free after translating to foreign?
210 (free-to-foreign :initarg
:free-to-foreign
211 :reader cstring-free-to-foreign-p
212 :initform t
:type boolean
))
213 (:actual-type
:pointer
)
214 (:simple-parser sstring
))
216 ;; TODO: use EXPAND-TO-FOREIGN
217 (defmethod translate-to-foreign (s (type cstring-type
))
218 (check-type s string
)
219 (values (cstring-alloc s
)
220 (cstring-free-to-foreign-p type
)))
222 (defmethod translate-from-foreign (ptr (type cstring-type
))
224 (if (null-pointer-p ptr
)
226 (cstring-to-sstring ptr
))
227 (when (and (cstring-free-from-foreign-p type
)
228 (not (null-pointer-p ptr
)))
229 (foreign-free ptr
))))
231 (defmethod free-translated-object (ptr (type cstring-type
) free-p
)