;;;; This file is for testing external-format functionality, using
;;;; test machinery which might have side effects (e.g. executing
;;;; DEFUN, writing files). Note that the tests here reach into
;;;; unexported functionality, and should not be used as a guide for
;;;; users.

;;;; This software is part of the SBCL system. See the README file for
;;;; more information.
;;;;
;;;; While most of SBCL is derived from the CMU CL system, the test
;;;; files (like this one) were written from scratch after the fork
;;;; from CMU CL.
;;;;
;;;; This software is in the public domain and is provided with
;;;; absolutely no warranty. See the COPYING and CREDITS files for
;;;; more information.

;;; DO-EXTERNAL-FORMATS ((XF &OPTIONAL RESULT) &BODY BODY)
;;;
;;; Evaluate BODY once for each entry of SB-IMPL::*EXTERNAL-FORMATS*,
;;; with XF bound to (FIRST (FIRST entry)), and finally return RESULT
;;; the way DOLIST does.
;;; NOTE(review): the entry layout (a list whose first element is itself
;;; a list headed by the format name) is assumed from the accessors used
;;; here -- confirm against the SBCL version under test.
(defmacro do-external-formats ((xf &optional result) &body body)
  ;; The iteration variable is a fresh uninterned symbol so that BODY
  ;; can neither reference nor capture it.
  (let ((nxf (gensym "EXTERNAL-FORMAT-ENTRY")))
    `(dolist (,nxf sb-impl::*external-formats* ,result)
       (let ((,xf (first (first ,nxf))))
         ,@body))))
(defvar *test-path* "external-format-test.tmp"
  "Name of the scratch file that the tests in this file write, read back and delete.")
;;; For every external format, opening the platform's null device for
;;; input must report end of file on the very first READ-CHAR.  The
;;; stream object itself serves as the EOF marker.
(do-external-formats (xf)
  (with-open-file (s #-win32 "/dev/null"
                     #+win32 "nul"
                     :direction :input
                     :external-format xf)
    (assert (eq (read-char s nil s) s))))
;;; Test standard character read-write equivalency over all external formats.
;;; Each format writes the standard characters to *TEST-PATH* and reads
;;; them back through the same format; any mismatch signals an error.
(let ((standard-characters "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789!$\"'(),_-./:;?+<=>#%&*@[\\]{|}`^~"))
  (do-external-formats (xf)
    (with-open-file (s *test-path* :direction :output
                                   :if-exists :supersede
                                   :external-format xf)
      (loop for character across standard-characters
            do (write-char character s)))
    ;; Read back with the format under test, not the default one,
    ;; otherwise the round trip is not actually exercised.
    (with-open-file (s *test-path* :direction :input
                                   :external-format xf)
      (loop for character across standard-characters
            do (let ((got (read-char s)))
                 (unless (eql character got)
                   (error "wanted ~S, got ~S" character got)))))))
(delete-file *test-path*)

;;; The tests below use characters with codes above 255, so on builds
;;; without the :SB-UNICODE feature report the results gathered so far
;;; and stop here instead of running them.
;;; NOTE(review): exit status 104 is presumably the value the test
;;; runner treats as success -- confirm against the harness.
#-sb-unicode
(progn
  (test-util:report-test-status)
  (sb-ext:quit :unix-status 104))
;;; Test UTF-8 writing and reading of 1, 2, 3 and 4 octet characters with
;;; all possible offsets. Tests for buffer edge bugs. fd-stream buffers are
;;; SB-IMPL::+BYTES-PER-BUFFER+ octets wide, so four more characters than
;;; that are written to be sure of crossing a buffer boundary.
(dotimes (width-1 4)
  ;; One sample character per encoded width: 1, 2, 3 and 4 octets.
  (let ((character (code-char (elt '(1 #x81 #x801 #x10001) width-1))))
    (dotimes (offset (+ width-1 1))
      (with-open-file (s *test-path* :direction :output
                                     :if-exists :supersede
                                     :external-format :utf-8)
        ;; OFFSET single-octet characters shift where the multi-octet
        ;; characters fall relative to the buffer edge.
        (dotimes (n offset)
          (write-char #\a s))
        (dotimes (n (+ 4 sb-impl::+bytes-per-buffer+))
          (write-char character s)))
      (with-open-file (s *test-path* :direction :input
                                     :external-format :utf-8)
        (dotimes (n offset)
          (assert (eql (read-char s) #\a)))
        (dotimes (n (+ 4 sb-impl::+bytes-per-buffer+))
          (let ((got (read-char s)))
            (unless (eql got character)
              (error "wanted ~S, got ~S (~S)" character got n))))
        ;; Nothing may follow the last character written.
        (assert (eql (read-char s nil s) s))))))
;;; Test character decode restarts.
;;; Fixture: the octets for "AB", one octet that cannot start a valid
;;; UTF-8 sequence at that position, then "C".
(with-open-file (s *test-path* :direction :output
                               :if-exists :supersede
                               :element-type '(unsigned-byte 8))
  (write-byte 65 s)                     ; #\A
  (write-byte 66 s)                     ; #\B
  (write-byte #xe0 s)                   ; lead octet with no continuation
  (write-byte 67 s))                    ; #\C
;;; Reading *TEST-PATH* as UTF-8 while answering every decoding error with
;;; the SB-INT:ATTEMPT-RESYNC restart must yield the line "ABC" and then
;;; end of file (the stream itself is the EOF marker).
(with-open-file (s *test-path* :direction :input
                               :external-format :utf-8)
  (handler-bind
      ((sb-int:character-decoding-error
         #'(lambda (decoding-error)
             (declare (ignore decoding-error))
             (invoke-restart 'sb-int:attempt-resync))))
    (assert (equal (read-line s nil s) "ABC"))
    (assert (equal (read-line s nil s) s))))
;;; Reading *TEST-PATH* as UTF-8 while answering every decoding error with
;;; the SB-INT:FORCE-END-OF-FILE restart must yield only "AB" and then
;;; end of file (the stream itself is the EOF marker).
(with-open-file (s *test-path* :direction :input
                               :external-format :utf-8)
  (handler-bind
      ((sb-int:character-decoding-error
         #'(lambda (decoding-error)
             (declare (ignore decoding-error))
             (invoke-restart 'sb-int:force-end-of-file))))
    (assert (equal (read-line s nil s) "AB"))
    (assert (equal (read-line s nil s) s))))
;;; And again with more data to account for buffering (this was briefly
;;; broken in early 0.9.6).
;;; Fixture: a 50-octet line (49 digits plus a newline) repeated, one
;;; undecodable octet, then the same line repeated again.
;;; NOTE(review): the repeat count of 40 per half was reconstructed --
;;; confirm against the line counts the readers of this file expect.
(with-open-file (s *test-path* :direction :output
                               :if-exists :supersede
                               :element-type '(unsigned-byte 8))
  (let ((a (make-array 50
                       :element-type '(unsigned-byte 64)
                       :initial-contents
                       (map 'list #'char-code
                            "1234567890123456789012345678901234567890123456789."))))
    ;; Replace the trailing #\. placeholder with a line terminator.
    (setf (aref a 49) (char-code #\Newline))
    (dotimes (i 40)
      (write-sequence a s))
    (write-byte #xe0 s)                 ; invalid in UTF-8 at this position
    (dotimes (i 40)
      (write-sequence a s))))
;;; With the ATTEMPT-RESYNC restart every line of the large fixture must
;;; still read back intact.
;;; NOTE(review): the count of 80 lines was reconstructed (two halves of
;;; 40) -- confirm against the code that writes *TEST-PATH*.
(with-test (:name (:character-decode-large :attempt-resync))
  (with-open-file (s *test-path* :direction :input
                                 :external-format :utf-8)
    (handler-bind
        ((sb-int:character-decoding-error
           #'(lambda (decoding-error)
               (declare (ignore decoding-error))
               (invoke-restart 'sb-int:attempt-resync)))
         ;; The failure mode is an infinite loop, add a timeout to detect it.
         ;; A HANDLER-BIND handler is called with the condition, so the
         ;; function must accept one argument.
         (sb-ext:timeout
           (lambda (condition)
             (declare (ignore condition))
             (error "Timeout"))))
      (sb-ext:with-timeout 5
        (dotimes (i 80)
          (assert (equal (read-line s nil s)
                         "1234567890123456789012345678901234567890123456789")))))))
;;; With the FORCE-END-OF-FILE restart only the lines before the bad
;;; octet should be readable.  The test is marked as an expected failure
;;; and its real body is read-time disabled; see the comment inside.
(with-test (:name (:character-decode-large :force-end-of-file)
            :fails-on :sbcl)
  (error "We can't reliably test this due to WITH-TIMEOUT race condition")
  ;; This test will currently fail. But sometimes it will fail
  ;; ungracefully due to the WITH-TIMEOUT race mentioned above. This
  ;; rightfully confuses some people, so we'll skip running the code
  ;; for now. -- JES, 2006-01-27
  #+nil
  (with-open-file (s *test-path* :direction :input
                                 :external-format :utf-8)
    (handler-bind
        ((sb-int:character-decoding-error
           #'(lambda (decoding-error)
               (declare (ignore decoding-error))
               (invoke-restart 'sb-int:force-end-of-file)))
         ;; The failure mode is an infinite loop, add a timeout to detect it.
         (sb-ext:timeout
           (lambda (condition)
             (declare (ignore condition))
             (error "Timeout"))))
      (sb-ext:with-timeout 5
        ;; NOTE(review): 40 is the reconstructed number of lines written
        ;; before the bad octet -- confirm against the fixture writer.
        (dotimes (i 40)
          (assert (equal (read-line s nil s)
                         "1234567890123456789012345678901234567890123456789")))
        (assert (equal (read-line s nil s) s))))))
;;; Test character encode restarts.
;;; Write "AB", a character that Latin-1 cannot represent (code 322), and
;;; "C", one character at a time.  Every encoding error is answered with
;;; the SB-IMPL::OUTPUT-NOTHING restart, so the file should end up
;;; holding just "ABC".
(with-open-file (s *test-path* :direction :output
                               :if-exists :supersede
                               :external-format :latin-1)
  (handler-bind
      ((sb-int:character-encoding-error
         #'(lambda (encoding-error)
             (declare (ignore encoding-error))
             (invoke-restart 'sb-impl::output-nothing))))
    (write-char #\A s)
    (write-char #\B s)
    (write-char (code-char 322) s)
    (write-char #\C s)))
;;; The file must now hold exactly one line, "ABC", followed by end of
;;; file (the stream itself is the EOF marker).
(with-open-file (s *test-path* :direction :input
                               :external-format :latin-1)
  (assert (equal (read-line s nil s) "ABC"))
  (assert (equal (read-line s nil s) s)))
;;; Same encode-restart check, but pushing all four characters through a
;;; single WRITE-STRING call instead of individual WRITE-CHAR calls.
(with-open-file (s *test-path* :direction :output
                               :if-exists :supersede
                               :external-format :latin-1)
  (handler-bind
      ((sb-int:character-encoding-error
         #'(lambda (encoding-error)
             (declare (ignore encoding-error))
             (invoke-restart 'sb-impl::output-nothing))))
    ;; Code 322 lies outside Latin-1 and triggers the encoding error.
    (let ((string (make-array 4 :element-type 'character
                                :initial-contents
                                `(#\A #\B ,(code-char 322) #\C))))
      (write-string string s))))
;;; Read the file back as Latin-1: a single line "ABC" and then end of
;;; file (the stream itself is the EOF marker).
(with-open-file (s *test-path* :direction :input
                               :external-format :latin-1)
  (assert (equal (read-line s nil s) "ABC"))
  (assert (equal (read-line s nil s) s)))
;;; Test skipping character-decode-errors in comments.
;;; A source file whose only line is a comment ending in a Latin-1 octet
;;; (code 233) is compiled as UTF-8.
(let ((s (open "external-format-test.lisp" :direction :output
                                           :if-exists :supersede
                                           :external-format :latin-1)))
  (unwind-protect
       (progn
         (write-string ";;; ABCD" s)
         (write-char (code-char 233) s)
         (terpri s)
         ;; Close first so the contents are on disk before COMPILE-FILE
         ;; opens the file.
         (close s)
         (compile-file "external-format-test.lisp" :external-format :utf-8))
    ;; Remove the source and, if one was produced, the compiled file,
    ;; whether or not compilation succeeded.
    (delete-file s)
    (let ((p (probe-file (compile-file-pathname "external-format-test.lisp"))))
      (when p
        (delete-file p)))))
;;;; KOI8-R external format
;;; Write one character with code #xB0, then check that writing a
;;; character with code #xBAAD signals SB-INT:CHARACTER-ENCODING-ERROR.
(with-open-file (s *test-path* :direction :output
                               :if-exists :supersede
                               :external-format :koi8-r)
  (write-char (code-char #xB0) s)
  (assert (eq (handler-case
                  (progn
                    (write-char (code-char #xBAAD) s)
                    ;; Reached only if no error was signalled.
                    :bad)
                (sb-int:character-encoding-error ()
                  :good))
              :good)))
;;; The first octet stored in the file must be #x9C.
(with-open-file (s *test-path* :direction :input
                               :element-type '(unsigned-byte 8))
  (let ((byte (read-byte s)))
    (assert (= (eval byte) #x9C))))
;;; Decoding the file as KOI8-R must give back a character with code
;;; #xB0; afterwards the scratch file is removed.
(with-open-file (s *test-path* :direction :input
                               :external-format :koi8-r)
  (let ((char (read-char s)))
    (assert (= (char-code (eval char)) #xB0))))

(delete-file *test-path*)
;;; OCTETS-TO-STRING and STRING-TO-OCTETS must map between the KOI8-R
;;; octets in KOI8-R-CODES and the character codes in UNI-CODES, in both
;;; directions.
(let* ((koi8-r-codes (coerce '(240 210 201 215 197 212 33)
                             '(vector (unsigned-byte 8))))
       (uni-codes #(1055 1088 1080 1074 1077 1090 33))
       (string (octets-to-string koi8-r-codes :external-format :koi8-r))
       (uni-decoded (map 'vector #'char-code string)))
  ;; Decoding direction: octets -> characters.
  (assert (equalp uni-decoded uni-codes))
  ;; Encoding direction: characters -> octets.
  (assert (equalp (string-to-octets (map 'string #'code-char uni-codes)
                                    :external-format :koi8-r)
                  koi8-r-codes)))
;;; tests of FILE-STRING-LENGTH
;;; For every external format, the value FILE-STRING-LENGTH predicts for
;;; a character (and then for a whole string) must equal the amount by
;;; which FILE-POSITION advances when it is actually written.
(let ((standard-characters "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789!$\"'(),_-./:;?+<=>#%&*@[\\]{|}`^~"))
  (do-external-formats (xf)
    ;; :SUPERSEDE keeps a stale scratch file from an aborted run from
    ;; making OPEN signal an error.
    (with-open-file (s *test-path* :direction :output
                                   :if-exists :supersede
                                   :external-format xf)
      (loop for x across standard-characters
            for position = (file-position s)
            for char-length = (file-string-length s x)
            do (write-char x s)
               (assert (= (file-position s) (+ position char-length))))
      (let ((position (file-position s))
            (string-length (file-string-length s standard-characters)))
        (write-string standard-characters s)
        (assert (= (file-position s) (+ position string-length)))))
    (delete-file *test-path*)))
;;; The same FILE-STRING-LENGTH / FILE-POSITION agreement check for
;;; UTF-8, using character codes on either side of successive powers of
;;; two up to 262144.
(let ((char-codes '(0 1 255 256 511 512 1023 1024 2047 2048 4095 4096
                    8191 8192 16383 16384 32767 32768 65535 65536 131071
                    131072 262143 262144)))
  ;; :SUPERSEDE keeps a stale scratch file from an aborted run from
  ;; making OPEN signal an error.
  (with-open-file (s *test-path* :direction :output
                                 :if-exists :supersede
                                 :external-format :utf-8)
    ;; One character at a time ...
    (dolist (code char-codes)
      (let* ((char (code-char code))
             (position (file-position s))
             (char-length (file-string-length s char)))
        (write-char char s)
        (assert (= (file-position s) (+ position char-length)))))
    ;; ... and then all of them as one string.
    (let* ((string (map 'string #'code-char char-codes))
           (position (file-position s))
           (string-length (file-string-length s string)))
      (write-string string s)
      (assert (= (file-position s) (+ position string-length))))))
;;; See sbcl-devel "Subject: Bug in FILE-POSITION on UTF-8-encoded files"
;;; by Lutz Euler on 2006-03-05 for more details.
;;; After one character has been read, the position reported by
;;; FILE-POSITION must be usable to seek back and re-read the following
;;; two-octet character.
(with-test (:name (:file-position :utf-8))
  (let ((path *test-path*))
    (with-open-file (s path
                       :direction :output
                       :if-exists :supersede
                       :element-type '(unsigned-byte 8))
      ;; Write #\*, encoded in UTF-8, to the file.
      (write-byte 42 s)
      ;; Append #\adiaeresis, encoded in UTF-8, to the file.
      (write-sequence '(195 164) s))
    (with-open-file (s path :external-format :utf-8)
      ;; Consume #\* so that the recorded position is not the file start.
      (read-char s)
      (let ((pos (file-position s))
            (char (read-char s)))
        (format t "read character with code ~a successfully from file position ~a~%"
                (char-code char) pos)
        (file-position s pos)
        (format t "set file position back to ~a, trying to read-char again~%" pos)
        (let ((new-char (read-char s)))
          (assert (char= char new-char)))))
    (values)))

(delete-file *test-path*)
;;; We used to call STREAM-EXTERNAL-FORMAT on the stream in the error
;;; when printing a coding error, but that didn't work if the stream
;;; was closed by the time the error was printed. See sbcl-devel
;;; "Subject: Printing coding errors for closed streams" by Zach Beane
;;; on 2008-10-16 for more info.
(with-test (:name (:character-coding-error-stream-external-format))
  (flet ((first-file-character ()
           ;; The stream is closed again by the time the caller sees
           ;; any condition signalled by READ-CHAR.
           (with-open-file (stream *test-path* :external-format :utf-8)
             (read-char stream))))
    ;; Fixture: a single octet, 192, which is not valid UTF-8 on its own.
    (with-open-file (stream *test-path*
                            :direction :output
                            :if-exists :supersede
                            :element-type '(unsigned-byte 8))
      (write-byte 192 stream))
    ;; Printing the captured condition must itself not signal an error.
    (princ-to-string (nth-value 1 (ignore-errors (first-file-character))))))

(delete-file *test-path*)
;;; External format support in SB-ALIEN

;;; Baseline: plain C-STRING with no explicit external format.
(with-test (:name (:sb-alien :vanilla))
  (define-alien-routine strdup c-string
    (str c-string))
  (let ((copy (strdup "foo")))
    (assert (equal "foo" copy))))
;;; Argument and result both declared as UTF-8: an ASCII string must
;;; come back unchanged.
(with-test (:name (:sb-alien :utf-8 :utf-8))
  (define-alien-routine strdup (c-string :external-format :utf-8)
    (str (c-string :external-format :utf-8)))
  (let ((copy (strdup "foo")))
    (assert (equal "foo" copy))))
;;; Argument encoded as UTF-8, result decoded as Latin-1: the single
;;; character with code 246 becomes two UTF-8 octets, which read back as
;;; two Latin-1 characters.
(with-test (:name (:sb-alien :latin-1 :utf-8))
  (define-alien-routine strdup (c-string :external-format :latin-1)
    (str (c-string :external-format :utf-8)))
  (assert (= (length (strdup (string (code-char 246))))
             2)))
;;; Argument encoded as Latin-1, result decoded as UTF-8: the two
;;; characters with codes 195 and 164 go out as the octets 195 164, which
;;; is the UTF-8 encoding of the character with code 228.
(with-test (:name (:sb-alien :utf-8 :latin-1))
  (define-alien-routine strdup (c-string :external-format :utf-8)
    (str (c-string :external-format :latin-1)))
  (let ((two-octets (map 'string #'code-char '(195 164))))
    (assert (equal (string (code-char 228))
                   (strdup two-octets)))))
;;; Argument and result both declared as EBCDIC-US: the string must come
;;; back unchanged.
(with-test (:name (:sb-alien :ebcdic :ebcdic))
  (define-alien-routine strdup (c-string :external-format :ebcdic-us)
    (str (c-string :external-format :ebcdic-us)))
  (let ((copy (strdup "foo")))
    (assert (equal "foo" copy))))
;;; Argument encoded as EBCDIC-US but result decoded as Latin-1: the
;;; string must NOT survive the mismatched round trip.
(with-test (:name (:sb-alien :latin-1 :ebcdic))
  (define-alien-routine strdup (c-string :external-format :latin-1)
    (str (c-string :external-format :ebcdic-us)))
  (let ((copy (strdup "foo")))
    (assert (not (equal "foo" copy)))))
;;; A result type declared with :ELEMENT-TYPE BASE-CHAR must produce a
;;; SIMPLE-BASE-STRING.
(with-test (:name (:sb-alien :simple-base-string))
  (define-alien-routine strdup (c-string :external-format :ebcdic-us
                                         :element-type base-char)
    (str (c-string :external-format :ebcdic-us)))
  (let ((copy (strdup "foo")))
    (assert (typep copy 'simple-base-string))))