safepoint: Remove unused context argument.
[sbcl.git] / tests / utf-8.pure.lisp
blob5c6590944b915ee20a61e2e35bf6914378cf6e42
1 ;;;; This file is for testing external-format functionality for UTF-8,
2 ;;;; using test machinery which does not have side effects. Note that
3 ;;;; the tests here reach into unexported functionality, and should
4 ;;;; not be used as a guide for users.
6 ;;;; This software is part of the SBCL system. See the README file for
7 ;;;; more information.
8 ;;;;
9 ;;;; While most of SBCL is derived from the CMU CL system, the test
10 ;;;; files (like this one) were written from scratch after the fork
11 ;;;; from CMU CL.
12 ;;;;
13 ;;;; This software is in the public domain and is provided with
14 ;;;; absolutely no warranty. See the COPYING and CREDITS files for
15 ;;;; more information.
17 (defvar *test-path* (scratch-file-name))
19 (macrolet ((input-test (inxf expected &environment env)
20 `(progn
21 (with-test (:name (,(macroexpand 'name env) :file ,inxf))
22 (with-open-file (s *test-path* :external-format ',inxf)
23 (let* ((string (make-string 20))
24 (count (read-sequence string s)))
25 (assert (equal (map 'list 'identity (subseq string 0 count)) ,expected)))))
26 (with-test (:name (,(macroexpand 'name env) :octets ,inxf))
27 (let ((octets (coerce bytes '(simple-array (unsigned-byte 8) 1))))
28 (assert (equal (sb-ext:octets-to-string octets :external-format ',inxf)
29 (coerce ,expected 'string)))))))
30 (with-input-bytes ((id bytes) &body body)
31 `(let ((bytes ,bytes))
32 (with-open-file (s *test-path* :element-type '(unsigned-byte 8)
33 :direction :output :if-exists :supersede)
34 (dolist (byte bytes)
35 (write-byte byte s)))
36 (symbol-macrolet ((name ,id))
37 (macrolet ((test (inxf expected)
38 `(input-test ,inxf ,expected)))
39 ,@body))))
40 (output-test (chars outxf expected &environment env)
41 `(progn
42 (with-open-file (s *test-path* :element-type 'character
43 :external-format ',outxf
44 :direction :output :if-exists :supersede)
45 (write-sequence ,chars s))
46 (with-test (:name (,(macroexpand 'name env) :file ,outxf))
47 (with-open-file (s *test-path* :element-type '(unsigned-byte 8))
48 (let* ((vector (make-array 20 :element-type '(unsigned-byte 8)))
49 (count (read-sequence vector s)))
50 (assert (equal (map 'list 'identity (subseq vector 0 count)) ,expected)))))
51 (with-test (:name (,(macroexpand 'name env) :octets string ,outxf))
52 (let* ((string (coerce chars 'string))
53 (octets (sb-ext:string-to-octets string :external-format ',outxf)))
54 (assert (typep octets '(simple-array (unsigned-byte 8) 1)))
55 (assert (equal (coerce octets 'list) ,expected))))
56 (when (every (lambda (x) (typep x 'base-char)) chars)
57 (with-test (:name (,(macroexpand 'name env) :octets base-string ,outxf))
58 (let* ((string (coerce chars 'base-string))
59 (octets (sb-ext:string-to-octets string :external-format ',outxf)))
60 (assert (typep octets '(simple-array (unsigned-byte 8) 1)))
61 (assert (equal (coerce octets 'list) ,expected)))))))
62 (with-output-characters ((id chars) &body body)
63 `(let ((chars ,chars))
64 (symbol-macrolet ((name ,id))
65 (macrolet ((test (outxf expected)
66 `(output-test chars ,outxf ,expected)))
67 ,@body)))))
68 (with-input-bytes ((:input :lf :ascii) '(#x35 #x0a #x37))
69 (test :utf-8 '(#\5 #\Newline #\7))
70 (test (:utf-8 :newline :lf) '(#\5 #\Newline #\7))
71 (test (:utf-8 :newline :cr) '(#\5 #\Newline #\7))
72 (test (:utf-8 :newline :crlf) '(#\5 #\Newline #\7)))
73 (with-output-characters ((:output :lf :ascii) '(#\5 #\Linefeed #\7))
74 (test :utf-8 '(#x35 #x0a #x37))
75 (test (:utf-8 :newline :lf) '(#x35 #x0a #x37))
76 (test (:utf-8 :newline :cr) '(#x35 #x0d #x37))
77 (test (:utf-8 :newline :crlf) '(#x35 #x0d #x0a #x37)))
78 (with-input-bytes ((:input :cr :ascii) '(#x35 #x0d #x37))
79 (test :utf-8 '(#\5 #\Return #\7))
80 (test (:utf-8 :newline :lf) '(#\5 #\Return #\7))
81 (test (:utf-8 :newline :cr) '(#\5 #\Newline #\7))
82 (test (:utf-8 :newline :crlf) '(#\5 #\Return #\7)))
83 (with-output-characters ((:output :cr :ascii) '(#\5 #\Return #\7))
84 (test :utf-8 '(#x35 #x0d #x37))
85 (test (:utf-8 :newline :lf) '(#x35 #x0d #x37))
86 (test (:utf-8 :newline :cr) '(#x35 #x0d #x37))
87 (test (:utf-8 :newline :crlf) '(#x35 #x0d #x37)))
88 (with-input-bytes ((:input :crlf :ascii) '(#x35 #x0d #x0a #x37))
89 (test :utf-8 '(#\5 #\Return #\Newline #\7))
90 (test (:utf-8 :newline :lf) '(#\5 #\Return #\Newline #\7))
91 (test (:utf-8 :newline :cr) '(#\5 #\Newline #\Newline #\7))
92 (test (:utf-8 :newline :crlf) '(#\5 #\Newline #\7)))
93 (with-output-characters ((:output :crlf :ascii) '(#\5 #\Return #\Linefeed #\7))
94 (test :utf-8 '(#x35 #x0d #x0a #x37))
95 (test (:utf-8 :newline :lf) '(#x35 #x0d #x0a #x37))
96 (test (:utf-8 :newline :cr) '(#x35 #x0d #x0d #x37))
97 (test (:utf-8 :newline :crlf) '(#x35 #x0d #x0d #x0a #x37)))
98 (with-input-bytes ((:input :lf) '(#xc3 #xa9 #x0a #xc3 #xa8))
99 (test :utf-8 '(#\LATIN_SMALL_LETTER_E_WITH_ACUTE #\Newline #\LATIN_SMALL_LETTER_E_WITH_GRAVE))
100 (test (:utf-8 :newline :lf) '(#\LATIN_SMALL_LETTER_E_WITH_ACUTE #\Newline #\LATIN_SMALL_LETTER_E_WITH_GRAVE))
101 (test (:utf-8 :newline :cr) '(#\LATIN_SMALL_LETTER_E_WITH_ACUTE #\Newline #\LATIN_SMALL_LETTER_E_WITH_GRAVE))
102 (test (:utf-8 :newline :crlf) '(#\LATIN_SMALL_LETTER_E_WITH_ACUTE #\Newline #\LATIN_SMALL_LETTER_E_WITH_GRAVE)))
103 (with-output-characters ((:output :lf) '(#\LATIN_SMALL_LETTER_E_WITH_ACUTE #\Linefeed #\LATIN_SMALL_LETTER_E_WITH_GRAVE))
104 (test :utf-8 '(#xc3 #xa9 #x0a #xc3 #xa8))
105 (test (:utf-8 :newline :lf) '(#xc3 #xa9 #x0a #xc3 #xa8))
106 (test (:utf-8 :newline :cr) '(#xc3 #xa9 #x0d #xc3 #xa8))
107 (test (:utf-8 :newline :crlf) '(#xc3 #xa9 #x0d #x0a #xc3 #xa8)))
108 (with-input-bytes ((:input :cr) '(#xc3 #xa9 #x0d #xc3 #xa8))
109 (test :utf-8 '(#\LATIN_SMALL_LETTER_E_WITH_ACUTE #\Return #\LATIN_SMALL_LETTER_E_WITH_GRAVE))
110 (test (:utf-8 :newline :lf) '(#\LATIN_SMALL_LETTER_E_WITH_ACUTE #\Return #\LATIN_SMALL_LETTER_E_WITH_GRAVE))
111 (test (:utf-8 :newline :cr) '(#\LATIN_SMALL_LETTER_E_WITH_ACUTE #\Newline #\LATIN_SMALL_LETTER_E_WITH_GRAVE))
112 (test (:utf-8 :newline :crlf) '(#\LATIN_SMALL_LETTER_E_WITH_ACUTE #\Return #\LATIN_SMALL_LETTER_E_WITH_GRAVE)))
113 (with-output-characters ((:output :cr) '(#\LATIN_SMALL_LETTER_E_WITH_ACUTE #\Return #\LATIN_SMALL_LETTER_E_WITH_GRAVE))
114 (test :utf-8 '(#xc3 #xa9 #x0d #xc3 #xa8))
115 (test (:utf-8 :newline :lf) '(#xc3 #xa9 #x0d #xc3 #xa8))
116 (test (:utf-8 :newline :cr) '(#xc3 #xa9 #x0d #xc3 #xa8))
117 (test (:utf-8 :newline :crlf) '(#xc3 #xa9 #x0d #xc3 #xa8)))
118 (with-input-bytes ((:input :crlf) '(#xc3 #xa9 #x0d #x0a #xc3 #xa8))
119 (test :utf-8 '(#\LATIN_SMALL_LETTER_E_WITH_ACUTE #\Return #\Newline #\LATIN_SMALL_LETTER_E_WITH_GRAVE))
120 (test (:utf-8 :newline :lf) '(#\LATIN_SMALL_LETTER_E_WITH_ACUTE #\Return #\Newline #\LATIN_SMALL_LETTER_E_WITH_GRAVE))
121 (test (:utf-8 :newline :cr) '(#\LATIN_SMALL_LETTER_E_WITH_ACUTE #\Newline #\Newline #\LATIN_SMALL_LETTER_E_WITH_GRAVE))
122 (test (:utf-8 :newline :crlf) '(#\LATIN_SMALL_LETTER_E_WITH_ACUTE #\Newline #\LATIN_SMALL_LETTER_E_WITH_GRAVE)))
123 (with-output-characters ((:output :crlf) '(#\LATIN_SMALL_LETTER_E_WITH_ACUTE #\Return #\Linefeed #\LATIN_SMALL_LETTER_E_WITH_GRAVE))
124 (test :utf-8 '(#xc3 #xa9 #x0d #x0a #xc3 #xa8))
125 (test (:utf-8 :newline :lf) '(#xc3 #xa9 #x0d #x0a #xc3 #xa8))
126 (test (:utf-8 :newline :cr) '(#xc3 #xa9 #x0d #x0d #xc3 #xa8))
127 (test (:utf-8 :newline :crlf) '(#xc3 #xa9 #x0d #x0d #x0a #xc3 #xa8))))
129 (macrolet ((input-test (inxf expected &environment env)
130 `(progn
131 (with-test (:name (,(macroexpand 'name env) :file ,inxf))
132 (with-open-file (s *test-path* :external-format ',inxf)
133 (handler-bind ((sb-int:character-decoding-error
134 (lambda (c) (use-value "" c))))
135 (let* ((string (make-string 20))
136 (count (read-sequence string s)))
137 (assert (equal (map 'list 'identity (subseq string 0 count)) ,expected))))))
138 (with-test (:name (,(macroexpand 'name env) :octets ,inxf))
139 (handler-bind ((sb-int:character-decoding-error
140 (lambda (c) (use-value "" c))))
141 (let ((octets (coerce bytes '(simple-array (unsigned-byte 8) 1))))
142 (assert (equal (sb-ext:octets-to-string octets :external-format ',inxf)
143 (coerce ,expected 'string))))))))
144 (with-input-bytes ((id bytes) &body body)
145 `(let ((bytes ,bytes))
146 (with-open-file (s *test-path* :element-type '(unsigned-byte 8)
147 :direction :output :if-exists :supersede)
148 (dolist (byte bytes)
149 (write-byte byte s)))
150 (symbol-macrolet ((name ,id))
151 (macrolet ((test (inxf expected)
152 `(input-test ,inxf ,expected)))
153 ,@body))))
154 (output-test (chars outxf expected &environment env)
155 `(progn
156 (with-open-file (s *test-path* :element-type 'character
157 :external-format ',outxf
158 :direction :output :if-exists :supersede)
159 (handler-bind ((sb-int:character-encoding-error
160 (lambda (c) (use-value "" c))))
161 (write-sequence ,chars s)))
162 (with-test (:name (,(macroexpand 'name env) :file ,outxf))
163 (with-open-file (s *test-path* :element-type '(unsigned-byte 8))
164 (let* ((vector (make-array 20 :element-type '(unsigned-byte 8)))
165 (count (read-sequence vector s)))
166 (assert (equal (map 'list 'identity (subseq vector 0 count)) ,expected)))))
167 (with-test (:name (,(macroexpand 'name env) :octets ,outxf))
168 (handler-bind ((sb-int:character-encoding-error
169 (lambda (c) (use-value "" c))))
170 (let* ((string (coerce chars 'string))
171 (octets (sb-ext:string-to-octets string :external-format ',outxf)))
172 (assert (typep octets '(simple-array (unsigned-byte 8) 1)))
173 (assert (equal (coerce octets 'list) ,expected)))))))
174 (with-output-characters ((id chars) &body body)
175 `(let ((chars ,chars))
176 (symbol-macrolet ((name ,id))
177 (macrolet ((test (outxf expected)
178 `(output-test chars ,outxf ,expected)))
179 ,@body)))))
180 (with-input-bytes ((:input :invalid :crlf) (list #xc3 #xa9 #xff #x0d #x0a #xc3 #xa8))
181 (test :utf-8 '(#\LATIN_SMALL_LETTER_E_WITH_ACUTE #\Return #\Linefeed #\LATIN_SMALL_LETTER_E_WITH_GRAVE))
182 (test (:utf-8 :replacement #\?) '(#\LATIN_SMALL_LETTER_E_WITH_ACUTE #\? #\Return #\Linefeed #\LATIN_SMALL_LETTER_E_WITH_GRAVE))
183 (test (:utf-8 :newline :lf) '(#\LATIN_SMALL_LETTER_E_WITH_ACUTE #\Return #\Linefeed #\LATIN_SMALL_LETTER_E_WITH_GRAVE))
184 (test (:utf-8 :newline :lf :replacement #\?) '(#\LATIN_SMALL_LETTER_E_WITH_ACUTE #\? #\Return #\Linefeed #\LATIN_SMALL_LETTER_E_WITH_GRAVE))
185 (test (:utf-8 :newline :cr) '(#\LATIN_SMALL_LETTER_E_WITH_ACUTE #\Linefeed #\Linefeed #\LATIN_SMALL_LETTER_E_WITH_GRAVE))
186 (test (:utf-8 :newline :cr :replacement #\?) '(#\LATIN_SMALL_LETTER_E_WITH_ACUTE #\? #\Linefeed #\Linefeed #\LATIN_SMALL_LETTER_E_WITH_GRAVE))
187 (test (:utf-8 :newline :crlf) '(#\LATIN_SMALL_LETTER_E_WITH_ACUTE #\Linefeed #\LATIN_SMALL_LETTER_E_WITH_GRAVE))
188 (test (:utf-8 :newline :crlf :replacement #\?) '(#\LATIN_SMALL_LETTER_E_WITH_ACUTE #\? #\Linefeed #\LATIN_SMALL_LETTER_E_WITH_GRAVE)))
189 (with-input-bytes ((:input :multiple-invalid :crlf) (list #xc3 #xa9 #xff #x0d #x0a #xff #xff #xc3 #xa8))
190 (test :utf-8 '(#\LATIN_SMALL_LETTER_E_WITH_ACUTE #\Return #\Linefeed #\LATIN_SMALL_LETTER_E_WITH_GRAVE))
191 (test (:utf-8 :replacement #\?) '(#\LATIN_SMALL_LETTER_E_WITH_ACUTE #\? #\Return #\Linefeed #\? #\? #\LATIN_SMALL_LETTER_E_WITH_GRAVE))
192 (test (:utf-8 :newline :lf) '(#\LATIN_SMALL_LETTER_E_WITH_ACUTE #\Return #\Linefeed #\LATIN_SMALL_LETTER_E_WITH_GRAVE))
193 (test (:utf-8 :newline :lf :replacement #\?) '(#\LATIN_SMALL_LETTER_E_WITH_ACUTE #\? #\Return #\Linefeed #\? #\? #\LATIN_SMALL_LETTER_E_WITH_GRAVE))
194 (test (:utf-8 :newline :cr) '(#\LATIN_SMALL_LETTER_E_WITH_ACUTE #\Linefeed #\Linefeed #\LATIN_SMALL_LETTER_E_WITH_GRAVE))
195 (test (:utf-8 :newline :cr :replacement #\?) '(#\LATIN_SMALL_LETTER_E_WITH_ACUTE #\? #\Linefeed #\Linefeed #\? #\? #\LATIN_SMALL_LETTER_E_WITH_GRAVE))
196 (test (:utf-8 :newline :crlf) '(#\LATIN_SMALL_LETTER_E_WITH_ACUTE #\Linefeed #\LATIN_SMALL_LETTER_E_WITH_GRAVE))
197 (test (:utf-8 :newline :crlf :replacement #\?) '(#\LATIN_SMALL_LETTER_E_WITH_ACUTE #\? #\Linefeed #\? #\? #\LATIN_SMALL_LETTER_E_WITH_GRAVE)))
198 #+sb-unicode
199 (with-output-characters ((:output :invalid :lf) (list #\LATIN_SMALL_LETTER_E_WITH_ACUTE (code-char #xd800) #\Linefeed #\LATIN_SMALL_LETTER_E_WITH_GRAVE))
200 (test :utf-8 '(#xc3 #xa9 #x0a #xc3 #xa8))
201 (test (:utf-8 :replacement #\?) '(#xc3 #xa9 #x3f #x0a #xc3 #xa8))
202 (test (:utf-8 :newline :lf) '(#xc3 #xa9 #x0a #xc3 #xa8))
203 (test (:utf-8 :newline :lf :replacement #\?) '(#xc3 #xa9 #x3f #x0a #xc3 #xa8))
204 (test (:utf-8 :newline :cr) '(#xc3 #xa9 #x0d #xc3 #xa8))
205 (test (:utf-8 :newline :cr :replacement #\?) '(#xc3 #xa9 #x3f #x0d #xc3 #xa8))
206 (test (:utf-8 :newline :crlf) '(#xc3 #xa9 #x0d #x0a #xc3 #xa8))
207 (test (:utf-8 :newline :crlf :replacement #\?) '(#xc3 #xa9 #x3f #x0d #x0a #xc3 #xa8))))
209 (macrolet ((test (inxf expected &environment env)
210 `(with-test (:name (,(macroexpand 'name env) ,inxf))
211 (with-open-file (s *test-path* :external-format ',inxf)
212 (let* ((string (make-string 10000))
213 (count (read-sequence string s)))
214 (assert (equal (map 'list 'char-code (subseq string 0 count)) ,expected))))))
215 (with-test-file ((id bytes) &body body)
216 `(progn
217 (with-open-file (s *test-path* :element-type '(unsigned-byte 8)
218 :direction :output :if-exists :supersede)
219 (dolist (byte ,bytes)
220 (write-byte byte s)))
221 (symbol-macrolet ((name ,id))
222 ,@body)))
223 (tests (size)
224 `(progn
225 (with-test-file ((:input :lf ,size) (contents ,size '(10)))
226 (test :utf-8 (contents ,size '(10)))
227 (test (:utf-8 :newline :lf) (contents ,size '(10)))
228 (test (:utf-8 :newline :cr) (contents ,size '(10)))
229 (test (:utf-8 :newline :crlf) (contents ,size '(10))))
230 (with-test-file ((:input :cr ,size) (contents ,size '(13)))
231 (test :utf-8 (contents ,size '(13)))
232 (test (:utf-8 :newline :lf) (contents ,size '(13)))
233 (test (:utf-8 :newline :cr) (contents ,size '(10)))
234 (test (:utf-8 :newline :crlf) (contents ,size '(13))))
235 (with-test-file ((:input :crlf ,size) (contents ,size '(13 10)))
236 (test :utf-8 (contents ,size '(13 10)))
237 (test (:utf-8 :newline :lf) (contents ,size '(13 10)))
238 (test (:utf-8 :newline :cr) (contents ,size '(10 10)))
239 (test (:utf-8 :newline :crlf) (contents ,(1- size) '(10)))))))
240 (flet ((contents (size nl)
241 (let ((bytes (make-array size :initial-element #x61)))
242 (loop for x in nl
243 for j from (- (length bytes) (length nl))
244 do (setf (aref bytes j) x))
245 (coerce bytes 'list))))
246 (tests 2)
248 (with-test (:name :ansi-stream-cin-buffer-length)
249 (assert (= sb-impl::+ansi-stream-in-buffer-length+ 512)))
251 (tests 511)
252 (tests 512)
253 (tests 513)
255 ;; +ANSI-STREAM-IN-BUFFER-EXTRA+ is possibly also relevant. Can't
256 ;; test for it as the constant gets shaken out, but it's currently
257 ;; 4.
258 (tests 515)
259 (tests 516)
260 (tests 517)
262 (with-test (:name :fd-stream-bytes-per-buffer)
263 (assert (= sb-impl::+bytes-per-buffer+ 8192)))
265 (tests 8190)
266 (tests 8191)
267 (tests 8192)
268 (tests 8193)
269 (tests 8194)))
271 (macrolet ((test (inxf expected &optional (unread-expected expected) &environment env)
272 `(progn
273 (with-test (:name (,(macroexpand 'name env) ,inxf))
274 (with-open-file (s *test-path* :external-format ',inxf)
275 (let ((actual
276 (cons (file-position s)
277 (loop for char = (read-char s nil nil)
278 while char
279 collect (file-position s)))))
280 (assert (equal actual ,expected)))))
281 (with-test (:name (,(macroexpand 'name env) unread-char ,inxf))
282 (with-open-file (s *test-path* :external-format ',inxf)
283 (assert (sb-impl::ansi-stream-cin-buffer s))
284 (let ((actual (loop for char = (read-char s nil nil)
285 if (null char) collect (file-position s) and do (loop-finish)
286 do (unread-char char s)
287 collect (file-position s)
288 do (read-char s))))
289 (assert (equal actual ,unread-expected)))))
290 (with-test (:name (,(macroexpand 'name env) unread-char :io ,inxf))
291 (with-open-file (s *test-path* :external-format ',inxf
292 :direction :io :if-exists :overwrite)
293 ;; if we reinstate in character buffers for :io character streams,
294 ;; make a stream that is unbuffered some other way
295 (assert (not (sb-impl::ansi-stream-cin-buffer s)))
296 (let ((actual (loop for char = (read-char s nil nil)
297 if (null char) collect (file-position s) and do (loop-finish)
298 do (unread-char char s)
299 collect (file-position s)
300 do (read-char s))))
301 (assert (equal actual ,unread-expected)))))))
302 (with-test-file ((id bytes) &body body)
303 `(progn
304 (with-open-file (s *test-path* :element-type '(unsigned-byte 8)
305 :direction :output :if-exists :supersede)
306 (dolist (byte ,bytes)
307 (write-byte byte s)))
308 (symbol-macrolet ((name ,id))
309 ,@body))))
310 (with-test-file ((file-position :lf) '(#xc3 #xa9 #x0a #xc3 #xa8 #x38 #x0a #x39 #x3a #x0a #x3b))
311 (test :utf-8 '(0 2 3 5 6 7 8 9 10 11))
312 (test (:utf-8 :newline :lf) '(0 2 3 5 6 7 8 9 10 11))
313 (test (:utf-8 :newline :cr) '(0 2 3 5 6 7 8 9 10 11))
314 (test (:utf-8 :newline :crlf) '(0 2 3 5 6 7 8 9 10 11)))
315 (with-test-file ((file-position :cr) '(#xc3 #xa9 #x0d #xc3 #xa8 #x38 #x0d #x39 #x3a #x0d #x3b))
316 (test :utf-8 '(0 2 3 5 6 7 8 9 10 11))
317 (test (:utf-8 :newline :lf) '(0 2 3 5 6 7 8 9 10 11))
318 (test (:utf-8 :newline :cr) '(0 2 3 5 6 7 8 9 10 11))
319 (test (:utf-8 :newline :crlf) '(0 2 3 5 6 7 8 9 10 11)))
320 (with-test-file ((file-position :crlf) '(#xc3 #xa9 #x0d #x0a #xc3 #xa8 #x38 #x0d #x0a #x39 #x3a #x0d #x0a #x3b))
321 (test :utf-8 '(0 2 3 4 6 7 8 9 10 11 12 13 14))
322 (test (:utf-8 :newline :lf) '(0 2 3 4 6 7 8 9 10 11 12 13 14))
323 (test (:utf-8 :newline :cr) '(0 2 3 4 6 7 8 9 10 11 12 13 14))
324 (test (:utf-8 :newline :crlf) '(0 2 4 6 7 9 10 11 13 14)))
325 (with-test-file ((file-position :replacement character) '(#xc3 #xa9 #x0d #x0a #xff #xc3 #xa8 #xff #x0d #x0a))
326 (test (:utf-8 :replacement #\?) '(0 2 3 4 5 7 8 9 10) '(0 2 3 5 5 8 8 9 10))
327 (test (:utf-8 :newline :lf :replacement #\?) '(0 2 3 4 5 7 8 9 10) '(0 2 3 5 5 8 8 9 10))
328 (test (:utf-8 :newline :cr :replacement #\?) '(0 2 3 4 5 7 8 9 10) '(0 2 3 5 5 8 8 9 10))
329 (test (:utf-8 :newline :crlf :replacement #\?) '(0 2 4 5 7 8 10) '(0 2 5 5 8 8 10)))
330 (with-test-file ((file-position :replacement string) '(#xc3 #xa9 #x0d #x0a #xff #xc3 #xa8 #xff #x0d #x0a))
331 (test (:utf-8 :replacement "??") '(0 2 3 4 5 5 7 8 8 9 10) '(0 2 3 5 5 5 8 8 8 9 10))
332 (test (:utf-8 :newline :lf :replacement "??") '(0 2 3 4 5 5 7 8 8 9 10) '(0 2 3 5 5 5 8 8 8 9 10))
333 (test (:utf-8 :newline :cr :replacement "??") '(0 2 3 4 5 5 7 8 8 9 10) '(0 2 3 5 5 5 8 8 8 9 10))
334 (test (:utf-8 :newline :crlf :replacement "??") '(0 2 4 5 5 7 8 8 10) '(0 2 5 5 5 8 8 8 10))))
336 (macrolet ((output-test (chars outxf expected &environment env)
337 `(progn
338 (with-test (:name (,(macroexpand 'name env) file-string-length ,outxf))
339 (let ((string (coerce ,chars 'string)))
340 (with-open-file (s *test-path* :element-type 'character
341 :external-format ',outxf
342 :direction :output :if-exists :supersede)
343 (handler-bind ((sb-int:character-encoding-error
344 (lambda (c) (use-value "" c))))
345 (let ((pos (file-position s))
346 (len (file-string-length s string)))
347 (let ((actual
348 (loop for index from 0 below (length string)
349 for char = (char string index)
350 for thislen = (file-string-length s char)
351 for thisstringlen = (file-string-length s (subseq string index))
352 if (null thisstringlen) do (assert (some 'null (subseq ,expected index))) else do (assert (notany 'null (subseq ,expected index)))
353 collect thislen
354 if (and (null len) thisstringlen) do (setf len (+ pos thisstringlen))
355 if thisstringlen do (assert (= (+ pos thisstringlen) len))
356 do (write-char char s)
357 if thislen do (assert (= (+ pos thislen) (file-position s)))
358 do (setf pos (file-position s)))))
359 (assert (equal actual ,expected))))))))))
360 (with-output-characters ((id chars) &body body)
361 `(let ((chars ,chars))
362 (symbol-macrolet ((name ,id))
363 (macrolet ((test (outxf expected)
364 `(output-test chars ,outxf ,expected)))
365 ,@body)))))
366 (with-output-characters ((:output :lf) (list #\5 #\LATIN_SMALL_LETTER_E_WITH_ACUTE #\Linefeed #\7))
367 (test :utf-8 '(1 2 1 1))
368 (test (:utf-8 :newline :lf) '(1 2 1 1))
369 (test (:utf-8 :newline :cr) '(1 2 1 1))
370 (test (:utf-8 :newline :crlf) '(1 2 2 1)))
371 #+sb-unicode
372 (with-output-characters ((:output :invalid :lf) (list #\5 #\LATIN_SMALL_LETTER_E_WITH_ACUTE (code-char #xdb00) #\Linefeed #\7))
373 ;; A sufficiently-smart streams implementation could statically determine the lengths
374 ;; of replacement characters given as part of the external format
375 (test :utf-8 '(1 2 nil 1 1))
376 (test (:utf-8 :replacement #\?) '(1 2 nil 1 1))
377 (test (:utf-8 :newline :lf) '(1 2 nil 1 1))
378 (test (:utf-8 :newline :lf :replacement #\?) '(1 2 nil 1 1))
379 (test (:utf-8 :newline :cr) '(1 2 nil 1 1))
380 (test (:utf-8 :newline :cr :replacement #\?) '(1 2 nil 1 1))
381 (test (:utf-8 :newline :crlf) '(1 2 nil 2 1))
382 (test (:utf-8 :newline :crlf :replacement #\?) '(1 2 nil 2 1))))
384 (delete-file *test-path*)