prehash-for-perfect-hash: add truly-thes.
[sbcl.git] / tests / iso-8859-3.pure.lisp
blobc6d7905635b7b90bc21c1e3914c20d0a29364685
1 ;;;; This file is for testing external-format functionality for
2 ;;;; ISO-8859-3, using test machinery which does not have side
3 ;;;; effects. Note that the tests here reach into unexported
4 ;;;; functionality, and should not be used as a guide for users.
6 ;;;; This software is part of the SBCL system. See the README file for
7 ;;;; more information.
8 ;;;;
9 ;;;; While most of SBCL is derived from the CMU CL system, the test
10 ;;;; files (like this one) were written from scratch after the fork
11 ;;;; from CMU CL.
12 ;;;;
13 ;;;; This software is in the public domain and is provided with
14 ;;;; absolutely no warranty. See the COPYING and CREDITS files for
15 ;;;; more information.
17 #+(or (not sb-unicode) unicode-lite) (invoke-restart 'run-tests::skip-file)
19 (defvar *test-path* (scratch-file-name))
21 (macrolet ((input-test (inxf expected &environment env)
22 `(progn
23 (with-test (:name (,(macroexpand 'name env) :file ,inxf))
24 (with-open-file (s *test-path* :external-format ',inxf)
25 (let* ((string (make-string 20))
26 (count (read-sequence string s)))
27 (assert (equal (map 'list 'identity (subseq string 0 count)) ,expected)))))
28 (with-test (:name (,(macroexpand 'name env) :octets ,inxf))
29 (let ((octets (coerce bytes '(simple-array (unsigned-byte 8) 1))))
30 (assert (equal (sb-ext:octets-to-string octets :external-format ',inxf)
31 (coerce ,expected 'string)))))))
32 (with-input-bytes ((id bytes) &body body)
33 `(let ((bytes ,bytes))
34 (with-open-file (s *test-path* :element-type '(unsigned-byte 8)
35 :direction :output :if-exists :supersede)
36 (dolist (byte bytes)
37 (write-byte byte s)))
38 (symbol-macrolet ((name ,id))
39 (macrolet ((test (inxf expected)
40 `(input-test ,inxf ,expected)))
41 ,@body))))
42 (output-test (chars outxf expected &environment env)
43 `(progn
44 (with-open-file (s *test-path* :element-type 'character
45 :external-format ',outxf
46 :direction :output :if-exists :supersede)
47 (write-sequence ,chars s))
48 (with-test (:name (,(macroexpand 'name env) :file ,outxf))
49 (with-open-file (s *test-path* :element-type '(unsigned-byte 8))
50 (let* ((vector (make-array 20 :element-type '(unsigned-byte 8)))
51 (count (read-sequence vector s)))
52 (assert (equal (map 'list 'identity (subseq vector 0 count)) ,expected)))))
53 (with-test (:name (,(macroexpand 'name env) :octets ,outxf))
54 (let* ((string (coerce chars 'string))
55 (octets (sb-ext:string-to-octets string :external-format ',outxf)))
56 (assert (typep octets '(simple-array (unsigned-byte 8) 1)))
57 (assert (equal (coerce octets 'list) ,expected))))))
58 (with-output-characters ((id chars) &body body)
59 `(let ((chars ,chars))
60 (symbol-macrolet ((name ,id))
61 (macrolet ((test (outxf expected)
62 `(output-test chars ,outxf ,expected)))
63 ,@body)))))
64 (with-input-bytes ((:input :lf) '(#xa2 #x0a #x37))
65 (test :iso-8859-3 '(#\BREVE #\Newline #\7))
66 (test (:iso-8859-3 :newline :lf) '(#\BREVE #\Newline #\7))
67 (test (:iso-8859-3 :newline :cr) '(#\BREVE #\Newline #\7))
68 (test (:iso-8859-3 :newline :crlf) '(#\BREVE #\Newline #\7)))
69 (with-output-characters ((:output :lf) '(#\BREVE #\Linefeed #\7))
70 (test :iso-8859-3 '(#xa2 #x0a #x37))
71 (test (:iso-8859-3 :newline :lf) '(#xa2 #x0a #x37))
72 (test (:iso-8859-3 :newline :cr) '(#xa2 #x0d #x37))
73 (test (:iso-8859-3 :newline :crlf) '(#xa2 #x0d #x0a #x37)))
74 (with-input-bytes ((:input :cr) '(#xa2 #x0d #x37))
75 (test :iso-8859-3 '(#\BREVE #\Return #\7))
76 (test (:iso-8859-3 :newline :lf) '(#\BREVE #\Return #\7))
77 (test (:iso-8859-3 :newline :cr) '(#\BREVE #\Newline #\7))
78 (test (:iso-8859-3 :newline :crlf) '(#\BREVE #\Return #\7)))
79 (with-output-characters ((:output :cr) '(#\BREVE #\Return #\7))
80 (test :iso-8859-3 '(#xa2 #x0d #x37))
81 (test (:iso-8859-3 :newline :lf) '(#xa2 #x0d #x37))
82 (test (:iso-8859-3 :newline :cr) '(#xa2 #x0d #x37))
83 (test (:iso-8859-3 :newline :crlf) '(#xa2 #x0d #x37)))
84 (with-input-bytes ((:input :crlf) '(#xa2 #x0d #x0a #x37))
85 (test :iso-8859-3 '(#\BREVE #\Return #\Newline #\7))
86 (test (:iso-8859-3 :newline :lf) '(#\BREVE #\Return #\Newline #\7))
87 (test (:iso-8859-3 :newline :cr) '(#\BREVE #\Newline #\Newline #\7))
88 (test (:iso-8859-3 :newline :crlf) '(#\BREVE #\Newline #\7)))
89 (with-output-characters ((:output :crlf) '(#\BREVE #\Return #\Linefeed #\7))
90 (test :iso-8859-3 '(#xa2 #x0d #x0a #x37))
91 (test (:iso-8859-3 :newline :lf) '(#xa2 #x0d #x0a #x37))
92 (test (:iso-8859-3 :newline :cr) '(#xa2 #x0d #x0d #x37))
93 (test (:iso-8859-3 :newline :crlf) '(#xa2 #x0d #x0d #x0a #x37))))
95 (macrolet ((input-test (inxf expected &environment env)
96 `(progn
97 (with-test (:name (,(macroexpand 'name env) :file ,inxf))
98 (with-open-file (s *test-path* :external-format ',inxf)
99 (handler-bind ((sb-int:character-decoding-error
100 (lambda (c) (use-value "" c))))
101 (let* ((string (make-string 20))
102 (count (read-sequence string s)))
103 (assert (equal (map 'list 'identity (subseq string 0 count)) ,expected))))))
104 (with-test (:name (,(macroexpand 'name env) :octets ,inxf))
105 (handler-bind ((sb-int:character-decoding-error
106 (lambda (c) (use-value "" c))))
107 (let ((octets (coerce bytes '(simple-array (unsigned-byte 8) 1))))
108 (assert (equal (sb-ext:octets-to-string octets :external-format ',inxf)
109 (coerce ,expected 'string))))))))
110 (with-input-bytes ((id bytes) &body body)
111 `(let ((bytes ,bytes))
112 (with-open-file (s *test-path* :element-type '(unsigned-byte 8)
113 :direction :output :if-exists :supersede)
114 (dolist (byte bytes)
115 (write-byte byte s)))
116 (symbol-macrolet ((name ,id))
117 (macrolet ((test (inxf expected)
118 `(input-test ,inxf ,expected)))
119 ,@body))))
120 (output-test (chars outxf expected &environment env)
121 `(progn
122 (with-open-file (s *test-path* :element-type 'character
123 :external-format ',outxf
124 :direction :output :if-exists :supersede)
125 (handler-bind ((sb-int:character-encoding-error
126 (lambda (c) (use-value "" c))))
127 (write-sequence ,chars s)))
128 (with-test (:name (,(macroexpand 'name env) :file ,outxf))
129 (with-open-file (s *test-path* :element-type '(unsigned-byte 8))
130 (let* ((vector (make-array 20 :element-type '(unsigned-byte 8)))
131 (count (read-sequence vector s)))
132 (assert (equal (map 'list 'identity (subseq vector 0 count)) ,expected)))))
133 (with-test (:name (,(macroexpand 'name env) :octets ,outxf))
134 (handler-bind ((sb-int:character-encoding-error
135 (lambda (c) (use-value "" c))))
136 (let* ((string (coerce chars 'string))
137 (octets (sb-ext:string-to-octets string :external-format ',outxf)))
138 (assert (typep octets '(simple-array (unsigned-byte 8) 1)))
139 (assert (equal (coerce octets 'list) ,expected)))))))
140 (with-output-characters ((id chars) &body body)
141 `(let ((chars ,chars))
142 (symbol-macrolet ((name ,id))
143 (macrolet ((test (outxf expected)
144 `(output-test chars ,outxf ,expected)))
145 ,@body)))))
146 (with-input-bytes ((:input :invalid :crlf) (list #x35 #xa5 #x0d #x0a #x37))
147 (test :ascii '(#\5 #\Return #\Linefeed #\7))
148 (test (:ascii :replacement #\?) '(#\5 #\? #\Return #\Linefeed #\7))
149 (test (:ascii :newline :lf) '(#\5 #\Return #\Linefeed #\7))
150 (test (:ascii :newline :lf :replacement #\?) '(#\5 #\? #\Return #\Linefeed #\7))
151 (test (:ascii :newline :cr) '(#\5 #\Linefeed #\Linefeed #\7))
152 (test (:ascii :newline :cr :replacement #\?) '(#\5 #\? #\Linefeed #\Linefeed #\7))
153 (test (:ascii :newline :crlf) '(#\5 #\Linefeed #\7))
154 (test (:ascii :newline :crlf :replacement #\?) '(#\5 #\? #\Linefeed #\7)))
155 (with-input-bytes ((:input :multiple-invalid :crlf) (list #x35 #xa5 #x0d #x0a #xa5 #xa5 #x37))
156 (test :ascii '(#\5 #\Return #\Linefeed #\7))
157 (test (:ascii :replacement #\?) '(#\5 #\? #\Return #\Linefeed #\? #\? #\7))
158 (test (:ascii :newline :lf) '(#\5 #\Return #\Linefeed #\7))
159 (test (:ascii :newline :lf :replacement #\?) '(#\5 #\? #\Return #\Linefeed #\? #\? #\7))
160 (test (:ascii :newline :cr) '(#\5 #\Linefeed #\Linefeed #\7))
161 (test (:ascii :newline :cr :replacement #\?) '(#\5 #\? #\Linefeed #\Linefeed #\? #\? #\7))
162 (test (:ascii :newline :crlf) '(#\5 #\Linefeed #\7))
163 (test (:ascii :newline :crlf :replacement #\?) '(#\5 #\? #\Linefeed #\? #\? #\7)))
164 (with-output-characters ((:output :invalid :lf) (list #\BREVE (code-char 512) #\Linefeed #\7))
165 (test :iso-8859-3 '(#xa2 #x0a #x37))
166 (test (:iso-8859-3 :replacement #\?) '(#xa2 #x3f #x0a #x37))
167 (test (:iso-8859-3 :newline :lf) '(#xa2 #x0a #x37))
168 (test (:iso-8859-3 :newline :lf :replacement #\?) '(#xa2 #x3f #x0a #x37))
169 (test (:iso-8859-3 :newline :cr) '(#xa2 #x0d #x37))
170 (test (:iso-8859-3 :newline :cr :replacement #\?) '(#xa2 #x3f #x0d #x37))
171 (test (:iso-8859-3 :newline :crlf) '(#xa2 #x0d #x0a #x37))
172 (test (:iso-8859-3 :newline :crlf :replacement #\?) '(#xa2 #x3f #x0d #x0a #x37))))
174 (macrolet ((test (inxf expected &environment env)
175 `(with-test (:name (,(macroexpand 'name env) ,inxf))
176 (with-open-file (s *test-path* :external-format ',inxf)
177 (let* ((string (make-string 10000))
178 (count (read-sequence string s)))
179 (assert (equal (map 'list 'char-code (subseq string 0 count)) ,expected))))))
180 (with-test-file ((id bytes) &body body)
181 `(progn
182 (with-open-file (s *test-path* :element-type '(unsigned-byte 8)
183 :direction :output :if-exists :supersede)
184 (dolist (byte ,bytes)
185 (write-byte byte s)))
186 (symbol-macrolet ((name ,id))
187 ,@body)))
188 (tests (size)
189 `(progn
190 (with-test-file ((:input :lf ,size) (contents ,size '(#x0a)))
191 (test :iso-8859-3 (contents ,size '(10)))
192 (test (:iso-8859-3 :newline :lf) (contents ,size '(10)))
193 (test (:iso-8859-3 :newline :cr) (contents ,size '(10)))
194 (test (:iso-8859-3 :newline :crlf) (contents ,size '(10))))
195 (with-test-file ((:input :cr ,size) (contents ,size '(#x0d)))
196 (test :iso-8859-3 (contents ,size '(13)))
197 (test (:iso-8859-3 :newline :lf) (contents ,size '(13)))
198 (test (:iso-8859-3 :newline :cr) (contents ,size '(10)))
199 (test (:iso-8859-3 :newline :crlf) (contents ,size '(13))))
200 (with-test-file ((:input :crlf ,size) (contents ,size '(#x0d #x0a)))
201 (test :iso-8859-3 (contents ,size '(13 10)))
202 (test (:iso-8859-3 :newline :lf) (contents ,size '(13 10)))
203 (test (:iso-8859-3 :newline :cr) (contents ,size '(10 10)))
204 (test (:iso-8859-3 :newline :crlf) (contents ,(1- size) '(10)))))))
205 (flet ((contents (size nl)
206 (let ((bytes (make-array size :initial-element #x61)))
207 (loop for x in nl
208 for j from (- (length bytes) (length nl))
209 do (setf (aref bytes j) x))
210 (coerce bytes 'list))))
211 (tests 2)
213 (with-test (:name :ansi-stream-cin-buffer-length)
214 (assert (= sb-impl::+ansi-stream-in-buffer-length+ 512)))
216 (tests 511)
217 (tests 512)
218 (tests 513)
220 ;; +ANSI-STREAM-IN-BUFFER-EXTRA+ is possibly also relevant. Can't
221 ;; test for it as the constant gets shaken out, but it's currently
222 ;; 4.
223 (tests 515)
224 (tests 516)
225 (tests 517)
227 (with-test (:name :fd-stream-bytes-per-buffer)
228 (assert (= sb-impl::+bytes-per-buffer+ 8192)))
230 (tests 8190)
231 (tests 8191)
232 (tests 8192)
233 (tests 8193)
234 (tests 8194)))
236 (macrolet ((test (inxf expected &optional (unread-expected expected) &environment env)
237 `(progn
238 (with-test (:name (,(macroexpand 'name env) ,inxf))
239 (with-open-file (s *test-path* :external-format ',inxf)
240 (let ((actual
241 (cons (file-position s)
242 (loop for char = (read-char s nil nil)
243 while char
244 collect (file-position s)))))
245 (assert (equal actual ,expected)))))
246 (with-test (:name (,(macroexpand 'name env) unread-char ,inxf))
247 (with-open-file (s *test-path* :external-format ',inxf)
248 (assert (sb-impl::ansi-stream-cin-buffer s))
249 (let ((actual (loop for char = (read-char s nil nil)
250 if (null char) collect (file-position s) and do (loop-finish)
251 do (unread-char char s)
252 collect (file-position s)
253 do (read-char s))))
254 (assert (equal actual ,unread-expected)))))
255 (with-test (:name (,(macroexpand 'name env) unread-char :io ,inxf))
256 (with-open-file (s *test-path* :external-format ',inxf
257 :direction :io :if-exists :overwrite)
258 ;; if we reinstate in character buffers for :io character streams,
259 ;; make a stream that is unbuffered some other way
260 (assert (not (sb-impl::ansi-stream-cin-buffer s)))
261 (let ((actual (loop for char = (read-char s nil nil)
262 if (null char) collect (file-position s) and do (loop-finish)
263 do (unread-char char s)
264 collect (file-position s)
265 do (read-char s))))
266 (assert (equal actual ,unread-expected)))))))
267 (with-test-file ((id bytes) &body body)
268 `(progn
269 (with-open-file (s *test-path* :element-type '(unsigned-byte 8)
270 :direction :output :if-exists :supersede)
271 (dolist (byte ,bytes)
272 (write-byte byte s)))
273 (symbol-macrolet ((name ,id))
274 ,@body))))
275 (with-test-file ((file-position :lf) '(#xa2 #x0a #x37 #x38 #x0a #x39 #x3a #x0a #x3b))
276 (test :iso-8859-3 (loop for i from 0 to 9 collect i))
277 (test (:iso-8859-3 :newline :lf) (loop for i from 0 to 9 collect i))
278 (test (:iso-8859-3 :newline :cr) (loop for i from 0 to 9 collect i))
279 (test (:iso-8859-3 :newline :crlf) (loop for i from 0 to 9 collect i)))
280 (with-test-file ((file-position :cr) '(#xa2 #x0d #x37 #x38 #x0d #x39 #x3a #x0d #x3b))
281 (test :iso-8859-3 (loop for i from 0 to 9 collect i))
282 (test (:iso-8859-3 :newline :lf) (loop for i from 0 to 9 collect i))
283 (test (:iso-8859-3 :newline :cr) (loop for i from 0 to 9 collect i))
284 (test (:iso-8859-3 :newline :crlf) (loop for i from 0 to 9 collect i)))
285 (with-test-file ((file-position :crlf) '(#xa2 #x0d #x0a #x37 #x38 #x0d #x0a #x39 #x3a #x0d #x0a #x3b))
286 (test :iso-8859-3 (loop for i from 0 to 12 collect i))
287 (test (:iso-8859-3 :newline :lf) (loop for i from 0 to 12 collect i))
288 (test (:iso-8859-3 :newline :cr) (loop for i from 0 to 12 collect i))
289 (test (:iso-8859-3 :newline :crlf) '(0 1 3 4 5 7 8 9 11 12)))
290 (with-test-file ((file-position :replacement character) '(#xa2 #x0d #x0a #xa5 #x37 #xa5 #x0d #x0a))
291 (test (:iso-8859-3 :replacement #\?) (loop for i from 0 to 8 collect i) '(0 1 2 4 4 6 6 7 8))
292 (test (:iso-8859-3 :newline :lf :replacement #\?) (loop for i from 0 to 8 collect i) '(0 1 2 4 4 6 6 7 8))
293 (test (:iso-8859-3 :newline :cr :replacement #\?) (loop for i from 0 to 8 collect i) '(0 1 2 4 4 6 6 7 8))
294 (test (:iso-8859-3 :newline :crlf :replacement #\?) '(0 1 3 4 5 6 8) '(0 1 4 4 6 6 8)))
295 (with-test-file ((file-position :replacement string) '(#xa2 #x0d #x0a #xa5 #x37 #xa5 #x0d #x0a))
296 (test (:iso-8859-3 :replacement "??") '(0 1 2 3 4 4 5 6 6 7 8) '(0 1 2 4 4 4 6 6 6 7 8))
297 (test (:iso-8859-3 :newline :lf :replacement "??") '(0 1 2 3 4 4 5 6 6 7 8) '(0 1 2 4 4 4 6 6 6 7 8))
298 (test (:iso-8859-3 :newline :cr :replacement "??") '(0 1 2 3 4 4 5 6 6 7 8) '(0 1 2 4 4 4 6 6 6 7 8))
299 (test (:iso-8859-3 :newline :crlf :replacement "??") '(0 1 3 4 4 5 6 6 8) '(0 1 4 4 4 6 6 6 8))))
301 (macrolet ((output-test (chars outxf expected &environment env)
302 `(progn
303 (with-test (:name (,(macroexpand 'name env) file-string-length ,outxf))
304 (let ((string (coerce ,chars 'string)))
305 (with-open-file (s *test-path* :element-type 'character
306 :external-format ',outxf
307 :direction :output :if-exists :supersede)
308 (handler-bind ((sb-int:character-encoding-error
309 (lambda (c) (use-value "" c))))
310 (let ((pos (file-position s))
311 (len (file-string-length s string)))
312 (let ((actual
313 (loop for index from 0 below (length string)
314 for char = (char string index)
315 for thislen = (file-string-length s char)
316 for thisstringlen = (file-string-length s (subseq string index))
317 if (null thisstringlen) do (assert (some 'null (subseq ,expected index))) else do (assert (notany 'null (subseq ,expected index)))
318 collect thislen
319 if (and (null len) thisstringlen) do (setf len (+ pos thisstringlen))
320 if thisstringlen do (assert (= (+ pos thisstringlen) len))
321 do (write-char char s)
322 if thislen do (assert (= (+ pos thislen) (file-position s)))
323 do (setf pos (file-position s)))))
324 (assert (equal actual ,expected))))))))))
325 (with-output-characters ((id chars) &body body)
326 `(let ((chars ,chars))
327 (symbol-macrolet ((name ,id))
328 (macrolet ((test (outxf expected)
329 `(output-test chars ,outxf ,expected)))
330 ,@body)))))
331 (with-output-characters ((:output :lf) (list #\5 #\Linefeed #\7))
332 (test :iso-8859-3 '(1 1 1))
333 (test (:iso-8859-3 :newline :lf) '(1 1 1))
334 (test (:iso-8859-3 :newline :cr) '(1 1 1))
335 (test (:iso-8859-3 :newline :crlf) '(1 2 1)))
336 (with-output-characters ((:output :invalid :lf) (list #\5 (code-char #xa1) #\Linefeed #\7))
337 ;; A sufficiently-smart streams implementation could statically determine the lengths
338 ;; of replacement characters given as part of the external format
339 (test :iso-8859-3 '(1 nil 1 1))
340 (test (:iso-8859-3 :replacement #\?) '(1 nil 1 1))
341 (test (:iso-8859-3 :newline :lf) '(1 nil 1 1))
342 (test (:iso-8859-3 :newline :lf :replacement #\?) '(1 nil 1 1))
343 (test (:iso-8859-3 :newline :cr) '(1 nil 1 1))
344 (test (:iso-8859-3 :newline :cr :replacement #\?) '(1 nil 1 1))
345 (test (:iso-8859-3 :newline :crlf) '(1 nil 2 1))
346 (test (:iso-8859-3 :newline :crlf :replacement #\?) '(1 nil 2 1))))
348 (delete-file *test-path*)