Turn some commented-out tests into a real regression test
[sbcl.git] / tests / reader.pure.lisp
blobbabb4d20191ca3ddc58e1d6c8b5306fea4636e49
1 ;;;; tests related to the Lisp reader
3 ;;;; This software is part of the SBCL system. See the README file for
4 ;;;; more information.
5 ;;;;
6 ;;;; While most of SBCL is derived from the CMU CL system, the test
7 ;;;; files (like this one) were written from scratch after the fork
8 ;;;; from CMU CL.
9 ;;;;
10 ;;;; This software is in the public domain and is provided with
11 ;;;; absolutely no warranty. See the COPYING and CREDITS files for
12 ;;;; more information.
14 (in-package "CL-USER")
16 (load "compiler-test-util.lisp")
18 (assert (equal (symbol-name '#:|fd\sA|) "fdsA"))
20 ;;; Prior to sbcl-0.7.2.10, SBCL disobeyed the ANSI requirements on
21 ;;; returning NIL for unset dispatch-macro-character functions. (bug
22 ;;; 151, fixed by Alexey Dejenka sbcl-devel "bug 151" 2002-04-12)
23 (assert (not (get-dispatch-macro-character #\# #\{)))
24 (assert (not (get-dispatch-macro-character #\# #\0)))
25 ;;; And we might as well test that we don't have any cross-compilation
26 ;;; shebang residues left...
27 (assert (not (get-dispatch-macro-character #\# #\!)))
28 ;;; Also test that all the illegal sharp macro characters are
29 ;;; recognized as being illegal.
30 (loop for char in '(#\Backspace #\Tab #\Newline #\Linefeed
31 #\Page #\Return #\Space #\) #\<)
32 do (assert (get-dispatch-macro-character #\# char)))
34 (assert (not (ignore-errors (get-dispatch-macro-character #\! #\0)
35 t)))
37 ;;; In sbcl-0.7.3, GET-MACRO-CHARACTER and SET-MACRO-CHARACTER didn't
38 ;;; use NIL to represent the no-macro-attached-to-this-character case
39 ;;; as ANSI says they should. (This problem is parallel to the
40 ;;; GET-DISPATCH-MACRO misbehavior fixed in sbcl-0.7.2.10, but
41 ;;; was fixed a little later.)
42 (dolist (customizable-char
43 ;; According to ANSI "2.1.4 Character Syntax Types", these
44 ;; characters are reserved for the programmer.
45 '(#\? #\! #\[ #\] #\{ #\}))
46 ;; So they should have no macro-characterness.
47 (multiple-value-bind (macro-fun non-terminating-p)
48 (get-macro-character customizable-char)
49 (assert (null macro-fun))
50 ;; Also, in a bit of ANSI weirdness, NON-TERMINATING-P can be
51 ;; true only when MACRO-FUN is true. (When the character
52 ;; is not a macro character, it can be embedded in a token,
53 ;; so it'd be more logical for NON-TERMINATING-P to be T in
54 ;; this case; but ANSI says it's NIL in this case.
55 (assert (null non-terminating-p))))
57 ;;; rudimentary test of SET-SYNTAX-FROM-CHAR, just to verify that it
58 ;;; wasn't totally broken by the GET-MACRO-CHARACTER/SET-MACRO-CHARACTER
59 ;;; fixes in 0.7.3.16
60 (assert (= 123579 (read-from-string "123579")))
61 (let ((*readtable* (copy-readtable)))
62 (set-syntax-from-char #\7 #\;)
63 (assert (= 1235 (read-from-string "123579"))))
65 ;;; PARSE-INTEGER must signal an error of type PARSE-ERROR if it is
66 ;;; unable to parse an integer and :JUNK-ALLOWED is NIL.
67 (macrolet ((assert-parse-error (form)
68 `(multiple-value-bind (val cond)
69 (ignore-errors ,form)
70 (assert (null val))
71 (assert (typep cond 'parse-error)))))
72 (assert-parse-error (parse-integer " "))
73 (assert-parse-error (parse-integer "12 a"))
74 (assert-parse-error (parse-integer "12a"))
75 (assert-parse-error (parse-integer "a"))
76 (assert (= (parse-integer "12") 12))
77 (assert (= (parse-integer " 12 ") 12))
78 (assert (= (parse-integer " 12asdb" :junk-allowed t) 12)))
80 ;;; #A notation enforces that once one 0 dimension has been found, all
81 ;;; subsequent ones are also 0.
82 (assert (equal (array-dimensions (read-from-string "#3A()"))
83 '(0 0 0)))
84 (assert (equal (array-dimensions (read-from-string "#3A(())"))
85 '(1 0 0)))
86 (assert (equal (array-dimensions (read-from-string "#3A((() ()))"))
87 '(1 2 0)))
89 ;;; Bug reported by Nikodemus Siivola on sbcl-devel 2003-07-21:
90 ;;; package misconfiguration
91 (assert (eq
92 (handler-case (with-input-from-string (s "cl:") (read s))
93 (end-of-file (c)
94 (declare (ignore c))
95 'good))
96 'good))
98 ;;; Bugs found by Paul Dietz
99 (assert (equal (multiple-value-list
100 (parse-integer " 123 "))
101 '(123 12)))
103 (let* ((base "xxx 123 yyy")
104 (intermediate (make-array 8 :element-type (array-element-type base)
105 :displaced-to base
106 :displaced-index-offset 2))
107 (string (make-array 6 :element-type (array-element-type base)
108 :displaced-to intermediate
109 :displaced-index-offset 1)))
110 (assert (equal (multiple-value-list
111 (parse-integer string))
112 '(123 6))))
114 (let ((*read-base* *read-base*))
115 (dolist (float-string '(".9" ".9e9" ".9e+9" ".9e-9"
116 "-.9" "-.9e9" "-.9e+9" "-.9e-9"
117 "+.9" "+.9e9" "+.9e+9" "+.9e-9"
118 "0.9" "0.9e9" "0.9e+9" "0.9e-9"
119 "9.09" "9.09e9" "9.09e+9" "9.09e-9"
120 #|"9e9" could be integer|# "9e+9" "9e-9"))
121 (loop for i from 2 to 36
122 do (setq *read-base* i)
123 do (assert (typep (read-from-string float-string)
124 *read-default-float-format*))
125 do (assert (typep
126 (read-from-string (substitute #\E #\e float-string))
127 *read-default-float-format*))
128 if (position #\e float-string)
129 do (assert (typep
130 (read-from-string (substitute #\s #\e float-string))
131 'short-float))
132 and do (assert (typep
133 (read-from-string (substitute #\S #\e float-string))
134 'short-float))
135 and do (assert (typep
136 (read-from-string (substitute #\f #\e float-string))
137 'single-float))
138 and do (assert (typep
139 (read-from-string (substitute #\F #\e float-string))
140 'single-float))
141 and do (assert (typep
142 (read-from-string (substitute #\d #\e float-string))
143 'double-float))
144 and do (assert (typep
145 (read-from-string (substitute #\D #\e float-string))
146 'double-float))
147 and do (assert (typep
148 (read-from-string (substitute #\l #\e float-string))
149 'long-float))
150 and do (assert (typep
151 (read-from-string (substitute #\L #\e float-string))
152 'long-float)))))
154 (let ((*read-base* *read-base*))
155 (dolist (integer-string '("1." "2." "3." "4." "5." "6." "7." "8." "9." "0."))
156 (loop for i from 2 to 36
157 do (setq *read-base* i)
158 do (assert (typep (read-from-string integer-string) 'integer)))))
160 (let ((*read-base* *read-base*))
161 (dolist (symbol-string '("A." "a." "Z." "z."
163 "+.9eA" "+.9ea"
165 "0.A" "0.a" "0.Z" "0.z"
167 #|"9eA" "9ea"|# "9e+A" "9e+a" "9e-A" "9e-a"
168 #|"Ae9" "ae9"|# "Ae+9" "ae+9" "Ae-9" "ae-9"
170 "ee+9" "Ee+9" "eE+9" "EE+9"
171 "ee-9" "Ee-9" "eE-9" "EE-9"
173 "A.0" "A.0e10" "a.0" "a.0e10"
175 "1e1e+9"))
176 (loop for i from 2 to 36
177 do (setq *read-base* i)
178 do (assert (typep (read-from-string symbol-string) 'symbol)))))
180 (let ((standard-chars " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~
182 (standard-terminating-macro-chars "\"'(),;`")
183 (standard-nonterminating-macro-chars "#"))
184 (flet ((frob (char)
185 (multiple-value-bind (fun non-terminating-p)
186 (get-macro-character char)
187 (cond
188 ((find char standard-terminating-macro-chars)
189 (unless (and fun (not non-terminating-p))
190 (list char)))
191 ((find char standard-nonterminating-macro-chars)
192 (unless (and fun non-terminating-p)
193 (list char)))
194 (t (unless (and (not fun) (not non-terminating-p))
195 (list char)))))))
196 (let ((*readtable* (copy-readtable nil)))
197 (assert (null (loop for c across standard-chars append (frob c)))))))
199 (let ((standard-chars " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~
201 (undefined-chars "!\"$%&,;>?@[]^_`~{}/dDeEfFgGhHiIjJkKlLmMnNqQtTuUvVwWyYzZ"))
202 (flet ((frob (char)
203 (let ((fun (get-dispatch-macro-character #\# char)))
204 (cond
205 ((find char undefined-chars)
206 (when fun (list char)))
207 ((digit-char-p char 10)
208 (when fun (list char)))
210 (unless fun (list char)))))))
211 (let ((*readtable* (copy-readtable nil)))
212 (assert (null (loop for c across standard-chars append (frob c)))))))
214 (with-test (:name :copy-readtable-with-unicode-macro
215 :skipped-on '(not :sb-unicode))
216 (let ((rt (copy-readtable)))
217 (set-macro-character (code-char #x100fa) #'error nil rt)
218 (assert (plusp (hash-table-count (sb-impl::character-macro-hash-table rt))))
219 (copy-readtable nil rt)
220 (assert (null (get-macro-character #\UFC rt)))))
222 ;;; All these must return a primary value of NIL when *read-suppress* is T
223 ;;; Reported by Bruno Haible on cmucl-imp 2004-10-25.
224 (with-test (:name :read-suppress-char-macros)
225 (let ((*read-suppress* t))
226 (assert (null (read-from-string "(1 2 3)")))
227 (assert (null (with-input-from-string (s "abc xyz)")
228 (read-delimited-list #\) s))))
229 (assert (null (with-input-from-string (s "(1 2 3)")
230 (read-preserving-whitespace s))))
231 (assert (null (with-input-from-string (s "(1 2 3)")
232 (read s))))
233 ;; .. and it's better to avoid consing rather than produce an object and
234 ;; throw it away, even though it's (mostly) indistinguishable to the user.
235 (let ((input (make-string-input-stream "this-is-a-string! .")))
236 (assert (string= (sb-impl::with-read-buffer ()
237 (sb-impl::read-string input #\!))
238 "")))))
240 ;;; System code that asks whether %READ-PRESERVING-WHITESPACE hit EOF
241 ;;; mistook NIL as an object returned normally for NIL the default eof mark.
242 (with-test (:name :read-preserving-whitespace-file-position)
243 (multiple-value-bind (obj pos1) (read-from-string "NIL A")
244 (declare (ignore obj))
245 (multiple-value-bind (obj pos2) (read-from-string "NNN A")
246 (declare (ignore obj))
247 (assert (= pos1 pos2 4))))
248 ;; This also affected *READ-SUPPRESS*. The root cause is the same,
249 ;; but the rationale for why the change is valid is slightly subtle
250 ;; on account of the vague if not weird implication regarding READ
251 ;; that there might actually be differences in non-preservation of
252 ;; whitespace based on *READ-SUPPRESS*. CLHS entry for READ:
253 ;; "When *read-suppress* is false, read throws away the delimiting
254 ;; character required by certain printed representations if it is a
255 ;; whitespace[2] character; but read preserves the character (using
256 ;; unread-char) if it is syntactically meaningful, because it could
257 ;; be the start of the next expression."
258 ;; Why would it mention *read-supress* at all, unless the expectation
259 ;; is that when suppressing you might /not/ throw away a space?
260 ;; But it isn't "when-and-only-when" so we're certainly legal to
261 ;; make the behavior identical regardless of *read-suppress*.
262 (dolist (test '("#-q 1 2" "#+sbcl 1 2")) ; Two tests from lp#327790.
263 (flet ((try (*read-suppress*)
264 (with-input-from-string (s test)
265 (read s)
266 (file-position s))))
267 (assert (= (try nil) (try t)))))
268 ;; Check that conversion from local eof-object to user-specified eof
269 ;; object is nearly perfectly immune to false positives.
270 ;; The only remaining confusion is that
271 ;; (read-from-string "#.(code-header-ref (fun-code-header #'read) 6)")
272 ;; returns NIL instead of (NIL) [subject to change depending on
273 ;; what 6 should be] but that is too ridiculous to worry about.
274 (assert (eq (read-from-string "#.sb-impl::*eof-object*")
275 sb-impl::*eof-object*)))
277 ;;; EOF-ERROR-P defaults to true. Reported by Bruno Haible on
278 ;;; cmucl-imp 2004-10-18.
279 (multiple-value-bind (res err) (ignore-errors (read-from-string ""))
280 (assert (not res))
281 (assert (typep err 'end-of-file)))
283 (assert (equal '((0 . "A") (1 . "B"))
284 (coerce (read-from-string "#((0 . \"A\") (1 . \"B\"))")
285 'list)))
287 ;;; parse-integer uses whitespace[1] not whitespace[2] as its
288 ;;; definition of whitespace to skip.
289 (let ((*readtable* (copy-readtable)))
290 (set-syntax-from-char #\7 #\Space)
291 (assert (= 710 (parse-integer "710"))))
293 (let ((*readtable* (copy-readtable)))
294 (set-syntax-from-char #\7 #\Space)
295 (assert (string= (format nil "~7D" 1) " 1")))
297 (let ((symbol (find-symbol "DOES-NOT-EXIST" "CL-USER")))
298 (assert (null symbol))
299 (handler-case
300 (read-from-string "CL-USER:DOES-NOT-EXIST")
301 (reader-error (c)
302 (princ c))))
304 ;;; The GET-MACRO-CHARACTER in SBCL <= "1.0.34.2" bogusly computed its
305 ;;; second return value relative to *READTABLE* rather than the passed
306 ;;; readtable.
307 (let* ((*readtable* (copy-readtable nil)))
308 (set-syntax-from-char #\" #\A)
309 (multiple-value-bind (reader-fn non-terminating-p)
310 (get-macro-character #\" (copy-readtable nil))
311 (declare (ignore reader-fn))
312 (assert (not non-terminating-p))))
314 (with-test (:name :bug-309093)
315 (assert (eq :error
316 (handler-case
317 (read-from-string "`#2A((,(1+ 0) 0) (0 0))")
318 (reader-error ()
319 :error)))))
321 (with-test (:name :set-syntax-from-char-dispatch-macro-char)
322 (let ((rt (copy-readtable)))
323 (make-dispatch-macro-character #\! nil rt)
324 (set-dispatch-macro-character #\! #\! (constantly 'bang^2) rt)
325 (flet ((maybe-bang ()
326 (let ((*readtable* rt))
327 (read-from-string "!!"))))
328 (assert (eq 'bang^2 (maybe-bang)))
329 (set-syntax-from-char #\! #\! rt)
330 (assert (eq '!! (maybe-bang))))))
332 (with-test (:name :read-in-package-syntax)
333 (assert (equal '(sb-c::a (sb-kernel::x sb-kernel::y) sb-c::b)
334 (read-from-string "sb-c::(a sb-kernel::(x y) b)")))
335 (assert (equal '(cl-user::yes-this-is-sbcl)
336 (read-from-string "cl-user::(#+sbcl yes-this-is-sbcl)")))
337 #+sb-package-locks
338 (assert (eq :violated!
339 (handler-case
340 (read-from-string "cl::'foo")
341 (package-lock-violation ()
342 :violated!)))))
344 (with-test (:name :bug-309070)
345 (with-timeout 10
346 (assert-error (read-from-string "10e10000000000000000000")
347 sb-kernel:reader-impossible-number-error)))
349 (with-test (:name :bug-1095918)
350 (assert (= (length `#3(1)) 3)))
352 (with-test (:name :obscure-reader-package-usage)
353 ;; commit 8fd604 cause a bug in reading "::(foo bar)" which tried
354 ;; to treat the package-designator as a string, but in this case
355 ;; it is hardcoded to *keyword-package*.
356 (assert (equal (read-from-string "::(foo bar)") '(:foo :bar))))
358 ;; I do not know the complete list of platforms for which this test
359 ;; will not cons, but there were four different heap allocations
360 ;; instead of using dx allocation or a recyclable resource:
361 ;; - most obviously, a 128-character buffer per invocation of READ
362 ;; - calling SUBSEQ for package names
363 ;; - multiple-value-call in WITH-CHAR-MACRO-RESULT
364 ;; - the initial cons cell in READ-LIST
365 (with-test (:name :read-does-not-cons-per-se
366 :skipped-on '(:or :interpreter (:not :x86-64)))
367 (flet ((test-reading (string)
368 (let ((s (make-string-input-stream string)))
369 (read s) ; once outside the loop, to make A-SYMBOL
370 (ctu:assert-no-consing
371 (progn (file-position s 0)
372 (read s))
373 40000))))
374 ;; These each used to produce at least 20 MB of garbage,
375 ;; a result of using 128-character (= 512 bytes for Unicode) buffers.
376 ;; Now we use exactly one buffer, or maybe two for package + symbol-name.
377 ;; There is no way to allow an allocation of precisely 512 bytes
378 ;; without counting a whole allocation page against this test.
379 ;; If you get unlucky, the tests might cons one SB-IMPL::TOKEN-BUFFER.
380 ;; And if you get really unlucky, that might be the straw that breaks
381 ;; the camel's back - forcing the use of a new GC page, which looks
382 ;; like it consed 32768 bytes on the old page. Due to the allowable
383 ;; tolerance in CHECK-CONSING, running the test more times than there
384 ;; are bytes consed should pass for "no consing" because it's obviously
385 ;; impossible to cons 1 byte per run.
386 ;; If this still fails, it might be due to somebody changing the
387 ;; backend-page-bytes to exceed 32KB. Not sure what to do about that.
388 (test-reading "4.0s0")
389 (test-reading "COMMON-LISP-USER::A-SYMBOL")
390 (test-reading "()")
391 (test-reading "#\\-") ; should not copy the token buffer
392 ;; *READ-SUPPRESS* avoids creation of lists
393 (test-reading "#-sbcl(a (b c (d (e) (f) g)) h i j . x . y baz) 5")
396 (with-test (:name :sharp-left-paren-empty-list)
397 (assert (read-from-string "#0()")) ; edge case that works
398 (assert (eq (handler-case (read-from-string "#3()")
399 (sb-int:simple-reader-error () :win))
400 :win)))
402 (with-test (:name :sharp-star-empty-multiple-escapes)
403 (assert (eq (handler-case (read-from-string "#*101||1")
404 (sb-int:simple-reader-error () :win))
405 :win)))
407 ;;; The WITH-FAST-READ-BYTE macro accidentally left the package lock
408 ;;; of FAST-READ-BYTE disabled during its body.
409 (with-test (:name :fast-read-byte-package-lock
410 :skipped-on '(not :sb-package-locks))
411 (let ((fun
412 ;; Suppress the compiler output to avoid noise when running the
413 ;; test. (There are a warning and an error about the package
414 ;; lock violation and a note about FAST-READ-BYTE being
415 ;; unused.) It's easy and more precise to test for the error
416 ;; that the compiled function signals when it is called.
417 (let ((*error-output* (make-broadcast-stream)))
418 (compile nil
419 '(lambda ()
420 (sb-int:with-fast-read-byte (t *standard-input*)
421 ;; Signal an error if the symbol is locked.
422 (declare (special sb-int:fast-read-byte))))))))
423 (assert-error (funcall fun) program-error)))
425 (with-test (:name :sharp-plus-requires-subform)
426 (assert-error (read-from-string "(let ((foo 3) #+sbcl) wat)"))
427 (assert-error (read-from-string "(let ((foo 3) #-brand-x) wat)")))
429 (with-test (:name :impossible-number-error)
430 (princ (nth-value 1 (ignore-errors (READ-FROM-STRING "1/0")))))
432 (with-test (:name :read-from-string-compiler-macro)
433 ;; evaluation order should be the customary one. In particular,
434 ;; do not assume that EOF-ERROR-P and EOF-VALUE are constants.
435 (sb-int:collect ((l))
436 (read-from-string "a" (l 'first) (l 'second) :start (progn (l 'third) 0))
437 (assert (equal (l) '(first second third)))))
439 (with-test (:name :sharp-star-reader-error)
440 (assert-error (read-from-string (format nil "#~D*" (1+ most-positive-fixnum))) reader-error))
442 (defun test1 (print &optional expect)
443 (let ((*readtable* (copy-readtable nil)))
444 (when print
445 (format t "READTABLE-CASE Input Symbol-name~@
446 ----------------------------------~%"))
447 (dolist (readtable-case '(:upcase :downcase :preserve :invert))
448 (setf (readtable-case *readtable*) readtable-case)
449 (dolist (input '("ZEBRA" "Zebra" "zebra"))
450 (if print
451 (format t "~&:~A~16T~A~24T~A"
452 (string-upcase readtable-case)
453 input
454 (symbol-name (read-from-string input)))
455 (assert (string= (symbol-name (read-from-string input))
456 (pop expect))))))))
458 (defun test2 (print &optional expect)
459 (let ((*readtable* (copy-readtable nil)))
460 (when print
461 (format t "READTABLE-CASE *PRINT-CASE* Symbol-name Output Princ~@
462 --------------------------------------------------------~%"))
463 (dolist (readtable-case '(:upcase :downcase :preserve :invert))
464 (setf (readtable-case *readtable*) readtable-case)
465 (dolist (*print-case* '(:upcase :downcase :capitalize))
466 (dolist (symbol '(|ZEBRA| |Zebra| |zebra|))
467 (if print
468 (format t "~&:~A~15T:~A~29T~A~42T~A~50T~A"
469 (string-upcase readtable-case)
470 (string-upcase *print-case*)
471 (symbol-name symbol)
472 (prin1-to-string symbol)
473 (princ-to-string symbol))
474 (progn
475 (assert (string= (prin1-to-string symbol) (pop expect)))
476 (assert (string= (princ-to-string symbol) (pop expect))))))))))
478 (with-test (:name :readtable-cases)
479 (test1 nil '("ZEBRA" "ZEBRA" "ZEBRA"
480 "zebra" "zebra" "zebra"
481 "ZEBRA" "Zebra" "zebra"
482 "zebra" "Zebra" "ZEBRA"))
483 (test2 nil '("ZEBRA" "ZEBRA"
484 "|Zebra|" "Zebra"
485 "|zebra|" "zebra"
486 "zebra" "zebra"
487 "|Zebra|" "zebra"
488 "|zebra|" "zebra"
489 "Zebra" "Zebra"
490 "|Zebra|" "Zebra"
491 "|zebra|" "zebra"
492 "|ZEBRA|" "ZEBRA"
493 "|Zebra|" "ZEBRA"
494 "ZEBRA" "ZEBRA"
495 "|ZEBRA|" "ZEBRA"
496 "|Zebra|" "Zebra"
497 "zebra" "zebra"
498 "|ZEBRA|" "ZEBRA"
499 "|Zebra|" "Zebra"
500 "Zebra" "Zebra"
501 "ZEBRA" "ZEBRA"
502 "Zebra" "Zebra"
503 "zebra" "zebra"
504 "ZEBRA" "ZEBRA"
505 "Zebra" "Zebra"
506 "zebra" "zebra"
507 "ZEBRA" "ZEBRA"
508 "Zebra" "Zebra"
509 "zebra" "zebra"
510 "zebra" "zebra"
511 "Zebra" "Zebra"
512 "ZEBRA" "ZEBRA"
513 "zebra" "zebra"
514 "Zebra" "Zebra"
515 "ZEBRA" "ZEBRA"
516 "zebra" "zebra"
517 "Zebra" "Zebra"
518 "ZEBRA" "ZEBRA")))