fix alphanumericp (lp#1377712)
[sbcl.git] / tests / reader.pure.lisp
blob06681fc472b25166cb5ea524be6af105d595b9be
1 ;;;; tests related to the Lisp reader
3 ;;;; This software is part of the SBCL system. See the README file for
4 ;;;; more information.
5 ;;;;
6 ;;;; While most of SBCL is derived from the CMU CL system, the test
7 ;;;; files (like this one) were written from scratch after the fork
8 ;;;; from CMU CL.
9 ;;;;
10 ;;;; This software is in the public domain and is provided with
11 ;;;; absolutely no warranty. See the COPYING and CREDITS files for
12 ;;;; more information.
14 (in-package "CL-USER")
16 (load "compiler-test-util.lisp")
18 (assert (equal (symbol-name '#:|fd\sA|) "fdsA"))
20 ;;; Prior to sbcl-0.7.2.10, SBCL disobeyed the ANSI requirements on
21 ;;; returning NIL for unset dispatch-macro-character functions. (bug
22 ;;; 151, fixed by Alexey Dejenka sbcl-devel "bug 151" 2002-04-12)
23 (assert (not (get-dispatch-macro-character #\# #\{)))
24 (assert (not (get-dispatch-macro-character #\# #\0)))
25 ;;; And we might as well test that we don't have any cross-compilation
26 ;;; shebang residues left...
27 (assert (not (get-dispatch-macro-character #\# #\!)))
28 ;;; Also test that all the illegal sharp macro characters are
29 ;;; recognized as being illegal.
30 (loop for char in '(#\Backspace #\Tab #\Newline #\Linefeed
31 #\Page #\Return #\Space #\) #\<)
32 do (assert (get-dispatch-macro-character #\# char)))
34 (assert (not (ignore-errors (get-dispatch-macro-character #\! #\0)
35 t)))
37 ;;; In sbcl-0.7.3, GET-MACRO-CHARACTER and SET-MACRO-CHARACTER didn't
38 ;;; use NIL to represent the no-macro-attached-to-this-character case
39 ;;; as ANSI says they should. (This problem is parallel to the
40 ;;; GET-DISPATCH-MACRO misbehavior fixed in sbcl-0.7.2.10, but
41 ;;; was fixed a little later.)
42 (dolist (customizable-char
43 ;; According to ANSI "2.1.4 Character Syntax Types", these
44 ;; characters are reserved for the programmer.
45 '(#\? #\! #\[ #\] #\{ #\}))
46 ;; So they should have no macro-characterness.
47 (multiple-value-bind (macro-fun non-terminating-p)
48 (get-macro-character customizable-char)
49 (assert (null macro-fun))
50 ;; Also, in a bit of ANSI weirdness, NON-TERMINATING-P can be
51 ;; true only when MACRO-FUN is true. (When the character
52 ;; is not a macro character, it can be embedded in a token,
53 ;; so it'd be more logical for NON-TERMINATING-P to be T in
54 ;; this case; but ANSI says it's NIL in this case.
55 (assert (null non-terminating-p))))
57 ;;; rudimentary test of SET-SYNTAX-FROM-CHAR, just to verify that it
58 ;;; wasn't totally broken by the GET-MACRO-CHARACTER/SET-MACRO-CHARACTER
59 ;;; fixes in 0.7.3.16
60 (assert (= 123579 (read-from-string "123579")))
61 (let ((*readtable* (copy-readtable)))
62 (set-syntax-from-char #\7 #\;)
63 (assert (= 1235 (read-from-string "123579"))))
65 ;;; PARSE-INTEGER must signal an error of type PARSE-ERROR if it is
66 ;;; unable to parse an integer and :JUNK-ALLOWED is NIL.
67 (macrolet ((assert-parse-error (form)
68 `(multiple-value-bind (val cond)
69 (ignore-errors ,form)
70 (assert (null val))
71 (assert (typep cond 'parse-error)))))
72 (assert-parse-error (parse-integer " "))
73 (assert-parse-error (parse-integer "12 a"))
74 (assert-parse-error (parse-integer "12a"))
75 (assert-parse-error (parse-integer "a"))
76 (assert (= (parse-integer "12") 12))
77 (assert (= (parse-integer " 12 ") 12))
78 (assert (= (parse-integer " 12asdb" :junk-allowed t) 12)))
80 ;;; #A notation enforces that once one 0 dimension has been found, all
81 ;;; subsequent ones are also 0.
82 (assert (equal (array-dimensions (read-from-string "#3A()"))
83 '(0 0 0)))
84 (assert (equal (array-dimensions (read-from-string "#3A(())"))
85 '(1 0 0)))
86 (assert (equal (array-dimensions (read-from-string "#3A((() ()))"))
87 '(1 2 0)))
89 ;;; Bug reported by Nikodemus Siivola on sbcl-devel 2003-07-21:
90 ;;; package misconfiguration
91 (assert (eq
92 (handler-case (with-input-from-string (s "cl:") (read s))
93 (end-of-file (c)
94 'good))
95 'good))
97 ;;; Bugs found by Paul Dietz
98 (assert (equal (multiple-value-list
99 (parse-integer " 123 "))
100 '(123 12)))
102 (let* ((base "xxx 123 yyy")
103 (intermediate (make-array 8 :element-type (array-element-type base)
104 :displaced-to base
105 :displaced-index-offset 2))
106 (string (make-array 6 :element-type (array-element-type base)
107 :displaced-to intermediate
108 :displaced-index-offset 1)))
109 (assert (equal (multiple-value-list
110 (parse-integer string))
111 '(123 6))))
113 (let ((*read-base* *read-base*))
114 (dolist (float-string '(".9" ".9e9" ".9e+9" ".9e-9"
115 "-.9" "-.9e9" "-.9e+9" "-.9e-9"
116 "+.9" "+.9e9" "+.9e+9" "+.9e-9"
117 "0.9" "0.9e9" "0.9e+9" "0.9e-9"
118 "9.09" "9.09e9" "9.09e+9" "9.09e-9"
119 #|"9e9" could be integer|# "9e+9" "9e-9"))
120 (loop for i from 2 to 36
121 do (setq *read-base* i)
122 do (assert (typep (read-from-string float-string)
123 *read-default-float-format*))
124 do (assert (typep
125 (read-from-string (substitute #\E #\e float-string))
126 *read-default-float-format*))
127 if (position #\e float-string)
128 do (assert (typep
129 (read-from-string (substitute #\s #\e float-string))
130 'short-float))
131 and do (assert (typep
132 (read-from-string (substitute #\S #\e float-string))
133 'short-float))
134 and do (assert (typep
135 (read-from-string (substitute #\f #\e float-string))
136 'single-float))
137 and do (assert (typep
138 (read-from-string (substitute #\F #\e float-string))
139 'single-float))
140 and do (assert (typep
141 (read-from-string (substitute #\d #\e float-string))
142 'double-float))
143 and do (assert (typep
144 (read-from-string (substitute #\D #\e float-string))
145 'double-float))
146 and do (assert (typep
147 (read-from-string (substitute #\l #\e float-string))
148 'long-float))
149 and do (assert (typep
150 (read-from-string (substitute #\L #\e float-string))
151 'long-float)))))
153 (let ((*read-base* *read-base*))
154 (dolist (integer-string '("1." "2." "3." "4." "5." "6." "7." "8." "9." "0."))
155 (loop for i from 2 to 36
156 do (setq *read-base* i)
157 do (assert (typep (read-from-string integer-string) 'integer)))))
159 (let ((*read-base* *read-base*))
160 (dolist (symbol-string '("A." "a." "Z." "z."
162 "+.9eA" "+.9ea"
164 "0.A" "0.a" "0.Z" "0.z"
166 #|"9eA" "9ea"|# "9e+A" "9e+a" "9e-A" "9e-a"
167 #|"Ae9" "ae9"|# "Ae+9" "ae+9" "Ae-9" "ae-9"
169 "ee+9" "Ee+9" "eE+9" "EE+9"
170 "ee-9" "Ee-9" "eE-9" "EE-9"
172 "A.0" "A.0e10" "a.0" "a.0e10"
174 "1e1e+9"))
175 (loop for i from 2 to 36
176 do (setq *read-base* i)
177 do (assert (typep (read-from-string symbol-string) 'symbol)))))
179 (let ((standard-chars " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~
181 (standard-terminating-macro-chars "\"'(),;`")
182 (standard-nonterminating-macro-chars "#"))
183 (flet ((frob (char)
184 (multiple-value-bind (fun non-terminating-p)
185 (get-macro-character char)
186 (cond
187 ((find char standard-terminating-macro-chars)
188 (unless (and fun (not non-terminating-p))
189 (list char)))
190 ((find char standard-nonterminating-macro-chars)
191 (unless (and fun non-terminating-p)
192 (list char)))
193 (t (unless (and (not fun) (not non-terminating-p))
194 (list char)))))))
195 (let ((*readtable* (copy-readtable nil)))
196 (assert (null (loop for c across standard-chars append (frob c)))))))
198 (let ((standard-chars " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~
200 (undefined-chars "!\"$%&,;>?@[]^_`~{}/dDeEfFgGhHiIjJkKlLmMnNqQtTuUvVwWyYzZ"))
201 (flet ((frob (char)
202 (let ((fun (get-dispatch-macro-character #\# char)))
203 (cond
204 ((find char undefined-chars)
205 (when fun (list char)))
206 ((digit-char-p char 10)
207 (when fun (list char)))
209 (unless fun (list char)))))))
210 (let ((*readtable* (copy-readtable nil)))
211 (assert (null (loop for c across standard-chars append (frob c)))))))
213 ;;; All these must return a primary value of NIL when *read-suppress* is T
214 ;;; Reported by Bruno Haible on cmucl-imp 2004-10-25.
215 (let ((*read-suppress* t))
216 (assert (null (read-from-string "(1 2 3)")))
217 (assert (null (with-input-from-string (s "abc xyz)")
218 (read-delimited-list #\) s))))
219 (assert (null (with-input-from-string (s "(1 2 3)")
220 (read-preserving-whitespace s))))
221 (assert (null (with-input-from-string (s "(1 2 3)")
222 (read s)))))
224 ;;; EOF-ERROR-P defaults to true. Reported by Bruno Haible on
225 ;;; cmucl-imp 2004-10-18.
226 (multiple-value-bind (res err) (ignore-errors (read-from-string ""))
227 (assert (not res))
228 (assert (typep err 'end-of-file)))
230 (assert (equal '((0 . "A") (1 . "B"))
231 (coerce (read-from-string "#((0 . \"A\") (1 . \"B\"))")
232 'list)))
234 ;;; parse-integer uses whitespace[1] not whitespace[2] as its
235 ;;; definition of whitespace to skip.
236 (let ((*readtable* (copy-readtable)))
237 (set-syntax-from-char #\7 #\Space)
238 (assert (= 710 (parse-integer "710"))))
240 (let ((*readtable* (copy-readtable)))
241 (set-syntax-from-char #\7 #\Space)
242 (assert (string= (format nil "~7D" 1) " 1")))
244 (let ((symbol (find-symbol "DOES-NOT-EXIST" "CL-USER")))
245 (assert (null symbol))
246 (handler-case
247 (read-from-string "CL-USER:DOES-NOT-EXIST")
248 (reader-error (c)
249 (princ c))))
251 ;;; The GET-MACRO-CHARACTER in SBCL <= "1.0.34.2" bogusly computed its
252 ;;; second return value relative to *READTABLE* rather than the passed
253 ;;; readtable.
254 (let* ((*readtable* (copy-readtable nil)))
255 (set-syntax-from-char #\" #\A)
256 (multiple-value-bind (reader-fn non-terminating-p)
257 (get-macro-character #\" (copy-readtable nil))
258 (declare (ignore reader-fn))
259 (assert (not non-terminating-p))))
261 (with-test (:name :bug-309093)
262 (assert (eq :error
263 (handler-case
264 (read-from-string "`#2A((,(1+ 0) 0) (0 0))")
265 (reader-error ()
266 :error)))))
268 (with-test (:name :set-syntax-from-char-dispatch-macro-char)
269 (let ((rt (copy-readtable)))
270 (make-dispatch-macro-character #\! nil rt)
271 (set-dispatch-macro-character #\! #\! (constantly 'bang^2) rt)
272 (flet ((maybe-bang ()
273 (let ((*readtable* rt))
274 (read-from-string "!!"))))
275 (assert (eq 'bang^2 (maybe-bang)))
276 (set-syntax-from-char #\! #\! rt)
277 (assert (eq '!! (maybe-bang))))))
279 (with-test (:name :read-in-package-syntax)
280 (assert (equal '(sb-c::a (sb-kernel::x sb-kernel::y) sb-c::b)
281 (read-from-string "sb-c::(a sb-kernel::(x y) b)")))
282 (assert (equal '(cl-user::yes-this-is-sbcl)
283 (read-from-string "cl-user::(#+sbcl yes-this-is-sbcl)")))
284 #+sb-package-locks
285 (assert (eq :violated!
286 (handler-case
287 (read-from-string "cl::'foo")
288 (package-lock-violation ()
289 :violated!)))))
291 (with-test (:name :bug-309070)
292 (with-timeout 10
293 (assert-error (read-from-string "10e10000000000000000000")
294 sb-kernel:reader-impossible-number-error)))
296 (with-test (:name :bug-1095918)
297 (assert (= (length `#3(1)) 3)))
299 (with-test (:name :obscure-reader-package-usage)
300 ;; commit 8fd604 cause a bug in reading "::(foo bar)" which tried
301 ;; to treat the package-designator as a string, but in this case
302 ;; it is hardcoded to *keyword-package*.
303 (assert (equal (read-from-string "::(foo bar)") '(:foo :bar))))
305 #+x86-64
306 ;; I do not know the complete list of platforms for which this test
307 ;; will not cons, but there were four different heap allocations
308 ;; instead of using dx allocation or a recyclable resource:
309 ;; - most obviously, a 128-character buffer per invocation of READ
310 ;; - calling SUBSEQ for package names
311 ;; - multiple-value-call in WITH-CHAR-MACRO-RESULT
312 ;; - the initial cons cell in READ-LIST
313 (with-test (:name :read-does-not-cons-per-se)
314 (flet ((test-reading (string)
315 (let ((s (make-string-input-stream string)))
316 (read s) ; once outside the loop, to make A-SYMBOL
317 (ctu:assert-no-consing
318 (progn (file-position s 0)
319 (read s))
320 40000))))
321 ;; These each used to produce at least 20 MB of garbage,
322 ;; a result of using 128-character (= 512 bytes for Unicode) buffers.
323 ;; Now we use exactly one buffer, or maybe two for package + symbol-name.
324 ;; There is no way to allow an allocation of precisely 512 bytes
325 ;; without counting a whole allocation page against this test.
326 ;; If you get unlucky, the tests might cons one SB-IMPL::TOKEN-BUFFER.
327 ;; And if you get really unlucky, that might be the straw that breaks
328 ;; the camel's back - forcing the use of a new GC page, which looks
329 ;; like it consed 32768 bytes on the old page. Due to the allowable
330 ;; tolerance in CHECK-CONSING, running the test more times than there
331 ;; are bytes consed should pass for "no consing" because it's obviously
332 ;; impossible to cons 1 byte per run.
333 ;; If this still fails, it might be due to somebody changing the
334 ;; backend-page-bytes to exceed 32KB. Not sure what to do about that.
335 (test-reading "4.0s0")
336 (test-reading "COMMON-LISP-USER::A-SYMBOL")
337 (test-reading "()")
338 (test-reading "#\\-") ; should not copy the token buffer
342 (with-test (:name :sharp-star-empty-multiple-escapes)
343 (assert (eq (handler-case (read-from-string "#*101||1")
344 (sb-int:simple-reader-error () :win))
345 :win)))