Decentralize per-thread initial special bindings.
[sbcl.git] / src / code / reader.lisp
blobdbf81568dc2e857ea0f631d4427327199965b2a3
1 ;;;; READ and friends
3 ;;;; This software is part of the SBCL system. See the README file for
4 ;;;; more information.
5 ;;;;
6 ;;;; This software is derived from the CMU CL system, which was
7 ;;;; written at Carnegie Mellon University and released into the
8 ;;;; public domain. The software is in the public domain and is
9 ;;;; provided with absolutely no warranty. See the COPYING and CREDITS
10 ;;;; files for more information.
12 (in-package "SB!IMPL")
14 ;;;; miscellaneous global variables
16 ;;; ANSI: "the floating-point format that is to be used when reading a
17 ;;; floating-point number that has no exponent marker or that has e or
18 ;;; E for an exponent marker"
;;; Global default float format for the reader; its initial value is
;;; SINGLE-FLOAT.
(!defvar *read-default-float-format* 'single-float)
;;; Only the four standard float format names are permitted values.
(declaim (type (member short-float single-float double-float long-float)
               *read-default-float-format*))

;;; The current readtable. It is defined here without an initial value.
(defvar *readtable*)
(declaim (type readtable *readtable*))
;;; The documentation string is installed separately instead of being
;;; given in the DEFVAR form.
(setf (fdocumentation '*readtable* 'variable)
      "Variable bound to current readtable.")

;;; A standard Lisp readtable (once cold-init is through). This is for
;;; recovery from broken read-tables (and for
;;; WITH-STANDARD-IO-SYNTAX), and should not normally be user-visible.
;;; If the initial value is changed from NIL to something more interesting,
;;; be sure to update the duplicated definition in "src/code/print.lisp"
(defglobal *standard-readtable* nil)
35 ;;; In case we get an error trying to parse a symbol, we want to rebind the
36 ;;; above stuff so it's cool.
39 ;;;; reader errors
;;; Signal a READER-EOF-ERROR condition carrying STREAM and CONTEXT.
;;; ERROR is called in tail position, hence the optimization declaration.
(defun reader-eof-error (stream context)
  (declare (optimize allow-non-returning-tail-call))
  (error 'reader-eof-error :context context :stream stream))
47 ;;; If The Gods didn't intend for us to use multiple namespaces, why
48 ;;; did They specify them?
;;; Signal a SIMPLE-READER-ERROR on STREAM. CONTROL becomes the condition's
;;; format control and the remaining arguments its format arguments.
(defun simple-reader-error (stream control &rest args)
  (declare (optimize allow-non-returning-tail-call))
  (error 'simple-reader-error
         :format-arguments args
         :format-control control
         :stream stream))
56 ;;;; macros and functions for character tables
(declaim (ftype (sfunction (character readtable) (unsigned-byte 8))
                get-cat-entry))
;;; Return the character-attribute code of CHAR in readtable RT.
;;; A base-char is looked up in the attribute array by its char-code; any
;;; other character is looked up in the attribute hash table, where an
;;; absent entry counts as +CHAR-ATTR-CONSTITUENT+.
(defun get-cat-entry (char rt)
  (cond ((typep char 'base-char)
         (elt (character-attribute-array rt) (char-code char)))
        (t
         ;; VALUES discards GETHASH's secondary "found" value.
         (values (gethash char
                          (character-attribute-hash-table rt)
                          +char-attr-constituent+)))))
;;; Store NEWVALUE as the character-attribute code of CHAR in readtable RT
;;; (default: the current readtable). Returns no values.
(defun set-cat-entry (char newvalue &optional (rt *readtable*))
  (declare (character char) (type (unsigned-byte 8) newvalue) (readtable rt))
  (cond ((typep char 'base-char)
         ;; Base characters live in the array, indexed by char-code.
         (setf (elt (character-attribute-array rt) (char-code char)) newvalue))
        ((= newvalue +char-attr-constituent+)
         ;; Default value for the C-A-HASH-TABLE is +CHAR-ATTR-CONSTITUENT+,
         ;; so the default is represented by the absence of an entry.
         (%remhash char (character-attribute-hash-table rt)))
        (t
         (setf (gethash char (character-attribute-hash-table rt)) newvalue)))
  (values))
76 ;; Set the character-macro-table entry without coercing NEW-VALUE.
77 ;; As used by set-syntax-from-char it must always process "raw" values.
;; Store NEW-VALUE, uncoerced, as the character-macro-table entry of CHAR
;; in readtable RT (default: the current readtable).
(defun set-cmt-entry (char new-value &optional (rt *readtable*))
  (declare (character char)
           (type (or null function fdefn) new-value)
           (type readtable rt))
  (cond ((typep char 'base-char)
         (setf (svref (character-macro-array rt) (char-code char)) new-value))
        (new-value
         (setf (gethash char (character-macro-hash-table rt)) new-value))
        (t
         ;; never store NILs: a missing hash entry stands for NIL
         (remhash char (character-macro-hash-table rt)))))
88 ;;; the value actually stored in the character macro table. As per
89 ;;; ANSI #'GET-MACRO-CHARACTER and #'SET-MACRO-CHARACTER, this can
90 ;;; be either a function-designator or NIL, except that we store
91 ;;; symbols not as themselves but as their #<fdefn>.
;;; Fetch the raw character-macro-table entry of CHAR in READTABLE:
;;; from the macro array for a base-char, otherwise from the macro hash
;;; table (NIL when absent).
(defun get-raw-cmt-entry (char readtable)
  (declare (character char) (readtable readtable))
  (cond ((typep char 'base-char)
         (svref (character-macro-array readtable) (char-code char)))
        (t
         ;; Keep only the primary value of GETHASH.
         (values (gethash char (character-macro-hash-table readtable) nil)))))
98 ;; As above but get the entry for SUB-CHAR in a dispatching macro table.
;; Fetch the raw entry for SUB-CHAR from SUB-TABLE, a cons whose CDR is a
;; simple-vector indexed by base-char code and whose CAR is either NIL or
;; a hash table for the remaining characters.
(defun get-raw-cmt-dispatch-entry (sub-char sub-table)
  (declare (character sub-char))
  (cond ((typep sub-char 'base-char)
         (svref (truly-the (simple-vector #.base-char-code-limit)
                           (cdr (truly-the cons sub-table)))
                (char-code sub-char)))
        (t
         (let ((overflow-table (car sub-table)))
           (when overflow-table
             (gethash sub-char overflow-table))))))
108 ;; Coerce THING to a character-macro-table entry
;; Expand into code that turns the value of THING into a
;; character-macro-table entry: NIL and functions are kept as they are,
;; anything else is replaced by the result of FIND-OR-CREATE-FDEFN.
(defmacro !coerce-to-cmt-entry (thing)
  `(let ((x ,thing))
     (cond ((typep x '(or null function)) x)
           (t (find-or-create-fdefn x)))))
113 ;; Return a callable function given a character-macro-table entry.
;; Expand into code that yields a callable function for the table entry VAL:
;; a function is used directly, NIL selects FALLBACK, and any other entry
;; is resolved with SB!C:SAFE-FDEFN-FUN.
(defmacro !cmt-entry-to-function (val fallback)
  `(let ((x ,val))
     (truly-the
      function
      (if (functionp x)
          x
          (if (null x)
              ,fallback
              (sb!c:safe-fdefn-fun x))))))
122 ;; Return a function-designator given a character-macro-table entry.
;; Expand into code that maps the table entry VAL back to a function
;; designator: an fdefn is replaced by its name, anything else is returned
;; unchanged.
(defmacro !cmt-entry-to-fun-designator (val)
  `(let ((x ,val))
     (cond ((fdefn-p x) (fdefn-name x))
           (t x))))
127 ;;; The character attribute table is a BASE-CHAR-CODE-LIMIT vector
128 ;;; of (unsigned-byte 8) plus a hashtable to handle higher character codes.
;;; Expand into a test that the attribute code of CHAR in readtable RT
;;; is numerically equal to WHICHCLASS.
(defmacro test-attribute (char whichclass rt)
  (list '= (list 'get-cat-entry char rt) whichclass))
133 ;;; predicates for testing character attributes
;;; Proclaim the attribute predicates inline, except under the :SB-FLUID
;;; build feature.
#!-sb-fluid
(declaim (inline whitespace[1]p whitespace[2]p
                 constituentp terminating-macrop
                 single-escape-p multiple-escape-p
                 token-delimiterp))
142 ;;; the [1] and [2] here refer to ANSI glossary entries for
143 ;;; "whitespace".
144 ;; whitespace[2]p is the only predicate whose readtable is optional
145 ;; - other than whitespace[1]p which has a fixed readtable - due to
146 ;; callers not otherwise needing a readtable at all, and so not binding
147 ;; *READTABLE* into a local variable throughout their lifetime.
;; True if CHAR has the whitespace attribute in *STANDARD-READTABLE*.
(defun whitespace[1]p (char)
  (= (get-cat-entry char *standard-readtable*) +char-attr-whitespace+))
;; True if CHAR has the whitespace attribute in RT, which defaults to the
;; current readtable.
(defun whitespace[2]p (char &optional (rt *readtable*))
  (= (get-cat-entry char rt) +char-attr-whitespace+))
;; True if CHAR has the constituent attribute in readtable RT.
(defun constituentp (char rt)
  (= (get-cat-entry char rt) +char-attr-constituent+))
;; True if CHAR has the terminating-macro attribute in readtable RT.
(defun terminating-macrop (char rt)
  (= (get-cat-entry char rt) +char-attr-terminating-macro+))
;; True if CHAR has the single-escape attribute in readtable RT.
(defun single-escape-p (char rt)
  (= (get-cat-entry char rt) +char-attr-single-escape+))
;; True if CHAR has the multiple-escape attribute in readtable RT.
(defun multiple-escape-p (char rt)
  (= (get-cat-entry char rt) +char-attr-multiple-escape+))
;; True if the attribute code of CHAR in RT (default: the current
;; readtable) does not exceed +CHAR-ATTR-TERMINATING-MACRO+.
(defun token-delimiterp (char &optional (rt *readtable*))
  ;; depends on actual attribute numbering in readtable.lisp.
  (not (> (get-cat-entry char rt) +char-attr-terminating-macro+)))
169 ;;;; constituent traits (see ANSI 2.1.4.2)
171 ;;; There are a number of "secondary" attributes which are constant
172 ;;; properties of characters (as long as they are constituents).
;;; A vector of (UNSIGNED-BYTE 8), indexed by base-char code, holding the
;;; constituent trait of each base character. It is built once at read time.
;;; Every entry starts as +CHAR-ATTR-CONSTITUENT+ and is then overridden for
;;; the characters listed below.
(defconstant +constituent-trait-table+
  #.(let ((a (!make-specialized-array base-char-code-limit '(unsigned-byte 8))))
      (fill a +char-attr-constituent+)
      (flet ((!set-constituent-trait (char trait)
               (aver (typep char 'base-char))
               (setf (elt a (char-code char)) trait)))
        (!set-constituent-trait #\: +char-attr-package-delimiter+)
        (!set-constituent-trait #\. +char-attr-constituent-dot+)
        (!set-constituent-trait #\+ +char-attr-constituent-sign+)
        (!set-constituent-trait #\- +char-attr-constituent-sign+)
        (!set-constituent-trait #\/ +char-attr-constituent-slash+)
        ;; decimal digits
        (do ((i (char-code #\0) (1+ i)))
            ((> i (char-code #\9)))
          (!set-constituent-trait (code-char i) +char-attr-constituent-digit+))
        ;; exponent markers, in both cases
        (dolist (c '(#\E #\F #\D #\S #\L #\e #\f #\d #\s #\l))
          (!set-constituent-trait c +char-attr-constituent-expt+))
        ;; characters marked invalid
        (!set-constituent-trait #\Space +char-attr-invalid+)
        (!set-constituent-trait #\Newline +char-attr-invalid+)
        (dolist (c (list backspace-char-code tab-char-code form-feed-char-code
                         return-char-code rubout-char-code))
          (!set-constituent-trait (code-char c) +char-attr-invalid+)))
      ;; The table itself must be the value of the read-time form; without
      ;; this the LET would end with the FLET, whose last form (DOLIST)
      ;; yields NIL.
      a))
(declaim (inline get-constituent-trait))
;;; Return the constituent trait of CHAR: the table entry for a base-char,
;;; and +CHAR-ATTR-CONSTITUENT+ for every other character.
(defun get-constituent-trait (char)
  (cond ((typep char 'base-char)
         (elt +constituent-trait-table+ (char-code char)))
        (t +char-attr-constituent+)))
211 ;;;; Readtable Operations
;;; Signal a continuable STANDARD-READTABLE-MODIFIED-ERROR naming OPERATION
;;; when READTABLE is the very object held in *STANDARD-READTABLE*.
(defun assert-not-standard-readtable (readtable operation)
  (if (eq readtable *standard-readtable*)
      (cerror "Frob it anyway!" 'standard-readtable-modified-error
              :operation operation)
      nil))
;;; Translate the numeric case code stored in READTABLE into the
;;; corresponding keyword.
(defun readtable-case (readtable)
  (let ((mode-names #(:upcase :downcase :preserve :invert)))
    (aref mode-names (%readtable-case readtable))))
;;; Store the numeric code for the keyword CASE into READTABLE and return
;;; CASE. ECASE signals an error for any other keyword.
(defun (setf readtable-case) (case readtable)
  ;; This function does not accept a readtable designator, only a readtable.
  (assert-not-standard-readtable readtable '(setf readtable-case))
  (let ((code (ecase case
                (:upcase 0)
                (:downcase 1)
                (:preserve 2)
                (:invert 3))))
    (setf (%readtable-case readtable) code))
  case)
(defun readtable-normalization (readtable)
  "Returns T if READTABLE normalizes strings to NFKC, and NIL otherwise.
The READTABLE-NORMALIZATION of the standard readtable is T."
  ;; Public reader over the internal %READTABLE-NORMALIZATION accessor.
  (%readtable-normalization readtable))
(defun (setf readtable-normalization) (new-value readtable)
  "Sets the READTABLE-NORMALIZATION of the given READTABLE to NEW-VALUE.
Pass T to make READTABLE normalize symbols to NFKC (the default behavior),
and NIL to suppress normalization."
  ;; This function does not accept a readtable designator, only a readtable.
  ;; Modifying the standard readtable signals a continuable error first.
  (assert-not-standard-readtable readtable '(setf readtable-normalization))
  (setf (%readtable-normalization readtable) new-value))
(defun readtable-base-char-preference (readtable)
  "Returns :SYMBOLS, :STRINGS, :BOTH, or NIL, depending on whether the
reader should try to intern a base-string when reading a symbol name,
respectively produce a base-string when reading a quoted string, or in both
cases, or neither. The preference applies when a symbol-name or string
contains only BASE-CHAR characters. An (ARRAY CHARACTER (*)) can always
be interned (returned, respectively) as required. The default is :SYMBOLS."
  ;; For efficiency the single preference occupies two slots internally;
  ;; each slot is compared against the symbol BASE-CHAR.
  (let ((prefer-base-symbols
          (eq (%readtable-symbol-preference readtable) 'base-char))
        (prefer-base-strings
          (eq (%readtable-string-preference readtable) 'base-char)))
    (if prefer-base-strings
        (if prefer-base-symbols :both :strings)
        (if prefer-base-symbols :symbols nil))))
(defun (setf readtable-base-char-preference) (new-value readtable)
  (declare (type (member :symbols :strings :both nil) new-value))
  "Sets the READTABLE-BASE-CHAR-PREFERENCE of the given READTABLE."
  ;; Split the single preference into the two internal slots: each slot
  ;; holds BASE-CHAR when NEW-VALUE selects it and CHARACTER otherwise.
  (let ((symbol-type (case new-value
                       ((:symbols :both) 'base-char)
                       (t 'character)))
        (string-type (case new-value
                       ((:strings :both) 'base-char)
                       (t 'character))))
    (setf (%readtable-symbol-preference readtable) symbol-type)
    (setf (%readtable-string-preference readtable) string-type))
  new-value)
;;; Copy every entry of hash table FROM into hash table TO, passing each
;;; value through TRANSFORM (default: IDENTITY) on the way. Entries already
;;; in TO under other keys are left alone. Returns TO, which callers rely on
;;; when they pass a freshly made or freshly cleared table as TO.
(defun replace/eql-hash-table (to from &optional (transform #'identity))
  (maphash (lambda (k v) (setf (gethash k to) (funcall transform v))) from)
  to)
;;; Return a reader-macro function for a dispatching macro character.
;;; The returned closure captures DTABLE and, when called, hands the stream
;;; and that table to READ-DISPATCH-CHAR; the macro character itself is
;;; ignored.
(defun %make-dispatch-macro-char (dtable)
  (lambda (stream char)
    (declare (ignore char))
    (read-dispatch-char stream dtable)))
;;; If FUN is a closure produced by %MAKE-DISPATCH-MACRO-CHAR, return the
;;; dispatch table (a cons) it closes over; otherwise return NIL.
;;; A closure is recognized by comparing its underlying function with that
;;; of a sample closure made once at load time.
(defun %dispatch-macro-char-table (fun)
  (and (closurep fun)
       (eq (%closure-fun fun)
           (load-time-value (%closure-fun (%make-dispatch-macro-char nil))
                            t))
       (find-if-in-closure #'consp fun)))
280 ;; If ENTRY is a dispatching macro, copy its dispatch table.
281 ;; Otherwise return it without alteration.
;; Return ENTRY itself unless it is a dispatching macro function, in which
;; case return a new dispatching macro function over a copy of its table.
(defun copy-cmt-entry (entry)
  (let ((dtable (%dispatch-macro-char-table entry)))
    (cond ((null dtable) entry)
          (t
           (let* ((old-hash (car dtable))
                  ;; The hash-table half exists only if it was ever needed.
                  (new-hash (when old-hash
                              (replace/eql-hash-table (make-hash-table)
                                                      old-hash)))
                  (new-vector (copy-seq (cdr dtable))))
             (%make-dispatch-macro-char (cons new-hash new-vector)))))))
(defun copy-readtable (&optional (from-readtable *readtable*) to-readtable)
  "Copies FROM-READTABLE and returns the result. Uses TO-READTABLE as a target
for the copy when provided, otherwise a new readtable is created. The
FROM-READTABLE defaults to the standard readtable when NIL and to the current
readtable when not provided."
  (assert-not-standard-readtable to-readtable 'copy-readtable)
  (let* ((source (or from-readtable *standard-readtable*))
         (target (or to-readtable (make-readtable))))
    ;; Character attributes: the array part, then the hash-table part.
    (replace (character-attribute-array target)
             (character-attribute-array source))
    (let ((attribute-hash (character-attribute-hash-table target)))
      (clrhash attribute-hash)
      (replace/eql-hash-table attribute-hash
                              (character-attribute-hash-table source)))
    ;; Macro functions: dispatch tables are copied via COPY-CMT-ENTRY.
    (map-into (character-macro-array target)
              #'copy-cmt-entry
              (character-macro-array source))
    (let ((macro-hash (character-macro-hash-table target)))
      (clrhash macro-hash)
      (replace/eql-hash-table macro-hash
                              (character-macro-hash-table source)
                              #'copy-cmt-entry))
    ;; Remaining per-readtable settings.
    (setf (readtable-case target) (readtable-case source))
    (setf (%readtable-string-preference target)
          (%readtable-string-preference source))
    (setf (%readtable-symbol-preference target)
          (%readtable-symbol-preference source))
    (setf (readtable-normalization target)
          (readtable-normalization source))
    target))
(defun set-syntax-from-char (to-char from-char &optional
                             (to-readtable *readtable*) (from-readtable nil))
  "Causes the syntax of TO-CHAR to be the same as FROM-CHAR in the optional
readtable (defaults to the current readtable). The FROM-TABLE defaults to the
standard Lisp readtable when NIL."
  ;; TO-READTABLE is a readtable, not a readtable-designator
  (assert-not-standard-readtable to-readtable 'set-syntax-from-char)
  (let* ((really-from-readtable (or from-readtable *standard-readtable*))
         ;; Read both pieces of FROM-CHAR's syntax before writing anything.
         (att (get-cat-entry from-char really-from-readtable))
         (mac (get-raw-cmt-entry from-char really-from-readtable)))
    (set-cat-entry to-char att to-readtable)
    ;; The raw entry is copied so that a dispatching macro character gets
    ;; its own dispatch table.
    (set-cmt-entry to-char (copy-cmt-entry mac) to-readtable))
  ;; The standard specifies T as the return value.
  t)
(defun set-macro-character (char function &optional
                                 (non-terminatingp nil)
                                 (rt-designator *readtable*))
  "Causes CHAR to be a macro character which invokes FUNCTION when seen
   by the reader. The NON-TERMINATINGP flag can be used to make the macro
   character non-terminating, i.e. embeddable in a symbol name."
  (let ((rt (or rt-designator *standard-readtable*))
        ;; A non-terminating macro character keeps the constituent attribute.
        (syntax-class (if non-terminatingp
                          +char-attr-constituent+
                          +char-attr-terminating-macro+)))
    (assert-not-standard-readtable rt 'set-macro-character)
    (set-cat-entry char syntax-class rt)
    (set-cmt-entry char (!coerce-to-cmt-entry function) rt)
    t)) ; (ANSI-specified return value)
(defun get-macro-character (char &optional (rt-designator *readtable*))
  "Return the function associated with the specified CHAR which is a macro
  character, or NIL if there is no such function. As a second value, return
  T if CHAR is a macro character which is non-terminating, i.e. which can
  be embedded in a symbol name."
  (let* ((rt (or rt-designator *standard-readtable*))
         (raw-entry (get-raw-cmt-entry char rt))
         ;; (OR FUNCTION SYMBOL) if CHAR is a macro character, else NIL
         (designator (!cmt-entry-to-fun-designator raw-entry))
         ;; ANSI's definition of GET-MACRO-CHARACTER says the second value
         ;; is NIL when CHAR is not a macro character, i.e. it means
         ;; "non-terminating macro character?", not merely
         ;; "non-terminating character?".
         (non-terminating-p
           (and designator
                (or (constituentp char rt)
                    (not (terminating-macrop char rt))))))
    (values designator non-terminating-p)))
;;; Return the dispatch table of DISP-CHAR in READTABLE. When DISP-CHAR is
;;; not a dispatching macro character, signal an error if ERRORP is true
;;; and return NIL otherwise.
(defun get-dispatch-macro-char-table (disp-char readtable &optional (errorp t))
  (or (%dispatch-macro-char-table (get-raw-cmt-entry disp-char readtable))
      (when errorp
        (error "~S is not a dispatching macro character." disp-char))))
(defun make-dispatch-macro-character (char &optional
                                      (non-terminating-p nil)
                                      (rt *readtable*))
  "Cause CHAR to become a dispatching macro character in readtable (which
   defaults to the current readtable). If NON-TERMINATING-P, the char will
   be non-terminating."
  ;; This used to call ERROR if the character was already a dispatching
  ;; macro but I saw no evidence of that in other implementations except cmucl.
  ;; Without a portable way to inquire whether a character is dispatching,
  ;; a file that frobs *READTABLE* can't be repeatedly loaded except
  ;; by catching the error, so I removed it.
  ;; RT is a readtable, not a readtable-designator, as per CLHS.
  (unless (get-dispatch-macro-char-table char rt nil)
    ;; The dtable is a cons whose CAR is initially NIL but upgraded
    ;; to a hashtable if required, and whose CDR is a vector indexed by
    ;; char-code up to the maximum base-char.
    (let ((dtable (cons nil (make-array base-char-code-limit
                                        :initial-element nil))))
      (set-macro-character char (%make-dispatch-macro-char dtable)
                           non-terminating-p rt)))
  ;; The standard specifies T as the return value, whether or not CHAR was
  ;; already a dispatching macro character.
  t)
(defun set-dispatch-macro-character (disp-char sub-char function
                                     &optional (rt-designator *readtable*))
  "Cause FUNCTION to be called whenever the reader reads DISP-CHAR
   followed by SUB-CHAR."
  ;; Get the dispatch char for macro (error if not there), diddle
  ;; entry for sub-char.
  (let* ((sub-char (char-upcase sub-char))
         (readtable (or rt-designator *standard-readtable*)))
    (assert-not-standard-readtable readtable 'set-dispatch-macro-character)
    (when (digit-char-p sub-char)
      (error "SUB-CHAR must not be a decimal digit: ~S" sub-char))
    (let ((dtable (get-dispatch-macro-char-table disp-char readtable))
          (function (!coerce-to-cmt-entry function)))
      ;; (SET-MACRO-CHARACTER #\$ (GET-MACRO-CHARACTER #\#)) will share
      ;; the dispatch table. Perhaps it should be copy-on-write?
      (if (typep sub-char 'base-char)
          ;; Base characters are stored in the vector half of the table.
          (setf (svref (cdr dtable) (char-code sub-char)) function)
          (let ((hashtable (car dtable)))
            (cond (function ; allocate the hashtable if it wasn't made yet
                   (setf (gethash sub-char
                                  (or hashtable (setf (car dtable)
                                                      (make-hash-table))))
                         function))
                  (hashtable ; remove an existing entry
                   (remhash sub-char hashtable)))))))
  ;; The standard specifies T as the return value.
  t)
(defun get-dispatch-macro-character (disp-char sub-char
                                     &optional (rt-designator *readtable*))
  "Return the macro character function for SUB-CHAR under DISP-CHAR
   or NIL if there is no associated function."
  (let* ((readtable (or rt-designator *standard-readtable*))
         ;; Signals an error if DISP-CHAR is not a dispatching macro character.
         (dispatch-table (get-dispatch-macro-char-table disp-char readtable))
         ;; Entries are looked up under the upcased sub-character.
         (raw-entry (get-raw-cmt-dispatch-entry (char-upcase sub-char)
                                                dispatch-table)))
    (!cmt-entry-to-fun-designator raw-entry)))
434 ;;;; definitions to support internal programming conventions
;;; Sentinel passed to READ-CHAR as its EOF-VALUE within this file; the
;;; callers compare the returned value against it with EQ.
(defconstant +EOF+ 0)
(defun flush-whitespace (stream)
  ;; This flushes whitespace chars, returning the last char it read (a
  ;; non-white one). It always gets an error on end-of-file.
  ;; The attribute array and hash table of the current readtable are
  ;; fetched once, before the loop.
  (let* ((stream (in-stream-from-designator stream))
         (rt *readtable*)
         (attribute-array (character-attribute-array rt))
         (attribute-hash-table (character-attribute-hash-table rt)))
    ;; DONE-P expands in a scope where CHAR is bound; it is true when CHAR's
    ;; attribute is anything other than whitespace.
    (macrolet ((done-p ()
                 '(not (eql (if (typep char 'base-char)
                                (aref attribute-array (char-code char))
                                (gethash char attribute-hash-table
                                         +char-attr-constituent+))
                            +char-attr-whitespace+))))
      (if (ansi-stream-p stream)
          (prepare-for-fast-read-char stream
            (loop (let ((char (fast-read-char t)))
                    (cond ((done-p)
                           (done-with-fast-read-char)
                           (return char))))))
          ;; CLOS stream
          (loop (let ((char (read-char stream nil +EOF+)))
                  ;; (THE) should not be needed if DONE-P, but it was not
                  ;; being derived to return a character, causing an extra
                  ;; check in consumers of flush-whitespace despite the
                  ;; promise to return a character or else signal EOF.
                  (cond ((eq char +EOF+) (error 'end-of-file :stream stream))
                        ((done-p) (return (the character char))))))))))
466 ;;;; temporary initialization hack
468 ;; Install the (easy) standard macro-chars into *READTABLE*.
(defun !cold-init-standard-readtable ()
  (/show0 "entering !cold-init-standard-readtable")
  ;; All characters get boring defaults in MAKE-READTABLE. Now we
  ;; override the boring defaults on characters which need more
  ;; interesting behavior.

  ;; Whitespace characters: no macro function, whitespace attribute.
  (dolist (char (list (code-char tab-char-code)
                      #\Newline
                      #\Space
                      (code-char form-feed-char-code)
                      (code-char return-char-code)))
    (set-cmt-entry char nil)
    (set-cat-entry char +char-attr-whitespace+))

  ;; The two escape characters.
  (set-cat-entry #\\ +char-attr-single-escape+)
  (set-cmt-entry #\\ nil)

  (set-cat-entry #\| +char-attr-multiple-escape+)
  (set-cmt-entry #\| nil)

  ;; Easy macro-character definitions are in this source file.
  (set-macro-character #\" #'read-string)
  (set-macro-character #\' #'read-quote)
  ;; Using symbols makes these traceable and redefineable with ease,
  ;; as well as avoids a forward-referenced function (from "backq")
  (set-macro-character #\( 'read-list)
  (set-macro-character #\) 'read-right-paren)
  (set-macro-character #\; #'read-comment)
  ;; (The hairier macro-character definitions, for #\# and #\`, are
  ;; defined elsewhere, in their own source files.)

  ;; all constituents: clear the macro entry of every base character that
  ;; still has the constituent attribute
  (dotimes (code base-char-code-limit)
    (let ((char (code-char code)))
      (when (constituentp char *readtable*)
        (set-cmt-entry char nil))))

  (/show0 "leaving !cold-init-standard-readtable"))
510 ;;;; implementation of the read buffer
;;; The reader's token buffer. The only constructor, MAKE-TOKEN-BUF, takes
;;; no arguments: it allocates the 128-character initial string, uses it as
;;; the working string, and creates a zero-length array displaced to it.
(defstruct (token-buf (:predicate nil) (:copier nil)
                      (:constructor
                       make-token-buf
                       (&aux
                        (initial-string (make-string 128))
                        (string initial-string)
                        (adjustable-string
                         (make-array 0
                                     :element-type 'character
                                     :fill-pointer nil
                                     :displaced-to string)))))
  ;; The string accumulated during reading of tokens.
  ;; Always starts out EQ to 'initial-string'.
  (string nil :type (simple-array character (*)))
  ;; Counter advanced as characters are placed into 'string'
  (fill-ptr 0 :type index)
  ;; Counter advanced as characters are consumed from 'string' on re-scan
  ;; by auxiliary functions MAKE-{INTEGER,FLOAT,RATIONAL} etc.
  (cursor 0 :type index)
  ;; A string used only for FIND-PACKAGE calls in package-qualified
  ;; symbols so that we don't need to call SUBSEQ on the 'string'.
  (adjustable-string nil :type (and (array character (*)) (not simple-array)))
  ;; A small string that is permanently assigned into this token-buf.
  (initial-string nil :type (simple-array character (128))
                      :read-only t)
  ;; Adjustable vector of fixnums with a fill pointer;
  ;; OUCH-READ-BUFFER-ESCAPED pushes the current fill-ptr onto it.
  (escapes (make-array 10 :element-type 'fixnum :fill-pointer 0 :adjustable t)
           :type (and (vector fixnum) (not simple-array)) :read-only t)
  ;; Link to next TOKEN-BUF, to chain the *TOKEN-BUF-POOL* together.
  (next nil :type (or null token-buf))
  ;; Starts as T; OUCH-READ-BUFFER sets it to NIL when it stores a
  ;; character that is not a BASE-CHAR.
  (only-base-chars t :type boolean))
(declaim (freeze-type token-buf))
;;; Print a TOKEN-BUF unreadably with its type and identity. The NEXT link
;;; is shown only when it is non-NIL (the ~@[ ... ~] directive).
(defmethod print-object ((self token-buf) stream)
  (print-unreadable-object (self stream :identity t :type t)
    (format stream "~@[next=~S~]" (token-buf-next self))))
548 ;; The current TOKEN-BUF
;; The current TOKEN-BUF. Defined without a value; WITH-READ-BUFFER binds it.
(declaim (type token-buf *read-buffer*))
(defvar *read-buffer*)

;; A list of available TOKEN-BUFs, chained through their NEXT slots.
;; Initially NIL (empty).
(declaim (type (or null token-buf) *token-buf-pool*))
(!define-thread-local *token-buf-pool* nil)
;;; Empty BUFFER for reuse: clear the escape positions, rewind the fill
;;; pointer and the cursor, and mark the contents as base-chars only again.
;;; Returns BUFFER.
(defun reset-read-buffer (buffer)
  (setf (fill-pointer (token-buf-escapes buffer)) 0
        (token-buf-fill-ptr buffer) 0
        (token-buf-cursor buffer) 0
        (token-buf-only-base-chars buffer) t)
  buffer)
564 ;; "Output" a character into the reader's buffer.
(declaim (inline ouch-read-buffer))
;; Append CHAR to BUFFER's string at the current fill pointer and advance
;; the fill pointer. Returns the new fill pointer.
(defun ouch-read-buffer (char buffer)
  (let ((index (token-buf-fill-ptr buffer)))
    (declare (optimize (sb!c::insert-array-bounds-checks 0)))
    (unless (< index (length (token-buf-string buffer)))
      ;; Buffer overflow: an out-of-line call for the uncommon case avoids
      ;; bloat. Size should be doubled.
      ;; NOTE(review): GROW-READ-BUFFER grows the buffer in *READ-BUFFER*,
      ;; not BUFFER -- presumably callers always pass that one; confirm.
      (grow-read-buffer))
    ;; Remember that the token is no longer made of base-chars only.
    (when (not (typep char 'base-char))
      (setf (token-buf-only-base-chars buffer) nil))
    (setf (elt (token-buf-string buffer) index) char
          (token-buf-fill-ptr buffer) (1+ index))))
;; Like OUCH-READ-BUFFER, but first record the position at which CHAR will
;; be stored (the current fill pointer) in BUF's escapes vector.
(defun ouch-read-buffer-escaped (char buf)
  (vector-push-extend (token-buf-fill-ptr buf) (token-buf-escapes buf))
  (ouch-read-buffer char buf))
;; Replace the string of the current *READ-BUFFER* with a new string of
;; twice the length that starts with the old contents. Returns the new
;; string.
(defun grow-read-buffer ()
  (let* ((buffer *read-buffer*)
         (old-string (token-buf-string buffer))
         (new-string (make-string (* 2 (length old-string)))))
    (replace new-string old-string)
    (setf (token-buf-string buffer) new-string)))
;; Return the next character from the buffered token, or NIL.
(declaim (maybe-inline token-buf-getchar))
;; Read the character at B's cursor and advance the cursor; yield NIL
;; without moving once the cursor has reached the fill pointer.
(defun token-buf-getchar (b)
  (declare (optimize (sb!c::insert-array-bounds-checks 0)))
  (let ((i (token-buf-cursor (truly-the token-buf b))))
    (when (< i (token-buf-fill-ptr b))
      (let ((char (elt (token-buf-string b) i)))
        (setf (token-buf-cursor b) (1+ i))
        char))))
598 ;; Grab a buffer off the token-buf pool if there is one, or else make one.
599 ;; This does not need to be protected against other threads because the
600 ;; pool is thread-local, or against async interrupts. An async signal
601 ;; delivered anywhere in the midst of the code sequence below can not
602 ;; corrupt the buffer given to the caller of ACQUIRE-TOKEN-BUF.
603 ;; Additionally the cleanup is on a "best effort" basis. Async unwinds
604 ;; through WITH-READ-BUFFER fail to recycle token-bufs, but that's ok.
;; Return a TOKEN-BUF for the caller's exclusive use: the head of
;; *TOKEN-BUF-POOL* when the pool is non-empty, otherwise a new one.
(defun acquire-token-buf ()
  (let ((this-buffer *token-buf-pool*))
    (cond (this-buffer
           ;; Pop the head: the pool becomes the head's successor and the
           ;; head's NEXT link is cleared, all in one SHIFTF.
           (shiftf *token-buf-pool* (token-buf-next this-buffer) nil)
           this-buffer)
          (t
           ;; Empty pool: allocate.
           (make-token-buf)))))
;; Put CHAIN, a TOKEN-BUF possibly linked to further ones through NEXT,
;; back at the front of *TOKEN-BUF-POOL*, trimming each buffer back to its
;; initial string on the way.
(defun release-token-buf (chain)
  (let ((buffer chain))
    (loop
      ;; If 'adjustable-string' was displaced to 'string',
      ;; adjust it back down to allow GC of the abnormally large string.
      (unless (eq (%array-data (token-buf-adjustable-string buffer))
                  (token-buf-initial-string buffer))
        (adjust-array (token-buf-adjustable-string buffer) '(0)
                      :displaced-to (token-buf-initial-string buffer)))
      ;; 'initial-string' is assigned into 'string'
      ;; so not to preserve huge buffers in the pool indefinitely.
      (setf (token-buf-string buffer) (token-buf-initial-string buffer))
      (let ((successor (token-buf-next buffer)))
        (cond (successor
               (setq buffer successor))
              (t
               ;; Last buffer of the chain: hook the old pool onto it.
               (setf (token-buf-next buffer) *token-buf-pool*)
               (return))))))
  (setf *token-buf-pool* chain))
629 ;; Return a fresh copy of BUFFER's string
;; Return a new string holding the characters of BUFFER's string from
;; index 0 up to the fill pointer.
(defun copy-token-buf-string (buffer)
  (let ((end (token-buf-fill-ptr buffer)))
    (subseq (token-buf-string buffer) 0 end)))
633 ;; Return a string displaced to BUFFER's string.
634 ;; The string should not be held onto - either a copy must be made
635 ;; by the receiver, or it should be parsed into something else.
;; Point BUFFER's adjustable string at BUFFER's current string, sized to
;; the fill pointer, by rewriting its array header.
(defun sized-token-buf-string (buffer)
  ;; It would in theory be faster to make the adjustable array have
  ;; a fill-pointer, and just set that most of the time. Except we still
  ;; need the ability to displace to a different string if a package name
  ;; has >128 characters, so then there'd be two modes of sharing, one of
  ;; which is rarely exercised and most likely to be subtly wrong.
  ;; At any rate, SET-ARRAY-HEADER is faster than ADJUST-ARRAY.
  ;; TODO: find evidence that it is/is-not worth having complicated
  ;; mechanism involving a fill-pointer or not.
  (let ((view (token-buf-adjustable-string buffer))
        (data (token-buf-string buffer))
        (size (token-buf-fill-ptr buffer)))
    (set-array-header view   ; the array
                      data   ; the underlying data
                      size   ; total size
                      nil    ; fill-pointer
                      0      ; displacement
                      size   ; dimension 0
                      t      ; displacedp
                      nil))) ; newp
654 ;; Acquire a TOKEN-BUF from the pool and execute the body, returning only
655 ;; the primary value therefrom. Recycle the buffer when done.
656 ;; No UNWIND-PROTECT - recycling is designed to help with the common case
657 ;; of normal return and is not intended to be resilient against nonlocal exit.
;; Bind *READ-BUFFER* to a buffer from ACQUIRE-TOKEN-BUF around BODY, then
;; release it and return BODY's primary value. No cleanup happens on a
;; non-local exit.
(defmacro with-read-buffer (() &body body)
  `(let ((*read-buffer* (acquire-token-buf)))
     (prog1 (progn ,@body)
       (release-token-buf *read-buffer*))))
;;; Signal a reader error on STREAM when RECURSIVE-P is true although
;;; *READ-BUFFER* is unbound, i.e. no read operation is in progress.
(defun check-for-recursive-read (stream recursive-p operator-name)
  (unless (or (not recursive-p)
              (boundp '*read-buffer*))
    (simple-reader-error
     stream
     "~A was invoked with RECURSIVE-P being true outside ~
      of a recursive read operation."
     ;; The operator name is passed wrapped in a one-element list.
     (list operator-name))))
672 ;;;; READ-PRESERVING-WHITESPACE, READ-DELIMITED-LIST, and READ
674 ;;; A list for #=, used to keep track of objects with labels assigned that
675 ;;; have been completely read. Each entry is a SHARP-EQUAL-WRAPPER object.
677 ;;; KLUDGE: Should this really be a list? It seems as though users
678 ;;; could reasonably expect N log N performance for large datasets.
679 ;;; On the other hand, it's probably very very seldom a problem in practice.
680 ;;; On the third hand, it might be just as easy to use a hash table,
681 ;;; so maybe we should. -- WHN 19991202
;;; Initially empty; %READ-PRESERVING-WHITESPACE rebinds it to NIL around
;;; each non-recursive read.
(defvar *sharp-equal* ())

;;; READ-MAYBE-NOTHING is proclaimed to return exactly two values, a bit
;;; and an arbitrary object, ahead of its definition.
(declaim (ftype (sfunction (t t) (values bit t)) read-maybe-nothing))
686 ;;; Like READ-PRESERVING-WHITESPACE, but doesn't check the read buffer
687 ;;; for being set up properly.
;;; Read one object from STREAM. On end of file, READ-CHAR signals an error
;;; if EOF-ERROR-P is true; otherwise EOF-VALUE is returned.
;;; When RECURSIVE-P is false, *SHARP-EQUAL* and *READ-BUFFER* are first
;;; bound anew and the function re-enters itself with RECURSIVE-P true.
(defun %read-preserving-whitespace (stream eof-error-p eof-value recursive-p)
  (declare (optimize (sb!c::check-tag-existence 0)))
  (if recursive-p
      ;; a loop for repeating when a macro returns nothing
      (let* ((tracking-p (form-tracking-stream-p stream))
             (outermost-p
              (and tracking-p
                   (null (form-tracking-stream-form-start-char-pos stream)))))
        (loop
         (let ((char (read-char stream eof-error-p +EOF+)))
           (cond ((eq char +EOF+) (return eof-value))
                 ;; Leading whitespace is skipped: go around the loop again.
                 ((whitespace[2]p char))
                 (t
                  (when outermost-p
                    ;; Calling FILE-POSITION at each token seems to slow down
                    ;; the reader by somewhere between 8x to 10x.
                    ;; Once per outermost form is acceptably fast though.
                    (setf (form-tracking-stream-form-start-byte-pos stream)
                          ;; pretend we queried the position before reading CHAR
                          (- (file-position stream)
                             (or (file-string-length stream (string char)) 0))
                          (form-tracking-stream-form-start-char-pos stream)
                          ;; likewise
                          (1- (form-tracking-stream-input-char-pos stream))))
                  (multiple-value-bind (result-p result)
                      (read-maybe-nothing stream char)
                    (unless (zerop result-p)
                      ;; Under *READ-SUPPRESS* the object is replaced by NIL.
                      (return (unless *read-suppress* result)))
                    ;; Repeat if macro returned nothing.
                    (when tracking-p
                      (funcall (form-tracking-stream-observer stream)
                               :reset nil nil))))))))
      (let ((*sharp-equal* nil))
        (with-read-buffer ()
          (%read-preserving-whitespace stream eof-error-p eof-value t)))))
724 ;;; READ-PRESERVING-WHITESPACE behaves just like READ, only it makes
725 ;;; sure to leave terminating whitespace in the stream. (This is a
726 ;;; COMMON-LISP exported symbol.)
(defun read-preserving-whitespace (&optional (stream *standard-input*)
                                             (eof-error-p t)
                                             (eof-value nil)
                                             (recursive-p nil))
  "Read from STREAM and return the value read, preserving any whitespace
   that followed the object."
  (declare (explicit-check))
  ;; Reject RECURSIVE-P being true when no read operation is in progress,
  ;; then delegate to the internal reader entry point.
  (check-for-recursive-read stream recursive-p 'read-preserving-whitespace)
  (%read-preserving-whitespace stream eof-error-p eof-value recursive-p))
;;; Dispatch on CHAR, which has already been read from STREAM and must
;;; not be whitespace. Returns two values:
;;;   1 and the object, when the reader function for CHAR yielded a value;
;;;   0 and NIL, when it yielded no values (e.g. a comment),
;;; so that callers can look past things that read as nothing.
;;; For a form-tracking stream, the stream's observer is told the start
;;; position, the end position and the object whenever an object is read.
(defun read-maybe-nothing (stream char)
  (truly-the
   (values bit t) ; skips the type check that M-V-CALL would otherwise cause
   (multiple-value-call
       ;; Receives STREAM, the start position (or NIL), and then whatever
       ;; values the reader function returned. Only the first of those
       ;; values is used; any further ones are dropped.
       (lambda (strm begin &optional (value nil value-p) &rest extras)
         (declare (ignore extras))
         (when (and value-p begin)
           (funcall (form-tracking-stream-observer strm)
                    begin
                    (form-tracking-stream-input-char-pos strm)
                    value))
         (values (if value-p 1 0) value))
     ;; KLUDGE: everything the lambda needs is passed as an argument, so
     ;; it closes over nothing and no closure has to be consed.
     stream
     (and (form-tracking-stream-p stream)
          ;; The recorded position is already past CHAR; back up by one.
          (1- (form-tracking-stream-input-char-pos stream)))
     (funcall (!cmt-entry-to-function
               (get-raw-cmt-entry char *readtable*) #'read-token)
              stream char))))
(defun read (&optional (stream *standard-input*)
                       (eof-error-p t)
                       (eof-value nil)
                       (recursive-p nil))
  "Read the next Lisp value from STREAM, and return it."
  (declare (explicit-check))
  (check-for-recursive-read stream recursive-p 'read)
  ;; A private, unique cons serves as the end-of-file marker so that no
  ;; object the caller could pass as EOF-VALUE can be mistaken for it.
  (let* ((eof-marker (load-time-value (cons nil nil) t))
         (object (%read-preserving-whitespace
                  stream eof-error-p eof-marker recursive-p)))
    (cond ((eq object eof-marker)
           eof-value)
          (t
           ;; Unlike CL:READ-PRESERVING-WHITESPACE, a top-level READ
           ;; swallows one whitespace character following the object.
           ;; Anything else that was peeked at is pushed back.
           (unless recursive-p
             (let ((following (read-char stream nil +EOF+)))
               (when (and (not (eq following +EOF+))
                          (not (whitespace[2]p following)))
                 (unread-char following stream))))
           object))))
782 ;;;; basic readmacro definitions
783 ;;;;
784 ;;;; Some large, hairy subsets of readmacro definitions (backquotes
785 ;;;; and sharp macros) are not here, but in their own source files.
;;; Reader macro function for the quote character: read the following
;;; object recursively and wrap it as (QUOTE object). The second argument
;;; (the macro character) is unused.
(defun read-quote (stream ignore)
  (declare (ignore ignore))
  (let ((quoted-object (read stream t nil t)))
    (list 'quote quoted-object)))
;;; Reader macro function for a to-end-of-line comment: consume
;;; characters up to and including the next newline (or up to end of
;;; file) and return no values. A CHARACTER-DECODING-ERROR signalled
;;; while skipping is downgraded to a style warning, after which the
;;; ATTEMPT-RESYNC restart is invoked.
(defun read-comment (stream ignore)
  (declare (ignore ignore))
  (macrolet ((line-finished-p (c)
               ;; C is always a plain variable at the use sites, so
               ;; evaluating it twice is harmless.
               `(or (eq ,c +EOF+) (char= ,c #\newline))))
    (handler-bind
        ((character-decoding-error
          (lambda (decoding-error)
            (declare (ignorable decoding-error))
            ;; STREAM is the function argument here, not the resolved
            ;; input stream bound below.
            (style-warn
             'sb!kernel::character-decoding-error-in-macro-char-comment
             :position (file-position stream) :stream stream)
            (invoke-restart 'attempt-resync))))
      (let ((input (in-stream-from-designator stream)))
        (if (ansi-stream-p input)
            (prepare-for-fast-read-char input
              (loop
                (let ((c (fast-read-char nil +EOF+)))
                  (when (line-finished-p c)
                    (return (done-with-fast-read-char))))))
            ;; CLOS stream
            (loop
              (let ((c (read-char input nil +EOF+)))
                (when (line-finished-p c)
                  (return))))))))
  ;; Don't return anything.
  (values))
;;; FIXME: for these two macro chars, if STREAM is a FORM-TRACKING-STREAM,
;;; every cons cell should generate a notification so that the readtable
;;; manipulation in SB-COVER can be eliminated in favor of a stream observer.
;;; It is cheap to add events - it won't increase consing in the compiler
;;; because the extra events can simply be ignored.
;;;
;;; The two local macros below deliberately expose the variables THELIST
;;; (a stack-allocated header cons), LISTTAIL (the last cons so far),
;;; COLLECTP (0 under *READ-SUPPRESS*, else -1) and FIRSTCHAR (the first
;;; non-whitespace character of the next item) to the code using them.
(macrolet
    ((with-list-reader ((streamvar delimiter) &body body)
       ;; Loop over list items: skip whitespace, return the accumulated
       ;; list when DELIMITER is seen, otherwise run BODY.
       `(let* ((thelist (list nil))
               (listtail thelist)
               (collectp (if *read-suppress* 0 -1)))
          (declare (dynamic-extent thelist))
          (loop
            (let ((firstchar (flush-whitespace ,streamvar)))
              (when (eq firstchar ,delimiter)
                (return (cdr thelist)))
              ,@body))))
     (read-list-item (streamvar)
       ;; Read one item starting at FIRSTCHAR and append it to the list,
       ;; unless the reader macro produced nothing or COLLECTP is 0.
       `(multiple-value-bind (got-one item)
            (read-maybe-nothing ,streamvar firstchar)
          (unless (zerop (logand got-one collectp))
            (setq listtail
                  (cdr (rplacd (truly-the cons listtail) (list item))))))))

  ;;; The character macro handler for left paren
  (defun read-list (stream ignore)
    (declare (ignore ignore))
    (with-list-reader (stream #\))
      (when (eq firstchar #\.)
        (let ((following (read-char stream t)))
          (cond ((token-delimiterp following)
                 ;; An isolated dot: what follows is the final cdr.
                 (cond ((eq listtail thelist)
                        (unless (zerop collectp)
                          (simple-reader-error
                           stream "Nothing appears before . in list.")))
                       ((whitespace[2]p following)
                        (setq following (flush-whitespace stream))))
                 (rplacd (truly-the cons listtail)
                         (read-after-dot stream following collectp))
                 ;; Check for improper ". ,@" or ". ,." now rather than
                 ;; in the #\` reader. The resulting QUASIQUOTE macro might
                 ;; never be expanded, but nonetheless could be erroneous.
                 (unless (zerop (logand *backquote-depth* collectp))
                   (let ((final-cdr (cdr (last listtail))))
                     (when (and (comma-p final-cdr)
                                (comma-splicing-p final-cdr))
                       (simple-reader-error
                        stream "~S contains a splicing comma after a dot"
                        (cdr thelist)))))
                 (return (cdr thelist)))
                ;; The dot begins a longer token: put back the character
                ;; after it so that the token can be read normally.
                (t (unread-char following stream)))))
      ;; Next thing is not an isolated dot.
      (read-list-item stream)))

  ;;; (This is a COMMON-LISP exported symbol.)
  (defun read-delimited-list (endchar &optional
                                      (input-stream *standard-input*)
                                      recursive-p)
    "Read Lisp values from INPUT-STREAM until the next character after a
value's representation is ENDCHAR, and return the objects as a list."
    (declare (explicit-check))
    (check-for-recursive-read input-stream recursive-p 'read-delimited-list)
    (flet ((collect-items ()
             (with-list-reader (input-stream endchar)
               (read-list-item input-stream))))
      ;; Only an outermost call needs to set up the read buffer.
      (if recursive-p
          (collect-items)
          (with-read-buffer () (collect-items)))))) ; end MACROLET
;;; Read the single object that must follow a consing dot and return it,
;;; consuming the closing paren as well. FIRSTCHAR is the first character
;;; after the dot; the caller is expected to pass a non-whitespace
;;; character. COLLECTP is 0 when reading is suppressed (errors are then
;;; not reported) and -1 otherwise.
(defun read-after-dot (stream firstchar collectp)
  (let ((tail nil))
    ;; Phase 1: find the object after the dot, looking past anything
    ;; that reads as nothing.
    (do ((ch firstchar (flush-whitespace stream)))
        ((eq ch #\))
         (if (zerop collectp)
             (return-from read-after-dot nil)
             (simple-reader-error stream "Nothing appears after . in list.")))
      (multiple-value-bind (got-one object) (read-maybe-nothing stream ch)
        (unless (zerop got-one)
          (setq tail object)
          (return))))
    ;; Phase 2: only things that read as nothing may appear before the
    ;; closing paren now.
    (loop
      (let ((ch (flush-whitespace stream)))
        (when (eq ch #\))
          (return tail))                ; success!
        ;; Try reading virtual whitespace.
        (unless (zerop (logand (read-maybe-nothing stream ch)
                               (truly-the fixnum collectp)))
          (simple-reader-error
           stream "More than one object follows . in list."))))))
;;; Reader macro function for a string delimiter. STREAM is positioned
;;; just after the opening delimiter; CLOSECH is the character that ends
;;; the string. Returns the string read. Signals END-OF-FILE if the
;;; stream ends before CLOSECH is seen.
(defun read-string (stream closech)
  ;; This accumulates chars until it sees same char that invoked it.
  ;; We avoid copying any given input character more than twice-
  ;; once to a temp buffer and then to the result. In the worst case,
  ;; we can waste space equal to the unwasted space, if the final character
  ;; causes allocation of a new buffer for just that character,
  ;; because the buffer size is doubled each time it overflows.
  ;; (Would be better to peek at the frc-buffer if the stream has one.)
  ;; Scratch vectors are GC-able as soon as this function returns though.
  (declare (character closech))
  (macrolet ((scan (read-a-char eofp &optional finish)
               ;; READ-A-CHAR is the form that fetches the next character,
               ;; EOFP the form testing CHAR for end of file, FINISH the
               ;; form evaluated when the closing delimiter is found.
               `(loop (let ((char ,read-a-char))
                        (declare (optimize (sb!c::insert-array-bounds-checks 0)))
                        (cond (,eofp (error 'end-of-file :stream stream))
                              ((eql char closech)
                               (return ,finish))
                              ((single-escape-p char rt)
                               ;; Take the character after the escape as-is.
                               (setq char ,read-a-char)
                               (when ,eofp
                                 (error 'end-of-file :stream stream))))
                        (when (>= ptr lim)
                          ;; Buffer full: when collecting, keep it on CHAIN
                          ;; and start a new one of twice the size; when
                          ;; suppressing, just overwrite the same buffer.
                          (unless suppress
                            (push buf chain)
                            (setq lim (the index (ash lim 1))
                                  buf (make-array lim :element-type 'character)))
                          (setq ptr 0))
                        (setf (schar buf ptr) (truly-the character char))
                        #!+sb-unicode ; BASE-CHAR-P does not exist if not
                        (unless (base-char-p char) (setq only-base-chars nil))
                        (incf ptr)))))
    (let* ((token-buf *read-buffer*)
           (buf (token-buf-string token-buf))
           (rt *readtable*)
           (stream (in-stream-from-designator stream))
           (suppress *read-suppress*)
           (lim (length buf))
           (ptr 0)
           (only-base-chars t)
           (chain))
      (declare (type (simple-array character (*)) buf))
      (reset-read-buffer token-buf)
      (if (ansi-stream-p stream)
          (prepare-for-fast-read-char stream
            (scan (fast-read-char t) nil (done-with-fast-read-char)))
          ;; CLOS stream
          (scan (read-char stream nil +EOF+) (eq char +EOF+)))
      (if suppress
          ;; NOTE(review): this branch was absent from the copy being
          ;; edited; an empty string is what upstream is believed to
          ;; return here - confirm against the original source.
          ""
          (let* ((sum (loop for buf in chain sum (length buf)))
                 (result
                  (make-array (+ sum ptr)
                              :element-type (if only-base-chars
                                                (%readtable-string-preference rt)
                                                'character))))
            (setq ptr sum)
            ;; Now work backwards from the end
            (replace result buf :start1 ptr)
            ;; CHAIN holds the earlier buffers, most recent first.
            (dolist (buf chain result)
              (declare (type (simple-array character (*)) buf))
              (let ((len (length buf)))
                (decf ptr len)
                (replace result buf :start1 ptr))))))))
;;; Reader macro function for a close parenthesis; it unconditionally
;;; signals a reader error. (The list readers in this file consume their
;;; own closing delimiter directly rather than dispatching on it.)
(defun read-right-paren (stream ignore)
  (declare (ignore ignore))
  (simple-reader-error stream
                       "unmatched close parenthesis"))
;;; Read from the stream up to the next delimiter. Leave the resulting
;;; token in *READ-BUFFER*, and return three values:
;;; -- a TOKEN-BUF
;;; -- whether any escape character was seen (even if no character is escaped)
;;; -- whether a package delimiter character was seen
;;; Normalizes the input to NFKC before returning
;;;
;;; FIRSTCHAR is the already-read first character of the token. When
;;; ESCAPE-FIRSTCHAR is true, FIRSTCHAR is stored as an escaped character
;;; and scanning starts with the next character from STREAM. The
;;; terminating delimiter, if any, is put back on STREAM.
(defun internal-read-extended-token (stream firstchar escape-firstchar
                                     &aux (read-buffer *read-buffer*))
  (reset-read-buffer read-buffer)
  (when escape-firstchar
    (ouch-read-buffer-escaped firstchar read-buffer)
    (setq firstchar (read-char stream nil +EOF+)))
  (do ((char firstchar (read-char stream nil +EOF+))
       (seen-multiple-escapes nil)
       (rt *readtable*)
       (colon nil))
      ;; End test: stop at end of file, or at a token delimiter (which is
      ;; pushed back for the caller).
      ((cond ((eq char +EOF+) t)
             ((token-delimiterp char rt)
              (unread-char char stream)
              t)
             (t nil))
       (progn
         (multiple-value-setq (read-buffer colon)
           (normalize-read-buffer read-buffer colon))
         (values read-buffer
                 (or (plusp (fill-pointer (token-buf-escapes read-buffer)))
                     seen-multiple-escapes)
                 colon)))
    (flet ((escape-1-char ()
             ;; It can't be a number, even if it's 1\23.
             ;; Read next char here, so it won't be casified.
             (let ((nextchar (read-char stream nil +EOF+)))
               (if (eq nextchar +EOF+)
                   (reader-eof-error stream "after escape character")
                   (ouch-read-buffer-escaped nextchar read-buffer)))))
      (cond ((single-escape-p char rt) (escape-1-char))
            ((multiple-escape-p char rt)
             (setq seen-multiple-escapes t)
             ;; Read to next multiple-escape, escaping single chars
             ;; along the way.
             (loop
              (let ((ch (read-char stream nil +EOF+)))
                (cond ((eq ch +EOF+)
                       (reader-eof-error stream "inside extended token"))
                      ((multiple-escape-p ch rt) (return))
                      ((single-escape-p ch rt) (escape-1-char))
                      (t (ouch-read-buffer-escaped ch read-buffer))))))
            (t
             ;; Ordinary character: note the first unescaped package
             ;; delimiter, then store the character unescaped.
             (when (and (not colon) ; easiest test first
                        (constituentp char rt)
                        (eql (get-constituent-trait char)
                             +char-attr-package-delimiter+))
               (setq colon t))
             (ouch-read-buffer char read-buffer))))))
1026 ;;;; character classes
;;; Return the character class for CHAR.
;;;
;;; ATTARRAY is indexed by character code for base-chars; ATTHASH is
;;; consulted for all other characters, defaulting to
;;; +CHAR-ATTR-CONSTITUENT+. The expansion refers to a variable named
;;; STREAM, which must be bound where the macro is used; it is passed to
;;; SIMPLE-READER-ERROR when CHAR turns out to be an invalid constituent.
;;;
;;; FIXME: why aren't these ATT-getting forms using GET-CAT-ENTRY?
;;; Because we've cached the readtable tables?
(defmacro char-class (char attarray atthash)
  `(let ((att (if (typep (truly-the character ,char) 'base-char)
                  (aref ,attarray (char-code ,char))
                  (gethash ,char ,atthash +char-attr-constituent+))))
     (declare (fixnum att))
     (cond ((<= att +char-attr-terminating-macro+)
            +char-attr-delimiter+)
           ((< att +char-attr-constituent+)
            att)
           (t
            ;; A constituent: refine to its constituent trait.
            (setq att (get-constituent-trait ,char))
            (cond ((= att +char-attr-invalid+)
                   (simple-reader-error stream "invalid constituent"))
                  (t att))))))
;;; Return the character class for CHAR, which might be part of a
;;; rational number.
;;;
;;; Same lookup as CHAR-CLASS, except that for constituents the digit
;;; classification is decided by READ-BASE: a character that is a digit
;;; in READ-BASE is classed as a digit, and a character whose trait says
;;; "digit" but which is not one in READ-BASE is classed as a plain
;;; constituent. A variable named STREAM must be bound at the use site.
(defmacro char-class2 (char attarray atthash read-base)
  `(let ((att (if (typep (truly-the character ,char) 'base-char)
                  (aref ,attarray (char-code ,char))
                  (gethash ,char ,atthash +char-attr-constituent+))))
     (declare (fixnum att))
     (cond ((<= att +char-attr-terminating-macro+)
            +char-attr-delimiter+)
           ((< att +char-attr-constituent+)
            att)
           (t
            (setq att (get-constituent-trait ,char))
            (cond ((digit-char-p ,char ,read-base)
                   +char-attr-constituent-digit+)
                  ((= att +char-attr-constituent-digit+)
                   +char-attr-constituent+)
                  ((= att +char-attr-invalid+)
                   (simple-reader-error stream "invalid constituent"))
                  (t att))))))
;;; Return the character class for a char which might be part of a
;;; rational or floating number. (Assume that it is a digit if it
;;; could be.)
;;;
;;; Besides a variable named STREAM, the expansion reads and updates the
;;; variables POSSIBLY-RATIONAL and POSSIBLY-FLOAT at the use site: each
;;; stays true only while every constituent seen is compatible with that
;;; kind of number (a digit in READ-BASE or a slash; a decimal digit or
;;; a dot, respectively).
(defmacro char-class3 (char attarray atthash read-base)
  `(let ((att (if (typep (truly-the character ,char) 'base-char)
                  (aref ,attarray (char-code ,char))
                  (gethash ,char ,atthash +char-attr-constituent+))))
     (declare (fixnum att))
     (cond ((<= att +char-attr-terminating-macro+)
            +char-attr-delimiter+)
           ((< att +char-attr-constituent+)
            att)
           (t
            (setq att (get-constituent-trait ,char))
            (setq possibly-rational
                  (and possibly-rational
                       (or (digit-char-p ,char ,read-base)
                           (= att +char-attr-constituent-slash+))))
            (setq possibly-float
                  (and possibly-float
                       (or (digit-char-p ,char 10)
                           (= att +char-attr-constituent-dot+))))
            (cond ((digit-char-p ,char (max ,read-base 10))
                   (cond ((not (digit-char-p ,char ,read-base))
                          ;; A decimal digit that is too large for
                          ;; READ-BASE.
                          +char-attr-constituent-decimal-digit+)
                         ((= att +char-attr-constituent-expt+)
                          +char-attr-constituent-digit-or-expt+)
                         (t
                          +char-attr-constituent-digit+)))
                  ((= att +char-attr-invalid+)
                   (simple-reader-error stream "invalid constituent"))
                  (t att))))))
1094 ;;;; token fetching
;;; Consulted throughout this file: when true, objects are parsed but
;;; discarded rather than collected or returned.
(defvar *read-suppress* nil
  "Suppress most interpreting in the reader when T.")

;;; Radix used when classifying and converting digits of numeric tokens;
;;; restricted by the declaration below to the range 2 through 36.
(defvar *read-base* 10
  "the radix that Lisp reads numbers in")
(declaim (type (integer 2 36)
               *read-base*))
;;; Normalize the contents of TOKEN-BUF to NFKC and return two values:
;;; TOKEN-BUF and COLON (passed through unchanged).
;;;
;;; Nothing is done when the buffer holds only base-chars or when the
;;; current readtable has normalization turned off. Otherwise TOKEN-BUF
;;; itself is reset and refilled: each run of unescaped characters is
;;; normalized as a unit, while escaped characters are copied through
;;; untouched and stay marked as escaped.
(defun normalize-read-buffer (token-buf &optional colon)
  (when (or (token-buf-only-base-chars token-buf)
            (not (readtable-normalization *readtable*)))
    (return-from normalize-read-buffer (values token-buf colon)))
  (let ((current-buffer (copy-token-buf-string token-buf))
        (old-escapes (copy-seq (token-buf-escapes token-buf)))
        ;; Scratch string collecting the current run of unescaped chars.
        (str-to-normalize (make-string (token-buf-fill-ptr token-buf)))
        (normalize-ptr 0) (escapes-ptr 0))
    (reset-read-buffer token-buf)
    (macrolet ((clear-str-to-normalize ()
                 ;; Normalize the pending run, append it to TOKEN-BUF and
                 ;; start a new, empty run.
                 `(progn
                    (loop for char across (sb!unicode:normalize-string
                                           (subseq str-to-normalize 0 normalize-ptr)
                                           :nfkc)
                          do (ouch-read-buffer char token-buf))
                    (setf normalize-ptr 0)))
               (push-to-normalize (ch)
                 ;; Append CH to the pending run.
                 (let ((ch-gen (gensym)))
                   `(let ((,ch-gen ,ch))
                      (setf (char str-to-normalize normalize-ptr) ,ch-gen)
                      (incf normalize-ptr)))))
      (loop for c across current-buffer
            for i from 0
            do
            (if (and (< escapes-ptr (length old-escapes))
                     (eql i (aref old-escapes escapes-ptr)))
                ;; Index I was escaped: flush what precedes it, then copy
                ;; the character through as escaped.
                (progn
                  (clear-str-to-normalize)
                  (ouch-read-buffer-escaped c token-buf)
                  (incf escapes-ptr))
                (push-to-normalize c)))
      (clear-str-to-normalize)
      (values token-buf colon))))
;;; Modify the read buffer according to READTABLE-CASE, ignoring
;;; escaped characters. The escaped indices are taken from the escapes
;;; vector of TOKEN-BUF.
;;;
;;; The string of TOKEN-BUF is altered in place. Except on the fast path
;;; and for :PRESERVE, the escapes vector is consumed (popped) while
;;; walking the buffer from the end; for :INVERT its fill pointer is put
;;; back after the inspection pass so that the conversion pass can use
;;; it again.
(defun casify-read-buffer (token-buf)
  (let ((case (readtable-case *readtable*))
        (escapes (token-buf-escapes token-buf)))
    (cond
     ;; Fast path: the common case of :UPCASE with no escapes at all.
     ((and (zerop (length escapes)) (eq case :upcase))
      (let ((buffer (token-buf-string token-buf)))
        (dotimes (i (token-buf-fill-ptr token-buf))
          (declare (optimize (sb!c::insert-array-bounds-checks 0)))
          (setf (schar buffer i) (char-upcase (schar buffer i))))))
     ;; :PRESERVE leaves the token untouched.
     ((eq case :preserve))
     (t
      (macrolet ((skip-esc (&body body)
                   ;; Walk the buffer from the last index down to 0,
                   ;; running BODY (with I, BUFFER and CH bound) for each
                   ;; index not recorded in ESCAPES. ESC is the next
                   ;; escaped index still to be met, or -1 if none.
                   `(do ((i (1- (token-buf-fill-ptr token-buf)) (1- i))
                         (buffer (token-buf-string token-buf))
                         (esc (if (zerop (fill-pointer escapes))
                                  -1 (vector-pop escapes))))
                        ((minusp i))
                      (declare (fixnum i)
                               (optimize (sb!c::insert-array-bounds-checks 0)))
                      (if (< esc i)
                          (let ((ch (schar buffer i)))
                            ,@body)
                          (progn
                            (aver (= esc i))
                            (setq esc (if (zerop (fill-pointer escapes))
                                          -1 (vector-pop escapes))))))))
        (flet ((lower-em ()
                 (skip-esc (setf (schar buffer i) (char-downcase ch))))
               (raise-em ()
                 (skip-esc (setf (schar buffer i) (char-upcase ch)))))
          (ecase case
            (:upcase (raise-em))
            (:downcase (lower-em))
            (:invert
             (let ((all-upper t)
                   (all-lower t)
                   (fillptr (fill-pointer escapes)))
               ;; First pass: find out whether the unescaped cased
               ;; characters are all of one case.
               (skip-esc
                 (when (both-case-p ch)
                   (if (upper-case-p ch)
                       (setq all-lower nil)
                       (setq all-upper nil))))
               ;; Undo the popping done by the first pass.
               (setf (fill-pointer escapes) fillptr)
               ;; Mixed-case tokens are left alone.
               (cond (all-lower (raise-em))
                     (all-upper (lower-em))))))))))))
;;; When non-NIL, the package in which unqualified tokens are interned
;;; instead of the current package (READ-TOKEN binds it while reading the
;;; form that follows a "package::" prefix). Defined at compile time too,
;;; so that the declarations below can refer to it.
(eval-when (:compile-toplevel :load-toplevel :execute)
  (defvar *reader-package* nil))
(declaim (type (or null package) *reader-package*))
(declaim (always-bound *reader-package*))
;;; Resolve PACKAGE-DESIGNATOR for the reader and return a package.
;;; A designator that is already an instance is returned as is; anything
;;; else is looked up with FIND-PACKAGE. On success the token buffer
;;; chained behind *READ-BUFFER* (the one that held the designator) is
;;; released. If no such package exists, a SIMPLE-READER-PACKAGE-ERROR
;;; is signalled, with STREAM recorded in the condition.
(defun reader-find-package (package-designator stream)
  (if (%instancep package-designator)
      package-designator
      (let ((package (find-package package-designator)))
        (cond (package
               ;; Release the token-buf that was used for the designator
               (release-token-buf (shiftf (token-buf-next *read-buffer*) nil))
               package)
              (t
               (error 'simple-reader-package-error
                      :package package-designator
                      :stream stream
                      :format-control "Package ~A does not exist."
                      :format-arguments (list package-designator)))))))
1207 (defun read-token (stream firstchar)
1208 "Default readmacro function. Handles numbers, symbols, and SBCL's
1209 extended <package-name>::<form-in-package> syntax."
1210 ;; Check explicitly whether FIRSTCHAR has an entry for
1211 ;; NON-TERMINATING in CHARACTER-ATTRIBUTE-TABLE and
1212 ;; READ-DOT-NUMBER-SYMBOL in CMT. Report an error if these are
1213 ;; violated. (If we called this, we want something that is a
1214 ;; legitimate token!) Read in the longest possible string satisfying
1215 ;; the Backus-Naur form for "unqualified-token". Leave the result in
1216 ;; the *READ-BUFFER*. Return next char after token (last char read).
1217 (when *read-suppress*
1218 (internal-read-extended-token stream firstchar nil)
1219 (return-from read-token nil))
1220 (let* ((rt *readtable*)
1221 (base *read-base*)
1222 (attribute-array (character-attribute-array rt))
1223 (attribute-hash-table (character-attribute-hash-table rt))
1224 (buf *read-buffer*)
1225 (package-designator nil)
1226 (colons 0)
1227 (possibly-rational t)
1228 (seen-digit-or-expt nil)
1229 (possibly-float t)
1230 (was-possibly-float nil)
1231 (seen-multiple-escapes nil))
1232 (declare (token-buf buf))
1233 (reset-read-buffer buf)
1234 (macrolet ((getchar-or-else (what)
1235 `(when (eq (setq char (read-char stream nil +EOF+)) +EOF+)
1236 ,what)))
1237 (prog ((char firstchar))
1238 (case (char-class3 char attribute-array attribute-hash-table base)
1239 (#.+char-attr-constituent-sign+ (go SIGN))
1240 (#.+char-attr-constituent-digit+ (go LEFTDIGIT))
1241 (#.+char-attr-constituent-digit-or-expt+
1242 (setq seen-digit-or-expt t)
1243 (go LEFTDIGIT))
1244 (#.+char-attr-constituent-decimal-digit+ (go LEFTDECIMALDIGIT))
1245 (#.+char-attr-constituent-dot+ (go FRONTDOT))
1246 (#.+char-attr-single-escape+ (go SINGLE-ESCAPE))
1247 (#.+char-attr-package-delimiter+ (go COLON))
1248 (#.+char-attr-multiple-escape+ (go MULT-ESCAPE))
1249 (#.+char-attr-invalid+ (simple-reader-error stream
1250 "invalid constituent"))
1251 ;; can't have eof, whitespace, or terminating macro as first char!
1252 (t (go SYMBOL)))
1253 SIGN ; saw "sign"
1254 (ouch-read-buffer char buf)
1255 (getchar-or-else (go RETURN-SYMBOL))
1256 (setq possibly-rational t
1257 possibly-float t)
1258 (case (char-class3 char attribute-array attribute-hash-table base)
1259 (#.+char-attr-constituent-digit+ (go LEFTDIGIT))
1260 (#.+char-attr-constituent-digit-or-expt+
1261 (setq seen-digit-or-expt t)
1262 (go LEFTDIGIT))
1263 (#.+char-attr-constituent-decimal-digit+ (go LEFTDECIMALDIGIT))
1264 (#.+char-attr-constituent-dot+ (go SIGNDOT))
1265 (#.+char-attr-single-escape+ (go SINGLE-ESCAPE))
1266 (#.+char-attr-package-delimiter+ (go COLON))
1267 (#.+char-attr-multiple-escape+ (go MULT-ESCAPE))
1268 (#.+char-attr-delimiter+ (unread-char char stream) (go RETURN-SYMBOL))
1269 (t (go SYMBOL)))
1270 LEFTDIGIT ; saw "[sign] {digit}+"
1271 (ouch-read-buffer char buf)
1272 (getchar-or-else (return (make-integer)))
1273 (setq was-possibly-float possibly-float)
1274 (case (char-class3 char attribute-array attribute-hash-table base)
1275 (#.+char-attr-constituent-digit+ (go LEFTDIGIT))
1276 (#.+char-attr-constituent-decimal-digit+ (if possibly-float
1277 (go LEFTDECIMALDIGIT)
1278 (go SYMBOL)))
1279 (#.+char-attr-constituent-dot+ (if possibly-float
1280 (go MIDDLEDOT)
1281 (go SYMBOL)))
1282 (#.+char-attr-constituent-digit-or-expt+
1283 (if (or seen-digit-or-expt (not was-possibly-float))
1284 (progn (setq seen-digit-or-expt t) (go LEFTDIGIT))
1285 (progn (setq seen-digit-or-expt t) (go LEFTDIGIT-OR-EXPT))))
1286 (#.+char-attr-constituent-expt+
1287 (if was-possibly-float
1288 (go EXPONENT)
1289 (go SYMBOL)))
1290 (#.+char-attr-constituent-slash+ (if possibly-rational
1291 (go RATIO)
1292 (go SYMBOL)))
1293 (#.+char-attr-delimiter+ (unread-char char stream)
1294 (return (make-integer)))
1295 (#.+char-attr-single-escape+ (go SINGLE-ESCAPE))
1296 (#.+char-attr-multiple-escape+ (go MULT-ESCAPE))
1297 (#.+char-attr-package-delimiter+ (go COLON))
1298 (t (go SYMBOL)))
1299 LEFTDIGIT-OR-EXPT
1300 (ouch-read-buffer char buf)
1301 (getchar-or-else (return (make-integer)))
1302 (case (char-class3 char attribute-array attribute-hash-table base)
1303 (#.+char-attr-constituent-digit+ (go LEFTDIGIT))
1304 (#.+char-attr-constituent-decimal-digit+ (bug "impossible!"))
1305 (#.+char-attr-constituent-dot+ (go SYMBOL))
1306 (#.+char-attr-constituent-digit-or-expt+ (go LEFTDIGIT))
1307 (#.+char-attr-constituent-expt+ (go SYMBOL))
1308 (#.+char-attr-constituent-sign+ (go EXPTSIGN))
1309 (#.+char-attr-constituent-slash+ (if possibly-rational
1310 (go RATIO)
1311 (go SYMBOL)))
1312 (#.+char-attr-delimiter+ (unread-char char stream)
1313 (return (make-integer)))
1314 (#.+char-attr-single-escape+ (go SINGLE-ESCAPE))
1315 (#.+char-attr-multiple-escape+ (go MULT-ESCAPE))
1316 (#.+char-attr-package-delimiter+ (go COLON))
1317 (t (go SYMBOL)))
1318 LEFTDECIMALDIGIT ; saw "[sign] {decimal-digit}+"
1319 (aver possibly-float)
1320 (ouch-read-buffer char buf)
1321 (getchar-or-else (go RETURN-SYMBOL))
1322 (case (char-class char attribute-array attribute-hash-table)
1323 (#.+char-attr-constituent-digit+ (go LEFTDECIMALDIGIT))
1324 (#.+char-attr-constituent-dot+ (go MIDDLEDOT))
1325 (#.+char-attr-constituent-expt+ (go EXPONENT))
1326 (#.+char-attr-constituent-slash+ (aver (not possibly-rational))
1327 (go SYMBOL))
1328 (#.+char-attr-delimiter+ (unread-char char stream)
1329 (go RETURN-SYMBOL))
1330 (#.+char-attr-single-escape+ (go SINGLE-ESCAPE))
1331 (#.+char-attr-multiple-escape+ (go MULT-ESCAPE))
1332 (#.+char-attr-package-delimiter+ (go COLON))
1333 (t (go SYMBOL)))
1334 MIDDLEDOT ; saw "[sign] {digit}+ dot"
1335 (ouch-read-buffer char buf)
1336 (getchar-or-else (return (make-integer 10)))
1337 (case (char-class char attribute-array attribute-hash-table)
1338 (#.+char-attr-constituent-digit+ (go RIGHTDIGIT))
1339 (#.+char-attr-constituent-expt+ (go EXPONENT))
1340 (#.+char-attr-delimiter+
1341 (unread-char char stream)
1342 (return (make-integer 10)))
1343 (#.+char-attr-single-escape+ (go SINGLE-ESCAPE))
1344 (#.+char-attr-multiple-escape+ (go MULT-ESCAPE))
1345 (#.+char-attr-package-delimiter+ (go COLON))
1346 (t (go SYMBOL)))
1347 RIGHTDIGIT ; saw "[sign] {decimal-digit}* dot {digit}+"
1348 (ouch-read-buffer char buf)
1349 (getchar-or-else (return (make-float stream)))
1350 (case (char-class char attribute-array attribute-hash-table)
1351 (#.+char-attr-constituent-digit+ (go RIGHTDIGIT))
1352 (#.+char-attr-constituent-expt+ (go EXPONENT))
1353 (#.+char-attr-delimiter+
1354 (unread-char char stream)
1355 (return (make-float stream)))
1356 (#.+char-attr-single-escape+ (go SINGLE-ESCAPE))
1357 (#.+char-attr-multiple-escape+ (go MULT-ESCAPE))
1358 (#.+char-attr-package-delimiter+ (go COLON))
1359 (t (go SYMBOL)))
1360 SIGNDOT ; saw "[sign] dot"
1361 (ouch-read-buffer char buf)
1362 (getchar-or-else (go RETURN-SYMBOL))
1363 (case (char-class char attribute-array attribute-hash-table)
1364 (#.+char-attr-constituent-digit+ (go RIGHTDIGIT))
1365 (#.+char-attr-delimiter+ (unread-char char stream) (go RETURN-SYMBOL))
1366 (#.+char-attr-single-escape+ (go SINGLE-ESCAPE))
1367 (#.+char-attr-multiple-escape+ (go MULT-ESCAPE))
1368 (t (go SYMBOL)))
1369 FRONTDOT ; saw "dot"
1370 (ouch-read-buffer char buf)
1371 (getchar-or-else (simple-reader-error stream "dot context error"))
1372 (case (char-class char attribute-array attribute-hash-table)
1373 (#.+char-attr-constituent-digit+ (go RIGHTDIGIT))
1374 (#.+char-attr-constituent-dot+ (go DOTS))
1375 (#.+char-attr-delimiter+ (simple-reader-error stream
1376 "dot context error"))
1377 (#.+char-attr-single-escape+ (go SINGLE-ESCAPE))
1378 (#.+char-attr-multiple-escape+ (go MULT-ESCAPE))
1379 (#.+char-attr-package-delimiter+ (go COLON))
1380 (t (go SYMBOL)))
1381 EXPONENT
1382 (ouch-read-buffer char buf)
1383 (getchar-or-else (go RETURN-SYMBOL))
1384 (setq possibly-float t)
1385 (case (char-class char attribute-array attribute-hash-table)
1386 (#.+char-attr-constituent-sign+ (go EXPTSIGN))
1387 (#.+char-attr-constituent-digit+ (go EXPTDIGIT))
1388 (#.+char-attr-delimiter+ (unread-char char stream) (go RETURN-SYMBOL))
1389 (#.+char-attr-single-escape+ (go SINGLE-ESCAPE))
1390 (#.+char-attr-multiple-escape+ (go MULT-ESCAPE))
1391 (#.+char-attr-package-delimiter+ (go COLON))
1392 (t (go SYMBOL)))
1393 EXPTSIGN ; got to EXPONENT, and saw a sign character
1394 (ouch-read-buffer char buf)
1395 (getchar-or-else (go RETURN-SYMBOL))
1396 (case (char-class char attribute-array attribute-hash-table)
1397 (#.+char-attr-constituent-digit+ (go EXPTDIGIT))
1398 (#.+char-attr-delimiter+ (unread-char char stream) (go RETURN-SYMBOL))
1399 (#.+char-attr-single-escape+ (go SINGLE-ESCAPE))
1400 (#.+char-attr-multiple-escape+ (go MULT-ESCAPE))
1401 (#.+char-attr-package-delimiter+ (go COLON))
1402 (t (go SYMBOL)))
1403 EXPTDIGIT ; got to EXPONENT, saw "[sign] {digit}+"
1404 (ouch-read-buffer char buf)
1405 (getchar-or-else (return (make-float stream)))
1406 (case (char-class char attribute-array attribute-hash-table)
1407 (#.+char-attr-constituent-digit+ (go EXPTDIGIT))
1408 (#.+char-attr-delimiter+
1409 (unread-char char stream)
1410 (return (make-float stream)))
1411 (#.+char-attr-single-escape+ (go SINGLE-ESCAPE))
1412 (#.+char-attr-multiple-escape+ (go MULT-ESCAPE))
1413 (#.+char-attr-package-delimiter+ (go COLON))
1414 (t (go SYMBOL)))
1415 RATIO ; saw "[sign] {digit}+ slash"
1416 (ouch-read-buffer char buf)
1417 (getchar-or-else (go RETURN-SYMBOL))
1418 (case (char-class2 char attribute-array attribute-hash-table base)
1419 (#.+char-attr-constituent-digit+ (go RATIODIGIT))
1420 (#.+char-attr-delimiter+ (unread-char char stream) (go RETURN-SYMBOL))
1421 (#.+char-attr-single-escape+ (go SINGLE-ESCAPE))
1422 (#.+char-attr-multiple-escape+ (go MULT-ESCAPE))
1423 (#.+char-attr-package-delimiter+ (go COLON))
1424 (t (go SYMBOL)))
1425 RATIODIGIT ; saw "[sign] {digit}+ slash {digit}+"
1426 (ouch-read-buffer char buf)
1427 (getchar-or-else (return (make-ratio stream)))
1428 (case (char-class2 char attribute-array attribute-hash-table base)
1429 (#.+char-attr-constituent-digit+ (go RATIODIGIT))
1430 (#.+char-attr-delimiter+
1431 (unread-char char stream)
1432 (return (make-ratio stream)))
1433 (#.+char-attr-single-escape+ (go SINGLE-ESCAPE))
1434 (#.+char-attr-multiple-escape+ (go MULT-ESCAPE))
1435 (#.+char-attr-package-delimiter+ (go COLON))
1436 (t (go SYMBOL)))
1437 DOTS ; saw "dot {dot}+"
1438 (ouch-read-buffer char buf)
1439 (getchar-or-else (simple-reader-error stream "too many dots"))
1440 (case (char-class char attribute-array attribute-hash-table)
1441 (#.+char-attr-constituent-dot+ (go DOTS))
1442 (#.+char-attr-delimiter+
1443 (unread-char char stream)
1444 (simple-reader-error stream "too many dots"))
1445 (#.+char-attr-single-escape+ (go SINGLE-ESCAPE))
1446 (#.+char-attr-multiple-escape+ (go MULT-ESCAPE))
1447 (#.+char-attr-package-delimiter+ (go COLON))
1448 (t (go SYMBOL)))
1449 SYMBOL ; not a dot, dots, or number
1450 (let ((stream (in-stream-from-designator stream)))
1451 (macrolet
1452 ((scan (read-a-char &optional finish)
1453 `(prog ()
1454 SYMBOL-LOOP
1455 (ouch-read-buffer char buf)
1456 (setq char ,read-a-char)
1457 (when (eq char +EOF+) (go RETURN-SYMBOL))
1458 (case (char-class char attribute-array attribute-hash-table)
1459 (#.+char-attr-single-escape+ ,finish (go SINGLE-ESCAPE))
1460 (#.+char-attr-delimiter+ ,finish
1461 (unread-char char stream)
1462 (go RETURN-SYMBOL))
1463 (#.+char-attr-multiple-escape+ ,finish (go MULT-ESCAPE))
1464 (#.+char-attr-package-delimiter+ ,finish (go COLON))
1465 (t (go SYMBOL-LOOP))))))
1466 (if (ansi-stream-p stream)
1467 (prepare-for-fast-read-char stream
1468 (scan (fast-read-char nil +EOF+) (done-with-fast-read-char)))
1469 ;; CLOS stream
1470 (scan (read-char stream nil +EOF+)))))
1471 SINGLE-ESCAPE ; saw a single-escape
1472 ;; Don't put the escape character in the read buffer.
1473 ;; READ-NEXT CHAR, put in buffer (no case conversion).
1474 (let ((nextchar (read-char stream nil +EOF+)))
1475 (when (eq nextchar +EOF+)
1476 (reader-eof-error stream "after single-escape character"))
1477 (ouch-read-buffer-escaped nextchar buf))
1478 (getchar-or-else (go RETURN-SYMBOL))
1479 (case (char-class char attribute-array attribute-hash-table)
1480 (#.+char-attr-delimiter+ (unread-char char stream) (go RETURN-SYMBOL))
1481 (#.+char-attr-single-escape+ (go SINGLE-ESCAPE))
1482 (#.+char-attr-multiple-escape+ (go MULT-ESCAPE))
1483 (#.+char-attr-package-delimiter+ (go COLON))
1484 (t (go SYMBOL)))
1485 MULT-ESCAPE
1486 (setq seen-multiple-escapes t)
1487 ;; sometimes we pass eof-error=nil but check. here we just let it err.
1488 ;; should pick one style and stick with it.
1489 (do ((char (read-char stream t) (read-char stream t)))
1490 ((multiple-escape-p char rt))
1491 (if (single-escape-p char rt) (setq char (read-char stream t)))
1492 (ouch-read-buffer-escaped char buf))
1493 (getchar-or-else (go RETURN-SYMBOL))
1494 (case (char-class char attribute-array attribute-hash-table)
1495 (#.+char-attr-delimiter+ (unread-char char stream) (go RETURN-SYMBOL))
1496 (#.+char-attr-single-escape+ (go SINGLE-ESCAPE))
1497 (#.+char-attr-multiple-escape+ (go MULT-ESCAPE))
1498 (#.+char-attr-package-delimiter+ (go COLON))
1499 (t (go SYMBOL)))
1500 COLON
1501 (unless (zerop colons)
1502 (simple-reader-error
1503 stream "too many colons in ~S" (copy-token-buf-string buf)))
1504 (setf buf (normalize-read-buffer buf))
1505 (casify-read-buffer buf)
1506 (setq colons 1)
1507 (setq package-designator
1508 (if (or (plusp (token-buf-fill-ptr buf)) seen-multiple-escapes)
1509 (prog1 (sized-token-buf-string buf)
1510 (let ((new (acquire-token-buf)))
1511 (setf (token-buf-next new) buf ; new points to old
1512 buf new *read-buffer* new)))
1513 *keyword-package*))
1514 (reset-read-buffer buf)
1515 (getchar-or-else (reader-eof-error stream "after reading a colon"))
1516 (case (char-class char attribute-array attribute-hash-table)
1517 (#.+char-attr-delimiter+
1518 (unread-char char stream)
1519 (simple-reader-error stream
1520 "illegal terminating character after a colon: ~S"
1521 char))
1522 (#.+char-attr-single-escape+ (go SINGLE-ESCAPE))
1523 (#.+char-attr-multiple-escape+ (go MULT-ESCAPE))
1524 (#.+char-attr-package-delimiter+ (go INTERN))
1525 (t (go SYMBOL)))
1526 INTERN
1527 (setq colons 2)
1528 (getchar-or-else (reader-eof-error stream "after reading a colon"))
1529 (case (char-class char attribute-array attribute-hash-table)
1530 (#.+char-attr-delimiter+
1531 (unread-char char stream)
1532 (if package-designator
1533 (let* ((*reader-package*
1534 (reader-find-package package-designator stream)))
1535 (return (read stream t nil t)))
1536 (simple-reader-error stream
1537 "illegal terminating character after a double-colon: ~S"
1538 char)))
1539 (#.+char-attr-single-escape+ (go SINGLE-ESCAPE))
1540 (#.+char-attr-multiple-escape+ (go MULT-ESCAPE))
1541 (#.+char-attr-package-delimiter+
1542 (simple-reader-error stream
1543 "too many colons after ~S name"
1544 package-designator))
1545 (t (go SYMBOL)))
1546 RETURN-SYMBOL
1547 (setf buf (normalize-read-buffer buf))
1548 (casify-read-buffer buf)
1549 (let* ((pkg (if package-designator
1550 (reader-find-package package-designator stream)
1551 (or *reader-package* (sane-package))))
1552 (intern-p (or (/= colons 1) (eq pkg *keyword-package*))))
1553 (unless intern-p ; Try %FIND-SYMBOL
1554 (multiple-value-bind (symbol accessibility)
1555 (%find-symbol (token-buf-string buf) (token-buf-fill-ptr buf) pkg)
1556 (when (eq accessibility :external) (return symbol))
1557 (with-simple-restart (continue "Use symbol anyway.")
1558 (error 'simple-reader-package-error
1559 :package pkg
1560 :stream stream
1561 :format-arguments
1562 (list (copy-token-buf-string buf) (package-name pkg))
1563 :format-control
1564 (if accessibility
1565 "The symbol ~S is not external in the ~A package."
1566 "Symbol ~S not found in the ~A package.")))))
1567 (return (%intern (token-buf-string buf)
1568 (token-buf-fill-ptr buf)
1570 (if (token-buf-only-base-chars buf)
1571 (%readtable-symbol-preference rt)
1572 'character))))))))
;;; For semi-external use.  Read one extended token from STREAM and
;;; return three values: the token-buf, a flag telling whether an escape
;;; character was seen, and the position of any package delimiter.
;;; The returned token-buf is not case-converted.
(defun read-extended-token (stream)
  ;; READ-CHAR is called with RECURSIVE-P = T, which is basically irrelevant.
  (let ((ch (read-char stream nil +EOF+ t)))
    (cond ((eq ch +EOF+)
           ;; Nothing could be read: answer with the result of resetting
           ;; *READ-BUFFER* and NIL for both flags.
           (values (reset-read-buffer *read-buffer*) nil nil))
          (t
           ;; Third argument NIL: the first character is not escaped.
           (internal-read-extended-token stream ch nil)))))
;;; For semi-external use.
;;;
;;; Read an extended token from STREAM, treating its first character as
;;; escaped.  Return the token-buf only; it is not case-converted.
;;; Signals a reader EOF error if STREAM is already at end of file.
(defun read-extended-token-escaped (stream)
  (let ((ch (read-char stream nil +EOF+)))
    (cond ((eq ch +EOF+)
           (reader-eof-error stream "after escape"))
          (t
           ;; VALUES discards everything but the primary value (the
           ;; token-buf) of INTERNAL-READ-EXTENDED-TOKEN.
           (values (internal-read-extended-token stream ch t))))))
1594 ;;;; number-reading functions
;; Mapping of read-base to the max input characters in a positive fixnum.
(eval-when (:compile-toplevel :execute)
  ;; Return a 35-element (UNSIGNED-BYTE 8) vector indexed by (- BASE 2)
  ;; for BASE in 2..36.  Each element is the largest digit count N such
  ;; that N copies of the highest digit of that base are still not a
  ;; bignum according to SB!XC:TYPEP.
  (defun integer-reader-safe-digits ()
    (do ((a (make-array 35 :element-type '(unsigned-byte 8)))
         (base 2 (1+ base)))
        ((> base 36) a)
      ;; TOTAL is the largest value having (1+ N-DIGITS) digits in BASE.
      ;; As soon as it becomes a bignum, N-DIGITS is the answer for BASE.
      (do ((total (1- base) (+ (* total base) (1- base)))
           (n-digits 0 (1+ n-digits)))
          ((sb!xc:typep total 'bignum)
           (setf (aref a (- base 2)) n-digits))
        ;; empty DO body
        )))

  ;; self-test: for every base, N-DIGITS copies of the largest digit must
  ;; parse to a positive fixnum, and one digit more must not.
  (do ((maxdigits (integer-reader-safe-digits))
       (base 2 (1+ base)))
      ((> base 36))
    (let* ((n-digits (aref maxdigits (- base 2)))
           (d (char (write-to-string (1- base) :base base) 0))
           (string (make-string (1+ n-digits) :initial-element d))) ; 1 extra
      (assert (not (typep (parse-integer string :radix base)
                          `(unsigned-byte ,sb!vm:n-positive-fixnum-bits))))
      (assert (typep (parse-integer string :end n-digits :radix base)
                     `(unsigned-byte ,sb!vm:n-positive-fixnum-bits))))))
;;; Examine one character of TOKEN-BUF's string for a sign and move the
;;; buffer cursor past it.  A #\- also sets SIGN-FLAG to T.
;;;
;;; REWIND is inspected at macroexpansion time:
;;;  - true:  look at index 0 and SETF the cursor to 0 or 1;
;;;  - false: look at the current cursor position and INCF the cursor
;;;           by 0 or 1.
;;;
;;; The buffer is guaranteed to have at least one character at the start,
;;; or immediately following an [ESFDL] marker, depending on REWIND, which
;;; is why array bounds checks are switched off in the expansion.
(defmacro !setq-optional-leading-sign (sign-flag token-buf rewind)
  (let ((updater (if rewind 'setf 'incf))
        (index-form (if rewind 0 `(token-buf-cursor ,token-buf))))
    `(locally (declare (optimize (sb!c::insert-array-bounds-checks 0)))
       (,updater (token-buf-cursor ,token-buf)
                 (case (elt (token-buf-string ,token-buf) ,index-form)
                   (#\- (setq ,sign-flag t) 1)
                   (#\+ 1)
                   (t 0))))))
(defun make-integer (&optional (base *read-base*))
  "Build an integer in radix BASE from the characters of *READ-BUFFER*.
Minimizes bignum-fixnum multiplies by reading a 'safe' number of digits,
then multiplying by a power of the base and adding."
  (declare ((integer 2 36) base)
           (inline token-buf-getchar)) ; makes for smaller code
  (let* ((buf *read-buffer*)
         (negativep nil)
         (result 0)
         ;; How many digits of BASE may be accumulated in a fixnum.  The
         ;; table is built when the MACROLET is expanded.
         (fixnum-max-digits
           (macrolet ((maxdigits ()
                        (!coerce-to-specialized (integer-reader-safe-digits)
                                                '(unsigned-byte 8))))
             (aref (maxdigits) (- base 2))))
         ;; BASE raised to FIXNUM-MAX-DIGITS, likewise looked up in a table
         ;; computed at macroexpansion time for every radix 2..36.
         (base-power
           (macrolet ((base-powers ()
                        (do ((limits (integer-reader-safe-digits))
                             (table (make-array 35))
                             (radix 2 (1+ radix)))
                            ((> radix 36) table)
                          (setf (aref table (- radix 2))
                                (expt radix (aref limits (- radix 2)))))))
             (truly-the integer (aref (base-powers) (- base 2))))))
    (!setq-optional-leading-sign negativep buf t)
    (loop
      ;; CHUNK collects up to FIXNUM-MAX-DIGITS digits without leaving
      ;; fixnum range; only then is it folded into RESULT.
      (let ((chunk 0))
        (declare (type (and fixnum unsigned-byte) chunk))
        (dotimes (n-read fixnum-max-digits)
          (let ((ch (token-buf-getchar buf)))
            ;; No more characters, or a dot: the number is complete.
            ;; Only N-READ digits are in the current chunk, so scale
            ;; RESULT by BASE^N-READ rather than by BASE-POWER.
            (when (or (null ch) (eql ch #\.))
              (let ((magnitude (if (zerop result)
                                   chunk
                                   (+ (* result (expt base n-read)) chunk))))
                (return-from make-integer
                  (if negativep (- magnitude) magnitude))))
            (setq chunk (truly-the fixnum
                                   (+ (digit-char-p ch base)
                                      (truly-the fixnum (* chunk base)))))))
        (setq result (+ (* result base-power) chunk))))))
(defun truncate-exponent (exponent number divisor)
  "Clamp the decimal EXPONENT of NUMBER/DIVISOR when it is too large
for a float.  A negative EXPONENT is clamped from below, any other from
above; an EXPONENT already within bounds is returned unchanged."
  ;; Everything is done with base-2 logarithms (INTEGER-LENGTH) so that no
  ;; conversion to floats is needed; the result is converted to base 10
  ;; conservatively at the end by dividing by (floor (log 10 2)).
  ;; The bound is derived from the least positive float, because a
  ;; denormalized exponent can be larger than a normalized one.
  (let* ((max-exponent
           #!-long-float
           (+ sb!vm:double-float-digits sb!vm:double-float-bias))
         ;; Approximate binary magnitude of NUMBER/DIVISOR.
         (magnitude (- (integer-length number)
                       (1- (integer-length divisor)))))
    (cond ((minusp exponent)
           (max exponent
                (ceiling (- (+ max-exponent magnitude))
                         #.(floor (log 10 2)))))
          (t
           (min exponent
                (floor (- max-exponent magnitude)
                       #.(floor (log 10 2))))))))
(defun make-float (stream)
  ;; Build a float from the token in *READ-BUFFER*, which is assumed to
  ;; hold a legal float with nothing else after it.  STREAM is only handed
  ;; on to MAKE-FLOAT-AUX.
  (let ((buf *read-buffer*)
        (negative-fraction nil)
        (negative-exponent nil)
        (number 0)
        (divisor 1)
        (exponent 0)
        char)
    (!setq-optional-leading-sign negative-fraction buf t)
    (macrolet ((accumulate (expr)
                 ;; Keep fetching characters into CHAR while they are
                 ;; decimal digits, evaluating EXPR with DIGIT bound to
                 ;; the digit's weight.  Afterwards CHAR holds whatever
                 ;; stopped the scan (NIL when TOKEN-BUF-GETCHAR had
                 ;; nothing more to give).
                 `(let (digit)
                    (loop
                      (unless (and (setq char (token-buf-getchar buf))
                                   (setq digit (digit-char-p char)))
                        (return))
                      ,expr))))
      (flet ((apply-sign (magnitude)
               (if negative-fraction (- magnitude) magnitude)))
        ;; Digits before the dot.
        (accumulate (setq number (+ (* number 10) digit)))
        ;; Digits after the dot, if there is one: each of them also
        ;; multiplies the divisor by ten.
        (when (char= char #\.)
          (accumulate (setq divisor (* divisor 10)
                            number (+ (* number 10) digit))))
        (cond
          ((null char)
           ;; No exponent marker: the whole number has been read.
           (apply-sign (make-float-aux number divisor
                                       *read-default-float-format*
                                       stream)))
          ((= (get-constituent-trait char) +char-attr-constituent-expt+)
           ;; Remember the marker; ACCUMULATE overwrites CHAR below.
           (let ((marker char))
             ;; Optional sign of the exponent, then its digits.
             (!setq-optional-leading-sign negative-exponent buf nil)
             (accumulate (setq exponent (+ (* exponent 10) digit)))
             (when negative-exponent
               (setq exponent (- exponent)))
             ;; The marker character selects the float format.
             (let* ((float-format (case (char-upcase marker)
                                    (#\E *read-default-float-format*)
                                    (#\S 'short-float)
                                    (#\F 'single-float)
                                    (#\D 'double-float)
                                    (#\L 'long-float)))
                    (clamped (truncate-exponent exponent number divisor))
                    (magnitude (make-float-aux (* (expt 10 clamped) number)
                                               divisor float-format stream)))
               (apply-sign magnitude))))
          (t (bug "bad fallthrough in floating point reader")))))))
;;; Coerce the rational NUMBER/DIVISOR to FLOAT-FORMAT.  A TYPE-ERROR
;;; raised while doing so is re-signalled as a
;;; READER-IMPOSSIBLE-NUMBER-ERROR on STREAM that wraps the original
;;; condition and quotes the current contents of *READ-BUFFER*.
(defun make-float-aux (number divisor float-format stream)
  (handler-case
      (let ((ratio (/ number divisor)))
        (coerce ratio float-format))
    (type-error (condition)
      (error 'reader-impossible-number-error
             :error condition
             :stream stream
             :format-control "failed to build float from ~a"
             :format-arguments (list (copy-token-buf-string *read-buffer*))))))
(defun make-ratio (stream)
  ;; Build a ratio from the token in *READ-BUFFER*, assumed to be a legal
  ;; ratio, reading digits in *READ-BASE*.
  ;; This code is inferior to that of MAKE-INTEGER because it makes no
  ;; attempt to perform as few bignum multiplies as possible.
  (let ((buf *read-buffer*)
        (base *read-base*)
        (negativep nil)
        (num 0)
        (den 0))
    (!setq-optional-leading-sign negativep buf t)
    ;; Numerator: consume characters up to and including the first
    ;; non-digit.
    (loop for weight = (digit-char-p (token-buf-getchar buf) base)
          while weight
          do (setq num (+ (* num base) weight)))
    ;; Denominator: consume digits until the characters run out or a
    ;; non-digit turns up.
    (loop for ch = (token-buf-getchar buf)
          for weight = (and ch (digit-char-p ch base))
          while weight
          do (setq den (+ (* den base) weight)))
    ;; An ARITHMETIC-ERROR from the division (e.g. a zero denominator) is
    ;; reported as a reader error on STREAM.
    (let ((magnitude
            (handler-case (/ num den)
              (arithmetic-error (condition)
                (error 'reader-impossible-number-error
                       :error condition
                       :stream stream
                       :format-control "failed to build ratio")))))
      (if negativep (- magnitude) magnitude))))
1780 ;;;; General reader for dispatch macros
;;; Fallback used by READ-DISPATCH-CHAR for sub-characters that have no
;;; dispatch function.  Returns no values when *READ-SUPPRESS* is true;
;;; otherwise signals a reader error on STREAM naming SUB-CHAR.  The
;;; third argument is ignored.
(defun dispatch-char-error (stream sub-char ignore)
  (declare (optimize allow-non-returning-tail-call)
           (ignore ignore))
  (cond (*read-suppress*
         (values))
        (t
         (simple-reader-error stream
                              "no dispatch function defined for ~S"
                              sub-char))))
;;; Read the optional decimal numeric argument and the sub-character that
;;; follow a dispatching macro character on STREAM, then call the function
;;; registered for the upcased sub-character in DISPATCH-TABLE with
;;; STREAM, the sub-character, and the numeric argument (NIL when no digit
;;; was present).  DISPATCH-CHAR-ERROR is supplied as the fallback
;;; function.  Signals a reader EOF error if input ends first.
(defun read-dispatch-char (stream dispatch-table)
  (let ((numarg nil)                    ; stays NIL until a digit is seen
        (sub-char nil))
    ;; Read some digits.
    (loop
      (let ((ch (read-char stream nil +EOF+))
            weight)
        (cond ((eq ch +EOF+)
               (reader-eof-error stream "inside dispatch character"))
              ((setq weight (digit-char-p ch))
               (setq numarg (+ (* (or numarg 0) 10) weight)))
              (t
               ;; The first non-digit is the sub-character.
               (setq sub-char (char-upcase ch))
               (return)))))
    ;; Look up the function and call it.
    (let ((entry (get-raw-cmt-dispatch-entry sub-char dispatch-table)))
      (funcall (!cmt-entry-to-function entry #'dispatch-char-error)
               stream sub-char numarg))))
1810 ;;;; READ-FROM-STRING
(declaim (ftype (sfunction (string t t index (or null index) t) (values t index))
                %read-from-string))
;;; Workhorse behind READ-FROM-STRING.  Reads one object from the part of
;;; STRING delimited by START and END and returns two values: the object
;;; and the index, relative to STRING itself, at which reading stopped.
;;; PRESERVE-WHITESPACE selects %READ-PRESERVING-WHITESPACE over READ.
(defun %read-from-string (string eof-error-p eof-value start end preserve-whitespace)
  (with-array-data ((string string :offset-var offset)
                    (start start)
                    (end end)
                    :check-fill-pointer t)
    ;; Inside WITH-ARRAY-DATA, STRING/START/END are rebound; the stream
    ;; position is turned back into an index of the caller's string by
    ;; subtracting OFFSET.
    (let* ((in (make-string-input-stream string start end))
           (object (if preserve-whitespace
                       (%read-preserving-whitespace in eof-error-p eof-value nil)
                       (read in eof-error-p eof-value)))
           (position (- (string-input-stream-current in) offset)))
      (values object position))))
;; STYLE-WARNINGs raised while compiling this definition are muffled.
;; NOTE(review): presumably because of the &OPTIONAL/&KEY mix in the
;; lambda list -- confirm.
(locally
    (declare (muffle-conditions style-warning))
  (defun read-from-string (string &optional (eof-error-p t) eof-value
                                  &key (start 0) end preserve-whitespace)
    "Hand the characters of STRING, bounded by START and END, to the
Lisp reader one after another and return the Lisp object the reader
builds from them. Macro characters take effect."
    (declare (string string))
    (maybe-note-read-from-string-signature-issue eof-error-p)
    ;; All the real work, including the second return value, is done by
    ;; %READ-FROM-STRING.
    (%read-from-string string eof-error-p eof-value
                       start end preserve-whitespace)))
1836 ;;;; PARSE-INTEGER
(defun parse-integer (string &key (start 0) end (radix 10) junk-allowed)
  "Examine the substring of STRING delimited by START and END (which
default to the beginning and end of the string). Skip over whitespace
characters and then try to parse an integer in the given RADIX, which
must be between 2 and 36.
Return two values: the integer (or NIL if none was found and
JUNK-ALLOWED is true) and the index into STRING at which parsing
stopped. Unless JUNK-ALLOWED is true, a SIMPLE-PARSE-ERROR is signalled
when the substring is not an optionally signed integer surrounded by
optional whitespace."
  (flet ((parse-error (format-control)
           (declare (optimize allow-non-returning-tail-call))
           (error 'simple-parse-error
                  :format-control format-control
                  :format-arguments (list string))))
    ;; From here on STRING, START and END refer to the underlying data
    ;; vector; OFFSET must be subtracted from any position handed back to
    ;; the caller.
    (with-array-data ((string string :offset-var offset)
                      (start start)
                      (end end)
                      :check-fill-pointer t)
      (let ((index (do ((i start (1+ i)))
                       ((= i end)
                        ;; Nothing but whitespace (or nothing at all).
                        (if junk-allowed
                            ;; Report the position relative to the
                            ;; caller's string, exactly like the normal
                            ;; exit at the bottom does.
                            (return-from parse-integer
                              (values nil (- end offset)))
                            (parse-error "no non-whitespace characters in string ~S.")))
                     (declare (fixnum i))
                     (unless (whitespace[1]p (char string i)) (return i))))
            (minusp nil)
            (found-digit nil)
            (result 0))
        (declare (fixnum index))
        ;; Optional sign.
        (let ((char (char string index)))
          (cond ((char= char #\-)
                 (setq minusp t)
                 (incf index))
                ((char= char #\+)
                 (incf index))))
        ;; Digits, then optional trailing whitespace.
        (loop
          (when (= index end) (return nil))
          (let* ((char (char string index))
                 (weight (digit-char-p char radix)))
            (cond (weight
                   (setq result (+ weight (* result radix))
                         found-digit t))
                  (junk-allowed (return nil))
                  ((whitespace[1]p char)
                   ;; Only whitespace may follow up to END.
                   (loop
                     (incf index)
                     (when (= index end) (return))
                     (unless (whitespace[1]p (char string index))
                       (parse-error "junk in string ~S")))
                   (return nil))
                  (t
                   (parse-error "junk in string ~S"))))
          (incf index))
        (values
         (if found-digit
             (if minusp (- result) result)
             (if junk-allowed
                 nil
                 (parse-error "no digits in string ~S")))
         (- index offset))))))
1895 ;;;; reader initialization code
;;; Cold-init entry point of the reader.  It has nothing to do except
;;; delegate to !COLD-INIT-STANDARD-READTABLE.
(defun !reader-cold-init ()
  (!cold-init-standard-readtable))
;;; Readtables print unreadably, showing nothing but their type and
;;; identity.
(defmethod print-object ((readtable readtable) stream)
  (print-unreadable-object (readtable stream :type t :identity t)))
;; Backward-compatibility adapter. The "named-readtables" system in
;; Quicklisp expects this interface, and it's a reasonable thing to support.
;; What is silly however is that DISPATCH-TABLES was an alist each of whose
;; values was a hashtable which got immediately coerced to an alist.
;; In anticipation of perhaps not doing an extra re-shaping, if HASH-TABLE-P
;; is NIL then return nested alists: ((#\# (#\R . #<FUNCTION SHARP-R>) ...))
;; Otherwise (the default) each value is a hash-table.
(defun dispatch-tables (readtable &optional (hash-table-p t))
  (let ((result '()))
    (flet ((collect (char fn)
             ;; Add an entry for CHAR to RESULT if FN has a dispatch table.
             ;; The table is a cons: its car, when non-NIL, is turned into
             ;; an alist; its cdr is a simple-vector indexed by char code.
             (let ((dtable (%dispatch-macro-char-table fn)))
               (when dtable
                 (let* ((overflow (car dtable))
                        (entries (and overflow (%hash-table-alist overflow)))
                        (table (the simple-vector (cdr dtable))))
                   (dotimes (code (length table))
                     (let ((entry (svref table code)))
                       (when entry
                         (push (cons (code-char code) entry) entries))))
                   ;; Coerce values to function-designators.
                   (dolist (cell entries)
                     (setf (cdr cell)
                           (!cmt-entry-to-fun-designator (cdr cell))))
                   (push (cons char
                               (if hash-table-p ; caller wants hash-tables
                                   (%stuff-hash-table (make-hash-table) entries)
                                   entries))
                         result))))))
      ;; Macro characters kept in the array first, then those kept in the
      ;; hash-table.
      (let ((macros (character-macro-array readtable)))
        (dotimes (code (length macros))
          (collect (code-char code) (aref macros code))))
      (maphash #'collect (character-macro-hash-table readtable)))
    result))
;; Stub - should never get called with anything but NIL
;; and only after all macros have been changed to constituents already.
;; READTABLE is ignored; a non-NIL NEW-ALIST is rejected with an error,
;; and NIL is simply returned.
(defun (setf dispatch-tables) (new-alist readtable)
  (declare (ignore readtable))
  (when new-alist
    (error "Assignment to virtual DISPATCH-TABLES slot not allowed"))
  new-alist)
;;; like LISTEN, but any whitespace in the input stream will be flushed.
;;; Returns T, after unreading it, as soon as a non-whitespace character
;;; is available on STREAM; returns NIL when READ-CHAR-NO-HANG yields NIL
;;; (no character available, or end of file).
(defun listen-skip-whitespace (&optional (stream *standard-input*))
  (loop for ch = (read-char-no-hang stream nil nil nil)
        while ch
        unless (whitespace[1]p ch)
          do (unread-char ch stream)
             (return t)))