;;;; This software is part of the SBCL system. See the README file for
;;;; more information.
;;;;
6 ;;;; This software is derived from the CMU CL system, which was
7 ;;;; written at Carnegie Mellon University and released into the
8 ;;;; public domain. The software is in the public domain and is
9 ;;;; provided with absolutely no warranty. See the COPYING and CREDITS
10 ;;;; files for more information.
12 (in-package "SB-IMPL")
;;; ATTRIBUTE-TABLE: a one-dimensional simple array of (UNSIGNED-BYTE 8)
;;; with exactly BASE-CHAR-CODE-LIMIT elements.
(sb-xc:deftype attribute-table ()
  ;; Build the type specifier explicitly so the dimension list picks up
  ;; the value of BASE-CHAR-CODE-LIMIT at expansion time.
  (list 'simple-array
        '(unsigned-byte 8)
        (list base-char-code-limit)))
;;; constants for readtable character attributes. These are all as in
;;; the manual.
;;;
;;; FIXME: wait a minute. Firstly, I doubt they're in the manual.
;;; Secondly, the numerical order of these constants is coupled with
;;; code in CHAR-CLASS{,2,3} in the reader implementation, so beware
;;; when changing them.
;; Character attribute codes 0..9.  The values are consecutive small
;; integers, and their numerical order is relied upon by CHAR-CLASS{,2,3}
;; in the reader implementation, so do not renumber them casually.
(defconstant +char-attr-whitespace+ 0)
(defconstant +char-attr-terminating-macro+ 1)
(defconstant +char-attr-single-escape+ 2)
(defconstant +char-attr-multiple-escape+ 3)
;; The plain constituent attribute, followed by the more specific
;; varieties of constituent.
(defconstant +char-attr-constituent+ 4)
(defconstant +char-attr-constituent-dot+ 5)
(defconstant +char-attr-constituent-expt+ 6)
(defconstant +char-attr-constituent-slash+ 7)
(defconstant +char-attr-constituent-digit+ 8)
(defconstant +char-attr-constituent-sign+ 9)
;;; The following two are not static but depend on *READ-BASE*.
;;; DECIMAL-DIGIT is for characters being digits in base 10 but not in
;;; base *READ-BASE* (which is therefore perforce smaller than 10);
;;; DIGIT-OR-EXPT is for characters being both exponent markers and
;;; digits in base *READ-BASE* (which is therefore perforce larger
;;; than 10). -- CSR, 2004-03-16
(defconstant +char-attr-constituent-decimal-digit+ 10)
(defconstant +char-attr-constituent-digit-or-expt+ 11)
;; Remaining attribute codes, continuing the same consecutive numbering.
(defconstant +char-attr-package-delimiter+ 12)
(defconstant +char-attr-invalid+ 13)
;; Meta: there is no such function as READ-UNQUALIFIED-TOKEN. No biggie.
(defconstant +char-attr-delimiter+ 14) ; (a fake for READ-UNQUALIFIED-TOKEN)
;; An EQ hash table used as the initial value of a readtable's
;; EXTENDED-CHAR-TABLE slot.
;; NOTE(review): the name suggests it is meant to stay empty and be
;; shared between readtables -- confirm against the code that installs
;; extended-char entries.
(define-load-time-global *empty-extended-char-table*
  (make-hash-table :rehash-size 1 :test #'eq))
(sb-xc:defstruct (readtable (:conc-name nil)
                            (:constructor make-readtable ())
                            (:predicate readtablep)
                            ;; ANSI requires a CL:COPY-READTABLE to do
                            ;; a deep copy, so the DEFSTRUCT-generated
                            ;; default is not suitable.
                            (:copier nil))
  "A READTABLE is a data structure that maps characters into syntax
types for the Common Lisp expression reader."
  ;; The BASE-CHAR-SYNTAX-ARRAY is a vector of BASE-CHAR-CODE-LIMIT
  ;; integers for describing the character type. Conceptually, there
  ;; are 4 distinct "primary" character attributes: whitespace
  ;; (+CHAR-ATTR-WHITESPACE+), terminating macro
  ;; (+CHAR-ATTR-TERMINATING-MACRO+), escape
  ;; (+CHAR-ATTR-SINGLE-ESCAPE+ and +CHAR-ATTR-MULTIPLE-ESCAPE+), and
  ;; constituent (+CHAR-ATTR-CONSTITUENT+). Non-terminating macros
  ;; (such as the symbol reader) have the attribute
  ;; +CHAR-ATTR-CONSTITUENT+.
  ;;
  ;; In order to make READ-TOKEN fast, all this information is stored
  ;; in the character attribute table by having different varieties of
  ;; constituents.
  (base-char-syntax-array
   (make-array base-char-code-limit
               :element-type '(unsigned-byte 8)
               :initial-element +char-attr-constituent+)
   ;; ATTRIBUTE-TABLE is exactly the array type built by the initform.
   :type attribute-table)
  ;; The BASE-CHAR-MACRO-ARRAY is a vector of BASE-CHAR-CODE-LIMIT
  ;; functions. One of these functions is called with appropriate
  ;; arguments whenever any non-WHITESPACE character is encountered
  ;; inside READ-PRESERVING-WHITESPACE. These functions are used to
  ;; implement user-defined read-macros, system read-macros, and the
  ;; number-symbol reader.  Entries start out as NIL.
  (base-char-macro-array
   (make-array base-char-code-limit :initial-element nil)
   :type (simple-vector #.base-char-code-limit))
  ;; Characters above the BASE-CHAR range
  (extended-char-table *empty-extended-char-table* :type hash-table)
  (%readtable-case :upcase :type (member :upcase :downcase :preserve :invert))
  ;; Element type to use when reading a string literal with no extended-chars.
  ;; The system itself prefers base-string, but otherwise it is a contentious
  ;; issue. We don't (by default) use base-strings, because people often write:
  ;;   (SETF (CHAR (READ-STRING S) 0) #\PILE_OF_POO),
  ;; or more likely, something the effect of which resembles
  ;;   (SETF (CHAR (ADJUST-ARRAY "" 10) 0) #\SMILE)
  ;; which are each dubious constructs, because they assume READ to produce
  ;; strings capable of holding any char. The latter further assumes something
  ;; about compilation, because in that example, considering that there are no
  ;; characters in the literal, it is unclear whether the array should
  ;; be similar-as-constant to an array of base-char or array of character.
  ;; While indeed SBCL prints base-strings readably (if *PRINT-READABLY* is T)
  ;; using #. syntax, the question is what the writer of the code intended
  ;; if (s)he did not know that the string should have been expressly
  ;; specified via #.(MAKE-STRING ... :ELEMENT-TYPE) or somesuch.
  ;;
  ;; Hence the default is CHARACTER, not BASE-CHAR.
  (%readtable-string-preference 'character :type (member character base-char))
  ;; With symbols, it's fairly clear that immutability of print names
  ;; renders the distinction between the kinds of string in the symbol-name
  ;; as being less relevant. If you expect (copy-seq (string asymbol))
  ;; to produce a certain type of string, your code is unportable anyway.
  (%readtable-symbol-preference 'base-char :type (member character base-char))
  ;; Defaults to T when the SB-UNICODE feature is present, NIL otherwise.
  (%readtable-normalization #+sb-unicode t #-sb-unicode nil :type boolean))
;; NOTE(review): FREEZE-TYPE is presumably the SBCL declaration promising
;; that READTABLE will acquire no further subtypes, so type tests on it
;; can be compiled more efficiently -- confirm against the SBCL manual.
(declaim (freeze-type readtable))