;;;; This software is part of the SBCL system. See the README file for
;;;; more information.
6 ;;;; This software is derived from the CMU CL system, which was
7 ;;;; written at Carnegie Mellon University and released into the
8 ;;;; public domain. The software is in the public domain and is
9 ;;;; provided with absolutely no warranty. See the COPYING and CREDITS
10 ;;;; files for more information.
12 (in-package "SB!IMPL")
;;; Type of a readtable's per-character attribute vector: a simple
;;; array of (UNSIGNED-BYTE 8) with exactly BASE-CHAR-CODE-LIMIT elements.
;;; The dimension is spliced in at read time via #. so the type
;;; specifier contains a literal integer.
(sb!xc:deftype attribute-table ()
  '(simple-array (unsigned-byte 8) (#.base-char-code-limit)))
;;; constants for readtable character attributes. These are all as in
;;; the manual.
;;;
20 ;;; FIXME: wait a minute. Firstly, I doubt they're in the manual.
21 ;;; Secondly, the numerical order of these constants is coupled with
22 ;;; code in CHAR-CLASS{,2,3} in the reader implementation, so beware
23 ;;; when changing them.
;;; Primary character attributes (codes 0-4), followed by the
;;; specialised constituent varieties (codes 5-9). The numeric order
;;; matters to the reader implementation; do not renumber casually.
(defconstant +char-attr-whitespace+        0)
(defconstant +char-attr-terminating-macro+ 1)
(defconstant +char-attr-single-escape+     2)
(defconstant +char-attr-multiple-escape+   3)
(defconstant +char-attr-constituent+       4)
(defconstant +char-attr-constituent-dot+   5)
(defconstant +char-attr-constituent-expt+  6)
(defconstant +char-attr-constituent-slash+ 7)
(defconstant +char-attr-constituent-digit+ 8)
(defconstant +char-attr-constituent-sign+  9)
34 ;;; the following two are not static but depend on *READ-BASE*.
35 ;;; DECIMAL-DIGIT is for characters being digits in base 10 but not in
36 ;;; base *READ-BASE* (which is therefore perforce smaller than 10);
37 ;;; DIGIT-OR-EXPT is for characters being both exponent markers and
38 ;;; digits in base *READ-BASE* (which is therefore perforce larger
39 ;;; than 10). -- CSR, 2004-03-16
;; Attribute codes whose applicability depends on the current value
;; of *READ-BASE* rather than being fixed per character.
(defconstant +char-attr-constituent-decimal-digit+ 10)
(defconstant +char-attr-constituent-digit-or-expt+ 11)
;; Attribute codes for the package delimiter and for invalid characters.
(defconstant +char-attr-package-delimiter+ 12)
(defconstant +char-attr-invalid+           13)
45 ;; Meta: there is no such function as READ-UNQUALIFIED-TOKEN. No biggie.
(defconstant +char-attr-delimiter+ 14) ; (a fake for READ-UNQUALIFIED-TOKEN)
(sb!xc:defstruct (readtable (:conc-name nil)
                            (:constructor make-readtable ())
                            (:predicate readtablep)
                            ;; ANSI requires a CL:COPY-READTABLE to do
                            ;; a deep copy, so the DEFSTRUCT-generated
                            ;; default is not suitable.
                            (:copier nil))
  "A READTABLE is a data structure that maps characters into syntax
types for the Common Lisp expression reader."
  ;; The CHARACTER-ATTRIBUTE-ARRAY is a vector of BASE-CHAR-CODE-LIMIT
  ;; integers for describing the character type. Conceptually, there
  ;; are 4 distinct "primary" character attributes:
  ;; +CHAR-ATTR-WHITESPACE+, +CHAR-ATTR-TERMINATING-MACRO+,
  ;; +CHAR-ATTR-ESCAPE+, and +CHAR-ATTR-CONSTITUENT+. Non-terminating
  ;; macros (such as the symbol reader) have the attribute
  ;; +CHAR-ATTR-CONSTITUENT+.
  ;;
  ;; In order to make READ-TOKEN fast, all this information is stored
  ;; in the character attribute table by having different varieties of
  ;; constituents.
  (character-attribute-array
   (make-array base-char-code-limit
               :element-type '(unsigned-byte 8)
               :initial-element +char-attr-constituent+)
   :type attribute-table)
  ;; Hash-table companion to CHARACTER-ATTRIBUTE-ARRAY.
  ;; NOTE(review): presumably holds attributes for characters whose
  ;; codes are at or above BASE-CHAR-CODE-LIMIT -- confirm against the
  ;; reader implementation.
  (character-attribute-hash-table (make-hash-table) :type hash-table)
  ;; The CHARACTER-MACRO-ARRAY is a vector of BASE-CHAR-CODE-LIMIT
  ;; functions. One of these functions is called with appropriate
  ;; arguments whenever any non-WHITESPACE character is encountered
  ;; inside READ-PRESERVING-WHITESPACE. These functions are used to
  ;; implement user-defined read-macros, system read-macros, and the
  ;; number-symbol reader.
  (character-macro-array
   (make-array base-char-code-limit :initial-element nil)
   :type (simple-vector #.base-char-code-limit))
  ;; Hash-table companion to CHARACTER-MACRO-ARRAY.
  ;; NOTE(review): presumably for characters outside the base-char
  ;; range -- confirm against the reader implementation.
  (character-macro-hash-table (make-hash-table) :type hash-table)
  ;; Case mode stored as a small integer code; the default is 0.
  (%readtable-case 0 :type (mod 4))
  ;; Element type to use when reading a string literal with no extended-chars.
  ;; The system itself prefers base-string, but otherwise it is a contentious
  ;; issue. We don't (by default) use base-strings, because people often write:
  ;;  (SETF (CHAR (READ-STRING S) 0) #\PILE_OF_POO),
  ;; or more likely, something the effect of which resembles
  ;;  (SETF (CHAR (ADJUST-ARRAY "" 10) 0) #\SMILE)
  ;; which are each dubious constructs, because they assume READ to produce
  ;; strings capable of holding any char. The latter further assumes something
  ;; about compilation, because in that example, considering that there are no
  ;; characters in the literal, it is unclear whether the array should
  ;; be similar-as-constant to an array of base-char or array of character.
  ;; While indeed SBCL prints base-strings readably (if *PRINT-READABLY* is T)
  ;; using #. syntax, the question is what the writer of the code intended
  ;; if (s)he did not know that the string should have been expressly
  ;; specified via #.(MAKE-STRING ... :ELEMENT-TYPE) or somesuch.
  ;;
  ;; The default is therefore CHARACTER, in agreement with the
  ;; rationale above.
  (%readtable-string-preference 'character
                                :type (member character base-char))
  ;; With symbols, it's fairly clear that immutability of print names
  ;; renders the distinction between the kinds of string in the symbol-name
  ;; as being less relevant. If you expect (copy-seq (string asymbol))
  ;; to produce a certain type of string, your code is unportable anyway.
  (%readtable-symbol-preference 'base-char
                                :type (member character base-char))
  ;; Defaults to T when the SB-UNICODE feature is present, NIL otherwise.
  (%readtable-normalization #!+sb-unicode t #!-sb-unicode nil
                            :type boolean))
;;; Integer codes for readtable case modes.
(defconstant +readtable-upcase+   0)
(defconstant +readtable-downcase+ 1)
;;; FREEZE-TYPE is an SBCL-specific declaration; per the SBCL manual it
;;; promises that READTABLE will acquire no further subtypes.
(declaim (freeze-type readtable))