x86-64: Treat more symbols as having immediate storage class
[sbcl.git] / src / code / readtable.lisp
blobbf7b4f2998efd40b8b524db3cbad53b50bdad5a8
1 ;;;; READTABLEs
3 ;;;; This software is part of the SBCL system. See the README file for
4 ;;;; more information.
5 ;;;;
6 ;;;; This software is derived from the CMU CL system, which was
7 ;;;; written at Carnegie Mellon University and released into the
8 ;;;; public domain. The software is in the public domain and is
9 ;;;; provided with absolutely no warranty. See the COPYING and CREDITS
10 ;;;; files for more information.
12 (in-package "SB!IMPL")
;;; ATTRIBUTE-TABLE is a one-dimensional simple array of (UNSIGNED-BYTE 8)
;;; whose length is BASE-CHAR-CODE-LIMIT; the #. splices that value into
;;; the type specifier at read time. It is the declared type of the
;;; CHARACTER-ATTRIBUTE-ARRAY slot of READTABLE.
14 (sb!xc:deftype attribute-table ()
15 '(simple-array (unsigned-byte 8) (#.base-char-code-limit)))
17 ;;; Constants for readtable character attributes. An older version of
18 ;;; this comment claimed that these are all as in the manual.
19 ;;;
20 ;;; FIXME: wait a minute. Firstly, I doubt they're in the manual.
21 ;;; Secondly, the numerical order of these constants is coupled with
22 ;;; code in CHAR-CLASS{,2,3} in the reader implementation, so beware
23 ;;; when changing them.
;;;
;;; The values are the consecutive integers 0 through 14, so each one
;;; fits in the (UNSIGNED-BYTE 8) element type of ATTRIBUTE-TABLE.
24 (defconstant +char-attr-whitespace+ 0)
25 (defconstant +char-attr-terminating-macro+ 1)
26 (defconstant +char-attr-single-escape+ 2)
27 (defconstant +char-attr-multiple-escape+ 3)
28 (defconstant +char-attr-constituent+ 4)
29 (defconstant +char-attr-constituent-dot+ 5)
30 (defconstant +char-attr-constituent-expt+ 6)
31 (defconstant +char-attr-constituent-slash+ 7)
32 (defconstant +char-attr-constituent-digit+ 8)
33 (defconstant +char-attr-constituent-sign+ 9)
34 ;;; The following two are not static but depend on *READ-BASE*.
35 ;;; DECIMAL-DIGIT is for characters being digits in base 10 but not in
36 ;;; base *READ-BASE* (which is therefore perforce smaller than 10);
37 ;;; DIGIT-OR-EXPT is for characters being both exponent markers and
38 ;;; digits in base *READ-BASE* (which is therefore perforce larger
39 ;;; than 10). -- CSR, 2004-03-16
40 (defconstant +char-attr-constituent-decimal-digit+ 10)
41 (defconstant +char-attr-constituent-digit-or-expt+ 11)
43 (defconstant +char-attr-package-delimiter+ 12)
44 (defconstant +char-attr-invalid+ 13)
45 ;; Meta: the trailing comment on the next line names READ-UNQUALIFIED-TOKEN,
;; but there is no such function. No biggie.
46 (defconstant +char-attr-delimiter+ 14) ; (a fake for READ-UNQUALIFIED-TOKEN)
;;; The READTABLE structure. Slot accessors have no prefix (:CONC-NAME NIL),
;;; MAKE-READTABLE takes no arguments, and no copier is generated.
48 (sb!xc:defstruct (readtable (:conc-name nil)
49 (:constructor make-readtable ())
50 (:predicate readtablep)
51 ;; ANSI requires a CL:COPY-READTABLE to do
52 ;; a deep copy, so the DEFSTRUCT-generated
53 ;; default is not suitable.
54 (:copier nil))
55 "A READTABLE is a data structure that maps characters into syntax
56 types for the Common Lisp expression reader."
57 ;; The CHARACTER-ATTRIBUTE-ARRAY is a vector of BASE-CHAR-CODE-LIMIT
58 ;; integers for describing the character type. Conceptually, there
59 ;; are 4 distinct "primary" character attributes: whitespace
60 ;; (+CHAR-ATTR-WHITESPACE+), terminating macro
61 ;; (+CHAR-ATTR-TERMINATING-MACRO+), escape (+CHAR-ATTR-SINGLE-ESCAPE+
;; and +CHAR-ATTR-MULTIPLE-ESCAPE+; there is no +CHAR-ATTR-ESCAPE+
;; constant), and constituent (+CHAR-ATTR-CONSTITUENT+). Non-terminating
62 ;; macros (such as the symbol reader) have the attribute
63 ;; +CHAR-ATTR-CONSTITUENT+.
65 ;; In order to make READ-TOKEN fast, all this information is stored
66 ;; in the character attribute array by having different varieties of
67 ;; constituents.
68 (character-attribute-array
69 (make-array base-char-code-limit
70 :element-type '(unsigned-byte 8)
71 :initial-element +char-attr-constituent+)
72 :type attribute-table
73 :read-only t)
;; NOTE(review): presumably holds the attributes of characters whose
;; code is >= BASE-CHAR-CODE-LIMIT, which cannot index the array above;
;; confirm against the reader implementation.
74 (character-attribute-hash-table (make-hash-table)
75 :type hash-table
76 :read-only t)
77 ;; The CHARACTER-MACRO-ARRAY is a vector of BASE-CHAR-CODE-LIMIT
78 ;; entries, each a function or NIL (the initial element). One of these
79 ;; functions is called with appropriate arguments whenever any
80 ;; non-WHITESPACE character is encountered inside
;; READ-PRESERVING-WHITESPACE. These functions are used to
81 ;; implement user-defined read-macros, system read-macros, and the
82 ;; number-symbol reader.
83 (character-macro-array
84 (make-array base-char-code-limit :initial-element nil)
85 :type (simple-vector #.base-char-code-limit)
86 :read-only t)
;; NOTE(review): presumably the counterpart of CHARACTER-MACRO-ARRAY for
;; characters whose code is >= BASE-CHAR-CODE-LIMIT; confirm.
87 (character-macro-hash-table (make-hash-table) :type hash-table
88 :read-only t)
;; Readtable case encoded as an integer in [0,4). The initial value 0 is
;; the value of +READTABLE-UPCASE+, which is defined later in this file.
89 (%readtable-case 0 :type (mod 4))
90 ;; Element type to use when reading a string literal with no extended-chars.
91 ;; The system itself prefers base-string, but otherwise it is a contentious
92 ;; issue. We don't (by default) use base-strings, because people often write:
93 ;; (SETF (CHAR (READ-STRING S) 0) #\PILE_OF_POO),
94 ;; or more likely, something the effect of which resembles
95 ;; (SETF (CHAR (ADJUST-ARRAY "" 10) 0) #\SMILE)
96 ;; which are each dubious constructs, because they assume READ to produce
97 ;; strings capable of holding any char. The latter further assumes something
98 ;; about compilation, because in that example, considering that there are no
99 ;; characters in the literal, it is unclear whether the array should
100 ;; be similar-as-constant to an array of base-char or array of character.
101 ;; While indeed SBCL prints base-strings readably (if *PRINT-READABLY* is T)
102 ;; using #. syntax, the question is what the writer of the code intended
103 ;; if (s)he did not know that the string should have been expressly
104 ;; specified via #.(MAKE-STRING ... :ELEMENT-TYPE) or somesuch.
;; Hence the initial value is CHARACTER, not BASE-CHAR: a freshly made
;; readtable must not read string literals as base-strings.
105 (%readtable-string-preference 'character :type (member character base-char))
106 ;; With symbols, it's fairly clear that immutability of print names
107 ;; renders the distinction between the kinds of string in the symbol-name
108 ;; as being less relevant. If you expect (copy-seq (string asymbol))
109 ;; to produce a certain type of string, your code is unportable anyway.
110 (%readtable-symbol-preference 'base-char :type (member character base-char))
;; Initially T when the build has the SB-UNICODE feature, NIL otherwise.
111 (%readtable-normalization #!+sb-unicode t #!-sb-unicode nil :type boolean))
;;; Integer codes for readtable case. 0 (upcase) is also the initial value
;;; of the %READTABLE-CASE slot of READTABLE. That slot's type, (MOD 4),
;;; admits the codes 2 and 3 as well, but no constants for them are
;;; defined here.
;;; NOTE(review): presumably 2 and 3 stand for :PRESERVE and :INVERT; confirm.
113 (defconstant +readtable-upcase+ 0)
114 (defconstant +readtable-downcase+ 1)
;;; NOTE(review): FREEZE-TYPE is not a standard CL declaration; presumably
;;; it promises that READTABLE will get no further subtypes, so that type
;;; checks on it can be optimized. Confirm against the SBCL internals.
116 (declaim (freeze-type readtable))