Fix -sb-unicode
[sbcl.git] / src / code / readtable.lisp
bloba23e5b406ab3b3903514c68c074a681763c9cf03
1 ;;;; READTABLEs
3 ;;;; This software is part of the SBCL system. See the README file for
4 ;;;; more information.
5 ;;;;
6 ;;;; This software is derived from the CMU CL system, which was
7 ;;;; written at Carnegie Mellon University and released into the
8 ;;;; public domain. The software is in the public domain and is
9 ;;;; provided with absolutely no warranty. See the COPYING and CREDITS
10 ;;;; files for more information.
12 (in-package "SB-IMPL")
;;; Type of a readtable's syntax vector: a SIMPLE-ARRAY of
;;; (UNSIGNED-BYTE 8) elements with exactly BASE-CHAR-CODE-LIMIT
;;; entries.
(sb-xc:deftype attribute-table ()
  (list 'simple-array
        '(unsigned-byte 8)
        (list base-char-code-limit)))
;;; Constants for readtable character attributes.
;;;
;;; An older note here said these are all "as in the manual"; a later
;;; FIXME doubts that they are in the manual at all. What does matter:
;;; the numerical order of these constants is coupled with code in
;;; CHAR-CLASS{,2,3} in the reader implementation, so beware when
;;; changing them.
(macrolet ((define-char-attributes (&rest specs)
             ;; Each SPEC is a (NAME VALUE) pair; it expands into the
             ;; top-level form (DEFCONSTANT NAME VALUE).
             `(progn
                ,@(mapcar (lambda (spec) `(defconstant ,@spec))
                          specs))))
  (define-char-attributes
    (+char-attr-whitespace+ 0)
    (+char-attr-terminating-macro+ 1)
    (+char-attr-single-escape+ 2)
    (+char-attr-multiple-escape+ 3)
    (+char-attr-constituent+ 4)
    (+char-attr-constituent-dot+ 5)
    (+char-attr-constituent-expt+ 6)
    (+char-attr-constituent-slash+ 7)
    (+char-attr-constituent-digit+ 8)
    (+char-attr-constituent-sign+ 9)
    ;; The following two are not static but depend on *READ-BASE*.
    ;; DECIMAL-DIGIT is for characters being digits in base 10 but not
    ;; in base *READ-BASE* (which is therefore perforce smaller than
    ;; 10); DIGIT-OR-EXPT is for characters being both exponent
    ;; markers and digits in base *READ-BASE* (which is therefore
    ;; perforce larger than 10). -- CSR, 2004-03-16
    (+char-attr-constituent-decimal-digit+ 10)
    (+char-attr-constituent-digit-or-expt+ 11)
    (+char-attr-package-delimiter+ 12)
    (+char-attr-invalid+ 13)
    ;; A fake attribute for READ-UNQUALIFIED-TOKEN.
    ;; Meta: there is no such function as READ-UNQUALIFIED-TOKEN.
    ;; No biggie.
    (+char-attr-delimiter+ 14)))
;;; EQ hash table used as the initial value of a readtable's
;;; EXTENDED-CHAR-TABLE slot.
;;; NOTE(review): the name suggests this shared table is never
;;; populated and that a readtable gets a private table before any
;;; extended-char entry is stored -- confirm against the code that
;;; writes to EXTENDED-CHAR-TABLE.
(define-load-time-global *empty-extended-char-table*
  (make-hash-table :test 'eq
                   :rehash-size 1))
(sb-xc:defstruct (readtable (:conc-name nil)
                            (:constructor make-readtable ())
                            (:predicate readtablep)
                            ;; ANSI requires a CL:COPY-READTABLE to do
                            ;; a deep copy, so the DEFSTRUCT-generated
                            ;; default is not suitable.
                            (:copier nil))
  "A READTABLE is a data structure that maps characters into syntax
types for the Common Lisp expression reader."
  ;; The BASE-CHAR-SYNTAX-ARRAY is a vector of BASE-CHAR-CODE-LIMIT
  ;; integers for describing the character type. Conceptually, there
  ;; are 4 distinct "primary" character attributes: whitespace
  ;; (+CHAR-ATTR-WHITESPACE+), terminating macro
  ;; (+CHAR-ATTR-TERMINATING-MACRO+), escape
  ;; (+CHAR-ATTR-SINGLE-ESCAPE+ and +CHAR-ATTR-MULTIPLE-ESCAPE+), and
  ;; constituent (+CHAR-ATTR-CONSTITUENT+). Non-terminating macros
  ;; (such as the symbol reader) have the attribute
  ;; +CHAR-ATTR-CONSTITUENT+.
  ;;
  ;; In order to make READ-TOKEN fast, all this information is stored
  ;; in the character attribute table by having different varieties of
  ;; constituents.
  (base-char-syntax-array
   (make-array base-char-code-limit
               :element-type '(unsigned-byte 8)
               :initial-element +char-attr-constituent+)
   :type attribute-table
   :read-only t)
  ;; The BASE-CHAR-MACRO-ARRAY is a vector of BASE-CHAR-CODE-LIMIT
  ;; functions. One of these functions is called with appropriate
  ;; arguments whenever any non-WHITESPACE character is encountered
  ;; inside READ-PRESERVING-WHITESPACE. These functions are used to
  ;; implement user-defined read-macros, system read-macros, and the
  ;; number-symbol reader. Every entry starts out as NIL.
  (base-char-macro-array
   (make-array base-char-code-limit :initial-element nil)
   :type (simple-vector #.base-char-code-limit)
   :read-only t)
  ;; Characters above the BASE-CHAR range. Every new readtable starts
  ;; out pointing at the one global *EMPTY-EXTENDED-CHAR-TABLE*.
  (extended-char-table *empty-extended-char-table* :type hash-table)
  (%readtable-case :upcase :type (member :upcase :downcase :preserve :invert))
  ;; Element type to use when reading a string literal with no
  ;; extended-chars. The system itself prefers base-string, but
  ;; otherwise it is a contentious issue. We don't (by default) use
  ;; base-strings -- hence the CHARACTER default below -- because
  ;; people often write:
  ;;   (SETF (CHAR (READ-STRING S) 0) #\PILE_OF_POO),
  ;; or more likely, something the effect of which resembles
  ;;   (SETF (CHAR (ADJUST-ARRAY "" 10) 0) #\SMILE)
  ;; which are each dubious constructs, because they assume READ to
  ;; produce strings capable of holding any char. The latter further
  ;; assumes something about compilation, because in that example,
  ;; considering that there are no characters in the literal, it is
  ;; unclear whether the array should be similar-as-constant to an
  ;; array of base-char or array of character.
  ;; While indeed SBCL prints base-strings readably (if
  ;; *PRINT-READABLY* is T) using #. syntax, the question is what the
  ;; writer of the code intended if (s)he did not know that the string
  ;; should have been expressly specified via
  ;; #.(MAKE-STRING ... :ELEMENT-TYPE) or somesuch.
  (%readtable-string-preference 'character :type (member character base-char))
  ;; With symbols, it's fairly clear that immutability of print names
  ;; renders the distinction between the kinds of string in the
  ;; symbol-name as being less relevant. If you expect
  ;; (copy-seq (string asymbol)) to produce a certain type of string,
  ;; your code is unportable anyway.
  (%readtable-symbol-preference 'base-char :type (member character base-char))
  ;; Defaults to T when the SB-UNICODE feature is present, NIL otherwise.
  (%readtable-normalization #+sb-unicode t #-sb-unicode nil :type boolean))

;;; SBCL-specific declaration. NOTE(review): per the SBCL manual,
;;; FREEZE-TYPE promises that the READTABLE structure type will not be
;;; redefined or subclassed afterwards -- confirm that nothing later
;;; in the build does so.
(declaim (freeze-type readtable))