1 ;;;; the needed-on-the-cross-compilation-host part of HASH-TABLE
4 ;;;; This software is part of the SBCL system. See the README file for
7 ;;;; This software is derived from the CMU CL system, which was
8 ;;;; written at Carnegie Mellon University and released into the
9 ;;;; public domain. The software is in the public domain and is
10 ;;;; provided with absolutely no warranty. See the COPYING and CREDITS
11 ;;;; files for more information.
13 (in-package "SB-IMPL")
15 ;;; Our table representation is as illustrated below.
16 ;;; SIZE is always the exact number of K/V entries that can be stored,
17 ;;; and can be any number, not necessarily a power of 2.
19 ;;; __________________________________________
21 ;;; vector | * | * | K | V | K | V | ......... | * |
22 ;;; +________________________________________+
25 ;;; ^--- pair index 1 and so on
28 ;;; The length of PAIRS (the K/V vector) is the specified :SIZE * 2
29 ;;; plus 3 elements of overhead, 2 at the beginning and one at the end.
30 ;;; (It's slighly strange that extra cells are in two different places,
31 ;;; however there's a reason: we need an indicator for the end of a chain,
32 ;;; and/or unused bin, and we use 0 for that, which means that k/v pair 0
33 ;;; is unusable. But we can't keep indiscriminately adding overhead cells
34 ;;; to the front because that make even more k/v pairs unusable,
35 ;;; whereas adding at the end doesn't cause any such problem)
36 ;;; Pair index 1 is the first pair that stores user data.
38 ;;; The length of the HASH-VECTOR is in direct correspondence with the
39 ;;; physical k/v cells, so that we can store a hash per key and not worry
40 ;;; about one wasted cell. (i.e. the 0th k/v cell can't be used, so neither
41 ;;; can the 0th hash value. To avoid this 1 cell of waste would mean adding
42 ;;; and subtracting 1 here and there, needlessly complicating things)
44 ;;; The INDEX vector is the traditional power-of-2 sized vector mapping a hash
45 ;;; value to a pair. These are what you might calls the "bins" or "buckets" of
46 ;;; the table. The value in a bin is the pair index of the start of the chain
47 ;;; belonging to the bin. The value is 1..SIZE or 0 for an empty bin, which works
48 ;;; well because pair index 0 isn't usable. The NEXT vector is the pointer to
49 ;;; follow from a pair index to the next pair index in the same chain. As with
50 ;;; the hash vector, the NEXT vector is sized at 1 greater than minimally
51 ;;; necessary, to avoid adding and subtracting 1 from a pair index.
53 ;;; The PAIRS vector has an odd length with the following overhead elements:
55 ;;; [0] = high-water-mark
56 ;;; [1] = rehash-due-to-GC indicator
58 ;;; [length-1] = auxiliary info depending on kind of table
59 ;;; See KV-VECTOR-AUX-INFO in 'target-hash-table'
61 ;;; HASH-TABLE is implemented as a STRUCTURE-OBJECT.
62 (sb-xc:deftype hash-table-index
() '(unsigned-byte 32))
63 (sb-xc:defstruct
(hash-table (:copier nil
)
64 (:constructor %alloc-hash-table
80 (gethash-impl #'error
:type
(sfunction * (values t boolean
)) :read-only t
)
81 (puthash-impl #'error
:type
(sfunction * t
) :read-only t
)
82 (remhash-impl #'error
:type
(sfunction * t
) :read-only t
)
83 (clrhash-impl #'error
:type
(sfunction * t
) :read-only t
)
84 ;; The Key-Value pair vector.
85 ;; Note: this vector has a "high water mark" which resembles a fill
86 ;; pointer, but unlike a fill pointer, GC can ignore elements
87 ;; above the high water mark. If you store non-immediate data past
88 ;; that mark, you're sure to have problems.
89 (pairs nil
:type simple-vector
)
90 ;; MRU physical index of a key in the k/v vector. If < (LENGTH PAIRS)
91 ;; the cell can be examined first in GETHASH and PUTHASH. The "unknown" value
92 ;; is not 0 because that would look valid but could accidentally return a
93 ;; false match if the user's key is EQ to element 0 in the pair vector.
94 (cache (- array-dimension-limit
2) :type index
)
95 ;; The index vector. This may be larger than the capacity to help
97 (index-vector nil
:type
(simple-array hash-table-index
(*)))
98 ;; This table parallels the KV vector, and is used to chain together
99 ;; the hash buckets and the free list. A slot will only ever be in
100 ;; one of these lists.
101 ;; (I think that free k/v slots could be linked through the KV vector
102 ;; and not the next vector which affords some minor improvements)
103 (next-vector nil
:type
(simple-array hash-table-index
(*)))
104 ;; This table parallels the KV table, and can be used to store the
105 ;; hash associated with the key, saving recalculation. Could be
106 ;; useful for EQL, and EQUAL hash tables. This table is not needed
107 ;; for EQ hash tables, and when present the value of
108 ;; +MAGIC-HASH-VECTOR-VALUE+ represents address-based hashing on the
110 (hash-vector nil
:type
(or null
(simple-array hash-table-index
(*))))
111 ;; flags: WEAKNESS | KIND | WEAKP | {notused} | USERFUNP | SYNCHRONIZED
112 ;; WEAKNESS is 2 bits, KIND is 2 bits, the rest are 1 bit each
113 ;; - WEAKNESS : {K-and-V, K, V, K-or-V}, irrelevant unless WEAKP
114 ;; - KIND : {EQ, EQL, EQUAL, EQUALP}, irrelevant if USERFUNP
115 ;; - WEAKP : table is weak
116 ;; - USERFUNP : table has a nonstandard hash function
117 ;; - SYNCHRONIZED : all operations are automatically guarded by a mutex
118 ;; If you change these, be sure to check the definitions of hash_table_weakp()
119 ;; and other autogenerated C code (see WRITE-HASH-TABLE-FLAG-EXTRACTORS)
120 (flags 0 :type sb-vm
:word
:read-only t
)
121 ;; Used for locking GETHASH/(SETF GETHASH)/REMHASH
122 ;; The lock is always created for synchronized tables, or created just-in-time
123 ;; with nonsynchronized tables that are guarded by WITH-LOCKED-HASH-TABLE
124 ;; or an equivalent "system" variant of the locking macro.
125 (%lock nil
#-c-headers-only
:type
#-c-headers-only
(or null sb-thread
:mutex
))
127 ;; The 4 standard tests functions don't need these next 2 slots:
128 ;; (TODO: possibly don't have them in all hash-tables)
129 ;; The function used to compare two keys. Returns T if they are the
130 ;; same and NIL if not.
131 (test-fun nil
:type function
:read-only t
)
132 ;; The function used to compute the hashing of a key. Returns two
133 ;; values: the index hashing and T if that might change with the
135 (hash-fun nil
:type function
:read-only t
)
137 ;; The type of hash table this is. Part of the exported interface,
138 ;; as well as needed for the MAKE-LOAD-FORM and PRINT-OBJECT methods.
139 (test nil
:type
(or symbol function
) :read-only t
)
140 ;; How much to grow the hash table by when it fills up. If an index,
141 ;; then add that amount. If a floating point number, then multiply
143 (rehash-size nil
:type
(or index
(single-float (1.0
)))
145 ;; How full the hash table has to get before we rehash
146 ;; but only for the initial determination of how many buckets to make.
147 ;; Subsequent resizing is at our discretion. i.e. you might think that a
148 ;; deliberate choice of rehash size and threshold implies that you want the new
149 ;; table to be X amount larger *and* that you care at about what load factor the
150 ;; new table gets rehashed, but no, you don't get to pick both every time.
151 ;; (CLHS says that these are all just "hints" and we're free to ignore)
152 (rehash-threshold nil
:type
(single-float (0.0
) 1.0) :read-only t
)
153 ;; The current number of entries in the table.
154 (%count
0 :type index
)
155 ;; Index into the Next vector chaining together free slots in the KV
157 ;; This index is allowed to exceed the high-water-mark by 1 unless
158 ;; the HWM is at its maximum in which case this must be 0.
159 (next-free-kv 1 :type index
)
161 ;; Statistics gathering for new gethash algorithm that doesn't
162 ;; disable GC during rehash as a consequence of key movement.
163 #+hash-table-metrics
(n-rehash+find
0 :type word
)
164 #+hash-table-metrics
(n-lsearch 0 :type word
)
165 ;; this counter is incremented if we observe that GC marked the table invalid
166 ;; while already in the midst of being rehashed due to invalidation.
167 #+hash-table-metrics
(n-rehash-again 0 :type word
)
168 ;; this counter is incremented if the fast-read-lock (implicit in the
169 ;; 'stamp' field) implies that there was an inconsistent view of the table
170 #+hash-table-metrics
(n-stamp-change 0 :type word
)
173 (sb-xc:defstruct
(general-hash-table (:copier nil
)
174 (:conc-name hash-table-
)
175 (:include hash-table
)
176 (:constructor %alloc-general-hash-table
191 ;; List of (pair-index . bucket-number) which GC smashed and are almost
192 ;; equivalent to free cells, except that they are not yet unlinked from
193 ;; their chain. Skipping the removal in GC eliminates a race with REMHASH.
194 ;; Pushing onto the free list wouldn't actually be difficult,
195 ;; but removing from the bucket is impossible without implementing
196 ;; lock-free linked lists compatibly between C and Lisp.
198 ;; This slot is used to link weak hash tables during GC. When the GC
199 ;; isn't running it is always NIL.
200 (next-weak-hash-table nil
:type null
))
202 (defconstant hash-table-weak-flag
8)
203 ;;; USERFUN-FLAG implies a nonstandard hash function. Such tables may also have
204 ;;; a custom comparator. But you can't have a custom comparator without a custom
205 ;;; hash, because there's no way in general to produce a compatible hash.
206 (defconstant hash-table-userfun-flag
2)
207 (defconstant hash-table-synchronized-flag
1)
209 ;;; Keep in sync with weak_ht_alivep_funs[] in gc-common
210 (defconstant +ht-weak-key-AND-value
+ 0)
211 (defconstant +ht-weak-key
+ 1)
212 (defconstant +ht-weak-value
+ 2)
213 (defconstant +ht-weak-key-OR-value
+ 3)
215 (sb-xc:defmacro hash-table-lock
(table)
216 `(let ((ht ,table
)) (or (hash-table-%lock ht
) (install-hash-table-lock ht
))))
218 (defmacro pack-ht-flags-weakness
(x) `(logior (ash ,x
6) hash-table-weak-flag
))
219 (defmacro ht-flags-weakness
(flags) `(ldb (byte 2 6) ,flags
))
220 ;;; KIND corresponds directly to the HASH-TABLE-TEST for the 4 standard tests,
221 ;;; but is not meaningful with a user-provided test or hash function.
222 (sb-xc:defmacro pack-ht-flags-kind
(x) `(ash ,x
4))
223 (defmacro ht-flags-kind
(flags) `(ldb (byte 2 4) ,flags
))
225 (defconstant default-rehash-size
1.5)
226 ;; Don't raise this number to 8 - if you do it'll increase the memory
227 ;; consumption of a default MAKE-HASH-TABLE call by 7% just due to
228 ;; padding slots. This is a "perfect" minimal size.
229 (defconstant +min-hash-table-size
+ 7)
231 (sb-xc:defmacro make-system-hash-table
(&key test synchronized weakness
)
232 (multiple-value-bind (kind args
)
233 (cond ((equal test
'(quote eq
)) (values 0 '('eq
#'eq
#'eq-hash
)))
234 ((equal test
'(quote eql
)) (values 1 '('eql
#'eql
#'eql-hash
)))
236 (bug "Incomplete implementation of MAKE-SYSTEM-HASH-TABLE")
239 (logior ,(ecase weakness
240 (:key
(pack-ht-flags-weakness +ht-weak-key
+))
241 (:value
(pack-ht-flags-weakness +ht-weak-value
+)))
242 (pack-ht-flags-kind ,kind
)
243 ,(if synchronized
'hash-table-synchronized-flag
0))
245 ;; Splicing these constant values at expansion time avoids a problem invoking
246 ;; MAKE-SYSTEM-HASH-TABLE before the constants are known to make-host-2, as happens
247 ;; when compiling type-class. hash-table.lisp can't be moved earlier in build-order
248 ;; (without pain) so the expanded code can't use the values.
249 ,+min-hash-table-size
+
253 ;; Our hash-tables store precomputed hashes to speed rehash and to guard
254 ;; the call of the general comparator.
255 ;; i.e. we take the value from mumble-hash {SXHASH, EQ-HASH, etc}
256 ;; and fuzz the bits some more, then clip to 31 bits and store that.
257 ;; (As a practical matter, this limits tables to 2^31 bins.)
258 ;; Address-sensitive keys can't store a precomputed hash. They instead
259 ;; store this value that indicates address-sensitivity.
260 (defconstant +magic-hash-vector-value
+ #xFFFFFFFF
)