Delete scratch files
[sbcl.git] / src / code / hash-table.lisp
blob9113343277c001bd19c8fdf480fa1cddabc85d65
1 ;;;; the needed-on-the-cross-compilation-host part of HASH-TABLE
2 ;;;; implementation
4 ;;;; This software is part of the SBCL system. See the README file for
5 ;;;; more information.
6 ;;;;
7 ;;;; This software is derived from the CMU CL system, which was
8 ;;;; written at Carnegie Mellon University and released into the
9 ;;;; public domain. The software is in the public domain and is
10 ;;;; provided with absolutely no warranty. See the COPYING and CREDITS
11 ;;;; files for more information.
13 (in-package "SB-IMPL")
15 ;;; Our table representation is as illustrated below.
16 ;;; SIZE is always the exact number of K/V entries that can be stored,
17 ;;; and can be any number, not necessarily a power of 2.
19 ;;; __________________________________________
20 ;;; K/V | | |
21 ;;; vector | * | * | K | V | K | V | ......... | * |
22 ;;; +________________________________________+
23 ;;; | <--- SIZE -->|
24 ;;;
25 ;;; ^--- pair index 1 and so on
26 ;;;
28 ;;; The length of PAIRS (the K/V vector) is the specified :SIZE * 2
29 ;;; plus 3 elements of overhead, 2 at the beginning and one at the end.
30 ;;; (It's slighly strange that extra cells are in two different places,
31 ;;; however there's a reason: we need an indicator for the end of a chain,
32 ;;; and/or unused bin, and we use 0 for that, which means that k/v pair 0
33 ;;; is unusable. But we can't keep indiscriminately adding overhead cells
34 ;;; to the front because that make even more k/v pairs unusable,
35 ;;; whereas adding at the end doesn't cause any such problem)
36 ;;; Pair index 1 is the first pair that stores user data.
38 ;;; The length of the HASH-VECTOR is in direct correspondence with the
39 ;;; physical k/v cells, so that we can store a hash per key and not worry
40 ;;; about one wasted cell. (i.e. the 0th k/v cell can't be used, so neither
41 ;;; can the 0th hash value. To avoid this 1 cell of waste would mean adding
42 ;;; and subtracting 1 here and there, needlessly complicating things)
44 ;;; The INDEX vector is the traditional power-of-2 sized vector mapping a hash
45 ;;; value to a pair. These are what you might calls the "bins" or "buckets" of
46 ;;; the table. The value in a bin is the pair index of the start of the chain
47 ;;; belonging to the bin. The value is 1..SIZE or 0 for an empty bin, which works
48 ;;; well because pair index 0 isn't usable. The NEXT vector is the pointer to
49 ;;; follow from a pair index to the next pair index in the same chain. As with
50 ;;; the hash vector, the NEXT vector is sized at 1 greater than minimally
51 ;;; necessary, to avoid adding and subtracting 1 from a pair index.
53 ;;; The PAIRS vector has an odd length with the following overhead elements:
54 ;;;
55 ;;; [0] = high-water-mark
56 ;;; [1] = rehash-due-to-GC indicator
57 ;;; ...
58 ;;; [length-1] = auxiliary info depending on kind of table
59 ;;; See KV-VECTOR-AUX-INFO in 'target-hash-table'
61 ;;; HASH-TABLE is implemented as a STRUCTURE-OBJECT.
62 (sb-xc:deftype hash-table-index () '(unsigned-byte 32))
63 (sb-xc:defstruct (hash-table (:copier nil)
64 (:constructor %alloc-hash-table
65 (flags
66 gethash-impl
67 puthash-impl
68 remhash-impl
69 clrhash-impl
70 test
71 test-fun
72 hash-fun
73 rehash-size
74 rehash-threshold
75 pairs
76 index-vector
77 next-vector
78 hash-vector)))
80 (gethash-impl #'error :type (sfunction * (values t boolean)) :read-only t)
81 (puthash-impl #'error :type (sfunction * t) :read-only t)
82 (remhash-impl #'error :type (sfunction * t) :read-only t)
83 (clrhash-impl #'error :type (sfunction * t) :read-only t)
84 ;; The Key-Value pair vector.
85 ;; Note: this vector has a "high water mark" which resembles a fill
86 ;; pointer, but unlike a fill pointer, GC can ignore elements
87 ;; above the high water mark. If you store non-immediate data past
88 ;; that mark, you're sure to have problems.
89 (pairs nil :type simple-vector)
90 ;; MRU physical index of a key in the k/v vector. If < (LENGTH PAIRS)
91 ;; the cell can be examined first in GETHASH and PUTHASH. The "unknown" value
92 ;; is not 0 because that would look valid but could accidentally return a
93 ;; false match if the user's key is EQ to element 0 in the pair vector.
94 (cache (- array-dimension-limit 2) :type index)
95 ;; The index vector. This may be larger than the capacity to help
96 ;; reduce collisions.
97 (index-vector nil :type (simple-array hash-table-index (*)))
98 ;; This table parallels the KV vector, and is used to chain together
99 ;; the hash buckets and the free list. A slot will only ever be in
100 ;; one of these lists.
101 ;; (I think that free k/v slots could be linked through the KV vector
102 ;; and not the next vector which affords some minor improvements)
103 (next-vector nil :type (simple-array hash-table-index (*)))
104 ;; This table parallels the KV table, and can be used to store the
105 ;; hash associated with the key, saving recalculation. Could be
106 ;; useful for EQL, and EQUAL hash tables. This table is not needed
107 ;; for EQ hash tables, and when present the value of
108 ;; +MAGIC-HASH-VECTOR-VALUE+ represents address-based hashing on the
109 ;; respective key.
110 (hash-vector nil :type (or null (simple-array hash-table-index (*))))
111 ;; flags: WEAKNESS | KIND | WEAKP | {notused} | USERFUNP | SYNCHRONIZED
112 ;; WEAKNESS is 2 bits, KIND is 2 bits, the rest are 1 bit each
113 ;; - WEAKNESS : {K-and-V, K, V, K-or-V}, irrelevant unless WEAKP
114 ;; - KIND : {EQ, EQL, EQUAL, EQUALP}, irrelevant if USERFUNP
115 ;; - WEAKP : table is weak
116 ;; - USERFUNP : table has a nonstandard hash function
117 ;; - SYNCHRONIZED : all operations are automatically guarded by a mutex
118 ;; If you change these, be sure to check the definitions of hash_table_weakp()
119 ;; and other autogenerated C code (see WRITE-HASH-TABLE-FLAG-EXTRACTORS)
120 (flags 0 :type sb-vm:word :read-only t)
121 ;; Used for locking GETHASH/(SETF GETHASH)/REMHASH
122 ;; The lock is always created for synchronized tables, or created just-in-time
123 ;; with nonsynchronized tables that are guarded by WITH-LOCKED-HASH-TABLE
124 ;; or an equivalent "system" variant of the locking macro.
125 (%lock nil #-c-headers-only :type #-c-headers-only (or null sb-thread:mutex))
127 ;; The 4 standard tests functions don't need these next 2 slots:
128 ;; (TODO: possibly don't have them in all hash-tables)
129 ;; The function used to compare two keys. Returns T if they are the
130 ;; same and NIL if not.
131 (test-fun nil :type function :read-only t)
132 ;; The function used to compute the hashing of a key. Returns two
133 ;; values: the index hashing and T if that might change with the
134 ;; next GC.
135 (hash-fun nil :type function :read-only t)
137 ;; The type of hash table this is. Part of the exported interface,
138 ;; as well as needed for the MAKE-LOAD-FORM and PRINT-OBJECT methods.
139 (test nil :type (or symbol function) :read-only t)
140 ;; How much to grow the hash table by when it fills up. If an index,
141 ;; then add that amount. If a floating point number, then multiply
142 ;; it by that.
143 (rehash-size nil :type (or index (single-float (1.0)))
144 :read-only t)
145 ;; How full the hash table has to get before we rehash
146 ;; but only for the initial determination of how many buckets to make.
147 ;; Subsequent resizing is at our discretion. i.e. you might think that a
148 ;; deliberate choice of rehash size and threshold implies that you want the new
149 ;; table to be X amount larger *and* that you care at about what load factor the
150 ;; new table gets rehashed, but no, you don't get to pick both every time.
151 ;; (CLHS says that these are all just "hints" and we're free to ignore)
152 (rehash-threshold nil :type (single-float (0.0) 1.0) :read-only t)
153 ;; The current number of entries in the table.
154 (%count 0 :type index)
155 ;; Index into the Next vector chaining together free slots in the KV
156 ;; vector.
157 ;; This index is allowed to exceed the high-water-mark by 1 unless
158 ;; the HWM is at its maximum in which case this must be 0.
159 (next-free-kv 1 :type index)
161 ;; Statistics gathering for new gethash algorithm that doesn't
162 ;; disable GC during rehash as a consequence of key movement.
163 #+hash-table-metrics (n-rehash+find 0 :type word)
164 #+hash-table-metrics (n-lsearch 0 :type word)
165 ;; this counter is incremented if we observe that GC marked the table invalid
166 ;; while already in the midst of being rehashed due to invalidation.
167 #+hash-table-metrics (n-rehash-again 0 :type word)
168 ;; this counter is incremented if the fast-read-lock (implicit in the
169 ;; 'stamp' field) implies that there was an inconsistent view of the table
170 #+hash-table-metrics (n-stamp-change 0 :type word)
173 (sb-xc:defstruct (general-hash-table (:copier nil)
174 (:conc-name hash-table-)
175 (:include hash-table)
176 (:constructor %alloc-general-hash-table
177 (flags
178 gethash-impl
179 puthash-impl
180 remhash-impl
181 clrhash-impl
182 test
183 test-fun
184 hash-fun
185 rehash-size
186 rehash-threshold
187 pairs
188 index-vector
189 next-vector
190 hash-vector)))
191 ;; List of (pair-index . bucket-number) which GC smashed and are almost
192 ;; equivalent to free cells, except that they are not yet unlinked from
193 ;; their chain. Skipping the removal in GC eliminates a race with REMHASH.
194 ;; Pushing onto the free list wouldn't actually be difficult,
195 ;; but removing from the bucket is impossible without implementing
196 ;; lock-free linked lists compatibly between C and Lisp.
197 (smashed-cells nil)
198 ;; This slot is used to link weak hash tables during GC. When the GC
199 ;; isn't running it is always NIL.
200 (next-weak-hash-table nil :type null))
202 (defconstant hash-table-weak-flag 8)
203 ;;; USERFUN-FLAG implies a nonstandard hash function. Such tables may also have
204 ;;; a custom comparator. But you can't have a custom comparator without a custom
205 ;;; hash, because there's no way in general to produce a compatible hash.
206 (defconstant hash-table-userfun-flag 2)
207 (defconstant hash-table-synchronized-flag 1)
209 ;;; Keep in sync with weak_ht_alivep_funs[] in gc-common
210 (defconstant +ht-weak-key-AND-value+ 0)
211 (defconstant +ht-weak-key+ 1)
212 (defconstant +ht-weak-value+ 2)
213 (defconstant +ht-weak-key-OR-value+ 3)
215 (sb-xc:defmacro hash-table-lock (table)
216 `(let ((ht ,table)) (or (hash-table-%lock ht) (install-hash-table-lock ht))))
218 (defmacro pack-ht-flags-weakness (x) `(logior (ash ,x 6) hash-table-weak-flag))
219 (defmacro ht-flags-weakness (flags) `(ldb (byte 2 6) ,flags))
220 ;;; KIND corresponds directly to the HASH-TABLE-TEST for the 4 standard tests,
221 ;;; but is not meaningful with a user-provided test or hash function.
222 (sb-xc:defmacro pack-ht-flags-kind (x) `(ash ,x 4))
223 (defmacro ht-flags-kind (flags) `(ldb (byte 2 4) ,flags))
225 (defconstant default-rehash-size 1.5)
226 ;; Don't raise this number to 8 - if you do it'll increase the memory
227 ;; consumption of a default MAKE-HASH-TABLE call by 7% just due to
228 ;; padding slots. This is a "perfect" minimal size.
229 (defconstant +min-hash-table-size+ 7)
231 (sb-xc:defmacro make-system-hash-table (&key test synchronized weakness)
232 (multiple-value-bind (kind args)
233 (cond ((equal test '(quote eq)) (values 0 '('eq #'eq #'eq-hash)))
234 ((equal test '(quote eql)) (values 1 '('eql #'eql #'eql-hash)))
236 (bug "Incomplete implementation of MAKE-SYSTEM-HASH-TABLE")
238 `(%make-hash-table
239 (logior ,(ecase weakness
240 (:key (pack-ht-flags-weakness +ht-weak-key+))
241 (:value (pack-ht-flags-weakness +ht-weak-value+)))
242 (pack-ht-flags-kind ,kind)
243 ,(if synchronized 'hash-table-synchronized-flag 0))
244 ,@args
245 ;; Splicing these constant values at expansion time avoids a problem invoking
246 ;; MAKE-SYSTEM-HASH-TABLE before the constants are known to make-host-2, as happens
247 ;; when compiling type-class. hash-table.lisp can't be moved earlier in build-order
248 ;; (without pain) so the expanded code can't use the values.
249 ,+min-hash-table-size+
250 ,default-rehash-size
251 1.0)))
253 ;; Our hash-tables store precomputed hashes to speed rehash and to guard
254 ;; the call of the general comparator.
255 ;; i.e. we take the value from mumble-hash {SXHASH, EQ-HASH, etc}
256 ;; and fuzz the bits some more, then clip to 31 bits and store that.
257 ;; (As a practical matter, this limits tables to 2^31 bins.)
258 ;; Address-sensitive keys can't store a precomputed hash. They instead
259 ;; store this value that indicates address-sensitivity.
260 (defconstant +magic-hash-vector-value+ #xFFFFFFFF)