3 ;;;; This software is part of the SBCL system. See the README file for
6 ;;;; This software is derived from the CMU CL system, which was
7 ;;;; written at Carnegie Mellon University and released into the
8 ;;;; public domain. The software is in the public domain and is
9 ;;;; provided with absolutely no warranty. See the COPYING and CREDITS
10 ;;;; files for more information.
12 (in-package "SB!IMPL")
14 (eval-when (:compile-toplevel
:load-toplevel
:execute
)
15 (defconstant max-hash sb
!xc
:most-positive-fixnum
))
18 `(integer 0 ,max-hash
))
20 (defun pointer-hash (key)
23 ;;; the depthoid explored when calculating hash values
25 ;;; "Depthoid" here is a sort of mixture of what Common Lisp ordinarily calls
26 ;;; depth and what Common Lisp ordinarily calls length; it's incremented either
27 ;;; when we descend into a compound object or when we step through elements of
28 ;;; a compound object.
29 (defconstant +max-hash-depthoid
+ 4)
31 ;;;; mixing hash values
33 ;;; a function for mixing hash values
36 ;;; * Non-commutativity keeps us from hashing e.g. #(1 5) to the
37 ;;; same value as #(5 1), and ending up in real trouble in some
38 ;;; special cases like bit vectors the way that CMUCL 18b SXHASH
39 ;;; does. (Under CMUCL 18b, SXHASH of any bit vector is 1..)
40 ;;; * We'd like to scatter our hash values over the entire possible range
41 ;;; of values instead of hashing small or common key values (like
42 ;;; 2 and NIL and #\a) to small FIXNUMs the way that the CMUCL 18b
43 ;;; SXHASH function does, again helping to avoid pathologies like
44 ;;; hashing all bit vectors to 1.
45 ;;; * We'd like this to be simple and fast, too.
47 ;;; FIXME: Should this be INLINE?
48 (declaim (ftype (sfunction ((and fixnum unsigned-byte
)
49 (and fixnum unsigned-byte
))
50 (and fixnum unsigned-byte
))
52 (declaim (inline mix
))
54 ;; FIXME: We wouldn't need the nasty (SAFETY 0) here if the compiler
55 ;; were smarter about optimizing ASH. (Without the THE FIXNUM below,
56 ;; and the (SAFETY 0) declaration here to get the compiler to trust
57 ;; it, the sbcl-0.5.0m cross-compiler running under Debian
58 ;; cmucl-2.4.17 turns the ASH into a full call, requiring the
59 ;; UNSIGNED-BYTE 32 argument to be coerced to a bignum, requiring
60 ;; consing, and thus generally obliterating performance.)
61 (declare (optimize (speed 3) (safety 0)))
62 (declare (type (and fixnum unsigned-byte
) x y
))
64 ;; * Bits diffuse in both directions (shifted left by up to 2 places
65 ;; in the calculation of XY, and shifted right by up to 5 places
67 ;; * The #'+ and #'LOGXOR operations don't commute with each other,
68 ;; so different bit patterns are mixed together as they shift
70 ;; * The arbitrary constant in the #'LOGXOR expression is intended
71 ;; to help break up any weird anomalies we might otherwise get
72 ;; when hashing highly regular patterns.
73 ;; (These are vaguely like the ideas used in many cryptographic
74 ;; algorithms, but we're not pushing them hard enough here for them
75 ;; to be cryptographically strong.)
76 (let* ((xy (+ (* x
3) y
)))
77 (logand most-positive-fixnum
84 ;;;; Note that this operation is used in compiler symbol table
85 ;;;; lookups, so we'd like it to be fast.
87 ;;;; As of 2004-03-10, we implement the one-at-a-time algorithm
88 ;;;; designed by Bob Jenkins (see
89 ;;;; <http://burtleburtle.net/bob/hash/doobs.html> for some more
92 (declaim (inline %sxhash-substring
))
93 (defun %sxhash-substring
(string &optional
(count (length string
)))
94 ;; FIXME: As in MIX above, we wouldn't need (SAFETY 0) here if the
95 ;; cross-compiler were smarter about ASH, but we need it for
96 ;; sbcl-0.5.0m. (probably no longer true? We might need SAFETY 0
97 ;; to elide some type checks, but then again if this is inlined in
98 ;; all the critical places, we might not -- CSR, 2004-03-10)
99 (declare (optimize (speed 3) (safety 0)))
100 (declare (type string string
))
101 (declare (type index count
))
102 (macrolet ((set-result (form)
103 `(setf result
(ldb (byte #.sb
!vm
:n-word-bits
0) ,form
))))
105 (declare (type (unsigned-byte #.sb
!vm
:n-word-bits
) result
))
106 (unless (typep string
'(vector nil
))
108 (declare (type index i
))
109 (set-result (+ result
(char-code (aref string i
))))
110 (set-result (+ result
(ash result
10)))
111 (set-result (logxor result
(ash result -
6)))))
112 (set-result (+ result
(ash result
3)))
113 (set-result (logxor result
(ash result -
11)))
114 (set-result (logxor result
(ash result
15)))
115 (logand result most-positive-fixnum
))))
117 ;;; (let ((ht (make-hash-table :test 'equal)))
118 ;;; (do-all-symbols (symbol)
119 ;;; (let* ((string (symbol-name symbol))
120 ;;; (hash (%sxhash-substring string)))
121 ;;; (if (gethash hash ht)
122 ;;; (unless (string= (gethash hash ht) string)
123 ;;; (format t "collision: ~S ~S~%" string (gethash hash ht)))
124 ;;; (setf (gethash hash ht) string))))
125 ;;; (format t "final count=~W~%" (hash-table-count ht)))
127 (defun %sxhash-simple-string
(x)
128 (declare (optimize speed
))
129 (declare (type simple-string x
))
130 ;; KLUDGE: this FLET is a workaround (suggested by APD) for presence
131 ;; of let conversion in the cross compiler, which otherwise causes
132 ;; strongly suboptimal register allocation.
134 (%sxhash-substring x
)))
135 (declare (notinline trick
))
138 (defun %sxhash-simple-substring
(x count
)
139 (declare (optimize speed
))
140 (declare (type simple-string x
))
141 (declare (type index count
))
142 ;; see comment in %SXHASH-SIMPLE-STRING
143 (flet ((trick (x count
)
144 (%sxhash-substring x count
)))
145 (declare (notinline trick
))
148 ;;;; the SXHASH function
151 (declaim (ftype (sfunction (integer) (integer 0 #.sb
!xc
:most-positive-fixnum
))
153 (declaim (ftype (sfunction (t) (integer 0 #.sb
!xc
:most-positive-fixnum
))
157 ;; profiling SXHASH is hard, but we might as well try to make it go
158 ;; fast, in case it is the bottleneck somewhere. -- CSR, 2003-03-14
159 (declare (optimize speed
))
160 (labels ((sxhash-number (x)
162 (fixnum (sxhash x
)) ; through DEFTRANSFORM
163 (integer (sb!bignum
:sxhash-bignum x
))
164 (single-float (sxhash x
)) ; through DEFTRANSFORM
165 (double-float (sxhash x
)) ; through DEFTRANSFORM
166 #!+long-float
(long-float (error "stub: no LONG-FLOAT"))
167 (ratio (let ((result 127810327))
168 (declare (type fixnum result
))
169 (mixf result
(sxhash-number (numerator x
)))
170 (mixf result
(sxhash-number (denominator x
)))
172 (complex (let ((result 535698211))
173 (declare (type fixnum result
))
174 (mixf result
(sxhash-number (realpart x
)))
175 (mixf result
(sxhash-number (imagpart x
)))
177 (sxhash-recurse (x depthoid
)
178 (declare (type index depthoid
))
180 ;; we test for LIST here, rather than CONS, because the
181 ;; type test for CONS is in fact the test for
182 ;; LIST-POINTER-LOWTAG followed by a negated test for
183 ;; NIL. If we're going to have to test for NIL anyway,
184 ;; we might as well do it explicitly and pick off the
185 ;; answer. -- CSR, 2004-07-14
188 (sxhash x
) ; through DEFTRANSFORM
190 (mix (sxhash-recurse (car x
) (1- depthoid
))
191 (sxhash-recurse (cdr x
) (1- depthoid
)))
194 (if (or (typep x
'structure-object
) (typep x
'condition
))
196 (sxhash ; through DEFTRANSFORM
198 (layout-classoid (%instance-layout x
)))))
199 (sxhash-instance x
)))
200 (symbol (sxhash x
)) ; through DEFTRANSFORM
203 (simple-string (sxhash x
)) ; through DEFTRANSFORM
204 (string (%sxhash-substring x
))
205 (simple-bit-vector (sxhash x
)) ; through DEFTRANSFORM
207 ;; FIXME: It must surely be possible to do better
208 ;; than this. The problem is that a non-SIMPLE
209 ;; BIT-VECTOR could be displaced to another, with a
210 ;; non-zero offset -- so that significantly more
211 ;; work needs to be done using the %RAW-BITS
212 ;; approach. This will probably do for now.
213 (sxhash-recurse (copy-seq x
) depthoid
))
214 (t (logxor 191020317 (sxhash (array-rank x
))))))
217 (sxhash (char-code x
)))) ; through DEFTRANSFORM
218 ;; general, inefficient case of NUMBER
219 (number (sxhash-number x
))
220 (generic-function (sxhash-instance x
))
222 (sxhash-recurse x
+max-hash-depthoid
+)))
224 ;;;; the PSXHASH function
226 ;;;; FIXME: This code does a lot of unnecessary full calls. It could be made
227 ;;;; more efficient (in both time and space) by rewriting it along the lines
228 ;;;; of the SXHASH code above.
230 ;;; like SXHASH, but for EQUALP hashing instead of EQUAL hashing
231 (defun psxhash (key &optional
(depthoid +max-hash-depthoid
+))
232 (declare (optimize speed
))
233 (declare (type (integer 0 #.
+max-hash-depthoid
+) depthoid
))
234 ;; Note: You might think it would be cleaner to use the ordering given in the
235 ;; table from Figure 5-13 in the EQUALP section of the ANSI specification
236 ;; here. So did I, but that is a snare for the unwary! Nothing in the ANSI
237 ;; spec says that HASH-TABLE can't be a STRUCTURE-OBJECT, and in fact our
238 ;; HASH-TABLEs *are* STRUCTURE-OBJECTs, so we need to pick off the special
239 ;; HASH-TABLE behavior before we fall through to the generic STRUCTURE-OBJECT
240 ;; comparison behavior.
242 (array (array-psxhash key depthoid
))
243 (hash-table (hash-table-psxhash key
))
244 (structure-object (structure-object-psxhash key depthoid
))
245 (cons (list-psxhash key depthoid
))
246 (number (number-psxhash key
))
247 (character (char-code (char-upcase key
)))
250 (defun array-psxhash (key depthoid
)
251 (declare (optimize speed
))
252 (declare (type array key
))
253 (declare (type (integer 0 #.
+max-hash-depthoid
+) depthoid
))
255 ;; VECTORs have to be treated specially because ANSI specifies
256 ;; that we must respect fill pointers.
259 '(let ((result 572539))
260 (declare (type fixnum result
))
261 (mixf result
(length key
))
262 (when (plusp depthoid
)
264 (dotimes (i (length key
))
265 (declare (type fixnum i
))
267 (psxhash (aref key i
) depthoid
))))
269 (make-dispatch (types)
271 ,@(loop for type in types
274 (make-dispatch (simple-base-string
275 (simple-array character
(*))
277 (simple-array (unsigned-byte 8) (*))
278 (simple-array fixnum
(*))
280 ;; Any other array can be hashed by working with its underlying
281 ;; one-dimensional physical representation.
283 (let ((result 60828))
284 (declare (type fixnum result
))
285 (dotimes (i (array-rank key
))
286 (mixf result
(array-dimension key i
)))
287 (when (plusp depthoid
)
289 (dotimes (i (array-total-size key
))
291 (psxhash (row-major-aref key i
) depthoid
))))
294 (defun structure-object-psxhash (key depthoid
)
295 (declare (optimize speed
))
296 (declare (type structure-object key
))
297 (declare (type (integer 0 #.
+max-hash-depthoid
+) depthoid
))
298 (let* ((layout (%instance-layout key
)) ; i.e. slot #0
299 (length (layout-length layout
))
300 (classoid (layout-classoid layout
))
301 (name (classoid-name classoid
))
302 (result (mix (sxhash name
) (the fixnum
79867))))
303 (declare (type fixnum result
))
304 (dotimes (i (min depthoid
(- length
1 (layout-n-untagged-slots layout
))))
305 (declare (type fixnum i
))
306 (let ((j (1+ i
))) ; skipping slot #0, which is for LAYOUT
307 (declare (type fixnum j
))
309 (psxhash (%instance-ref key j
)
311 ;; KLUDGE: Should hash untagged slots, too. (Although +max-hash-depthoid+
312 ;; is pretty low currently, so they might not make it into the hash
316 (defun list-psxhash (key depthoid
)
317 (declare (optimize speed
))
318 (declare (type list key
))
319 (declare (type (integer 0 #.
+max-hash-depthoid
+) depthoid
))
325 (mix (psxhash (car key
) (1- depthoid
))
326 (psxhash (cdr key
) (1- depthoid
))))))
328 (defun hash-table-psxhash (key)
329 (declare (optimize speed
))
330 (declare (type hash-table key
))
331 (let ((result 103924836))
332 (declare (type fixnum result
))
333 (mixf result
(hash-table-count key
))
334 (mixf result
(sxhash (hash-table-test key
)))
337 (defun number-psxhash (key)
338 (declare (optimize speed
))
339 (declare (type number key
))
340 (flet ((sxhash-double-float (val)
341 (declare (type double-float val
))
342 ;; FIXME: Check to make sure that the DEFTRANSFORM kicks in and the
343 ;; resulting code works without consing. (In Debian cmucl 2.4.17,
347 (integer (sxhash key
))
348 (float (macrolet ((frob (type)
349 (let ((lo (coerce most-negative-fixnum type
))
350 (hi (coerce most-positive-fixnum type
)))
351 `(cond (;; This clause allows FIXNUM-sized integer
352 ;; values to be handled without consing.
354 (multiple-value-bind (q r
)
355 (floor (the (,type
,lo
,hi
) key
))
356 (if (zerop (the ,type r
))
359 (coerce key
'double-float
)))))
361 (multiple-value-bind (q r
) (floor key
)
362 (if (zerop (the ,type r
))
365 (coerce key
'double-float
)))))))))
367 (single-float (frob single-float
))
368 (double-float (frob double-float
))
370 (long-float (error "LONG-FLOAT not currently supported")))))
371 (rational (if (and (<= most-negative-double-float
373 most-positive-double-float
)
374 (= (coerce key
'double-float
) key
))
375 (sxhash-double-float (coerce key
'double-float
))
377 (complex (if (zerop (imagpart key
))
378 (number-psxhash (realpart key
))
379 (let ((result 330231))
380 (declare (type fixnum result
))
381 (mixf result
(number-psxhash (realpart key
)))
382 (mixf result
(number-psxhash (imagpart key
)))