3 ;;;; This software is part of the SBCL system. See the README file for
6 ;;;; This software is derived from the CMU CL system, which was
7 ;;;; written at Carnegie Mellon University and released into the
8 ;;;; public domain. The software is in the public domain and is
9 ;;;; provided with absolutely no warranty. See the COPYING and CREDITS
10 ;;;; files for more information.
12 (in-package "SB!IMPL")
14 (defun pointer-hash (key)
17 ;;; the depthoid explored when calculating hash values
19 ;;; "Depthoid" here is a sort of mixture of what Common Lisp ordinarily calls
20 ;;; depth and what Common Lisp ordinarily calls length; it's incremented either
21 ;;; when we descend into a compound object or when we step through elements of
22 ;;; a compound object.
23 (defconstant +max-hash-depthoid
+ 4)
25 ;;;; mixing hash values
27 ;;; a function for mixing hash values
30 ;;; * Non-commutativity keeps us from hashing e.g. #(1 5) to the
31 ;;; same value as #(5 1), and ending up in real trouble in some
32 ;;; special cases like bit vectors the way that CMUCL 18b SXHASH
33 ;;; does. (Under CMUCL 18b, SXHASH of any bit vector is 1..)
34 ;;; * We'd like to scatter our hash values over the entire possible range
35 ;;; of values instead of hashing small or common key values (like
36 ;;; 2 and NIL and #\a) to small FIXNUMs the way that the CMUCL 18b
37 ;;; SXHASH function does, again helping to avoid pathologies like
38 ;;; hashing all bit vectors to 1.
39 ;;; * We'd like this to be simple and fast, too.
40 (declaim (ftype (sfunction ((and fixnum unsigned-byte
)
41 (and fixnum unsigned-byte
))
42 (and fixnum unsigned-byte
))
44 (declaim (inline mix
))
46 (declare (optimize (speed 3)))
47 (declare (type (and fixnum unsigned-byte
) x y
))
49 ;; * Bits diffuse in both directions (shifted arbitrarily left by
50 ;; the multiplication in the calculation of XY, and shifted
51 ;; right by up to 5 places by the ASH).
52 ;; * The #'+ and #'LOGXOR operations don't commute with each other,
53 ;; so different bit patterns are mixed together as they shift
55 ;; * The arbitrary constant XOR used in the LOGXOR expression is
56 ;; intended to help break up any weird anomalies we might
57 ;; otherwise get when hashing highly regular patterns.
58 ;; (These are vaguely like the ideas used in many cryptographic
59 ;; algorithms, but we're not pushing them hard enough here for them
60 ;; to be cryptographically strong.)
62 ;; note: 3622009729038463111 is a 62-bit prime such that its low 61
63 ;; bits, low 60 bits and low 29 bits are all also primes, thus
64 ;; giving decent distributions no matter which of the possible
65 ;; values of most-positive-fixnum we have. It is derived by simple
66 ;; search starting from 2^60*pi. The multiplication should be
67 ;; efficient no matter what the platform thanks to modular
69 (let* ((mul (logand 3622009729038463111 sb
!xc
:most-positive-fixnum
))
70 (xor (logand 608948948376289905 sb
!xc
:most-positive-fixnum
))
71 (xy (logand (+ (* x mul
) y
) sb
!xc
:most-positive-fixnum
)))
72 (logand (logxor xor xy
(ash xy -
5)) sb
!xc
:most-positive-fixnum
)))
74 ;; Return a number that increments by 1 for each word-pair allocation,
75 ;; barring complications such as exhaustion of the current page.
76 ;; The result is guaranteed to be a positive fixnum.
77 (declaim (inline address-based-counter-val
))
78 (defun address-based-counter-val ()
79 #!+(and (not sb-thread
) cheneygc
)
80 (ash (sap-int (dynamic-space-free-pointer)) (- (1+ sb
!vm
:word-shift
)))
81 ;; dynamic-space-free-pointer increments only when a page is full.
82 ;; Using boxed_region directly is finer-grained.
83 #!+(and (not sb-thread
) gencgc
)
84 (ash (extern-alien "boxed_region" unsigned-long
)
85 (- (1+ sb
!vm
:word-shift
)))
86 ;; threads imply gencgc. use the per-thread alloc region pointer
88 (ash (sap-int (sb!vm
::current-thread-offset-sap
89 sb
!vm
::thread-alloc-region-slot
))
90 (- (1+ sb
!vm
:word-shift
))))
92 ;; Return some bits that are dependent on the next address that will be
93 ;; allocated, mixed with the previous state (in case addresses get recycled).
94 ;; This algorithm, used for stuffing a hash-code into instances of CTYPE
95 ;; subtypes, is simpler than RANDOM, and a test of randomness won't
96 ;; measure up as well, but for the intended use, it doesn't matter.
97 ;; CLOS hashes could probably be made to use this.
98 (defun quasi-random-address-based-hash (state mask
)
99 (declare (type (simple-array (and fixnum unsigned-byte
) (1)) state
))
100 ;; Ok with multiple threads - No harm, no foul.
101 (logand (setf (aref state
0) (mix (address-based-counter-val) (aref state
0)))
107 ;;;; Note that this operation is used in compiler symbol table
108 ;;;; lookups, so we'd like it to be fast.
110 ;;;; As of 2004-03-10, we implement the one-at-a-time algorithm
111 ;;;; designed by Bob Jenkins (see
112 ;;;; <http://burtleburtle.net/bob/hash/doobs.html> for some more
115 (declaim (inline %sxhash-substring
))
116 (defun %sxhash-substring
(string &optional
(count (length string
)))
117 ;; FIXME: As in MIX above, we wouldn't need (SAFETY 0) here if the
118 ;; cross-compiler were smarter about ASH, but we need it for
119 ;; sbcl-0.5.0m. (probably no longer true? We might need SAFETY 0
120 ;; to elide some type checks, but then again if this is inlined in
121 ;; all the critical places, we might not -- CSR, 2004-03-10)
122 (declare (optimize (speed 3) (safety 0)))
123 (declare (type string string
))
124 (declare (type index count
))
125 (macrolet ((set-result (form)
126 `(setf result
(ldb (byte #.sb
!vm
:n-word-bits
0) ,form
))))
128 (declare (type (unsigned-byte #.sb
!vm
:n-word-bits
) result
))
129 (unless (typep string
'(vector nil
))
131 (declare (type index i
))
132 (set-result (+ result
(char-code (aref string i
))))
133 (set-result (+ result
(ash result
10)))
134 (set-result (logxor result
(ash result -
6)))))
135 (set-result (+ result
(ash result
3)))
136 (set-result (logxor result
(ash result -
11)))
137 (set-result (logxor result
(ash result
15)))
138 (logand result most-positive-fixnum
))))
140 ;;; (let ((ht (make-hash-table :test 'equal)))
141 ;;; (do-all-symbols (symbol)
142 ;;; (let* ((string (symbol-name symbol))
143 ;;; (hash (%sxhash-substring string)))
144 ;;; (if (gethash hash ht)
145 ;;; (unless (string= (gethash hash ht) string)
146 ;;; (format t "collision: ~S ~S~%" string (gethash hash ht)))
147 ;;; (setf (gethash hash ht) string))))
148 ;;; (format t "final count=~W~%" (hash-table-count ht)))
150 (defun %sxhash-simple-string
(x)
151 (declare (optimize speed
))
152 (declare (type simple-string x
))
153 ;; KLUDGE: this FLET is a workaround (suggested by APD) for presence
154 ;; of let conversion in the cross compiler, which otherwise causes
155 ;; strongly suboptimal register allocation.
157 (%sxhash-substring x
)))
158 (declare (notinline trick
))
161 (defun %sxhash-simple-substring
(x count
)
162 (declare (optimize speed
))
163 (declare (type simple-string x
))
164 (declare (type index count
))
165 ;; see comment in %SXHASH-SIMPLE-STRING
166 (flet ((trick (x count
)
167 (%sxhash-substring x count
)))
168 (declare (notinline trick
))
171 ;;;; the SXHASH function
174 (declaim (ftype (sfunction (integer) hash
) sxhash-bignum
))
176 (defun new-instance-hash-code ()
177 ;; ANSI SXHASH wants us to make a good-faith effort to produce
178 ;; hash-codes that are well distributed within the range of
179 ;; non-negative fixnums, and this address-based operation does that.
180 ;; This is faster than calling RANDOM, and is random enough.
184 (quasi-random-address-based-hash
185 (load-time-value (make-array 1 :element-type
'(and fixnum unsigned-byte
))
187 most-positive-fixnum
))))
189 ;; Make sure we never return 0 (almost no chance of that anyway).
193 #!+(and compact-instance-header x86-64
)
195 (declaim (inline %std-instance-hash
))
196 (defun %std-instance-hash
(slots) ; return or compute the 32-bit hash
197 (let ((stored-hash (sb!vm
::get-header-data-high slots
)))
198 (if (eql stored-hash
0)
199 (let ((new (logand (new-instance-hash-code) #xFFFFFFFF
)))
200 (let ((old (sb!vm
::cas-header-data-high slots
0 new
)))
201 (if (eql old
0) new old
)))
204 (defun std-instance-hash (instance)
205 #!+(and compact-instance-header x86-64
)
206 ;; The one logical slot (excluding layout) in the primitive object is index 0.
207 ;; That holds a vector of the clos slots, and its header holds the hash.
208 (let* ((slots (%instance-ref instance
0))
209 (hash (%std-instance-hash slots
)))
210 ;; Simulate N-POSITIVE-FIXNUM-BITS of output for backward-compatibility,
211 ;; in case people use the high order bits.
212 ;; (There are only 32 bits of actual randomness, if even that)
213 (logxor (ash hash
(- sb
!vm
:n-positive-fixnum-bits
32)) hash
))
214 #!-
(and compact-instance-header x86-64
)
215 (let ((hash (%instance-ref instance sb
!pcl
::std-instance-hash-slot-index
)))
216 (if (not (eql hash
0))
218 (let ((new (new-instance-hash-code)))
219 ;; At most one thread will compute a random hash.
220 ;; %INSTANCE-CAS is a full call if there is no vop for it.
221 (let ((old (%instance-cas instance sb
!pcl
::std-instance-hash-slot-index
223 (if (eql old
0) new old
))))))
225 ;; These are also random numbers, but not lazily computed.
226 (declaim (inline fsc-instance-hash
))
227 (defun fsc-instance-hash (fin)
228 #!+compact-instance-header
229 (sb!vm
::get-header-data-high
(%funcallable-instance-info fin
0))
230 #!-compact-instance-header
231 (%funcallable-instance-info fin sb
!pcl
::fsc-instance-hash-slot-index
))
234 ;; profiling SXHASH is hard, but we might as well try to make it go
235 ;; fast, in case it is the bottleneck somewhere. -- CSR, 2003-03-14
236 (declare (optimize speed
))
237 (labels ((sxhash-number (x)
238 (macrolet ((hash-complex-float ()
239 `(let ((result 535698211))
240 (declare (type fixnum result
))
241 (mixf result
(sxhash (realpart x
)))
242 (mixf result
(sxhash (imagpart x
)))
245 (fixnum (sxhash x
)) ; through DEFTRANSFORM
246 (integer (sb!bignum
:sxhash-bignum x
))
247 (single-float (sxhash x
)) ; through DEFTRANSFORM
248 (double-float (sxhash x
)) ; through DEFTRANSFORM
249 #!+long-float
(long-float (error "stub: no LONG-FLOAT"))
250 (ratio (let ((result 127810327))
251 (declare (type fixnum result
))
252 (mixf result
(sxhash-number (numerator x
)))
253 (mixf result
(sxhash-number (denominator x
)))
256 ((complex long-float
)
257 (hash-complex-float))
258 ((complex double-float
)
259 (hash-complex-float))
260 ((complex single-float
)
261 (hash-complex-float))
263 (let ((result 535698211)
264 (realpart (realpart x
))
265 (imagpart (imagpart x
)))
266 (declare (type fixnum result
))
267 (mixf result
(if (fixnump imagpart
)
269 (sxhash-number imagpart
)))
270 (mixf result
(if (fixnump realpart
)
272 (sxhash-number realpart
)))
274 (sxhash-recurse (x depthoid
)
275 (declare (type index depthoid
))
277 ;; we test for LIST here, rather than CONS, because the
278 ;; type test for CONS is in fact the test for
279 ;; LIST-POINTER-LOWTAG followed by a negated test for
280 ;; NIL. If we're going to have to test for NIL anyway,
281 ;; we might as well do it explicitly and pick off the
282 ;; answer. -- CSR, 2004-07-14
285 (sxhash x
) ; through DEFTRANSFORM
287 (mix (sxhash-recurse (car x
) (1- depthoid
))
288 (sxhash-recurse (cdr x
) (1- depthoid
)))
293 ;; Pathnames are EQUAL if all the components are EQUAL, so
294 ;; we hash all of the components of a pathname together.
295 (let ((hash (sxhash-recurse (pathname-host x
) depthoid
)))
296 (mixf hash
(sxhash-recurse (pathname-device x
) depthoid
))
297 (mixf hash
(%pathname-dir-hash x
))
298 (mixf hash
(%pathname-stem-hash x
))
299 ;; Hash :NEWEST the same as NIL because EQUAL for
300 ;; pathnames assumes that :newest and nil are equal.
301 (let ((version (%pathname-version x
)))
302 (mixf hash
(sxhash-recurse (if (eq version
:newest
)
307 ;; LAYOUTs have an easily-accesible hash value: we
308 ;; might as well use it. It's not actually uniform
309 ;; over the space of hash values (it excludes 0 and
310 ;; some of the larger numbers) but it's better than
311 ;; simply returning the same value for all LAYOUT
312 ;; objects, as the next branch would do.
313 (layout-clos-hash x
))
316 ;; FIXME: why not (LAYOUT-CLOS-HASH ...) ?
317 (sxhash ; through DEFTRANSFORM
319 (layout-classoid (%instance-layout x
))))))
320 (condition (sb!kernel
::condition-hash x
))
321 (t (std-instance-hash x
))))
322 (symbol (sxhash x
)) ; through DEFTRANSFORM
325 (simple-string (sxhash x
)) ; through DEFTRANSFORM
326 (string (%sxhash-substring x
))
327 (simple-bit-vector (sxhash x
)) ; through DEFTRANSFORM
329 ;; FIXME: It must surely be possible to do better
330 ;; than this. The problem is that a non-SIMPLE
331 ;; BIT-VECTOR could be displaced to another, with a
332 ;; non-zero offset -- so that significantly more
333 ;; work needs to be done using the %VECTOR-RAW-BITS
334 ;; approach. This will probably do for now.
335 (sxhash-recurse (copy-seq x
) depthoid
))
336 (t (logxor 191020317 (sxhash (array-rank x
))))))
339 (sxhash (char-code x
)))) ; through DEFTRANSFORM
340 ;; general, inefficient case of NUMBER
341 (number (sxhash-number x
))
342 (generic-function (fsc-instance-hash x
))
344 (sxhash-recurse x
+max-hash-depthoid
+)))
346 ;;;; the PSXHASH function
348 ;;;; FIXME: This code does a lot of unnecessary full calls. It could be made
349 ;;;; more efficient (in both time and space) by rewriting it along the lines
350 ;;;; of the SXHASH code above.
352 ;;; like SXHASH, but for EQUALP hashing instead of EQUAL hashing
353 (defun psxhash (key &optional
(depthoid +max-hash-depthoid
+))
354 (declare (optimize speed
))
355 (declare (type (integer 0 #.
+max-hash-depthoid
+) depthoid
))
356 ;; Note: You might think it would be cleaner to use the ordering given in the
357 ;; table from Figure 5-13 in the EQUALP section of the ANSI specification
358 ;; here. So did I, but that is a snare for the unwary! Nothing in the ANSI
359 ;; spec says that HASH-TABLE can't be a STRUCTURE-OBJECT, and in fact our
360 ;; HASH-TABLEs *are* STRUCTURE-OBJECTs, so we need to pick off the special
361 ;; HASH-TABLE behavior before we fall through to the generic STRUCTURE-OBJECT
362 ;; comparison behavior.
364 (array (array-psxhash key depthoid
))
365 (hash-table (hash-table-psxhash key
))
366 (structure-object (structure-object-psxhash key depthoid
))
367 (cons (list-psxhash key depthoid
))
368 (number (number-psxhash key
))
369 (character (char-code (char-upcase key
)))
372 (defun array-psxhash (key depthoid
)
373 (declare (optimize speed
))
374 (declare (type array key
))
375 (declare (type (integer 0 #.
+max-hash-depthoid
+) depthoid
))
377 ;; VECTORs have to be treated specially because ANSI specifies
378 ;; that we must respect fill pointers.
381 `(let ((result 572539))
382 (declare (type fixnum result
))
383 (mixf result
(length key
))
384 (when (plusp depthoid
)
386 (dotimes (i (length key
))
387 (declare (type fixnum i
))
389 (psxhash (aref key i
) depthoid
))))
391 (make-dispatch (types)
393 ,@(loop for type in types
396 (make-dispatch (simple-base-string
397 (simple-array character
(*))
399 (simple-array (unsigned-byte 8) (*))
400 (simple-array fixnum
(*))
402 ;; Any other array can be hashed by working with its underlying
403 ;; one-dimensional physical representation.
405 (let ((result 60828))
406 (declare (type fixnum result
))
407 (dotimes (i (array-rank key
))
408 (mixf result
(%array-dimension key i
)))
409 (when (plusp depthoid
)
411 (with-array-data ((key key
) (start) (end))
412 (let ((getter (truly-the function
(svref %%data-vector-reffers%%
413 (%other-pointer-widetag key
)))))
414 (loop for i from start below end
416 (psxhash (funcall getter key i
) depthoid
))))))
419 (defun structure-object-psxhash (key depthoid
)
420 (declare (optimize speed
))
421 (declare (type structure-object key
))
422 (declare (type (integer 0 #.
+max-hash-depthoid
+) depthoid
))
423 (let* ((layout (%instance-layout key
))
424 (result (layout-clos-hash layout
)))
425 (declare (type fixnum result
))
426 (when (plusp depthoid
)
427 (let ((max-iterations depthoid
)
428 (depthoid (1- depthoid
)))
429 ;; We don't mix in LAYOUT here because it was already done above.
430 (do-instance-tagged-slot (i key
:layout layout
:pad nil
)
431 (mixf result
(psxhash (%instance-ref key i
) depthoid
))
432 (if (zerop (decf max-iterations
)) (return)))))
433 ;; [The following comment blurs some issues: indeed it would take
434 ;; a second loop in the non-interleaved-slots code; that loop might
435 ;; never execute because depthoid "cuts off", although that's an arbitrary
436 ;; choice and could be decided otherwise; and efficiency would likely
437 ;; demand that we store some additional metadata in the LAYOUT indicating
438 ;; how to mix the bits in, because floating-point +/-zeros have to
439 ;; be considered EQUALP]
440 ;; KLUDGE: Should hash untagged slots, too. (Although +max-hash-depthoid+
441 ;; is pretty low currently, so they might not make it into the hash
445 (defun list-psxhash (key depthoid
)
446 (declare (optimize speed
))
447 (declare (type list key
))
448 (declare (type (integer 0 #.
+max-hash-depthoid
+) depthoid
))
454 (mix (psxhash (car key
) (1- depthoid
))
455 (psxhash (cdr key
) (1- depthoid
))))))
457 (defun hash-table-psxhash (key)
458 (declare (optimize speed
))
459 (declare (type hash-table key
))
460 (let ((result 103924836))
461 (declare (type fixnum result
))
462 (mixf result
(hash-table-count key
))
463 (mixf result
(sxhash (hash-table-test key
)))
466 (defun number-psxhash (key)
467 (declare (type number key
)
468 (muffle-conditions compiler-note
)
470 (flet ((sxhash-double-float (val)
471 (declare (type double-float val
))
472 ;; FIXME: Check to make sure that the DEFTRANSFORM kicks in and the
473 ;; resulting code works without consing. (In Debian cmucl 2.4.17,
476 (macrolet ((hash-float (type key
)
477 (let ((lo (coerce sb
!xc
:most-negative-fixnum type
))
478 (hi (coerce sb
!xc
:most-positive-fixnum type
)))
480 (cond ( ;; This clause allows FIXNUM-sized integer
481 ;; values to be handled without consing.
483 (multiple-value-bind (q r
)
484 (floor (the (,type
,lo
,hi
) key
))
485 (if (zerop (the ,type r
))
488 (coerce key
'double-float
)))))
489 ((float-infinity-p key
)
490 ;; {single,double}-float infinities are EQUALP
493 (sxhash (symbol-value 'sb
!ext
:single-float-negative-infinity
))
496 (sxhash (symbol-value 'sb
!ext
:single-float-positive-infinity
))
499 (multiple-value-bind (q r
) (floor key
)
500 (if (zerop (the ,type r
))
503 (coerce key
'double-float
)))))))))
504 (hash-complex (&optional
(hasher '(number-psxhash)))
505 `(if (zerop (imagpart key
))
506 (,@hasher
(realpart key
))
507 (let ((result 330231))
508 (declare (type fixnum result
))
509 (mixf result
(,@hasher
(realpart key
)))
510 (mixf result
(,@hasher
(imagpart key
)))
513 (integer (sxhash key
))
516 (single-float (hash-float single-float key
))
517 (double-float (hash-float double-float key
))
519 (long-float (error "LONG-FLOAT not currently supported")))))
520 (rational (if (and (<= most-negative-double-float
522 most-positive-double-float
)
523 (= (coerce key
'double-float
) key
))
524 (sxhash-double-float (coerce key
'double-float
))
526 ((complex double-float
)
527 (hash-complex (hash-float double-float
)))
528 ((complex single-float
)
529 (hash-complex (hash-float single-float
)))
533 ;;; Semantic equivalent of SXHASH, but better-behaved for function names.
534 ;;; It performs more work by not cutting off as soon in the CDR direction.
535 ;;; More work here equates to less work in the global hashtable.
536 ;;; To wit: (eq (sxhash '(foo a b c bar)) (sxhash '(foo a b c d))) => T
537 ;;; but the corresponding globaldb-sxhashoids differ.
538 (defun globaldb-sxhashoid (name)
540 (declare (optimize (safety 0))) ; after the argc check
541 ;; TRAVERSE will walk across more cons cells than RECURSE will descend.
542 ;; That's why this isn't just one self-recursive function.
543 (labels ((traverse (accumulator x length-limit
)
544 (declare (fixnum length-limit
))
545 (cond ((atom x
) (mix (sxhash x
) accumulator
))
546 ((zerop length-limit
) accumulator
)
547 (t (traverse (mix (recurse (car x
) 4) accumulator
)
548 (cdr x
) (1- length-limit
)))))
549 (recurse (x depthoid
) ; depthoid = a blend of level and length
550 (declare (fixnum depthoid
))
551 (cond ((atom x
) (sxhash x
))
553 #.
(logand sb
!xc
:most-positive-fixnum
#36Rglobaldbsxhashoid
))
554 (t (mix (recurse (car x
) (1- depthoid
))
555 (recurse (cdr x
) (1- depthoid
)))))))
556 (traverse 0 name
10))))