Transform ARRAY-IN-BOUNDS-P for multi-dimensional arrays.
[sbcl.git] / tests / hash.pure.lisp
blob972e84880f347d460560360f30a7c1bea9567e07
1 ;;;; This software is part of the SBCL system. See the README file for
2 ;;;; more information.
3 ;;;;
4 ;;;; While most of SBCL is derived from the CMU CL system, the test
5 ;;;; files (like this one) were written from scratch after the fork
6 ;;;; from CMU CL.
7 ;;;;
8 ;;;; This software is in the public domain and is provided with
9 ;;;; absolutely no warranty. See the COPYING and CREDITS files for
10 ;;;; more information.
12 ;;; The return value of SXHASH on non-string/bitvector arrays should not
13 ;;; change when the contents of the array change.
14 (with-test (:name (sxhash array :independent-of-contents))
15 (let* ((a (make-array '(1) :initial-element 1))
16 (sxhash (sxhash a))
17 (hash (make-hash-table :test 'equal)))
18 (setf (gethash a hash) t)
19 (setf (aref a 0) 0)
20 (assert (= sxhash (sxhash a)))
21 ;; Need to make another access to the hash to disable the
22 ;; last-seen-element cache.
23 (setf (gethash 'y hash) t)
24 (assert (gethash a hash))))
26 ;;; Minimum quality checks
27 (with-test (:name (sxhash :quality :minimum))
28 (assert (/= (sxhash "foo") (sxhash "bar")))
29 (assert (/= (sxhash (pathname "foo.txt")) (sxhash (pathname "bar.txt"))))
30 (assert (/= (sxhash (list 1 2 3)) (sxhash (list 3 2 1))))
31 (assert (/= (sxhash #*1010) (sxhash #*0101))))
33 ;;; This test supposes that no un-accounted-for consing occurs.
34 ;;; But now that we have to two regions for allocation, it's not necessarily
35 ;;; the case that any given allocation bumps the mixed-region free pointer.
36 (with-test (:name :address-based-hash-counter :skipped-on :interpreter)
37 ;; It doesn't particularly matter what ADDRESS-BASED-COUNTER-VAL returns,
38 ;; but it's best to verify the assumption that each cons bumps the count
39 ;; by 1, lest it be violated in a way that affects the quality of CTYPE
40 ;; hashes.
41 (let ((win 0) (n-trials 10) (prev (sb-int:address-based-counter-val)))
42 (dotimes (i n-trials)
43 (locally
44 (declare (notinline cons sb-sys:int-sap)) ; it's flushable, but don't flush it
45 #+use-cons-region (sb-sys:int-sap #xf00fa) ; 2 words in mixed-region
46 #-use-cons-region (cons 1 2))
47 (let ((ptr (sb-int:address-based-counter-val)))
48 (when (= ptr (1+ prev))
49 (incf win))
50 (setq prev ptr)))
51 ;; GC could occur in here. Just check that 9 out of 10 trials succeed.
52 (assert (>= win 9))))
54 (with-test (:name (sxhash :bit-vector-sxhash-mask-to-length))
55 (let ((bv (make-array 5 :element-type 'bit))
56 (unsafely-set-bit
57 (compile nil
58 '(lambda (bv i val)
59 (declare (optimize (sb-c:insert-array-bounds-checks 0)))
60 (setf (bit bv i) val)))))
61 (replace bv '(1 0 1 1 1))
62 (let ((hash (sxhash bv)))
63 ;; touch all bits of the first data word as well as the padding word
64 (loop for i from 5 below (* 2 sb-vm:n-word-bytes)
65 do (funcall unsafely-set-bit bv i 1)
66 (assert (eql (sxhash bv) hash))
67 (funcall unsafely-set-bit bv i 0)))))
69 (defvar *sbv* (make-array 512 :element-type 'bit))
70 (defun sxhash-for-bv-test (test-bv)
71 (let ((underlying *sbv*)
72 (expected-hash (sxhash test-bv)))
73 ;; Currently %SXHASH-BIT-VECTOR can only operate on non-simple vectors
74 ;; if the displacement on bits aligns to a word boundary,
75 ;; or possibly on a byte boundary for some CPUs.
76 ;; Otherwise it just copies the non-simple vector as there's no point
77 ;; to exercising varying values of displaced-index-offet for that.
78 (loop for index-offset
79 from 0 by 8 repeat 25
80 do (let ((unsimple-bv (make-array 300
81 :element-type 'bit
82 :displaced-to underlying
83 :displaced-index-offset index-offset
84 :fill-pointer (length test-bv))))
85 (flet ((check-unreplaced-bits (expect)
86 ;; make sure REPLACE didn't touch bits outside
87 ;; the expected range. This is a test of REPLACE
88 ;; more so than SXHASH, but is needed to establish
89 ;; that SXHASH of the nonsimple vector isn't
90 ;; looking at bits that it shouldn't.
91 (loop for i from 0 below index-offset
92 do (assert (= (bit underlying i) expect)))
93 (loop for i from (+ index-offset (length test-bv))
94 below (length underlying)
95 do (assert (= (bit underlying i) expect)))))
96 (fill underlying 0)
97 (replace unsimple-bv test-bv)
98 (check-unreplaced-bits 0)
99 (assert (eql (sxhash unsimple-bv) expected-hash))
100 (fill underlying 1)
101 (replace unsimple-bv test-bv)
102 (check-unreplaced-bits 1)
103 (assert (eql (sxhash unsimple-bv) expected-hash)))))
104 expected-hash))
106 ;;; The value of SXHASH on bit-vectors of length a multiple of the word
107 ;;; size didn't depend on the contents of the last word, specifically
108 ;;; making it a constant for bit-vectors of length equal to the word
109 ;;; size.
110 ;;; Here we test that at least two different hash codes occur per length.
111 (with-test (:name (sxhash :quality bit-vector :non-constant))
112 (let (;; Up to which length to test.
113 (max-length 200)
114 ;; How many random bits to use before declaring a test failure.
115 (random-bits-to-use 200))
116 (loop for length from 1 to max-length do
117 (let ((v (make-array length :element-type 'bit)))
118 (flet ((randomize-v ()
119 (map-into v (lambda ()
120 (random 2)))))
121 (randomize-v)
122 (let ((sxhash (sxhash-for-bv-test v))
123 (random-bits-used 0))
124 (loop
125 (randomize-v)
126 (when (/= (sxhash-for-bv-test v) sxhash)
127 (return))
128 (incf random-bits-used length)
129 (when (>= random-bits-used random-bits-to-use)
130 (error "SXHASH is constant on bit-vectors of length ~a."
131 length)))))))))
133 ;;; See the comment at the previous test.
134 ;;; Here we test that the hash code depends on any of the last N-WORD-BITS
135 ;;; bits.
136 (with-test (:name (sxhash :quality bit-vector :dependent-on-final-bits))
137 (let (;; Up to which length to test.
138 (max-length 200)
139 ;; How many random bits to use before declaring a test failure.
140 (random-bits-to-use 200))
141 ;; The previous test covers lengths up to the word size, so start
142 ;; above that.
143 (loop for length from (1+ sb-vm:n-word-bits) to max-length do
144 (let ((v (make-array length :element-type 'bit :initial-element 0)))
145 (flet ((randomize-v ()
146 (loop for i downfrom (1- length)
147 repeat sb-vm:n-word-bits
148 do (setf (aref v i) (random 2)))))
149 (randomize-v)
150 (let ((sxhash (sxhash-for-bv-test v)))
151 (dotimes (i (ceiling random-bits-to-use sb-vm:n-word-bits)
152 (error "SXHASH on bit-vectors of length ~a ~
153 does not depend on the final ~a bits."
154 length sb-vm:n-word-bits))
155 (randomize-v)
156 (when (/= (sxhash-for-bv-test v) sxhash)
157 (return)))))))))
159 (with-test (:name :maphash-multiple-evaluation)
160 (assert (null
161 (check-function-evaluation-order
162 (maphash
163 (constantly nil)
164 (make-hash-table))))))
166 (with-test (:name :equalp-hash-float-infinity)
167 (let ((table (make-hash-table :test 'equalp)))
168 (setf (gethash sb-ext:double-float-positive-infinity table) 1
169 (gethash sb-ext:double-float-negative-infinity table) -1)
170 (dolist (v (list sb-ext:single-float-positive-infinity
171 sb-ext:double-float-positive-infinity
172 (complex sb-ext:single-float-positive-infinity 0)
173 (complex sb-ext:double-float-positive-infinity 0)))
174 (assert (eql (gethash v table) 1)))
175 (dolist (v (list sb-ext:single-float-negative-infinity
176 sb-ext:double-float-negative-infinity
177 (complex sb-ext:single-float-negative-infinity 0)
178 (complex sb-ext:double-float-negative-infinity 0)))
179 (assert (eql (gethash v table) -1)))))
181 (with-test (:name (:hash equalp pathname))
182 (let* ((map (make-hash-table :test 'equalp))
183 (key #P"some/path/"))
184 (setf (gethash key map) "my-value")
185 (format (make-broadcast-stream) "Printing: ~A~%" key)
186 (assert (remhash key map))
187 (assert (= 0 (hash-table-count map)))))
189 (with-test (:name :clrhash-clears-rehash-p)
190 (let ((tbl (make-hash-table :size 128)))
191 (assert (not (sb-impl::flat-hash-table-p tbl)))
192 (dotimes (i 10)
193 (setf (gethash (cons 'foo (gensym)) tbl) 1))
194 (gc)
195 ;; Set the need-to-rehash bit
196 (setf (svref (sb-impl::hash-table-pairs tbl) 1) 1)
197 (clrhash tbl)
198 ;; The need-to-rehash bit is not set
199 (assert (eql 0 (svref (sb-impl::hash-table-pairs tbl) 1)))))
201 (with-test (:name :sxhash-signed-floating-point-zeros)
202 (assert (not (eql (sxhash -0f0) (sxhash 0f0))))
203 (assert (not (eql (sxhash -0d0) (sxhash 0d0)))))
205 (with-test (:name :sxhash-simple-bit-vector)
206 (let (hashes)
207 (let ((v (make-array sb-vm:n-word-bits :element-type 'bit)))
208 (dotimes (i sb-vm:n-word-bits)
209 (setf (aref v i) 1)
210 (push (sxhash v) hashes)
211 (setf (aref v i) 0)))
212 (assert (= (length (remove-duplicates hashes)) sb-vm:n-word-bits))))
214 (with-test (:name :eq-hash-nonpointers-not-address-sensitive)
215 (let ((tbl (make-hash-table :test 'eq)))
216 (setf (gethash #\a tbl) 1)
217 #+64-bit (setf (gethash 1.0f0 tbl) 1) ; single-float is a nonpointer
218 (let ((data (sb-kernel:get-header-data (sb-impl::hash-table-pairs tbl))))
219 (assert (not (logtest data sb-vm:vector-addr-hashing-flag))))))
221 (with-test (:name (hash-table :small-rehash-size))
222 (let ((ht (make-hash-table :rehash-size 2)))
223 (dotimes (i 100)
224 (setf (gethash (gensym) ht) 10)))
225 (let ((ht (make-hash-table :rehash-size 1.0001)))
226 (dotimes (i 100)
227 (setf (gethash (gensym) ht) 10))))
229 (with-test (:name (hash-table :custom-hashfun-with-standard-test))
230 (flet ((kv-flag-bits (ht)
231 (ash (sb-kernel:get-header-data (sb-impl::hash-table-pairs ht))
232 (- sb-vm:array-flags-data-position))))
233 ;; verify that EQ hashing on symbols is address-sensitive
234 (let ((h (make-hash-table :test 'eq :size 128)))
235 (assert (not (sb-impl::flat-hash-table-p h)))
236 (setf (gethash 'foo h) 1)
237 (assert (logtest (kv-flag-bits h) sb-vm:vector-addr-hashing-flag)))
238 (let ((h (make-hash-table :test 'eq :hash-function 'sb-kernel:symbol-hash)))
239 (setf (gethash 'foo h) 1)
240 (assert (not (logtest (kv-flag-bits h) sb-vm:vector-addr-hashing-flag))))
242 ;; Verify that any standard hash-function on a function is address-sensitive,
243 ;; but a custom hash function makes it not so.
244 ;; Require 64-bit since this uses %CODE-SERIALNO as the hash,
245 ;; and that function doesn't exist on 32-bit (but should!)
246 #+64-bit
247 (dolist (test '(eq eql equal equalp))
248 (let ((h (make-hash-table :test test :size 128)))
249 (assert (not (sb-impl::flat-hash-table-p h)))
250 (setf (gethash #'car h) 1)
251 (assert (logtest (kv-flag-bits h) sb-vm:vector-addr-hashing-flag)))
252 (let ((h (make-hash-table :test test :hash-function
253 (lambda (x)
254 (sb-kernel:%code-serialno
255 (sb-kernel:fun-code-header x))))))
256 (setf (gethash #'car h) 1)
257 (assert (not (logtest (kv-flag-bits h) sb-vm:vector-addr-hashing-flag)))))))
259 (defun hash-table-freelist (tbl)
260 (sb-int:named-let chain ((index (sb-impl::hash-table-next-free-kv tbl)))
261 (when (plusp index)
262 (nconc (list index)
263 (if (< index
264 (sb-impl::kv-vector-high-water-mark (sb-impl::hash-table-pairs tbl)))
265 (chain (aref (sb-impl::hash-table-next-vector tbl) index)))))))
267 (defvar *tbl* (make-hash-table :weakness :key))
269 (import 'sb-impl::hash-table-smashed-cells)
270 ;;; We have a bunch of tests of weakness, but this is testing the new algorithm
271 ;;; which has two different freelists - one of cells that REMHASH has made available
272 ;;; and one of cells that GC has marked as empty. Since we no longer inhibit GC
273 ;;; during table operations, we need to give GC a list of its own to manipulate.
274 (with-test (:name (hash-table :gc-smashed-cell-list)
275 :skipped-on :gc-stress
276 :broken-on :mark-region-gc)
277 (flet ((f ()
278 (dotimes (i 20000) (setf (gethash i *tbl*) (- i)))
279 (setf (gethash (cons 1 2) *tbl*) 'foolz)
280 (assert (= (sb-impl::kv-vector-high-water-mark (sb-impl::hash-table-pairs *tbl*))
281 20001))
282 (loop for i from 10 by 10 repeat 20 do (remhash i *tbl*))))
283 ;; Ensure the values remain outside of the stack pointer for scrub-control-stack to work
284 (declare (notinline f))
285 (f))
286 (eval nil)
287 (sb-sys:scrub-control-stack)
288 (gc)
289 ;; There were 20 items REMHASHed plus the freelist contains a pointer
290 ;; to a cell which is one past the high-water-mark, for 21 cells in all.
291 (assert (= (length (hash-table-freelist *tbl*)) 21))
292 ;; The (1 . 2) cons was removed
293 (assert (= (length (hash-table-smashed-cells *tbl*)) 1))
294 ;; And its representation in the list of smashed cells doesn't
295 ;; fit in a packed integer (because the cell index is 20001)
296 (assert (and (consp (hash-table-smashed-cells *tbl*))
297 (consp (car (hash-table-smashed-cells *tbl*)))))
298 (setf (gethash 'jeebus *tbl*) 9)
299 ;; Freelist should not have changed at all.
300 (assert (= (length (hash-table-freelist *tbl*)) 21))
301 ;; And the smashed cell was used.
302 (assert (null (hash-table-smashed-cells *tbl*)))
303 (setf (gethash (make-symbol "SANDWICH") *tbl*) 8)
304 ;; Now one item should have been popped
305 (assert (= (length (hash-table-freelist *tbl*)) 20))
306 ;; should have used up the smashed cell
307 (sb-sys:scrub-control-stack)
308 (gc)
309 ;; Should have smashed the uninterned symbol
310 (assert (hash-table-smashed-cells *tbl*)))
312 ;;; Immediate values are address-based but not motion-sensitive.
313 ;;; The hash function returns address-based = NIL.
314 ;;; The specialized function GETHASH/EQ never compares hashes,
315 ;;; but the generalized FINDHASH-WEAK forgot to not compare them.
316 (with-test (:name (hash-table :weak-eq-table-fixnum-key))
317 (let ((table (make-hash-table :test 'eq :weakness :key)))
318 (setf (gethash 42 table) t)
319 (gethash 42 table)))
321 (with-test (:name :write-hash-table-readably)
322 (let ((h1 (make-hash-table)))
323 (setf (gethash :a h1) 1
324 (gethash :b h1) 2
325 (gethash :c h1) 3)
326 (let* ((s1 (write-to-string h1 :readably t))
327 (h2 (read-from-string s1))
328 (s2 (write-to-string h2 :readably t)))
329 ;; S1 and S2 used to be STRING/= prior to making
330 ;; %HASH-TABLE-ALIST iterate backwards.
331 (assert (string= s1 s2)))))
333 (defun test-this-object (table-kind object)
334 (let ((store (make-hash-table :test table-kind)))
335 (setf (gethash object store) '(1 2 3))
336 (assert (equal (gethash object store) '(1 2 3)))
337 (assert (remhash object store))
338 (assert (= (hash-table-count store) 0))))
340 ;; https://bugs.launchpad.net/sbcl/+bug/1865094
341 (with-test (:name :remhash-eq-comparable-in-equal-table)
342 ;; These objects are all hashed by their address,
343 ;; so their stored hash value is the magic marker.
344 (test-this-object 'equal (make-hash-table))
345 (test-this-object 'equal (sb-kernel:find-defstruct-description 'sb-c::node))
346 (test-this-object 'equal #'car)
347 (test-this-object 'equal (sb-sys:int-sap 0))
348 ;; a CLASS is not hashed address-sensitively, so this wasn't
349 ;; actually subject to the bug. Try it anyway.
350 (test-this-object 'equal (find-class 'class)))
352 (with-test (:name :remhash-eq-comparable-in-equalp-table)
353 ;; EQUALP tables worked a little better, because more objects are
354 ;; hashed non-address-sensitively by EQUALP-HASH relative to EQUAL-HASH,
355 ;; and those objects have comparators that descend.
356 ;; However, there are still some things hashed by address:
357 (test-this-object 'equalp (make-weak-pointer "bleep"))
358 (test-this-object 'equalp (sb-int:find-fdefn '(setf car)))
359 (test-this-object 'equalp #'car)
360 (test-this-object 'equalp (constantly 5))
361 (test-this-object 'equal (sb-sys:int-sap 0)))
363 ;;; I don't like that we call SXHASH on layouts, but there was a horrible
364 ;;; regression in which we returned (SXHASH (LAYOUT-OF X)) if X was a layout,
365 ;;; which essentially meant that all layouts hashed to LAYOUT's hash.
366 ;;; This affected the performance of TYPECASE.
367 (with-test (:name :sxhash-on-layout)
368 (dolist (x '(pathname cons array))
369 (let ((l (sb-kernel:find-layout x)))
370 (assert (= (sxhash l) (sb-kernel:layout-clos-hash l))))))
372 (with-test (:name :equalp-table-fixnum-equal-to-float)
373 (let ((table (make-hash-table :test #'equalp)))
374 (assert (eql (setf (gethash 3d0 table) 1)
375 (gethash 3 table)))))
377 ;;; Check that hashing a stringlike thing which is possibly NIL uses
378 ;;; a specialized hasher (after prechecking for NIL via the transform).
379 (with-test (:name :transform-sxhash-string-and-bv)
380 (dolist (case `((bit-vector sb-kernel:%sxhash-bit-vector)
381 (string sb-kernel:%sxhash-string)
382 (simple-bit-vector sb-kernel:%sxhash-simple-bit-vector)
383 (simple-string sb-kernel:%sxhash-simple-string)))
384 (let ((f `(lambda (x) (sxhash (truly-the (or null ,(car case)) x)))))
385 (assert (equal (ctu:ir1-named-calls f) (cdr case))))))
387 (with-test (:name :sxhash-on-displaced-string
388 :fails-on :sbcl)
389 (let* ((adjustable-string
390 (make-array 100 :element-type 'character :adjustable t))
391 (displaced-string
392 (make-array 50 :element-type 'character :displaced-to adjustable-string
393 :displaced-index-offset 19)))
394 (adjust-array adjustable-string 68)
395 (assert-error (aref displaced-string 0)) ; should not work
396 ;; This should fail, but instead it computes the hash of a string of
397 ;; length 0 which is what we turn displaced-string into after adjustable-string
398 ;; is changed to be too small to hold displaced-string.
399 ;; As a possible fix, we could distinguish between safe and unsafe code,
400 ;; never do the sxhash transforms in safe code, and have the full call to
401 ;; sxhash always check for "obsolete" strings.
402 ;; I would guess that all sorts of string transforms are similarly
403 ;; suspect in this edge case.
404 ;; On the one hand, this is undefined behavior as per CLHS:
405 ;; "If A is displaced to B, the consequences are unspecified if B is adjusted
406 ;; in such a way that it no longer has enough elements to satisfy A."
407 ;; But on the other, we always try to be maximally helpful,
408 ;; and it's extremely dubious that we're totally silent here.
409 ;; Also the same issue exists with bit-vectors.
410 (assert-error (sxhash displaced-string))))
412 (with-test (:name :array-psxhash-non-consing :skipped-on :interpreter
413 :fails-on :ppc64)
414 (let ((a (make-array 1000 :element-type 'double-float
415 :initial-element (+ 0d0 #+(or arm64 x86-64)
416 1d300))))
417 (ctu:assert-no-consing (sb-int:psxhash a))))
419 (with-test (:name :array-psxhash)
420 (let ((table (make-hash-table :test 'equalp)))
421 (let ((x (vector 1.0d0 1.0d0))
422 (y (make-array 2 :element-type 'double-float :initial-contents '(1.0d0 1.0d0))))
423 (setf (gethash x table) t)
424 (assert (gethash y table)))))
428 ;;; Our SXHASH has kinda bad behavior on 64-bit fixnums.
429 ;;; I wonder if we should try to hash fixnum better for users.
430 ;;; Example:
431 * (dotimes (i 20)
432 (let ((a (+ sb-vm:dynamic-space-start (* i 32768))))
433 (format t "~4d ~x ~b~%" i a (sxhash a))))
434 0 1000000000 1000010010001101110100101010000110101100010111010111001001010
435 1 1000008000 1000010010001101110100101010000110101100000111110111001001010
436 2 1000010000 1000010010001101110100101010000110101100110110010111001001010
437 3 1000018000 1000010010001101110100101010000110101100100110110111001001010
438 4 1000020000 1000010010001101110100101010000110101101010101010111001001010
439 5 1000028000 1000010010001101110100101010000110101101000101110111001001010
440 6 1000030000 1000010010001101110100101010000110101101110100010111001001010
441 7 1000038000 1000010010001101110100101010000110101101100100110111001001010
442 8 1000040000 1000010010001101110100101010000110101110010011010111001001010
443 9 1000048000 1000010010001101110100101010000110101110000011110111001001010
444 10 1000050000 1000010010001101110100101010000110101110110010010111001001010
445 11 1000058000 1000010010001101110100101010000110101110100010110111001001010
446 12 1000060000 1000010010001101110100101010000110101111010001010111001001010
447 13 1000068000 1000010010001101110100101010000110101111000001110111001001010
448 14 1000070000 1000010010001101110100101010000110101111110000010111001001010
449 15 1000078000 1000010010001101110100101010000110101111100000110111001001010
450 16 1000080000 1000010010001101110100101010000110101000011111010111001001010
453 (with-test (:name :fixnum-hash-with-more-entropy)
454 (flet ((try (hasher)
455 (let (hashes)
456 (dotimes (i 20)
457 (let* ((a (+ #+64-bit sb-vm:dynamic-space-start
458 #-64-bit #xD7C83000
459 (* i 32768)))
460 (hash (funcall hasher a)))
461 ;; (format t "~4d ~x ~v,'0b~%" i a sb-vm:n-word-bits hash)
462 (push hash hashes)))
463 ;; Try some 4-bit subsequences of the hashes
464 ;; as various positions and make sure that there
465 ;; are none that match for all inputs.
466 ;; For 64-bit machines, SXHASH yields complete overlap
467 ;; of all the test cases at various positions.
468 ;; 32-bit doesn't seem to suffer from this.
469 (dotimes (position (- sb-vm:n-fixnum-bits 4))
470 (let ((field
471 (mapcar (lambda (x) (ldb (byte 4 position) x))
472 hashes)))
473 ;; (print `(,position , (length (delete-duplicates field))))
474 (when #-64-bit t #+64-bit (not (eq hasher 'sxhash))
475 (assert (>= (length (remove-duplicates field)) 8))))))))
476 (try 'sxhash)
477 (try 'sb-int:murmur-hash-word/fixnum)))
479 ;;; Ensure that all layout-clos-hash values have a 1 somewhere
480 ;;; such that LOGANDing any number of nonzero hashes is nonzero.
481 (with-test (:name :layout-hashes-constant-1-bit)
482 (let ((combined most-positive-fixnum))
483 (maphash (lambda (classoid layout)
484 (declare (ignore classoid))
485 (let ((hash (sb-kernel:layout-clos-hash layout)))
486 (setq combined (logand combined hash))))
487 (sb-kernel:classoid-subclasses (sb-kernel:find-classoid 't)))
488 (assert (/= 0 combined))))
490 (defun c-murmur-fmix (word)
491 (declare (type sb-vm:word word))
492 (alien-funcall (extern-alien #+64-bit "murmur3_fmix64"
493 #-64-bit "murmur3_fmix32"
494 (function unsigned unsigned))
495 word))
496 (compile 'c-murmur-fmix)
497 ;;; Assert that the Lisp translation of the C murmur hash matches.
498 ;;; This is slightly redundant with the :ADDRESS-BASED-SXHASH-GCING test,
499 ;;; though this one is a strictly a unit test of the hashing function,
500 ;;; and the other is a test that GC does the right thing.
501 ;;; So it's not a bad idea to have both.
502 (defun murmur-compare (random-state n-iter)
503 (let ((limit (1+ sb-ext:most-positive-word)))
504 (loop repeat (the fixnum n-iter)
506 (let* ((n (random limit random-state))
507 (lisp-hash (sb-impl::murmur3-fmix-word n))
508 (c-hash (c-murmur-fmix n)))
509 (assert (= lisp-hash c-hash))))))
510 (compile 'murmur-compare)
512 (with-test (:name :mumur-hash-compare)
513 (murmur-compare (make-random-state t) 100000))
515 (with-test (:name :sap-hash)
516 (assert (/= (sxhash (sb-sys:int-sap #x1000))
517 (sxhash (sb-sys:int-sap 0))))
518 #-interpreter
519 (let ((list-of-saps
520 (loop for i below 1000 collect (sb-sys:int-sap i))))
521 (ctu:assert-no-consing
522 (opaque-identity
523 (let ((foo 0))
524 (dolist (sap list-of-saps foo)
525 (setq foo (logxor foo (sxhash sap)))))))))
527 (defconstant +flat-limit/eq+ 32)
528 (defconstant +flat-limit/eql+ 16)
529 (defconstant +hft-non-adaptive+ -3)
530 (defconstant +hft-safe+ -2)
531 (defconstant +hft-flat+ -1)
532 (defconstant +hft-eq-mid+ 0)
534 (with-test (:name :eq-flat-switch)
535 (let ((h (make-hash-table :test 'eq)))
536 (loop for i below +flat-limit/eq+ do
537 (setf (gethash i h) t))
538 (assert (sb-impl::flat-hash-table-p h))
539 (assert (eq (sb-impl::hash-table-gethash-impl h)
540 #'sb-impl::gethash/eq-hash/flat))
541 (assert (eq (sb-impl::hash-table-puthash-impl h)
542 #'sb-impl::puthash/eq-hash/flat))
543 (assert (eq (sb-impl::hash-table-remhash-impl h)
544 #'sb-impl::remhash/eq-hash/flat))
545 (setf (gethash (1+ +flat-limit/eq+) h) t)
546 (assert (not (sb-impl::flat-hash-table-p h)))
547 (assert (eq (sb-impl::hash-table-gethash-impl h)
548 #'sb-impl::gethash/eq-hash/common))
549 (assert (eq (sb-impl::hash-table-puthash-impl h)
550 #'sb-impl::puthash/eq-hash/common))
551 (assert (eq (sb-impl::hash-table-remhash-impl h)
552 #'sb-impl::remhash/eq-hash/common))))
554 (with-test (:name :eql-flat-switch-point)
555 (let ((h (make-hash-table)))
556 (loop for i below +flat-limit/eql+ do
557 (setf (gethash i h) t))
558 (assert (sb-impl::flat-hash-table-p h))
559 (assert (eq (sb-impl::hash-table-gethash-impl h)
560 #'sb-impl::gethash/eql-hash/flat))
561 (assert (eq (sb-impl::hash-table-puthash-impl h)
562 #'sb-impl::puthash/eql-hash/flat))
563 (assert (eq (sb-impl::hash-table-remhash-impl h)
564 #'sb-impl::remhash/eql-hash/flat))
565 (setf (gethash (1+ +flat-limit/eql+) h) t)
566 (assert (not (sb-impl::flat-hash-table-p h)))
567 (assert (eq (sb-impl::hash-table-gethash-impl h)
568 #'sb-impl::gethash/eql-hash))
569 (assert (eq (sb-impl::hash-table-puthash-impl h)
570 #'sb-impl::puthash/eql-hash))
571 (assert (eq (sb-impl::hash-table-remhash-impl h)
572 #'sb-impl::remhash/eql-hash))))
574 (with-test (:name :eq-hash-growth-from-non-flat-init)
575 (let ((h (make-hash-table :size 222 :test 'eq)))
576 (assert (= (sb-impl::hash-table-hash-fun-state h) +hft-eq-mid+))
577 (dotimes (i 1000)
578 (setf (gethash i h) i))
579 (assert (= (sb-impl::hash-table-hash-fun-state h) +hft-eq-mid+))))
581 #+64-bit
582 (with-test (:name :eq-hash-switch-to-safe)
583 (let ((h (make-hash-table :test 'eq)))
584 ;; Prevent SB-IMPL::GUESS-EQ-HASH-FUN from finding the shift
585 ;; required to bring the informative bits into range.
586 (setf (gethash t h) t)
587 (dotimes (i (1+ +flat-limit/eq+))
588 (setf (gethash (float i) h) i))
589 (assert (= (sb-impl::hash-table-hash-fun-state h) +hft-safe+))))
591 (with-test (:name :eq-hash-switch-to-mid)
592 (let ((h (make-hash-table :test 'eq)))
593 (assert (= (sb-impl::hash-table-hash-fun-state h) +hft-flat+))
594 (loop for i below (1+ +flat-limit/eq+)
595 do (setf (gethash (cons nil nil) h) i))
596 (assert (plusp (sb-impl::hash-table-hash-fun-state h)))
597 (loop for i upfrom +flat-limit/eq+ below 8000
598 do (setf (gethash i h) i))
599 (assert (= (sb-impl::hash-table-hash-fun-state h) +hft-eq-mid+))))
601 (with-test (:name :eq-hash-switch-to-mid/weak)
602 (let ((h (make-hash-table :test 'eq :weakness :value)))
603 (assert (= (sb-impl::hash-table-hash-fun-state h) +hft-eq-mid+))
604 (loop for i below 20
605 do (setf (gethash (cons nil nil) h) i))
606 ;; Weak hash tables are not adaptive, currently.
607 (assert (= (sb-impl::hash-table-hash-fun-state h) +hft-eq-mid+))
608 (loop for i upfrom +flat-limit/eq+ below 8000
609 do (setf (gethash i h) i))
610 (assert (= (sb-impl::hash-table-hash-fun-state h) +hft-eq-mid+))))
612 (with-test (:name :eq-hash-growth-from-non-flat-init)
613 (let ((h (make-hash-table :size 222 :test 'eq)))
614 (assert (= (sb-impl::hash-table-hash-fun-state h) +hft-eq-mid+))
615 (dotimes (i 1000)
616 (setf (gethash i h) i))
617 (assert (= (sb-impl::hash-table-hash-fun-state h) +hft-eq-mid+))))
619 ;;; For a uniform multinomial distribution with K samples and the same
620 ;;; number of categories, estimate the distribution of its maximum
621 ;;; count M (i.e. the number of samples in the most populous
622 ;;; category). Then, find the X for which p(M <= X) ~= 0.99. Return
623 ;;; the X for each K in 2^0, 2^1, ..., 2^(MAX-BITS - 1).
624 (defun estimate-uniform-multinomial-maximum-cutoff (max-bits n-repeats
625 &key (verbose t)
626 (load-factor 1))
627 (declare (type fixnum n-repeats))
628 (loop
629 for k-bits upfrom 0 below max-bits
630 collect (let* ((k (expt 2 k-bits))
631 ;; +MIN-HASH-TABLE-SIZE+ implies at least 8 buckets.
632 (n-buckets (sb-int::power-of-two-ceiling
633 (max 8 (/ k load-factor))))
634 (b (make-array n-buckets :element-type '(unsigned-byte 8)))
635 (m (make-array (1+ k) :element-type '(unsigned-byte 16))))
636 (locally (declare (optimize speed (safety 0)))
637 (loop repeat n-repeats
638 do (fill b 0)
639 (loop repeat k
640 do (incf (aref b (random n-buckets))))
641 (incf (aref m (loop for c across b maximize c)))))
642 (let ((sum 0)
643 (best-n-bits nil)
644 (min-prob 0.99))
645 (when verbose
646 (format t "~%K=2^~S, ~S, R=~S~%" k-bits
647 (round (log 2 n-buckets)) n-repeats))
648 (loop for i upto k
649 do (let ((p (/ (aref m i) n-repeats)))
650 (incf sum p)
651 (when verbose
652 (format t "~S: ~,4F (~,4F)~%" i p sum)))
653 (when (< min-prob sum)
654 (setq best-n-bits i)
655 (return)))
656 (when verbose
657 (format t "just above prob ~6,4F at ~S~%"
658 min-prob best-n-bits))
659 best-n-bits))))
661 (with-test (:name :max-chain-length)
662 ;; The estimation slows down exponentially and gets flakier with
663 ;; higher MAX-BITS, so use a small value for the test. Note that
664 ;; it's still faster than a memoizing implementation of the
665 ;; algorithm in Appendix A of "Computing the exact distributions of
666 ;; some functions of the ordered multinomial counts" by Bonetti et
667 ;; al.
668 (let ((cutoffs (estimate-uniform-multinomial-maximum-cutoff
669 10 20000 :verbose nil)))
670 (loop for n-bits upfrom 0
671 for cutoff in cutoffs
672 do (assert (<= (abs (- (sb-impl::max-chain-length (ash 1 n-bits))
673 cutoff))
674 1)))))
676 (defun sxstate-limit (sxstate)
677 (ldb (byte #+64-bit 31 #-64-bit (- sb-vm:n-fixnum-bits 4) 0) sxstate))
679 (defun ht-limit (ht)
680 (sxstate-limit (sb-impl::hash-table-hash-fun-state ht)))
682 (defun sxstate-max-chain-length (sxstate)
683 (ldb (byte 4 #+64-bit 31 #-64-bit (- sb-vm:n-fixnum-bits 4)) sxstate))
685 (defun ht-max-chain-length (ht)
686 (sxstate-max-chain-length (sb-impl::hash-table-hash-fun-state ht)))
688 (defconstant +truncated-hash-bit+ #-64-bit 29 #+64-bit 31)
690 (defun truncated-hash-p (hash)
691 (logbitp +truncated-hash-bit+ hash))
693 (defun check-sxstate-limit (ht)
694 (let* ((kv-vector (sb-impl::hash-table-pairs ht))
695 (hwm (sb-impl::kv-vector-high-water-mark kv-vector))
696 (hash-vector (sb-impl::hash-table-hash-vector ht))
697 (limit (ht-limit ht)))
698 (loop
699 for i upfrom 1 upto hwm
700 do (let ((key (aref kv-vector (* 2 i))))
701 (unless (sb-impl::empty-ht-slot-p key)
702 (assert (eq (not (not (truncated-hash-p (aref hash-vector i))))
703 (not (not (< limit (length key)))))
704 () "~@<key: ~S, key length: ~S, limit: ~S, ~
705 stored hash: ~S (truncatedp: ~S)~:@>"
706 key (length key) limit (aref hash-vector i)
707 (truncated-hash-p (aref hash-vector i))))))))
709 (defun check-sxstate-max-chain-length (ht)
710 (let ((hash-vector (sb-impl::hash-table-hash-vector ht))
711 (index-vector (sb-impl::hash-table-index-vector ht))
712 (next-vector (sb-impl::hash-table-next-vector ht))
713 (max-chain-length (ht-max-chain-length ht)))
714 (loop for i across index-vector do
715 ;; In puthash, we only check for MAX-CHAIN-LENGTH when adding a
716 ;; truncated hash. This test could fail spuriously for some
717 ;; orderings of truncated and non-truncated keys, but in our
718 ;; tests we all keys of the same length.
719 (when (truncated-hash-p (aref hash-vector i))
720 (let ((chain-length (loop for j = i then (aref next-vector j)
721 until (zerop j)
722 count 1)))
723 (assert (<= chain-length max-chain-length)))))))
725 (with-test (:name :raise-sxstate-limit-and-rehash)
726 (dolist (weakness '(nil))
727 (dolist (n-keys '(32 64))
728 (dolist (n-constants '(5 6 7 8 13 16 17))
729 (let ((constant-prefix (loop for i below n-constants collect i)))
730 (sb-sys:without-gcing
731 (dolist (size '(7 8 200))
732 (let* ((h (make-hash-table :test 'equal :size size
733 :weakness weakness))
734 (orig-max-chain-length (ht-max-chain-length h)))
735 (format t "weakness: ~S, n-constants: ~S, size: ~S, ~
736 orig-max-chain-length: ~S~%"
737 weakness n-constants size orig-max-chain-length)
738 (loop for i below n-keys do
739 (let ((key (append constant-prefix (list i))))
740 (setf (gethash key h) t))
741 (check-sxstate-limit h)
742 (check-sxstate-max-chain-length h)
743 (let* ((must-have-raised-limit-p (< orig-max-chain-length
744 (hash-table-count h)))
745 (n-distinct-hashes
746 (count 0 (sb-impl::hash-table-index-vector h)
747 :test-not #'eql)))
748 (format t "at count ~S: max-chain-length: ~S, ~
749 limit: ~S, n-distinct-hashes: ~S~%"
750 (hash-table-count h)
751 (ht-max-chain-length h) (ht-limit h)
752 n-distinct-hashes)
753 (cond (must-have-raised-limit-p
754 (assert (< n-constants (ht-limit h)))
755 (assert (> n-distinct-hashes 1)))
757 (assert (= n-distinct-hashes 1))))))))))))))
759 (with-test (:name (:adaptive-equal-hash :truncate-list))
760 (let ((hash-0 (sb-impl::perhaps-truncated-equal-hash () 1)))
761 (assert (not (truncated-hash-p hash-0)))
762 ;; The final NIL does not count towards the limit.
763 (let ((hash-1 (sb-impl::perhaps-truncated-equal-hash '(1) 1)))
764 (assert (not (truncated-hash-p hash-1)))
765 (assert (/= hash-1 hash-0))
766 ;; The final cons is not in the hash.
767 (let ((hash-2 (sb-impl::perhaps-truncated-equal-hash '(1 2) 1))
768 (hash-3 (sb-impl::perhaps-truncated-equal-hash '(1 3) 1)))
769 (assert (truncated-hash-p hash-2))
770 (assert (= hash-2 hash-3))))))
772 (with-test (:name (:adaptive-equal-hash :truncate-string))
773 (flet ((hash (string limit)
774 (sb-impl::perhaps-truncated-equal-hash string limit)))
775 (let ((hash-0 (hash "1234" 4))
776 (hash-1 (hash "12a34" 4))
777 (hash-2 (hash "12a34" 5)))
778 (assert (not (truncated-hash-p hash-0)))
779 (assert (truncated-hash-p hash-1))
780 (assert (/= hash-0 hash-1))
781 (assert (not (truncated-hash-p hash-2)))
782 (assert (/= hash-1 hash-2)))))
784 (with-test (:name (:adaptive-equal-hash :eql-hash-not-truncated))
785 (assert (not (truncated-hash-p (sb-impl::perhaps-truncated-equal-hash
786 (ash 1 +truncated-hash-bit+) 0)))))