1 ;;;; This file implements a sparse set abstraction, represented as a
2 ;;;; custom lightweight hash-table. We don't use bit-vectors to
3 ;;;; represent sets in flow analysis, since the universe may be quite
4 ;;;; large but the average number of elements is small. We also don't
5 ;;;; use sorted lists like in the original CMUCL code, since it had
6 ;;;; bad worst-case performance (on some real-life programs the
7 ;;;; hash-based sset gives a 20% compilation speedup). A custom
8 ;;;; hash-table is used since the standard one is too heavy (locking,
9 ;;;; memory use) for this use.
11 ;;;; This software is part of the SBCL system. See the README file for
12 ;;;; more information.
14 ;;;; This software is derived from the CMU CL system, which was
15 ;;;; written at Carnegie Mellon University and released into the
16 ;;;; public domain. The software is in the public domain and is
17 ;;;; provided with absolutely no warranty. See the COPYING and CREDITS
18 ;;;; files for more information. (This file no)
22 ;;; Each structure that may be placed in a SSET must include the
23 ;;; SSET-ELEMENT structure. We allow an initial value of NIL to mean
24 ;;; that no ordering has been assigned yet (although an ordering must
25 ;;; be assigned before doing set operations.)
26 (defstruct (sset-element (:constructor nil
)
28 (number nil
:type
(or index null
)))
30 (defstruct (sset (:copier nil
)
31 (:constructor make-sset
(&optional vector free count
)))
32 ;; Vector containing the set values. 0 is used for empty (since
33 ;; initializing a vector with 0 is cheaper than with NIL), -1
34 ;; is used to mark buckets that used to contain an element, but no
36 (vector #() :type simple-vector
)
37 ;; How many elements can be inserted before rehashing.
38 ;; This is not the actual amount of free elements, but a ratio
39 ;; calculated from +sset-rehash-threshold+.
41 ;; How many elements are currently members of the set.
42 (count 0 :type index
))
44 (declaim (freeze-type sset
))
46 (defprinter (sset) vector
)
48 ;;; Iterate over the elements in SSET, binding VAR to each element in
50 (defmacro do-sset-elements
((var sset
&optional result
) &body body
)
51 `(loop for
,var across
(sset-vector ,sset
)
52 do
(unless (fixnump ,var
)
54 finally
(return ,result
)))
57 (declaim (inline sset-hash1
))
58 (defun sset-hash1 (element)
60 (let ((result (sset-element-number element
)))
61 ;; This is performance critical, and it's not certain that the host
62 ;; compiler does modular arithmetic optimization. Instad use
63 ;; something that most CL implementations will do efficiently.
64 (the fixnum
(logxor (the fixnum result
)
65 (the fixnum
(ash result -
9))
66 (the fixnum
(ash result -
5)))))
68 (let ((result (sset-element-number element
)))
69 (declare (type sb-vm
:word result
))
70 ;; We only use the low-order bits.
71 (macrolet ((set-result (form)
72 `(setf result
(ldb (byte #.sb-vm
:n-word-bits
0) ,form
))))
73 (set-result (+ result
(ash result -
19)))
74 (set-result (logxor result
(ash result -
13)))
75 (set-result (+ result
(ash result -
9)))
76 (set-result (logxor result
(ash result -
5)))
77 (set-result (+ result
(ash result -
2)))
78 (logand most-positive-fixnum result
))))
80 ;;; Secondary hash (for double hash probing). Needs to return an odd
82 (declaim (inline sset-hash2
))
83 (defun sset-hash2 (element)
84 (let ((number (sset-element-number element
)))
85 (declare (fixnum number
))
88 ;;; Rehash the sset when the proportion of free cells in the set is
89 ;;; lower than this, the value is a reciprocal.
90 (defconstant +sset-rehash-threshold
+ 4)
92 ;;; Double the size of the hash vector of SET.
93 (defun sset-grow (set)
94 (let* ((vector (sset-vector set
))
95 (length (if (zerop (length vector
))
97 (* (length vector
) 2)))
98 (new-vector (make-array length
100 (setf (sset-vector set
) new-vector
101 ;; SSET-ADJOIN below will decrement this and shouldn't reach zero
102 (sset-free set
) length
104 (loop for element across vector
105 do
(unless (fixnump element
)
106 (sset-adjoin element set
)))
107 ;; Now the real amount of elements which can be inserted before rehashing
108 (setf (sset-free set
) (- (sset-free set
)
109 (max 1 (truncate length
110 +sset-rehash-threshold
+))))))
113 ;;; Destructively add ELEMENT to SET. If ELEMENT was not in the set,
114 ;;; then we return true, otherwise we return false.
115 (declaim (ftype (sfunction (sset-element sset
) boolean
) sset-adjoin
))
116 (defun sset-adjoin (element set
)
117 (when (= (sset-free set
) 0)
119 (loop with vector
= (sset-vector set
)
120 with mask of-type fixnum
= (1- (length vector
))
121 with secondary-hash
= (sset-hash2 element
)
123 for hash of-type index
= (logand mask
(sset-hash1 element
)) then
124 (logand mask
(+ hash secondary-hash
))
125 for current
= (aref vector hash
)
126 do
(cond ((eql current
0)
127 (incf (sset-count set
))
129 (setf (aref vector deleted-index
) element
))
131 (decf (sset-free set
))
132 (setf (aref vector hash
) element
)))
135 (setf deleted-index hash
))
136 ((eq current element
)
139 ;;; Destructively remove ELEMENT from SET. If element was in the set,
140 ;;; then return true, otherwise return false.
141 (declaim (ftype (sfunction (sset-element sset
) boolean
) sset-delete
))
142 (defun sset-delete (element set
)
143 (when (zerop (length (sset-vector set
)))
144 (return-from sset-delete nil
))
145 (loop with vector
= (sset-vector set
)
146 with mask fixnum
= (1- (length vector
))
147 with secondary-hash
= (sset-hash2 element
)
148 for hash of-type index
= (logand mask
(sset-hash1 element
)) then
149 (logand mask
(+ hash secondary-hash
))
150 for current
= (aref vector hash
)
151 do
(cond ((eql current
0)
153 ((eq current element
)
154 (decf (sset-count set
))
155 (setf (aref vector hash
) -
1)
158 ;;; Return true if ELEMENT is in SET, false otherwise.
159 (declaim (ftype (sfunction (sset-element sset
) boolean
) sset-member
))
160 (defun sset-member (element set
)
161 (when (zerop (length (sset-vector set
)))
162 (return-from sset-member nil
))
163 (loop with vector
= (sset-vector set
)
164 with mask fixnum
= (1- (length vector
))
165 with secondary-hash
= (sset-hash2 element
)
166 for hash of-type index
= (logand mask
(sset-hash1 element
)) then
167 (logand mask
(+ hash secondary-hash
))
168 for current
= (aref vector hash
)
169 do
(cond ((eql current
0)
171 ((eq current element
)
174 (declaim (ftype (sfunction (sset sset
) boolean
) sset
=))
175 (defun sset= (set1 set2
)
176 (unless (eql (sset-count set1
)
178 (return-from sset
= nil
))
179 (do-sset-elements (element set1
)
180 (unless (sset-member element set2
)
181 (return-from sset
= nil
)))
184 ;;; Return true if SET contains no elements, false otherwise.
185 (declaim (ftype (sfunction (sset) boolean
) sset-empty
))
186 (defun sset-empty (set)
187 (zerop (sset-count set
)))
189 ;;; Return a new copy of SET.
190 (declaim (ftype (sfunction (sset) sset
) copy-sset
))
191 (defun copy-sset (set)
192 (make-sset (let* ((vector (sset-vector set
))
193 (new-vector (make-array (length vector
))))
194 (declare (type simple-vector vector new-vector
)
195 (optimize speed
(safety 0)))
196 ;; There's no REPLACE deftransform for simple-vectors.
197 (dotimes (i (length vector
))
198 (setf (aref new-vector i
)
204 ;;; Perform the appropriate set operation on SET1 and SET2 by
205 ;;; destructively modifying SET1. We return true if SET1 was modified,
207 (declaim (ftype (sfunction (sset sset
) boolean
) sset-union sset-intersection
209 (defun sset-union (set1 set2
)
210 (loop with modified
= nil
211 for element across
(sset-vector set2
)
212 do
(unless (fixnump element
)
213 (when (sset-adjoin element set1
)
215 finally
(return modified
)))
216 (defun sset-intersection (set1 set2
)
217 (loop with modified
= nil
218 for element across
(sset-vector set1
)
219 for index of-type index from
0
220 do
(unless (fixnump element
)
221 (unless (sset-member element set2
)
222 (decf (sset-count set1
))
223 (setf (aref (sset-vector set1
) index
) -
1
225 finally
(return modified
)))
226 (defun sset-difference (set1 set2
)
227 (loop with modified
= nil
228 for element across
(sset-vector set1
)
229 for index of-type index from
0
230 do
(unless (fixnump element
)
231 (when (sset-member element set2
)
232 (decf (sset-count set1
))
233 (setf (aref (sset-vector set1
) index
) -
1
235 finally
(return modified
)))
237 ;;; Destructively modify SET1 to include its union with the difference
238 ;;; of SET2 and SET3. We return true if SET1 was modified, false
240 (declaim (ftype (sfunction (sset sset sset
) boolean
) sset-union-of-difference
))
241 (defun sset-union-of-difference (set1 set2 set3
)
242 (loop with modified
= nil
243 for element across
(sset-vector set2
)
244 do
(unless (fixnump element
)
245 (unless (sset-member element set3
)
246 (when (sset-adjoin element set1
)
248 finally
(return modified
)))