Move ordered-set to sset.lisp
[sbcl.git] / src / compiler / sset.lisp
blob6588890daf2f9b268a62ac65aa06dbe31f757a03
1 ;;;; This file implements a sparse set abstraction, represented as a
2 ;;;; custom lightweight hash-table. We don't use bit-vectors to
3 ;;;; represent sets in flow analysis, since the universe may be quite
4 ;;;; large but the average number of elements is small. We also don't
5 ;;;; use sorted lists like in the original CMUCL code, since it had
6 ;;;; bad worst-case performance (on some real-life programs the
7 ;;;; hash-based sset gives a 20% compilation speedup). A custom
8 ;;;; hash-table is used since the standard one is too heavy (locking,
9 ;;;; memory use) for this use.
11 ;;;; This software is part of the SBCL system. See the README file for
12 ;;;; more information.
13 ;;;;
14 ;;;; This software is derived from the CMU CL system, which was
15 ;;;; written at Carnegie Mellon University and released into the
16 ;;;; public domain. The software is in the public domain and is
17 ;;;; provided with absolutely no warranty. See the COPYING and CREDITS
18 ;;;; files for more information. (This file no)
20 (in-package "SB!C")
22 ;;; Each structure that may be placed in a SSET must include the
23 ;;; SSET-ELEMENT structure. We allow an initial value of NIL to mean
24 ;;; that no ordering has been assigned yet (although an ordering must
25 ;;; be assigned before doing set operations.)
26 (def!struct (sset-element (:constructor nil)
27 (:copier nil))
28 (number nil :type (or index null)))
30 (defstruct (sset (:copier nil)
31 (:constructor make-sset (&optional vector free count)))
32 ;; Vector containing the set values. 0 is used for empty (since
33 ;; initializing a vector with 0 is cheaper than with NIL), -1
34 ;; is used to mark buckets that used to contain an element, but no
35 ;; longer do.
36 (vector #() :type simple-vector)
37 ;; How many elements can be inserted before rehashing.
38 ;; This is not the actual amount of free elements, but a ratio
39 ;; calculated from +sset-rehash-threshold+.
40 (free 0 :type index)
41 ;; How many elements are currently members of the set.
42 (count 0 :type index))
44 ;;; Ordered set
45 (defstruct (oset
46 (:include sset)
47 (:copier nil))
48 (members nil :type list))
50 (declaim (freeze-type sset))
52 (defprinter (sset) vector)
54 ;;; Iterate over the elements in SSET, binding VAR to each element in
55 ;;; turn.
56 (defmacro do-sset-elements ((var sset &optional result) &body body)
57 `(loop for ,var across (sset-vector ,sset)
58 do (unless (fixnump ,var)
59 ,@body)
60 finally (return ,result)))
62 ;;; Primary hash.
63 (declaim (inline sset-hash1))
64 (defun sset-hash1 (element)
65 #+sb-xc-host
66 (let ((result (sset-element-number element)))
67 ;; This is performance critical, and it's not certain that the host
68 ;; compiler does modular arithmetic optimization. Instad use
69 ;; something that most CL implementations will do efficiently.
70 (the fixnum (logxor (the fixnum result)
71 (the fixnum (ash result -9))
72 (the fixnum (ash result -5)))))
73 #-sb-xc-host
74 (let ((result (sset-element-number element)))
75 (declare (type sb!vm:word result))
76 ;; We only use the low-order bits.
77 (macrolet ((set-result (form)
78 `(setf result (ldb (byte #.sb!vm:n-word-bits 0) ,form))))
79 (set-result (+ result (ash result -19)))
80 (set-result (logxor result (ash result -13)))
81 (set-result (+ result (ash result -9)))
82 (set-result (logxor result (ash result -5)))
83 (set-result (+ result (ash result -2)))
84 (logand sb!xc:most-positive-fixnum result))))
86 ;;; Secondary hash (for double hash probing). Needs to return an odd
87 ;;; number.
88 (declaim (inline sset-hash2))
89 (defun sset-hash2 (element)
90 (let ((number (sset-element-number element)))
91 (declare (fixnum number))
92 (logior 1 number)))
94 ;;; Rehash the sset when the proportion of free cells in the set is
95 ;;; lower than this, the value is a reciprocal.
96 (defconstant +sset-rehash-threshold+ 4)
98 ;;; Double the size of the hash vector of SET.
99 (defun sset-grow (set)
100 (let* ((vector (sset-vector set))
101 (length (if (zerop (length vector))
103 (* (length vector) 2)))
104 (new-vector (make-array length
105 :initial-element 0)))
106 (setf (sset-vector set) new-vector
107 ;; SSET-ADJOIN below will decrement this and shouldn't reach zero
108 (sset-free set) length
109 (sset-count set) 0)
110 (loop for element across vector
111 do (unless (fixnump element)
112 (sset-adjoin element set)))
113 ;; Now the real amount of elements which can be inserted before rehashing
114 (setf (sset-free set) (- (sset-free set)
115 (max 1 (truncate length
116 +sset-rehash-threshold+))))))
119 ;;; Destructively add ELEMENT to SET. If ELEMENT was not in the set,
120 ;;; then we return true, otherwise we return false.
121 (declaim (ftype (sfunction (sset-element sset) boolean) sset-adjoin))
122 (defun sset-adjoin (element set)
123 (when (= (sset-free set) 0)
124 (sset-grow set))
125 (loop with vector = (sset-vector set)
126 with mask of-type fixnum = (1- (length vector))
127 with secondary-hash = (sset-hash2 element)
128 with deleted-index
129 for hash of-type index = (logand mask (sset-hash1 element)) then
130 (logand mask (+ hash secondary-hash))
131 for current = (aref vector hash)
132 do (cond ((eql current 0)
133 (incf (sset-count set))
134 (cond (deleted-index
135 (setf (aref vector deleted-index) element))
137 (decf (sset-free set))
138 (setf (aref vector hash) element)))
139 (return t))
140 ((eql current -1)
141 (setf deleted-index hash))
142 ((eq current element)
143 (return nil)))))
145 ;;; Destructively remove ELEMENT from SET. If element was in the set,
146 ;;; then return true, otherwise return false.
147 (declaim (ftype (sfunction (sset-element sset) boolean) sset-delete))
148 (defun sset-delete (element set)
149 (when (zerop (length (sset-vector set)))
150 (return-from sset-delete nil))
151 (loop with vector = (sset-vector set)
152 with mask fixnum = (1- (length vector))
153 with secondary-hash = (sset-hash2 element)
154 for hash of-type index = (logand mask (sset-hash1 element)) then
155 (logand mask (+ hash secondary-hash))
156 for current = (aref vector hash)
157 do (cond ((eql current 0)
158 (return nil))
159 ((eq current element)
160 (decf (sset-count set))
161 (setf (aref vector hash) -1)
162 (return t)))))
164 ;;; Return true if ELEMENT is in SET, false otherwise.
165 (declaim (ftype (sfunction (sset-element sset) boolean) sset-member))
166 (defun sset-member (element set)
167 (when (zerop (length (sset-vector set)))
168 (return-from sset-member nil))
169 (loop with vector = (sset-vector set)
170 with mask fixnum = (1- (length vector))
171 with secondary-hash = (sset-hash2 element)
172 for hash of-type index = (logand mask (sset-hash1 element)) then
173 (logand mask (+ hash secondary-hash))
174 for current = (aref vector hash)
175 do (cond ((eql current 0)
176 (return nil))
177 ((eq current element)
178 (return t)))))
180 (declaim (ftype (sfunction (sset sset) boolean) sset=))
181 (defun sset= (set1 set2)
182 (unless (eql (sset-count set1)
183 (sset-count set2))
184 (return-from sset= nil))
185 (do-sset-elements (element set1)
186 (unless (sset-member element set2)
187 (return-from sset= nil)))
190 ;;; Return true if SET contains no elements, false otherwise.
191 (declaim (ftype (sfunction (sset) boolean) sset-empty))
192 (defun sset-empty (set)
193 (zerop (sset-count set)))
195 ;;; Return a new copy of SET.
196 (declaim (ftype (sfunction (sset) sset) copy-sset))
197 (defun copy-sset (set)
198 (make-sset (let* ((vector (sset-vector set))
199 (new-vector (make-array (length vector))))
200 (declare (type simple-vector vector new-vector)
201 (optimize speed (safety 0)))
202 ;; There's no REPLACE deftransform for simple-vectors.
203 (dotimes (i (length vector))
204 (setf (aref new-vector i)
205 (aref vector i)))
206 new-vector)
207 (sset-free set)
208 (sset-count set)))
210 ;;; Perform the appropriate set operation on SET1 and SET2 by
211 ;;; destructively modifying SET1. We return true if SET1 was modified,
212 ;;; false otherwise.
213 (declaim (ftype (sfunction (sset sset) boolean) sset-union sset-intersection
214 sset-difference))
215 (defun sset-union (set1 set2)
216 (loop with modified = nil
217 for element across (sset-vector set2)
218 do (unless (fixnump element)
219 (when (sset-adjoin element set1)
220 (setf modified t)))
221 finally (return modified)))
222 (defun sset-intersection (set1 set2)
223 (loop with modified = nil
224 for element across (sset-vector set1)
225 for index of-type index from 0
226 do (unless (fixnump element)
227 (unless (sset-member element set2)
228 (decf (sset-count set1))
229 (setf (aref (sset-vector set1) index) -1
230 modified t)))
231 finally (return modified)))
232 (defun sset-difference (set1 set2)
233 (loop with modified = nil
234 for element across (sset-vector set1)
235 for index of-type index from 0
236 do (unless (fixnump element)
237 (when (sset-member element set2)
238 (decf (sset-count set1))
239 (setf (aref (sset-vector set1) index) -1
240 modified t)))
241 finally (return modified)))
243 ;;; Destructively modify SET1 to include its union with the difference
244 ;;; of SET2 and SET3. We return true if SET1 was modified, false
245 ;;; otherwise.
246 (declaim (ftype (sfunction (sset sset sset) boolean) sset-union-of-difference))
247 (defun sset-union-of-difference (set1 set2 set3)
248 (loop with modified = nil
249 for element across (sset-vector set2)
250 do (unless (fixnump element)
251 (unless (sset-member element set3)
252 (when (sset-adjoin element set1)
253 (setf modified t))))
254 finally (return modified)))
256 ;;; Ordered set
258 (defun oset-adjoin (oset element)
259 (when (sset-adjoin element oset)
260 (push element (oset-members oset))
263 (defun oset-delete (oset element)
264 (when (sset-delete element oset)
265 (setf (oset-members oset)
266 (delete element (oset-members oset)))
269 (declaim (inline oset-member))
270 (defun oset-member (oset element)
271 (sset-member element oset))
273 (defmacro do-oset-elements ((variable oset &optional return) &body body)
274 `(dolist (,variable (oset-members ,oset) ,return)
275 ,@body))